Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/advanced/input_files/input-main.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
- [pw\_seed](#pw_seed)
- [pw\_diag\_thr](#pw_diag_thr)
- [diago\_smooth\_ethr](#diago_smooth_ethr)
- [use\_k\_continuity](#use_k_continuity)
- [pw\_diag\_nmax](#pw_diag_nmax)
- [pw\_diag\_ndim](#pw_diag_ndim)
- [erf\_ecut](#erf_ecut)
Expand Down Expand Up @@ -774,6 +775,18 @@ These variables are used to control the plane wave related parameters.
- **Description**: If `TRUE`, the smooth threshold strategy, which applies a larger threshold (10e-5) for the empty states, will be implemented in the diagonalization methods. (This strategy should not affect total energy, forces, and other ground-state properties, but computational efficiency will be improved.) If `FALSE`, the smooth threshold strategy will not be applied.
- **Default**: false

### use_k_continuity

- **Type**: Boolean
- **Availability**: Used only for plane wave basis set.
- **Description**: Whether to use k-point continuity for initializing wave functions. When enabled, this strategy exploits the similarity between wavefunctions at neighboring k-points by propagating the wavefunction from a previously initialized k-point to a new k-point, significantly reducing the computational cost of the initial guess.

**Important constraints:**
- Must be used together with `diago_smooth_ethr = 1` for optimal performance

This feature is particularly useful for calculations with dense k-point sampling where the computational cost of wavefunction initialization becomes significant.
- **Default**: false

### pw_diag_nmax

- **Type**: Integer
Expand Down
3 changes: 2 additions & 1 deletion source/module_esolver/esolver_ks_pw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,8 @@ void ESolver_KS_PW<T, Device>::hamilt2density_single(UnitCell& ucell,
hsolver::DiagoIterAssist<T, Device>::SCF_ITER,
hsolver::DiagoIterAssist<T, Device>::PW_DIAG_NMAX,
hsolver::DiagoIterAssist<T, Device>::PW_DIAG_THR,
hsolver::DiagoIterAssist<T, Device>::need_subspace);
hsolver::DiagoIterAssist<T, Device>::need_subspace,
PARAM.inp.use_k_continuity);

hsolver_pw_obj.solve(this->p_hamilt,
this->kspw_psi[0],
Expand Down
119 changes: 62 additions & 57 deletions source/module_hsolver/diago_dav_subspace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,27 +288,28 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
psi_iter + (nbase) * this->dim,
this->dim);

std::vector<Real> e_temp_cpu(nbase, 0);
// Eigenvalues operation section
std::vector<Real> e_temp_cpu(this->notconv, 0);
Real* e_temp_hd = e_temp_cpu.data();
if(this->device == base_device::GpuDevice)
if (this->device == base_device::GpuDevice)
{
e_temp_hd = nullptr;
resmem_real_op()(this->ctx, e_temp_hd, nbase);
}
for (int m = 0; m < notconv; m++)

for (int m = 0; m < this->notconv; m++)
{
e_temp_cpu.assign(nbase, (-1.0 * (*eigenvalue_iter)[m]));
if (this->device == base_device::GpuDevice)
{
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, e_temp_hd, e_temp_cpu.data(), nbase);
}
vector_mul_vector_op<T, Device>()(this->ctx,
nbase,
vcc + m * this->nbase_x,
vcc + m * this->nbase_x,
e_temp_hd);
e_temp_cpu[m] = -(*eigenvalue_iter)[m];
}

if (this->device == base_device::GpuDevice)
{
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, e_temp_hd, e_temp_cpu.data(), this->notconv);
}
if(this->device == base_device::GpuDevice)

apply_eigenvalues_op<T, Device>()(nbase, this->nbase_x, this->notconv, this->vcc, this->vcc, e_temp_hd);

if (this->device == base_device::GpuDevice)
{
delmem_real_op()(this->ctx, e_temp_hd);
}
Expand All @@ -333,54 +334,58 @@ void Diago_DavSubspace<T, Device>::cal_grad(const HPsiFunc& hpsi_func,
psi_iter + nbase * this->dim,
this->dim);

// "precondition!!!"
std::vector<Real> pre(this->dim, 0.0);
for (int m = 0; m < notconv; m++)
{
for (size_t i = 0; i < this->dim; i++)
{
// pre[i] = std::abs(this->precondition[i] - (*eigenvalue_iter)[m]);
double x = std::abs(this->precondition[i] - (*eigenvalue_iter)[m]);
pre[i] = 0.5 * (1.0 + x + sqrt(1 + (x - 1.0) * (x - 1.0)));
}
// Precondition section
#if defined(__CUDA) || defined(__ROCM)
if (this->device == base_device::GpuDevice)
{
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, this->d_precondition, pre.data(), this->dim);
vector_div_vector_op<T, Device>()(this->ctx,
this->dim,
psi_iter + (nbase + m) * this->dim,
psi_iter + (nbase + m) * this->dim,
this->d_precondition);
}
else
if (this->device == base_device::GpuDevice)
{
Real* eigenvalues_gpu = nullptr;
resmem_real_op()(this->ctx, eigenvalues_gpu, notconv);
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, eigenvalues_gpu, (*eigenvalue_iter).data(), notconv);

precondition_op<T, Device>()(this->dim,
psi_iter,
nbase,
notconv,
d_precondition,
eigenvalues_gpu);
delmem_real_op()(this->ctx, eigenvalues_gpu);
}
else
#endif
{
vector_div_vector_op<T, Device>()(this->ctx,
this->dim,
psi_iter + (nbase + m) * this->dim,
psi_iter + (nbase + m) * this->dim,
pre.data());
}
{
precondition_op<T, Device>()(this->dim,
psi_iter,
nbase,
notconv,
this->precondition.data(),
(*eigenvalue_iter).data());
}

// "normalize!!!" in order to improve numerical stability of subspace diagonalization
std::vector<Real> psi_norm(notconv, 0.0);
for (size_t i = 0; i < notconv; i++)
// Normalize section
#if defined(__CUDA) || defined(__ROCM)
if (this->device == base_device::GpuDevice)
{
Real* psi_norm = nullptr;
resmem_real_op()(this->ctx, psi_norm, notconv);
using setmem_real_op = base_device::memory::set_memory_op<Real, Device>;
setmem_real_op()(this->ctx, psi_norm, 0.0, notconv);

normalize_op<T, Device>()(this->dim,
psi_iter,
nbase,
notconv,
psi_norm);
delmem_real_op()(this->ctx, psi_norm);
}
else
#endif
{
psi_norm[i] = dot_real_op<T, Device>()(this->ctx,
this->dim,
psi_iter + (nbase + i) * this->dim,
psi_iter + (nbase + i) * this->dim,
true);
assert(psi_norm[i] > 0.0);
psi_norm[i] = sqrt(psi_norm[i]);

vector_div_constant_op<T, Device>()(this->ctx,
this->dim,
psi_iter + (nbase + i) * this->dim,
psi_iter + (nbase + i) * this->dim,
psi_norm[i]);
Real* psi_norm = nullptr;
normalize_op<T, Device>()(this->dim,
psi_iter,
nbase,
notconv,
psi_norm);
}

// update hpsi[:, nbase:nbase+notconv]
Expand Down
Loading
Loading