Coverage for eminus/minimizer.py: 98.42%

253 statements  

coverage.py v7.11.0, created at 2025-10-21 12:19 +0000

# SPDX-FileCopyrightText: 2022 The eminus developers
# SPDX-License-Identifier: Apache-2.0
"""Minimization algorithms."""

import copy
import logging
import math

from . import backend as xp
from .dft import get_epsilon, get_grad
from .energies import get_E, get_Eentropy
from .logger import name
from .utils import dotprod

def scf_step(scf, step):
    """Perform one SCF step for a DFT calculation.

    Calculating intermediate results speeds up the energy and gradient calculation.
    This function is similar to H_precompute but sets all variables and energies in the SCF class
    and returns the total energy.

    Args:
        scf: SCF object.
        step: Optimization step.

    Returns:
        Total energy.
    """
    scf.callback(scf, step)
    scf._precompute()
    # Update occupations every smear_update'th cycle
    if scf.atoms.occ.smearing > 0 and step % scf.smear_update == 0:
        epsilon = get_epsilon(scf, scf.W, **scf._precomputed)
        Efermi = scf.atoms.occ.smear(epsilon)
        get_Eentropy(scf, epsilon, Efermi)
    return get_E(scf)

def check_convergence(scf, method, Elist, linmin=None, cg=None, norm_g=None):
    """Check the energies for every SCF cycle and handle the output.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.

    Keyword Args:
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.

    Returns:
        Convergence condition.
    """
    iteration = len(Elist)

    # Print all the data
    print_scf_step(scf, method, Elist, linmin, cg, norm_g)

    if iteration > 1:
        # Check for convergence
        if scf.gradtol is None or norm_g is None:
            if abs(Elist[-1] - Elist[-2]) < scf.etol:
                scf.is_converged = True
                return True
        # If a gradient tolerance has been set we also check norm_g for convergence
        elif abs(Elist[-1] - Elist[-2]) < scf.etol and (xp.sum(norm_g, axis=0) < scf.gradtol).all():
            scf.is_converged = True
            return True
        # Check if the current energy is higher than the last two values
        if (xp.asarray(Elist[-3:-1]) < Elist[-1]).all():
            scf._log.warning("Total energy is not decreasing.")
    return False

def print_scf_step(scf, method, Elist, linmin, cg, norm_g):
    """Print the data of one SCF step and the header at the beginning.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.
    """
    iteration = len(Elist)

    # Print a column header at the beginning
    # The ljust values have been chosen such that the output looks decent
    if iteration == 1:
        header = "Method".ljust(8)
        header += "Iteration".ljust(11)
        header += "Etot [Eh]".ljust(13)
        header += "dEtot [Eh]".ljust(13)
        # Print the gradient norm for cg methods
        if method not in {"sd", "lm", "pclm"}:
            header += "|Gradient|".ljust(10 * scf.atoms.occ.Nspin + 3)
        # Print extra debugging information if available
        if scf._log.level <= logging.DEBUG:
            if method != "sd":
                header += "linmin-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            if method not in {"sd", "lm", "pclm"}:
                header += "cg-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            scf._log.debug(header)
        else:
            scf._log.info(header)

    # Print the information for every cycle
    info = f"{method:<8}{iteration:>8}   {Elist[-1]:<+13,.6f}"
    # In the first step we do not have all information yet
    if iteration > 1:
        info += f"{Elist[-1] - Elist[-2]:<+13,.4e}"
        if norm_g is not None:
            norm_g_str = [f"{x:+.2e}" for x in xp.sum(norm_g, axis=0)]
            info += f"[{' '.join(norm_g_str)}]".ljust(10 * scf.atoms.occ.Nspin + 3)
        if scf._log.level <= logging.DEBUG:
            if method != "sd" and linmin is not None:
                linmin_str = [f"{x:+.2e}" for x in xp.sum(linmin, axis=0)]
                info += f"[{' '.join(linmin_str)}]".ljust(10 * scf.atoms.occ.Nspin + 3)
            if method not in {"sd", "lm", "pclm"} and cg is not None:
                cg_str = [f"{x:+.2e}" for x in xp.sum(cg, axis=0)]
                info += f"[{' '.join(cg_str)}]".ljust(10 * scf.atoms.occ.Nspin + 3)
    if scf._log.level <= logging.DEBUG:
        scf._log.debug(info)
    else:
        scf._log.info(info)

def linmin_test(g, d):
    """Do the line minimization test.

    Calculate the cosine of the angle between g and d.

    Reference: https://trond.hjorteland.com/thesis/node26.html

    Args:
        g: Current gradient.
        d: Previous search direction.

    Returns:
        Linmin angle.
    """
    # cos = A B / |A| |B|
    return dotprod(g, d) / math.sqrt(dotprod(g, g) * dotprod(d, d))
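# Editor's illustration, not part of eminus: linmin_test is just the cosine between the
# flattened current gradient and the previous search direction. A value close to zero
# means the previous line minimization was (nearly) exact, while a value close to -1
# means the gradient still points against the old direction. A minimal sketch with
# plain NumPy vectors standing in for the wave function arrays; the helper name
# _linmin_cosine_demo is hypothetical.
def _linmin_cosine_demo():
    import numpy as np

    g = np.array([1.0, -2.0, 0.5])   # stand-in for the current gradient
    d = np.array([-1.0, 2.1, -0.4])  # stand-in for the previous search direction
    # Same formula as linmin_test, written with plain dot products
    return float(g @ d / np.sqrt((g @ g) * (d @ d)))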

def cg_test(atoms, ik, g, g_old, precondition=True):
    """Test the gradient-orthogonality theorem, i.e., g and g_old should be orthogonal.

    Calculate the cosine of the angle between g and g_old. For an angle of 90 degrees the cosine
    goes to zero.

    Reference: https://math.uci.edu/~chenlong/CAMtips/CG.html

    Args:
        atoms: Atoms object.
        ik: k-point index.
        g: Current gradient.
        g_old: Previous gradient.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        CG angle.
    """
    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    # cos = A B / |A| |B|
    return dotprod(g, Kg_old) / math.sqrt(dotprod(g, Kg) * dotprod(g_old, Kg_old))

def cg_method(scf, ik, cgform, g, g_old, d_old, precondition=True):
    """Do different variants of the conjugate gradient method.

    Reference: https://indrag49.github.io/Numerical-Optimization/conjugate-gradient-methods-1.html

    Args:
        scf: SCF object.
        ik: k-point index.
        cgform: Conjugate gradient form.
        g: Current gradient.
        g_old: Previous gradient.
        d_old: Previous search direction.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        Conjugate scalar and gradient norm.
    """
    atoms = scf.atoms

    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    norm_g = dotprod(g, Kg)

    if cgform == 1:  # Fletcher-Reeves
        return norm_g / dotprod(g_old, Kg_old), norm_g
    if cgform == 2:  # Polak-Ribiere
        return dotprod(g - g_old, Kg) / dotprod(g_old, Kg_old), norm_g
    if cgform == 3:  # Hestenes-Stiefel
        return dotprod(g - g_old, Kg) / dotprod(g - g_old, d_old), norm_g
    if cgform == 4:  # Dai-Yuan
        return norm_g / dotprod(g - g_old, d_old), norm_g
    msg = f'No cgform found for "{cgform}".'
    raise ValueError(msg)
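# Editor's illustration, not part of eminus: the four cgform branches above are the
# textbook conjugate-gradient coefficients. The same formulas on plain NumPy vectors
# without a preconditioner (i.e., K is the identity); the helper name _cg_beta_demo is
# hypothetical.
def _cg_beta_demo():
    import numpy as np

    g = np.array([0.2, -0.1, 0.05])     # stand-in for the current gradient
    g_old = np.array([1.0, -0.5, 0.3])  # stand-in for the previous gradient
    d_old = -g_old                      # previous search direction (first step: -g_old)
    fr = (g @ g) / (g_old @ g_old)                  # Fletcher-Reeves (cgform=1)
    pr = ((g - g_old) @ g) / (g_old @ g_old)        # Polak-Ribiere (cgform=2)
    hs = ((g - g_old) @ g) / ((g - g_old) @ d_old)  # Hestenes-Stiefel (cgform=3)
    dy = (g @ g) / ((g - g_old) @ d_old)            # Dai-Yuan (cgform=4)
    return fr, pr, hs, dy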

216@name("steepest descent minimization") 

217def sd(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs): 

218 """Steepest descent minimization algorithm. 

219 

220 Args: 

221 scf: SCF object. 

222 Nit: Maximum number of SCF steps. 

223 

224 Keyword Args: 

225 cost: Function that will run every SCF step. 

226 grad: Function that calculates the respective gradient. 

227 condition: Function to check and log the convergence condition. 

228 betat: Step size. 

229 **kwargs: Throwaway arguments. 

230 

231 Returns: 

232 Total energies per SCF cycle. 

233 """ 

234 atoms = scf.atoms 

235 costs = [] 

236 

237 for i in range(Nit): 

238 c = cost(scf, i) 

239 costs.append(c) 

240 if condition(scf, "sd", costs): 

241 break 

242 for ik in range(atoms.kpts.Nk): 

243 for spin in range(atoms.occ.Nspin): 

244 g = grad(scf, ik, spin, scf.W, **scf._precomputed) 

245 scf.W[ik][spin] = scf.W[ik][spin] - betat * g 

246 return costs 

247 

248 
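# Editor's illustration, not part of eminus: sd performs plain gradient descent with a
# fixed step size betat, applied to every k-point and spin channel. The same loop
# structure on a toy quadratic cost 0.5 * x.T A x in NumPy; the name _sd_demo and the
# chosen numbers are purely illustrative.
def _sd_demo(Nit=50, betat=0.1):
    import numpy as np

    A = np.diag([1.0, 3.0])   # toy Hessian
    x = np.array([1.0, 1.0])  # stand-in for the wave function coefficients
    costs = []
    for _ in range(Nit):
        costs.append(0.5 * x @ A @ x)  # "cost" of the current iterate
        x = x - betat * (A @ x)        # fixed-step gradient update, like W -= betat * g
    return costs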

249@name("preconditioned line minimization") 

250def pclm( 

251 scf, 

252 Nit, 

253 cost=scf_step, 

254 grad=get_grad, 

255 condition=check_convergence, 

256 betat=3e-5, 

257 precondition=True, 

258 **kwargs, 

259): 

260 """Preconditioned line minimization algorithm. 

261 

262 Args: 

263 scf: SCF object. 

264 Nit: Maximum number of SCF steps. 

265 

266 Keyword Args: 

267 cost: Function that will run every SCF step. 

268 grad: Function that calculates the respective gradient. 

269 condition: Function to check and log the convergence condition. 

270 betat: Step size. 

271 precondition: Whether to use a preconditioner. 

272 **kwargs: Throwaway arguments. 

273 

274 Returns: 

275 Total energies per SCF cycle. 

276 """ 

277 atoms = scf.atoms 

278 costs = [] 

279 

280 if precondition: 

281 method = "pclm" 

282 else: 

283 method = "lm" 

284 

285 # Scalars that need to be saved for each spin 

286 linmin = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

287 # Search direction that needs to be saved for each spin 

288 d = [xp.empty_like(Wk) for Wk in scf.W] 

289 g = [xp.empty_like(Wk) for Wk in scf.W] 

290 

291 for i in range(Nit): 

292 W_tmp = copy.deepcopy(scf.W) 

293 for ik in range(atoms.kpts.Nk): 

294 for spin in range(atoms.occ.Nspin): 

295 g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed) 

296 # Calculate linmin each spin separately 

297 if scf._log.level <= logging.DEBUG and i > 0: 

298 linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin]) 

299 if precondition: 

300 d[ik][spin] = -atoms.K(g[ik][spin], ik) 

301 else: 

302 d[ik][spin] = -g[ik][spin] 

303 scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin] 

304 

305 scf._precompute() 

306 for ik in range(atoms.kpts.Nk): 

307 for spin in range(atoms.occ.Nspin): 

308 gt = grad(scf, ik, spin, scf.W, **scf._precomputed) 

309 beta = abs( 

310 betat 

311 * dotprod(g[ik][spin], d[ik][spin]) 

312 / dotprod(g[ik][spin] - gt, d[ik][spin]) 

313 ) 

314 scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin] 

315 c = cost(scf, i) 

316 costs.append(c) 

317 if condition(scf, method, costs, linmin): 

318 break 

319 return costs 

320 

321 
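# Editor's illustration, not part of eminus: the step width beta used above comes from a
# finite-difference line search. A trial step of length betat along d changes the
# directional derivative from <g, d> to <gt, d>; assuming the energy is (locally)
# quadratic, this derivative is linear in the step, so its zero crossing lies at
# beta = betat * <g, d> / <g - gt, d>. For an exactly quadratic cost the formula
# reproduces the analytic line minimum, as the NumPy check below shows; the helper name
# _trial_step_demo is hypothetical.
def _trial_step_demo(betat=1e-3):
    import numpy as np

    A = np.diag([1.0, 4.0])   # toy Hessian of the cost 0.5 * x.T A x
    x = np.array([1.0, 1.0])
    g = A @ x                 # gradient at the current point
    d = -g                    # unpreconditioned search direction
    gt = A @ (x + betat * d)  # gradient after the trial step
    beta = abs(betat * (g @ d) / ((g - gt) @ d))
    beta_exact = -(g @ d) / (d @ A @ d)  # analytic minimizer along d
    return beta, beta_exact              # both evaluate to the same step width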

322@name("line minimization") 

323def lm(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs): 

324 """Line minimization algorithm. 

325 

326 Args: 

327 scf: SCF object. 

328 Nit: Maximum number of SCF steps. 

329 

330 Keyword Args: 

331 cost: Function that will run every SCF step. 

332 grad: Function that calculates the respective gradient. 

333 condition: Function to check and log the convergence condition. 

334 betat: Step size. 

335 **kwargs: Throwaway arguments. 

336 

337 Returns: 

338 Total energies per SCF cycle. 

339 """ 

340 return pclm(scf, Nit, cost, grad, condition, betat, precondition=False) 

341 

342 

343@name("preconditioned conjugate-gradient minimization") 

344def pccg( 

345 scf, 

346 Nit, 

347 cost=scf_step, 

348 grad=get_grad, 

349 condition=check_convergence, 

350 betat=3e-5, 

351 cgform=1, 

352 precondition=True, 

353): 

354 """Preconditioned conjugate-gradient minimization algorithm. 

355 

356 Args: 

357 scf: SCF object. 

358 Nit: Maximum number of SCF steps. 

359 

360 Keyword Args: 

361 cost: Function that will run every SCF step. 

362 grad: Function that calculates the respective gradient. 

363 condition: Function to check and log the convergence condition. 

364 betat: Step size. 

365 cgform: Conjugate gradient form. 

366 precondition: Whether to use a preconditioner. 

367 

368 Returns: 

369 Total energies per SCF cycle. 

370 """ 

371 atoms = scf.atoms 

372 costs = [] 

373 

374 if precondition: 

375 method = "pccg" 

376 else: 

377 method = "cg" 

378 

379 # Scalars that need to be saved for each spin and k-point 

380 linmin = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

381 cg = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

382 norm_g = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

383 # Gradients that need to be saved for each spin and k-point 

384 d = [xp.empty_like(Wk) for Wk in scf.W] 

385 g = [xp.empty_like(Wk) for Wk in scf.W] 

386 d_old = [xp.empty_like(Wk) for Wk in scf.W] 

387 g_old = [xp.empty_like(Wk) for Wk in scf.W] 

388 

389 # Do the first step without the linmin and cg tests, and without the cg_method 

390 W_tmp = copy.deepcopy(scf.W) 

391 for ik in range(atoms.kpts.Nk): 

392 for spin in range(atoms.occ.Nspin): 

393 g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed) 

394 if precondition: 

395 d[ik][spin] = -atoms.K(g[ik][spin], ik) 

396 else: 

397 d[ik][spin] = -g[ik][spin] 

398 scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin] 

399 

400 # Calculate the optimal step width 

401 scf._precompute() 

402 for ik in range(atoms.kpts.Nk): 

403 for spin in range(atoms.occ.Nspin): 

404 gt = grad(scf, ik, spin, scf.W, **scf._precomputed) 

405 beta = abs( 

406 betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin]) 

407 ) 

408 scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin] 

409 g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin] 

410 

411 # Evaluate the cost function 

412 c = cost(scf, -1) 

413 costs.append(c) 

414 condition(scf, method, costs) 

415 

416 # Start the iteration 

417 for i in range(1, Nit): 

418 W_tmp = copy.deepcopy(scf.W) 

419 for ik in range(atoms.kpts.Nk): 

420 for spin in range(atoms.occ.Nspin): 

421 g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed) 

422 # Calculate linmin and cg for each spin and k-point separately if needed 

423 if scf._log.level <= logging.DEBUG: 

424 linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin]) 

425 cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin], precondition) 

426 beta, norm_g[ik][spin] = cg_method( 

427 scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin], precondition 

428 ) 

429 if precondition: 

430 d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin] 

431 else: 

432 d[ik][spin] = -g[ik][spin] + beta * d_old[ik][spin] 

433 scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin] 

434 

435 scf._precompute() 

436 for ik in range(atoms.kpts.Nk): 

437 for spin in range(atoms.occ.Nspin): 

438 gt = grad(scf, ik, spin, scf.W, **scf._precomputed) 

439 beta = abs( 

440 betat 

441 * dotprod(g[ik][spin], d[ik][spin]) 

442 / dotprod(g[ik][spin] - gt, d[ik][spin]) 

443 ) 

444 scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin] 

445 g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin] 

446 

447 c = cost(scf, i) 

448 costs.append(c) 

449 if condition(scf, method, costs, linmin, cg, norm_g): 

450 break 

451 return costs 

452 

453 

454@name("conjugate-gradient minimization") 

455def cg(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1): 

456 """Conjugate-gradient minimization algorithm. 

457 

458 Args: 

459 scf: SCF object. 

460 Nit: Maximum number of SCF steps. 

461 

462 Keyword Args: 

463 cost: Function that will run every SCF step. 

464 grad: Function that calculates the respective gradient. 

465 condition: Function to check and log the convergence condition. 

466 betat: Step size. 

467 cgform: Conjugate gradient form. 

468 

469 Returns: 

470 Total energies per SCF cycle. 

471 """ 

472 return pccg(scf, Nit, cost, grad, condition, betat, cgform, precondition=False) 

473 

474 

475@name("auto minimization") 

476def auto(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1): # noqa: C901 

477 """Automatic preconditioned conjugate-gradient minimization algorithm. 

478 

479 This function chooses an sd step over the pccg step if the energy goes up. 

480 

481 Args: 

482 scf: SCF object. 

483 Nit: Maximum number of SCF steps. 

484 

485 Keyword Args: 

486 cost: Function that will run every SCF step. 

487 grad: Function that calculates the respective gradient. 

488 condition: Function to check and log the convergence condition. 

489 betat: Step size. 

490 cgform: Conjugate gradient form. 

491 

492 Returns: 

493 Total energies per SCF cycle. 

494 """ 

495 atoms = scf.atoms 

496 costs = [] 

497 

498 # Scalars that need to be saved for each spin 

499 linmin = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

500 cg = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

501 norm_g = xp.empty((atoms.kpts.Nk, atoms.occ.Nspin)) 

502 # Gradients that need to be saved for each spin 

503 d = [xp.empty_like(Wk) for Wk in scf.W] 

504 g = [xp.empty_like(Wk) for Wk in scf.W] 

505 d_old = [xp.empty_like(Wk) for Wk in scf.W] 

506 g_old = [xp.empty_like(Wk) for Wk in scf.W] 

507 

508 # Do the first step without the linmin and cg tests, and without the cg_method 

509 W_tmp = copy.deepcopy(scf.W) 

510 for ik in range(atoms.kpts.Nk): 

511 for spin in range(atoms.occ.Nspin): 

512 g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed) 

513 d[ik][spin] = -atoms.K(g[ik][spin], ik) 

514 scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin] 

515 

516 # Calculate the optimal step width 

517 scf._precompute() 

518 for ik in range(atoms.kpts.Nk): 

519 for spin in range(atoms.occ.Nspin): 

520 gt = grad(scf, ik, spin, scf.W, **scf._precomputed) 

521 beta = abs( 

522 betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin]) 

523 ) 

524 scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin] 

525 g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin] 

526 

527 # Evaluate the cost function 

528 c = cost(scf, -1) 

529 costs.append(c) 

530 if condition(scf, "pccg", costs): 

531 return costs 

532 

533 # Start the iteration 

534 for i in range(1, Nit): 

535 W_tmp = copy.deepcopy(scf.W) 

536 for ik in range(atoms.kpts.Nk): 

537 for spin in range(atoms.occ.Nspin): 

538 g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed) 

539 # Calculate linmin and cg for each spin separately 

540 if scf._log.level <= logging.DEBUG: 

541 linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin]) 

542 cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin]) 

543 beta, norm_g[ik][spin] = cg_method( 

544 scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin] 

545 ) 

546 d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin] 

547 scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin] 

548 

549 scf._precompute() 

550 for ik in range(atoms.kpts.Nk): 

551 for spin in range(atoms.occ.Nspin): 

552 gt = grad(scf, ik, spin, scf.W, **scf._precomputed) 

553 beta = abs( 

554 betat 

555 * dotprod(g[ik][spin], d[ik][spin]) 

556 / dotprod(g[ik][spin] - gt, d[ik][spin]) 

557 ) 

558 scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin] 

559 g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin] 

560 

561 c = cost(scf, i) 

562 # If the energy does not go down use the steepest descent step and recalculate the energy 

563 if c > costs[-1]: 

564 for ik in range(atoms.kpts.Nk): 

565 scf.W[ik] = W_tmp[ik] - betat * g[ik] 

566 c = cost(scf, -1) 

567 costs.append(c) 

568 # Do not print cg and linmin if we do the sd step 

569 if condition(scf, "sd", costs, norm_g=norm_g): 

570 break 

571 else: 

572 costs.append(c) 

573 if condition(scf, "pccg", costs, linmin, cg, norm_g): 

574 break 

575 return costs 

576 

577 

#: Map minimizer names to their respective implementations.
IMPLEMENTED = {
    "sd": sd,
    "lm": lm,
    "pclm": pclm,
    "cg": cg,
    "pccg": pccg,
    "auto": auto,
}
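# Editor's usage sketch, not part of eminus: the SCF driver resolves the requested
# minimizer names through IMPLEMENTED and calls them with the SCF object and the
# maximum number of steps, matching the signatures above. The helper below only shows
# the dictionary dispatch; it assumes an SCF object whose wave functions have already
# been initialized by the driver (scf.run() is the supported entry point), and the
# name _run_minimizer is hypothetical.
def _run_minimizer(scf, method="auto", Nit=100):
    minimizer = IMPLEMENTED[method]  # e.g. auto, pccg, or sd
    return minimizer(scf, Nit)       # total energies per SCF cycle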