Coverage for eminus/minimizer.py: 98.40% (250 statements)

# SPDX-FileCopyrightText: 2022 The eminus developers
# SPDX-License-Identifier: Apache-2.0
"""Minimization algorithms."""

import copy
import logging

import numpy as np

from .dft import get_epsilon, get_grad
from .energies import get_E, get_Eentropy
from .logger import name
from .utils import dotprod


def scf_step(scf, step):
    """Perform one SCF step for a DFT calculation.

    Calculating intermediate results speeds up the energy and gradient calculation.
    This function is similar to H_precompute but sets all variables and energies in the SCF
    class and returns the total energy.

    Args:
        scf: SCF object.
        step: Optimization step.

    Returns:
        Total energy.
    """
    scf.callback(scf, step)
    scf._precompute()
    # Update occupations every smear_update'th cycle
    if scf.atoms.occ.smearing > 0 and step % scf.smear_update == 0:
        epsilon = get_epsilon(scf, scf.W, **scf._precomputed)
        Efermi = scf.atoms.occ.smear(epsilon)
        get_Eentropy(scf, epsilon, Efermi)
    return get_E(scf)


def check_convergence(scf, method, Elist, linmin=None, cg=None, norm_g=None):
    """Check the energies for every SCF cycle and handle the output.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.

    Keyword Args:
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.

    Returns:
        Convergence condition.
    """
    iteration = len(Elist)

    # Print all the data
    print_scf_step(scf, method, Elist, linmin, cg, norm_g)

    if iteration > 1:
        # Check for convergence
        if scf.gradtol is None or norm_g is None:
            if abs(Elist[-1] - Elist[-2]) < scf.etol:
                scf.is_converged = True
                return True
        # If a gradient tolerance has been set, we also check norm_g for convergence
        elif abs(Elist[-1] - Elist[-2]) < scf.etol and (np.sum(norm_g, axis=0) < scf.gradtol).all():
            scf.is_converged = True
            return True
        # Check if the current energy is higher than the last two values
        if (np.asarray(Elist[-3:-1]) < Elist[-1]).all():
            scf._log.warning("Total energy is not decreasing.")
    return False
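

# A minimal sketch (not part of eminus) of a user-supplied convergence callback: the minimizers
# below accept any callable with the same signature as check_convergence via their `condition`
# keyword argument. This hypothetical variant stops purely on the energy difference and ignores
# the gradient norm; the function name and the default tolerance are assumptions.
def _energy_only_condition(scf, method, Elist, linmin=None, cg=None, norm_g=None, etol=1e-7):
    """Return True once the change between the last two total energies drops below etol."""
    print_scf_step(scf, method, Elist, linmin, cg, norm_g)
    if len(Elist) > 1 and abs(Elist[-1] - Elist[-2]) < etol:
        scf.is_converged = True
        return True
    return False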


def print_scf_step(scf, method, Elist, linmin, cg, norm_g):
    """Print the data of one SCF step and the header at the beginning.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.
    """
    iteration = len(Elist)

    # Print a column header at the beginning
    # The ljust values have been chosen such that the output looks decent
    if iteration == 1:
        header = "Method".ljust(8)
        header += "Iteration".ljust(11)
        header += "Etot [Eh]".ljust(13)
        header += "dEtot [Eh]".ljust(13)
        # Print the gradient norm for cg methods
        if method not in {"sd", "lm", "pclm"}:
            header += "|Gradient|".ljust(10 * scf.atoms.occ.Nspin + 3)
        # Print extra debugging information if available
        if scf._log.level <= logging.DEBUG:
            if method != "sd":
                header += "linmin-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            if method not in {"sd", "lm", "pclm"}:
                header += "cg-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            scf._log.debug(header)
        else:
            scf._log.info(header)

    # Print the information for every cycle
    # Context manager for printing norm_g, linmin, and cg
    with np.printoptions(formatter={"float": "{:+0.2e}".format}):
        info = f"{method:<8}{iteration:>8} {Elist[-1]:<+13,.6f}"
        # In the first step we do not have all information yet
        if iteration > 1:
            info += f"{Elist[-1] - Elist[-2]:<+13,.4e}"
            if norm_g is not None:
                info += str(np.sum(norm_g, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
            if scf._log.level <= logging.DEBUG:
                if method != "sd" and linmin is not None:
                    info += str(np.sum(linmin, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
                if method not in {"sd", "lm", "pclm"} and cg is not None:
                    info += str(np.sum(cg, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
        if scf._log.level <= logging.DEBUG:
            scf._log.debug(info)
        else:
            scf._log.info(info)


def linmin_test(g, d):
    """Do the line minimization test.

    Calculate the cosine of the angle between g and d.

    Reference: https://trond.hjorteland.com/thesis/node26.html

    Args:
        g: Current gradient.
        d: Previous search direction.

    Returns:
        Linmin angle.
    """
    # cos = A B / |A| |B|
    return dotprod(g, d) / np.sqrt(dotprod(g, g) * dotprod(d, d))
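

# Illustrative numbers only (not part of eminus): for plain real vectors the same cosine
# expression can be evaluated with NumPy directly. After an exact line minimization the new
# gradient is orthogonal to the previous search direction, so the test value tends to zero.
def _linmin_cosine_example():
    """Return the cosine between a toy gradient and a toy search direction."""
    g = np.array([1.0, 0.0])   # toy "current gradient"
    d = np.array([0.0, -2.0])  # toy "previous search direction", orthogonal to g
    return g @ d / np.sqrt((g @ g) * (d @ d))  # evaluates to 0.0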


def cg_test(atoms, ik, g, g_old, precondition=True):
    """Test the gradient-orthogonality theorem, i.e., g and g_old should be orthogonal.

    Calculate the cosine of the angle between g and g_old. For an angle of 90 degrees the cosine
    goes to zero.

    Reference: https://math.uci.edu/~chenlong/CAMtips/CG.html

    Args:
        atoms: Atoms object.
        ik: k-point index.
        g: Current gradient.
        g_old: Previous gradient.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        CG angle.
    """
    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    # cos = A B / |A| |B|
    return dotprod(g, Kg_old) / np.sqrt(dotprod(g, Kg) * dotprod(g_old, Kg_old))


def cg_method(scf, ik, cgform, g, g_old, d_old, precondition=True):
    """Do different variants of the conjugate gradient method.

    Reference: https://indrag49.github.io/Numerical-Optimization/conjugate-gradient-methods-1.html

    Args:
        scf: SCF object.
        ik: k-point index.
        cgform: Conjugate gradient form.
        g: Current gradient.
        g_old: Previous gradient.
        d_old: Previous search direction.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        Conjugate scalar and gradient norm.
    """
    atoms = scf.atoms

    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    norm_g = dotprod(g, Kg)

    if cgform == 1:  # Fletcher-Reeves
        return norm_g / dotprod(g_old, Kg_old), norm_g
    if cgform == 2:  # Polak-Ribiere
        return dotprod(g - g_old, Kg) / dotprod(g_old, Kg_old), norm_g
    if cgform == 3:  # Hestenes-Stiefel
        return dotprod(g - g_old, Kg) / dotprod(g - g_old, d_old), norm_g
    if cgform == 4:  # Dai-Yuan
        return norm_g / dotprod(g - g_old, d_old), norm_g
    msg = f'No cgform found for "{cgform}".'
    raise ValueError(msg)
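

# A minimal sketch (not part of eminus) of the four beta formulas above, written for plain 1D
# NumPy arrays without a preconditioner (K = identity). The function name and signature are
# assumptions for illustration; the module itself always works on wave function arrays through
# dotprod and atoms.K.
def _cg_beta_example(g, g_old, d_old, cgform=1):
    """Return the conjugate-gradient coefficient beta for real 1D arrays."""
    if cgform == 1:  # Fletcher-Reeves: <g, g> / <g_old, g_old>
        return (g @ g) / (g_old @ g_old)
    if cgform == 2:  # Polak-Ribiere: <g - g_old, g> / <g_old, g_old>
        return ((g - g_old) @ g) / (g_old @ g_old)
    if cgform == 3:  # Hestenes-Stiefel: <g - g_old, g> / <g - g_old, d_old>
        return ((g - g_old) @ g) / ((g - g_old) @ d_old)
    if cgform == 4:  # Dai-Yuan: <g, g> / <g - g_old, d_old>
        return (g @ g) / ((g - g_old) @ d_old)
    msg = f'No cgform found for "{cgform}".'
    raise ValueError(msg)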


@name("steepest descent minimization")
def sd(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Steepest descent minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    for i in range(Nit):
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, "sd", costs):
            break
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g = grad(scf, ik, spin, scf.W, **scf._precomputed)
                scf.W[ik][spin] = scf.W[ik][spin] - betat * g
    return costs
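

# Illustrative sketch only (not part of eminus): the same fixed-step update W <- W - betat * grad
# applied to a toy quadratic f(x) = 0.5 * x.T A x with its minimum at the origin. The matrix,
# step size, and iteration count are assumptions for demonstration.
def _sd_toy_example():
    """Run a few steepest descent steps on a 2D quadratic and return the final point."""
    A = np.array([[2.0, 0.0], [0.0, 1.0]])
    x = np.array([1.0, -1.0])
    betat = 0.1
    for _ in range(100):
        x = x - betat * (A @ x)  # gradient of 0.5 * x.T A x is A x
    return x  # approaches [0, 0]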


@name("preconditioned line minimization")
def pclm(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    precondition=True,
    **kwargs,
):
    """Preconditioned line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        precondition: Whether to use a preconditioner.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pclm"
    else:
        method = "lm"

    # Scalars that need to be saved for each k-point and spin
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Search directions and gradients that need to be saved for each k-point and spin
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]

    for i in range(Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin for each spin separately
                if scf._log.level <= logging.DEBUG and i > 0:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik)
                else:
                    d[ik][spin] = -g[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin):
            break
    return costs
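

# A minimal sketch (not part of eminus) of the step size estimate used above: after a trial step
# betat along d, the optimal step is approximated as
#   beta = betat * <g, d> / <g - gt, d>,
# where gt is the gradient at the trial point. For a quadratic cost this secant estimate is exact,
# as the toy example below shows; all names and numbers are assumptions for illustration.
def _line_search_step_example():
    """Return the estimated and the exact line minimization step for a 2D quadratic."""
    A = np.array([[2.0, 0.0], [0.0, 1.0]])  # Hessian of f(x) = 0.5 * x.T A x
    x = np.array([1.0, 1.0])
    g = A @ x                 # gradient at x
    d = -g                    # steepest descent direction
    betat = 1e-3              # small trial step
    gt = A @ (x + betat * d)  # gradient at the trial point
    beta = abs(betat * (g @ d) / ((g - gt) @ d))
    beta_exact = -(g @ d) / (d @ A @ d)  # exact minimizer of f(x + beta * d) along d
    return beta, beta_exact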


@name("line minimization")
def lm(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    return pclm(scf, Nit, cost, grad, condition, betat, precondition=False)


@name("preconditioned conjugate-gradient minimization")
def pccg(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    cgform=1,
    precondition=True,
):
    """Preconditioned conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.
        precondition: Whether to use a preconditioner.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pccg"
    else:
        method = "cg"

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            if precondition:
                d[ik][spin] = -atoms.K(g[ik][spin], ik)
            else:
                d[ik][spin] = -g[ik][spin]
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    condition(scf, method, costs)

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin and k-point separately if needed
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin], precondition)
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin], precondition
                )
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                else:
                    d[ik][spin] = -g[ik][spin] + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin, cg, norm_g):
            break
    return costs
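

# A compact sketch (not part of eminus) of the same iteration pattern on a toy quadratic with
# plain NumPy arrays: trial step, secant step size, and a Fletcher-Reeves update of the search
# direction. All names and numbers are assumptions for illustration.
def _pccg_toy_example(Nit=20, betat=1e-3):
    """Minimize 0.5 * x.T A x with the unpreconditioned CG scheme used above."""
    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    x = np.array([1.0, -2.0])
    g_old = d_old = None
    for _ in range(Nit):
        g = A @ x
        if d_old is None:
            d = -g                                    # first step: steepest descent
        else:
            beta_cg = (g @ g) / (g_old @ g_old)       # Fletcher-Reeves coefficient
            d = -g + beta_cg * d_old
        gt = A @ (x + betat * d)                      # gradient after the trial step
        beta = abs(betat * (g @ d) / ((g - gt) @ d))  # secant estimate of the step size
        x = x + beta * d
        g_old, d_old = g, d
    return x  # approaches the minimum at [0, 0]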


@name("conjugate-gradient minimization")
def cg(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):
    """Conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    return pccg(scf, Nit, cost, grad, condition, betat, cgform, precondition=False)


@name("auto minimization")
def auto(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):  # noqa: C901
    """Automatic preconditioned conjugate-gradient minimization algorithm.

    This function chooses an sd step over the pccg step if the energy goes up.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    # Scalars that need to be saved for each k-point and spin
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients that need to be saved for each k-point and spin
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            d[ik][spin] = -atoms.K(g[ik][spin], ik)
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    if condition(scf, "pccg", costs):
        return costs

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin separately
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin])
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin]
                )
                d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        # If the energy does not go down, use the steepest descent step and recalculate the energy
        if c > costs[-1]:
            for ik in range(atoms.kpts.Nk):
                scf.W[ik] = W_tmp[ik] - betat * g[ik]
            c = cost(scf, -1)
            costs.append(c)
            # Do not print cg and linmin if we do the sd step
            if condition(scf, "sd", costs, norm_g=norm_g):
                break
        else:
            costs.append(c)
            if condition(scf, "pccg", costs, linmin, cg, norm_g):
                break
    return costs


#: Map minimizer names to their respective implementation.
IMPLEMENTED = {
    "sd": sd,
    "lm": lm,
    "pclm": pclm,
    "cg": cg,
    "pccg": pccg,
    "auto": auto,
}
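

# Illustrative dispatch only (not part of eminus): a hypothetical helper showing how a minimizer
# could be looked up in the mapping above by name and run on an already constructed and
# initialized SCF object. The function name and its arguments are assumptions.
def _run_minimizer_example(scf, method="auto", Nit=100):
    """Look up a minimizer in IMPLEMENTED and return its total energies per SCF cycle."""
    if method not in IMPLEMENTED:
        msg = f'No minimizer found for "{method}".'
        raise ValueError(msg)
    return IMPLEMENTED[method](scf, Nit)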