Coverage for eminus/minimizer.py: 98.39%

249 statements  

coverage.py v7.6.4, created at 2024-11-08 12:59 +0000

# SPDX-FileCopyrightText: 2021 The eminus developers
# SPDX-License-Identifier: Apache-2.0
"""Minimization algorithms."""

import copy
import logging

import numpy as np

from .dft import get_epsilon, get_grad
from .energies import get_E, get_Eentropy
from .logger import name
from .utils import dotprod


def scf_step(scf, step):
    """Perform one SCF step for a DFT calculation.

    Calculating intermediate results speeds up the energy and gradient calculation.
    This function is similar to H_precompute, but it sets all variables and energies in the SCF
    class and returns the total energy.

    Args:
        scf: SCF object.
        step: Optimization step.

    Returns:
        Total energy.
    """
    scf._precompute()
    # Update occupations every smear_update'th cycle
    if scf.atoms.occ.smearing > 0 and step % scf.smear_update == 0:
        epsilon = get_epsilon(scf, scf.W, **scf._precomputed)
        Efermi = scf.atoms.occ.smear(epsilon)
        get_Eentropy(scf, epsilon, Efermi)
    return get_E(scf)


def check_convergence(scf, method, Elist, linmin=None, cg=None, norm_g=None):
    """Check the energies for every SCF cycle and handle the output.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.

    Keyword Args:
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.

    Returns:
        Convergence condition.
    """
    iteration = len(Elist)

    # Print all the data
    print_scf_step(scf, method, Elist, linmin, cg, norm_g)

    if iteration > 1:
        # Check for convergence
        if scf.gradtol is None or norm_g is None:
            if abs(Elist[-2] - Elist[-1]) < scf.etol:
                scf.is_converged = True
                return True
        # If a gradient tolerance has been set we also check norm_g for convergence
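        # Note: norm_g holds one value per k-point and spin; the sum over k-points (axis 0)
        # has to fall below gradtol for every spin before the gradient criterion counts as met.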

        elif abs(Elist[-2] - Elist[-1]) < scf.etol and (np.sum(norm_g, axis=0) < scf.gradtol).all():
            scf.is_converged = True
            return True
        # Check if the current energy is higher than the last two values
        if (np.asarray(Elist[-3:-1]) < Elist[-1]).all():
            scf._log.warning("Total energy is not decreasing.")
    return False


def print_scf_step(scf, method, Elist, linmin, cg, norm_g):
    """Print the data of one SCF step and the header at the beginning.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.
    """
    iteration = len(Elist)

    # Print a column header at the beginning
    # The ljust values were chosen such that the output looks decent
    if iteration == 1:
        header = "Method".ljust(8)
        header += "Iteration".ljust(11)
        header += "Etot [Eh]".ljust(13)
        header += "dEtot [Eh]".ljust(13)
        # Print the gradient norm for cg methods
        if method not in {"sd", "lm", "pclm"}:
            header += "|Gradient|".ljust(10 * scf.atoms.occ.Nspin + 3)
        # Print extra debugging information if available
        if scf._log.level <= logging.DEBUG:
            if method != "sd":
                header += "linmin-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            if method not in {"sd", "lm", "pclm"}:
                header += "cg-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            scf._log.debug(header)
        else:
            scf._log.info(header)

    # Print the information for every cycle
    # Context manager for printing norm_g, linmin, and cg
    with np.printoptions(formatter={"float": "{:+0.2e}".format}):
        info = f"{method:<8}{iteration:>8} {Elist[-1]:<+13,.6f}"
        # In the first step we do not have all information yet
        if iteration > 1:
            info += f"{Elist[-2] - Elist[-1]:<+13,.4e}"
            if norm_g is not None:
                info += str(np.sum(norm_g, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
            if scf._log.level <= logging.DEBUG:
                if method != "sd" and linmin is not None:
                    info += str(np.sum(linmin, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
                if method not in {"sd", "lm", "pclm"} and cg is not None:
                    info += str(np.sum(cg, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
        if scf._log.level <= logging.DEBUG:
            scf._log.debug(info)
        else:
            scf._log.info(info)


def linmin_test(g, d):
    """Do the line minimization test.

    Calculate the cosine of the angle between g and d.

    Reference: https://trond.hjorteland.com/thesis/node26.html

    Args:
        g: Current gradient.
        d: Previous search direction.

    Returns:
        Linmin angle.
    """
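    # A value close to zero indicates a (nearly) exact previous line minimization, since an
    # exact minimization leaves the new gradient orthogonal to the old search direction.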

    # cos = A B / |A| |B|
    return dotprod(g, d) / np.sqrt(dotprod(g, g) * dotprod(d, d))


def cg_test(atoms, ik, g, g_old, precondition=True):
    """Test the gradient-orthogonality theorem, i.e., g and g_old should be orthogonal.

    Calculate the cosine of the angle between g and g_old. For an angle of 90 degrees the cosine
    goes to zero.

    Reference: https://math.uci.edu/~chenlong/CAMtips/CG.html

    Args:
        atoms: Atoms object.
        ik: k-point index.
        g: Current gradient.
        g_old: Previous gradient.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        CG angle.
    """
    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    # cos = A B / |A| |B|
    return dotprod(g, Kg_old) / np.sqrt(dotprod(g, Kg) * dotprod(g_old, Kg_old))


def cg_method(scf, ik, cgform, g, g_old, d_old, precondition=True):
    """Do different variants of the conjugate gradient method.

    Reference: https://indrag49.github.io/Numerical-Optimization/conjugate-gradient-methods-1.html

    Args:
        scf: SCF object.
        ik: k-point index.
        cgform: Conjugate gradient form.
        g: Current gradient.
        g_old: Previous gradient.
        d_old: Previous search direction.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        Conjugate scalar and gradient norm.
    """
    atoms = scf.atoms

    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    norm_g = dotprod(g, Kg)

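    # The four beta variants below, writing <a, b> for dotprod(a, b):
    #   Fletcher-Reeves:  beta = <g, Kg> / <g_old, Kg_old>
    #   Polak-Ribiere:    beta = <g - g_old, Kg> / <g_old, Kg_old>
    #   Hestenes-Stiefel: beta = <g - g_old, Kg> / <g - g_old, d_old>
    #   Dai-Yuan:         beta = <g, Kg> / <g - g_old, d_old>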

    if cgform == 1:  # Fletcher-Reeves
        return norm_g / dotprod(g_old, Kg_old), norm_g
    if cgform == 2:  # Polak-Ribiere
        return dotprod(g - g_old, Kg) / dotprod(g_old, Kg_old), norm_g
    if cgform == 3:  # Hestenes-Stiefel
        return dotprod(g - g_old, Kg) / dotprod(g - g_old, d_old), norm_g
    if cgform == 4:  # Dai-Yuan
        return norm_g / dotprod(g - g_old, d_old), norm_g
    msg = f'No cgform found for "{cgform}".'
    raise ValueError(msg)


@name("steepest descent minimization")
def sd(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Steepest descent minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    for i in range(Nit):
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, "sd", costs):
            break
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g = grad(scf, ik, spin, scf.W, **scf._precomputed)
                scf.W[ik][spin] = scf.W[ik][spin] - betat * g
    return costs


@name("preconditioned line minimization")
def pclm(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    precondition=True,
    **kwargs,
):
    """Preconditioned line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        precondition: Whether to use a preconditioner.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pclm"
    else:
        method = "lm"

    # Scalars that need to be saved for each k-point and spin
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Search directions and gradients that need to be saved for each k-point and spin
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]

    for i in range(Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin for each spin separately
                if scf._log.level <= logging.DEBUG and i > 0:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik)
                else:
                    d[ik][spin] = -g[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

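        # The following loop estimates the line-minimization step size from the trial step above:
        # with gt being the gradient after moving betat along d, the secant estimate is
        # beta = betat * <g, d> / <g - gt, d>, and abs() keeps the step length positive.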

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin):
            break
    return costs


@name("line minimization")
def lm(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    return pclm(scf, Nit, cost, grad, condition, betat, precondition=False)


@name("preconditioned conjugate-gradient minimization")
def pccg(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    cgform=1,
    precondition=True,
):
    """Preconditioned conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.
        precondition: Whether to use a preconditioner.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pccg"
    else:
        method = "cg"

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients and search directions that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
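    # (There is no previous gradient or search direction yet, so the first step is a plain
    # preconditioned steepest-descent trial step followed by the step width estimate.)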

    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            if precondition:
                d[ik][spin] = -atoms.K(g[ik][spin], ik)
            else:
                d[ik][spin] = -g[ik][spin]
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    condition(scf, method, costs)

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin and k-point separately if desired
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin], precondition)
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin], precondition
                )
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                else:
                    d[ik][spin] = -g[ik][spin] + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin, cg, norm_g):
            break
    return costs


@name("conjugate-gradient minimization")
def cg(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):
    """Conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    return pccg(scf, Nit, cost, grad, condition, betat, cgform, precondition=False)


@name("auto minimization")
def auto(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):  # noqa: C901
    """Automatic preconditioned conjugate-gradient minimization algorithm.

    This function chooses an sd step over the pccg step if the energy goes up.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    # Scalars that need to be saved for each k-point and spin
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients and search directions that need to be saved for each k-point and spin
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            d[ik][spin] = -atoms.K(g[ik][spin], ik)
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    if condition(scf, "pccg", costs):
        return costs

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each k-point and spin separately
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin])
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin]
                )
                d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        # If the energy does not go down use the steepest descent step and recalculate the energy
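        # W_tmp still holds the wavefunctions from before the CG update, so the fallback below
        # discards the rejected step and takes a plain steepest descent step from that point.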

        if c > costs[-1]:
            for ik in range(atoms.kpts.Nk):
                scf.W[ik] = W_tmp[ik] - betat * g[ik]
            c = cost(scf, -1)
            costs.append(c)
            # Do not print cg and linmin if we do the sd step
            if condition(scf, "sd", costs, norm_g=norm_g):
                break
        else:
            costs.append(c)
            if condition(scf, "pccg", costs, linmin, cg, norm_g):
                break
    return costs


#: Map minimizer names to their respective implementations.
IMPLEMENTED = {
    "sd": sd,
    "lm": lm,
    "pclm": pclm,
    "cg": cg,
    "pccg": pccg,
    "auto": auto,
}
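
A minimal usage sketch for the dispatch table above, assuming an already set-up SCF object with initialized wavefunctions scf.W; the helper name run_minimizer is hypothetical and only illustrates how a minimizer could be selected by name:

def run_minimizer(scf, method="auto", Nit=100):
    """Illustrative helper (not part of eminus): run a named minimizer and return the final energy."""
    if method not in IMPLEMENTED:
        msg = f'No minimizer found for "{method}".'
        raise ValueError(msg)
    # Each minimizer returns the list of total energies per SCF cycle
    costs = IMPLEMENTED[method](scf, Nit)
    return costs[-1]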