Coverage for eminus/minimizer.py: 98.40% (250 statements)

# SPDX-FileCopyrightText: 2022 The eminus developers
# SPDX-License-Identifier: Apache-2.0
"""Minimization algorithms."""

import copy
import logging

import numpy as np

from .dft import get_epsilon, get_grad
from .energies import get_E, get_Eentropy
from .logger import name
from .utils import dotprod


def scf_step(scf, step):
    """Perform one SCF step for a DFT calculation.

    Calculating intermediate results speeds up the energy and gradient calculation.
    This function is similar to H_precompute but sets all variables and energies in the SCF
    class and returns the total energy.

    Args:
        scf: SCF object.
        step: Optimization step.

    Returns:
        Total energy.
    """
    scf.callback(scf, step)
    scf._precompute()
    # Update occupations every smear_update'th cycle
    if scf.atoms.occ.smearing > 0 and step % scf.smear_update == 0:
        epsilon = get_epsilon(scf, scf.W, **scf._precomputed)
        Efermi = scf.atoms.occ.smear(epsilon)
        get_Eentropy(scf, epsilon, Efermi)
    return get_E(scf)
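

# The minimizers below take this function through their `cost` keyword, so any wrapper with the
# same (scf, step) signature can hook into every cycle. A minimal sketch, assuming an already
# initialized SCF object `scf` (`logging_cost` is a hypothetical name, not part of this module):
#
#     def logging_cost(scf, step):
#         E = scf_step(scf, step)
#         print(f"step {step}: Etot = {E:.6f} Eh")
#         return E
#
#     costs = sd(scf, Nit=10, cost=logging_cost)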


def check_convergence(scf, method, Elist, linmin=None, cg=None, norm_g=None):
    """Check the energies for every SCF cycle and handle the output.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.

    Keyword Args:
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.

    Returns:
        Convergence condition.
    """
    iteration = len(Elist)

    # Print all the data
    print_scf_step(scf, method, Elist, linmin, cg, norm_g)

    if iteration > 1:
        # Check for convergence
        if scf.gradtol is None or norm_g is None:
            if abs(Elist[-1] - Elist[-2]) < scf.etol:
                scf.is_converged = True
                return True
        # If a gradient tolerance has been set we also check norm_g for convergence
        elif abs(Elist[-1] - Elist[-2]) < scf.etol and (np.sum(norm_g, axis=0) < scf.gradtol).all():
            scf.is_converged = True
            return True
        # Check if the current energy is higher than the last two values
        if (np.asarray(Elist[-3:-1]) < Elist[-1]).all():
            scf._log.warning("Total energy is not decreasing.")
    return False
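

# In short, convergence here means |Elist[-1] - Elist[-2]| < scf.etol, and additionally
# np.sum(norm_g, axis=0) < scf.gradtol for every spin channel when a gradient tolerance is set.
# The energy criterion in isolation, as a one-line sketch:
#
#     converged = abs(Elist[-1] - Elist[-2]) < scf.etol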


def print_scf_step(scf, method, Elist, linmin, cg, norm_g):
    """Print the data of one SCF step and the header at the beginning.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.
    """
    iteration = len(Elist)

    # Print a column header at the beginning
    # The ljust values have been chosen such that the output looks decent
    if iteration == 1:
        header = "Method".ljust(8)
        header += "Iteration".ljust(11)
        header += "Etot [Eh]".ljust(13)
        header += "dEtot [Eh]".ljust(13)
        # Print the gradient norm for cg methods
        if method not in {"sd", "lm", "pclm"}:
            header += "|Gradient|".ljust(10 * scf.atoms.occ.Nspin + 3)
        # Print extra debugging information if available
        if scf._log.level <= logging.DEBUG:
            if method != "sd":
                header += "linmin-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            if method not in {"sd", "lm", "pclm"}:
                header += "cg-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            scf._log.debug(header)
        else:
            scf._log.info(header)

    # Print the information for every cycle
    # Context manager for printing norm_g, linmin, and cg
    with np.printoptions(formatter={"float": "{:+0.2e}".format}):
        info = f"{method:<8}{iteration:>8}   {Elist[-1]:<+13,.6f}"
        # In the first step we do not have all the information yet
        if iteration > 1:
            info += f"{Elist[-1] - Elist[-2]:<+13,.4e}"
            if norm_g is not None:
                info += str(np.sum(norm_g, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
            if scf._log.level <= logging.DEBUG:
                if method != "sd" and linmin is not None:
                    info += str(np.sum(linmin, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
                if method not in {"sd", "lm", "pclm"} and cg is not None:
                    info += str(np.sum(cg, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
        if scf._log.level <= logging.DEBUG:
            scf._log.debug(info)
        else:
            scf._log.info(info)
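

# For reference, with the widths chosen above the header of a cg-type run lines up roughly like
# this (a sketch; the gradient column width depends on Nspin):
#
#     Method  Iteration  Etot [Eh]    dEtot [Eh]   |Gradient|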


def linmin_test(g, d):
    """Do the line minimization test.

    Calculate the cosine of the angle between g and d.

    Reference: https://trond.hjorteland.com/thesis/node26.html

    Args:
        g: Current gradient.
        d: Previous search direction.

    Returns:
        Cosine of the linmin angle.
    """
    # cos = A B / |A| |B|
    return dotprod(g, d) / np.sqrt(dotprod(g, g) * dotprod(d, d))
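

# Values close to zero mean the previous line minimization was accurate, i.e., the new gradient
# is (nearly) orthogonal to the old search direction. The minimizers only evaluate this test when
# the log level is DEBUG.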


def cg_test(atoms, ik, g, g_old, precondition=True):
    """Test the gradient-orthogonality theorem, i.e., g and g_old should be orthogonal.

    Calculate the cosine of the angle between g and g_old. For an angle of 90 degrees the cosine
    goes to zero.

    Reference: https://math.uci.edu/~chenlong/CAMtips/CG.html

    Args:
        atoms: Atoms object.
        ik: k-point index.
        g: Current gradient.
        g_old: Previous gradient.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        Cosine of the CG angle.
    """
    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    # cos = A B / |A| |B|
    return dotprod(g, Kg_old) / np.sqrt(dotprod(g, Kg) * dotprod(g_old, Kg_old))


def cg_method(scf, ik, cgform, g, g_old, d_old, precondition=True):
    """Do different variants of the conjugate gradient method.

    Reference: https://indrag49.github.io/Numerical-Optimization/conjugate-gradient-methods-1.html

    Args:
        scf: SCF object.
        ik: k-point index.
        cgform: Conjugate gradient form.
        g: Current gradient.
        g_old: Previous gradient.
        d_old: Previous search direction.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        Conjugate scalar and gradient norm.
    """
    atoms = scf.atoms

    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    norm_g = dotprod(g, Kg)

    if cgform == 1:  # Fletcher-Reeves
        return norm_g / dotprod(g_old, Kg_old), norm_g
    if cgform == 2:  # Polak-Ribiere
        return dotprod(g - g_old, Kg) / dotprod(g_old, Kg_old), norm_g
    if cgform == 3:  # Hestenes-Stiefel
        return dotprod(g - g_old, Kg) / dotprod(g - g_old, d_old), norm_g
    if cgform == 4:  # Dai-Yuan
        return norm_g / dotprod(g - g_old, d_old), norm_g
    msg = f'No cgform found for "{cgform}".'
    raise ValueError(msg)
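

# For reference, the conjugate scalars returned above written out with "." for dotprod and K for
# the (optional) preconditioner:
#
#     Fletcher-Reeves:   beta = (g . Kg) / (g_old . Kg_old)
#     Polak-Ribiere:     beta = ((g - g_old) . Kg) / (g_old . Kg_old)
#     Hestenes-Stiefel:  beta = ((g - g_old) . Kg) / ((g - g_old) . d_old)
#     Dai-Yuan:          beta = (g . Kg) / ((g - g_old) . d_old)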


@name("steepest descent minimization")
def sd(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Steepest descent minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    for i in range(Nit):
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, "sd", costs):
            break
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g = grad(scf, ik, spin, scf.W, **scf._precomputed)
                scf.W[ik][spin] = scf.W[ik][spin] - betat * g
    return costs
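

# A minimal usage sketch, assuming an already initialized SCF object `scf` (the iteration count
# and step size below are placeholder values):
#
#     costs = sd(scf, Nit=50, betat=3e-5)
#     print(f"converged: {scf.is_converged}, Etot = {costs[-1]:.6f} Eh")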


@name("preconditioned line minimization")
def pclm(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    precondition=True,
    **kwargs,
):
    """Preconditioned line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        precondition: Whether to use a preconditioner.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pclm"
    else:
        method = "lm"

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Search directions and gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]

    for i in range(Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin for each spin and k-point separately
                if scf._log.level <= logging.DEBUG and i > 0:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik)
                else:
                    d[ik][spin] = -g[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin):
            break
    return costs
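

# The step width used above can be read as a secant estimate along d: the slope of the energy in
# the search direction is g . d at the starting point and gt . d after the trial step betat, so
# the interpolated zero of the slope lies at
#
#     beta = betat * (g . d) / ((g - gt) . d)
#
# which is the expression evaluated (wrapped in abs() as a safeguard) in the loop above.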


@name("line minimization")
def lm(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    return pclm(scf, Nit, cost, grad, condition, betat, precondition=False)


@name("preconditioned conjugate-gradient minimization")
def pccg(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    cgform=1,
    precondition=True,
):
    """Preconditioned conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.
        precondition: Whether to use a preconditioner.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pccg"
    else:
        method = "cg"

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            if precondition:
                d[ik][spin] = -atoms.K(g[ik][spin], ik)
            else:
                d[ik][spin] = -g[ik][spin]
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    condition(scf, method, costs)

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin and k-point separately if needed
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin], precondition)
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin], precondition
                )
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                else:
                    d[ik][spin] = -g[ik][spin] + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin, cg, norm_g):
            break
    return costs
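

# A minimal usage sketch, assuming an already initialized SCF object `scf`; cgform selects the
# conjugate scalar documented in cg_method (the values below are placeholders):
#
#     costs = pccg(scf, Nit=100, cgform=2)  # Polak-Ribiere variant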


@name("conjugate-gradient minimization")
def cg(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):
    """Conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    return pccg(scf, Nit, cost, grad, condition, betat, cgform, precondition=False)


@name("auto minimization")
def auto(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):  # noqa: C901
    """Automatic preconditioned conjugate-gradient minimization algorithm.

    This function chooses an sd step over the pccg step if the energy goes up.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            d[ik][spin] = -atoms.K(g[ik][spin], ik)
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    if condition(scf, "pccg", costs):
        return costs

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin and k-point separately
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin])
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin]
                )
                d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        # If the energy does not go down, use the steepest descent step and recalculate the energy
        if c > costs[-1]:
            for ik in range(atoms.kpts.Nk):
                scf.W[ik] = W_tmp[ik] - betat * g[ik]
            c = cost(scf, -1)
            costs.append(c)
            # Do not print cg and linmin if we do the sd step
            if condition(scf, "sd", costs, norm_g=norm_g):
                break
        else:
            costs.append(c)
            if condition(scf, "pccg", costs, linmin, cg, norm_g):
                break
    return costs


#: Map minimizer names to their respective implementation.
IMPLEMENTED = {
    "sd": sd,
    "lm": lm,
    "pclm": pclm,
    "cg": cg,
    "pccg": pccg,
    "auto": auto,
}
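

# A minimal dispatch sketch, assuming an already initialized SCF object `scf` and a minimizer
# selected by name (the name and iteration count are placeholders):
#
#     minimizer = IMPLEMENTED["auto"]
#     costs = minimizer(scf, 100)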