# SPDX-FileCopyrightText: 2021 The eminus developers
# SPDX-License-Identifier: Apache-2.0
"""Minimization algorithms."""

import copy
import logging

import numpy as np

from .dft import get_epsilon, get_grad
from .energies import get_E, get_Eentropy
from .logger import name
from .utils import dotprod


def scf_step(scf, step):
    """Perform one SCF step for a DFT calculation.

    Calculating intermediate results speeds up the energy and gradient calculation.
    This function is similar to H_precompute but will set all variables and energies in the SCF
    class and return the total energy.

    Args:
        scf: SCF object.
        step: Optimization step.

    Returns:
        Total energy.
    """
    scf._precompute()
    # Update occupations every smear_update'th cycle
    if scf.atoms.occ.smearing > 0 and step % scf.smear_update == 0:
        epsilon = get_epsilon(scf, scf.W, **scf._precomputed)
        Efermi = scf.atoms.occ.smear(epsilon)
        get_Eentropy(scf, epsilon, Efermi)
    return get_E(scf)


def check_convergence(scf, method, Elist, linmin=None, cg=None, norm_g=None):
    """Check the energies for every SCF cycle and handle the output.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.

    Keyword Args:
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.

    Returns:
        Convergence condition.
    """
    iteration = len(Elist)

    # Print all the data
    print_scf_step(scf, method, Elist, linmin, cg, norm_g)

    if iteration > 1:
        # Check for convergence
        if scf.gradtol is None or norm_g is None:
            if abs(Elist[-2] - Elist[-1]) < scf.etol:
                scf.is_converged = True
                return True
        # If a gradient tolerance has been set we also check norm_g for convergence
        elif abs(Elist[-2] - Elist[-1]) < scf.etol and (np.sum(norm_g, axis=0) < scf.gradtol).all():
            scf.is_converged = True
            return True
        # Check if the current energy is higher than the last two values
        if (np.asarray(Elist[-3:-1]) < Elist[-1]).all():
            scf._log.warning("Total energy is not decreasing.")
    return False


def print_scf_step(scf, method, Elist, linmin, cg, norm_g):
    """Print the data of one SCF step and the header at the beginning.

    Args:
        scf: SCF object.
        method: Minimization method.
        Elist: Total energies per SCF step.
        linmin: Cosine between previous search direction and current gradient.
        cg: Conjugate-gradient orthogonality.
        norm_g: Gradient norm.
    """
    iteration = len(Elist)

    # Print a column header at the beginning
    # The ljust values have been chosen such that the output looks decent
    if iteration == 1:
        header = "Method".ljust(8)
        header += "Iteration".ljust(11)
        header += "Etot [Eh]".ljust(13)
        header += "dEtot [Eh]".ljust(13)
        # Print the gradient norm for cg methods
        if method not in {"sd", "lm", "pclm"}:
            header += "|Gradient|".ljust(10 * scf.atoms.occ.Nspin + 3)
        # Print extra debugging information if available
        if scf._log.level <= logging.DEBUG:
            if method != "sd":
                header += "linmin-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            if method not in {"sd", "lm", "pclm"}:
                header += "cg-test".ljust(10 * scf.atoms.occ.Nspin + 3)
            scf._log.debug(header)
        else:
            scf._log.info(header)

    # Print the information for every cycle
    # Context manager for printing norm_g, linmin, and cg
    with np.printoptions(formatter={"float": "{:+0.2e}".format}):
        info = f"{method:<8}{iteration:>8}   {Elist[-1]:<+13,.6f}"
        # In the first step we do not have all information yet
        if iteration > 1:
            info += f"{Elist[-2] - Elist[-1]:<+13,.4e}"
            if norm_g is not None:
                info += str(np.sum(norm_g, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
            if scf._log.level <= logging.DEBUG:
                if method != "sd" and linmin is not None:
                    info += str(np.sum(linmin, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
                if method not in {"sd", "lm", "pclm"} and cg is not None:
                    info += str(np.sum(cg, axis=0)).ljust(10 * scf.atoms.occ.Nspin + 3)
        if scf._log.level <= logging.DEBUG:
            scf._log.debug(info)
        else:
            scf._log.info(info)


def linmin_test(g, d):
    """Do the line minimization test.

    Calculate the cosine of the angle between g and d.

    Reference: https://trond.hjorteland.com/thesis/node26.html

    Args:
        g: Current gradient.
        d: Previous search direction.

    Returns:
        Linmin angle.
    """
    # cos = A B / |A| |B|
    return dotprod(g, d) / np.sqrt(dotprod(g, g) * dotprod(d, d))
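

# Illustration only: the same cosine expression evaluated for plain NumPy vectors, with an
# ordinary dot product standing in for dotprod. With an exact line search the current gradient
# is orthogonal to the previous search direction, so the value returned here is close to zero.
def _linmin_cosine_sketch():
    """Evaluate the linmin cosine for two toy vectors (illustrative sketch, not used by eminus)."""
    rng = np.random.default_rng(1234)
    d = rng.standard_normal(10)  # previous search direction
    g = rng.standard_normal(10)  # current gradient
    g -= d * (g @ d) / (d @ d)  # emulate an exact line search by projecting out d
    # cos = A B / |A| |B|, the same expression used in linmin_test
    return (g @ d) / np.sqrt((g @ g) * (d @ d))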


def cg_test(atoms, ik, g, g_old, precondition=True):
    """Test the gradient-orthogonality theorem, i.e., g and g_old should be orthogonal.

    Calculate the cosine of the angle between g and g_old. For an angle of 90 degrees the cosine
    goes to zero.

    Reference: https://math.uci.edu/~chenlong/CAMtips/CG.html

    Args:
        atoms: Atoms object.
        ik: k-point index.
        g: Current gradient.
        g_old: Previous gradient.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        CG angle.
    """
    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    # cos = A B / |A| |B|
    return dotprod(g, Kg_old) / np.sqrt(dotprod(g, Kg) * dotprod(g_old, Kg_old))


def cg_method(scf, ik, cgform, g, g_old, d_old, precondition=True):
    """Do different variants of the conjugate gradient method.

    Reference: https://indrag49.github.io/Numerical-Optimization/conjugate-gradient-methods-1.html

    Args:
        scf: SCF object.
        ik: k-point index.
        cgform: Conjugate gradient form.
        g: Current gradient.
        g_old: Previous gradient.
        d_old: Previous search direction.

    Keyword Args:
        precondition: Whether to use a preconditioner.

    Returns:
        Conjugate scalar and gradient norm.
    """
    atoms = scf.atoms

    if precondition:
        Kg, Kg_old = atoms.K(g, ik), atoms.K(g_old, ik)
    else:
        Kg, Kg_old = g, g_old
    norm_g = dotprod(g, Kg)

    if cgform == 1:  # Fletcher-Reeves
        return norm_g / dotprod(g_old, Kg_old), norm_g
    if cgform == 2:  # Polak-Ribiere
        return dotprod(g - g_old, Kg) / dotprod(g_old, Kg_old), norm_g
    if cgform == 3:  # Hestenes-Stiefel
        return dotprod(g - g_old, Kg) / dotprod(g - g_old, d_old), norm_g
    if cgform == 4:  # Dai-Yuan
        return norm_g / dotprod(g - g_old, d_old), norm_g
    msg = f'No cgform found for "{cgform}".'
    raise ValueError(msg)
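

# Illustration only: the four conjugate scalars written out for plain NumPy vectors without a
# preconditioner (K acting as the identity), matching cgform 1 to 4 above. The vector names mirror
# the arguments of cg_method; the values themselves carry no physical meaning.
def _cg_beta_sketch():
    """Evaluate the four conjugate scalars for toy vectors (illustrative sketch)."""
    rng = np.random.default_rng(1234)
    g_old = rng.standard_normal(10)  # previous gradient
    g = rng.standard_normal(10)  # current gradient
    d_old = -g_old  # previous search direction, here a plain steepest descent direction
    return {
        "Fletcher-Reeves": (g @ g) / (g_old @ g_old),
        "Polak-Ribiere": ((g - g_old) @ g) / (g_old @ g_old),
        "Hestenes-Stiefel": ((g - g_old) @ g) / ((g - g_old) @ d_old),
        "Dai-Yuan": (g @ g) / ((g - g_old) @ d_old),
    }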


@name("steepest descent minimization")
def sd(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Steepest descent minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    for i in range(Nit):
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, "sd", costs):
            break
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g = grad(scf, ik, spin, scf.W, **scf._precomputed)
                scf.W[ik][spin] = scf.W[ik][spin] - betat * g
    return costs
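

# Illustration only: the fixed-step update W <- W - betat * g used above, applied to a small
# quadratic model 0.5 x^T A x - b^T x whose gradient is A x - b. The step size and iteration count
# are chosen for the toy problem and are not related to the SCF defaults.
def _sd_quadratic_sketch():
    """Run fixed-step steepest descent on a toy quadratic (illustrative sketch)."""
    A = np.diag([1.0, 2.0, 5.0])  # positive definite model Hessian
    b = np.array([1.0, 1.0, 1.0])
    x = np.zeros(3)
    step = 0.1  # plays the role of betat for this toy problem
    for _ in range(200):
        g = A @ x - b  # gradient of the quadratic
        x = x - step * g  # same update pattern as in sd
    return x  # approaches the minimizer A^-1 b = [1.0, 0.5, 0.2]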


@name("preconditioned line minimization")
def pclm(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    precondition=True,
    **kwargs,
):
    """Preconditioned line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        precondition: Whether to use a preconditioner.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pclm"
    else:
        method = "lm"

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Search directions and gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]

    for i in range(Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin for each spin and k-point separately
                if scf._log.level <= logging.DEBUG and i > 0:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik)
                else:
                    d[ik][spin] = -g[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin):
            break
    return costs
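

# Illustration only: the optimal step width used above, beta = betat * <g, d> / <g - gt, d>, where
# gt is the gradient after a trial step of length betat along d. For a quadratic cost function this
# expression is exact, so a single step reaches the line minimum along d.
def _line_minimization_sketch():
    """Take one analytically line-minimized step on a toy quadratic (illustrative sketch)."""
    A = np.diag([1.0, 2.0, 5.0])
    b = np.array([1.0, 1.0, 1.0])
    x = np.zeros(3)
    betat = 1e-3  # small trial step size
    g = A @ x - b  # gradient at the current point
    d = -g  # unpreconditioned search direction
    gt = A @ (x + betat * d) - b  # gradient after the trial step
    beta = abs(betat * (g @ d) / ((g - gt) @ d))  # optimal step width along d
    return x + beta * d  # the exact line minimum, here beta = <g, g> / <d, A d> = 3 / 8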


@name("line minimization")
def lm(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, **kwargs):
    """Line minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        **kwargs: Throwaway arguments.

    Returns:
        Total energies per SCF cycle.
    """
    return pclm(scf, Nit, cost, grad, condition, betat, precondition=False)


@name("preconditioned conjugate-gradient minimization")
def pccg(
    scf,
    Nit,
    cost=scf_step,
    grad=get_grad,
    condition=check_convergence,
    betat=3e-5,
    cgform=1,
    precondition=True,
):
    """Preconditioned conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.
        precondition: Whether to use a preconditioner.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    if precondition:
        method = "pccg"
    else:
        method = "cg"

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            if precondition:
                d[ik][spin] = -atoms.K(g[ik][spin], ik)
            else:
                d[ik][spin] = -g[ik][spin]
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    condition(scf, method, costs)

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin and k-point separately if desired
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin], precondition)
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin], precondition
                )
                if precondition:
                    d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                else:
                    d[ik][spin] = -g[ik][spin] + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        costs.append(c)
        if condition(scf, method, costs, linmin, cg, norm_g):
            break
    return costs
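

# Illustration only: the two building blocks of pccg (the trial-step line minimization and the
# Fletcher-Reeves scalar from cg_method with cgform=1) combined into a plain CG loop for a small
# quadratic. With exact line minimizations, n steps solve an n-dimensional quadratic.
def _cg_quadratic_sketch():
    """Run three unpreconditioned CG steps on a toy quadratic (illustrative sketch)."""
    A = np.diag([1.0, 2.0, 5.0])
    b = np.array([1.0, 1.0, 1.0])
    x = np.zeros(3)
    betat = 1e-3
    g = A @ x - b
    d = -g  # the first step uses the plain steepest descent direction
    for _ in range(3):
        gt = A @ (x + betat * d) - b  # gradient after a trial step along d
        beta = abs(betat * (g @ d) / ((g - gt) @ d))  # optimal step width
        x = x + beta * d  # move to the line minimum
        g_old, d_old = g, d
        g = A @ x - b
        d = -g + (g @ g) / (g_old @ g_old) * d_old  # Fletcher-Reeves update (cgform=1)
    return x  # matches the minimizer A^-1 b = [1.0, 0.5, 0.2] up to rounding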


@name("conjugate-gradient minimization")
def cg(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):
    """Conjugate-gradient minimization algorithm.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    return pccg(scf, Nit, cost, grad, condition, betat, cgform, precondition=False)


@name("auto minimization")
def auto(scf, Nit, cost=scf_step, grad=get_grad, condition=check_convergence, betat=3e-5, cgform=1):  # noqa: C901
    """Automatic preconditioned conjugate-gradient minimization algorithm.

    This function chooses an sd step over the pccg step if the energy goes up.

    Args:
        scf: SCF object.
        Nit: Maximum number of SCF steps.

    Keyword Args:
        cost: Function that will run every SCF step.
        grad: Function that calculates the respective gradient.
        condition: Function to check and log the convergence condition.
        betat: Step size.
        cgform: Conjugate gradient form.

    Returns:
        Total energies per SCF cycle.
    """
    atoms = scf.atoms
    costs = []

    # Scalars that need to be saved for each spin and k-point
    linmin = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    cg = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    norm_g = np.empty((atoms.kpts.Nk, atoms.occ.Nspin))
    # Gradients that need to be saved for each spin and k-point
    d = [np.empty_like(Wk) for Wk in scf.W]
    g = [np.empty_like(Wk) for Wk in scf.W]
    d_old = [np.empty_like(Wk) for Wk in scf.W]
    g_old = [np.empty_like(Wk) for Wk in scf.W]

    # Do the first step without the linmin and cg tests, and without the cg_method
    W_tmp = copy.deepcopy(scf.W)
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
            d[ik][spin] = -atoms.K(g[ik][spin], ik)
            scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

    # Calculate the optimal step width
    scf._precompute()
    for ik in range(atoms.kpts.Nk):
        for spin in range(atoms.occ.Nspin):
            gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
            beta = abs(
                betat * dotprod(g[ik][spin], d[ik][spin]) / dotprod(g[ik][spin] - gt, d[ik][spin])
            )
            scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
            g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

    # Evaluate the cost function
    c = cost(scf, -1)
    costs.append(c)
    if condition(scf, "pccg", costs):
        return costs

    # Start the iteration
    for i in range(1, Nit):
        W_tmp = copy.deepcopy(scf.W)
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                g[ik][spin] = grad(scf, ik, spin, scf.W, **scf._precomputed)
                # Calculate linmin and cg for each spin and k-point separately
                if scf._log.level <= logging.DEBUG:
                    linmin[ik][spin] = linmin_test(g[ik][spin], d[ik][spin])
                    cg[ik][spin] = cg_test(atoms, ik, g[ik][spin], g_old[ik][spin])
                beta, norm_g[ik][spin] = cg_method(
                    scf, ik, cgform, g[ik][spin], g_old[ik][spin], d_old[ik][spin]
                )
                d[ik][spin] = -atoms.K(g[ik][spin], ik) + beta * d_old[ik][spin]
                scf.W[ik][spin] = scf.W[ik][spin] + betat * d[ik][spin]

        scf._precompute()
        for ik in range(atoms.kpts.Nk):
            for spin in range(atoms.occ.Nspin):
                gt = grad(scf, ik, spin, scf.W, **scf._precomputed)
                beta = abs(
                    betat
                    * dotprod(g[ik][spin], d[ik][spin])
                    / dotprod(g[ik][spin] - gt, d[ik][spin])
                )
                scf.W[ik][spin] = W_tmp[ik][spin] + beta * d[ik][spin]
                g_old[ik][spin], d_old[ik][spin] = g[ik][spin], d[ik][spin]

        c = cost(scf, i)
        # If the energy does not go down, use the steepest descent step and recalculate the energy
        if c > costs[-1]:
            for ik in range(atoms.kpts.Nk):
                scf.W[ik] = W_tmp[ik] - betat * g[ik]
            c = cost(scf, -1)
            costs.append(c)
            # Do not print cg and linmin if we do the sd step
            if condition(scf, "sd", costs, norm_g=norm_g):
                break
        else:
            costs.append(c)
            if condition(scf, "pccg", costs, linmin, cg, norm_g):
                break
    return costs


#: Map minimizer names to their respective implementations.
IMPLEMENTED = {
    "sd": sd,
    "lm": lm,
    "pclm": pclm,
    "cg": cg,
    "pccg": pccg,
    "auto": auto,
}