Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 45 additions & 10 deletions dpdata/cp2k/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,9 @@ def get_frames(fname):
fp.close()
return [], [], [], [], [], [], [], None

# Check if this is CP2K 2025 format
is_cp2k_2025 = "energy [hartree]" in content

# search duplicated header
fp.seek(0)
header_idx = []
Expand Down Expand Up @@ -430,16 +433,48 @@ def get_frames(fname):
atom_symbol_idx_list.append(ii.split()[1])

if "ENERGY|" in ii:
energy = ii.split()[8]
if " Atom Kind " in ii:
force_flag = True
force_idx = idx
if force_flag:
if idx > force_idx:
if "SUM OF ATOMIC FORCES" in ii:
force_flag = False
else:
force.append(ii.split()[3:6])
# CP2K 2025 format: ENERGY| Total FORCE_EVAL ( QS ) energy [hartree] -7.364190264587725
# CP2K 2023 format: ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.): -1766.225653832774242
if is_cp2k_2025:
# Find the energy value after "[hartree]"
parts = ii.split()
try:
hartree_idx = parts.index("[hartree]")
energy = parts[hartree_idx + 1]
except (ValueError, IndexError):
# Fallback: try to find energy value in the line
for part in reversed(parts):
try:
float(part)
energy = part
break
except ValueError:
continue
else:
energy = ii.split()[8]
Comment on lines +438 to +454
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Potential uninitialized variable in fallback path.

If "[hartree]" is not found in parts and the fallback loop doesn't find any numeric value, energy will not be set for this line, but the code continues silently. While the assertion on line 491 will eventually catch this, the error message won't be helpful for debugging malformed CP2K 2025 output.

Consider adding explicit error handling:

Proposed fix
                 if is_cp2k_2025:
                     # Find the energy value after "[hartree]"
                     parts = ii.split()
                     try:
                         hartree_idx = parts.index("[hartree]")
                         energy = parts[hartree_idx + 1]
                     except (ValueError, IndexError):
                         # Fallback: try to find energy value in the line
                         for part in reversed(parts):
                             try:
                                 float(part)
                                 energy = part
                                 break
                             except ValueError:
                                 continue
+                        else:
+                            # Loop completed without finding energy
+                            raise RuntimeError(f"Cannot parse energy from CP2K 2025 line: {ii}")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@dpdata/cp2k/output.py` around lines 438 - 454, In the is_cp2k_2025 branch the
fallback loop may leave energy uninitialized if no numeric token is found;
modify the block around is_cp2k_2025 / parts / hartree_idx so that if neither
the "[hartree]" lookup nor the reversed parts numeric search sets energy you
raise a clear exception or log an error with context (e.g., include the original
line string ii and parts) instead of continuing silently; ensure the raised
error (ValueError or custom) describes that energy could not be parsed from the
CP2K 2025 output so the later assertion yields a helpful message.


# CP2K 2025 force format: FORCES| prefix lines
if is_cp2k_2025:
if (
"FORCES|" in ii
and "Atom x y z" not in ii
and "Atomic forces" not in ii
):
parts = ii.split()
# FORCES| 1 -5.73440344E-02 2.95274914E-02 -1.50988167E-02 6.62433792E-02
if len(parts) >= 5 and parts[1].isdigit():
force.append(parts[2:5])
else:
# CP2K 2023 format
if " Atom Kind " in ii:
force_flag = True
force_idx = idx
if force_flag:
if idx > force_idx:
if "SUM OF ATOMIC FORCES" in ii:
force_flag = False
else:
force.append(ii.split()[3:6])
# add reading stress tensor
if "STRESS TENSOR [GPa" in ii:
stress_flag = True
Expand Down
74 changes: 74 additions & 0 deletions tests/cp2k/cp2k_2025_output/cp2k_2025_output
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
DBCSR| Multiplication driver XSMM
DBCSR| Multrec recursion limit 512

**** **** ****** ** PROGRAM STARTED AT 2025-01-15 10:30:00.000
***** ** *** *** ** PROGRAM STARTED ON test-machine
** **** ****** PROGRAM STARTED BY test
***** ** ** ** ** PROGRAM PROCESS ID 12345
**** ** ******* ** PROGRAM STARTED IN /test

CP2K| version string: CP2K version 2025.1
CP2K| source code revision number: git:abc123

GLOBAL| Force Environment number 1
GLOBAL| Run type ENERGY_FORCE
GLOBAL| Global print level MEDIUM
GLOBAL| Total number of message passing processes 1

MEMORY| system memory details [Kb]

CELL| Vector a [angstrom]: 5.000 0.000 0.000 |a| = 5.000
CELL| Vector b [angstrom]: 0.000 5.000 0.000 |b| = 5.000
CELL| Vector c [angstrom]: 0.000 0.000 5.000 |c| = 5.000

ATOMIC KIND INFORMATION

1. Atomic kind: H Number of atoms: 2

MOLECULE KIND INFORMATION

TOTAL NUMBERS AND MAXIMUM NUMBERS

Total number of - Atomic kinds: 1
- Atoms: 2


SCF WAVEFUNCTION OPTIMIZATION

Step Update method Time Convergence Total energy Change
------------------------------------------------------------------------------

1 OT DIIS 0.80E-01 0.1 0.00000001 -7.3641902645877 -7.36E+00

*** SCF run converged in 1 steps ***


*******************************************************************************
MODULE QUICKSTEP: ATOMIC COORDINATES IN angstrom

Atom Kind Element X Y Z Z(eff) Mass

1 1 H 1 0.000000 0.000000 0.000000 1.00 1.0079
2 1 H 1 0.760000 0.000000 0.000000 1.00 1.0079

ENERGY| Total FORCE_EVAL ( QS ) energy [hartree] -7.364190264587725

FORCES| Atomic forces [hartree/bohr]

FORCES| Atom x y z |f|

FORCES| 1 -5.73440344E-02 2.95274914E-02 -1.50988167E-02 6.62433792E-02
FORCES| 2 7.92269287E-02 3.84670665E-02 -3.41478833E-02 9.44600412E-02

STRESS TENSOR [GPa]

X Y Z
X 0.12345678 0.00000000 0.00000000
Y 0.00000000 0.12345678 0.00000000
Z 0.00000000 0.00000000 0.12345678

-------------------------------------------------------------------------------
- -\n - T I M I N G -
-------------------------------------------------------------------------------

**** **** ****** ** PROGRAM ENDED AT 2025-01-15 10:30:05.000
Binary file added tests/cp2k/cp2k_2025_output/deepmd/set.000/box.npy
Binary file not shown.
Binary file added tests/cp2k/cp2k_2025_output/deepmd/set.000/coord.npy
Binary file not shown.
Binary file not shown.
Binary file added tests/cp2k/cp2k_2025_output/deepmd/set.000/force.npy
Binary file not shown.
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/cp2k/cp2k_2025_output/deepmd/type.raw
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0
0
1 change: 1 addition & 0 deletions tests/cp2k/cp2k_2025_output/deepmd/type_map.raw
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
H
60 changes: 60 additions & 0 deletions tests/test_cp2k_2025_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import annotations

import unittest

from comp_sys import CompLabeledSys
from context import dpdata


class TestCp2k2025Output(unittest.TestCase, CompLabeledSys):
"""Test CP2K 2025 output format parsing."""

def setUp(self):
self.system_1 = dpdata.LabeledSystem(
"cp2k/cp2k_2025_output/cp2k_2025_output", fmt="cp2k/output"
)
self.system_2 = dpdata.LabeledSystem(
"cp2k/cp2k_2025_output/deepmd", fmt="deepmd/npy"
)
self.places = 6
self.e_places = 6
self.f_places = 6
self.v_places = 4

def test_energy_extraction(self):
"""Test that energy is correctly extracted from CP2K 2025 format."""
# Energy should be -7.364190264587725 hartree
# Using dpdata's conversion factor: -200.3898256786414 eV
expected_energy = -200.3898256786414
self.assertAlmostEqual(
self.system_1.data["energies"][0], expected_energy, places=5
)

def test_forces_extraction(self):
"""Test that forces are correctly extracted from CP2K 2025 format."""
# Forces should be converted from hartree/bohr to eV/angstrom
self.assertEqual(self.system_1.data["forces"].shape, (1, 2, 3))
# Check first atom force x-component
self.assertAlmostEqual(
self.system_1.data["forces"][0][0][0], -2.94874881, places=5
)


class TestCp2k2023FormatStillWorks(unittest.TestCase, CompLabeledSys):
"""Test that CP2K 2023 format still works (regression test)."""

def setUp(self):
self.system_1 = dpdata.LabeledSystem(
"cp2k/cp2k_normal_output/cp2k_output", fmt="cp2k/output"
)
self.system_2 = dpdata.LabeledSystem(
"cp2k/cp2k_normal_output/deepmd", fmt="deepmd/npy"
)
self.places = 6
self.e_places = 6
self.f_places = 6
self.v_places = 4


if __name__ == "__main__":
unittest.main()