Coverage for cosmolayer / parser / parser.py: 96%
26 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 14:25 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 14:25 +0000
"""
.. module:: cosmolayer.parser
   :synopsis: Parser for COSMO output files.

.. classauthor:: Charlles Abreu <craabreu@gmail.com>
"""
8from types import ModuleType
10import pandas as pd
12from . import dmol3, turbomole
13from .utils import parse_table, parse_value
def get_atom_dataframe(module: ModuleType, file_contents: str) -> pd.DataFrame:
    """Build the atom table from the contents of a COSMO file.

    The table is extracted by ``parse_table`` using the atom row regex, atom
    section regex and atom schema exposed by ``module``. The ``x``, ``y`` and
    ``z`` columns are then multiplied by
    ``module.ATOM_POSITION_CONVERSION_FACTOR``.

    Parameters
    ----------
    module : ModuleType
        Format-specific module providing ``ATOM_ROW_REGEX``,
        ``ATOM_SECTION_REGEX``, ``ATOM_INFO_SCHEMA`` and
        ``ATOM_POSITION_CONVERSION_FACTOR``.
    file_contents : str
        Contents of the COSMO output file.

    Returns
    -------
    pd.DataFrame
        The parsed atom table with rescaled coordinates.
    """
    atoms = parse_table(
        file_contents,
        module.ATOM_ROW_REGEX,
        module.ATOM_SECTION_REGEX,
        module.ATOM_INFO_SCHEMA,
    )
    scale = module.ATOM_POSITION_CONVERSION_FACTOR
    for column in ("x", "y", "z"):
        atoms[column] *= scale
    return atoms
def get_segment_dataframe(module: ModuleType, file_contents: str) -> pd.DataFrame:
    """Build the segment table from the contents of a COSMO file.

    The table is extracted by ``parse_table`` using the segment row regex,
    segment section regex and segment schema exposed by ``module``. The ``x``,
    ``y`` and ``z`` columns are multiplied by
    ``module.SEGMENT_POSITION_CONVERSION_FACTOR`` and the ``atom`` column is
    decreased by one.

    Parameters
    ----------
    module : ModuleType
        Format-specific module providing ``SEGMENT_ROW_REGEX``,
        ``SEGMENT_SECTION_REGEX``, ``SEGMENT_INFO_SCHEMA`` and
        ``SEGMENT_POSITION_CONVERSION_FACTOR``.
    file_contents : str
        Contents of the COSMO output file.

    Returns
    -------
    pd.DataFrame
        The parsed segment table with rescaled coordinates and shifted atom
        numbers.
    """
    segments = parse_table(
        file_contents,
        module.SEGMENT_ROW_REGEX,
        module.SEGMENT_SECTION_REGEX,
        module.SEGMENT_INFO_SCHEMA,
    )
    scale = module.SEGMENT_POSITION_CONVERSION_FACTOR
    for column in ("x", "y", "z"):
        segments[column] *= scale
    # Shift the atom numbers down by one (presumably 1-based in the file, so
    # that they can index the atom table positionally -- TODO confirm).
    segments["atom"] -= 1
    return segments
def get_volume(module: ModuleType, file_contents: str) -> float:
    """Extract the volume from the contents of a COSMO file.

    The raw value is located by ``parse_value`` using ``module.VOLUME_REGEX``
    and multiplied by ``module.VOLUME_CONVERSION_FACTOR`` before being
    converted to a built-in ``float``.

    Parameters
    ----------
    module : ModuleType
        Format-specific module providing ``VOLUME_REGEX`` and
        ``VOLUME_CONVERSION_FACTOR``.
    file_contents : str
        Contents of the COSMO output file.

    Returns
    -------
    float
        The converted volume.
    """
    raw_volume = parse_value(file_contents, module.VOLUME_REGEX)
    return float(raw_volume * module.VOLUME_CONVERSION_FACTOR)
def parse_cosmo_file(
    contents: str,
) -> tuple[str, pd.DataFrame, pd.DataFrame, float]:
    """Parse the contents of a COSMO output file.

    This function takes the contents of a COSMO (Conductor-like Screening Model)
    output file and extracts atomic coordinates, segment information, and molecular
    volume. It automatically detects the file format (TURBOMOLE or DMol-3) and uses
    the appropriate parser.

    Parameters
    ----------
    contents : str
        Contents of the COSMO output file to parse. Reading the file from disk is
        the caller's responsibility.

    Returns
    -------
    format : str
        The file format detected ("DMol-3" or "TURBOMOLE").
    atom_df : pd.DataFrame
        DataFrame containing atomic information with columns:
            - id: atom identifier (str)
            - x, y, z: Cartesian coordinates in Angstroms (float)
            - element: chemical element symbol (str)
    segment_df : pd.DataFrame
        DataFrame containing segment information with columns:
            - atom: index of the associated atom, i.e. the atom number read from
              the file minus one (int)
            - x, y, z: segment coordinates in Angstroms (float)
            - charge: segment charge (float)
            - area: segment surface area (float)
    volume : float
        Molecular cavity volume in cubic Angstroms.

    Raises
    ------
    ValueError
        If the file format is not recognized or does not contain the required
        COSMO sections.

    Examples
    --------
    Parse a TURBOMOLE COSMO file:

    >>> from importlib.resources import files
    >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
    >>> contents = path.read_text(encoding="utf-8", errors="replace")
    >>> fmt, atoms, segments, volume = parse_cosmo_file(contents)
    >>> print(fmt)
    TURBOMOLE
    >>> atoms.tail(3)
       id       x       y       z element
    6  H3  0.6389 -1.8805 -0.1568       H
    7  H4  1.6297 -0.8236  0.6729       H
    8  H5  1.1806  1.3231 -0.4305       H
    >>> segments.tail(3)
         atom         x         y         z    charge      area
    468     8  1.003395  2.214518 -1.389667 -0.002498  0.193147
    469     8  1.068201  0.923523 -1.695803 -0.002131  0.130985
    470     8  2.133636  1.152865  0.489697 -0.001817  0.145681
    >>> volume
    80.07160...

    Parse a DMol-3 COSMO file:

    >>> path = files("cosmolayer.data") / "NCCO.cosmo"
    >>> contents = path.read_text(encoding="utf-8", errors="replace")
    >>> fmt, atoms, segments, volume = parse_cosmo_file(contents)
    >>> print(fmt)
    DMol-3
    >>> len(atoms)
    11
    >>> len(segments)
    429
    >>> volume
    86.10187...
    """
    module: ModuleType
    # Format detection relies on marker strings: a DMol-3 file is identified by
    # its results banner, a TURBOMOLE file by the presence of both section tags.
    if "DMol3/COSMO Results" in contents:
        file_format = "DMol-3"
        module = dmol3
    elif "$segment_information" in contents and "$coord_car" in contents:
        file_format = "TURBOMOLE"
        module = turbomole
    else:
        raise ValueError(
            "Could not parse COSMO file contents. Supported formats: TURBOMOLE, DMol-3"
        )
    return (
        file_format,
        get_atom_dataframe(module, contents),
        get_segment_dataframe(module, contents),
        get_volume(module, contents),
    )