Coverage for cosmolayer / parser / parser.py: 96%

26 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-11 14:25 +0000

1""" 

2.. module:: cosmolayer.parser 

3 :synopsis: Parser for COSMO output files. 

4 

5.. classauthor:: Charlles Abreu <craabreu@gmail.com> 

6""" 

7 

8from types import ModuleType 

9 

10import pandas as pd 

11 

12from . import dmol3, turbomole 

13from .utils import parse_table, parse_value 

14 

15 

def get_atom_dataframe(module: ModuleType, file_contents: str) -> pd.DataFrame:
    """Build the atom table from the text of a COSMO output file.

    Parameters
    ----------
    module : ModuleType
        Format-specific parser module. It must expose ``ATOM_ROW_REGEX``,
        ``ATOM_SECTION_REGEX``, ``ATOM_INFO_SCHEMA`` and
        ``ATOM_POSITION_CONVERSION_FACTOR``.
    file_contents : str
        Full text of the COSMO output file.

    Returns
    -------
    pd.DataFrame
        The table produced by ``parse_table`` with its ``x``, ``y`` and ``z``
        columns multiplied by ``module.ATOM_POSITION_CONVERSION_FACTOR``.
    """
    atoms = parse_table(
        file_contents,
        module.ATOM_ROW_REGEX,
        module.ATOM_SECTION_REGEX,
        module.ATOM_INFO_SCHEMA,
    )
    # Rescale each Cartesian column in place (presumably a unit conversion of
    # the positions to Angstroms -- confirm against the format modules).
    for column in ("x", "y", "z"):
        atoms[column] *= module.ATOM_POSITION_CONVERSION_FACTOR
    return atoms

26 

27 

def get_segment_dataframe(module: ModuleType, file_contents: str) -> pd.DataFrame:
    """Build the surface-segment table from the text of a COSMO output file.

    Parameters
    ----------
    module : ModuleType
        Format-specific parser module. It must expose ``SEGMENT_ROW_REGEX``,
        ``SEGMENT_SECTION_REGEX``, ``SEGMENT_INFO_SCHEMA`` and
        ``SEGMENT_POSITION_CONVERSION_FACTOR``.
    file_contents : str
        Full text of the COSMO output file.

    Returns
    -------
    pd.DataFrame
        The table produced by ``parse_table`` with its ``x``, ``y`` and ``z``
        columns multiplied by ``module.SEGMENT_POSITION_CONVERSION_FACTOR`` and
        its ``atom`` column decreased by one.
    """
    segments = parse_table(
        file_contents,
        module.SEGMENT_ROW_REGEX,
        module.SEGMENT_SECTION_REGEX,
        module.SEGMENT_INFO_SCHEMA,
    )
    # Rescale each Cartesian column in place (presumably a unit conversion of
    # the positions to Angstroms -- confirm against the format modules).
    for column in ("x", "y", "z"):
        segments[column] *= module.SEGMENT_POSITION_CONVERSION_FACTOR
    # Shift the atom reference down by one; presumably the file numbers atoms
    # from 1 while the atom table is indexed from 0 -- TODO confirm.
    segments["atom"] -= 1
    return segments

39 

40 

def get_volume(module: ModuleType, file_contents: str) -> float:
    """Extract the volume value from the text of a COSMO output file.

    Parameters
    ----------
    module : ModuleType
        Format-specific parser module. It must expose ``VOLUME_REGEX`` and
        ``VOLUME_CONVERSION_FACTOR``.
    file_contents : str
        Full text of the COSMO output file.

    Returns
    -------
    float
        The value located by ``parse_value`` using ``module.VOLUME_REGEX``,
        multiplied by ``module.VOLUME_CONVERSION_FACTOR``.
    """
    raw_volume = parse_value(file_contents, module.VOLUME_REGEX)
    # Wrap in float() so callers always receive a plain Python float.
    return float(raw_volume * module.VOLUME_CONVERSION_FACTOR)

46 

47 

def parse_cosmo_file(
    contents: str,
) -> tuple[str, pd.DataFrame, pd.DataFrame, float]:
    """Parse the contents of a COSMO output file.

    This function takes the text of a COSMO (Conductor-like Screening Model)
    output file and extracts atomic coordinates, segment information, and
    molecular volume. It automatically detects the file format (TURBOMOLE or
    DMol-3) and uses the appropriate parser.

    Parameters
    ----------
    contents : str
        Contents of the COSMO output file to parse. This is the file's text,
        not a path: reading the file is the caller's responsibility.

    Returns
    -------
    format : str
        The file format detected ("DMol-3" or "TURBOMOLE").
    atom_df : pd.DataFrame
        DataFrame containing atomic information with columns:
        - id: atom identifier (str)
        - x, y, z: Cartesian coordinates in Angstroms (float)
        - element: chemical element symbol (str)
    segment_df : pd.DataFrame
        DataFrame containing segment information with columns:
        - atom: associated atom number, decreased by one relative to the
          value stored in the file (int)
        - x, y, z: segment coordinates in Angstroms (float)
        - charge: segment charge (float)
        - area: segment surface area (float)
    volume : float
        Molecular cavity volume in cubic Angstroms.

    Raises
    ------
    ValueError
        If the file format is not recognized, i.e. ``contents`` holds neither
        the DMol-3 results marker nor both TURBOMOLE section markers.

    Examples
    --------
    Parse a TURBOMOLE COSMO file:

    >>> from importlib.resources import files
    >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
    >>> contents = path.read_text(encoding="utf-8", errors="replace")
    >>> fmt, atoms, segments, volume = parse_cosmo_file(contents)
    >>> print(fmt)
    TURBOMOLE
    >>> atoms.tail(3)
       id       x       y       z element
    6  H3  0.6389 -1.8805 -0.1568       H
    7  H4  1.6297 -0.8236  0.6729       H
    8  H5  1.1806  1.3231 -0.4305       H
    >>> segments.tail(3)
         atom         x         y         z    charge      area
    468     8  1.003395  2.214518 -1.389667 -0.002498  0.193147
    469     8  1.068201  0.923523 -1.695803 -0.002131  0.130985
    470     8  2.133636  1.152865  0.489697 -0.001817  0.145681
    >>> volume
    80.07160...

    Parse a DMol-3 COSMO file:

    >>> path = files("cosmolayer.data") / "NCCO.cosmo"
    >>> contents = path.read_text(encoding="utf-8", errors="replace")
    >>> fmt, atoms, segments, volume = parse_cosmo_file(contents)
    >>> print(fmt)
    DMol-3
    >>> len(atoms)
    11
    >>> len(segments)
    429
    >>> volume
    86.10187...
    """
    # Format detection relies on marker strings; the DMol-3 marker is checked
    # first, so it wins if a file happens to contain both sets of markers.
    module: ModuleType
    if "DMol3/COSMO Results" in contents:
        detected_format = "DMol-3"
        module = dmol3
    elif "$segment_information" in contents and "$coord_car" in contents:
        # TURBOMOLE is only accepted when both section markers are present.
        detected_format = "TURBOMOLE"
        module = turbomole
    else:
        raise ValueError(
            "Could not parse COSMO file contents. Supported formats: TURBOMOLE, DMol-3"
        )
    return (
        detected_format,
        get_atom_dataframe(module, contents),
        get_segment_dataframe(module, contents),
        get_volume(module, contents),
    )

142 )