Coverage for cosmolayer / cosmosac / datapoint.py: 100%

24 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-11 14:25 +0000

1""" 

2.. module:: cosmolayer.cosmosac.datapoint 

3 :synopsis: Single datapoint for a COSMO-SAC mixture. 

4 

5.. moduleauthor:: Charlles Abreu <craabreu@gmail.com> 

6""" 

7 

8from __future__ import annotations 

9 

10import functools 

11import os 

12from collections.abc import Sequence 

13 

14import numpy as np 

15import pandas as pd 

16 

17from ..cosmodata import MixtureDatapoint, NumpyArray1D 

18from .model import CosmoSac2010Model, Model 

19 

20 

@functools.cache
def _get_component_data(
    cosmo_file_path: str,
    model: Model,
) -> tuple[float, float, NumpyArray1D]:
    """Read one COSMO file and extract the per-component model inputs.

    The call is memoized on its arguments, ``(cosmo_file_path, model)``, so a
    file that appears in many datapoints is read and parsed only once per
    model.

    Parameters
    ----------
    cosmo_file_path : str
        Path to a COSMO output file (e.g. ``.cosmo``).
    model : :class:`~cosmolayer.cosmosac.model.Model`
        Model whose ``create_component`` turns the file contents into a
        component. It is part of the cache key, so it must be hashable.

    Returns
    -------
    tuple[float, float, NumpyArray1D]
        ``(area, volume, probabilities)`` of the component. The
        ``probabilities`` array is flagged read-only.
    """
    with open(cosmo_file_path) as cosmo_file:
        contents = cosmo_file.read()
    component = model.create_component(contents)
    sigma_profile = component.probabilities
    # The cached tuple is handed to every caller that asks for this file, so
    # the array is frozen to stop one caller from modifying shared data.
    sigma_profile.flags.writeable = False
    return component.area, component.volume, sigma_profile

48 

49 

class CosmoSacMixtureDatapoint(MixtureDatapoint):
    """Subclass of :class:`MixtureDatapoint` for COSMO-SAC mixtures.

    Component areas, volumes, and sigma-profile probabilities are loaded from
    COSMO files through the given model and passed, together with the
    temperature, mole fractions, and targets, to :class:`MixtureDatapoint`.

    Parameters
    ----------
    cosmo_files : Sequence[os.PathLike[str]]
        Paths to COSMO files, one per component. Order must match
        ``mole_fractions``.
    mole_fractions : Sequence[float]
        Mole fractions for each component (should sum to 1).
    temperature : float
        Temperature in Kelvin.
    targets : Sequence[float] | None, optional
        Target values for the mixture (e.g. activity coefficients, excess
        properties). Length defines the number of training targets. If ``None``,
        no training targets are stored.
    model: :class:`~cosmolayer.cosmosac.model.Model`
        COSMO-SAC model used to load components and compute probabilities.

    Raises
    ------
    ValueError
        If ``cosmo_files`` is empty. A mismatch between the number of mole
        fractions and the number of COSMO files is not checked in this class;
        it is presumably rejected by :class:`MixtureDatapoint` (to be
        confirmed against the base class).

    Examples
    --------
    Build a binary mixture datapoint from packaged COSMO files and read inputs
    and targets:

    >>> from importlib.resources import files
    >>> from cosmolayer.cosmosac import CosmoSac2002Model
    >>> from cosmolayer.cosmosac.datapoint import CosmoSacMixtureDatapoint
    >>> data = files("cosmolayer.data")
    >>> cosmo_files = [data / "C=C(N)O.cosmo", data / "NCCO.cosmo"]
    >>> mole_fractions = [0.5, 0.5]
    >>> temperature = 298.15
    >>> targets = [1.2]
    >>> dp = CosmoSacMixtureDatapoint(
    ...     cosmo_files, mole_fractions, temperature,
    ...     targets, CosmoSac2002Model,
    ... )
    >>> dp.temperature
    298.15
    >>> dp.mole_fractions.shape
    (2,)
    >>> dp.areas.shape, dp.volumes.shape
    ((2,), (2,))
    >>> dp.probabilities.shape
    (2, 51)
    >>> dp.targets.shape
    (1,)
    >>> dp.num_components, dp.num_segment_types
    (2, 51)
    >>> dp.num_targets
    1
    """

    def __init__(  # noqa: PLR0913
        self,
        cosmo_files: Sequence[os.PathLike[str]],
        mole_fractions: Sequence[float],
        temperature: float,
        targets: Sequence[float] | None = None,
        model: Model = CosmoSac2010Model,
    ):
        """Build a mixture datapoint from COSMO files and optional targets.

        Parameters
        ----------
        cosmo_files : Sequence[os.PathLike[str]]
            Paths to COSMO files, one per component.
        mole_fractions : Sequence[float]
            Mole fractions for each component.
        temperature : float
            Temperature in Kelvin.
        targets : Sequence[float] | None, optional
            Optional training targets. If ``None``, no training targets are stored.
        model : Model, optional
            COSMO-SAC model used to parse component data. Default is
            :data:`CosmoSac2010Model`.

        Raises
        ------
        ValueError
            If ``cosmo_files`` is empty.
        """
        if targets is None:
            targets = []

        # Materialize the per-component data first: unpacking ``zip()`` called
        # with no arguments would otherwise fail with an opaque
        # "not enough values to unpack" error when no files are given.
        component_data = [
            _get_component_data(str(path), model) for path in cosmo_files
        ]
        if not component_data:
            raise ValueError("At least one COSMO file is required.")

        # Transpose [(area, volume, probabilities), ...] into three tuples.
        areas, volumes, probabilities = zip(*component_data, strict=True)

        super().__init__(
            temperature=temperature,
            mole_fractions=np.array(mole_fractions),
            areas=np.array(areas),
            volumes=np.array(volumes),
            probabilities=np.stack(probabilities, axis=0),
            targets=np.array(targets),
        )

    @classmethod
    def from_series(
        cls,
        series: pd.Series,
        cosmo_files: Sequence[str | os.PathLike[str]],
        mole_fractions: Sequence[str | float],
        temperature: str | float,
        targets: Sequence[str | float] | None = None,
        model: Model = CosmoSac2010Model,
    ) -> CosmoSacMixtureDatapoint:
        """Build a mixture datapoint from one row of a DataFrame (as a Series).

        This method is useful for building
        :class:`~cosmolayer.MixtureTrainingDataset`
        instances from a pandas DataFrame using :meth:`pandas.DataFrame.apply`.

        Each specifier is either a column name (a ``str``), in which case the
        value is taken from ``series[key]``, or a literal value (a number or an
        ``os.PathLike`` path), which is passed through unchanged. This allows
        mixing DataFrame columns with fixed values (e.g. same solvent, same
        temperature, or same mole fractions for all datapoints). Because every
        ``str`` is treated as a column name, a literal file path must be given
        as a path object rather than as a string.

        Examples
        --------
        >>> from importlib.resources import files
        >>> from pathlib import Path
        >>> data = Path(str(files("cosmolayer") / "data"))
        >>> row = pd.Series(
        ...     {
        ...         "file_a": data / "C=C(N)O.cosmo",
        ...         "target_1": 1.2,
        ...     }
        ... )
        >>> point = CosmoSacMixtureDatapoint.from_series(
        ...     series=row,
        ...     cosmo_files=["file_a", data / "NCCO.cosmo"],
        ...     mole_fractions=[0.25, 0.75],
        ...     temperature=298.15,
        ...     targets=["target_1"],
        ... )
        >>> point.num_components, point.num_targets
        (2, 1)
        >>> point.mole_fractions.tolist()
        [0.25, 0.75]

        Parameters
        ----------
        series : pd.Series
            One row of a DataFrame (e.g. from ``df.iloc[i]`` or ``df.iterrows()``).
        cosmo_files : Sequence[str | os.PathLike[str]]
            For each component, either a column name (str) or a path to a COSMO
            file (os.PathLike, e.g. pathlib.Path).
        mole_fractions : Sequence[str | float]
            For each component, either a column name (str) or a literal mole
            fraction (float). Values should sum to 1.
        temperature : str | float
            Column name for temperature in Kelvin, or a literal temperature.
        targets : Sequence[str | float] | None, optional
            For each target, either a column name (str) or a literal value (float).
            If ``None`` (the default), the datapoint is built without training
            targets, as in the constructor.
        model : Model, optional
            COSMO-SAC model used to load components. Default is
            :class:`~cosmolayer.cosmosac.model.CosmoSac2010Model`.

        Returns
        -------
        CosmoSacMixtureDatapoint
            A datapoint built from the series values.
        """
        return cls(
            cosmo_files=[
                series[cosmo_file] if isinstance(cosmo_file, str) else cosmo_file
                for cosmo_file in cosmo_files
            ],
            mole_fractions=[
                series[fraction] if isinstance(fraction, str) else fraction
                for fraction in mole_fractions
            ],
            temperature=(
                series[temperature] if isinstance(temperature, str) else temperature
            ),
            # ``None`` is forwarded untouched so the constructor applies its
            # own "no targets" default.
            targets=(
                None
                if targets is None
                else [
                    series[target] if isinstance(target, str) else target
                    for target in targets
                ]
            ),
            model=model,
        )