Coverage for cosmolayer / cosmosac / datapoint.py: 100%
24 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 14:25 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 14:25 +0000
1"""
2.. module:: cosmolayer.cosmosac.datapoint
3 :synopsis: Single datapoint for a COSMO-SAC mixture.
5.. moduleauthor:: Charlles Abreu <craabreu@gmail.com>
6"""
8from __future__ import annotations
10import functools
11import os
12from collections.abc import Sequence
14import numpy as np
15import pandas as pd
17from ..cosmodata import MixtureDatapoint, NumpyArray1D
18from .model import CosmoSac2010Model, Model
@functools.cache
def _get_component_data(
    cosmo_file_path: str,
    model: Model,
) -> tuple[float, float, NumpyArray1D]:
    """Read one COSMO file and return its area, volume, and probabilities.

    The function is memoised with :func:`functools.cache` on the pair
    ``(cosmo_file_path, model)``, so a file is read and parsed only once per
    model. Both arguments must therefore be hashable.

    Parameters
    ----------
    cosmo_file_path : str
        Path to a COSMO output file (e.g. ``.cosmo``).
    model : :class:`~cosmolayer.cosmosac.model.Model`
        Model whose ``create_component`` builds the component from the file
        contents.

    Returns
    -------
    tuple[float, float, NumpyArray1D]
        ``(area, volume, probabilities)`` for the component. The
        probabilities array is marked read-only because the very same cached
        object is handed back to every caller.
    """
    with open(cosmo_file_path) as cosmo_file:
        contents = cosmo_file.read()

    component = model.create_component(contents)

    # Freeze the array: it is shared through the cache, so an in-place edit
    # by one caller would silently corrupt the data seen by all others.
    sigma_profile = component.probabilities
    sigma_profile.setflags(write=False)

    return component.area, component.volume, sigma_profile
class CosmoSacMixtureDatapoint(MixtureDatapoint):
    """Subclass of :class:`MixtureDatapoint` for COSMO-SAC mixtures.

    Each component's area, volume, and sigma-profile probabilities are loaded
    from its COSMO file through ``model``, stacked in component order, and
    forwarded to :class:`MixtureDatapoint` together with the temperature,
    mole fractions, and targets.

    Parameters
    ----------
    cosmo_files : Sequence[str | os.PathLike[str]]
        Paths to COSMO files, one per component. Order must match
        ``mole_fractions``.
    mole_fractions : Sequence[float]
        Mole fractions for each component (should sum to 1).
    temperature : float
        Temperature in Kelvin.
    targets : Sequence[float] | None, optional
        Target values for the mixture (e.g. activity coefficients, excess
        properties). Length defines the number of training targets. If ``None``,
        no training targets are stored.
    model : :class:`~cosmolayer.cosmosac.model.Model`
        COSMO-SAC model used to load components and compute probabilities.

    Raises
    ------
    ValueError
        If ``cosmo_files`` is empty, or if the number of mole fractions does
        not match the number of COSMO files.

    Examples
    --------
    Build a binary mixture datapoint from packaged COSMO files and read inputs
    and targets:

    >>> from importlib.resources import files
    >>> from cosmolayer.cosmosac import CosmoSac2002Model
    >>> from cosmolayer.cosmosac.datapoint import CosmoSacMixtureDatapoint
    >>> data = files("cosmolayer.data")
    >>> cosmo_files = [data / "C=C(N)O.cosmo", data / "NCCO.cosmo"]
    >>> mole_fractions = [0.5, 0.5]
    >>> temperature = 298.15
    >>> targets = [1.2]
    >>> dp = CosmoSacMixtureDatapoint(
    ...     cosmo_files, mole_fractions, temperature,
    ...     targets, CosmoSac2002Model,
    ... )
    >>> dp.temperature
    298.15
    >>> dp.mole_fractions.shape
    (2,)
    >>> dp.areas.shape, dp.volumes.shape
    ((2,), (2,))
    >>> dp.probabilities.shape
    (2, 51)
    >>> dp.targets.shape
    (1,)
    >>> dp.num_components, dp.num_segment_types
    (2, 51)
    >>> dp.num_targets
    1
    """

    def __init__(  # noqa: PLR0913
        self,
        cosmo_files: Sequence[str | os.PathLike[str]],
        mole_fractions: Sequence[float],
        temperature: float,
        targets: Sequence[float] | None = None,
        model: Model = CosmoSac2010Model,
    ) -> None:
        """Build a mixture datapoint from COSMO files and optional targets.

        Parameters
        ----------
        cosmo_files : Sequence[str | os.PathLike[str]]
            Paths to COSMO files, one per component. Each path is converted
            with ``str`` before being loaded.
        mole_fractions : Sequence[float]
            Mole fractions for each component.
        temperature : float
            Temperature in Kelvin.
        targets : Sequence[float] | None, optional
            Optional training targets. If ``None``, no training targets are stored.
        model : Model, optional
            COSMO-SAC model used to parse component data. Default is
            :data:`CosmoSac2010Model`.

        Raises
        ------
        ValueError
            If ``cosmo_files`` is empty.
        """
        component_data = [
            _get_component_data(str(path), model) for path in cosmo_files
        ]
        # Without this guard an empty input fails further down with an
        # opaque "not enough values to unpack" ValueError.
        if not component_data:
            raise ValueError("At least one COSMO file is required.")
        areas, volumes, probabilities = zip(*component_data, strict=True)

        # NOTE(review): the length of mole_fractions is not compared with the
        # number of COSMO files here; presumably MixtureDatapoint validates
        # it -- confirm.
        super().__init__(
            temperature=temperature,
            # dtype=float keeps integer input such as [1, 0] from producing
            # an integer array.
            mole_fractions=np.array(mole_fractions, dtype=float),
            areas=np.array(areas),
            volumes=np.array(volumes),
            probabilities=np.stack(probabilities, axis=0),
            targets=np.array([] if targets is None else targets, dtype=float),
        )

    @classmethod
    def from_series(
        cls,
        series: pd.Series,
        cosmo_files: Sequence[str | os.PathLike[str]],
        mole_fractions: Sequence[str | float],
        temperature: str | float,
        targets: Sequence[str | float] | None = None,
        model: Model = CosmoSac2010Model,
    ) -> CosmoSacMixtureDatapoint:
        """Build a mixture datapoint from one row of a DataFrame (as a Series).

        This method is useful for building
        :class:`~cosmolayer.MixtureTrainingDataset`
        instances from a pandas DataFrame using :meth:`pandas.DataFrame.apply`.

        Each specifier is either a column name (a ``str``), in which case the
        value is taken from ``series[key]``, or a literal value (a number or
        an ``os.PathLike`` path), which is used as-is. This allows mixing
        DataFrame columns with fixed values (e.g. same solvent, same
        temperature, or same mole fractions for all datapoints).

        A ``str`` is always interpreted as a column name, so a literal COSMO
        file path must be given as an ``os.PathLike`` object such as
        :class:`pathlib.Path`.

        Parameters
        ----------
        series : pd.Series
            One row of a DataFrame (e.g. from ``df.iloc[i]`` or ``df.iterrows()``).
        cosmo_files : Sequence[str | os.PathLike[str]]
            For each component, either a column name (str) or a path to a COSMO
            file (os.PathLike).
        mole_fractions : Sequence[str | float]
            For each component, either a column name (str) or a literal mole
            fraction (float). Values should sum to 1.
        temperature : str | float
            Column name for temperature in Kelvin, or a literal temperature.
        targets : Sequence[str | float] | None, optional
            For each target, either a column name (str) or a literal value
            (float). If ``None``, no training targets are stored.
        model : Model, optional
            COSMO-SAC model used to load components. Default is
            :class:`~cosmolayer.cosmosac.model.CosmoSac2010Model`.

        Returns
        -------
        CosmoSacMixtureDatapoint
            A datapoint built from the series values.

        Raises
        ------
        KeyError
            If a column name is not present in ``series``.

        Examples
        --------
        >>> from importlib.resources import files
        >>> from pathlib import Path
        >>> data = Path(str(files("cosmolayer") / "data"))
        >>> row = pd.Series(
        ...     {
        ...         "file_a": data / "C=C(N)O.cosmo",
        ...         "target_1": 1.2,
        ...     }
        ... )
        >>> point = CosmoSacMixtureDatapoint.from_series(
        ...     series=row,
        ...     cosmo_files=["file_a", data / "NCCO.cosmo"],
        ...     mole_fractions=[0.25, 0.75],
        ...     temperature=298.15,
        ...     targets=["target_1"],
        ... )
        >>> point.num_components, point.num_targets
        (2, 1)
        >>> point.mole_fractions.tolist()
        [0.25, 0.75]
        """

        def resolve(spec):
            # A string names a column of the row; anything else is a literal.
            return series[spec] if isinstance(spec, str) else spec

        return cls(
            cosmo_files=[resolve(cosmo_file) for cosmo_file in cosmo_files],
            mole_fractions=[resolve(fraction) for fraction in mole_fractions],
            temperature=resolve(temperature),
            targets=(
                None if targets is None else [resolve(target) for target in targets]
            ),
            model=model,
        )