Coverage for cosmolayer / cosmosac / component.py: 93%
137 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 14:25 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-11 14:25 +0000
1import os
2from typing import Any, TextIO
4try:
5 from importlib.resources.abc import Traversable
6except (ImportError, AttributeError):
7 Traversable = Any # fallback when Traversable not available (e.g. Python < 3.9)
9import numpy as np
10import pandas as pd
11import periodictable as pt
12from numpy.typing import NDArray
14from ..parser import parse_cosmo_file
15from .constants import (
16 COSMO_SAC_2010_AVERAGING_RADIUS,
17 COSMO_SAC_2010_F_DECAY,
18 COSMO_SAC_2010_SIGMA_0,
19)
20from .segment_groups import NHB, OH, OT, SEGMENT_GROUPS
22COVALENT_FACTOR = 1.3 # Same as in RDKit
25class Component:
26 r"""Molecular component for the COSMO-SAC activity coefficient model.
28 Parameters
29 ----------
30 cosmo_string : str
31 Contents of a COSMO output file from quantum mechanical calculations.
33 Keyword Arguments
34 -----------------
35 min_sigma : float, optional
36 Minimum screening charge density in e/Ų. Default is -0.025 e/Ų.
37 max_sigma : float, optional
38 Maximum screening charge density in e/Ų. Default is 0.025 e/Ų.
39 num_points : int, optional
40 Number of discrete points in the sigma profile. Default is 51.
41 averaging_radius : float, optional
42 Effective radius for distance-weighted sigma averaging in Å.
43 Default is √(7.25 / π) Å :cite:`Bell2020`.
44 f_decay : float, optional
45 Decay factor for exponential distance weighting in the sigma averaging
46 procedure. Default is 3.57 :cite:`Bell2020`.
47 sigma_0 : float or None, optional
48 Standard deviation of the Gaussian probability of a segment to form a hydrogen
49 bond in e/Ų. Set to ``None`` to disable hydrogen-bond splitting (all
50 surface area is assigned to the NHB class).
51 Default is 0.007 e/Ų :cite:`Bell2020`.
52 merge_profiles : bool, optional
53 Whether to merge segment groups (NHB, OH, OT) into a single profile
54 when accessing :attr:`probabilities` and :attr:`sigma_profile`.
55 Default is False.
57 Raises
58 ------
59 ValueError
60 If the COSMO string is not in any supported format.
61 ValueError
62 If averaged charge densities fall outside the specified sigma range.
64 Examples
65 --------
66 >>> import numpy as np
67 >>> from importlib.resources import files
68 >>> from cosmolayer.cosmosac import Component
69 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
70 >>> component = Component(path.read_text())
71 >>> component.area
72 97.34554...
73 >>> component.volume
74 80.07160...
76 When :attr:`merge_profiles` is True, :attr:`sigma_profile` is a single
77 merged profile:
79 >>> component = Component(path.read_text(), merge_profiles=True)
80 >>> sigma_profile = component.sigma_profile
81 >>> sigma_profile.shape
82 (51,)
83 >>> print(sum(sigma_profile))
84 97.34554...
86 When :attr:`merge_profiles` is False, :attr:`sigma_profile` is stacked
87 (NHB, OH, OT), shape (3, num_points):
89 >>> component = Component(path.read_text(), merge_profiles=False)
90 >>> stacked = component.sigma_profile
91 >>> stacked.shape
92 (3, 51)
93 >>> from cosmolayer.cosmosac.segment_groups import SEGMENT_GROUPS
94 >>> for i, s in enumerate(SEGMENT_GROUPS):
95 ... print(s, sum(stacked[i]))
96 NHB 72.31802...
97 OH 12.25732...
98 OT 12.77019...
100 Plotting the sigma profiles (stacked, :attr:`merge_profiles` is False):
103 .. plot::
104 :context: close-figs
106 >>> from importlib.resources import files
107 >>> from cosmolayer.cosmosac import Component
108 >>> from cosmolayer.cosmosac.segment_groups import SEGMENT_GROUPS
109 >>> from matplotlib import pyplot as plt
110 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
111 >>> component = Component(path.read_text(), merge_profiles=False)
112 >>> fig, ax = plt.subplots(figsize=(8, 4))
113 >>> grid = component.sigma_grid
114 >>> for i, label in enumerate(SEGMENT_GROUPS):
115 ... _ = ax.plot(grid, component.sigma_profile[i], label=label)
116 >>> _ = ax.set_xlabel("Charge density (e/Ų)")
117 >>> _ = ax.set_ylabel("Surface area contribution (Ų)")
118 >>> _ = ax.legend()
119 >>> fig.tight_layout()
121 Plotting the segment-type probabilities:
123 .. plot::
124 :context: close-figs
126 >>> from importlib.resources import files
127 >>> from cosmolayer.cosmosac import Component
128 >>> from matplotlib import pyplot as plt
129 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
130 >>> component = Component(path.read_text())
131 >>> fig, ax = plt.subplots(figsize=(8, 4))
132 >>> p = component.probabilities
133 >>> _ = ax.bar(range(len(p)), p)
134 >>> _ = ax.set_xlabel("Segment type index")
135 >>> _ = ax.set_ylabel("Probability")
136 >>> fig.tight_layout()
137 """
139 def __init__( # noqa: PLR0913
140 self,
141 cosmo_string: str,
142 *,
143 min_sigma: float = -0.025, # e/Ų
144 max_sigma: float = 0.025, # e/Ų
145 num_points: int = 51,
146 averaging_radius: float = COSMO_SAC_2010_AVERAGING_RADIUS, # Å
147 f_decay: float = COSMO_SAC_2010_F_DECAY,
148 sigma_0: float | None = COSMO_SAC_2010_SIGMA_0, # e/Ų
149 merge_profiles: bool = False,
150 ):
151 self._min_sigma = min_sigma
152 self._grid = np.linspace(min_sigma, max_sigma, num_points)
153 self._bin_width = (max_sigma - min_sigma) / (num_points - 1)
155 self._averaging_radius = averaging_radius
156 self._f_decay = f_decay
157 self._sigma_0 = sigma_0
158 self._merge_profiles = merge_profiles
160 self._format, self._atom_data, self._segment_data, self._volume = (
161 parse_cosmo_file(cosmo_string)
162 )
164 sigmas, averaged_sigmas = self._average_sigmas()
165 if (averaged_sigmas < min_sigma).any() or (averaged_sigmas > max_sigma).any():
166 raise ValueError("Averaged charge densities out of range.")
167 self._segment_data["sigma"] = sigmas
168 self._segment_data["sigma_avg"] = averaged_sigmas
170 self._bonds = self._detect_bonds()
171 self._area = float(self._segment_data["area"].sum())
172 self._sigma_profiles = self._compute_sigma_profiles(averaged_sigmas)
174 def __repr__(self) -> str:
175 num_atoms = len(self._atom_data)
176 num_segments = len(self._segment_data)
177 return f"Component({num_atoms} atoms, {num_segments} segments)"
179 @staticmethod
180 def _get_covalent_radius(element: str) -> float:
181 """Get scaled covalent radius for bond detection.
183 Parameters
184 ----------
185 element : str
186 Chemical element symbol.
188 Returns
189 -------
190 float
191 Covalent radius in Å, scaled by factor 1.3.
192 """
193 radius: float = float(pt.elements.symbol(element).covalent_radius)
194 return COVALENT_FACTOR * radius
196 def _detect_bonds(self) -> list[tuple[int, int]]:
197 """Determines bonds from interatomic distances."""
198 df = self._atom_data
199 coords = df[["x", "y", "z"]].values
200 distances = np.sqrt(np.square(coords[:, None, :] - coords).sum(axis=-1))
201 radii = df["element"].apply(self._get_covalent_radius).values
202 adjacency_matrix = distances < (radii[:, None] + radii[None, :])
203 bond_indices = np.nonzero(np.triu(adjacency_matrix, k=1))
204 return [(int(i), int(j)) for i, j in zip(*bond_indices, strict=True)]
206 def _get_hydrogen_bonding_classes(self) -> pd.Series:
207 """Classify atoms into hydrogen bonding types (OH, OT, NHB).
209 Assigns hydrogen bonding classes: OH (O-H bonds), OT (N-H, F-H bonds or
210 isolated N/F/O), and NHB (all other atoms).
212 Returns
213 -------
214 pd.Series
215 Hydrogen bonding class label for each atom.
216 """
217 elements = self._atom_data["element"]
218 hb_class = elements.apply(
219 lambda element: OT if element in ["N", "F", "O"] else NHB
220 )
221 for i, j in self._bonds:
222 elements_ij = set(elements.iloc[[i, j]])
223 if elements_ij in [{"O", "H"}, {"N", "H"}, {"F", "H"}]:
224 hb_class.at[i] = hb_class.at[j] = OH if "O" in elements_ij else OT
225 return hb_class
227 def _average_sigmas(self) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
228 """Apply distance-weighted averaging to segment charge densities.
230 Smooths raw screening charge densities (σ = q/A) using exponentially
231 decaying weights based on distances between segment centroids.
233 Returns
234 -------
235 np.ndarray
236 Averaged screening charge density for each segment in e/Å.
237 """
238 sigmas = self._segment_data["charge"].values / self._segment_data["area"].values
239 coords = self._segment_data[["x", "y", "z"]].values
240 squared_distances = np.square(coords[:, None, :] - coords).sum(axis=-1)
241 squared_radii = self._segment_data["area"].values / np.pi
243 sums = squared_radii + self._averaging_radius**2
244 prods = squared_radii * self._averaging_radius**2
245 weights = np.exp(-self._f_decay * squared_distances / sums) * prods / sums
246 averaged_sigmas: NDArray[np.float64] = np.sum(
247 weights * sigmas, axis=1
248 ) / np.sum(weights, axis=1)
250 return sigmas, averaged_sigmas
252 def _compute_sigma_profile(
253 self, averaged_sigmas: NDArray[np.float64], areas: NDArray[np.float64]
254 ) -> NDArray[np.float64]:
255 """Bin segment areas by charge density using linear interpolation.
257 Parameters
258 ----------
259 averaged_sigmas : np.ndarray
260 Averaged screening charge densities in e/Ų.
261 areas : np.ndarray
262 Surface areas in Ų.
264 Returns
265 -------
266 np.ndarray
267 Sigma profile histogram. Shape: (num_points,).
268 """
269 profile = np.zeros_like(self._grid)
270 max_index = len(self._grid) - 2 # index + 1 must be valid
271 for sigma, area in zip(averaged_sigmas, areas, strict=True):
272 index = int((sigma - self._min_sigma) / self._bin_width)
273 index = min(max(0, index), max_index)
274 weight = (self._grid[index + 1] - sigma) / self._bin_width
275 profile[index] += area * weight
276 profile[index + 1] += area * (1.0 - weight)
277 return profile
279 def _compute_sigma_profiles(
280 self, averaged_sigmas: NDArray[np.float64]
281 ) -> dict[str, NDArray[np.float64]]:
282 """Compute sigma profiles separated by hydrogen bonding type.
284 Classifies segments by H-bonding type (OH, OT, NHB) based on parent atom
285 and sigma sign, then applies a Gaussian probability weighting function.
287 Parameters
288 ----------
289 averaged_sigmas : np.ndarray
290 Averaged screening charge densities for all segments in e/Ų.
292 Returns
293 -------
294 dict
295 Dictionary with keys "NHB", "OH", "OT" and values as sigma profile
296 arrays. Each profile has shape (num_points,).
297 """
298 atom_indices = self._segment_data["atom"]
299 element = atom_indices.map(self._atom_data["element"])
300 is_hb_candidate = (element == "H") == (averaged_sigmas < 0.0)
301 hb_class = atom_indices.map(self._get_hydrogen_bonding_classes())
302 mask_oh = is_hb_candidate & (hb_class == OH)
303 mask_ot = is_hb_candidate & (hb_class == OT)
304 mask_nhb = np.logical_not(mask_oh | mask_ot)
305 areas = self._segment_data["area"].values
306 profile_oh = self._compute_sigma_profile(
307 averaged_sigmas[mask_oh], areas[mask_oh]
308 )
309 profile_ot = self._compute_sigma_profile(
310 averaged_sigmas[mask_ot], areas[mask_ot]
311 )
312 profile_nhb = self._compute_sigma_profile(
313 averaged_sigmas[mask_nhb], areas[mask_nhb]
314 )
315 if self._sigma_0 is None:
316 hb_probability = np.zeros_like(self._grid)
317 else:
318 hb_probability = 1.0 - np.exp(-0.5 * (self._grid / self._sigma_0) ** 2)
319 return {
320 NHB: profile_nhb + (profile_oh + profile_ot) * (1.0 - hb_probability),
321 OH: profile_oh * hb_probability,
322 OT: profile_ot * hb_probability,
323 }
325 @classmethod
326 def from_text_reader(cls, text_reader: TextIO) -> "Component":
327 """Create a component from a text reader.
329 .. note::
330 This method creates a component with default parameters.
332 Parameters
333 ----------
334 text_reader : io.TextIO
335 Text reader to read the COSMO output file from.
337 Returns
338 -------
339 Component
340 Component object.
342 Examples
343 --------
344 >>> from importlib.resources import files
345 >>> from cosmolayer.cosmosac import Component
346 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
347 >>> with open(path, encoding="utf-8") as file:
348 ... component = Component.from_text_reader(file)
349 >>> component.area, component.volume
350 (97.34554..., 80.07160...)
352 """
353 return cls(text_reader.read())
355 @classmethod
356 def from_file(cls, file_path: os.PathLike[str] | Traversable) -> "Component":
357 """Create a component from a COSMO output file.
359 .. note::
360 This method creates a component with default parameters.
362 Parameters
363 ----------
364 file_path : path-like or Traversable
365 Path to the COSMO output file.
367 Returns
368 -------
369 Component
370 Component object.
372 Examples
373 --------
374 >>> from importlib.resources import files
375 >>> from cosmolayer.cosmosac import Component
376 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
377 >>> component = Component.from_file(path)
378 >>> component.area, component.volume
379 (97.34554..., 80.07160...)
381 """
382 if isinstance(file_path, os.PathLike):
383 with open(file_path, encoding="utf-8") as file:
384 return cls.from_text_reader(file)
385 with file_path.open("r", encoding="utf-8") as file:
386 return cls.from_text_reader(file)
388 @property
389 def area(self) -> float:
390 """Cavity surface area of the molecule in Ų.
392 Sum of the areas of all segments from the COSMO calculation.
393 """
394 return self._area
396 @property
397 def volume(self) -> float:
398 """Cavity volume of the molecule in ų."""
399 return self._volume
401 @property
402 def cosmo_format(self) -> str:
403 """COSMO file format that was parsed.
405 Either "TURBOMOLE" or "DMol-3".
407 Examples
408 --------
409 >>> from importlib.resources import files
410 >>> from cosmolayer.cosmosac import Component
411 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
412 >>> component = Component.from_file(path)
413 >>> component.cosmo_format
414 'TURBOMOLE'
415 >>> path = files("cosmolayer.data") / "NCCO.cosmo"
416 >>> component = Component.from_file(path)
417 >>> component.cosmo_format
418 'DMol-3'
419 """
420 return self._format
422 @property
423 def atom_data(self) -> pd.DataFrame:
424 """Atom data from the parsed COSMO file.
426 DataFrame columns: ``id`` (atom identifier), ``x``, ``y``, ``z`` (Cartesian
427 coordinates in Å), ``element`` (chemical symbol).
429 Returns
430 -------
431 pd.DataFrame
432 One row per atom.
434 Examples
435 --------
436 >>> from importlib.resources import files
437 >>> from cosmolayer.cosmosac import Component
438 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
439 >>> component = Component(path.read_text())
440 >>> component.atom_data
441 id x y z element
442 0 C1 -1.4... -0.2... 0.0... C
443 1 C2 -0.0... 0.0... 0.0... C
444 2 N1 0.9... -0.9... -0.0... N
445 ...
446 8 H5 1.1... 1.3... -0.4... H
448 """
449 return self._atom_data
451 @property
452 def segment_data(self) -> pd.DataFrame:
453 """Segment (surface tile) data from the COSMO calculation.
455 DataFrame columns: ``atom`` (parent atom index), ``x``, ``y``, ``z``
456 (segment centroid coordinates in Å), ``charge`` (e), ``area`` (Ų),
457 ``sigma`` (screening charge density in e/Ų), ``sigma_avg`` (smoothed
458 density in e/Ų).
460 Returns
461 -------
462 pd.DataFrame
463 One row per segment.
465 Examples
466 --------
467 >>> from importlib.resources import files
468 >>> from cosmolayer.cosmosac import Component
469 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
470 >>> component = Component(path.read_text())
471 >>> component.segment_data
472 atom x y ... area sigma sigma_avg
473 0 0 -0.867... -1.196... ... 0.206... 0.010... 0.007...
474 1 0 -1.504... -1.502... ... 0.218... 0.007... 0.005...
475 ...
476 470 8 2.133... 1.152... ... 0.145... -0.012... -0.009...
477 <BLANKLINE>
478 [471 rows x 8 columns]
480 """
481 return self._segment_data
483 @property
484 def bonds(self) -> list[tuple[int, int]]:
485 """Bonds between atoms, inferred from interatomic distances.
487 Returns
488 -------
489 list[tuple[int, int]]
490 Pairs of atom indices (i, j) for each bond.
492 Examples
493 --------
494 >>> from importlib.resources import files
495 >>> from cosmolayer.cosmosac import Component
496 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
497 >>> component = Component(path.read_text())
498 >>> component.bonds
499 [(0, 1), (0, 4), (0, 5), ... (2, 7), (3, 8)]
500 """
501 return self._bonds
503 @property
504 def sigma_grid(self) -> NDArray[np.float64]:
505 """Get the screening charge density grid in e/Ų.
507 Returns
508 -------
509 np.ndarray
510 Charge density vector in e/Ų.
512 Examples
513 --------
514 >>> from importlib.resources import files
515 >>> from cosmolayer.cosmosac import Component
516 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"
517 >>> component = Component(path.read_text())
518 >>> component.sigma_grid
519 array([-0.025, -0.024, -0.023, ... 0.023, 0.024, 0.025])
520 """
521 return self._grid
523 @property
524 def merge_profiles(self) -> bool:
525 """Whether segment groups (NHB, OH, OT) are merged for :attr:`sigma_profile`
526 and :attr:`probabilities`.
528 Returns
529 -------
530 bool
531 """
532 return self._merge_profiles
534 @property
535 def sigma_profile(self) -> NDArray[np.float64]:
536 """Surface area distribution over screening charge density (sigma), in Ų.
538 Shape and layout depend on :attr:`merge_profiles`. If True, returns a single
539 merged profile (sum over NHB, OH, OT), shape ``(num_points,)``. If False,
540 returns stacked segment profiles in SEGMENT_GROUPS order (NHB, OH, OT),
541 shape ``(3, num_points)``; ``sigma_profile[0]`` is NHB, ``[1]`` is OH,
542 ``[2]`` is OT.
544 Returns
545 -------
546 np.ndarray
547 Sigma profile(s). Units: Ų.
548 """
549 if self._merge_profiles:
550 total_profile: NDArray[np.float64] = np.sum(
551 list(self._sigma_profiles.values()), axis=0
552 )
553 return total_profile
554 return np.stack([self._sigma_profiles[seg] for seg in SEGMENT_GROUPS], axis=0)
556 @property
557 def probabilities(self) -> NDArray[np.float64]:
558 """Normalized segment-type probability distribution (sigma profile / area).
560 Segment types are defined by hydrogen bonding class (NHB, OH, OT) and
561 averaged charge density. Shape is ``(num_points,)`` if :attr:`merge_profiles`
562 is True, otherwise ``(3*num_points,)``.
564 Returns
565 -------
566 np.ndarray
567 Probabilities summing to 1.0.
569 Examples
570 --------
571 >>> import numpy as np
572 >>> from importlib.resources import files
573 >>> from cosmolayer.cosmosac import Component
574 >>> cosmo_string = (files("cosmolayer.data") / "C=C(N)O.cosmo").read_text()
575 >>> component = Component(cosmo_string, merge_profiles=True)
576 >>> probabilities = component.probabilities
577 >>> probabilities.shape
578 (51,)
579 >>> bool(np.all(probabilities <= 1))
580 True
581 >>> bool(np.isclose(probabilities.sum(), 1.0))
582 True
583 >>> component = Component(cosmo_string, merge_profiles=False)
584 >>> probabilities_full = component.probabilities
585 >>> probabilities_full.shape
586 (153,)
587 >>> bool(np.isclose(probabilities_full.sum(), 1.0))
588 True
589 """
590 profiles = [self._sigma_profiles[segtype] for segtype in SEGMENT_GROUPS]
591 probabilities: NDArray[np.float64] = (
592 np.sum(profiles, axis=0)
593 if self._merge_profiles
594 else np.concatenate(profiles)
595 ) / self._area
596 return probabilities