Coverage for cosmolayer / cosmosac / component.py: 93%

137 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-11 14:25 +0000

1import os 

2from typing import Any, TextIO 

3 

4try: 

5 from importlib.resources.abc import Traversable 

6except (ImportError, AttributeError): 

7 Traversable = Any # fallback when Traversable not available (e.g. Python < 3.9) 

8 

9import numpy as np 

10import pandas as pd 

11import periodictable as pt 

12from numpy.typing import NDArray 

13 

14from ..parser import parse_cosmo_file 

15from .constants import ( 

16 COSMO_SAC_2010_AVERAGING_RADIUS, 

17 COSMO_SAC_2010_F_DECAY, 

18 COSMO_SAC_2010_SIGMA_0, 

19) 

20from .segment_groups import NHB, OH, OT, SEGMENT_GROUPS 

21 

22COVALENT_FACTOR = 1.3 # Same as in RDKit 

23 

24 

25class Component: 

26 r"""Molecular component for the COSMO-SAC activity coefficient model. 

27 

28 Parameters 

29 ---------- 

30 cosmo_string : str 

31 Contents of a COSMO output file from quantum mechanical calculations. 

32 

33 Keyword Arguments 

34 ----------------- 

35 min_sigma : float, optional 

36 Minimum screening charge density in e/Ų. Default is -0.025 e/Ų. 

37 max_sigma : float, optional 

38 Maximum screening charge density in e/Ų. Default is 0.025 e/Ų. 

39 num_points : int, optional 

40 Number of discrete points in the sigma profile. Default is 51. 

41 averaging_radius : float, optional 

42 Effective radius for distance-weighted sigma averaging in Å. 

43 Default is √(7.25 / π) Å :cite:`Bell2020`. 

44 f_decay : float, optional 

45 Decay factor for exponential distance weighting in the sigma averaging 

46 procedure. Default is 3.57 :cite:`Bell2020`. 

47 sigma_0 : float or None, optional 

48 Standard deviation of the Gaussian probability of a segment to form a hydrogen 

49 bond in e/Ų. Set to ``None`` to disable hydrogen-bond splitting (all 

50 surface area is assigned to the NHB class). 

51 Default is 0.007 e/Ų :cite:`Bell2020`. 

52 merge_profiles : bool, optional 

53 Whether to merge segment groups (NHB, OH, OT) into a single profile 

54 when accessing :attr:`probabilities` and :attr:`sigma_profile`. 

55 Default is False. 

56 

57 Raises 

58 ------ 

59 ValueError 

60 If the COSMO string is not in any supported format. 

61 ValueError 

62 If averaged charge densities fall outside the specified sigma range. 

63 

64 Examples 

65 -------- 

66 >>> import numpy as np 

67 >>> from importlib.resources import files 

68 >>> from cosmolayer.cosmosac import Component 

69 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

70 >>> component = Component(path.read_text()) 

71 >>> component.area 

72 97.34554... 

73 >>> component.volume 

74 80.07160... 

75 

76 When :attr:`merge_profiles` is True, :attr:`sigma_profile` is a single 

77 merged profile: 

78 

79 >>> component = Component(path.read_text(), merge_profiles=True) 

80 >>> sigma_profile = component.sigma_profile 

81 >>> sigma_profile.shape 

82 (51,) 

83 >>> print(sum(sigma_profile)) 

84 97.34554... 

85 

86 When :attr:`merge_profiles` is False, :attr:`sigma_profile` is stacked 

87 (NHB, OH, OT), shape (3, num_points): 

88 

89 >>> component = Component(path.read_text(), merge_profiles=False) 

90 >>> stacked = component.sigma_profile 

91 >>> stacked.shape 

92 (3, 51) 

93 >>> from cosmolayer.cosmosac.segment_groups import SEGMENT_GROUPS 

94 >>> for i, s in enumerate(SEGMENT_GROUPS): 

95 ... print(s, sum(stacked[i])) 

96 NHB 72.31802... 

97 OH 12.25732... 

98 OT 12.77019... 

99 

100 Plotting the sigma profiles (stacked, :attr:`merge_profiles` is False): 

101 

102 

103 .. plot:: 

104 :context: close-figs 

105 

106 >>> from importlib.resources import files 

107 >>> from cosmolayer.cosmosac import Component 

108 >>> from cosmolayer.cosmosac.segment_groups import SEGMENT_GROUPS 

109 >>> from matplotlib import pyplot as plt 

110 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

111 >>> component = Component(path.read_text(), merge_profiles=False) 

112 >>> fig, ax = plt.subplots(figsize=(8, 4)) 

113 >>> grid = component.sigma_grid 

114 >>> for i, label in enumerate(SEGMENT_GROUPS): 

115 ... _ = ax.plot(grid, component.sigma_profile[i], label=label) 

116 >>> _ = ax.set_xlabel("Charge density (e/Ų)") 

117 >>> _ = ax.set_ylabel("Surface area contribution (Ų)") 

118 >>> _ = ax.legend() 

119 >>> fig.tight_layout() 

120 

121 Plotting the segment-type probabilities: 

122 

123 .. plot:: 

124 :context: close-figs 

125 

126 >>> from importlib.resources import files 

127 >>> from cosmolayer.cosmosac import Component 

128 >>> from matplotlib import pyplot as plt 

129 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

130 >>> component = Component(path.read_text()) 

131 >>> fig, ax = plt.subplots(figsize=(8, 4)) 

132 >>> p = component.probabilities 

133 >>> _ = ax.bar(range(len(p)), p) 

134 >>> _ = ax.set_xlabel("Segment type index") 

135 >>> _ = ax.set_ylabel("Probability") 

136 >>> fig.tight_layout() 

137 """ 

138 

139 def __init__( # noqa: PLR0913 

140 self, 

141 cosmo_string: str, 

142 *, 

143 min_sigma: float = -0.025, # e/Ų 

144 max_sigma: float = 0.025, # e/Ų 

145 num_points: int = 51, 

146 averaging_radius: float = COSMO_SAC_2010_AVERAGING_RADIUS, # Å 

147 f_decay: float = COSMO_SAC_2010_F_DECAY, 

148 sigma_0: float | None = COSMO_SAC_2010_SIGMA_0, # e/Ų 

149 merge_profiles: bool = False, 

150 ): 

151 self._min_sigma = min_sigma 

152 self._grid = np.linspace(min_sigma, max_sigma, num_points) 

153 self._bin_width = (max_sigma - min_sigma) / (num_points - 1) 

154 

155 self._averaging_radius = averaging_radius 

156 self._f_decay = f_decay 

157 self._sigma_0 = sigma_0 

158 self._merge_profiles = merge_profiles 

159 

160 self._format, self._atom_data, self._segment_data, self._volume = ( 

161 parse_cosmo_file(cosmo_string) 

162 ) 

163 

164 sigmas, averaged_sigmas = self._average_sigmas() 

165 if (averaged_sigmas < min_sigma).any() or (averaged_sigmas > max_sigma).any(): 

166 raise ValueError("Averaged charge densities out of range.") 

167 self._segment_data["sigma"] = sigmas 

168 self._segment_data["sigma_avg"] = averaged_sigmas 

169 

170 self._bonds = self._detect_bonds() 

171 self._area = float(self._segment_data["area"].sum()) 

172 self._sigma_profiles = self._compute_sigma_profiles(averaged_sigmas) 

173 

174 def __repr__(self) -> str: 

175 num_atoms = len(self._atom_data) 

176 num_segments = len(self._segment_data) 

177 return f"Component({num_atoms} atoms, {num_segments} segments)" 

178 

179 @staticmethod 

180 def _get_covalent_radius(element: str) -> float: 

181 """Get scaled covalent radius for bond detection. 

182 

183 Parameters 

184 ---------- 

185 element : str 

186 Chemical element symbol. 

187 

188 Returns 

189 ------- 

190 float 

191 Covalent radius in Å, scaled by factor 1.3. 

192 """ 

193 radius: float = float(pt.elements.symbol(element).covalent_radius) 

194 return COVALENT_FACTOR * radius 

195 

196 def _detect_bonds(self) -> list[tuple[int, int]]: 

197 """Determines bonds from interatomic distances.""" 

198 df = self._atom_data 

199 coords = df[["x", "y", "z"]].values 

200 distances = np.sqrt(np.square(coords[:, None, :] - coords).sum(axis=-1)) 

201 radii = df["element"].apply(self._get_covalent_radius).values 

202 adjacency_matrix = distances < (radii[:, None] + radii[None, :]) 

203 bond_indices = np.nonzero(np.triu(adjacency_matrix, k=1)) 

204 return [(int(i), int(j)) for i, j in zip(*bond_indices, strict=True)] 

205 

206 def _get_hydrogen_bonding_classes(self) -> pd.Series: 

207 """Classify atoms into hydrogen bonding types (OH, OT, NHB). 

208 

209 Assigns hydrogen bonding classes: OH (O-H bonds), OT (N-H, F-H bonds or 

210 isolated N/F/O), and NHB (all other atoms). 

211 

212 Returns 

213 ------- 

214 pd.Series 

215 Hydrogen bonding class label for each atom. 

216 """ 

217 elements = self._atom_data["element"] 

218 hb_class = elements.apply( 

219 lambda element: OT if element in ["N", "F", "O"] else NHB 

220 ) 

221 for i, j in self._bonds: 

222 elements_ij = set(elements.iloc[[i, j]]) 

223 if elements_ij in [{"O", "H"}, {"N", "H"}, {"F", "H"}]: 

224 hb_class.at[i] = hb_class.at[j] = OH if "O" in elements_ij else OT 

225 return hb_class 

226 

227 def _average_sigmas(self) -> tuple[NDArray[np.float64], NDArray[np.float64]]: 

228 """Apply distance-weighted averaging to segment charge densities. 

229 

230 Smooths raw screening charge densities (σ = q/A) using exponentially 

231 decaying weights based on distances between segment centroids. 

232 

233 Returns 

234 ------- 

235 np.ndarray 

236 Averaged screening charge density for each segment in e/Å. 

237 """ 

238 sigmas = self._segment_data["charge"].values / self._segment_data["area"].values 

239 coords = self._segment_data[["x", "y", "z"]].values 

240 squared_distances = np.square(coords[:, None, :] - coords).sum(axis=-1) 

241 squared_radii = self._segment_data["area"].values / np.pi 

242 

243 sums = squared_radii + self._averaging_radius**2 

244 prods = squared_radii * self._averaging_radius**2 

245 weights = np.exp(-self._f_decay * squared_distances / sums) * prods / sums 

246 averaged_sigmas: NDArray[np.float64] = np.sum( 

247 weights * sigmas, axis=1 

248 ) / np.sum(weights, axis=1) 

249 

250 return sigmas, averaged_sigmas 

251 

252 def _compute_sigma_profile( 

253 self, averaged_sigmas: NDArray[np.float64], areas: NDArray[np.float64] 

254 ) -> NDArray[np.float64]: 

255 """Bin segment areas by charge density using linear interpolation. 

256 

257 Parameters 

258 ---------- 

259 averaged_sigmas : np.ndarray 

260 Averaged screening charge densities in e/Ų. 

261 areas : np.ndarray 

262 Surface areas in Ų. 

263 

264 Returns 

265 ------- 

266 np.ndarray 

267 Sigma profile histogram. Shape: (num_points,). 

268 """ 

269 profile = np.zeros_like(self._grid) 

270 max_index = len(self._grid) - 2 # index + 1 must be valid 

271 for sigma, area in zip(averaged_sigmas, areas, strict=True): 

272 index = int((sigma - self._min_sigma) / self._bin_width) 

273 index = min(max(0, index), max_index) 

274 weight = (self._grid[index + 1] - sigma) / self._bin_width 

275 profile[index] += area * weight 

276 profile[index + 1] += area * (1.0 - weight) 

277 return profile 

278 

279 def _compute_sigma_profiles( 

280 self, averaged_sigmas: NDArray[np.float64] 

281 ) -> dict[str, NDArray[np.float64]]: 

282 """Compute sigma profiles separated by hydrogen bonding type. 

283 

284 Classifies segments by H-bonding type (OH, OT, NHB) based on parent atom 

285 and sigma sign, then applies a Gaussian probability weighting function. 

286 

287 Parameters 

288 ---------- 

289 averaged_sigmas : np.ndarray 

290 Averaged screening charge densities for all segments in e/Ų. 

291 

292 Returns 

293 ------- 

294 dict 

295 Dictionary with keys "NHB", "OH", "OT" and values as sigma profile 

296 arrays. Each profile has shape (num_points,). 

297 """ 

298 atom_indices = self._segment_data["atom"] 

299 element = atom_indices.map(self._atom_data["element"]) 

300 is_hb_candidate = (element == "H") == (averaged_sigmas < 0.0) 

301 hb_class = atom_indices.map(self._get_hydrogen_bonding_classes()) 

302 mask_oh = is_hb_candidate & (hb_class == OH) 

303 mask_ot = is_hb_candidate & (hb_class == OT) 

304 mask_nhb = np.logical_not(mask_oh | mask_ot) 

305 areas = self._segment_data["area"].values 

306 profile_oh = self._compute_sigma_profile( 

307 averaged_sigmas[mask_oh], areas[mask_oh] 

308 ) 

309 profile_ot = self._compute_sigma_profile( 

310 averaged_sigmas[mask_ot], areas[mask_ot] 

311 ) 

312 profile_nhb = self._compute_sigma_profile( 

313 averaged_sigmas[mask_nhb], areas[mask_nhb] 

314 ) 

315 if self._sigma_0 is None: 

316 hb_probability = np.zeros_like(self._grid) 

317 else: 

318 hb_probability = 1.0 - np.exp(-0.5 * (self._grid / self._sigma_0) ** 2) 

319 return { 

320 NHB: profile_nhb + (profile_oh + profile_ot) * (1.0 - hb_probability), 

321 OH: profile_oh * hb_probability, 

322 OT: profile_ot * hb_probability, 

323 } 

324 

325 @classmethod 

326 def from_text_reader(cls, text_reader: TextIO) -> "Component": 

327 """Create a component from a text reader. 

328 

329 .. note:: 

330 This method creates a component with default parameters. 

331 

332 Parameters 

333 ---------- 

334 text_reader : io.TextIO 

335 Text reader to read the COSMO output file from. 

336 

337 Returns 

338 ------- 

339 Component 

340 Component object. 

341 

342 Examples 

343 -------- 

344 >>> from importlib.resources import files 

345 >>> from cosmolayer.cosmosac import Component 

346 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

347 >>> with open(path, encoding="utf-8") as file: 

348 ... component = Component.from_text_reader(file) 

349 >>> component.area, component.volume 

350 (97.34554..., 80.07160...) 

351 

352 """ 

353 return cls(text_reader.read()) 

354 

355 @classmethod 

356 def from_file(cls, file_path: os.PathLike[str] | Traversable) -> "Component": 

357 """Create a component from a COSMO output file. 

358 

359 .. note:: 

360 This method creates a component with default parameters. 

361 

362 Parameters 

363 ---------- 

364 file_path : path-like or Traversable 

365 Path to the COSMO output file. 

366 

367 Returns 

368 ------- 

369 Component 

370 Component object. 

371 

372 Examples 

373 -------- 

374 >>> from importlib.resources import files 

375 >>> from cosmolayer.cosmosac import Component 

376 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

377 >>> component = Component.from_file(path) 

378 >>> component.area, component.volume 

379 (97.34554..., 80.07160...) 

380 

381 """ 

382 if isinstance(file_path, os.PathLike): 

383 with open(file_path, encoding="utf-8") as file: 

384 return cls.from_text_reader(file) 

385 with file_path.open("r", encoding="utf-8") as file: 

386 return cls.from_text_reader(file) 

387 

388 @property 

389 def area(self) -> float: 

390 """Cavity surface area of the molecule in Ų. 

391 

392 Sum of the areas of all segments from the COSMO calculation. 

393 """ 

394 return self._area 

395 

396 @property 

397 def volume(self) -> float: 

398 """Cavity volume of the molecule in ų.""" 

399 return self._volume 

400 

401 @property 

402 def cosmo_format(self) -> str: 

403 """COSMO file format that was parsed. 

404 

405 Either "TURBOMOLE" or "DMol-3". 

406 

407 Examples 

408 -------- 

409 >>> from importlib.resources import files 

410 >>> from cosmolayer.cosmosac import Component 

411 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

412 >>> component = Component.from_file(path) 

413 >>> component.cosmo_format 

414 'TURBOMOLE' 

415 >>> path = files("cosmolayer.data") / "NCCO.cosmo" 

416 >>> component = Component.from_file(path) 

417 >>> component.cosmo_format 

418 'DMol-3' 

419 """ 

420 return self._format 

421 

422 @property 

423 def atom_data(self) -> pd.DataFrame: 

424 """Atom data from the parsed COSMO file. 

425 

426 DataFrame columns: ``id`` (atom identifier), ``x``, ``y``, ``z`` (Cartesian 

427 coordinates in Å), ``element`` (chemical symbol). 

428 

429 Returns 

430 ------- 

431 pd.DataFrame 

432 One row per atom. 

433 

434 Examples 

435 -------- 

436 >>> from importlib.resources import files 

437 >>> from cosmolayer.cosmosac import Component 

438 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

439 >>> component = Component(path.read_text()) 

440 >>> component.atom_data 

441 id x y z element 

442 0 C1 -1.4... -0.2... 0.0... C 

443 1 C2 -0.0... 0.0... 0.0... C 

444 2 N1 0.9... -0.9... -0.0... N 

445 ... 

446 8 H5 1.1... 1.3... -0.4... H 

447 

448 """ 

449 return self._atom_data 

450 

451 @property 

452 def segment_data(self) -> pd.DataFrame: 

453 """Segment (surface tile) data from the COSMO calculation. 

454 

455 DataFrame columns: ``atom`` (parent atom index), ``x``, ``y``, ``z`` 

456 (segment centroid coordinates in Å), ``charge`` (e), ``area`` (Ų), 

457 ``sigma`` (screening charge density in e/Ų), ``sigma_avg`` (smoothed 

458 density in e/Ų). 

459 

460 Returns 

461 ------- 

462 pd.DataFrame 

463 One row per segment. 

464 

465 Examples 

466 -------- 

467 >>> from importlib.resources import files 

468 >>> from cosmolayer.cosmosac import Component 

469 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

470 >>> component = Component(path.read_text()) 

471 >>> component.segment_data 

472 atom x y ... area sigma sigma_avg 

473 0 0 -0.867... -1.196... ... 0.206... 0.010... 0.007... 

474 1 0 -1.504... -1.502... ... 0.218... 0.007... 0.005... 

475 ... 

476 470 8 2.133... 1.152... ... 0.145... -0.012... -0.009... 

477 <BLANKLINE> 

478 [471 rows x 8 columns] 

479 

480 """ 

481 return self._segment_data 

482 

483 @property 

484 def bonds(self) -> list[tuple[int, int]]: 

485 """Bonds between atoms, inferred from interatomic distances. 

486 

487 Returns 

488 ------- 

489 list[tuple[int, int]] 

490 Pairs of atom indices (i, j) for each bond. 

491 

492 Examples 

493 -------- 

494 >>> from importlib.resources import files 

495 >>> from cosmolayer.cosmosac import Component 

496 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

497 >>> component = Component(path.read_text()) 

498 >>> component.bonds 

499 [(0, 1), (0, 4), (0, 5), ... (2, 7), (3, 8)] 

500 """ 

501 return self._bonds 

502 

503 @property 

504 def sigma_grid(self) -> NDArray[np.float64]: 

505 """Get the screening charge density grid in e/Ų. 

506 

507 Returns 

508 ------- 

509 np.ndarray 

510 Charge density vector in e/Ų. 

511 

512 Examples 

513 -------- 

514 >>> from importlib.resources import files 

515 >>> from cosmolayer.cosmosac import Component 

516 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo" 

517 >>> component = Component(path.read_text()) 

518 >>> component.sigma_grid 

519 array([-0.025, -0.024, -0.023, ... 0.023, 0.024, 0.025]) 

520 """ 

521 return self._grid 

522 

523 @property 

524 def merge_profiles(self) -> bool: 

525 """Whether segment groups (NHB, OH, OT) are merged for :attr:`sigma_profile` 

526 and :attr:`probabilities`. 

527 

528 Returns 

529 ------- 

530 bool 

531 """ 

532 return self._merge_profiles 

533 

534 @property 

535 def sigma_profile(self) -> NDArray[np.float64]: 

536 """Surface area distribution over screening charge density (sigma), in Ų. 

537 

538 Shape and layout depend on :attr:`merge_profiles`. If True, returns a single 

539 merged profile (sum over NHB, OH, OT), shape ``(num_points,)``. If False, 

540 returns stacked segment profiles in SEGMENT_GROUPS order (NHB, OH, OT), 

541 shape ``(3, num_points)``; ``sigma_profile[0]`` is NHB, ``[1]`` is OH, 

542 ``[2]`` is OT. 

543 

544 Returns 

545 ------- 

546 np.ndarray 

547 Sigma profile(s). Units: Ų. 

548 """ 

549 if self._merge_profiles: 

550 total_profile: NDArray[np.float64] = np.sum( 

551 list(self._sigma_profiles.values()), axis=0 

552 ) 

553 return total_profile 

554 return np.stack([self._sigma_profiles[seg] for seg in SEGMENT_GROUPS], axis=0) 

555 

556 @property 

557 def probabilities(self) -> NDArray[np.float64]: 

558 """Normalized segment-type probability distribution (sigma profile / area). 

559 

560 Segment types are defined by hydrogen bonding class (NHB, OH, OT) and 

561 averaged charge density. Shape is ``(num_points,)`` if :attr:`merge_profiles` 

562 is True, otherwise ``(3*num_points,)``. 

563 

564 Returns 

565 ------- 

566 np.ndarray 

567 Probabilities summing to 1.0. 

568 

569 Examples 

570 -------- 

571 >>> import numpy as np 

572 >>> from importlib.resources import files 

573 >>> from cosmolayer.cosmosac import Component 

574 >>> cosmo_string = (files("cosmolayer.data") / "C=C(N)O.cosmo").read_text() 

575 >>> component = Component(cosmo_string, merge_profiles=True) 

576 >>> probabilities = component.probabilities 

577 >>> probabilities.shape 

578 (51,) 

579 >>> bool(np.all(probabilities <= 1)) 

580 True 

581 >>> bool(np.isclose(probabilities.sum(), 1.0)) 

582 True 

583 >>> component = Component(cosmo_string, merge_profiles=False) 

584 >>> probabilities_full = component.probabilities 

585 >>> probabilities_full.shape 

586 (153,) 

587 >>> bool(np.isclose(probabilities_full.sum(), 1.0)) 

588 True 

589 """ 

590 profiles = [self._sigma_profiles[segtype] for segtype in SEGMENT_GROUPS] 

591 probabilities: NDArray[np.float64] = ( 

592 np.sum(profiles, axis=0) 

593 if self._merge_profiles 

594 else np.concatenate(profiles) 

595 ) / self._area 

596 return probabilities