Coverage for cosmolayer/cosmosac/component.py: 93%

1import os

2from typing import Any, TextIO

4try:

5 from importlib.resources.abc import Traversable

6except (ImportError, AttributeError):

7 Traversable = Any # fallback when Traversable not available (e.g. Python < 3.9)

9import numpy as np

10import pandas as pd

11import periodictable as pt

12from numpy.typing import NDArray

14from ..parser import parse_cosmo_file

15from .constants import (

16 COSMO_SAC_2010_AVERAGING_RADIUS,

17 COSMO_SAC_2010_F_DECAY,

18 COSMO_SAC_2010_SIGMA_0,

19)

20from .segment_groups import NHB, OH, OT, SEGMENT_GROUPS

22COVALENT_FACTOR = 1.3 # Same as in RDKit

25class Component:

26 r"""Molecular component for the COSMO-SAC activity coefficient model.

28 Parameters

29 ----------

30 cosmo_string : str

31 Contents of a COSMO output file from quantum mechanical calculations.

33 Keyword Arguments

34 -----------------

35 min_sigma : float, optional

36 Minimum screening charge density in e/Å². Default is -0.025 e/Å².

37 max_sigma : float, optional

38 Maximum screening charge density in e/Å². Default is 0.025 e/Å².

39 num_points : int, optional

40 Number of discrete points in the sigma profile. Default is 51.

41 averaging_radius : float, optional

42 Effective radius for distance-weighted sigma averaging in Å.

43 Default is √(7.25 / π) Å :cite:`Bell2020`.

44 f_decay : float, optional

45 Decay factor for exponential distance weighting in the sigma averaging

46 procedure. Default is 3.57 :cite:`Bell2020`.

47 sigma_0 : float or None, optional

48 Standard deviation of the Gaussian probability of a segment to form a hydrogen

49 bond in e/Å². Set to ``None`` to disable hydrogen-bond splitting (all

50 surface area is assigned to the NHB class).

51 Default is 0.007 e/Å² :cite:`Bell2020`.

52 merge_profiles : bool, optional

53 Whether to merge segment groups (NHB, OH, OT) into a single profile

54 when accessing :attr:`probabilities` and :attr:`sigma_profile`.

55 Default is False.

57 Raises

58 ------

59 ValueError

60 If the COSMO string is not in any supported format.

61 ValueError

62 If averaged charge densities fall outside the specified sigma range.

64 Examples

65 --------

66 >>> import numpy as np

67 >>> from importlib.resources import files

68 >>> from cosmolayer.cosmosac import Component

69 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

70 >>> component = Component(path.read_text())

71 >>> component.area

72 97.34554...

73 >>> component.volume

74 80.07160...

76 When :attr:`merge_profiles` is True, :attr:`sigma_profile` is a single

77 merged profile:

79 >>> component = Component(path.read_text(), merge_profiles=True)

80 >>> sigma_profile = component.sigma_profile

81 >>> sigma_profile.shape

82 (51,)

83 >>> print(sum(sigma_profile))

84 97.34554...

86 When :attr:`merge_profiles` is False, :attr:`sigma_profile` is stacked

87 (NHB, OH, OT), shape (3, num_points):

89 >>> component = Component(path.read_text(), merge_profiles=False)

90 >>> stacked = component.sigma_profile

91 >>> stacked.shape

92 (3, 51)

93 >>> from cosmolayer.cosmosac.segment_groups import SEGMENT_GROUPS

94 >>> for i, s in enumerate(SEGMENT_GROUPS):

95 ... print(s, sum(stacked[i]))

96 NHB 72.31802...

97 OH 12.25732...

98 OT 12.77019...

100 Plotting the sigma profiles (stacked, :attr:`merge_profiles` is False):

101

102

103 .. plot::

104 :context: close-figs

105

106 >>> from importlib.resources import files

107 >>> from cosmolayer.cosmosac import Component

108 >>> from cosmolayer.cosmosac.segment_groups import SEGMENT_GROUPS

109 >>> from matplotlib import pyplot as plt

110 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

111 >>> component = Component(path.read_text(), merge_profiles=False)

112 >>> fig, ax = plt.subplots(figsize=(8, 4))

113 >>> grid = component.sigma_grid

114 >>> for i, label in enumerate(SEGMENT_GROUPS):

115 ... _ = ax.plot(grid, component.sigma_profile[i], label=label)

116 >>> _ = ax.set_xlabel("Charge density (e/Å²)")

117 >>> _ = ax.set_ylabel("Surface area contribution (Å²)")

118 >>> _ = ax.legend()

119 >>> fig.tight_layout()

120

121 Plotting the segment-type probabilities:

122

123 .. plot::

124 :context: close-figs

125

126 >>> from importlib.resources import files

127 >>> from cosmolayer.cosmosac import Component

128 >>> from matplotlib import pyplot as plt

129 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

130 >>> component = Component(path.read_text())

131 >>> fig, ax = plt.subplots(figsize=(8, 4))

132 >>> p = component.probabilities

133 >>> _ = ax.bar(range(len(p)), p)

134 >>> _ = ax.set_xlabel("Segment type index")

135 >>> _ = ax.set_ylabel("Probability")

136 >>> fig.tight_layout()

137 """

138

139 def __init__( # noqa: PLR0913

140 self,

141 cosmo_string: str,

142 *,

143 min_sigma: float = -0.025, # e/Å²

144 max_sigma: float = 0.025, # e/Å²

145 num_points: int = 51,

146 averaging_radius: float = COSMO_SAC_2010_AVERAGING_RADIUS, # Å

147 f_decay: float = COSMO_SAC_2010_F_DECAY,

148 sigma_0: float | None = COSMO_SAC_2010_SIGMA_0, # e/Å²

149 merge_profiles: bool = False,

150 ):

151 self._min_sigma = min_sigma

152 self._grid = np.linspace(min_sigma, max_sigma, num_points)

153 self._bin_width = (max_sigma - min_sigma) / (num_points - 1)

154

155 self._averaging_radius = averaging_radius

156 self._f_decay = f_decay

157 self._sigma_0 = sigma_0

158 self._merge_profiles = merge_profiles

159

160 self._format, self._atom_data, self._segment_data, self._volume = (

161 parse_cosmo_file(cosmo_string)

162 )

163

164 sigmas, averaged_sigmas = self._average_sigmas()

165 if (averaged_sigmas < min_sigma).any() or (averaged_sigmas > max_sigma).any():

166 raise ValueError("Averaged charge densities out of range.")

167 self._segment_data["sigma"] = sigmas

168 self._segment_data["sigma_avg"] = averaged_sigmas

169

170 self._bonds = self._detect_bonds()

171 self._area = float(self._segment_data["area"].sum())

172 self._sigma_profiles = self._compute_sigma_profiles(averaged_sigmas)

173

174 def __repr__(self) -> str:

175 num_atoms = len(self._atom_data)

176 num_segments = len(self._segment_data)

177 return f"Component({num_atoms} atoms, {num_segments} segments)"

178

179 @staticmethod

180 def _get_covalent_radius(element: str) -> float:

181 """Get scaled covalent radius for bond detection.

182

183 Parameters

184 ----------

185 element : str

186 Chemical element symbol.

187

188 Returns

189 -------

190 float

191 Covalent radius in Å, scaled by factor 1.3.

192 """

193 radius: float = float(pt.elements.symbol(element).covalent_radius)

194 return COVALENT_FACTOR * radius

195

196 def _detect_bonds(self) -> list[tuple[int, int]]:

197 """Determines bonds from interatomic distances."""

198 df = self._atom_data

199 coords = df[["x", "y", "z"]].values

200 distances = np.sqrt(np.square(coords[:, None, :] - coords).sum(axis=-1))

201 radii = df["element"].apply(self._get_covalent_radius).values

202 adjacency_matrix = distances < (radii[:, None] + radii[None, :])

203 bond_indices = np.nonzero(np.triu(adjacency_matrix, k=1))

204 return [(int(i), int(j)) for i, j in zip(*bond_indices, strict=True)]

205

206 def _get_hydrogen_bonding_classes(self) -> pd.Series:

207 """Classify atoms into hydrogen bonding types (OH, OT, NHB).

208

209 Assigns hydrogen bonding classes: OH (O-H bonds), OT (N-H, F-H bonds or

210 isolated N/F/O), and NHB (all other atoms).

211

212 Returns

213 -------

214 pd.Series

215 Hydrogen bonding class label for each atom.

216 """

217 elements = self._atom_data["element"]

218 hb_class = elements.apply(

219 lambda element: OT if element in ["N", "F", "O"] else NHB

220 )

221 for i, j in self._bonds:

222 elements_ij = set(elements.iloc[[i, j]])

223 if elements_ij in [{"O", "H"}, {"N", "H"}, {"F", "H"}]:

224 hb_class.at[i] = hb_class.at[j] = OH if "O" in elements_ij else OT

225 return hb_class

226

227 def _average_sigmas(self) -> tuple[NDArray[np.float64], NDArray[np.float64]]:

228 """Apply distance-weighted averaging to segment charge densities.

229

230 Smooths raw screening charge densities (σ = q/A) using exponentially

231 decaying weights based on distances between segment centroids.

232

233 Returns

234 -------

235 np.ndarray

236 Averaged screening charge density for each segment in e/Å.

237 """

238 sigmas = self._segment_data["charge"].values / self._segment_data["area"].values

239 coords = self._segment_data[["x", "y", "z"]].values

240 squared_distances = np.square(coords[:, None, :] - coords).sum(axis=-1)

241 squared_radii = self._segment_data["area"].values / np.pi

242

243 sums = squared_radii + self._averaging_radius**2

244 prods = squared_radii * self._averaging_radius**2

245 weights = np.exp(-self._f_decay * squared_distances / sums) * prods / sums

246 averaged_sigmas: NDArray[np.float64] = np.sum(

247 weights * sigmas, axis=1

248 ) / np.sum(weights, axis=1)

249

250 return sigmas, averaged_sigmas

251

252 def _compute_sigma_profile(

253 self, averaged_sigmas: NDArray[np.float64], areas: NDArray[np.float64]

254 ) -> NDArray[np.float64]:

255 """Bin segment areas by charge density using linear interpolation.

256

257 Parameters

258 ----------

259 averaged_sigmas : np.ndarray

260 Averaged screening charge densities in e/Å².

261 areas : np.ndarray

262 Surface areas in Å².

263

264 Returns

265 -------

266 np.ndarray

267 Sigma profile histogram. Shape: (num_points,).

268 """

269 profile = np.zeros_like(self._grid)

270 max_index = len(self._grid) - 2 # index + 1 must be valid

271 for sigma, area in zip(averaged_sigmas, areas, strict=True):

272 index = int((sigma - self._min_sigma) / self._bin_width)

273 index = min(max(0, index), max_index)

274 weight = (self._grid[index + 1] - sigma) / self._bin_width

275 profile[index] += area * weight

276 profile[index + 1] += area * (1.0 - weight)

277 return profile

278

279 def _compute_sigma_profiles(

280 self, averaged_sigmas: NDArray[np.float64]

281 ) -> dict[str, NDArray[np.float64]]:

282 """Compute sigma profiles separated by hydrogen bonding type.

283

284 Classifies segments by H-bonding type (OH, OT, NHB) based on parent atom

285 and sigma sign, then applies a Gaussian probability weighting function.

286

287 Parameters

288 ----------

289 averaged_sigmas : np.ndarray

290 Averaged screening charge densities for all segments in e/Å².

291

292 Returns

293 -------

294 dict

295 Dictionary with keys "NHB", "OH", "OT" and values as sigma profile

296 arrays. Each profile has shape (num_points,).

297 """

298 atom_indices = self._segment_data["atom"]

299 element = atom_indices.map(self._atom_data["element"])

300 is_hb_candidate = (element == "H") == (averaged_sigmas < 0.0)

301 hb_class = atom_indices.map(self._get_hydrogen_bonding_classes())

302 mask_oh = is_hb_candidate & (hb_class == OH)

303 mask_ot = is_hb_candidate & (hb_class == OT)

304 mask_nhb = np.logical_not(mask_oh | mask_ot)

305 areas = self._segment_data["area"].values

306 profile_oh = self._compute_sigma_profile(

307 averaged_sigmas[mask_oh], areas[mask_oh]

308 )

309 profile_ot = self._compute_sigma_profile(

310 averaged_sigmas[mask_ot], areas[mask_ot]

311 )

312 profile_nhb = self._compute_sigma_profile(

313 averaged_sigmas[mask_nhb], areas[mask_nhb]

314 )

315 if self._sigma_0 is None:

316 hb_probability = np.zeros_like(self._grid)

317 else:

318 hb_probability = 1.0 - np.exp(-0.5 * (self._grid / self._sigma_0) ** 2)

319 return {

320 NHB: profile_nhb + (profile_oh + profile_ot) * (1.0 - hb_probability),

321 OH: profile_oh * hb_probability,

322 OT: profile_ot * hb_probability,

323 }

324

325 @classmethod

326 def from_text_reader(cls, text_reader: TextIO) -> "Component":

327 """Create a component from a text reader.

328

329 .. note::

330 This method creates a component with default parameters.

331

332 Parameters

333 ----------

334 text_reader : io.TextIO

335 Text reader to read the COSMO output file from.

336

337 Returns

338 -------

339 Component

340 Component object.

341

342 Examples

343 --------

344 >>> from importlib.resources import files

345 >>> from cosmolayer.cosmosac import Component

346 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

347 >>> with open(path, encoding="utf-8") as file:

348 ... component = Component.from_text_reader(file)

349 >>> component.area, component.volume

350 (97.34554..., 80.07160...)

351

352 """

353 return cls(text_reader.read())

354

355 @classmethod

356 def from_file(cls, file_path: os.PathLike[str] | Traversable) -> "Component":

357 """Create a component from a COSMO output file.

358

359 .. note::

360 This method creates a component with default parameters.

361

362 Parameters

363 ----------

364 file_path : path-like or Traversable

365 Path to the COSMO output file.

366

367 Returns

368 -------

369 Component

370 Component object.

371

372 Examples

373 --------

374 >>> from importlib.resources import files

375 >>> from cosmolayer.cosmosac import Component

376 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

377 >>> component = Component.from_file(path)

378 >>> component.area, component.volume

379 (97.34554..., 80.07160...)

380

381 """

382 if isinstance(file_path, os.PathLike):

383 with open(file_path, encoding="utf-8") as file:

384 return cls.from_text_reader(file)

385 with file_path.open("r", encoding="utf-8") as file:

386 return cls.from_text_reader(file)

387

388 @property

389 def area(self) -> float:

390 """Cavity surface area of the molecule in Å².

391

392 Sum of the areas of all segments from the COSMO calculation.

393 """

394 return self._area

395

396 @property

397 def volume(self) -> float:

398 """Cavity volume of the molecule in Å³."""

399 return self._volume

400

401 @property

402 def cosmo_format(self) -> str:

403 """COSMO file format that was parsed.

404

405 Either "TURBOMOLE" or "DMol-3".

406

407 Examples

408 --------

409 >>> from importlib.resources import files

410 >>> from cosmolayer.cosmosac import Component

411 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

412 >>> component = Component.from_file(path)

413 >>> component.cosmo_format

414 'TURBOMOLE'

415 >>> path = files("cosmolayer.data") / "NCCO.cosmo"

416 >>> component = Component.from_file(path)

417 >>> component.cosmo_format

418 'DMol-3'

419 """

420 return self._format

421

422 @property

423 def atom_data(self) -> pd.DataFrame:

424 """Atom data from the parsed COSMO file.

425

426 DataFrame columns: ``id`` (atom identifier), ``x``, ``y``, ``z`` (Cartesian

427 coordinates in Å), ``element`` (chemical symbol).

428

429 Returns

430 -------

431 pd.DataFrame

432 One row per atom.

433

434 Examples

435 --------

436 >>> from importlib.resources import files

437 >>> from cosmolayer.cosmosac import Component

438 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

439 >>> component = Component(path.read_text())

440 >>> component.atom_data

441 id x y z element

442 0 C1 -1.4... -0.2... 0.0... C

443 1 C2 -0.0... 0.0... 0.0... C

444 2 N1 0.9... -0.9... -0.0... N

445 ...

446 8 H5 1.1... 1.3... -0.4... H

447

448 """

449 return self._atom_data

450

451 @property

452 def segment_data(self) -> pd.DataFrame:

453 """Segment (surface tile) data from the COSMO calculation.

454

455 DataFrame columns: ``atom`` (parent atom index), ``x``, ``y``, ``z``

456 (segment centroid coordinates in Å), ``charge`` (e), ``area`` (Å²),

457 ``sigma`` (screening charge density in e/Å²), ``sigma_avg`` (smoothed

458 density in e/Å²).

459

460 Returns

461 -------

462 pd.DataFrame

463 One row per segment.

464

465 Examples

466 --------

467 >>> from importlib.resources import files

468 >>> from cosmolayer.cosmosac import Component

469 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

470 >>> component = Component(path.read_text())

471 >>> component.segment_data

472 atom x y ... area sigma sigma_avg

473 0 0 -0.867... -1.196... ... 0.206... 0.010... 0.007...

474 1 0 -1.504... -1.502... ... 0.218... 0.007... 0.005...

475 ...

476 470 8 2.133... 1.152... ... 0.145... -0.012... -0.009...

477 <BLANKLINE>

478 [471 rows x 8 columns]

479

480 """

481 return self._segment_data

482

483 @property

484 def bonds(self) -> list[tuple[int, int]]:

485 """Bonds between atoms, inferred from interatomic distances.

486

487 Returns

488 -------

489 list[tuple[int, int]]

490 Pairs of atom indices (i, j) for each bond.

491

492 Examples

493 --------

494 >>> from importlib.resources import files

495 >>> from cosmolayer.cosmosac import Component

496 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

497 >>> component = Component(path.read_text())

498 >>> component.bonds

499 [(0, 1), (0, 4), (0, 5), ... (2, 7), (3, 8)]

500 """

501 return self._bonds

502

503 @property

504 def sigma_grid(self) -> NDArray[np.float64]:

505 """Get the screening charge density grid in e/Å².

506

507 Returns

508 -------

509 np.ndarray

510 Charge density vector in e/Å².

511

512 Examples

513 --------

514 >>> from importlib.resources import files

515 >>> from cosmolayer.cosmosac import Component

516 >>> path = files("cosmolayer.data") / "C=C(N)O.cosmo"

517 >>> component = Component(path.read_text())

518 >>> component.sigma_grid

519 array([-0.025, -0.024, -0.023, ... 0.023, 0.024, 0.025])

520 """

521 return self._grid

522

523 @property

524 def merge_profiles(self) -> bool:

525 """Whether segment groups (NHB, OH, OT) are merged for :attr:`sigma_profile`

526 and :attr:`probabilities`.

527

528 Returns

529 -------

530 bool

531 """

532 return self._merge_profiles

533

534 @property

535 def sigma_profile(self) -> NDArray[np.float64]:

536 """Surface area distribution over screening charge density (sigma), in Å².

537

538 Shape and layout depend on :attr:`merge_profiles`. If True, returns a single

539 merged profile (sum over NHB, OH, OT), shape ``(num_points,)``. If False,

540 returns stacked segment profiles in SEGMENT_GROUPS order (NHB, OH, OT),

541 shape ``(3, num_points)``; ``sigma_profile[0]`` is NHB, ``[1]`` is OH,

542 ``[2]`` is OT.

543

544 Returns

545 -------

546 np.ndarray

547 Sigma profile(s). Units: Å².

548 """

549 if self._merge_profiles:

550 total_profile: NDArray[np.float64] = np.sum(

551 list(self._sigma_profiles.values()), axis=0

552 )

553 return total_profile

554 return np.stack([self._sigma_profiles[seg] for seg in SEGMENT_GROUPS], axis=0)

555

556 @property

557 def probabilities(self) -> NDArray[np.float64]:

558 """Normalized segment-type probability distribution (sigma profile / area).

559

560 Segment types are defined by hydrogen bonding class (NHB, OH, OT) and

561 averaged charge density. Shape is ``(num_points,)`` if :attr:`merge_profiles`

562 is True, otherwise ``(3*num_points,)``.

563

564 Returns

565 -------

566 np.ndarray

567 Probabilities summing to 1.0.

568

569 Examples

570 --------

571 >>> import numpy as np

572 >>> from importlib.resources import files

573 >>> from cosmolayer.cosmosac import Component

574 >>> cosmo_string = (files("cosmolayer.data") / "C=C(N)O.cosmo").read_text()

575 >>> component = Component(cosmo_string, merge_profiles=True)

576 >>> probabilities = component.probabilities

577 >>> probabilities.shape

578 (51,)

579 >>> bool(np.all(probabilities <= 1))

580 True

581 >>> bool(np.isclose(probabilities.sum(), 1.0))

582 True

583 >>> component = Component(cosmo_string, merge_profiles=False)

584 >>> probabilities_full = component.probabilities

585 >>> probabilities_full.shape

586 (153,)

587 >>> bool(np.isclose(probabilities_full.sum(), 1.0))

588 True

589 """

590 profiles = [self._sigma_profiles[segtype] for segtype in SEGMENT_GROUPS]

591 probabilities: NDArray[np.float64] = (

592 np.sum(profiles, axis=0)

593 if self._merge_profiles

594 else np.concatenate(profiles)

595 ) / self._area

596 return probabilities