Coverage for chempropstereo/featurizers/utils.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-22 21:04 +0000

1"""Utilities for printing featurized molecules.""" 

2 

3import numpy as np 

4 

5 

def describe_atom_features(
    index: int, features: np.ndarray, sizes: tuple[int, ...]
) -> str:
    """Format an atom feature array into a string.

    The last entry of ``features`` is printed as the mass feature with three
    decimals. All preceding entries are truncated to integers and printed in
    groups whose lengths are given by ``sizes[:-1]``.

    Parameters
    ----------
    index : int
        The index of the atom.
    features : np.ndarray
        The feature array of the atom.
    sizes : tuple[int, ...]
        The size of each feature type. The last size corresponds to the
        mass feature and is not used for grouping.

    Returns
    -------
    str
        A formatted string representing the atom features, including the
        atom index (right-justified to width 3), the space-separated bit
        groups, and the mass feature.

    Examples
    --------
    >>> import numpy as np
    >>> features = np.array([1, 0, 1, 0, 1, 0.120])
    >>> sizes = (2, 2, 1, 1)
    >>> describe_atom_features(1, features, sizes)
    '  1: 10 10 1 0.120'

    """
    atom_desc = str(index).rjust(3)
    # Keep one string per feature and slice this list (not a concatenated
    # string), so a value that renders as several characters cannot shift
    # the boundaries of the groups that follow it.
    digits = [str(int(value)) for value in features[:-1]]
    stops = np.cumsum(sizes[:-1]).tolist()
    starts = [0] + stops
    bits_desc = " ".join(
        "".join(digits[start:stop]) for start, stop in zip(starts, stops)
    )
    mass_desc = f"{features[-1]:.3f}"
    return f"{atom_desc}: {bits_desc} {mass_desc}"

41 

42 

def describe_bond_features(
    atoms: tuple[int, int], features: np.ndarray, sizes: tuple[int, ...]
) -> str:
    """Format a bond feature array into a string.

    Every entry of ``features`` is truncated to an integer and printed in
    groups whose lengths are given by ``sizes``.

    Parameters
    ----------
    atoms : tuple[int, int]
        The indices of the atoms connected by the bond.
    features : np.ndarray
        The feature array of the bond.
    sizes : tuple[int, ...]
        The size of each feature type.

    Returns
    -------
    str
        A formatted string representing the bond features: the atom indices
        joined by an arrow (right-justified to width 7), followed by the
        space-separated feature groups.

    Examples
    --------
    >>> import numpy as np
    >>> atoms = (0, 1)
    >>> features = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
    >>> sizes = (6, 2, 1, 3, 4, 2)
    >>> describe_bond_features(atoms, features, sizes)
    '    0→1: 010000 00 0 000 0111 11'

    """
    bond_desc = "\u2192".join(map(str, atoms)).rjust(7)
    # Keep one string per feature and slice this list (not a concatenated
    # string), so a value that renders as several characters cannot shift
    # the boundaries of the groups that follow it.
    digits = [str(int(value)) for value in features]
    stops = np.cumsum(sizes).tolist()
    starts = [0] + stops
    groups = ("".join(digits[start:stop]) for start, stop in zip(starts, stops))
    return f"{bond_desc}: " + " ".join(groups)