Coverage for ufedmm/analysis.py: 10%

162 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-02 04:20 +0000

1""" 

2.. module:: analysis 

3 :platform: Unix, Windows 

4 :synopsis: Unified Free Energy Dynamics with OpenMM 

5 

6.. moduleauthor:: Charlles Abreu <abreu@eq.ufrj.br> 

7 

8""" 

9 

10import itertools 

11from collections import namedtuple 

12 

13import numpy as np 

14import openmm 

15from scipy import stats 

16 

17from ufedmm.ufedmm import _get_energy_function, _get_parameters, _standardized 

18 

19 

20class _RBFContext(openmm.Context): 

21 def __init__(self, variables, variances, centers, weights, platform, properties): 

22 num_particles = (len(variables) + 2) // 3 

23 coordinates = [f"{x}{i+1}" for i in range(num_particles) for x in "xyz"] 

24 exponents = [] 

25 for v, variance, x in zip(variables, variances, coordinates): 

26 if v.periodic: # von Mises 

27 factor = 2 * np.pi / v._range 

28 exponents.append( 

29 f"{1.0/(variance*factor**2)}*(cos({factor}*({v.id}-{x}))-1)" 

30 ) 

31 else: # Gauss 

32 exponents.append(f"(-{0.5/variance})*({v.id}-{x})^2") 

33 expression = f'weight*exp({"+".join(exponents)})' 

34 

35 force = openmm.CustomCompoundBondForce(num_particles, expression) 

36 force.addPerBondParameter("weight") 

37 for v in variables: 

38 force.addGlobalParameter(v.id, 0) 

39 force.addEnergyParameterDerivative(v.id) 

40 

41 system = openmm.System() 

42 positions = [] 

43 for i, (center, weight) in enumerate(zip(centers, weights)): 

44 for position in np.resize(center, (num_particles, 3)): 

45 system.addParticle(1.0) 

46 positions.append(openmm.Vec3(*position)) 

47 force.addBond(range(i * num_particles, (i + 1) * num_particles), [weight]) 

48 

49 system.addForce(force) 

50 integrator = openmm.CustomIntegrator(0) 

51 super().__init__(system, integrator, platform, properties) 

52 self.parameters = [v.id for v in variables] 

53 self.setPositions(positions) 

54 

55 

56class FreeEnergyAnalyzer(object): 

57 """Calculate free energy landscapes from UFED simulation results. 

58 

59 Parameters 

60 ---------- 

61 ufed : :class:`~ufedmm.ufedmm.UnifiedFreeEnergyDynamics` 

62 The UFED object. 

63 dataframe : pandas.DataFrame 

64 A data frame containing sampled sets of collective variables and driver 

65 parameters. 

66 """ 

67 

68 def __init__(self, ufed, dataframe): 

69 self._ufed = ufed 

70 self._dataframe = dataframe 

71 self._bias_variables = filter( 

72 lambda v: v.sigma is not None, self._ufed.variables 

73 ) 

74 

75 def metadynamics_bias_free_energy(self): 

76 """Returns a Python function which, in turn, receives the values of extended- 

77 space variables and returns the energy estimated from a Metadynamics bias 

78 potential reconstructed from the simulation data. 

79 

80 Returns 

81 ------- 

82 function 

83 The free energy function. 

84 """ 

85 Variable = namedtuple("Variable", "sigma factor periodic centers") 

86 variables = [ 

87 Variable( 

88 v.sigma, 2 * np.pi / v._range, v.periodic, self._dataframe[v.id].values 

89 ) 

90 for v in self._bias_variables 

91 ] 

92 try: 

93 heights = self._dataframe["Height (kJ/mole)"].values 

94 except KeyError: 

95 heights = self._ufed.height 

96 

97 def free_energy(*position): 

98 exponents = 0.0 

99 for v, x in zip(variables, position): 

100 if v.periodic: 

101 exponents += (np.cos(v.factor * (v.centers - x)) - 1.0) / ( 

102 v.factor * v.sigma 

103 ) ** 2 

104 else: 

105 exponents += -0.5 * ((v.centers - x) / v.sigma) ** 2 

106 return -np.sum(heights * np.exp(exponents)) 

107 

108 return np.vectorize(free_energy) 

109 

110 def centers_and_mean_forces(self, bins, min_count=1, adjust_centers=False): 

111 """Performs binned statistics with the UFED simulation data. 

112 

113 Parameters 

114 ---------- 

115 bins : list(int) or int 

116 The number of bins in each direction. If a single integer is passed, 

117 then the same number of bins will be considered for all directions. 

118 

119 Keyword Args 

120 ------------ 

121 min_count : int, default=1 

122 The miminum number of hits for any bin to be considered in the analysis. 

123 adjust_centers : bool, default=False 

124 Whether to consider the center of a bin as the mean value of the its 

125 sampled internal points instead of its geometric center. 

126 

127 Returns 

128 ------- 

129 centers : list(numpy.array) 

130 A list of Numpy arrays, each one containing the values of an 

131 extended-space variable at the centers of all bins that satisfy the 

132 minimum-count criterion. 

133 mean_forces : list(numpy.array) 

134 A list of Numpy arrays, each one containing the mean forces in the 

135 direction of an extended-space variable. 

136 """ 

137 variables = self._ufed.variables 

138 sample = [self._dataframe[v.id] for v in variables] 

139 forces = self._compute_forces() 

140 ranges = [(v.min_value, v.max_value) for v in variables] 

141 

142 counts = stats.binned_statistic_dd( 

143 sample, [], statistic="count", bins=bins, range=ranges 

144 ) 

145 index = np.where(counts.statistic.flatten() >= min_count) 

146 

147 n = len(variables) 

148 if adjust_centers: 

149 means = stats.binned_statistic_dd( 

150 sample, sample + forces, bins=bins, range=ranges 

151 ) 

152 centers = [means.statistic[i].flatten()[index] for i in range(n)] 

153 mean_forces = [means.statistic[n + i].flatten()[index] for i in range(n)] 

154 else: 

155 means = stats.binned_statistic_dd(sample, forces, bins=bins, range=ranges) 

156 bin_centers = [0.5 * (edges[1:] + edges[:-1]) for edges in counts.bin_edges] 

157 center_points = np.stack( 

158 [np.array(point) for point in itertools.product(*bin_centers)] 

159 ) 

160 centers = [center_points[:, i][index] for i in range(n)] 

161 mean_forces = [statistic.flatten()[index] for statistic in means.statistic] 

162 

163 return centers, mean_forces 

164 

165 def mean_force_free_energy( 

166 self, centers, mean_forces, sigma, platform_name="Reference", properties={} 

167 ): 

168 """Returns Python functions for evaluating the potential of mean force and their 

169 originating mean forces as a function of the collective variables. 

170 

171 Parameters 

172 ---------- 

173 centers : list(numpy.array) 

174 The bin centers. 

175 mean_forces : list(numpy.array) 

176 The mean forces. 

177 sigmas : float or unit.Quantity or list 

178 The standard deviation of kernels. 

179 

180 Keyword Args 

181 ------------ 

182 platform_name : string, default='Reference' 

183 The name of the OpenMM Platform to be used for potential and mean-force 

184 evaluations. 

185 properties : dict, default={} 

186 A set of values for platform-specific properties. Keys are the property 

187 names. 

188 

189 Returns 

190 ------- 

191 potential : function 

192 A Python function whose arguments are collective variable values and 

193 whose result is the potential of mean force at that values. 

194 mean_force : function 

195 A Python function whose arguments are collective variable values and 

196 whose result is the mean force at those values. 

197 """ 

198 variables = self._ufed.variables 

199 n = len(variables) 

200 try: 

201 variances = [_standardized(value) ** 2 for value in sigma] 

202 except TypeError: 

203 variances = [_standardized(sigma) ** 2] * n 

204 

205 exponent = [] 

206 derivative = [] 

207 for v, variance in zip(variables, variances): 

208 if v.periodic: # von Mises 

209 factor = 2 * np.pi / v._range 

210 exponent.append( 

211 lambda x: (np.cos(factor * x) - 1.0) / (factor * factor * variance) 

212 ) 

213 derivative.append(lambda x: -np.sin(factor * x) / (factor * variance)) 

214 else: # Gauss 

215 exponent.append(lambda x: -0.5 * x**2 / variance) 

216 derivative.append(lambda x: -x / variance) 

217 

218 def kernel(x): 

219 return np.exp(np.sum(exponent[i](x[i]) for i in range(n))) 

220 

221 def gradient(x, i): 

222 return kernel(x) * derivative[i](x[i]) 

223 

224 grid_points = [np.array(xc) for xc in zip(*centers)] 

225 coefficients = [] 

226 for i in range(n): 

227 for x in grid_points: 

228 coefficients.append( 

229 np.array([gradient(x - xc, i) for xc in grid_points]) 

230 ) 

231 

232 M = np.vstack(coefficients) 

233 F = -np.hstack(mean_forces) 

234 A, _, _, _ = np.linalg.lstsq(M, F, rcond=None) 

235 

236 platform = openmm.Platform.getPlatformByName(platform_name) 

237 context = _RBFContext( 

238 variables, variances, grid_points, A, platform, properties 

239 ) 

240 minimum = 0.0 

241 

242 def potential(*x): 

243 for parameter, value in zip(context.parameters, x): 

244 context.setParameter(parameter, value) 

245 state = context.getState(getEnergy=True) 

246 return state.getPotentialEnergy()._value - minimum 

247 

248 def mean_force(*x): 

249 for parameter, value in zip(context.parameters, x): 

250 context.setParameter(parameter, value) 

251 state = context.getState(getParameterDerivatives=True) 

252 return -state.getEnergyParameterDerivatives()._value 

253 

254 minimum = np.min([potential(*x) for x in grid_points]) 

255 return np.vectorize(potential), np.vectorize(mean_force) 

256 

257 def _compute_forces(self): 

258 variables = self._ufed.variables 

259 collective_variables = [colvar.id for v in variables for colvar in v.colvars] 

260 extended_variables = [v.id for v in variables] 

261 all_variables = collective_variables + extended_variables 

262 

263 force = openmm.CustomCVForce(_get_energy_function(variables)) 

264 for key, value in _get_parameters(variables).items(): 

265 force.addGlobalParameter(key, value) 

266 for variable in all_variables: 

267 force.addGlobalParameter(variable, 0) 

268 for xv in extended_variables: 

269 force.addEnergyParameterDerivative(xv) 

270 

271 system = openmm.System() 

272 system.addForce(force) 

273 system.addParticle(0) 

274 platform = openmm.Platform.getPlatformByName("Reference") 

275 context = openmm.Context(system, openmm.CustomIntegrator(0), platform) 

276 context.setPositions([openmm.Vec3(0, 0, 0)]) 

277 

278 n = len(self._dataframe.index) 

279 forces = [np.empty(n) for xv in extended_variables] 

280 for j, row in self._dataframe.iterrows(): 

281 for variable in all_variables: 

282 context.setParameter(variable, row[variable]) 

283 state = context.getState(getParameterDerivatives=True) 

284 derivatives = state.getEnergyParameterDerivatives() 

285 for i, xv in enumerate(extended_variables): 

286 forces[i][j] = -derivatives[xv] 

287 return forces 

288 

289 

290class Analyzer(FreeEnergyAnalyzer): 

291 """UFED Analyzer. 

292 

293 .. warning:: 

294 This class is obsolete and will be discontinued. Use :class:`FreeEnergyAnalyzer` 

295 instead. 

296 

297 Parameters 

298 ---------- 

299 ufed : :class:`~ufedmm.ufedmm.UnifiedFreeEnergyDynamics` 

300 The UFED object. 

301 dataframe : pandas.DataFrame 

302 A data frame containing sampled sets of collective variables and driver 

303 parameters. 

304 bins : int or list(int) 

305 The number of bins in each direction. 

306 

307 Keyword Args 

308 ------------ 

309 min_count : int, default=1 

310 The miminum number of hits for a given bin to be considered in the analysis. 

311 adjust_centers : bool, default=False 

312 Whether to consider the center of a bin as the mean value of the its sampled 

313 internal points istead of its geometric center. 

314 """ 

315 

316 def __init__(self, ufed, dataframe, bins, min_count=1, adjust_centers=False): 

317 super().__init__(ufed, dataframe) 

318 try: 

319 self._bins = [bin for bin in bins] 

320 except TypeError: 

321 self._bins = [bins] * len(ufed.variables) 

322 self._min_count = min_count 

323 self._adjust_centers = adjust_centers 

324 

325 def free_energy_functions(self, sigma=None, factor=8): 

326 """Returns Python functions for evaluating the potential of mean force and their 

327 originating mean forces as a function of the collective variables. 

328 

329 Keyword Args 

330 ------------ 

331 sigma : float or unit.Quantity, default=None 

332 The standard deviation of kernels. If this is `None`, then values will 

333 be determined from the distances between nodes. 

334 factor : float, default=8 

335 If ``sigma`` is not explicitly provided, then it will be computed as 

336 ``sigma = factor*range/bins`` for each direction. 

337 

338 Returns 

339 ------- 

340 potential : function 

341 A Python function whose arguments are collective variable values and 

342 whose result is the potential of mean force at that values. 

343 mean_force : function 

344 A Python function whose arguments are collective variable values and 

345 whose result is the mean force at that values regarding a given 

346 direction. Such direction must be defined through a keyword argument 

347 `dir`, whose default value is `0` (meaning the direction of the first 

348 collective variable). 

349 """ 

350 self.centers, self.mean_forces = self.centers_and_mean_forces( 

351 self._bins, 

352 self._min_count, 

353 self._adjust_centers, 

354 ) 

355 variables = self._ufed.variables 

356 if sigma is None: 

357 sigmas = [factor * v._range / bin for v, bin in zip(variables, self._bins)] 

358 else: 

359 try: 

360 sigmas = [_standardized(value) for value in sigma] 

361 except TypeError: 

362 sigmas = [_standardized(sigma)] * len(variables) 

363 

364 return self.mean_force_free_energy(self.centers, self.mean_forces, sigmas)