#!/usr/bin/env python
"""
Fit partial charges for typed polarizabilities
"""
import copy
import json
from collections import defaultdict
import numpy as np
import pint
from openeye import oechem
from openeye.oechem import OEField, Types
from rdkit import Chem
from scipy.spatial.distance import cdist
from dpolfit.fitting.polarizability import label_alpha
from dpolfit.utilities.miscellaneous import *
ureg = pint.UnitRegistry()
Q_ = ureg.Quantity
[docs]
def pair_equivalent(pattern: list) -> np.ndarray:
"""
A function to pair related patterns together for use as constraints
Parameters
----------
pattern: list
A list of patterns, could be elements, SMIRNOFF patterns
Returns
-------
ndarry
Return pairs of related patterns in a nested numpy ndarry.
"""
tmp1 = defaultdict(list)
for idx1, p in enumerate(pattern):
tmp1[p].append(idx1)
tmp2 = []
for key, v in tmp1.items():
n = len(v)
if n > 1:
tmp2.append([[v[i], v[i + 1]] for i in range(n - 1)])
if len(tmp2) == 0:
ret = []
else:
ret = np.concatenate(tmp2)
return ret
[docs]
def coulomb_scaling_rdkit(
rdmol: Chem.rdchem.Mol, coulomb14scale: float = 0.5
) -> np.ndarray:
"""
Parameters
----------
rdmol: Chem.rdchem.Mol
An input rdkit molecule used for specifying connectivity
coulomb14scale: float
Returns
-------
ndarray
"""
natom = rdmol.GetNumAtoms()
# initializing arrays
bonds = []
bound12 = np.zeros((natom, natom))
bound13 = np.zeros((natom, natom))
scaling_matrix = np.ones((natom, natom))
for bond in rdmol.GetBonds():
b = bond.GetBeginAtomIdx()
e = bond.GetEndAtomIdx()
bonds.append([b, e])
# Find 1-2 scaling_matrix
for pair in bonds:
bound12[pair[0], pair[1]] = 12.0
bound12[pair[1], pair[0]] = 12.0
# Find 1-3 scaling_matrix
b13_pairs = []
for i in range(natom):
b12_idx = np.nonzero(bound12[i])[0]
for idx, j in enumerate(b12_idx):
for k in b12_idx[idx + 1 :]:
b13_pairs.append([j, k])
for pair in b13_pairs:
bound13[pair[0], pair[1]] = 13.0
bound13[pair[1], pair[0]] = 13.0
# Find 1-4 scaling_matrix
b14_pairs = []
for i in range(natom):
b12_idx = np.nonzero(bound12[i])[0]
for j in b12_idx:
b122_idx = np.nonzero(bound12[j])[0]
for k in b122_idx:
for j2 in b12_idx:
if k != i and j2 != j:
b14_pairs.append([j2, k])
# Assign coulomb14scaling factor
for pair in b14_pairs:
scaling_matrix[pair[0], pair[1]] = coulomb14scale
scaling_matrix[pair[1], pair[0]] = coulomb14scale
# Exclude 1-2, 1-3 interactions
for pair in bonds:
scaling_matrix[pair[0], pair[1]] = 0.0
scaling_matrix[pair[1], pair[0]] = 0.0
for pair in b13_pairs:
scaling_matrix[pair[0], pair[1]] = 0.0
scaling_matrix[pair[1], pair[0]] = 0.0
# Fill 1-1 with zeros
np.fill_diagonal(scaling_matrix, 0)
return scaling_matrix
def coulomb_scaling_oe(oemol: oechem.OEMol, coulomb14scale: float = 0.5) -> np.ndarray:
natom = oemol.GetMaxAtomIdx()
# initializing arrays
bonds = []
bound12 = np.zeros((natom, natom))
bound13 = np.zeros((natom, natom))
scaling_matrix = np.ones((natom, natom))
for bond in oemol.GetBonds():
b = bond.GetBgnIdx()
e = bond.GetEndIdx()
bonds.append([b, e])
# Find 1-2 scaling_matrix
for pair in bonds:
bound12[pair[0], pair[1]] = 12.0
bound12[pair[1], pair[0]] = 12.0
# Find 1-3 scaling_matrix
b13_pairs = []
for i in range(natom):
b12_idx = np.nonzero(bound12[i])[0]
for idx, j in enumerate(b12_idx):
for k in b12_idx[idx + 1 :]:
b13_pairs.append([j, k])
for pair in b13_pairs:
bound13[pair[0], pair[1]] = 13.0
bound13[pair[1], pair[0]] = 13.0
# Find 1-4 scaling_matrix
b14_pairs = []
for i in range(natom):
b12_idx = np.nonzero(bound12[i])[0]
for j in b12_idx:
b122_idx = np.nonzero(bound12[j])[0]
for k in b122_idx:
for j2 in b12_idx:
if k != i and j2 != j:
b14_pairs.append([j2, k])
# Assign coulomb14scaling factor
for pair in b14_pairs:
scaling_matrix[pair[0], pair[1]] = coulomb14scale
scaling_matrix[pair[1], pair[0]] = coulomb14scale
# Exclude 1-2, 1-3 interactions
for pair in bonds:
scaling_matrix[pair[0], pair[1]] = 0.0
scaling_matrix[pair[1], pair[0]] = 0.0
for pair in b13_pairs:
scaling_matrix[pair[0], pair[1]] = 0.0
scaling_matrix[pair[1], pair[0]] = 0.0
# Fill 1-1 with zeros
np.fill_diagonal(scaling_matrix, 0)
return scaling_matrix
[docs]
def calc_desp(
natom: int,
qs: np.ndarray,
alphas: list,
drjk: np.ndarray,
r_ij: np.ndarray,
r_ij3: np.ndarray,
) -> np.ndarray:
"""
Calculate the ESP from induced dipoles of the molecule.
Unit: e / bohr
"""
efield = np.zeros((natom, 3))
for k in range(natom):
efield[k] = np.dot(qs, drjk[k])
deij = np.einsum("jm, jim->ji", efield, r_ij) * r_ij3.T
desp = np.dot(alphas, deij)
return desp
def generate_paired_parameters(oemol: oechem.OEMol, tags=smiTags) -> dict:
ret = {}
# clear potential mapped index
[atom.SetMapIdx(0) for atom in oemol.GetAtoms()]
for atom in oemol.GetAtoms():
idx = atom.GetIdx()
atom.SetMapIdx(idx + 1)
pattern = oechem.OECreateSmiString(oemol, tags)
atom.SetMapIdx(0)
ret[pattern] = atom.GetData("polarizability")
return ret
def assign_polarizability(
oemol: oechem.OEMol, polarizabilities: dict, unit: str
) -> oechem.OEMol:
conversion = Q_(1, unit).to("bohr**3").magnitude
# hierarchy
included = []
for smarts, value in polarizabilities.items():
labelled = label_alpha(oemol=oemol, smarts_pattern=smarts, index=False)
if len(labelled) > 0:
for atom in labelled:
atom_idx = atom.GetIdx()
if atom_idx in included:
pass
else:
included.append(atom_idx)
atom.SetData("polarizability", value * conversion)
oemol.SetData("polarizability unit", "bohr**3")
return oemol
def fit(oerecord: oechem.OEMolRecord, polarizabilities: dict, unit: str) -> np.ndarray:
oemol = oerecord.get_mol()
natom = oemol.GetMaxAtomIdx()
oemol = assign_polarizability(oemol, polarizabilities, unit)
alphas = [a.GetData("polarizability") for a in oemol.GetAtoms()] # bohr**3
for conf in oemol.GetConfs():
conf_record = oerecord.get_conf_record(conf)
geometry_angstrom = json.loads(
conf_record.get_value(OEField("geometry_angstrom", Types.String))
)
geometry_bohr = Q_(geometry_angstrom, ureg.angstrom).to(ureg.bohr).magnitude
grid_angstrom = json.loads(
conf_record.get_value(OEField("grid_angstrom", Types.String))
)
grid_bohr = Q_(grid_angstrom, ureg.angstrom).to(ureg.bohr).magnitude
grid_esp_0 = np.array(
json.loads(
conf_record.get_value(OEField("grid_esp_0_au_field", Types.String))
)
).reshape(-1)
npoints = len(grid_bohr)
r_ij = -(
grid_bohr - geometry_bohr[:, None]
) # distance vector of grid_bohr points from atoms
r_ij0 = cdist(grid_bohr, geometry_bohr, metric="euclidean")
r_ij1 = np.power(r_ij0, -1) # euclidean distance of atoms and grid_bohrs ^ -1
r_ij3 = np.power(r_ij0, -3) # euclidean distance of atoms and grid_bohrs ^ -3
r_jk = (
geometry_bohr - geometry_bohr[:, None]
) # distance vector of atoms from each other
r_jk1 = cdist(
geometry_bohr, geometry_bohr, metric="euclidean"
) # euclidean distance of atoms from each other
r_jk3 = np.power(
r_jk1, -3, where=r_jk1 != 0
) # euclidean distance of atoms from each other ^ -3
oechem.OEPerceiveSymmetry(oemol, includeH=True)
chemically_equivalent_atoms = [a.GetSymmetryClass() for a in oemol.GetAtoms()]
chemically_equivalent_atoms_pairs = pair_equivalent(chemically_equivalent_atoms)
n_chemically_equivalent_atoms = len(chemically_equivalent_atoms_pairs)
net_charge = sum([a.GetFormalCharge() for a in oemol.GetAtoms()])
coulomb14scale_matrix = coulomb_scaling_oe(oemol, coulomb14scale=0.5)
forced_symmetry = set(
[item for sublist in chemically_equivalent_atoms_pairs for item in sublist]
)
polar_region = list(set(range(natom)) - forced_symmetry)
n_polar_region = len(polar_region)
elements = [
oechem.OEGetAtomicSymbol(a.GetAtomicNum()) for a in oemol.GetAtoms()
]
# Distance dependent matrices for fitting
drjk = np.zeros((natom, natom, 3))
for k in range(natom):
drjk[k] = r_jk[k] * (r_jk3[k] * coulomb14scale_matrix[k]).reshape(-1, 1)
# start charge-fitting
# first stage, no symmetry
ndim1 = natom + 1
a0 = np.einsum("ij,ik->jk", r_ij1, r_ij1)
a1 = np.zeros((ndim1, ndim1))
a1[:natom, :natom] = a0
# Lagrange multiplier
a1[natom, :] = 1.0
a1[:, natom] = 1.0
a1[natom, natom] = 0.0
b1 = np.zeros(ndim1)
b1[:natom] = np.einsum("ik,i->k", r_ij1, grid_esp_0)
b1[natom] = net_charge
q1 = np.linalg.solve(a1, b1)[:natom]
q11 = np.zeros(natom)
while not np.allclose(q1, q11):
a10 = copy.deepcopy(a1)
for j in range(natom):
if elements[j] != "H":
a10[j, j] += 0.0005 * np.power((q1[j] ** 2 + 0.1**2), -0.5)
q1 = q11
q11 = np.linalg.solve(a10, b1)[:natom]
resp1 = q11
# second stage, apply forced symmetry
ndim2 = natom + 1 + n_chemically_equivalent_atoms + n_polar_region
a2 = np.zeros((ndim2, ndim2))
a2[: natom + 1, : natom + 1] = a1
if n_chemically_equivalent_atoms == 0:
pass
else:
for idx, pair in enumerate(chemically_equivalent_atoms_pairs):
a2[natom + 1 + idx, pair[0]] = 1.0
a2[natom + 1 + idx, pair[1]] = -1.0
a2[pair[0], natom + 1 + idx] = 1.0
a2[pair[1], natom + 1 + idx] = -1.0
b2 = np.zeros(ndim2)
b2[natom] = net_charge
charge_to_be_fixed = q1[polar_region]
for idx, pol_idx in enumerate(polar_region):
a2[ndim2 - n_polar_region + idx, pol_idx] = 1.0
a2[pol_idx, ndim2 - n_polar_region + idx] = 1.0
b2[ndim2 - n_polar_region + idx] = charge_to_be_fixed[idx]
q2 = resp1
q22 = np.zeros(natom)
steps = 0
while not np.allclose(q2, q22) and steps < 20:
a20 = copy.deepcopy(a2)
for j in range(natom):
if elements[j] != "H":
a20[j, j] += 0.001 * np.power((q2[j] ** 2 + 0.1**2), -0.5)
desp = calc_desp(
natom=natom, qs=q2, alphas=alphas, drjk=drjk, r_ij=r_ij, r_ij3=r_ij3
)
esp_to_fit = grid_esp_0 - desp
b2[:natom] = np.einsum("ik,i->k", r_ij1, esp_to_fit)
q2 = q22
q22 = np.linalg.solve(a20, b2)[:natom]
steps += 1
resp2 = q22
# quality of fit
base_esp = np.dot(r_ij1, resp2)
dpol_esp = calc_desp(
natom=natom, qs=q2, alphas=alphas, drjk=drjk, r_ij=r_ij, r_ij3=r_ij3
)
final_esp = base_esp + dpol_esp
## rrms
y = lambda x: np.sqrt(
(sum((x - grid_esp_0) ** 2) / sum(grid_esp_0**2)) / npoints
)
dpol_rrms = y(final_esp)
return resp1, resp2, dpol_rrms