Source code for dpolfit.fitting.respdpol

#!/usr/bin/env python

"""
Fit partial charges for typed polarizabilities
"""

import copy
import json
from collections import defaultdict

import numpy as np
import pint
from openeye import oechem
from openeye.oechem import OEField, Types
from rdkit import Chem
from scipy.spatial.distance import cdist

from dpolfit.fitting.polarizability import label_alpha
from dpolfit.utilities.miscellaneous import *

ureg = pint.UnitRegistry()
Q_ = ureg.Quantity



[docs]
def pair_equivalent(pattern: list) -> np.ndarray:
    """
    A function to pair related patterns together for use as constraints

    Parameters
    ----------
    pattern: list
        A list of patterns, could be elements, SMIRNOFF patterns

    Returns
    -------
    ndarry
        Return pairs of related patterns in a nested numpy ndarry.

    """
    tmp1 = defaultdict(list)
    for idx1, p in enumerate(pattern):
        tmp1[p].append(idx1)

    tmp2 = []
    for key, v in tmp1.items():
        n = len(v)
        if n > 1:
            tmp2.append([[v[i], v[i + 1]] for i in range(n - 1)])
    if len(tmp2) == 0:
        ret = []
    else:
        ret = np.concatenate(tmp2)
    return ret




[docs]
def coulomb_scaling_rdkit(
    rdmol: Chem.rdchem.Mol, coulomb14scale: float = 0.5
) -> np.ndarray:
    """

    Parameters
    ----------
    rdmol: Chem.rdchem.Mol
        An input rdkit molecule used for specifying connectivity

    coulomb14scale: float

    Returns
    -------
    ndarray

    """

    natom = rdmol.GetNumAtoms()
    # initializing arrays
    bonds = []
    bound12 = np.zeros((natom, natom))
    bound13 = np.zeros((natom, natom))
    scaling_matrix = np.ones((natom, natom))

    for bond in rdmol.GetBonds():
        b = bond.GetBeginAtomIdx()
        e = bond.GetEndAtomIdx()
        bonds.append([b, e])

    # Find 1-2 scaling_matrix
    for pair in bonds:
        bound12[pair[0], pair[1]] = 12.0
        bound12[pair[1], pair[0]] = 12.0

    # Find 1-3 scaling_matrix
    b13_pairs = []
    for i in range(natom):
        b12_idx = np.nonzero(bound12[i])[0]
        for idx, j in enumerate(b12_idx):
            for k in b12_idx[idx + 1 :]:
                b13_pairs.append([j, k])
    for pair in b13_pairs:
        bound13[pair[0], pair[1]] = 13.0
        bound13[pair[1], pair[0]] = 13.0

    # Find 1-4 scaling_matrix
    b14_pairs = []
    for i in range(natom):
        b12_idx = np.nonzero(bound12[i])[0]
        for j in b12_idx:
            b122_idx = np.nonzero(bound12[j])[0]
            for k in b122_idx:
                for j2 in b12_idx:
                    if k != i and j2 != j:
                        b14_pairs.append([j2, k])

    # Assign coulomb14scaling factor
    for pair in b14_pairs:
        scaling_matrix[pair[0], pair[1]] = coulomb14scale
        scaling_matrix[pair[1], pair[0]] = coulomb14scale

    # Exclude 1-2, 1-3 interactions
    for pair in bonds:
        scaling_matrix[pair[0], pair[1]] = 0.0
        scaling_matrix[pair[1], pair[0]] = 0.0

    for pair in b13_pairs:
        scaling_matrix[pair[0], pair[1]] = 0.0
        scaling_matrix[pair[1], pair[0]] = 0.0

    # Fill 1-1 with zeros
    np.fill_diagonal(scaling_matrix, 0)

    return scaling_matrix



def coulomb_scaling_oe(oemol: oechem.OEMol, coulomb14scale: float = 0.5) -> np.ndarray:

    natom = oemol.GetMaxAtomIdx()
    # initializing arrays
    bonds = []
    bound12 = np.zeros((natom, natom))
    bound13 = np.zeros((natom, natom))
    scaling_matrix = np.ones((natom, natom))

    for bond in oemol.GetBonds():
        b = bond.GetBgnIdx()
        e = bond.GetEndIdx()
        bonds.append([b, e])

    # Find 1-2 scaling_matrix
    for pair in bonds:
        bound12[pair[0], pair[1]] = 12.0
        bound12[pair[1], pair[0]] = 12.0

    # Find 1-3 scaling_matrix
    b13_pairs = []
    for i in range(natom):
        b12_idx = np.nonzero(bound12[i])[0]
        for idx, j in enumerate(b12_idx):
            for k in b12_idx[idx + 1 :]:
                b13_pairs.append([j, k])
    for pair in b13_pairs:
        bound13[pair[0], pair[1]] = 13.0
        bound13[pair[1], pair[0]] = 13.0

    # Find 1-4 scaling_matrix
    b14_pairs = []
    for i in range(natom):
        b12_idx = np.nonzero(bound12[i])[0]
        for j in b12_idx:
            b122_idx = np.nonzero(bound12[j])[0]
            for k in b122_idx:
                for j2 in b12_idx:
                    if k != i and j2 != j:
                        b14_pairs.append([j2, k])

    # Assign coulomb14scaling factor
    for pair in b14_pairs:
        scaling_matrix[pair[0], pair[1]] = coulomb14scale
        scaling_matrix[pair[1], pair[0]] = coulomb14scale

    # Exclude 1-2, 1-3 interactions
    for pair in bonds:
        scaling_matrix[pair[0], pair[1]] = 0.0
        scaling_matrix[pair[1], pair[0]] = 0.0

    for pair in b13_pairs:
        scaling_matrix[pair[0], pair[1]] = 0.0
        scaling_matrix[pair[1], pair[0]] = 0.0

    # Fill 1-1 with zeros
    np.fill_diagonal(scaling_matrix, 0)

    return scaling_matrix



[docs]
def calc_desp(
    natom: int,
    qs: np.ndarray,
    alphas: list,
    drjk: np.ndarray,
    r_ij: np.ndarray,
    r_ij3: np.ndarray,
) -> np.ndarray:
    """
    Calculate the ESP from induced dipoles of the molecule.
    Unit: e / bohr
    """

    efield = np.zeros((natom, 3))
    for k in range(natom):
        efield[k] = np.dot(qs, drjk[k])

    deij = np.einsum("jm, jim->ji", efield, r_ij) * r_ij3.T

    desp = np.dot(alphas, deij)

    return desp



def generate_paired_parameters(oemol: oechem.OEMol, tags=smiTags) -> dict:
    ret = {}
    # clear potential mapped index
    [atom.SetMapIdx(0) for atom in oemol.GetAtoms()]
    for atom in oemol.GetAtoms():
        idx = atom.GetIdx()
        atom.SetMapIdx(idx + 1)
        pattern = oechem.OECreateSmiString(oemol, tags)
        atom.SetMapIdx(0)
        ret[pattern] = atom.GetData("polarizability")

    return ret


def assign_polarizability(
    oemol: oechem.OEMol, polarizabilities: dict, unit: str
) -> oechem.OEMol:

    conversion = Q_(1, unit).to("bohr**3").magnitude

    # hierarchy
    included = []

    for smarts, value in polarizabilities.items():
        labelled = label_alpha(oemol=oemol, smarts_pattern=smarts, index=False)

        if len(labelled) > 0:
            for atom in labelled:
                atom_idx = atom.GetIdx()
                if atom_idx in included:
                    pass
                else:
                    included.append(atom_idx)
                    atom.SetData("polarizability", value * conversion)

    oemol.SetData("polarizability unit", "bohr**3")

    return oemol


def fit(oerecord: oechem.OEMolRecord, polarizabilities: dict, unit: str) -> np.ndarray:

    oemol = oerecord.get_mol()
    natom = oemol.GetMaxAtomIdx()
    oemol = assign_polarizability(oemol, polarizabilities, unit)
    alphas = [a.GetData("polarizability") for a in oemol.GetAtoms()]  # bohr**3

    for conf in oemol.GetConfs():
        conf_record = oerecord.get_conf_record(conf)
        geometry_angstrom = json.loads(
            conf_record.get_value(OEField("geometry_angstrom", Types.String))
        )

        geometry_bohr = Q_(geometry_angstrom, ureg.angstrom).to(ureg.bohr).magnitude

        grid_angstrom = json.loads(
            conf_record.get_value(OEField("grid_angstrom", Types.String))
        )

        grid_bohr = Q_(grid_angstrom, ureg.angstrom).to(ureg.bohr).magnitude

        grid_esp_0 = np.array(
            json.loads(
                conf_record.get_value(OEField("grid_esp_0_au_field", Types.String))
            )
        ).reshape(-1)

        npoints = len(grid_bohr)

        r_ij = -(
            grid_bohr - geometry_bohr[:, None]
        )  # distance vector of grid_bohr points from atoms
        r_ij0 = cdist(grid_bohr, geometry_bohr, metric="euclidean")
        r_ij1 = np.power(r_ij0, -1)  # euclidean distance of atoms and grid_bohrs ^ -1
        r_ij3 = np.power(r_ij0, -3)  # euclidean distance of atoms and grid_bohrs ^ -3

        r_jk = (
            geometry_bohr - geometry_bohr[:, None]
        )  # distance vector of atoms from each other
        r_jk1 = cdist(
            geometry_bohr, geometry_bohr, metric="euclidean"
        )  # euclidean distance of atoms from each other
        r_jk3 = np.power(
            r_jk1, -3, where=r_jk1 != 0
        )  # euclidean distance of atoms from each other ^ -3

        oechem.OEPerceiveSymmetry(oemol, includeH=True)
        chemically_equivalent_atoms = [a.GetSymmetryClass() for a in oemol.GetAtoms()]
        chemically_equivalent_atoms_pairs = pair_equivalent(chemically_equivalent_atoms)

        n_chemically_equivalent_atoms = len(chemically_equivalent_atoms_pairs)
        net_charge = sum([a.GetFormalCharge() for a in oemol.GetAtoms()])

        coulomb14scale_matrix = coulomb_scaling_oe(oemol, coulomb14scale=0.5)

        forced_symmetry = set(
            [item for sublist in chemically_equivalent_atoms_pairs for item in sublist]
        )

        polar_region = list(set(range(natom)) - forced_symmetry)
        n_polar_region = len(polar_region)
        elements = [
            oechem.OEGetAtomicSymbol(a.GetAtomicNum()) for a in oemol.GetAtoms()
        ]

        # Distance dependent matrices for fitting

        drjk = np.zeros((natom, natom, 3))
        for k in range(natom):
            drjk[k] = r_jk[k] * (r_jk3[k] * coulomb14scale_matrix[k]).reshape(-1, 1)

        # start charge-fitting
        # first stage, no symmetry
        ndim1 = natom + 1
        a0 = np.einsum("ij,ik->jk", r_ij1, r_ij1)
        a1 = np.zeros((ndim1, ndim1))
        a1[:natom, :natom] = a0

        # Lagrange multiplier
        a1[natom, :] = 1.0
        a1[:, natom] = 1.0
        a1[natom, natom] = 0.0

        b1 = np.zeros(ndim1)
        b1[:natom] = np.einsum("ik,i->k", r_ij1, grid_esp_0)
        b1[natom] = net_charge

        q1 = np.linalg.solve(a1, b1)[:natom]

        q11 = np.zeros(natom)

        while not np.allclose(q1, q11):
            a10 = copy.deepcopy(a1)
            for j in range(natom):
                if elements[j] != "H":
                    a10[j, j] += 0.0005 * np.power((q1[j] ** 2 + 0.1**2), -0.5)
            q1 = q11
            q11 = np.linalg.solve(a10, b1)[:natom]

        resp1 = q11

        # second stage, apply forced symmetry
        ndim2 = natom + 1 + n_chemically_equivalent_atoms + n_polar_region
        a2 = np.zeros((ndim2, ndim2))
        a2[: natom + 1, : natom + 1] = a1

        if n_chemically_equivalent_atoms == 0:
            pass
        else:
            for idx, pair in enumerate(chemically_equivalent_atoms_pairs):
                a2[natom + 1 + idx, pair[0]] = 1.0
                a2[natom + 1 + idx, pair[1]] = -1.0
                a2[pair[0], natom + 1 + idx] = 1.0
                a2[pair[1], natom + 1 + idx] = -1.0

        b2 = np.zeros(ndim2)
        b2[natom] = net_charge

        charge_to_be_fixed = q1[polar_region]

        for idx, pol_idx in enumerate(polar_region):
            a2[ndim2 - n_polar_region + idx, pol_idx] = 1.0
            a2[pol_idx, ndim2 - n_polar_region + idx] = 1.0
            b2[ndim2 - n_polar_region + idx] = charge_to_be_fixed[idx]

        q2 = resp1
        q22 = np.zeros(natom)
        steps = 0
        while not np.allclose(q2, q22) and steps < 20:
            a20 = copy.deepcopy(a2)
            for j in range(natom):
                if elements[j] != "H":
                    a20[j, j] += 0.001 * np.power((q2[j] ** 2 + 0.1**2), -0.5)

            desp = calc_desp(
                natom=natom, qs=q2, alphas=alphas, drjk=drjk, r_ij=r_ij, r_ij3=r_ij3
            )
            esp_to_fit = grid_esp_0 - desp
            b2[:natom] = np.einsum("ik,i->k", r_ij1, esp_to_fit)
            q2 = q22
            q22 = np.linalg.solve(a20, b2)[:natom]
            steps += 1

        resp2 = q22

        # quality of fit
        base_esp = np.dot(r_ij1, resp2)
        dpol_esp = calc_desp(
            natom=natom, qs=q2, alphas=alphas, drjk=drjk, r_ij=r_ij, r_ij3=r_ij3
        )
        final_esp = base_esp + dpol_esp

        ## rrms
        y = lambda x: np.sqrt(
            (sum((x - grid_esp_0) ** 2) / sum(grid_esp_0**2)) / npoints
        )

        dpol_rrms = y(final_esp)

        return resp1, resp2, dpol_rrms