Source code for fegrow.package

import copy
import itertools
import stat
from typing import Optional, List, Union, Tuple
import os
import glob
import tempfile
import subprocess
import re
from pathlib import Path
from urllib.request import urlretrieve
from collections import OrderedDict

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
from prody.proteins.functions import showProtein, view3D
import py3Dmol
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolAlign import AlignMol
from rdkit.Chem import PandasTools
import mols2grid
import pandas


from .conformers import generate_conformers
from .toxicity import tox_props


def replace_atom(mol: Chem.Mol, target_idx: int, new_atom: int) -> Chem.Mol:
    edit_mol = Chem.RWMol(mol)
    for atom in edit_mol.GetAtoms():
        if atom.GetIdx() == target_idx:
            atom.SetAtomicNum(new_atom)
    return Chem.Mol(edit_mol)


def rep2D(mol, idx=-1, rdkit_mol=False, **kwargs):
    numbered = copy.deepcopy(mol)
    numbered.RemoveAllConformers()
    if idx:
        for atom in numbered.GetAtoms():
            atom.SetAtomMapNum(atom.GetIdx())
    AllChem.Compute2DCoords(numbered)

    if rdkit_mol:
        return numbered
    else:
        return Draw.MolToImage(numbered, **kwargs)


def rep3D(mol):
    viewer = py3Dmol.view(width=300, height=300, viewergrid=(1, 1))
    for i in range(mol.GetNumConformers()):
        mb = Chem.MolToMolBlock(mol, confId=i)
        viewer.addModel(mb, "mol")
    viewer.setStyle({"stick": {}})
    viewer.zoomTo()
    return viewer


def is_linker(rmol):
    """
    Check if the molecule is a linker by checking if it has 2 R-group points
    """
    if len([atom for atom in rmol.GetAtoms() if atom.GetAtomMapNum() in (1, 2)]) == 2:
        return True

    return False


def __getAttachmentVector(R_group):
    """In the R-group or a linker, search for the position of the attachment point (R atom)
    and extract the atom (currently only single bond supported). In case of the linker,
    the R1 atom is selected.
    rgroup: fragment passed as rdkit molecule
    return: tuple (ratom, ratom_neighbour)
    """

    # find the R groups in the molecule
    ratoms = [atom for atom in R_group.GetAtoms() if atom.GetAtomicNum() == 0]
    if not len(ratoms):
        raise Exception(
            "The R-group does not have R-atoms (Atoms with index == 0, visualised with a '*' character)"
        )

    # if it is a linker, it will have more than 1 R group, pick the one with index 1
    if len(ratoms) == 1:
        atom = ratoms[0]
    elif is_linker(R_group):
        # find the attachable point
        ratoms = [atom for atom in ratoms if atom.GetAtomMapNum() == 1]
        atom = ratoms[0]
    else:
        raise Exception(
            "Either missing R-atoms, or more than two R-atoms. "
            '"Atom.GetAtomicNum" should be 0 for the R-atoms, and in the case of the linker,  '
            '"Atom.GetAtomMapNum" has to specify the order (1,2) '
        )

    neighbours = atom.GetNeighbors()
    if len(neighbours) > 1:
        raise NotImplementedError(
            "The linking R atom in the R group has two or more attachment points. "
        )

    return atom, neighbours[0]


def merge_R_group(mol, R_group, replaceIndex):
    """function originally copied from
    https://github.com/molecularsets/moses/blob/master/moses/baselines/combinatorial.py
    """

    # the linking R atom on the R group
    rgroup_R_atom, R_atom_neighbour = __getAttachmentVector(R_group)
    print(f"Rgroup atom index {rgroup_R_atom} neighbouring {R_atom_neighbour}")

    # atom to be replaced in the molecule
    replace_atom = mol.GetAtomWithIdx(replaceIndex)
    assert (
        len(replace_atom.GetNeighbors()) == 1
    ), "The atom being replaced on the molecule has more neighbour atoms than 1. Not supported."
    replace_atom_neighbour = replace_atom.GetNeighbors()[0]

    # align the Rgroup
    AlignMol(
        R_group,
        mol,
        atomMap=(
            (R_atom_neighbour.GetIdx(), replace_atom.GetIdx()),
            (rgroup_R_atom.GetIdx(), replace_atom_neighbour.GetIdx()),
        ),
    )

    # merge the two molecules
    combined = Chem.CombineMols(mol, R_group)
    emol = Chem.EditableMol(combined)

    # connect
    bond_order = rgroup_R_atom.GetBonds()[0].GetBondType()
    emol.AddBond(
        replace_atom_neighbour.GetIdx(),
        R_atom_neighbour.GetIdx() + mol.GetNumAtoms(),
        order=bond_order,
    )
    # -1 accounts for the removed linking atom on the template
    emol.RemoveAtom(rgroup_R_atom.GetIdx() + mol.GetNumAtoms())
    # remove the linking atom on the template
    emol.RemoveAtom(replace_atom.GetIdx())

    merged = emol.GetMol()
    Chem.SanitizeMol(merged)

    # if the molecule was previously merged
    if hasattr(mol, "template") and mol.template is not None:
        # mol already had a connected e.g. a linker, therefore we use the area without the linker
        template = mol.template
    else:
        # prepare the template
        etemp = Chem.EditableMol(mol)
        etemp.RemoveAtom(replace_atom.GetIdx())
        template = etemp.GetMol()

    with_template = RMol(merged)
    with_template._save_template(template)
    # save the group
    with_template._save_rgroup(R_group)

    if is_linker(R_group):
        # the linker's label = 1 was used for the merging,
        # rename label = 2 to 0 to turn it into a simple R-group
        for atom in with_template.GetAtoms():
            if atom.GetAtomMapNum() == 2:
                atom.SetAtomMapNum(0)

    return with_template


def ic50(x):
    return 10 ** (-x - -9)


class RInterface:
    """
    This is a shared interface for a molecule and a list of molecules.

    The main purpose is to allow using the same functions on a single molecule and on a group of them.
    """

    def rep2D(self, **kwargs):
        pass

    def toxicity(self):
        pass

    def generate_conformers(
        self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs
    ):
        pass

    def remove_clashing_confs(self, prot, min_dst_allowed=1):
        pass


[docs]class RMol(rdkit.Chem.rdchem.Mol, RInterface): """ RMol is essentially a wrapper around RDKit Mol with tailored functionalities for attaching R groups, etc. :param rmol: when provided, energies and additional metadata is preserved. :type rmol: RMol :param template: Provide the original molecule template used for this RMol. """ gnina_dir = None def __init__(self, *args, id=None, template=None, **kwargs): super().__init__(*args, **kwargs) if isinstance(args[0], RMol) or isinstance(args[0], rdkit.Chem.Mol): self.template = args[0].template if hasattr(args[0], "template") else None self.rgroup = args[0].rgroup if hasattr(args[0], "rgroup") else None self.opt_energies = ( args[0].opt_energies if hasattr(args[0], "opt_energies") else None ) self.id = args[0].id if hasattr(args[0], "id") else None else: self.template = template self.rgroup = None self.opt_energies = None self.id = id def _save_template(self, mol): self.template = RMol(copy.deepcopy(mol)) def _save_rgroup(self, rgroup): self.rgroup = rgroup def _save_opt_energies(self, energies): self.opt_energies = energies
[docs] def toxicity(self): """ Assessed various ADMET properties, including - Lipinksi rule of 5 properties, - the presence of unwanted substructures - problematic functional groups - synthetic accessibility :return: a row of a dataframe with the descriptors :rtype: dataframe """ df = tox_props(self) # add an index column to the front df.insert(0, "ID", self.id) df.set_index("ID", inplace=True) # add a column with smiles df = df.assign(Smiles=[Chem.MolToSmiles(self)]) return df
[docs] def generate_conformers( self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs ): """ Generate conformers using the RDKIT's ETKDG. The generated conformers are embedded into the template structure. In other words, any atoms that are common with the template structure, should have the same coordinates. :param num_conf: fixme :param minimum_conf_rms: The minimum acceptable difference in the RMS in any new generated conformer. Conformers that are too similar are discarded. :type minimum_conf_rms: float :param flexible: A list of indices that are common with the template molecule that should have new coordinates. :type flexible: List[int] """ cons = generate_conformers(self, num_conf, minimum_conf_rms, **kwargs) self.RemoveAllConformers() [self.AddConformer(con, assignId=True) for con in cons.GetConformers()]
[docs] def optimise_in_receptor(self, *args, **kwargs): """ Enumerate the conformers inside of the receptor by employing ANI2x, a hybrid machine learning / molecular mechanics (ML/MM) approach. ANI2x is neural nework potential for the ligand energetics but works only for the following atoms: H, C, N, O, F, S, Cl. Open Force Field Parsley force field is used for intermolecular interactions with the receptor. :param sigma_scale_factor: is used to scale the Lennard-Jones radii of the atoms. :param relative_permittivity: is used to scale the electrostatic interactions with the protein. :param water_model: can be used to set the force field for any water molecules present in the binding site. """ if self.GetNumConformers() == 0: print("Warning: no conformers so cannot optimise_in_receptor. Ignoring.") return from .receptor import ForceField, optimise_in_receptor opt_mol, energies = optimise_in_receptor(self, *args, **kwargs) # replace the conformers with the optimised ones self.RemoveAllConformers() [ self.AddConformer(conformer, assignId=True) for conformer in opt_mol.GetConformers() ] # save the energies self._save_opt_energies(energies) # build a dataframe with the molecules conformer_ids = [c.GetId() for c in self.GetConformers()] df = pandas.DataFrame( { "ID": [self.id] * len(energies), "Conformer": conformer_ids, "Energy": energies, } ) return df
[docs] def sort_conformers(self, energy_range=5): """ For the given molecule and the conformer energies order the energies and only keep any conformers with in the energy range of the lowest energy conformer. :param energy_range: The energy range (kcal/mol), above the minimum, for which conformers should be kept. """ if self.GetNumConformers() == 0: print("An rmol doesn't have any conformers. Ignoring.") return None elif self.opt_energies is None: raise AttributeError( "Please run the optimise_in_receptor in order to generate the energies first. " ) from .receptor import sort_conformers final_mol, final_energies = sort_conformers( self, self.opt_energies, energy_range=energy_range ) # overwrite the current confs self.RemoveAllConformers() [ self.AddConformer(conformer, assignId=True) for conformer in final_mol.GetConformers() ] self._save_opt_energies(final_energies) # build a dataframe with the molecules conformer_ids = [c.GetId() for c in self.GetConformers()] df = pandas.DataFrame( { "ID": [self.id] * len(final_energies), "Conformer": conformer_ids, "Energy": final_energies, } ) return df
[docs] def rep2D(self, **kwargs): """ Use RDKit and get a 2D diagram. Uses Compute2DCoords and Draw.MolToImage function Works with IPython Notebook. :param **kwargs: are passed further to Draw.MolToImage function. """ return rep2D(self, **kwargs)
[docs] def rep3D( self, view=None, prody=None, template=False, confIds: Optional[List[int]] = None ): """ Use py3Dmol to obtain the 3D view of the molecule. Works with IPython Notebook. :param view: a view to which add the visualisation. Useful if one wants to 3D view multiple conformers in one view. :type view: py3Dmol view instance (None) :param prody: A prody protein around which a view 3D can be created :type prody: Prody instance (Default: None) :param template: Whether to visualise the original 3D template as well from which the molecule was made. :type template: bool (False) :param confIds: Select the conformations for display. :type confIds: List[int] """ if prody is not None: view = view3D(prody) if view is None: view = py3Dmol.view(width=400, height=400, viewergrid=(1, 1)) for conf in self.GetConformers(): # ignore the confIds we've not asked for if confIds is not None and conf.GetId() not in confIds: continue mb = Chem.MolToMolBlock(self, confId=conf.GetId()) view.addModel(mb, "lig") # use reverse indexing to reference the just added conformer # http://3dmol.csb.pitt.edu/doc/types.html#AtomSelectionSpec # cmap = plt.get_cmap("tab20c") # hex = to_hex(cmap.colors[i]).split('#')[-1] view.setStyle({"model": -1}, {"stick": {}}) if template: mb = Chem.MolToMolBlock(self.template) view.addModel(mb, "template") # show as sticks view.setStyle({"model": -1}, {"stick": {"color": "0xAF10AB"}}) # zoom to the last added model view.zoomTo({"model": -1}) return view
[docs] def remove_clashing_confs(self, prot, min_dst_allowed=1.0): """ Removing conformations that class with the protein. Note that the original conformer should be well docked into the protein, ideally with some space between the area of growth and the protein, so that any growth on the template doesn't automatically cause clashes. :param prot: The protein against which the conformers should be tested. :type prot: Prody instance :param min_dst_allowed: If any atom is within this distance in a conformer, the conformer will be deleted. :type min_dst_allowed: float in Angstroms """ prot_coords = prot.getCoords() counter = 0 for conf in list(self.GetConformers())[::-1]: confid = conf.GetId() # for each atom check how far it is from the protein atoms eacheach_shortest = [] for coord in conf.GetPositions(): shortest = np.min(np.sqrt(np.sum((coord - prot_coords) ** 2, axis=1))) eacheach_shortest.append(shortest) min_dst = np.min(eacheach_shortest) if min_dst < min_dst_allowed: self.RemoveConformer(confid) print(f"Clash with the protein. Removing conformer id: {confid}")
[docs] @staticmethod def set_gnina(loc): """ Set the location of the binary file gnina. This could be your own compiled directory, or a directory where you'd like it to be downloaded. By default, gnina path is to the working directory (~500MB). :param loc: path to gnina binary file. E.g. /dir/path/gnina. Note that right now gnina should be a binary file with that specific filename "gnina". :type loc: str """ # set gnina location path = Path(loc) if path.is_file(): assert path.name == "gnina", 'Please ensure gnina binary is named "gnina"' RMol.gnina_dir = path.parent else: raise Exception("The path is not the binary file gnina")
# extend this with running a binary check @staticmethod def _check_download_gnina(): """ Check if gnina works. Otherwise, download it. """ if RMol.gnina_dir is None: # assume it is in the current directory RMol.gnina_dir = os.getcwd() # check if gnina works try: subprocess.run( ["./gnina", "--help"], capture_output=True, cwd=RMol.gnina_dir ) return except FileNotFoundError as E: pass # gnina is not found, try downloading it print(f"Gnina not found or set. Download gnina (~500MB) into {RMol.gnina_dir}") gnina = os.path.join(RMol.gnina_dir, "gnina") # fixme - currently download to the working directory (Home could be more applicable). urlretrieve( "https://github.com/gnina/gnina/releases/download/v1.0.1/gnina", filename=gnina, ) # make executable (chmod +x) mode = os.stat(gnina).st_mode os.chmod(gnina, mode | stat.S_IEXEC) # check if it works subprocess.run(["./gnina", "--help"], capture_output=True, cwd=RMol.gnina_dir)
[docs] def gnina(self, receptor_file): """ Use gnina to extract CNNaffinity, and convert it into IC50. LIMITATION: currenly the gnina binaries do not support Mac. :param receptor_file: Path to the receptor file. :type receptor_file: str """ self._check_download_gnina() # obtain the absolute file to the receptor receptor = Path(receptor_file) assert receptor.exists() # make a temporary sdf file for gnina tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".sdf") with Chem.SDWriter(tmp.name) as w: for conformer in self.GetConformers(): w.write(self, confId=conformer.GetId()) # run the code on the sdf process = subprocess.run( [ "./gnina", "--score_only", "-l", tmp.name, "-r", receptor.absolute(), "--seed", "0", "--stripH", "False", ], capture_output=True, cwd=RMol.gnina_dir, ) output = process.stdout.decode("utf-8") CNNaffinities_str = re.findall(r"CNNaffinity: (-?\d+.\d+)", output) # convert to floats CNNaffinities = list(map(float, CNNaffinities_str)) # generate IC50 from the CNNaffinities ic50s = list(map(ic50, CNNaffinities)) # create a dataframe conformer_ids = [c.GetId() for c in self.GetConformers()] df = pandas.DataFrame( { "ID": [self.id] * len(CNNaffinities), "Conformer": conformer_ids, "CNNaffinity": CNNaffinities, "CNNaffinity->IC50s": ic50s, } ) return df
[docs] def to_file(self, file_name: str): """ Write the molecule and all conformers to file. Note: The file type is worked out from the name extension by splitting on `.`. """ file_type = file_name.split(".")[-1] write_functions = { "mol": Chem.MolToMolBlock, "sdf": Chem.MolToMolBlock, "pdb": Chem.MolToPDBBlock, "xyz": Chem.MolToXYZBlock, } func = write_functions.get(file_type, None) if func is None: raise RuntimeError( f"The file type {file_type} is not support please chose from {write_functions.keys()}" ) with open(file_name, "w") as output: for conformer in self.GetConformers(): output.write(func(self, confId=conformer.GetId()))
[docs] def df(self): """ Generate a pandas dataframe row for this molecule with SMILES. :returns: pandas dataframe row. """ df = pandas.DataFrame( { "ID": [self.id], "Smiles": [Chem.MolToSmiles(self)], } ) # attach energies if they're present if self.opt_energies: df = df.assign( Energies=", ".join([str(e) for e in sorted(self.opt_energies)]) ) df.set_index(["ID"], inplace=True) return df
def _repr_html_(self): # return a dataframe with the rdkit visualisation df = self.df() # add a visualisation column PandasTools.AddMoleculeColumnToFrame( df, "Smiles", "Molecule", includeFingerprints=True ) return df._repr_html_()
class RGroupGrid(mols2grid.MolGrid): """ A wrapper around the mols to grid class to load and process the r group folders locally. """ def __init__(self): dataframe = self._load_molecules() super(RGroupGrid, self).__init__( dataframe, removeHs=True, mol_col="Mol", use_coords=False, name="m2" ) def _load_molecules(self) -> pandas.DataFrame: """ Load the local r groups into rdkit molecules """ molecules = [] names = [] builtin_rgroups = Path(__file__).parent / "data" / "rgroups" / "library" for molfile in glob.glob(str(builtin_rgroups / "*.mol")): r_mol = Chem.MolFromMolFile(molfile, removeHs=False) molecules.append(r_mol) names.append(Path(molfile).stem) # highlight the attachment atom for atom in r_mol.GetAtoms(): if atom.GetAtomicNum() == 0: setattr(r_mol, "__sssAtoms", [atom.GetIdx()]) return pandas.DataFrame({"Mol": molecules, "Name": names}) def _ipython_display_(self): from IPython.display import display, update_display, display_html subset = ["img", "Name", "mols2grid-id"] display_html(self.display(subset=subset, substruct_highlight=True)) def get_selected(self): # use the new API df = self.get_selection() # now get a list of the molecules return list(df["Mol"]) class RLinkerGrid(mols2grid.MolGrid): """ A wrapper around the mols to grid class to load and process the linker folders locally. """ def __init__(self): dataframe = self._load_molecules() super(RLinkerGrid, self).__init__( dataframe, removeHs=True, mol_col="Mol", use_coords=False, name="m1", prerender=False, ) def _load_molecules(self) -> pandas.DataFrame: """ Load the local linkers into rdkit molecules """ builtin_rlinkers = Path(__file__).parent / "data" / "linkers" / "library" linker_files = glob.glob(str(builtin_rlinkers / "*.sdf")) # sort the linkers so that [R1]C[R2] is next to [R2]C[R1] in the grid linker_files = sorted( linker_files, key=lambda smiles: smiles.replace("[*:1]", "R").replace("[*:2]", "R"), ) linkers = [] for molfile in linker_files: r_mol = list(Chem.SDMolSupplier(molfile, removeHs=False)).pop() # generate a searchable name in the form of a simple SMILE without hydrogens name = ( Chem.MolToSmiles(Chem.RemoveHs(r_mol), canonical=False) .replace("[*:1]", "R1") .replace("[*:2]", "R2") ) # simplify the names for the user linkers.append( { "Mol": r_mol, "Name": name, "Common": r_mol.GetIntProp( "SmileIndex" ), # extract the index property from the original publication } ) # presort using the original publication index linkers = sorted(linkers, key=lambda i: i["Common"]) return pandas.DataFrame(linkers) def _ipython_display_(self): from IPython.display import display subset = ["img", "Name", "mols2grid-id"] return display(self.display(subset=subset, substruct_highlight=True)) def get_selected(self): # use the new API df = self.get_selection() # now get a list of the molecules return list(df["Mol"]) def link(Rgroups, Rlinkers, One2One=False): """ :param Rgroups: :param Rlinkers: :param One2One: If True, for each selected RGroup, a selected linker with the same index is added. Alternatively, each linker is added to each RGroup resulting in n x m number of combinsions, with n being the number of RGroups and m the number of linkers. :return: """ # convert rgroups/linkers to smiles rgroup_smiles = [Chem.MolToSmiles(mol) for mol in Rgroups] linker_smiles = [Chem.MolToSmiles(mol) for mol in Rlinkers] # loop over and combine smiles combined = [] if One2One: for linker, rgroup in zip(linker_smiles, rgroup_smiles): combined.append(rgroup.replace("*", "*" + linker)) else: for linker in linker_smiles: for rgroup in rgroup_smiles: combined.append(rgroup.replace("*", "*" + linker)) # generate 3D conformers for each molecule mols = [] for smile in combined: mol = Chem.MolFromSmiles(smile) AllChem.EmbedMultipleConfs(mol, numConfs=1, params=AllChem.ETKDGv3()) mols.append(mol) return mols
[docs]class RList(RInterface, list): """ Streamline working with RMol by presenting the same interface on the list, and allowing to dispatch the functions to any single member. """ def rep2D(self, subImgSize=(400, 400), **kwargs): return Draw.MolsToGridImage( [mol.rep2D(rdkit_mol=True, **kwargs) for mol in self], subImgSize=subImgSize ) def toxicity(self): df = pandas.concat([m.toxicity() for m in self]) # add a column with the visualisation PandasTools.AddMoleculeColumnToFrame( df, "Smiles", "Molecule", includeFingerprints=True ) return df def generate_conformers( self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs ): for i, rmol in enumerate(self): print(f"RMol index {i}") rmol.generate_conformers(num_conf, minimum_conf_rms, **kwargs) def GetNumConformers(self): return [rmol.GetNumConformers() for rmol in self] def remove_clashing_confs(self, prot, min_dst_allowed=1): for i, rmol in enumerate(self): print(f"RMol index {i}") rmol.remove_clashing_confs(prot, min_dst_allowed=min_dst_allowed)
[docs] def optimise_in_receptor(self, *args, **kwargs): """ Replace the current molecule with the optimised one. Return lists of energies. """ # return pandas.concat([m.toxicity() for m in self]) dfs = [] for i, rmol in enumerate(self): print(f"RMol index {i}") dfs.append(rmol.optimise_in_receptor(*args, **kwargs)) df = pandas.concat(dfs) df.set_index(["ID", "Conformer"], inplace=True) return df
def sort_conformers(self, energy_range=5): dfs = [] for i, rmol in enumerate(self): print(f"RMol index {i}") dfs.append(rmol.sort_conformers(energy_range)) df = pandas.concat(dfs) df.set_index(["ID", "Conformer"], inplace=True) return df def gnina(self, receptor_file): dfs = [] for i, rmol in enumerate(self): print(f"RMol index {i}") dfs.append(rmol.gnina(receptor_file)) df = pandas.concat(dfs) df.set_index(["ID", "Conformer"], inplace=True) return df
[docs] def discard_missing(self): """ Remove from this list the molecules that have no conformers """ removed = [] for rmol in self[::-1]: if rmol.GetNumConformers() == 0: rmindex = self.index(rmol) print( f"Discarding a molecule (id {rmindex}) due to the lack of conformers. " ) self.remove(rmol) removed.append(rmindex) return removed
def _repr_html_(self): # return the dataframe with the visualisation column of the dataframe df = pandas.concat([rmol.df() for rmol in self]) # add a column with the visualisation PandasTools.AddMoleculeColumnToFrame( df, "Smiles", "Molecule", includeFingerprints=True ) return df._repr_html_()
[docs]def build_molecules( templates: Union[Chem.Mol, List[Chem.Mol]], r_groups: Union[Chem.Mol, List[Chem.Mol], int], attachment_points: Optional[List[int]] = None, ): """ For the given core molecule and list of attachment points and r groups enumerate the possible molecules and return a list of them. :param template: The core scaffold molecule to attach the r groups to, or a list of them. :param r_group: The list of rdkit molecules which should be considered r groups or the RGroup Grid with highlighted molecules. :param attachment_points: The list of atom index in the core ligand that the r groups should be attached to. If it is empty, connecting points are sought out and matched. """ # fixme - special case after changing the API: remove in the future # This is a temporary warning about the change in the interface. # This change is because there are situations where the attachment_points do not need to be passed to the function. if ( isinstance(r_groups, list) and len(r_groups) > 0 and isinstance(r_groups[0], int) ): print( 'Warning: second argument is detected to be an integer. It is now "r_groups" ' "whereas attachement_points are provided as the 3rd argument. " ) raise Exception( "Please note that after adding the linker to FEgrow (version 1.1), " 'the "build_molecules" function interface has changed to' ' "build_molecules(core_ligand, r_groups, attachment_points)". ' ) # ensure template and r_group are lists if not issubclass(templates.__class__, List): templates = [templates] if not issubclass(r_groups.__class__, List): r_groups = [r_groups] # make a deep copy of r_groups/linkers to ensure we don't modify the library templates = [copy.deepcopy(mol) for mol in templates] r_groups = [copy.deepcopy(mol) for mol in r_groups] # get attachment points for each template if not attachment_points: # attempt to generate the attachment points by picking the joining molecule # case: a list of templates previously joined with linkers requires iterating over them attachment_points = [__getAttachmentVector(lig)[0].GetIdx() for lig in templates] if not attachment_points: raise Exception("Could not find attachement points. ") if len(attachment_points) != len(templates): raise Exception( f"There must be one attachment point for each template. " f"Provided attachement points = {len(attachment_points)} " f"with templates number: {len(templates)}" ) combined_mols = RList() id_counter = 0 for atom_idx, core_ligand in zip(attachment_points, templates): for r_mol in r_groups: core_mol = RMol(copy.deepcopy(core_ligand)) merged_mol = merge_R_group( mol=core_mol, R_group=r_mol, replaceIndex=atom_idx ) # assign the identifying index to the molecule merged_mol.id = id_counter combined_mols.append(merged_mol) id_counter += 1 return combined_mols