import copy
import itertools
import stat
from typing import Optional, List, Union, Tuple
import os
import glob
import tempfile
import subprocess
import re
from pathlib import Path
from urllib.request import urlretrieve
from collections import OrderedDict
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex
from prody.proteins.functions import showProtein, view3D
import py3Dmol
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdMolAlign import AlignMol
from rdkit.Chem import PandasTools
import mols2grid
import pandas
from .conformers import generate_conformers
from .toxicity import tox_props
def replace_atom(mol: Chem.Mol, target_idx: int, new_atom: int) -> Chem.Mol:
edit_mol = Chem.RWMol(mol)
for atom in edit_mol.GetAtoms():
if atom.GetIdx() == target_idx:
atom.SetAtomicNum(new_atom)
return Chem.Mol(edit_mol)
def rep2D(mol, idx=-1, rdkit_mol=False, **kwargs):
numbered = copy.deepcopy(mol)
numbered.RemoveAllConformers()
if idx:
for atom in numbered.GetAtoms():
atom.SetAtomMapNum(atom.GetIdx())
AllChem.Compute2DCoords(numbered)
if rdkit_mol:
return numbered
else:
return Draw.MolToImage(numbered, **kwargs)
def rep3D(mol):
viewer = py3Dmol.view(width=300, height=300, viewergrid=(1, 1))
for i in range(mol.GetNumConformers()):
mb = Chem.MolToMolBlock(mol, confId=i)
viewer.addModel(mb, "mol")
viewer.setStyle({"stick": {}})
viewer.zoomTo()
return viewer
def is_linker(rmol):
"""
Check if the molecule is a linker by checking if it has 2 R-group points
"""
if len([atom for atom in rmol.GetAtoms() if atom.GetAtomMapNum() in (1, 2)]) == 2:
return True
return False
def __getAttachmentVector(R_group):
"""In the R-group or a linker, search for the position of the attachment point (R atom)
and extract the atom (currently only single bond supported). In case of the linker,
the R1 atom is selected.
rgroup: fragment passed as rdkit molecule
return: tuple (ratom, ratom_neighbour)
"""
# find the R groups in the molecule
ratoms = [atom for atom in R_group.GetAtoms() if atom.GetAtomicNum() == 0]
if not len(ratoms):
raise Exception(
"The R-group does not have R-atoms (Atoms with index == 0, visualised with a '*' character)"
)
# if it is a linker, it will have more than 1 R group, pick the one with index 1
if len(ratoms) == 1:
atom = ratoms[0]
elif is_linker(R_group):
# find the attachable point
ratoms = [atom for atom in ratoms if atom.GetAtomMapNum() == 1]
atom = ratoms[0]
else:
raise Exception(
"Either missing R-atoms, or more than two R-atoms. "
'"Atom.GetAtomicNum" should be 0 for the R-atoms, and in the case of the linker, '
'"Atom.GetAtomMapNum" has to specify the order (1,2) '
)
neighbours = atom.GetNeighbors()
if len(neighbours) > 1:
raise NotImplementedError(
"The linking R atom in the R group has two or more attachment points. "
)
return atom, neighbours[0]
def merge_R_group(mol, R_group, replaceIndex):
"""function originally copied from
https://github.com/molecularsets/moses/blob/master/moses/baselines/combinatorial.py
"""
# the linking R atom on the R group
rgroup_R_atom, R_atom_neighbour = __getAttachmentVector(R_group)
print(f"Rgroup atom index {rgroup_R_atom} neighbouring {R_atom_neighbour}")
# atom to be replaced in the molecule
replace_atom = mol.GetAtomWithIdx(replaceIndex)
assert (
len(replace_atom.GetNeighbors()) == 1
), "The atom being replaced on the molecule has more neighbour atoms than 1. Not supported."
replace_atom_neighbour = replace_atom.GetNeighbors()[0]
# align the Rgroup
AlignMol(
R_group,
mol,
atomMap=(
(R_atom_neighbour.GetIdx(), replace_atom.GetIdx()),
(rgroup_R_atom.GetIdx(), replace_atom_neighbour.GetIdx()),
),
)
# merge the two molecules
combined = Chem.CombineMols(mol, R_group)
emol = Chem.EditableMol(combined)
# connect
bond_order = rgroup_R_atom.GetBonds()[0].GetBondType()
emol.AddBond(
replace_atom_neighbour.GetIdx(),
R_atom_neighbour.GetIdx() + mol.GetNumAtoms(),
order=bond_order,
)
# -1 accounts for the removed linking atom on the template
emol.RemoveAtom(rgroup_R_atom.GetIdx() + mol.GetNumAtoms())
# remove the linking atom on the template
emol.RemoveAtom(replace_atom.GetIdx())
merged = emol.GetMol()
Chem.SanitizeMol(merged)
# if the molecule was previously merged
if hasattr(mol, "template") and mol.template is not None:
# mol already had a connected e.g. a linker, therefore we use the area without the linker
template = mol.template
else:
# prepare the template
etemp = Chem.EditableMol(mol)
etemp.RemoveAtom(replace_atom.GetIdx())
template = etemp.GetMol()
with_template = RMol(merged)
with_template._save_template(template)
# save the group
with_template._save_rgroup(R_group)
if is_linker(R_group):
# the linker's label = 1 was used for the merging,
# rename label = 2 to 0 to turn it into a simple R-group
for atom in with_template.GetAtoms():
if atom.GetAtomMapNum() == 2:
atom.SetAtomMapNum(0)
return with_template
def ic50(x):
return 10 ** (-x - -9)
class RInterface:
"""
This is a shared interface for a molecule and a list of molecules.
The main purpose is to allow using the same functions on a single molecule and on a group of them.
"""
def rep2D(self, **kwargs):
pass
def toxicity(self):
pass
def generate_conformers(
self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs
):
pass
def remove_clashing_confs(self, prot, min_dst_allowed=1):
pass
[docs]class RMol(rdkit.Chem.rdchem.Mol, RInterface):
"""
RMol is essentially a wrapper around RDKit Mol with
tailored functionalities for attaching R groups, etc.
:param rmol: when provided, energies and additional metadata is preserved.
:type rmol: RMol
:param template: Provide the original molecule template
used for this RMol.
"""
gnina_dir = None
def __init__(self, *args, id=None, template=None, **kwargs):
super().__init__(*args, **kwargs)
if isinstance(args[0], RMol) or isinstance(args[0], rdkit.Chem.Mol):
self.template = args[0].template if hasattr(args[0], "template") else None
self.rgroup = args[0].rgroup if hasattr(args[0], "rgroup") else None
self.opt_energies = (
args[0].opt_energies if hasattr(args[0], "opt_energies") else None
)
self.id = args[0].id if hasattr(args[0], "id") else None
else:
self.template = template
self.rgroup = None
self.opt_energies = None
self.id = id
def _save_template(self, mol):
self.template = RMol(copy.deepcopy(mol))
def _save_rgroup(self, rgroup):
self.rgroup = rgroup
def _save_opt_energies(self, energies):
self.opt_energies = energies
[docs] def toxicity(self):
"""
Assessed various ADMET properties, including
- Lipinksi rule of 5 properties,
- the presence of unwanted substructures
- problematic functional groups
- synthetic accessibility
:return: a row of a dataframe with the descriptors
:rtype: dataframe
"""
df = tox_props(self)
# add an index column to the front
df.insert(0, "ID", self.id)
df.set_index("ID", inplace=True)
# add a column with smiles
df = df.assign(Smiles=[Chem.MolToSmiles(self)])
return df
[docs] def optimise_in_receptor(self, *args, **kwargs):
"""
Enumerate the conformers inside of the receptor by employing
ANI2x, a hybrid machine learning / molecular mechanics (ML/MM) approach.
ANI2x is neural nework potential for the ligand energetics
but works only for the following atoms: H, C, N, O, F, S, Cl.
Open Force Field Parsley force field is used for intermolecular interactions with the receptor.
:param sigma_scale_factor: is used to scale the Lennard-Jones radii of the atoms.
:param relative_permittivity: is used to scale the electrostatic interactions with the protein.
:param water_model: can be used to set the force field for any water molecules present in the binding site.
"""
if self.GetNumConformers() == 0:
print("Warning: no conformers so cannot optimise_in_receptor. Ignoring.")
return
from .receptor import ForceField, optimise_in_receptor
opt_mol, energies = optimise_in_receptor(self, *args, **kwargs)
# replace the conformers with the optimised ones
self.RemoveAllConformers()
[
self.AddConformer(conformer, assignId=True)
for conformer in opt_mol.GetConformers()
]
# save the energies
self._save_opt_energies(energies)
# build a dataframe with the molecules
conformer_ids = [c.GetId() for c in self.GetConformers()]
df = pandas.DataFrame(
{
"ID": [self.id] * len(energies),
"Conformer": conformer_ids,
"Energy": energies,
}
)
return df
[docs] def rep2D(self, **kwargs):
"""
Use RDKit and get a 2D diagram.
Uses Compute2DCoords and Draw.MolToImage function
Works with IPython Notebook.
:param **kwargs: are passed further to Draw.MolToImage function.
"""
return rep2D(self, **kwargs)
[docs] def rep3D(
self, view=None, prody=None, template=False, confIds: Optional[List[int]] = None
):
"""
Use py3Dmol to obtain the 3D view of the molecule.
Works with IPython Notebook.
:param view: a view to which add the visualisation. Useful if one wants to 3D view
multiple conformers in one view.
:type view: py3Dmol view instance (None)
:param prody: A prody protein around which a view 3D can be created
:type prody: Prody instance (Default: None)
:param template: Whether to visualise the original 3D template as well from which the molecule was made.
:type template: bool (False)
:param confIds: Select the conformations for display.
:type confIds: List[int]
"""
if prody is not None:
view = view3D(prody)
if view is None:
view = py3Dmol.view(width=400, height=400, viewergrid=(1, 1))
for conf in self.GetConformers():
# ignore the confIds we've not asked for
if confIds is not None and conf.GetId() not in confIds:
continue
mb = Chem.MolToMolBlock(self, confId=conf.GetId())
view.addModel(mb, "lig")
# use reverse indexing to reference the just added conformer
# http://3dmol.csb.pitt.edu/doc/types.html#AtomSelectionSpec
# cmap = plt.get_cmap("tab20c")
# hex = to_hex(cmap.colors[i]).split('#')[-1]
view.setStyle({"model": -1}, {"stick": {}})
if template:
mb = Chem.MolToMolBlock(self.template)
view.addModel(mb, "template")
# show as sticks
view.setStyle({"model": -1}, {"stick": {"color": "0xAF10AB"}})
# zoom to the last added model
view.zoomTo({"model": -1})
return view
[docs] def remove_clashing_confs(self, prot, min_dst_allowed=1.0):
"""
Removing conformations that class with the protein.
Note that the original conformer should be well docked into the protein,
ideally with some space between the area of growth and the protein,
so that any growth on the template doesn't automatically cause
clashes.
:param prot: The protein against which the conformers should be tested.
:type prot: Prody instance
:param min_dst_allowed: If any atom is within this distance in a conformer, the
conformer will be deleted.
:type min_dst_allowed: float in Angstroms
"""
prot_coords = prot.getCoords()
counter = 0
for conf in list(self.GetConformers())[::-1]:
confid = conf.GetId()
# for each atom check how far it is from the protein atoms
eacheach_shortest = []
for coord in conf.GetPositions():
shortest = np.min(np.sqrt(np.sum((coord - prot_coords) ** 2, axis=1)))
eacheach_shortest.append(shortest)
min_dst = np.min(eacheach_shortest)
if min_dst < min_dst_allowed:
self.RemoveConformer(confid)
print(f"Clash with the protein. Removing conformer id: {confid}")
[docs] @staticmethod
def set_gnina(loc):
"""
Set the location of the binary file gnina. This could be your own compiled directory,
or a directory where you'd like it to be downloaded.
By default, gnina path is to the working directory (~500MB).
:param loc: path to gnina binary file. E.g. /dir/path/gnina. Note that right now gnina should
be a binary file with that specific filename "gnina".
:type loc: str
"""
# set gnina location
path = Path(loc)
if path.is_file():
assert path.name == "gnina", 'Please ensure gnina binary is named "gnina"'
RMol.gnina_dir = path.parent
else:
raise Exception("The path is not the binary file gnina")
# extend this with running a binary check
@staticmethod
def _check_download_gnina():
"""
Check if gnina works. Otherwise, download it.
"""
if RMol.gnina_dir is None:
# assume it is in the current directory
RMol.gnina_dir = os.getcwd()
# check if gnina works
try:
subprocess.run(
["./gnina", "--help"], capture_output=True, cwd=RMol.gnina_dir
)
return
except FileNotFoundError as E:
pass
# gnina is not found, try downloading it
print(f"Gnina not found or set. Download gnina (~500MB) into {RMol.gnina_dir}")
gnina = os.path.join(RMol.gnina_dir, "gnina")
# fixme - currently download to the working directory (Home could be more applicable).
urlretrieve(
"https://github.com/gnina/gnina/releases/download/v1.0.1/gnina",
filename=gnina,
)
# make executable (chmod +x)
mode = os.stat(gnina).st_mode
os.chmod(gnina, mode | stat.S_IEXEC)
# check if it works
subprocess.run(["./gnina", "--help"], capture_output=True, cwd=RMol.gnina_dir)
[docs] def gnina(self, receptor_file):
"""
Use gnina to extract CNNaffinity, and convert it into IC50.
LIMITATION: currenly the gnina binaries do not support Mac.
:param receptor_file: Path to the receptor file.
:type receptor_file: str
"""
self._check_download_gnina()
# obtain the absolute file to the receptor
receptor = Path(receptor_file)
assert receptor.exists()
# make a temporary sdf file for gnina
tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".sdf")
with Chem.SDWriter(tmp.name) as w:
for conformer in self.GetConformers():
w.write(self, confId=conformer.GetId())
# run the code on the sdf
process = subprocess.run(
[
"./gnina",
"--score_only",
"-l",
tmp.name,
"-r",
receptor.absolute(),
"--seed",
"0",
"--stripH",
"False",
],
capture_output=True,
cwd=RMol.gnina_dir,
)
output = process.stdout.decode("utf-8")
CNNaffinities_str = re.findall(r"CNNaffinity: (-?\d+.\d+)", output)
# convert to floats
CNNaffinities = list(map(float, CNNaffinities_str))
# generate IC50 from the CNNaffinities
ic50s = list(map(ic50, CNNaffinities))
# create a dataframe
conformer_ids = [c.GetId() for c in self.GetConformers()]
df = pandas.DataFrame(
{
"ID": [self.id] * len(CNNaffinities),
"Conformer": conformer_ids,
"CNNaffinity": CNNaffinities,
"CNNaffinity->IC50s": ic50s,
}
)
return df
[docs] def to_file(self, file_name: str):
"""
Write the molecule and all conformers to file.
Note:
The file type is worked out from the name extension by splitting on `.`.
"""
file_type = file_name.split(".")[-1]
write_functions = {
"mol": Chem.MolToMolBlock,
"sdf": Chem.MolToMolBlock,
"pdb": Chem.MolToPDBBlock,
"xyz": Chem.MolToXYZBlock,
}
func = write_functions.get(file_type, None)
if func is None:
raise RuntimeError(
f"The file type {file_type} is not support please chose from {write_functions.keys()}"
)
with open(file_name, "w") as output:
for conformer in self.GetConformers():
output.write(func(self, confId=conformer.GetId()))
[docs] def df(self):
"""
Generate a pandas dataframe row for this molecule with SMILES.
:returns: pandas dataframe row.
"""
df = pandas.DataFrame(
{
"ID": [self.id],
"Smiles": [Chem.MolToSmiles(self)],
}
)
# attach energies if they're present
if self.opt_energies:
df = df.assign(
Energies=", ".join([str(e) for e in sorted(self.opt_energies)])
)
df.set_index(["ID"], inplace=True)
return df
def _repr_html_(self):
# return a dataframe with the rdkit visualisation
df = self.df()
# add a visualisation column
PandasTools.AddMoleculeColumnToFrame(
df, "Smiles", "Molecule", includeFingerprints=True
)
return df._repr_html_()
class RGroupGrid(mols2grid.MolGrid):
"""
A wrapper around the mols to grid class to load and process the r group folders locally.
"""
def __init__(self):
dataframe = self._load_molecules()
super(RGroupGrid, self).__init__(
dataframe, removeHs=True, mol_col="Mol", use_coords=False, name="m2"
)
def _load_molecules(self) -> pandas.DataFrame:
"""
Load the local r groups into rdkit molecules
"""
molecules = []
names = []
builtin_rgroups = Path(__file__).parent / "data" / "rgroups" / "library"
for molfile in glob.glob(str(builtin_rgroups / "*.mol")):
r_mol = Chem.MolFromMolFile(molfile, removeHs=False)
molecules.append(r_mol)
names.append(Path(molfile).stem)
# highlight the attachment atom
for atom in r_mol.GetAtoms():
if atom.GetAtomicNum() == 0:
setattr(r_mol, "__sssAtoms", [atom.GetIdx()])
return pandas.DataFrame({"Mol": molecules, "Name": names})
def _ipython_display_(self):
from IPython.display import display, update_display, display_html
subset = ["img", "Name", "mols2grid-id"]
display_html(self.display(subset=subset, substruct_highlight=True))
def get_selected(self):
# use the new API
df = self.get_selection()
# now get a list of the molecules
return list(df["Mol"])
class RLinkerGrid(mols2grid.MolGrid):
"""
A wrapper around the mols to grid class to load and process the linker folders locally.
"""
def __init__(self):
dataframe = self._load_molecules()
super(RLinkerGrid, self).__init__(
dataframe,
removeHs=True,
mol_col="Mol",
use_coords=False,
name="m1",
prerender=False,
)
def _load_molecules(self) -> pandas.DataFrame:
"""
Load the local linkers into rdkit molecules
"""
builtin_rlinkers = Path(__file__).parent / "data" / "linkers" / "library"
linker_files = glob.glob(str(builtin_rlinkers / "*.sdf"))
# sort the linkers so that [R1]C[R2] is next to [R2]C[R1] in the grid
linker_files = sorted(
linker_files,
key=lambda smiles: smiles.replace("[*:1]", "R").replace("[*:2]", "R"),
)
linkers = []
for molfile in linker_files:
r_mol = list(Chem.SDMolSupplier(molfile, removeHs=False)).pop()
# generate a searchable name in the form of a simple SMILE without hydrogens
name = (
Chem.MolToSmiles(Chem.RemoveHs(r_mol), canonical=False)
.replace("[*:1]", "R1")
.replace("[*:2]", "R2")
)
# simplify the names for the user
linkers.append(
{
"Mol": r_mol,
"Name": name,
"Common": r_mol.GetIntProp(
"SmileIndex"
), # extract the index property from the original publication
}
)
# presort using the original publication index
linkers = sorted(linkers, key=lambda i: i["Common"])
return pandas.DataFrame(linkers)
def _ipython_display_(self):
from IPython.display import display
subset = ["img", "Name", "mols2grid-id"]
return display(self.display(subset=subset, substruct_highlight=True))
def get_selected(self):
# use the new API
df = self.get_selection()
# now get a list of the molecules
return list(df["Mol"])
def link(Rgroups, Rlinkers, One2One=False):
"""
:param Rgroups:
:param Rlinkers:
:param One2One: If True, for each selected RGroup, a selected linker with the same index is added.
Alternatively, each linker is added to each RGroup resulting in n x m number of combinsions, with n being
the number of RGroups and m the number of linkers.
:return:
"""
# convert rgroups/linkers to smiles
rgroup_smiles = [Chem.MolToSmiles(mol) for mol in Rgroups]
linker_smiles = [Chem.MolToSmiles(mol) for mol in Rlinkers]
# loop over and combine smiles
combined = []
if One2One:
for linker, rgroup in zip(linker_smiles, rgroup_smiles):
combined.append(rgroup.replace("*", "*" + linker))
else:
for linker in linker_smiles:
for rgroup in rgroup_smiles:
combined.append(rgroup.replace("*", "*" + linker))
# generate 3D conformers for each molecule
mols = []
for smile in combined:
mol = Chem.MolFromSmiles(smile)
AllChem.EmbedMultipleConfs(mol, numConfs=1, params=AllChem.ETKDGv3())
mols.append(mol)
return mols
[docs]class RList(RInterface, list):
"""
Streamline working with RMol by presenting the same interface on the list,
and allowing to dispatch the functions to any single member.
"""
def rep2D(self, subImgSize=(400, 400), **kwargs):
return Draw.MolsToGridImage(
[mol.rep2D(rdkit_mol=True, **kwargs) for mol in self], subImgSize=subImgSize
)
def toxicity(self):
df = pandas.concat([m.toxicity() for m in self])
# add a column with the visualisation
PandasTools.AddMoleculeColumnToFrame(
df, "Smiles", "Molecule", includeFingerprints=True
)
return df
def generate_conformers(
self, num_conf: int, minimum_conf_rms: Optional[float] = [], **kwargs
):
for i, rmol in enumerate(self):
print(f"RMol index {i}")
rmol.generate_conformers(num_conf, minimum_conf_rms, **kwargs)
def GetNumConformers(self):
return [rmol.GetNumConformers() for rmol in self]
def remove_clashing_confs(self, prot, min_dst_allowed=1):
for i, rmol in enumerate(self):
print(f"RMol index {i}")
rmol.remove_clashing_confs(prot, min_dst_allowed=min_dst_allowed)
[docs] def optimise_in_receptor(self, *args, **kwargs):
"""
Replace the current molecule with the optimised one. Return lists of energies.
"""
# return pandas.concat([m.toxicity() for m in self])
dfs = []
for i, rmol in enumerate(self):
print(f"RMol index {i}")
dfs.append(rmol.optimise_in_receptor(*args, **kwargs))
df = pandas.concat(dfs)
df.set_index(["ID", "Conformer"], inplace=True)
return df
def sort_conformers(self, energy_range=5):
dfs = []
for i, rmol in enumerate(self):
print(f"RMol index {i}")
dfs.append(rmol.sort_conformers(energy_range))
df = pandas.concat(dfs)
df.set_index(["ID", "Conformer"], inplace=True)
return df
def gnina(self, receptor_file):
dfs = []
for i, rmol in enumerate(self):
print(f"RMol index {i}")
dfs.append(rmol.gnina(receptor_file))
df = pandas.concat(dfs)
df.set_index(["ID", "Conformer"], inplace=True)
return df
[docs] def discard_missing(self):
"""
Remove from this list the molecules that have no conformers
"""
removed = []
for rmol in self[::-1]:
if rmol.GetNumConformers() == 0:
rmindex = self.index(rmol)
print(
f"Discarding a molecule (id {rmindex}) due to the lack of conformers. "
)
self.remove(rmol)
removed.append(rmindex)
return removed
def _repr_html_(self):
# return the dataframe with the visualisation column of the dataframe
df = pandas.concat([rmol.df() for rmol in self])
# add a column with the visualisation
PandasTools.AddMoleculeColumnToFrame(
df, "Smiles", "Molecule", includeFingerprints=True
)
return df._repr_html_()
[docs]def build_molecules(
templates: Union[Chem.Mol, List[Chem.Mol]],
r_groups: Union[Chem.Mol, List[Chem.Mol], int],
attachment_points: Optional[List[int]] = None,
):
"""
For the given core molecule and list of attachment points
and r groups enumerate the possible molecules and
return a list of them.
:param template: The core scaffold molecule to attach the r groups to, or a list of them.
:param r_group: The list of rdkit molecules which should be considered
r groups or the RGroup Grid with highlighted molecules.
:param attachment_points: The list of atom index in the core ligand
that the r groups should be attached to. If it is empty, connecting points are sought out and matched.
"""
# fixme - special case after changing the API: remove in the future
# This is a temporary warning about the change in the interface.
# This change is because there are situations where the attachment_points do not need to be passed to the function.
if (
isinstance(r_groups, list)
and len(r_groups) > 0
and isinstance(r_groups[0], int)
):
print(
'Warning: second argument is detected to be an integer. It is now "r_groups" '
"whereas attachement_points are provided as the 3rd argument. "
)
raise Exception(
"Please note that after adding the linker to FEgrow (version 1.1), "
'the "build_molecules" function interface has changed to'
' "build_molecules(core_ligand, r_groups, attachment_points)". '
)
# ensure template and r_group are lists
if not issubclass(templates.__class__, List):
templates = [templates]
if not issubclass(r_groups.__class__, List):
r_groups = [r_groups]
# make a deep copy of r_groups/linkers to ensure we don't modify the library
templates = [copy.deepcopy(mol) for mol in templates]
r_groups = [copy.deepcopy(mol) for mol in r_groups]
# get attachment points for each template
if not attachment_points:
# attempt to generate the attachment points by picking the joining molecule
# case: a list of templates previously joined with linkers requires iterating over them
attachment_points = [__getAttachmentVector(lig)[0].GetIdx() for lig in templates]
if not attachment_points:
raise Exception("Could not find attachement points. ")
if len(attachment_points) != len(templates):
raise Exception(
f"There must be one attachment point for each template. "
f"Provided attachement points = {len(attachment_points)} "
f"with templates number: {len(templates)}"
)
combined_mols = RList()
id_counter = 0
for atom_idx, core_ligand in zip(attachment_points, templates):
for r_mol in r_groups:
core_mol = RMol(copy.deepcopy(core_ligand))
merged_mol = merge_R_group(
mol=core_mol, R_group=r_mol, replaceIndex=atom_idx
)
# assign the identifying index to the molecule
merged_mol.id = id_counter
combined_mols.append(merged_mol)
id_counter += 1
return combined_mols