"""GRiTS: Coarse-graining tools."""
import json
import os
import tempfile
from collections import defaultdict
from warnings import catch_warnings, simplefilter, warn
import ele
import freud
import gsd.hoomd
import numpy as np
from cmeutils.gsd_utils import (
get_molecule_cluster,
identify_snapshot_connections,
)
from ele import element_from_symbol
from mbuild import Compound, clone
from mbuild.utils.io import run_from_ipython
from openbabel import pybel
from grits.utils import (
NumpyEncoder,
comp_from_snapshot,
get_bonds,
get_major_axis,
get_quaternion,
has_common_member,
has_number,
)
__all__ = ["CG_Compound", "CG_System", "Bead"]
[docs]
class CG_Compound(Compound):
"""Coarse-grained Compound.
Wrapper for :py:class:`mbuild.Compound`. Coarse-grained mapping can be
specified using SMARTS grammar.
Parameters
----------
compound : mbuild.Compound
Fine-grain structure to be coarse-grained
beads : dict, default None
Dictionary with keys containing desired bead name and values containing
SMARTS string specification of that bead. For example::
beads = {"_B": "c1sccc1", "_S": "CCC"}
would map a ``"_B"`` bead to any thiophene moiety (``"c1sccc1"``) found
in the compound and an ``"_S"`` bead to a propyl moiety (``"CCC"``).
User must provide only one of beads or mapping.
mapping : dict or path, default None
Either a dictionary or path to a json file of a dictionary. Dictionary
keys contain desired bead name and SMARTS string specification of that
bead and values containing list of tuples of atom indices::
mapping = {"_B...c1sccc1": [(0, 4, 3, 2, 1), ...]}
User must provide only one of beads or mapping.
allow_overlap : bool, default False
Whether to allow beads representing ring structures to share atoms.
add_hydrogens : bool, default False
Whether to add hydrogens. Useful for united-atom models.
aniso_beads : bool, default False
Whether to calculate orientations for anisotropic beads.
Note: Bead sizes should be fitted during paramaterization.
Only Gay-Berne major axis orientations are calculated here.
Attributes
----------
atomistic : mbuild.Compound
The atomistic structure.
mapping : dict
A mapping from atomistic to coarse-grain structure. Dictionary keys are
the bead name and SMARTS string (separated by "..."), and the values are
a list of tuples of fine-grain particle indices for each bead instance::
{"_B...c1sccc1": [(0, 4, 3, 2, 1), ...], ...}
anchors : dict
A mapping of the anchor particle indices in each bead. Dictionary keys
are the bead name and the values are a set of indices::
{"_B": {0, 2, 3}, ...}
bond_map: list[tuple(str, tuple(int, int))]
A list of the bond types and the anchors to use for that bond::
[('_B-_S', (3, 0)), ...]
"""
def __init__(
self,
compound,
beads=None,
mapping=None,
allow_overlap=False,
add_hydrogens=False,
aniso_beads=False,
**kwargs,
):
super(CG_Compound, self).__init__(**kwargs)
if (beads is None) == (mapping is None):
raise ValueError(
"Please provide only one of either beads or mapping."
)
self.atomistic = compound
self.anchors = None
self.bond_map = None
self.aniso_beads = aniso_beads
if beads is not None:
mol = compound.to_pybel()
mol.OBMol.PerceiveBondOrders()
if add_hydrogens:
n_atoms = mol.OBMol.NumAtoms()
# This is a goofy work around necessary for the aromaticity
# to be set correctly.
with tempfile.NamedTemporaryFile() as f:
mol.write(format="mol2", filename=f.name, overwrite=True)
mol = list(pybel.readfile("mol2", f.name))[0]
mol.OBMol.AddHydrogens() # mol.addh()
n_atoms2 = mol.OBMol.NumAtoms()
print(f"Added {n_atoms2 - n_atoms} hydrogens.")
self._set_mapping(beads, mol, allow_overlap)
elif mapping is not None:
if not isinstance(mapping, dict):
with open(mapping, "r") as f:
mapping = json.load(f)
self.mapping = mapping
self._cg_particles()
self._cg_bonds()
def __repr__(self):
"""Format the CG_Compound representation."""
return (
f"<{self.name}: {self.n_particles} beads "
+ "pos=({:.4f},{: .4f},{: .4f}), ".format(*self.pos)
+ f"{self.n_bonds:d} bonds>"
)
def _set_mapping(self, beads, mol, allow_overlap):
"""Set the mapping attribute."""
particle_ids = np.array([id(p) for p in self.atomistic.particles()])
matches = []
for bead_name, smart_str in beads.items():
smarts = pybel.Smarts(smart_str)
if not smarts.findall(mol):
warn(f"{smart_str} not found in compound!")
for group in smarts.findall(mol):
# correct for SMARTS one-based indexing
group = tuple(i - 1 for i in group)
_group = list(group)
for p_idx in group:
for particle in self.atomistic[p_idx].direct_bonds():
if particle.element == ele.element_from_symbol("H"):
h_idx = int(
np.where(particle_ids == id(particle))[0][0]
)
_group.append(h_idx)
group = tuple(_group)
matches.append((group, smart_str, bead_name))
seen = set()
mapping = defaultdict(list)
for group, smarts, name in matches:
if allow_overlap:
# smart strings for rings can share atoms
# add bead regardless of whether it was seen
if has_number(smarts):
seen.update(group)
mapping[f"{name}...{smarts}"].append(group)
# alkyl chains should be exclusive
else:
if has_common_member(seen, group):
pass
else:
seen.update(group)
mapping[f"{name}...{smarts}"].append(group)
else:
if has_common_member(seen, group):
pass
else:
seen.update(group)
mapping[f"{name}...{smarts}"].append(group)
n_atoms = mol.OBMol.NumAtoms()
if n_atoms != len(seen):
# TODO this warning seems to trigger on re-adding H into ring-containing UA systems
warn("Some atoms have been left out of coarse-graining!")
# TODO make this more informative
self.mapping = mapping
def _cg_particles(self):
"""Set the beads in the coarse-structure."""
orientations = []
for key, inds in self.mapping.items():
name, smarts = key.split("...")
for group in inds:
masses = np.array([self.atomistic[i].mass for i in group])
tot_mass = sum(masses)
bead_xyz = self.atomistic.xyz[group, :]
avg_xyz = np.mean(bead_xyz, axis=0)
orientation = None
if self.aniso_beads:
# filter out hydrogens
hmass = element_from_symbol("H").mass
heavy_positions = bead_xyz[np.where(masses > hmass)]
if len(heavy_positions) > 2:
major_axis, _ = get_major_axis(heavy_positions)
elif len(heavy_positions) == 2:
major_axis = heavy_positions[1] - heavy_positions[0]
else:
major_axis = None
orientation = get_quaternion(major_axis)
orientations.append(orientation)
bead = Bead(
name=name,
pos=avg_xyz,
smarts=smarts,
mass=tot_mass,
orientation=orientation,
)
self.add(bead)
def _cg_bonds(self):
"""Set the bonds in the coarse structure."""
bonds = get_bonds(self.atomistic)
bead_inds = [
(key.split("...")[0], group)
for key, inds in self.mapping.items()
for group in inds
]
anchors = defaultdict(set)
bond_map = []
for i, (iname, igroup) in enumerate(bead_inds[:-1]):
for j, (jname, jgroup) in enumerate(bead_inds[(i + 1) :]):
for a, b in bonds:
if a in igroup and b in jgroup:
anchors[iname].add(igroup.index(a))
anchors[jname].add(jgroup.index(b))
bondinfo = (
f"{iname}-{jname}",
(igroup.index(a), jgroup.index(b)),
)
elif b in igroup and a in jgroup:
anchors[iname].add(igroup.index(b))
anchors[jname].add(jgroup.index(a))
bondinfo = (
f"{iname}-{jname}",
(igroup.index(b), jgroup.index(a)),
)
else:
continue
if bondinfo not in bond_map:
# If the bond is between two beads of the same type,
# add it to the end
if iname == jname:
bond_map.append(bondinfo)
# Otherwise add it to the beginning
else:
bond_map.insert(0, bondinfo)
self.add_bond([self[i], self[j + i + 1]])
if anchors and bond_map:
self.anchors = anchors
self.bond_map = bond_map
[docs]
def save_mapping(self, filename=None):
"""Save the mapping operator to a json file.
Parameters
----------
filename : str, default None
Filename where the mapping operator will be saved in json format.
If None is provided, the filename will be CG_Compound.name +
"_mapping.json".
Returns
-------
str
Path to saved mapping
"""
if filename is None:
filename = f"{self.name}_mapping.json"
with open(filename, "w") as f:
json.dump(self.mapping, f)
print(f"Mapping saved to {filename}")
return filename
[docs]
def visualize(
self, show_ports=False, color_scheme={}, show_atomistic=False, scale=1.0
): # pragma: no cover
"""Visualize the Compound using py3dmol.
Allows for visualization of a Compound within a Jupyter Notebook.
Modified to from :py:meth:`mbuild.Compound.visualize` to show atomistic
elements (translucent) with larger CG beads.
Parameters
----------
show_ports : bool, default False
Visualize Ports in addition to Particles
color_scheme : dict, default {}
Specify coloring for non-elemental particles keys are strings of
the particle names values are strings of the colors::
{'_CGBEAD': 'blue'}
show_atomistic : bool, default False
Show the atomistic structure stored in
:py:attr:`CG_Compound.atomistic`
scale : float, default 1.0
Scaling factor to modify the size of objects in the view.
Returns
-------
view : py3Dmol.view
"""
if not run_from_ipython():
raise RuntimeError(
"Visualization is only supported in Jupyter Notebooks."
)
import py3Dmol
atom_names = []
if self.atomistic is not None and show_atomistic:
atomistic = clone(self.atomistic)
for particle in atomistic:
if not particle.name:
particle.name = "UNK"
else:
if particle.name not in ("Compound", "CG_Compound"):
atom_names.append(particle.name)
coarse = clone(self)
modified_color_scheme = {}
for name, color in color_scheme.items():
# Py3dmol does some element string conversions,
# first character is as-is, rest of the characters are lowercase
new_name = name[0] + name[1:].lower()
modified_color_scheme[new_name] = color
modified_color_scheme[name] = color
cg_names = []
for particle in coarse:
if not particle.name:
particle.name = "UNK"
else:
if particle.name not in ("Compound", "CG_Compound"):
cg_names.append(particle.name)
tmp_dir = tempfile.mkdtemp()
view = py3Dmol.view()
if atom_names:
atomistic.save(
os.path.join(tmp_dir, "atomistic_tmp.mol2"),
include_ports=show_ports,
overwrite=True,
)
# atomistic
with open(os.path.join(tmp_dir, "atomistic_tmp.mol2"), "r") as f:
view.addModel(f.read(), "mol2", keepH=True)
if cg_names:
opacity = 0.6
else:
opacity = 1.0
view.setStyle(
{
"stick": {
"radius": 0.2 * scale,
"opacity": opacity,
"color": "grey",
},
"sphere": {
"scale": 0.3 * scale,
"opacity": opacity,
"colorscheme": modified_color_scheme,
},
}
)
# coarse
if cg_names:
# silence warning about No element found for CG bead
with catch_warnings():
simplefilter("ignore")
coarse.save(
os.path.join(tmp_dir, "coarse_tmp.mol2"),
include_ports=show_ports,
overwrite=True,
)
with open(os.path.join(tmp_dir, "coarse_tmp.mol2"), "r") as f:
view.addModel(f.read(), "mol2", keepH=True)
if self.atomistic is None:
scale = 0.3 * scale
else:
scale = 0.7 * scale
view.setStyle(
{"atom": cg_names},
{
"stick": {
"radius": 0.2 * scale,
"opacity": 1,
"color": "grey",
},
"sphere": {
"scale": scale,
"opacity": 1,
"colorscheme": modified_color_scheme,
},
},
)
view.zoomTo()
return view
[docs]
class Bead(Compound):
"""Coarse-grained Bead.
Wrapper for :py:class:`mbuild.Compound`.
Parameters
----------
smarts : str, default None
SMARTS string used to specify this Bead.
orientation : numpy array, default None
Quaternion describing an anisotropic Gay-Berne
bead's orientation.
Attributes
----------
smarts : str
SMARTS string used to specify this Bead.
orientation : numpy array
Quaternion describing an anisotropic Gay-Berne
bead's orientation.
"""
def __init__(self, smarts=None, orientation=None, **kwargs):
self.smarts = smarts
self.orientation = orientation
super(Bead, self).__init__(element=None, **kwargs)
[docs]
class CG_System:
"""Coarse-grained System.
Coarse-grained mapping can be specified using SMARTS grammar or the indices
of particles.
Parameters
----------
gsdfile : path
Path to a gsd file.
beads : dict, default None
Dictionary with keys containing desired bead name and values containing
SMARTS string specification of that bead. For example::
beads = {"_B": "c1sccc1", "_S": "CCC"}
would map a ``"_B"`` bead to any thiophene moiety (``"c1sccc1"``) found
in the compound and an ``"_S"`` bead to a propyl moiety (``"CCC"``).
User must provide only one of beads or mapping.
mapping : dict or path, default None
Either a dictionary or path to a json file of a dictionary. Dictionary
keys contain desired bead name and SMARTS string specification of that
bead and values containing list of lists of atom indices::
mapping = {"_B...c1sccc1": [[0, 4, 3, 2, 1], ...]}
User must provide only one of beads or mapping.
If a mapping is provided, the bonding between the beads will not be set.
allow_overlap : bool, default False,
Whether to allow beads representing ring structures to share atoms.
conversion_dict : dictionary, default None
Dictionary to map particle types to their element.
length_scale : float, default 1.0
Factor by which to scale length values.
mass__scale : float, default 1.0
Factor by which to scale mass values.
add_hydrogens : bool, default False
Whether to add hydrogens. Useful for united-atom models.
aniso_beads : bool, default False
Whether to calculate orientations for anisotropic beads.
Note: Bead sizes should be fitted during paramaterization.
These are not calculated here.
Attributes
----------
mapping : dict
A mapping from atomistic to coarse-grain structure. Dictionary keys are
the bead name and SMARTS string (separated by "..."), and the values are
a list of numpy arrays of fine-grain particle indices for each bead
instance::
{"_B...c1sccc1": [np.array([0, 4, 3, 2, 1]), ...], ...}
"""
def __init__(
self,
gsdfile,
beads=None,
mapping=None,
allow_overlap=False,
conversion_dict=None,
length_scale=1.0,
mass_scale=1.0,
add_hydrogens=False,
aniso_beads=False,
):
if (beads is None) == (mapping is None):
raise ValueError(
"Please provide only one of either beads or mapping."
)
self.gsdfile = gsdfile
self._compounds = []
self._inds = []
self._bond_array = None
self.mass_scale = mass_scale
self.aniso_beads = aniso_beads
self.mass_scale = mass_scale
if beads is not None:
# get compounds
self._get_compounds(
beads=beads,
allow_overlap=allow_overlap,
length_scale=length_scale,
conversion_dict=conversion_dict,
add_hydrogens=add_hydrogens,
aniso_beads=aniso_beads,
)
# calculate the bead mappings for the entire trajectory
self._set_mapping()
elif mapping is not None:
if not isinstance(mapping, dict):
with open(mapping, "r") as f:
mapping = json.load(f)
self.mapping = mapping
def _get_compounds(
self,
beads,
allow_overlap,
length_scale,
conversion_dict,
add_hydrogens,
aniso_beads,
):
"""Get compounds for each molecule type in the gsd trajectory."""
# Use the first frame to find the coarse-grain mapping
with gsd.hoomd.open(self.gsdfile) as t:
snap = t[0]
# Use the conversion dictionary to map particle type to element symbol
if conversion_dict is not None:
snap.particles.types = [
conversion_dict[i].symbol for i in snap.particles.types
]
# Break apart the snapshot into separate molecules
molecules = get_molecule_cluster(snap=snap)
mol_inds = []
for i in range(max(molecules) + 1):
mol_inds.append(np.where(molecules == i)[0])
# If molecule length is different, it will be assumed to be different
mol_lengths = [len(i) for i in mol_inds]
uniq_mol_inds = []
for length in set(mol_lengths):
uniq_mol_inds.append(mol_inds[mol_lengths.index(length)])
# Convert each unique molecule to a compound
for inds in uniq_mol_inds:
inds_length = len(inds)
mb_comp = comp_from_snapshot(
snapshot=snap,
indices=inds,
length_scale=length_scale,
mass_scale=self.mass_scale,
)
self._compounds.append(
CG_Compound(
compound=mb_comp,
allow_overlap=allow_overlap,
beads=beads,
add_hydrogens=add_hydrogens,
aniso_beads=aniso_beads,
)
)
self._inds.append(
[
mol_inds[i]
for i in np.where(np.array(mol_lengths) == inds_length)[0]
]
)
def _set_mapping(self):
"""Scale the mapping from each compound to the entire trajectory."""
def shift_value(i):
n_before, n_bead = order[types[i]]
return (
n_comps * n_before
+ (i - n_before)
+ comp_idx * n_bead
+ bead_count
)
v_shift = np.vectorize(shift_value)
self.mapping = {}
all_bonds = []
bead_count = 0
for comp, inds in zip(self._compounds, self._inds):
# Map particles
for k, v in comp.mapping.items():
self.mapping[k] = [i[list(g)] for i in inds for g in v]
# Map bonds
p = {p: i for i, p in enumerate(comp.particles())}
bond_array = np.array(
[
(p[i], p[j]) if p[i] < p[j] else (p[j], p[i])
for (i, j) in comp.bonds()
]
)
types = [p.name for p in comp.particles()]
n_comps = len(inds)
# Check that bond array exists
if bond_array.size > 0:
order = {
i: (types.index(i), types.count(i)) for i in set(types)
}
comp_bonds = []
for comp_idx in range(n_comps):
comp_bonds.append(v_shift(bond_array))
all_bonds += comp_bonds
bead_count += n_comps * len(types)
if all_bonds:
all_bond_array = np.vstack(all_bonds)
self._bond_array = all_bond_array[all_bond_array[:, 0].argsort()]
[docs]
def save_mapping(self, filename):
"""Save the mapping operator to a json file.
Parameters
----------
filename : str
Filename where the mapping operator will be saved in json format.
"""
with open(filename, "w") as f:
json.dump(self.mapping, f, cls=NumpyEncoder)
print(f"Mapping saved to {filename}")
[docs]
def save(self, cg_gsdfile, start=0, stop=None, stride=1):
"""Save the coarse-grain system to a gsd file.
Does not calculate the image of the coarse-grain bead.
Retains:
- configuration: box, step
- particles: N, position, typeid, types
- bonds: N, group, typeid, types
Parameters
----------
cg_gsdfile : str
Filename for new gsd file. If file already exists, it will be
overwritten.
start : int, default 0
Where to start reading the gsd trajectory the system was created
with.
stop : int, default None
Where to stop reading the gsd trajectory the system was created
with. If None, will stop at the last frame.
stride : int, default 1
The step size to use when iterating through start:stop
"""
typeid = []
types = [i.split("...")[0] for i in self.mapping]
for i, inds in enumerate(self.mapping.values()):
typeid.append(np.ones(len(inds)) * i)
typeid = np.hstack(typeid)
# Set up bond information if it exists
bond_types = []
bond_ids = []
# Todo: Fill this if we need to
bond_type_shapes = None
N_bonds = 0
if self._bond_array is not None:
N_bonds = self._bond_array.shape[0]
for bond in self._bond_array:
bond_pair = "-".join(
[
types[int(typeid[int(bond[0])])],
types[int(typeid[int(bond[1])])],
]
)
if bond_pair not in bond_types:
bond_types.append(bond_pair)
_id = bond_types.index(bond_pair)
bond_ids.append(_id)
else:
bond_types = None
with (
gsd.hoomd.open(cg_gsdfile, "w") as new,
gsd.hoomd.open(self.gsdfile, "r") as old,
):
# stop being None is fine; slicing [0:None] gives whole array,
# even in edge case where there's only one or two frames
for s in old[start:stop:stride]:
new_snap = gsd.hoomd.Frame()
position = []
mass = []
orientation = [] if self.aniso_beads else None
f_box = freud.Box.from_box(s.configuration.box)
unwrap_pos = f_box.unwrap(
s.particles.position, s.particles.image
)
for i, inds in enumerate(self.mapping.values()):
position += [np.mean(unwrap_pos[x], axis=0) for x in inds]
mass += [
sum(s.particles.mass[x]) * self.mass_scale for x in inds
]
if self.aniso_beads:
for x in inds:
masses = s.particles.mass[x] * self.mass_scale
hmass = element_from_symbol("H").mass
positions = s.particles.position[x]
heavy_positions = positions[
np.where(masses > hmass)
]
major_axis, ab_idxs = get_major_axis(
heavy_positions
)
orientation.append(get_quaternion(major_axis))
if self.aniso_beads:
orientation = np.vstack(orientation)
new_snap.particles.orientation = orientation
position = np.vstack(position)
images = f_box.get_images(position)
position = f_box.wrap(position)
new_snap.configuration.box = s.configuration.box
new_snap.configuration.step = s.configuration.step
new_snap.particles.N = len(typeid)
new_snap.particles.position = position
new_snap.particles.image = images
new_snap.particles.typeid = typeid.astype(int)
new_snap.particles.types = types
new_snap.particles.mass = mass
if N_bonds > 0:
new_snap.bonds.N = N_bonds
new_snap.bonds.group = self._bond_array
new_snap.bonds.typeid = bond_ids
if bond_types is not None:
new_snap.bonds.types = bond_types
else:
new_snap.bonds.types = None
new_snap.bonds.type_shapes = bond_type_shapes
if self.aniso_beads:
new_snap.particles.orientation = orientation
new_snap = identify_snapshot_connections(new_snap)
new.append(new_snap)