Source code for grits.coarsegrain

"""GRiTS: Coarse-graining tools."""

import json
import os
import tempfile
from collections import defaultdict
from warnings import catch_warnings, simplefilter, warn

import ele
import freud
import gsd.hoomd
import numpy as np
from cmeutils.gsd_utils import get_molecule_cluster, identify_snapshot_connections
from ele import element_from_symbol
from mbuild import Compound, clone
from mbuild.utils.io import run_from_ipython
from openbabel import pybel

from grits.utils import (
    NumpyEncoder,
    comp_from_snapshot,
    get_bonds,
    get_major_axis,
    get_quaternion,
    has_common_member,
    has_number,
)

__all__ = ["CG_Compound", "CG_System", "Bead"]


[docs] class CG_Compound(Compound): """Coarse-grained Compound. Wrapper for :py:class:`mbuild.Compound`. Coarse-grained mapping can be specified using SMARTS grammar. Parameters ---------- compound : mbuild.Compound Fine-grain structure to be coarse-grained beads : dict, default None Dictionary with keys containing desired bead name and values containing SMARTS string specification of that bead. For example:: beads = {"_B": "c1sccc1", "_S": "CCC"} would map a ``"_B"`` bead to any thiophene moiety (``"c1sccc1"``) found in the compound and an ``"_S"`` bead to a propyl moiety (``"CCC"``). User must provide only one of beads or mapping. mapping : dict or path, default None Either a dictionary or path to a json file of a dictionary. Dictionary keys contain desired bead name and SMARTS string specification of that bead and values containing list of tuples of atom indices:: mapping = {"_B...c1sccc1": [(0, 4, 3, 2, 1), ...]} User must provide only one of beads or mapping. allow_overlap : bool, default False Whether to allow beads representing ring structures to share atoms. add_hydrogens : bool, default False Whether to add hydrogens. Useful for united-atom models. aniso_beads : bool, default False Whether to calculate orientations for anisotropic beads. Note: Bead sizes should be fitted during paramaterization. Only Gay-Berne major axis orientations are calculated here. Attributes ---------- atomistic : mbuild.Compound The atomistic structure. mapping : dict A mapping from atomistic to coarse-grain structure. Dictionary keys are the bead name and SMARTS string (separated by "..."), and the values are a list of tuples of fine-grain particle indices for each bead instance:: {"_B...c1sccc1": [(0, 4, 3, 2, 1), ...], ...} anchors : dict A mapping of the anchor particle indices in each bead. Dictionary keys are the bead name and the values are a set of indices:: {"_B": {0, 2, 3}, ...} bond_map: list[tuple(str, tuple(int, int))] A list of the bond types and the anchors to use for that bond:: [('_B-_S', (3, 0)), ...] """ def __init__( self, compound, beads=None, mapping=None, allow_overlap=False, add_hydrogens=False, aniso_beads=False, **kwargs, ): super(CG_Compound, self).__init__(**kwargs) if (beads is None) == (mapping is None): raise ValueError( "Please provide only one of either beads or mapping." ) self.atomistic = compound self.anchors = None self.bond_map = None self.aniso_beads = aniso_beads if beads is not None: mol = compound.to_pybel() mol.OBMol.PerceiveBondOrders() if add_hydrogens: n_atoms = mol.OBMol.NumAtoms() # This is a goofy work around necessary for the aromaticity # to be set correctly. with tempfile.NamedTemporaryFile() as f: mol.write(format="mol2", filename=f.name, overwrite=True) mol = list(pybel.readfile("mol2", f.name))[0] mol.OBMol.AddHydrogens() # mol.addh() n_atoms2 = mol.OBMol.NumAtoms() print(f"Added {n_atoms2-n_atoms} hydrogens.") self._set_mapping(beads, mol, allow_overlap) elif mapping is not None: if not isinstance(mapping, dict): with open(mapping, "r") as f: mapping = json.load(f) self.mapping = mapping self._cg_particles() self._cg_bonds() def __repr__(self): """Format the CG_Compound representation.""" return ( f"<{self.name}: {self.n_particles} beads " + f"(from {self.atomistic.n_particles} atoms), " + "pos=({:.4f},{: .4f},{: .4f}), ".format(*self.pos) + f"{self.n_bonds:d} bonds>" ) def _set_mapping(self, beads, mol, allow_overlap): """Set the mapping attribute.""" particle_ids = np.array([id(p) for p in self.atomistic.particles()]) matches = [] for bead_name, smart_str in beads.items(): smarts = pybel.Smarts(smart_str) if not smarts.findall(mol): warn(f"{smart_str} not found in compound!") for group in smarts.findall(mol): # correct for SMARTS one-based indexing group = tuple(i - 1 for i in group) _group = list(group) for p_idx in group: for particle in self.atomistic[p_idx].direct_bonds(): if particle.element == ele.element_from_symbol("H"): h_idx = int( np.where(particle_ids == id(particle))[0][0] ) _group.append(h_idx) group = tuple(_group) matches.append((group, smart_str, bead_name)) seen = set() mapping = defaultdict(list) for group, smarts, name in matches: if allow_overlap: # smart strings for rings can share atoms # add bead regardless of whether it was seen if has_number(smarts): seen.update(group) mapping[f"{name}...{smarts}"].append(group) # alkyl chains should be exclusive else: if has_common_member(seen, group): pass else: seen.update(group) mapping[f"{name}...{smarts}"].append(group) else: if has_common_member(seen, group): pass else: seen.update(group) mapping[f"{name}...{smarts}"].append(group) n_atoms = mol.OBMol.NumAtoms() if n_atoms != len(seen): # TODO this warning seems to trigger on re-adding H into ring-containing UA systems warn(f"Some atoms have been left out of coarse-graining!") # TODO make this more informative self.mapping = mapping def _cg_particles(self): """Set the beads in the coarse-structure.""" orientations = [] for key, inds in self.mapping.items(): name, smarts = key.split("...") for group in inds: masses = np.array([self.atomistic[i].mass for i in group]) tot_mass = sum(masses) bead_xyz = self.atomistic.xyz[group, :] avg_xyz = np.mean(bead_xyz, axis=0) orientation = None if self.aniso_beads: # filter out hydrogens hmass = element_from_symbol("H").mass heavy_positions = bead_xyz[np.where(masses > hmass)] if len(heavy_positions) > 2: major_axis, _ = get_major_axis(heavy_positions) elif len(heavy_positions) == 2: major_axis = heavy_positions[1] - heavy_positions[0] else: major_axis = None orientation = get_quaternion(major_axis) orientations.append(orientation) bead = Bead( name=name, pos=avg_xyz, smarts=smarts, mass=tot_mass, orientation=orientation, ) self.add(bead) def _cg_bonds(self): """Set the bonds in the coarse structure.""" bonds = get_bonds(self.atomistic) bead_inds = [ (key.split("...")[0], group) for key, inds in self.mapping.items() for group in inds ] anchors = defaultdict(set) bond_map = [] for i, (iname, igroup) in enumerate(bead_inds[:-1]): for j, (jname, jgroup) in enumerate(bead_inds[(i + 1) :]): for a, b in bonds: if a in igroup and b in jgroup: anchors[iname].add(igroup.index(a)) anchors[jname].add(jgroup.index(b)) bondinfo = ( f"{iname}-{jname}", (igroup.index(a), jgroup.index(b)), ) elif b in igroup and a in jgroup: anchors[iname].add(igroup.index(b)) anchors[jname].add(jgroup.index(a)) bondinfo = ( f"{iname}-{jname}", (igroup.index(b), jgroup.index(a)), ) else: continue if bondinfo not in bond_map: # If the bond is between two beads of the same type, # add it to the end if iname == jname: bond_map.append(bondinfo) # Otherwise add it to the beginning else: bond_map.insert(0, bondinfo) self.add_bond([self[i], self[j + i + 1]]) if anchors and bond_map: self.anchors = anchors self.bond_map = bond_map
[docs] def save_mapping(self, filename=None): """Save the mapping operator to a json file. Parameters ---------- filename : str, default None Filename where the mapping operator will be saved in json format. If None is provided, the filename will be CG_Compound.name + "_mapping.json". Returns ------- str Path to saved mapping """ if filename is None: filename = f"{self.name}_mapping.json" with open(filename, "w") as f: json.dump(self.mapping, f) print(f"Mapping saved to {filename}") return filename
[docs] def visualize( self, show_ports=False, color_scheme={}, show_atomistic=False, scale=1.0 ): # pragma: no cover """Visualize the Compound using py3dmol. Allows for visualization of a Compound within a Jupyter Notebook. Modified to from :py:meth:`mbuild.Compound.visualize` to show atomistic elements (translucent) with larger CG beads. Parameters ---------- show_ports : bool, default False Visualize Ports in addition to Particles color_scheme : dict, default {} Specify coloring for non-elemental particles keys are strings of the particle names values are strings of the colors:: {'_CGBEAD': 'blue'} show_atomistic : bool, default False Show the atomistic structure stored in :py:attr:`CG_Compound.atomistic` scale : float, default 1.0 Scaling factor to modify the size of objects in the view. Returns ------- view : py3Dmol.view """ if not run_from_ipython(): raise RuntimeError( "Visualization is only supported in Jupyter Notebooks." ) import py3Dmol atom_names = [] if self.atomistic is not None and show_atomistic: atomistic = clone(self.atomistic) for particle in atomistic: if not particle.name: particle.name = "UNK" else: if particle.name not in ("Compound", "CG_Compound"): atom_names.append(particle.name) coarse = clone(self) modified_color_scheme = {} for name, color in color_scheme.items(): # Py3dmol does some element string conversions, # first character is as-is, rest of the characters are lowercase new_name = name[0] + name[1:].lower() modified_color_scheme[new_name] = color modified_color_scheme[name] = color cg_names = [] for particle in coarse: if not particle.name: particle.name = "UNK" else: if particle.name not in ("Compound", "CG_Compound"): cg_names.append(particle.name) tmp_dir = tempfile.mkdtemp() view = py3Dmol.view() if atom_names: atomistic.save( os.path.join(tmp_dir, "atomistic_tmp.mol2"), include_ports=show_ports, overwrite=True, ) # atomistic with open(os.path.join(tmp_dir, "atomistic_tmp.mol2"), "r") as f: view.addModel(f.read(), "mol2", keepH=True) if cg_names: opacity = 0.6 else: opacity = 1.0 view.setStyle( { "stick": { "radius": 0.2 * scale, "opacity": opacity, "color": "grey", }, "sphere": { "scale": 0.3 * scale, "opacity": opacity, "colorscheme": modified_color_scheme, }, } ) # coarse if cg_names: # silence warning about No element found for CG bead with catch_warnings(): simplefilter("ignore") coarse.save( os.path.join(tmp_dir, "coarse_tmp.mol2"), include_ports=show_ports, overwrite=True, ) with open(os.path.join(tmp_dir, "coarse_tmp.mol2"), "r") as f: view.addModel(f.read(), "mol2", keepH=True) if self.atomistic is None: scale = 0.3 * scale else: scale = 0.7 * scale view.setStyle( {"atom": cg_names}, { "stick": { "radius": 0.2 * scale, "opacity": 1, "color": "grey", }, "sphere": { "scale": scale, "opacity": 1, "colorscheme": modified_color_scheme, }, }, ) view.zoomTo() return view
[docs] class Bead(Compound): """Coarse-grained Bead. Wrapper for :py:class:`mbuild.Compound`. Parameters ---------- smarts : str, default None SMARTS string used to specify this Bead. orientation : numpy array, default None Quaternion describing an anisotropic Gay-Berne bead's orientation. Attributes ---------- smarts : str SMARTS string used to specify this Bead. orientation : numpy array Quaternion describing an anisotropic Gay-Berne bead's orientation. """ def __init__(self, smarts=None, orientation=None, **kwargs): self.smarts = smarts self.orientation = orientation super(Bead, self).__init__(element=None, **kwargs)
[docs] class CG_System: """Coarse-grained System. Coarse-grained mapping can be specified using SMARTS grammar or the indices of particles. Parameters ---------- gsdfile : path Path to a gsd file. beads : dict, default None Dictionary with keys containing desired bead name and values containing SMARTS string specification of that bead. For example:: beads = {"_B": "c1sccc1", "_S": "CCC"} would map a ``"_B"`` bead to any thiophene moiety (``"c1sccc1"``) found in the compound and an ``"_S"`` bead to a propyl moiety (``"CCC"``). User must provide only one of beads or mapping. mapping : dict or path, default None Either a dictionary or path to a json file of a dictionary. Dictionary keys contain desired bead name and SMARTS string specification of that bead and values containing list of lists of atom indices:: mapping = {"_B...c1sccc1": [[0, 4, 3, 2, 1], ...]} User must provide only one of beads or mapping. If a mapping is provided, the bonding between the beads will not be set. allow_overlap : bool, default False, Whether to allow beads representing ring structures to share atoms. conversion_dict : dictionary, default None Dictionary to map particle types to their element. length_scale : float, default 1.0 Factor by which to scale length values. mass__scale : float, default 1.0 Factor by which to scale mass values. add_hydrogens : bool, default False Whether to add hydrogens. Useful for united-atom models. aniso_beads : bool, default False Whether to calculate orientations for anisotropic beads. Note: Bead sizes should be fitted during paramaterization. These are not calculated here. Attributes ---------- mapping : dict A mapping from atomistic to coarse-grain structure. Dictionary keys are the bead name and SMARTS string (separated by "..."), and the values are a list of numpy arrays of fine-grain particle indices for each bead instance:: {"_B...c1sccc1": [np.array([0, 4, 3, 2, 1]), ...], ...} """ def __init__( self, gsdfile, beads=None, mapping=None, allow_overlap=False, conversion_dict=None, length_scale=1.0, mass_scale=1.0, add_hydrogens=False, aniso_beads=False, ): if (beads is None) == (mapping is None): raise ValueError( "Please provide only one of either beads or mapping." ) self.gsdfile = gsdfile self._compounds = [] self._inds = [] self._bond_array = None self.mass_scale = mass_scale self.aniso_beads = aniso_beads self.mass_scale = mass_scale if beads is not None: # get compounds self._get_compounds( beads=beads, allow_overlap=allow_overlap, length_scale=length_scale, conversion_dict=conversion_dict, add_hydrogens=add_hydrogens, aniso_beads=aniso_beads, ) # calculate the bead mappings for the entire trajectory self._set_mapping() elif mapping is not None: if not isinstance(mapping, dict): with open(mapping, "r") as f: mapping = json.load(f) self.mapping = mapping def _get_compounds( self, beads, allow_overlap, length_scale, conversion_dict, add_hydrogens, aniso_beads, ): """Get compounds for each molecule type in the gsd trajectory.""" # Use the first frame to find the coarse-grain mapping with gsd.hoomd.open(self.gsdfile) as t: snap = t[0] # Use the conversion dictionary to map particle type to element symbol if conversion_dict is not None: snap.particles.types = [ conversion_dict[i].symbol for i in snap.particles.types ] # Break apart the snapshot into separate molecules molecules = get_molecule_cluster(snap=snap) mol_inds = [] for i in range(max(molecules) + 1): mol_inds.append(np.where(molecules == i)[0]) # If molecule length is different, it will be assumed to be different mol_lengths = [len(i) for i in mol_inds] uniq_mol_inds = [] for l in set(mol_lengths): uniq_mol_inds.append(mol_inds[mol_lengths.index(l)]) # Convert each unique molecule to a compound for inds in uniq_mol_inds: l = len(inds) mb_comp = comp_from_snapshot( snapshot=snap, indices=inds, length_scale=length_scale, mass_scale=self.mass_scale, ) self._compounds.append( CG_Compound( compound=mb_comp, allow_overlap=allow_overlap, beads=beads, add_hydrogens=add_hydrogens, aniso_beads=aniso_beads, ) ) self._inds.append( [mol_inds[i] for i in np.where(np.array(mol_lengths) == l)[0]] ) def _set_mapping(self): """Scale the mapping from each compound to the entire trajectory.""" def shift_value(i): n_before, n_bead = order[types[i]] return ( n_comps * n_before + (i - n_before) + comp_idx * n_bead + bead_count ) v_shift = np.vectorize(shift_value) self.mapping = {} all_bonds = [] bead_count = 0 for comp, inds in zip(self._compounds, self._inds): # Map particles for k, v in comp.mapping.items(): self.mapping[k] = [i[list(g)] for i in inds for g in v] # Map bonds p = {p: i for i, p in enumerate(comp.particles())} bond_array = np.array( [ (p[i], p[j]) if p[i] < p[j] else (p[j], p[i]) for (i, j) in comp.bonds() ] ) types = [p.name for p in comp.particles()] n_comps = len(inds) # Check that bond array exists if bond_array.size > 0: order = { i: (types.index(i), types.count(i)) for i in set(types) } comp_bonds = [] for comp_idx in range(n_comps): comp_bonds.append(v_shift(bond_array)) all_bonds += comp_bonds bead_count += n_comps * len(types) if all_bonds: all_bond_array = np.vstack(all_bonds) self._bond_array = all_bond_array[all_bond_array[:, 0].argsort()]
[docs] def save_mapping(self, filename): """Save the mapping operator to a json file. Parameters ---------- filename : str Filename where the mapping operator will be saved in json format. """ with open(filename, "w") as f: json.dump(self.mapping, f, cls=NumpyEncoder) print(f"Mapping saved to {filename}")
[docs] def save(self, cg_gsdfile, start=0, stop=None, stride=1): """Save the coarse-grain system to a gsd file. Does not calculate the image of the coarse-grain bead. Retains: - configuration: box, step - particles: N, position, typeid, types - bonds: N, group, typeid, types Parameters ---------- cg_gsdfile : str Filename for new gsd file. If file already exists, it will be overwritten. start : int, default 0 Where to start reading the gsd trajectory the system was created with. stop : int, default None Where to stop reading the gsd trajectory the system was created with. If None, will stop at the last frame. stride : int, default 1 The step size to use when iterating through start:stop """ typeid = [] types = [i.split("...")[0] for i in self.mapping] for i, inds in enumerate(self.mapping.values()): typeid.append(np.ones(len(inds)) * i) typeid = np.hstack(typeid) # Set up bond information if it exists bond_types = [] bond_ids = [] # Todo: Fill this if we need to bond_type_shapes = None N_bonds = 0 if self._bond_array is not None: N_bonds = self._bond_array.shape[0] for bond in self._bond_array: bond_pair = "-".join( [ types[int(typeid[int(bond[0])])], types[int(typeid[int(bond[1])])], ] ) if bond_pair not in bond_types: bond_types.append(bond_pair) _id = np.where(np.array(bond_types) == bond_pair)[0] bond_ids.append(_id) else: bond_types = None with gsd.hoomd.open(cg_gsdfile, "w") as new, gsd.hoomd.open( self.gsdfile, "r" ) as old: # stop being None is fine; slicing [0:None] gives whole array, # even in edge case where there's only one or two frames for s in old[start:stop:stride]: new_snap = gsd.hoomd.Frame() position = [] mass = [] orientation = [] if self.aniso_beads else None f_box = freud.Box.from_box(s.configuration.box) unwrap_pos = f_box.unwrap( s.particles.position, s.particles.image ) for i, inds in enumerate(self.mapping.values()): position += [np.mean(unwrap_pos[x], axis=0) for x in inds] mass += [ sum(s.particles.mass[x]) * self.mass_scale for x in inds ] if self.aniso_beads: for x in inds: masses = s.particles.mass[x] * self.mass_scale hmass = element_from_symbol("H").mass positions = s.particles.position[x] heavy_positions = positions[ np.where(masses > hmass) ] major_axis, ab_idxs = get_major_axis( heavy_positions ) orientation.append(get_quaternion(major_axis)) if self.aniso_beads: orientation = np.vstack(orientation) new_snap.particles.orientation = orientation position = np.vstack(position) images = f_box.get_images(position) position = f_box.wrap(position) new_snap.configuration.box = s.configuration.box new_snap.configuration.step = s.configuration.step new_snap.particles.N = len(typeid) new_snap.particles.position = position new_snap.particles.image = images new_snap.particles.typeid = typeid.astype(int) new_snap.particles.types = types new_snap.particles.mass = mass if N_bonds > 0: new_snap.bonds.N = N_bonds new_snap.bonds.group = self._bond_array new_snap.bonds.typeid = np.array(bond_ids) if bond_types is not None: new_snap.bonds.types = np.array(bond_types) else: new_snap.bonds.types = None new_snap.bonds.type_shapes = bond_type_shapes if self.aniso_beads: new_snap.particles.orientation = orientation new_snap = identify_snapshot_connections(new_snap) new.append(new_snap)