Source code for mols2grid.utils
import gzip
import re
from ast import literal_eval
from functools import partial, wraps
from importlib.util import find_spec
from pathlib import Path
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from rdkit import Chem
env = Environment(
loader=FileSystemLoader(Path(__file__).parent / "templates"), autoescape=False
)
def requires(module):
def inner(func):
@wraps(func)
def wrapper(*args, **kwargs):
if find_spec(module):
return func(*args, **kwargs)
raise ModuleNotFoundError(
f"The module {module!r} is required to use {func.__name__!r} "
"but it is not installed!"
)
return wrapper
return inner
def tooltip_formatter(s, subset, fmt, style, transform):
"""Function to generate tooltips from a pandas Series
Parameters
----------
s : pandas.Series
Row in the internal pandas DataFrame
subset : list
Subset of columns that are used for the tooltip
fmt : str
Format string for each key-value pair of the tooltip
style : dict
CSS styling applied to each item independently
transform : dict
Functions applied to each value before rendering
"""
items = []
for k, v in s[subset].to_dict().items():
displayed = transform[k](v) if transform.get(k) else v
value = (
f'<span class="copy-me" style="{style[k](v)}">{displayed}</span>'
if style.get(k)
else f'<span class="copy-me">{displayed}</span>'
)
items.append(fmt.format(key=k, value=value))
return "<br>".join(items)
def mol_to_smiles(mol):
"""Returns a SMILES from an RDKit molecule, or None if not an RDKit mol"""
return Chem.MolToSmiles(mol) if mol else None
def mol_to_record(mol, mol_col="mol"):
"""Function to create a dict of data from an RDKit molecule"""
return {**mol.GetPropsAsDict(includePrivate=True), mol_col: mol} if mol else {}
[docs]def sdf_to_dataframe(sdf_path, mol_col="mol"):
"""Creates a dataframe of molecules from an SDFile. All property fields in
the SDFile are made available in the resulting dataframe
Parameters
----------
sdf_path : str, Path
Path to the SDFile, ending with either ``.sdf`` or ``.sdf.gz``
mol_col : str
Name of the column containing the RDKit molecules in the dataframe
Returns
-------
df : pandas.DataFrame
"""
read_file = gzip.open if str(sdf_path).endswith(".gz") else partial(open, mode="rb")
with read_file(sdf_path) as f:
return pd.DataFrame(
[mol_to_record(mol, mol_col) for mol in Chem.ForwardSDMolSupplier(f)]
)
def remove_coordinates(mol):
"""Removes the existing coordinates from the molecule. The molecule is
modified inplace"""
mol.RemoveAllConformers()
return mol
def slugify(string):
"""Replaces whitespaces with hyphens"""
return re.sub(r"\s+", "-", string)
def callback_handler(callback, event):
"""Handler for applying the callback function on change"""
data = literal_eval(event.new)
callback(data)
def _get_streamlit_script_run_ctx():
from streamlit.runtime.scriptrunner import get_script_run_ctx
return get_script_run_ctx()
def is_running_within_streamlit():
"""
Function to check whether python code is run within streamlit
Returns
-------
use_streamlit : boolean
True if code is run within streamlit, else False
"""
try:
ctx = _get_streamlit_script_run_ctx()
except ImportError:
return False
else:
return ctx is not None
def is_running_within_marimo():
"""
Function to check whether python code is run within marimo
Returns
-------
use_marimo : boolean
True if code is run within marimo, else False
"""
import sys
return "marimo" in sys.modules