Source code for mdtraj.nmr.shift_wrappers

##############################################################################
# MDTraj: A Python Library for Loading, Saving, and Manipulating
#         Molecular Dynamics Trajectories.
# Copyright 2012-2014 Stanford University and the Authors
#
# Authors: Kyle A. Beauchamp
# Contributors: Robert McGibbon
#
# MDTraj is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as
# published by the Free Software Foundation, either version 2.1
# of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with MDTraj. If not, see <http://www.gnu.org/licenses/>.
#############################################################################


import os
import shutil
import subprocess

import numpy as np

from mdtraj.utils import enter_temp_directory, import_

##############################################################################
# Globals
##############################################################################

# Possible names for the external commands -- these are expected
# to be found in the PATH.
SHIFTX2 = ["shiftx2.py"]
SPARTA_PLUS = ["sparta+", "SPARTA+", "SPARTA+.linux"]
PPM = ["ppm_linux_64.exe"]

__all__ = [
    "chemical_shifts_shiftx2",
    "chemical_shifts_ppm",
    "chemical_shifts_spartaplus",
    "reindex_dataframe_by_atoms",
]


def find_executable(names):
    for possible in names:
        result = shutil.which(possible)
        if result is not None:
            return result
    return None


##############################################################################
# Code
##############################################################################


def compute_chemical_shifts(trj, model="shiftx2", **kwargs):
    """Predict chemical shifts of a trajectory using ShiftX2.

    Parameters
    ----------
    trj : Trajectory
        Trajectory to predict shifts for.
    model : str, optional, default="shiftx2"
        The program to use for calculating chemical shifts.  Must be one
        of shiftx2, ppm, or sparta+

    Returns
    -------
    results : pandas DataFrame
        Dataframe containing results, with index consisting of
        (resSeq, atom_name) pairs and columns for each frame in trj.

    Notes
    -----
    You must have the appropriate chemical soft programs installed
    and in your executable path.

    Chemical shift prediction is for PROTEIN atoms; trajectory objects
    with ligands, solvent, ions, or other non-protein components may give
    UNKNOWN RESULTS.

    Please cite the appropriate reference, see docstrings for chemical_shifts_*
    for the various possible models.
    """
    if model == "shiftx2":
        return chemical_shifts_shiftx2(trj, **kwargs)
    elif model == "ppm":
        return chemical_shifts_ppm(trj, **kwargs)
    elif model == "sparta+":
        return chemical_shifts_spartaplus(trj, **kwargs)
    else:
        raise (ValueError("model must be one of shiftx2, ppm, or sparta+"))


[docs] def chemical_shifts_shiftx2(trj, pH=5.0, temperature=298.00): """Predict chemical shifts of a trajectory using ShiftX2. Parameters ---------- trj : Trajectory Trajectory to predict shifts for. pH : float, optional, default=5.0 pH value which gets passed to the ShiftX2 predictor. temperature : float, optional, default=298.00 Temperature which gets passed to the ShiftX2 predictor. Returns ------- results : pandas DataFrame Dataframe containing results, with index consisting of (resSeq, atom_name) pairs and columns for each frame in trj. Notes ----- You must have ShiftX2 available on your path; see (http://www.shiftx2.ca/). Chemical shift prediction is for PROTEIN atoms; trajectory objects with ligands, solvent, ions, or other non-protein components may give UNKNOWN RESULTS. Please cite the appropriate reference below. References ---------- .. [1] Beomsoo Han, Yifeng Liu, Simon Ginzinger, and David Wishart. "SHIFTX2: significantly improved protein chemical shift prediction." J. Biomol. NMR, 50, 1 43-57 (2011) """ pd = import_("pandas") binary = find_executable(SHIFTX2) if binary is None: raise OSError( "External command not found. Looked for {} in PATH. " "`chemical_shifts_shiftx2` requires the external program SHIFTX2, " "available at http://www.shiftx2.ca/".format(", ".join(SHIFTX2)), ) results = [] with enter_temp_directory(): for i in range(trj.n_frames): fn = "./trj%d.pdb" % i trj[i].save(fn) subprocess.check_call( [ binary, "-b", fn, "-p", f"{pH:.1f}", "-t", f"{temperature:.2f}", ], ) d = pd.read_csv("./trj%d.pdb.cs" % i) d.rename( columns={"NUM": "resSeq", "RES": "resName", "ATOMNAME": "name"}, inplace=True, ) d["frame"] = i results.append(d) return pd.concat(results).pivot_table( index=["resSeq", "name"], columns="frame", values="SHIFT", )
[docs] def chemical_shifts_ppm(trj): """Predict chemical shifts of a trajectory using ppm. Parameters ---------- trj : Trajectory Trajectory to predict shifts for. Returns ------- results : pandas.DataFrame Dataframe containing results, with index consisting of (resSeq, atom_name) pairs and columns for each frame in trj. Notes ----- You must have ppm available on your path; see (http://spin.ccic.ohio-state.edu/index.php/download/index). Chemical shift prediction is for PROTEIN atoms; trajectory objects with ligands, solvent, ions, or other non-protein components may give UNKNOWN RESULTS. Please cite the appropriate reference below. References ---------- .. [1] Li, DW, and Bruschweiler, R. "PPM: a side-chain and backbone chemical shift predictor for the assessment of protein conformational ensembles." J Biomol NMR. 2012 Nov;54(3):257-65. """ pd = import_("pandas") binary = find_executable(PPM) first_resSeq = trj.top.residue(0).resSeq if binary is None: raise OSError( f"External command not found. Looked for {PPM} in PATH. `chemical_shifts_ppm` " "requires the external program PPM, available at " "http://spin.ccic.ohio-state.edu/index.php/download/index", ) with enter_temp_directory(): trj.save("./trj.pdb") # -para old is on order to use newer ppm versions cmd = "%s -pdb trj.pdb -mode detail -para old" % binary return_flag = os.system(cmd) if return_flag != 0: raise ( OSError( f"Could not successfully execute command '{cmd}', check your PPM " "installation or your input trajectory.", ) ) d = pd.read_table( "./bb_details.dat", index_col=False, header=None, sep=r"\s+", ).drop([3], axis=1) d = d.rename(columns={0: "resSeq", 1: "resName", 2: "name"}) d["resSeq"] += first_resSeq - 1 # Fix bug in PPM that reindexes to 1 d = d.drop("resName", axis=1) d = d.set_index(["resSeq", "name"]) d.columns = np.arange(trj.n_frames) d.columns.name = "frame" return d
def _get_lines_to_skip(filename): """Determine the number of comment lines in a SPARTA+ output file.""" format_string = """FORMAT %4d %4s %4s %9.3f %9.3f %9.3f %9.3f %9.3f %9.3f""" handle = open(filename) for i, line in enumerate(handle): if line.find(format_string) != -1: return i + 2 raise (Exception("No format string found in SPARTA+ file!"))
[docs] def chemical_shifts_spartaplus(trj, rename_HN=True): """Predict chemical shifts of a trajectory using SPARTA+. Parameters ---------- trj : Trajectory Trajectory to predict shifts for. rename_HN : bool, optional, default=True SPARTA+ calls the amide proton "HN" instead of the standard "H". When True, this option renames the output as "H" to match the PDB and BMRB nomenclature. Returns ------- results : pandas.DataFrame Dataframe containing results, with index consisting of (resSeq, atom_name) pairs and columns for each frame in trj. Notes ----- You must have SPARTA+ available on your path; see (http://spin.niddk.nih.gov/bax/software/SPARTA+/). Also, the SPARTAP_DIR environment variable must be set so that SPARTA+ knows where to find its database files. Chemical shift prediction is for PROTEIN atoms; trajectory objects with ligands, solvent, ions, or other non-protein components may give UNKNOWN RESULTS. Please cite the appropriate reference below. References ---------- .. [1] Shen, Y., and Bax, Ad. "SPARTA+: a modest improvement in empirical NMR chemical shift prediction by means of an artificial neural network." J. Biomol. NMR, 48, 13-22 (2010) """ pd = import_("pandas") binary = find_executable(SPARTA_PLUS) if binary is None: raise OSError( f"External command not found. Looked for {SPARTA_PLUS} in PATH. " "`chemical_shifts_spartaplus` requires the external program SPARTA+, available at " "http://spin.niddk.nih.gov/bax/software/SPARTA+/", ) names = [ "resSeq", "resName", "name", "SS_SHIFT", "SHIFT", "RC_SHIFT", "HM_SHIFT", "EF_SHIFT", "SIGMA", ] with enter_temp_directory(): for i in range(trj.n_frames): trj[i].save("./trj%d.pdb" % i) subprocess.check_call( [binary, "-in"] + [f"trj{i}.pdb" for i in range(trj.n_frames)] + ["-out", "trj0_pred.tab"], ) lines_to_skip = _get_lines_to_skip("trj0_pred.tab") results = [] for i in range(trj.n_frames): d = pd.read_table( "./trj%d_pred.tab" % i, names=names, header=None, sep=r"\s+", skiprows=lines_to_skip, ) d["frame"] = i results.append(d) results = pd.concat(results) if rename_HN: results.name[results.name == "HN"] = "H" return results.pivot_table( index=["resSeq", "name"], columns="frame", values="SHIFT", )
[docs] def reindex_dataframe_by_atoms(trj, frame): """Reindex chemical shift output to use atom number (serial) indexing. Parameters ---------- trj : Trajectory Trajectory to predict shifts for. frame : pandas.DataFrame Dataframe containing results, with index consisting of (resSeq, atom_name) pairs and columns for each frame in trj. Returns ------- new_frame : pandas.DataFrame Dataframe containing results, with index consisting of atom indices (AKA the 'serial' entry in a PDB). Columns correspond to each frame in trj. Notes ----- Be aware that this function may DROP predictions if the atom naming is different between the input trajectory and the output of various chemical shift prediction tools. """ top, bonds = trj.top.to_dataframe() top["serial"] = top.index top = top.set_index(["resSeq", "name"]) new_frame = frame.copy() new_frame["serial"] = top.ix[new_frame.index].serial new_frame = new_frame.dropna().reset_index().set_index("serial").drop(["resSeq", "name"], axis=1) return new_frame