# -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ import scine_utilities as utils import numpy as np from typing import List, Tuple, Union, Optional import mdtraj as md from openmm import unit from tqdm import tqdm def read_trajectory(file_name: str, topology_file_name: str) -> Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]]]: """ Read a trajectory file. Parameters ---------- file_name: str The trajectory file name. topology_file_name: str The topology file name. Supported file types: PDB Returns ------- Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]]] The molecular trajectory and a list of the box vectors for each frame in the trajectory. """ if "xtc" in file_name: return read_xtc_file(file_name, topology_file_name) elif any(text in file_name for text in [".dat", ".data", ".txt"]): return read_episodic_memory_file(file_name, topology_file_name) raise RuntimeError(f"Trajectory file format is not supported. Supported formats are: xtc, episodic-memory (data/dat/txt).\n" f"Your file name is: {file_name}") def read_xtc_file(file_name: str, topology_file_name: str)\ -> Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]]]: """ Read a xtc file. Parameters ---------- file_name: str The xtc file name. topology_file_name: str The topology file name. Supported file types: PDB Returns ------- Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]]] The molecular trajectory and a list of the box vectors for each frame in the trajectory. """ box_vectors: List[Tuple[np.ndarray, np.ndarray, np.ndarray]] = [] traj = md.load(file_name, top=topology_file_name) reference_topology, bonds = utils.io.read(topology_file_name) trajectory = utils.MolecularTrajectory(reference_topology.elements) print("Reading xtc trajectory") for frame in tqdm(range(len(traj))): box_vector = traj.openmm_boxes(frame) box_vectors.append((np.asarray(box_vector[0].value_in_unit(unit.angstrom) * utils.BOHR_PER_ANGSTROM), np.asarray(box_vector[1].value_in_unit(unit.angstrom) * utils.BOHR_PER_ANGSTROM), np.asarray(box_vector[2].value_in_unit(unit.angstrom) * utils.BOHR_PER_ANGSTROM) )) trajectory.push_back(np.asarray(traj.openmm_positions(frame).value_in_unit(unit.angstrom)) * utils.BOHR_PER_ANGSTROM) return trajectory, box_vectors def read_variable_episodic_memory_file(file_name: str)\ -> List[Tuple[utils.AtomCollection, Tuple[np.ndarray, np.ndarray, np.ndarray], np.ndarray, List[int]]]: position = [] lattice = [] elements: List[utils.ElementType] = [] results: List[Tuple[utils.AtomCollection, Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]], np.ndarray], List[int]] = [] forces_for_structure = [] qm_indices: List[int] = [] print("Reading episodic memory trajectory") # The coordinates are directly in Bohr. with open(file_name, encoding='utf-8') as f: for line in f: line = line.strip() if line: line = line.split() if line[0] == 'atom': position.append(line[1:4]) forces_for_structure.append(line[7:10]) elements.append(utils.ElementInfo.element_from_symbol(line[4])) if line[5] == "1.0": qm_indices.append(len(elements) - 1) elif line[0] == 'lattice': lattice.append(line[1:4]) elif line[0] == 'begin': if len(position) > 0: atom_collection = utils.AtomCollection(elements, np.asarray(position)) if lattice: vector_tuple = (np.array(lattice[0]).astype(float), np.array(lattice[1]).astype(float), np.array(lattice[2]).astype(float)) else: vector_tuple = None res = ( atom_collection, vector_tuple, np.array(forces_for_structure).astype(float), qm_indices ) results.append(res) position = [] elements = [] lattice = [] forces_for_structure = [] qm_indices = [] if len(position) > 0: atom_collection = utils.AtomCollection(elements, np.asarray(position)) if lattice: vector_tuple = (np.array(lattice[0]).astype(float), np.array(lattice[1]).astype(float), np.array(lattice[2]).astype(float)) else: vector_tuple = None res = ( atom_collection, vector_tuple, np.array(forces_for_structure).astype(float), qm_indices ) results.append(res) return results def read_episodic_memory_file(file_name: str, topology_file_name: str, get_step_ids: bool = False, forces: List[np.ndarray] = [])\ -> Union[Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]]], Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]], List[int]]]: """ Read episodic memory files. Parameters ---------- file_name: str The episodic memory file name. topology_file_name: str The topology file name. Supported file types: PDB get_step_ids: bool, optional Whether to get the step ids. Defaults to False. forces : Optional[List[np.ndarray]], optional If given, this list will be filled with the forces given in the episodic memory file. Defaults to None. Returns ------- Union[Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]]], Tuple[utils.MolecularTrajectory, List[Tuple[np.ndarray, np.ndarray, np.ndarray]], List[int]]] The molecular trajectory and a list of the box vectors for each frame in the trajectory. If the parameter get_step_ids is True: Returns molecular trajectory, a list of the box vectors for each frame in the trajectory, and the step ID in the MD run. """ position = [] lattice = [] reference_topology, bonds = utils.io.read(topology_file_name) trajectory = utils.MolecularTrajectory(reference_topology.elements) box_vectors: List[Tuple[np.ndarray, np.ndarray, np.ndarray]] = [] forces_for_structure = [] step_ids = [] print("Reading episodic memory trajectory") # The coordinates are directly in Bohr. with open(file_name, encoding='utf-8') as f: for line in f: line = line.strip() if line: line = line.split() if line[0] == 'atom': position.append(line[1:4]) forces_for_structure.append(line[7:10]) elif line[0] == 'lattice': lattice.append(line[1:4]) elif line[0] == 'begin': if len(position) > 0: atom_positions = np.asarray(position) trajectory.push_back(atom_positions) box_vectors.append((np.array(lattice[0]).astype(float), np.array(lattice[1]).astype(float), np.array(lattice[2]).astype(float))) forces.append(np.asarray(forces_for_structure).astype(float)) position = [] lattice = [] forces_for_structure = [] elif line[0] == 'comment' and line[1] == "number": step_ids.append(int(line[2])) if len(position) > 0: atom_positions = np.asarray(position) trajectory.push_back(atom_positions) box_vectors.append((np.array(lattice[0]).astype(float), np.array(lattice[1]).astype(float), np.array(lattice[2]).astype(float))) forces.append(np.asarray(forces_for_structure).astype(float)) if get_step_ids: return trajectory, box_vectors, step_ids return trajectory, box_vectors def write_episodic_memory_file(file_name: str, trajectory: utils.MolecularTrajectory, lattices: List[Tuple[np.ndarray, np.ndarray, np.ndarray]], atomic_classes: np.ndarray, mode: str = "w", comments: List[List[str]]=[]) -> None: """ Write an episodic memory file. Parameters ---------- file_name: str The file name. trajectory: utils.MolecularTrajectory The molecular trajectory. lattices: List[Tuple[np.ndarray, np.ndarray, np.ndarray]] The lattice vectors for each frame in the trajectory. atomic_classes: np.ndarray The atomic classes for each atom in the trajectory. mode: str The writing mode: "a" (append) or "w" (write/overwrite). comments: List[List[str]] Optional comments for each structure. """ elements = [str(e) for e in trajectory.elements] with open(file_name, mode, encoding='utf-8') as f: for i, (position, lattice) in enumerate(zip(trajectory, lattices)): f.write('begin\n') if comments: for comment in comments[i]: f.write(f"comment {comment}\n") if len(lattice) > 0: for i in range(3): f.write('lattice {0:>10.6f} {1:>10.6f} {2:>10.6f}\n' .format(round(lattice[i][0], 6), round(lattice[i][1], 6), round(lattice[i][2], 6))) atomic_class = atomic_classes.astype(float) for pos, element, a_class in zip(position, elements, atomic_class): f.write('atom {0:>10.6f} {1:>10.6f} {2:>10.6f} {3:2} {4:3.1f} {5:>6.3f} ' .format(round(pos[0], 6), round(pos[1], 6), round(pos[2], 6), element, a_class, round(0.00, 3))) f.write('{0:>10.6f} {1:>10.6f} {2:>10.6f}\n'.format( round(0.00, 6), round(0.00, 6), round(0.00, 6))) f.write('energy {0:.8f}\ncharge 0.0\nend\n'.format(round(0.00, 8)))