# -*- coding: utf-8 -*- __copyright__ = """ This code is licensed under the 3-clause BSD license. Copyright ETH Zurich, Department of Chemistry and Applied Biosciences, Reiher Group. See LICENSE.txt for details. """ from typing import Optional, List, Dict, Tuple, Set import scine_database as db import yaml import os import time import math def wipe_if_required(manager: db.Manager, keyword: str) -> bool: """ Initialize the database if required. Parameters ---------- manager : db.Manager The database manager. Returns ------- bool Returns true, if the database was reinitialized. """ wipe = False force = keyword.upper() == "FORCE" db_name = manager.credentials.database_name if keyword.upper() in ["WIPE", "NEW", "INIT", "FORCE"]: wipe = True if wipe and not force: inp = input("Are you sure you want to wipe the database '" + db_name + "'? (y/n): ") while True: if inp.strip().lower() in ["y", "yes"]: break if inp.strip().lower() in ["n", "no"]: wipe = False break inp = input("Did not recognize answer, please answer 'y', if '" + db_name + "' should be wiped: ") if wipe: manager.wipe() manager.init() time.sleep(1.0) return wipe class Configuration: """ This class contains all settings for single point/qm region selection calculations. """ __slots__=("credentials", "trajectory_file", "topology_file", "force_field_file_paths", "single_point_settings", "positive_terminus", "negative_terminus", "charge", "multiplicity", "qm_atom_indices", "qm_atom_index_file", "compensated_charges", "n_structures", "model", "atom_info_file", "positively_charged_residues", "negatively_charged_residues", "qm_region_selection_settings", "subsystem_indices", "quantum_core_file", "ml_resource_path", "openmm_xml_file_path", "median_energy", "energy_window", "atomic_force_max", "transfer_learning", "energy_cutoff_threshold", ) def __init__(self): self.credentials = db.Credentials("hostname", 27017, "db_name") """ The database credentials """ self.trajectory_file: str = "trajectory.xtc" """ The trajectory file path. """ self.topology_file: str = "topology.pdb" """ The topology file path. """ self.force_field_file_paths: List[int] = [] """ The paths to the OpenMM xml force field files. """ self.single_point_settings: Dict = { "electrostatic_embedding": True, "require_gradients": True, "require_partial_energies": True, "require_partial_gradients": True, "use_external_atom_types": True, "use_xml_parameters": True, } """ Additional settings to be added to the db.Calculation single point objects. """ self.positive_terminus: Optional[int] = None """ Index of the positively charged terminus. """ self.negative_terminus: Optional[int] = None """ Index of the negatively charged terminus. """ self.charge: int = 0 """ Total charge of the QM system. """ self.multiplicity: int = 1 """ Total multiplicity of the QM system. """ self.qm_atom_indices: List[int] = [] """ Indices of the QM atoms. """ self.compensated_charges: int = 0 """ Charges compensated by ions not part of the QM system or the protein. This is used as a sanity check. """ self.n_structures: int = 2000 """ Maximum number of structures to be added to the DB. """ self.model = db.Model("dft/gaff", "pbe-d3bj", "def2-svp") """ The electronic structure model definition. """ self.atom_info_file: Optional[str] = None """ Optional atom info file. """ self.qm_atom_index_file: Optional[str] = None """ Optional atom index file. """ self.positively_charged_residues: List[Tuple[str, str]] = [("LYS", "NZ"), ("ARG", "CZ"), ("HIS", "NE2")] """ Tuples of residue names and atom types that carry a positive charge. """ self.negatively_charged_residues: List[Tuple[str, str]] = [("ASP", "OD1"), ("GLU", "OE1")] """ Tuples of residue names and atom types that carry negative charges. """ self.qm_region_selection_settings: Dict = { "electrostatic_embedding": True, "initial_radius": 4.0, "cutting_probability": 0.9, "tol_percentage_error": 20.0, "qm_region_max_size": 250, # 300 "qm_region_min_size": 100, "ref_max_size": 500, "tol_percentage_sym_score": float("inf"), # the QM region may be complete non-symmetric. "excluded_residues": ["HOH", "CL", "NA"], "use_external_atom_types": True, "use_xml_parameters": True, "num_attempts_per_radius": 50, "max_num_ref_models": 4 } """ The settings for the QM region selection calculation. """ self.subsystem_indices: List[List[int]] = [] """ Indices of the atoms for quantum cores. """ self.quantum_core_file: Optional[str] = None """ Optional file to load the quantum cores/subsystem partitioning from (QM/QM) embedding only. """ self.ml_resource_path: str = "ml/" """ The path to the machine learning resources folder. """ self.openmm_xml_file_path: str = "" """ The path to the openmm xml (system) file. """ self.median_energy: float = math.nan """ The rough QM energy to expect for QM/MM. This value can be used to select structures for QM/QM/MM that have a reasonable energy, i.e., the QM/MM SCF did not converge to an unphysical state. """ self.energy_window: float = math.nan """ The energy window to be used with the median energy: """ self.atomic_force_max: float = 10.0 """ The maximal atomic force in eV/Angstrom (structures are disregarded otherwise) for MLP training. """ self.transfer_learning: bool = False """ If True, the settings of the MLP will be changed to do transfer learning. """ self.energy_cutoff_threshold: float = 0.0 """ If this value is greater than 0, any structures with an energy outside the range (mean - cutoff, mean + cutoff) will be excluded from the training process. The unit should be identical to the energy unit used in the episodic memory file (hartree). """ def qm_indices_from_file(self, file_name: str) -> None: """ Read the QM atom indices from a text file and save them as an attribute. Parameters ---------- file_name : str The file. """ if not os.path.exists(file_name): raise RuntimeError(f"QM atom index file not found: {file_name}") with open(file_name, "r") as f: self.qm_atom_indices = [int(i) for i in f.read().splitlines() if i] def charge_and_multiplicity_from_atom_info(self, atom_info_file: str) -> None: """ Calculate the QM system's charge and multiplicity from atom-wise formal charges and multiplicity information in a file. Parameters ---------- atom_info_file : str The atom info file. """ if not os.path.exists(atom_info_file): raise RuntimeError(f"QM atom info file not found: {atom_info_file}") atom_info = {int(line.split()[0]): (int(line.split()[1]), int(line.split()[2])) for line in open(atom_info_file, "r").readlines() if line} self.charge = sum(atom_info[idx][0] if idx in atom_info else 0 for idx in self.qm_atom_indices) self.multiplicity = sum(atom_info[idx][1] if idx in atom_info else 0 for idx in self.qm_atom_indices) + 1 def qm_cores_from_file(self, file_name: str) -> None: if not os.path.exists(file_name): raise RuntimeError(f"Quantum core file not found: {file_name}") all_qm_cores: List[List[int]] = [[int(idx_str) for idx_str in line.split()] for line in open(file_name, "r").readlines()] for core in all_qm_cores: for idx in core: if idx not in self.qm_atom_indices: raise RuntimeError(f"Quantum core atom {idx} not in QM atom list") environment: Set[int] = set(self.qm_atom_indices) for core in all_qm_cores: environment.difference_update(set(core)) self.subsystem_indices = all_qm_cores + [list(environment)] @classmethod def from_file(cls, filename: str): """ Construct a Configuration object from a yml file. Parameters ---------- filename : str The path to the YAML file. """ if not os.path.exists(filename): raise RuntimeError(f"Configuration YAML file not found at: {filename}") with open(filename, 'r') as file: configuration_dict = yaml.safe_load(file) configuration = cls() for key, value in configuration_dict.items(): if key == "credentials": for kk, vv in value.items(): setattr(configuration.credentials, kk, vv) continue if key == "model": for kk, vv in value.items(): setattr(configuration.model, kk, vv) continue if key == "single_point_settings": for kk, vv in value.items(): configuration.single_point_settings[kk] = vv continue if key == "qm_region_selection_settings": for kk, vv in value.items(): configuration.qm_region_selection_settings[kk] = vv continue if key in cls.__slots__: setattr(configuration, key, value) else: print("Warning: unrecognized configuration key:", key) if configuration.qm_atom_index_file is not None: configuration.qm_indices_from_file(configuration.qm_atom_index_file) if configuration.atom_info_file is not None: configuration.charge_and_multiplicity_from_atom_info(configuration.atom_info_file) if configuration.quantum_core_file is not None: configuration.qm_cores_from_file(configuration.quantum_core_file) return configuration