Source code for q2m3.solvation.solvent

# Copyright (c) 2026 Ye Jun <yjmaxpayne@hotmail.com>
# SPDX-License-Identifier: MIT

"""
Explicit Solvent Models for QM/MM Simulations

This module provides data-driven solvent model definitions for use with
PySCF's qmmm module. It focuses on:
1. Solvent geometry and partial charges (for pyscf.qmmm.mm_charge)
2. Rigid body coordinate transformations
3. Classical MM energy for solvent-solvent interactions (MC acceptance)

Design Philosophy:
    - Data-driven: Solvent models defined as dataclasses, easy to extend
    - PySCF-compatible: Output coordinates in Angstrom for qmmm.mm_charge
    - Minimal: Only implement what PySCF doesn't provide directly

Usage with PySCF:
    from pyscf import qmmm

    model = TIP3P_WATER
    molecules = [SolventMolecule(model, pos, angles) for ...]
    mm_coords, mm_charges = get_mm_embedding_data(molecules)

    # Convert to Bohr for PySCF
    mm_coords_bohr = mm_coords * ANGSTROM_TO_BOHR
    mf = qmmm.mm_charge(mf, mm_coords_bohr, mm_charges)
"""

from collections.abc import Sequence
from dataclasses import dataclass, field

import numpy as np

from q2m3.constants import ANGSTROM_TO_BOHR, COULOMB_CONSTANT, KCAL_TO_HARTREE

_MIN_MM_DISTANCE_ANGSTROM = 1e-12

# =============================================================================
# Solvent Model Definition (Data-Driven)
# =============================================================================


[docs] @dataclass(frozen=True) class SolventAtom: """ Single atom in a solvent model. Attributes: symbol: Atomic symbol (e.g., "O", "H") local_coord: Position in molecule-fixed frame (Angstrom) charge: Partial charge (elementary charge units) lj_sigma: Lennard-Jones sigma parameter (Angstrom), 0 if no LJ lj_epsilon: Lennard-Jones epsilon parameter (kcal/mol), 0 if no LJ """ symbol: str local_coord: tuple[float, float, float] charge: float lj_sigma: float = 0.0 lj_epsilon: float = 0.0 @property def local_coord_array(self) -> np.ndarray: return np.array(self.local_coord)
[docs] @dataclass(frozen=True) class SolventModel: """ Solvent model definition containing geometry, charges, and LJ parameters. This is an immutable data structure that defines a solvent type. Actual solvent molecule instances are represented by SolventMolecule. Attributes: name: Model identifier (e.g., "TIP3P", "SPC/E") atoms: Tuple of SolventAtom defining the molecule structure reference: Literature reference for the model parameters """ name: str atoms: tuple[SolventAtom, ...] reference: str = "" @property def n_atoms(self) -> int: return len(self.atoms) @property def symbols(self) -> list[str]: return [atom.symbol for atom in self.atoms] @property def charges(self) -> np.ndarray: return np.array([atom.charge for atom in self.atoms]) @property def local_coords(self) -> np.ndarray: """Local coordinates as (n_atoms, 3) array.""" return np.array([atom.local_coord for atom in self.atoms])
[docs] def get_lj_params(self, atom_idx: int) -> tuple[float, float]: """Get (sigma, epsilon) for atom at given index.""" atom = self.atoms[atom_idx] return (atom.lj_sigma, atom.lj_epsilon)
# ============================================================================= # Predefined Solvent Models # ============================================================================= # TIP3P Water Model # Reference: Jorgensen et al., J. Chem. Phys. 79, 926 (1983) _TIP3P_HALF_ANGLE = np.radians(104.52 / 2) _TIP3P_OH_LENGTH = 0.9572 TIP3P_WATER = SolventModel( name="TIP3P", atoms=( SolventAtom("O", (0.0, 0.0, 0.0), -0.834, lj_sigma=3.15061, lj_epsilon=0.152), SolventAtom( "H", ( _TIP3P_OH_LENGTH * np.cos(_TIP3P_HALF_ANGLE), _TIP3P_OH_LENGTH * np.sin(_TIP3P_HALF_ANGLE), 0.0, ), 0.417, ), SolventAtom( "H", ( _TIP3P_OH_LENGTH * np.cos(_TIP3P_HALF_ANGLE), -_TIP3P_OH_LENGTH * np.sin(_TIP3P_HALF_ANGLE), 0.0, ), 0.417, ), ), reference="Jorgensen et al., J. Chem. Phys. 79, 926 (1983)", ) # SPC/E Water Model # Reference: Berendsen et al., J. Phys. Chem. 91, 6269 (1987) _SPCE_HALF_ANGLE = np.radians(109.47 / 2) # Tetrahedral angle _SPCE_OH_LENGTH = 1.0 SPC_E_WATER = SolventModel( name="SPC/E", atoms=( SolventAtom("O", (0.0, 0.0, 0.0), -0.8476, lj_sigma=3.166, lj_epsilon=0.1554), SolventAtom( "H", ( _SPCE_OH_LENGTH * np.cos(_SPCE_HALF_ANGLE), _SPCE_OH_LENGTH * np.sin(_SPCE_HALF_ANGLE), 0.0, ), 0.4238, ), SolventAtom( "H", ( _SPCE_OH_LENGTH * np.cos(_SPCE_HALF_ANGLE), -_SPCE_OH_LENGTH * np.sin(_SPCE_HALF_ANGLE), 0.0, ), 0.4238, ), ), reference="Berendsen et al., J. Phys. Chem. 91, 6269 (1987)", ) # Registry of available models SOLVENT_MODELS: dict[str, SolventModel] = { "TIP3P": TIP3P_WATER, "SPC/E": SPC_E_WATER, } # ============================================================================= # Solvent Molecule Instance # =============================================================================
[docs] @dataclass class SolventMolecule: """ A solvent molecule instance with position and orientation. Attributes: model: The solvent model (defines geometry and parameters) position: Center of mass position in Angstrom euler_angles: Orientation as (roll, pitch, yaw) in radians """ model: SolventModel position: np.ndarray = field(default_factory=lambda: np.zeros(3)) euler_angles: np.ndarray = field(default_factory=lambda: np.zeros(3)) def __post_init__(self): self.position = np.asarray(self.position, dtype=float) self.euler_angles = np.asarray(self.euler_angles, dtype=float)
[docs] def get_atom_coords(self) -> np.ndarray: """ Get global coordinates of all atoms. Returns: Array of shape (n_atoms, 3) in Angstrom """ R = _euler_to_rotation_matrix(self.euler_angles) local = self.model.local_coords return self.position + (R @ local.T).T
[docs] def get_charges(self) -> np.ndarray: """Get partial charges for all atoms.""" return self.model.charges
[docs] def to_state_vector(self) -> np.ndarray: """Convert to state vector [x, y, z, roll, pitch, yaw].""" return np.concatenate([self.position, self.euler_angles])
[docs] @classmethod def from_state_vector(cls, model: SolventModel, state: np.ndarray) -> "SolventMolecule": """Create molecule from state vector.""" return cls(model=model, position=state[:3], euler_angles=state[3:])
def _euler_to_rotation_matrix(euler_angles: np.ndarray) -> np.ndarray: """ Create rotation matrix from Euler angles (ZYX convention). Args: euler_angles: [roll, pitch, yaw] in radians Returns: 3x3 rotation matrix """ roll, pitch, yaw = euler_angles Rz = np.array( [ [np.cos(yaw), -np.sin(yaw), 0], [np.sin(yaw), np.cos(yaw), 0], [0, 0, 1], ] ) Ry = np.array( [ [np.cos(pitch), 0, np.sin(pitch)], [0, 1, 0], [-np.sin(pitch), 0, np.cos(pitch)], ] ) Rx = np.array( [ [1, 0, 0], [0, np.cos(roll), -np.sin(roll)], [0, np.sin(roll), np.cos(roll)], ] ) return Rz @ Ry @ Rx # ============================================================================= # Functions for PySCF Integration # =============================================================================
[docs] def get_mm_embedding_data( molecules: Sequence[SolventMolecule], ) -> tuple[np.ndarray, np.ndarray]: """ Extract MM embedding data for pyscf.qmmm.mm_charge(). Args: molecules: Sequence of SolventMolecule instances Returns: Tuple of: - coords: Atom coordinates in Angstrom, shape (n_total_atoms, 3) - charges: Partial charges, shape (n_total_atoms,) Usage with PySCF: coords, charges = get_mm_embedding_data(molecules) coords_bohr = coords * ANGSTROM_TO_BOHR mf = qmmm.mm_charge(mf, coords_bohr, charges) """ all_coords = [] all_charges = [] for mol in molecules: all_coords.append(mol.get_atom_coords()) all_charges.append(mol.get_charges()) if not all_coords: return np.array([]).reshape(0, 3), np.array([]) return np.vstack(all_coords), np.concatenate(all_charges)
[docs] def get_mm_embedding_data_bohr( molecules: Sequence[SolventMolecule], ) -> tuple[np.ndarray, np.ndarray]: """ Get MM embedding data with coordinates in Bohr (ready for PySCF). This is a convenience function that converts coordinates to Bohr, which is the unit expected by pyscf.qmmm.mm_charge(). Returns: Tuple of (coords_bohr, charges) """ coords, charges = get_mm_embedding_data(molecules) return coords * ANGSTROM_TO_BOHR, charges
# ============================================================================= # MM Energy Calculation (for MC acceptance criterion) # =============================================================================
[docs] def compute_mm_energy(molecules: Sequence[SolventMolecule]) -> float: """ Compute classical MM energy between solvent molecules. This calculates the Lennard-Jones and Coulomb interactions between all pairs of solvent molecules. Used for the MC acceptance criterion. Note: This is a simple pairwise implementation. For production use with large systems, consider using OpenMM or other optimized libraries. Args: molecules: Sequence of SolventMolecule instances Returns: Total MM energy in Hartree. Exact intermolecular atom overlap is treated as a hard-core invalid configuration and returns +inf. """ n_mol = len(molecules) if n_mol < 2: return 0.0 # Precompute all atom coordinates and charges all_coords = [mol.get_atom_coords() for mol in molecules] all_charges = [mol.get_charges() for mol in molecules] e_lj_kcal = 0.0 e_coulomb_kcal = 0.0 for i in range(n_mol): model_i = molecules[i].model coords_i = all_coords[i] charges_i = all_charges[i] for j in range(i + 1, n_mol): model_j = molecules[j].model coords_j = all_coords[j] charges_j = all_charges[j] # LJ interactions (only between atoms with LJ parameters) for ai in range(model_i.n_atoms): sigma_i, eps_i = model_i.get_lj_params(ai) if eps_i == 0: continue for aj in range(model_j.n_atoms): sigma_j, eps_j = model_j.get_lj_params(aj) if eps_j == 0: continue r = np.linalg.norm(coords_j[aj] - coords_i[ai]) if r <= _MIN_MM_DISTANCE_ANGSTROM: return float("inf") # Lorentz-Berthelot combining rules sigma = (sigma_i + sigma_j) / 2 epsilon = np.sqrt(eps_i * eps_j) sigma_r = sigma / r sigma_r_6 = sigma_r**6 e_lj_kcal += 4.0 * epsilon * (sigma_r_6**2 - sigma_r_6) # Coulomb interactions (all atom pairs) for ai in range(model_i.n_atoms): for aj in range(model_j.n_atoms): r = np.linalg.norm(coords_j[aj] - coords_i[ai]) if r <= _MIN_MM_DISTANCE_ANGSTROM: return float("inf") e_coulomb_kcal += COULOMB_CONSTANT * charges_i[ai] * charges_j[aj] / r return (e_lj_kcal + e_coulomb_kcal) * KCAL_TO_HARTREE
# ============================================================================= # Initialization Utilities # =============================================================================
[docs] def initialize_solvent_ring( model: SolventModel, n_molecules: int, center: np.ndarray, radius: float, random_seed: int = 42, ) -> list[SolventMolecule]: """ Initialize solvent molecules in a ring arrangement. Molecules are placed equidistant in a ring in the x-y plane, with random initial orientations. Args: model: Solvent model to use n_molecules: Number of molecules center: Center of the ring (typically solute center) radius: Ring radius in Angstrom random_seed: Random seed for reproducible orientations Returns: List of SolventMolecule instances """ np.random.seed(random_seed) center = np.asarray(center) molecules = [] for i in range(n_molecules): angle = 2 * np.pi * i / n_molecules position = np.array( [ center[0] + radius * np.cos(angle), center[1] + radius * np.sin(angle), center[2], ] ) euler_angles = np.random.uniform(-np.pi, np.pi, size=3) molecules.append(SolventMolecule(model, position, euler_angles)) return molecules
[docs] def molecules_to_state_array(molecules: Sequence[SolventMolecule]) -> np.ndarray: """Convert list of molecules to state array of shape (n_mol, 6).""" return np.array([mol.to_state_vector() for mol in molecules])
[docs] def state_array_to_molecules(model: SolventModel, states: np.ndarray) -> list[SolventMolecule]: """Convert state array to list of molecules.""" return [SolventMolecule.from_state_vector(model, state) for state in states]