Skip to content

Commit

Permalink
Merge pull request #466 from frantropy/feature/new_io_format
Browse files Browse the repository at this point in the history
Feature/new io format
  • Loading branch information
carlocamilloni authored Sep 19, 2024
2 parents 9d42d8b + 4a20dee commit 3ee22d0
Show file tree
Hide file tree
Showing 33 changed files with 45,348 additions and 68 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ jobs:
echo $CONDA/bin >> $GITHUB_PATH
- name: Install dependencies with conda
run: |
conda install -y python=3.11
conda env update --file conda/environment.yml --name base
conda install flake8
conda install --solver=classic -y python=3.11
conda env update --solver=classic --file conda/environment.yml --name base
conda install --solver=classic flake8
- name: Run flake8
run: |
# Flake8 exit on most issues
Expand Down
12 changes: 6 additions & 6 deletions multiego.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,8 @@ def meGO_parsing():
if args.multi_epsilon_intra or args.multi_epsilon_inter_domain or args.multi_epsilon_inter:
multi_flag = True

####################################
# PRELIMINARY SOLUTION TO TOPOLOGY #
####################################
r_topol = pmd.load_file(f"{args.root_dir}/inputs/{args.system}/{args.reference}/topol.top")
topol_names = [m for m in r_topol.molecules]
mego_topology = pmd.load_file(f"{args.root_dir}/inputs/{args.system}/topol.top")
topol_names = [m for m in mego_topology.molecules]

args.names = []
for name in args.multi_epsilon_intra.keys():
Expand All @@ -105,6 +102,9 @@ def meGO_parsing():
elif not multi_flag:
args.names = topol_names

if args.egos != "rc" and not args.reference:
args.reference = ["reference"]

if args.epsilon and not args.inter_epsilon:
args.inter_epsilon = args.epsilon
if args.epsilon and not args.inter_domain_epsilon:
Expand Down Expand Up @@ -137,7 +137,7 @@ def meGO_parsing():
sys.exit()
if args.symmetry_file:
args.symmetry = io.read_symmetry_file(args.symmetry_file)
if args.symmetry:
elif args.symmetry:
args.symmetry = io.parse_symmetry_list(args.symmetry)

if args.custom_dict:
Expand Down
6 changes: 3 additions & 3 deletions src/multiego/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
"help": "Maximum interaction energy per contact. The typical range is 0.2-0.4 kJ/mol",
},
"--reference": {
"type": str,
"default": "reference",
"type": lambda x: x.split(","),
"default": [],
"help": "The folder including all the reference information needed to setup multi-eGO, "
"corresponding to the subfolder to process.",
},
Expand All @@ -26,7 +26,7 @@
"corresponding to the subfolders to process and where the contacts are learned.",
},
"--check": {
"type": lambda x: [*x.split(",")],
"type": lambda x: x.split(","),
"default": [],
"help": "A list of the simulations corresponding to the subfolders used to check "
"whether the contacts learned are compatible with those provided in here.",
Expand Down
88 changes: 54 additions & 34 deletions src/multiego/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,55 +375,72 @@ def init_meGO_ensemble(args):
"""

# we initialize the reference topology
reference_path = f"{args.root_dir}/inputs/{args.system}/{args.reference}"
ensemble_type = reference_path.split("/")[-1]
print("\t-", f"Initializing {ensemble_type} ensemble topology")
topology_path = f"{reference_path}/topol.top"
# reference_paths = [ f"{args.root_dir}/inputs/{args.system}/{reference}" for reference in args.reference ]
# ensemble_type = reference_path.split("/")[-1]
# print("\t-", f"Initializing {ensemble_type} ensemble topology")
base_topology_path = f"{args.root_dir}/inputs/{args.system}/topol.top"

if not os.path.isfile(topology_path):
raise FileNotFoundError(f"{topology_path} not found.")
if not os.path.isfile(base_topology_path):
raise FileNotFoundError(f"{base_topology_path} not found.")

# reading the custom dictionary for training to multi-eGO translation of atom names
custom_dict = type_definitions.parse_json(args.custom_dict)

print("\t-", f"Reading {topology_path}")
print("\t-", f"Reading {base_topology_path}")
# ignore the dihedral type overriding in parmed
with warnings.catch_warnings():
warnings.simplefilter("ignore")
defines = {"DISULFIDE": 1}
reference_topology = parmed.load_file(topology_path, defines)
base_reference_topology = parmed.load_file(base_topology_path, defines)
(
topology_dataframe,
molecules_idx_sbtype_dictionary,
sbtype_c12_dict,
sbtype_name_dict,
sbtype_moltype_dict,
molecule_type_dict,
) = initialize_topology(reference_topology, custom_dict, args)

) = initialize_topology(base_reference_topology, custom_dict, args)

# prior = {}
# prior["topology"] = reference_topology
# prior["topology_dataframe"] = topology_dataframe
# prior["molecules_idx_sbtype_dictionary"] = molecules_idx_sbtype_dictionary
# prior["sbtype_c12_dict"] = sbtype_c12_dict
# prior["sbtype_name_dict"] = sbtype_name_dict
# prior["sbtype_moltype_dict"] = sbtype_moltype_dict
# prior["sbtype_number_dict"] = (
# topology_dataframe[["sb_type", "number"]].set_index("sb_type")["number"].to_dict()
# )
# prior["sbtype_type_dict"] = {key: name for key, name in topology_dataframe[["sb_type", "type"]].values}
# prior["molecule_type_dict"] = molecule_type_dict
reference_contact_matrices = {}

io.check_matrix_format(args)
if args.egos != "rc":
matrix_paths = glob.glob(f"{reference_path}/int??mat_?_?.ndx")
matrix_paths = matrix_paths + glob.glob(f"{reference_path}/int??mat_?_?.ndx.gz")
if matrix_paths == []:
raise FileNotFoundError("Contact matrix .ndx file(s) must be named as intramat_X_X.ndx or intermat_X_Y.ndx")
for path in matrix_paths:
name = path.replace(f"{args.root_dir}/inputs/", "")
name = name.replace("/", "_")
name = name.replace(".ndx", "")
name = name.replace(".gz", "")
reference_contact_matrices[name] = initialize_molecular_contacts(
io.read_molecular_contacts(path),
path,
molecules_idx_sbtype_dictionary,
args.reference,
args,
)
reference_contact_matrices[name] = reference_contact_matrices[name].add_prefix("rc_")
for reference in args.reference: # reference_paths:
reference_path = f"{args.root_dir}/inputs/{args.system}/{reference}"
if args.egos != "rc":
# path = f"{args.root_dir}/inputs/{args.system}/{reference_path}"
topology_path = f"{reference_path}/topol.top"
matrix_paths = glob.glob(f"{reference_path}/int??mat_?_?.ndx")
matrix_paths = matrix_paths + glob.glob(f"{reference_path}/int??mat_?_?.ndx.gz")
if matrix_paths == []:
raise FileNotFoundError("Contact matrix .ndx file(s) must be named as intramat_X_X.ndx or intermat_X_Y.ndx")
for path in matrix_paths:
name = path.replace(f"{args.root_dir}/inputs/", "")
name = name.replace("/", "_")
name = name.replace(".ndx", "")
name = name.replace(".gz", "")
reference_contact_matrices[name] = initialize_molecular_contacts(
io.read_molecular_contacts(path),
path,
molecules_idx_sbtype_dictionary,
reference,
args,
)
reference_contact_matrices[name] = reference_contact_matrices[name].add_prefix("rc_")

ensemble = {}
ensemble["topology"] = reference_topology
ensemble["topology"] = base_reference_topology
ensemble["topology_dataframe"] = topology_dataframe
ensemble["molecules_idx_sbtype_dictionary"] = molecules_idx_sbtype_dictionary
ensemble["sbtype_c12_dict"] = sbtype_c12_dict
Expand Down Expand Up @@ -488,11 +505,14 @@ def init_meGO_ensemble(args):
simulation,
args,
)
ref_name = reference_path + "_" + path.split("/")[-1]
ref_name = ref_name.replace(f"{args.root_dir}/inputs/", "")
ref_name = ref_name.replace("/", "_")
ref_name = ref_name.replace(".ndx", "")
ref_name = ref_name.replace(".gz", "")
# ref_name = reference_path + "_" + path.split("/")[-1]
# find corresponding reference matrix (given by the number_number at the end of the name)
# using reference contact matrices
identifier = f'_{("_").join(path.split("/")[-1].replace(".ndx", "").replace(".gz", "").split("_")[-3:])}'
ref_name = [key for key in reference_contact_matrices.keys() if key.endswith(identifier)]
if ref_name == []:
raise FileNotFoundError(f"No corresponding reference matrix found for {path}")
ref_name = ref_name[0]
ensemble["train_matrix_tuples"].append((name, ref_name))

ensemble["train_matrices"] = train_contact_matrices
Expand Down
36 changes: 24 additions & 12 deletions src/multiego/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,7 @@ def read_config(file, args_dict):
else:
key = list(element.keys())[0]
if f"--{key}" not in args_dict:
print(f"ERROR: {key} in {file} is not a valid argument.")
exit()
raise ValueError(f"ERROR: {key} in {file} is not a valid argument.")

return yml

Expand Down Expand Up @@ -88,6 +87,7 @@ def combine_configurations(yml, args, args_dict):
setattr(args, key, value)
else:
if hasattr(args, element):
print(element, yml)
setattr(args, element, True)

return args
Expand Down Expand Up @@ -196,7 +196,6 @@ def read_symmetry_file(path):
symmetry : dict
The symmetry parameters as a dictionary
"""
print("\t-", f"Reading symmetry file {path}")
with open(path, "r") as file:
lines = file.readlines()
symmetry = parse_symmetry_list(lines)
Expand Down Expand Up @@ -226,16 +225,21 @@ def parse_symmetry_list(symmetry_list):
A list of tuples, with each tuple containing the symmetry information for a single interaction.
"""
symmetry = []
for i, line in enumerate(symmetry_list):
if "#" in line:
symmetry_list[i] = line.split("#")[0]
symmetry_list[i] = symmetry_list[i].strip()

for line in symmetry_list:
if line.startswith("\n"):
if "#" in line:
line = line[: line.index("#")]
line = line.replace("\n", "")
line = line.strip()
if not line:
continue
else:
symmetry.append(line.split())
line = line.split(" ")
line = [x for x in line if x]
if len(line) < 3:
continue

symmetry.append(line)

return symmetry


Expand Down Expand Up @@ -321,6 +325,14 @@ def write_nonbonded(topology_dataframe, meGO_LJ, parameters, output_folder):
with open(f"{output_folder}/ffnonbonded.itp", "w") as file:
if write_header:
file.write(header)

# write the defaults section
file.write("\n[ defaults ]\n")
file.write("; Include forcefield parameters\n")
file.write("#define _FF_MAGROS\n\n")
file.write("; nbfunc comb-rule gen-pairs fudgeLJ fudgeQQ\n")
file.write(" 1 1 no 1.0 1.0\n\n")

file.write("[ atomtypes ]\n")
atomtypes = topology_dataframe[["sb_type", "atomic_number", "mass", "charge", "ptype", "c6", "c12"]].copy()
atomtypes["c6"] = atomtypes["c6"].map(lambda x: "{:.6e}".format(x))
Expand Down Expand Up @@ -661,7 +673,7 @@ def write_topology(
with open(f"{output_folder}/topol_GRETA.top", "w") as file:
header += """
; Include forcefield parameters
#include "multi-ego-basic.ff/forcefield.itp"
#include "ffnonbonded.itp"
"""

file.write(header)
Expand Down Expand Up @@ -785,7 +797,7 @@ def check_files_existence(args):
FileNotFoundError
If any of the files or directories does not exist
"""
md_ensembles = [args.reference] + args.train + args.check
md_ensembles = args.reference + args.train + args.check

for ensemble in md_ensembles:
ensemble = f"{args.root_dir}/inputs/{args.system}/{ensemble}"
Expand Down
32 changes: 32 additions & 0 deletions src/multiego/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,38 @@ def get_14_interaction_list(reduced_topology, bond_pair):
return exclusion_bonds, p14


def get_lj_pairs(topology):
    """
    Extracts Lennard-Jones pair information from a molecular topology.

    Every entry of ``topology.parameterset.nbfix_types`` becomes one row of
    the returned DataFrame. The two components stored for a pair are scaled
    by 4.184 and unpacked as ``(c12, c6)``, in that order.

    Parameters
    ----------
    topology: parmed.topology object
        Contains the molecular topology information. Only
        ``topology.parameterset.nbfix_types`` is read; it must map
        ``(type_i, type_j)`` keys to a pair of numeric values.

    Returns
    -------
    pairs_dataframe: pd.DataFrame
        DataFrame with the columns ``ai``, ``aj``, ``func``, ``c6`` and
        ``c12`` (one row per pair, ``func`` always 1). It is empty, but
        keeps the same columns, when the topology defines no pairs.
    """
    columns = ["ai", "aj", "func", "c6", "c12"]
    kcal_to_kj = 4.184  # Convert from kcal/mol to kJ/mol

    rows = []
    for (sbtype_i, sbtype_j), parameters in topology.parameterset.nbfix_types.items():
        # Scale component by component: multiplying a plain tuple by a float
        # raises TypeError, while this form works for tuples and arrays alike.
        # NOTE(review): the same factor is applied to both components, as in
        # the original expression -- confirm that the second component is
        # really an energy-like quantity and not a distance.
        c12, c6 = (component * kcal_to_kj for component in parameters)
        rows.append(
            {
                "ai": sbtype_i,
                "aj": sbtype_j,
                "func": 1,
                "c6": c6,
                "c12": c12,
            }
        )

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    return pd.DataFrame(rows, columns=columns)


def create_pairs_14_dataframe(atomtype1, atomtype2, c6=0.0, shift=0, prefactor=None, constant=None):
"""
Used to create additional or modified, multi-eGO-specific 1-4 (like) interactions. Two sets of atomtypes with
Expand Down
Loading

0 comments on commit 3ee22d0

Please sign in to comment.