Skip to content

Commit

Permalink
Merge pull request #475 from carlocamilloni/main
Browse files Browse the repository at this point in the history
some refactoring
  • Loading branch information
carlocamilloni authored Nov 3, 2024
2 parents ebf8ee0 + 935a15a commit 22c5fb2
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 64 deletions.
92 changes: 29 additions & 63 deletions src/multiego/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ def generate_basic_LJ(meGO_ensemble, args, matrices=None):
return basic_LJ


def set_epsilon(meGO_LJ):
def set_sig_epsilon(meGO_LJ, needed_fields):
"""
Set the epsilon parameter for LJ interactions based on probability and distance.
Expand All @@ -885,6 +885,14 @@ def set_epsilon(meGO_LJ):
adjusting them to represent the strength of attractive and repulsive forces. It ensures that LJ parameters are
consistent with the given probability and distance thresholds, maintaining the accuracy of simulations or calculations.
"""
# when distance estimates are poor we use the cutoff value
meGO_LJ.loc[(meGO_LJ["probability"] <= meGO_LJ["md_threshold"]), "distance"] = (meGO_LJ["rep"] / meGO_LJ["epsilon_0"]) ** (
1.0 / 12.0
)
meGO_LJ.loc[(meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]), "rc_distance"] = (
meGO_LJ["rep"] / meGO_LJ["epsilon_0"]
) ** (1.0 / 12.0)

# Epsilon is initialised to a rescaled C12
# This is always correct becasue distance is always well defined by either training data
# or using default C12 values
Expand Down Expand Up @@ -928,32 +936,6 @@ def set_epsilon(meGO_LJ):
meGO_LJ.dropna(subset=["epsilon"], inplace=True)
meGO_LJ = meGO_LJ[meGO_LJ.epsilon != 0]

return meGO_LJ


def repulsions_in_range(meGO_LJ):
"""
Adjust repulsion values within specific ranges to maintain consistency.
This function adjusts repulsion values within specific ranges to maintain consistency and prevent unrealistic
or extreme repulsion values. It ensures that repulsion values are within acceptable bounds based on the LJ parameters.
Parameters
----------
meGO_LJ : pd.DataFrame
DataFrame containing LJ parameters.
Returns
-------
pd.DataFrame
DataFrame containing LJ parameters with adjusted repulsion values within specified ranges.
Notes
-----
This function is crucial for maintaining the integrity of LJ parameters by ensuring that repulsion values are within
reasonable bounds. Extreme or unrealistic repulsion values can lead to inaccuracies or anomalies in simulations
or calculations.
"""
# lower value for repulsion
meGO_LJ.loc[
(meGO_LJ["1-4"] != "1_4") & (meGO_LJ["epsilon"] < 0.0) & (-meGO_LJ["epsilon"] < 0.1 * meGO_LJ["rep"]),
Expand Down Expand Up @@ -982,6 +964,20 @@ def repulsions_in_range(meGO_LJ):
"epsilon",
] = -meGO_LJ["rep"]

# Sigma is set from the estimated interaction length
meGO_LJ = meGO_LJ.assign(sigma=meGO_LJ["distance"] / (2.0 ** (1.0 / 6.0)))

# for repulsive interaction we reset sigma to its effective value
# this because when merging repulsive contacts from different sources what will matters
# will be the repulsive strength
meGO_LJ.loc[(meGO_LJ["epsilon"] < 0.0), "sigma"] = (-meGO_LJ["epsilon"]) ** (1.0 / 12.0)

# add a flag to identify learned contacts
meGO_LJ.loc[:, "learned"] = 1

# keep only needed fields
meGO_LJ = meGO_LJ[needed_fields]

return meGO_LJ


Expand Down Expand Up @@ -1110,34 +1106,7 @@ def generate_LJ(meGO_ensemble, train_dataset, basic_LJ, parameters):

st = time.time()
print("\t- Set sigma and epsilon")
# copy but remove intramolecular excluded interactions
meGO_LJ = train_dataset.copy()

# when distance estimates are poor we use the cutoff value
meGO_LJ.loc[(meGO_LJ["probability"] <= meGO_LJ["md_threshold"]), "distance"] = (meGO_LJ["rep"] / meGO_LJ["epsilon_0"]) ** (
1.0 / 12.0
)
meGO_LJ.loc[(meGO_LJ["rc_probability"] <= meGO_LJ["md_threshold"]), "rc_distance"] = (
meGO_LJ["rep"] / meGO_LJ["epsilon_0"]
) ** (1.0 / 12.0)

# at this point meGO_LJ is symmetric in ai/aj for interaction between identical molecules
# while it is not symmetric for cross interactions

# generate attractive and repulsive interactions
meGO_LJ = set_epsilon(meGO_LJ)
# fix for too small/too large repulsion
meGO_LJ = repulsions_in_range(meGO_LJ)

# Sigma is set from the estimated interaction length
meGO_LJ["sigma"] = (meGO_LJ["distance"]) / (2.0 ** (1.0 / 6.0))
# for repulsive interaction we reset sigma to its effective value
# this because when merging repulsive contacts from different sources what will matters
# will be the repulsive strength
meGO_LJ.loc[(meGO_LJ["epsilon"] < 0.0), "sigma"] = (-meGO_LJ["epsilon"]) ** (1.0 / 12.0)

# add a flag to identify learned contacts
meGO_LJ["learned"] = 1
# meGO needed fields
needed_fields = [
"molecule_name_ai",
Expand All @@ -1158,8 +1127,10 @@ def generate_LJ(meGO_ensemble, train_dataset, basic_LJ, parameters):
"rc_threshold",
"learned",
]
# keep only needed fields
meGO_LJ = meGO_LJ[needed_fields]

# generate attractive and repulsive interactions
meGO_LJ = set_sig_epsilon(meGO_LJ, needed_fields)

et = time.time()
elapsed_time = et - st
st = et
Expand Down Expand Up @@ -1191,10 +1162,6 @@ def generate_LJ(meGO_ensemble, train_dataset, basic_LJ, parameters):
)
# Cleaning the duplicates
meGO_LJ = meGO_LJ.drop_duplicates(subset=["ai", "aj", "same_chain"], keep="first")
meGO_LJ.drop(columns=["type"], inplace=True)

# keep only needed fields
meGO_LJ = meGO_LJ[needed_fields]

# now we can remove all repulsive contacts with default (i.e., rep) c12 becasue these
# are uninformative and predefined. This also allow to replace them with contact learned
Expand All @@ -1212,17 +1179,16 @@ def generate_LJ(meGO_ensemble, train_dataset, basic_LJ, parameters):
# 2) attractive contacts that can be transferd are those non affected by their random coil (prc <= rc_threshold)
# 3) an attractive contacts can only take the place of a trivial repulsive contact (i.e. a repulsive contact with prc <= rc_threshold)
meGO_LJ["trivial"] = False
meGO_LJ["sign"] = np.sign(meGO_LJ["epsilon"])
meGO_LJ.loc[(meGO_LJ["epsilon"] > 0) & (meGO_LJ["rc_probability"] > meGO_LJ["rc_threshold"]), "trivial"] = True
meGO_LJ.loc[(meGO_LJ["epsilon"] < 0) & (meGO_LJ["rc_probability"] <= meGO_LJ["rc_threshold"]), "trivial"] = True
# Identify rows where "trivial repulsive" is True and there exists another duplicate row
duplicated_at_least_one_trivial = (
(meGO_LJ.duplicated(subset=["ai", "aj", "1-4"], keep=False)) & (meGO_LJ["trivial"]) & (meGO_LJ["sign"] == -1)
(meGO_LJ.duplicated(subset=["ai", "aj", "1-4"], keep=False)) & (meGO_LJ["trivial"]) & (meGO_LJ["type"] == -1)
)
# Identify rows where both are trivial/not trivial
duplicated_same_trivial = meGO_LJ.duplicated(subset=["ai", "aj", "trivial"], keep=False)
# Identify rows where both are attractive or repulsive
duplicated_same_type = meGO_LJ.duplicated(subset=["ai", "aj", "sign"], keep=False)
duplicated_same_type = meGO_LJ.duplicated(subset=["ai", "aj", "type"], keep=False)
# Combine the conditions to remove only the rows that are trivial but duplicated with a non-trivial counterpart
remove_duplicates_mask = duplicated_at_least_one_trivial & ~duplicated_same_trivial & ~duplicated_same_type
# Remove rows where "trivial" is True and there exists another duplicate row with "trivial" as False with the not Trivial attractive and the Trivial repulsive
Expand Down
1 change: 1 addition & 0 deletions tools/make_mat/HDF52ndx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
parser.add_argument(
"-i",
"--input_file",
required=True,
type=str,
help="Path to the input contact matrix file (e.g., 'intramat_1_1.ndx.h5')",
)
Expand Down
11 changes: 10 additions & 1 deletion tools/make_mat/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# make_mat.py
# Interatomic contact matrices tools

## make_mat.py

This script calculates probabilities from histograms based on specified parameters.

Expand Down Expand Up @@ -26,3 +28,10 @@ Parameters:

`--cutoff`: The maximum cutoff used for the accumulation of the histograms.

## ndx2HDF5.py

This scripts convert a text or a text.gz matrix in HDF5 format, this is usefull for large system because it is much faster to process.

## HDF52ndx.py

This scripts convert a HDF5 matrix into text file.
1 change: 1 addition & 0 deletions tools/make_mat/ndx2HDF5.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
parser.add_argument(
"-i",
"--input_file",
required=True,
type=str,
help="Path to the input contact matrix file (e.g., 'intramat_1_1.ndx.gz')",
)
Expand Down

0 comments on commit 22c5fb2

Please sign in to comment.