From b2d51965596c93569c19192ccbf56425fb53e4ce Mon Sep 17 00:00:00 2001 From: Michael Hucka Date: Wed, 24 Sep 2025 15:55:31 -0700 Subject: [PATCH 1/6] Fix deserialization warning CodeQL security [scan report #567](https://github.com/quantumlib/OpenFermion/security/code-scanning/567) flagged a data loading operation in `src/openfermion/utils/operator_utils.py` as being usafe due because it uses a user-provided value. The warning is about lines 282-283, involving the code ```python with open(file_path, 'rb') as f: data = marshal.load(f) ``` > Deserializing untrusted data using any deserialization framework that allows the construction of arbitrary serializable objects is easily exploitable and in many cases allows an attacker to execute arbitrary code. Even before a deserialized object is returned to the caller of a deserialization method a lot of code may have been executed, including static initializers, constructors, and finalizers. Automatic deserialization of fields means that an attacker may craft a nested combination of objects on which the executed initialization code may have unforeseen effects, such as the execution of arbitrary code. This changes the code to use the `json` package instead of the `marshal` package and extracts the data more carefully. --- src/openfermion/utils/operator_utils.py | 19 +++++---- src/openfermion/utils/operator_utils_test.py | 41 ++++++++++++++++++++ 2 files changed, 53 insertions(+), 7 deletions(-) diff --git a/src/openfermion/utils/operator_utils.py b/src/openfermion/utils/operator_utils.py index 82d2e345e..cfe80a9c1 100644 --- a/src/openfermion/utils/operator_utils.py +++ b/src/openfermion/utils/operator_utils.py @@ -11,8 +11,9 @@ # limitations under the License. """This module provides generic tools for classes in ops/""" from builtins import map, zip -import marshal +import json import os +from ast import literal_eval import numpy import sympy @@ -279,10 +280,13 @@ def load_operator(file_name=None, data_directory=None, plain_text=False): else: raise TypeError('Operator of invalid type.') else: - with open(file_path, 'rb') as f: - data = marshal.load(f) - operator_type = data[0] - operator_terms = data[1] + with open(file_path, 'r') as f: + data = json.load(f) + operator_type, serializable_terms = data + operator_terms = { + literal_eval(key): complex(value[0], value[1]) + for key, value in serializable_terms.items() + } if operator_type == 'FermionOperator': operator = FermionOperator() @@ -356,5 +360,6 @@ def save_operator( f.write(operator_type + ":\n" + str(operator)) else: tm = operator.terms - with open(file_path, 'wb') as f: - marshal.dump((operator_type, dict(zip(tm.keys(), map(complex, tm.values())))), f) + serializable_terms = {str(key): (value.real, value.imag) for key, value in tm.items()} + with open(file_path, 'w') as f: + json.dump((operator_type, serializable_terms), f) diff --git a/src/openfermion/utils/operator_utils_test.py b/src/openfermion/utils/operator_utils_test.py index 95b7d32cb..ecd0d791f 100644 --- a/src/openfermion/utils/operator_utils_test.py +++ b/src/openfermion/utils/operator_utils_test.py @@ -12,6 +12,7 @@ """Tests for operator_utils.""" import os +import json import itertools @@ -594,6 +595,46 @@ def test_save_bad_type(self): with self.assertRaises(TypeError): save_operator('ping', 'somewhere') + def test_save_and_load_complex_json(self): + fermion_op = FermionOperator('1^ 2', 1 + 2j) + boson_op = BosonOperator('1^ 2', 1 + 2j) + qubit_op = QubitOperator('X1 Y2', 1 + 2j) + quad_op = QuadOperator('q1 p2', 1 + 2j) + + save_operator(fermion_op, self.file_name) + loaded_op = load_operator(self.file_name) + self.assertEqual(fermion_op, loaded_op) + + save_operator(boson_op, self.file_name, allow_overwrite=True) + loaded_op = load_operator(self.file_name) + self.assertEqual(boson_op, loaded_op) + + save_operator(qubit_op, self.file_name, allow_overwrite=True) + loaded_op = load_operator(self.file_name) + self.assertEqual(qubit_op, loaded_op) + + save_operator(quad_op, self.file_name, allow_overwrite=True) + loaded_op = load_operator(self.file_name) + self.assertEqual(quad_op, loaded_op) + + def test_saved_json_content(self): + import json + + qubit_op = QubitOperator('X1 Y2', 1 + 2j) + save_operator(qubit_op, self.file_name) + + file_path = get_file_path(self.file_name, None) + with open(file_path, 'r') as f: + data = json.load(f) + + self.assertEqual(len(data), 2) + self.assertEqual(data[0], 'QubitOperator') + + # The key is stringified tuple + # The value is a list [real, imag] + expected_terms = {"((1, 'X'), (2, 'Y'))": [1.0, 2.0]} + self.assertEqual(data[1], expected_terms) + class GetFileDirTest(unittest.TestCase): def setUp(self): From c8fab8aeca838d8a17121ecfc5543e4bec0549c0 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 26 Sep 2025 03:54:32 +0000 Subject: [PATCH 2/6] Resolve conflicts during merge --- src/openfermion/utils/operator_utils.py | 125 +++++++++++++++---- src/openfermion/utils/operator_utils_test.py | 125 +++++++++++++------ 2 files changed, 187 insertions(+), 63 deletions(-) diff --git a/src/openfermion/utils/operator_utils.py b/src/openfermion/utils/operator_utils.py index cfe80a9c1..055d608e8 100644 --- a/src/openfermion/utils/operator_utils.py +++ b/src/openfermion/utils/operator_utils.py @@ -11,9 +11,8 @@ # limitations under the License. """This module provides generic tools for classes in ops/""" from builtins import map, zip -import json +import marshal import os -from ast import literal_eval import numpy import sympy @@ -37,6 +36,12 @@ from openfermion.transforms.opconversions.term_reordering import normal_ordered +# Maximum size allowed for data files read by load_operator(). This is a (weak) safety +# measure against corrupted or insecure files. +_MAX_TEXT_OPERATOR_DATA = 5 * 1024 * 1024 +_MAX_BINARY_OPERATOR_DATA = 1024 * 1024 + + class OperatorUtilsError(Exception): pass @@ -247,27 +252,45 @@ def get_file_path(file_name, data_directory): def load_operator(file_name=None, data_directory=None, plain_text=False): - """Load FermionOperator or QubitOperator from file. + """Load an operator (such as a FermionOperator) from a file. Args: - file_name: The name of the saved file. + file_name: The name of the data file to read. data_directory: Optional data directory to change from default data - directory specified in config file. + directory specified in config file. plain_text: Whether the input file is plain text Returns: operator: The stored FermionOperator, BosonOperator, - QuadOperator, or QubitOperator + QuadOperator, or QubitOperator. Raises: TypeError: Operator of invalid type. + ValueError: If the file is larger than the maximum allowed. + ValueError: If the file content is not as expected or loading fails. + IOError: If the file cannot be opened. + + Warning: + Loading from binary files (plain_text=False) uses the Python 'marshal' + module, which is not secure against untrusted or maliciously crafted + data. Only load binary operator files from sources that you trust. + Prefer using the plain_text format for data from untrusted sources. """ + file_path = get_file_path(file_name, data_directory) + operator_type = None + operator_terms = None + if plain_text: with open(file_path, 'r') as f: - data = f.read() - operator_type, operator_terms = data.split(":\n") + data = f.read(_MAX_TEXT_OPERATOR_DATA) + try: + operator_type, operator_terms = data.split(":\n", 1) + except ValueError: + raise ValueError( + "Invalid format in plain-text data file {file_path}: " "expected 'TYPE:\\nTERMS'" + ) if operator_type == 'FermionOperator': operator = FermionOperator(operator_terms) @@ -278,15 +301,24 @@ def load_operator(file_name=None, data_directory=None, plain_text=False): elif operator_type == 'QuadOperator': operator = QuadOperator(operator_terms) else: - raise TypeError('Operator of invalid type.') + raise TypeError( + f"Invalid operator type '{operator_type}' encountered " + f"found in plain-text data file '{file_path}'." + ) else: - with open(file_path, 'r') as f: - data = json.load(f) - operator_type, serializable_terms = data - operator_terms = { - literal_eval(key): complex(value[0], value[1]) - for key, value in serializable_terms.items() - } + # marshal.load() doesn't have a size parameter, so we test it ourselves. + if os.path.getsize(file_path) > _MAX_BINARY_OPERATOR_DATA: + raise ValueError( + f"Size of {file_path} exceeds maximum allowed " + f"({_MAX_BINARY_OPERATOR_DATA} bytes)." + ) + try: + with open(file_path, 'rb') as f: + raw_data = marshal.load(f) + except Exception as e: + raise ValueError(f"Failed to load marshaled data from {file_path}: {e}") + + operator_type, operator_terms = _validate_operator_data(raw_data) if operator_type == 'FermionOperator': operator = FermionOperator() @@ -313,17 +345,17 @@ def load_operator(file_name=None, data_directory=None, plain_text=False): def save_operator( operator, file_name=None, data_directory=None, allow_overwrite=False, plain_text=False ): - """Save FermionOperator or QubitOperator to file. + """Save an operator (such as a FermionOperator) to a file. Args: operator: An instance of FermionOperator, BosonOperator, or QubitOperator. file_name: The name of the saved file. data_directory: Optional data directory to change from default data - directory specified in config file. + directory specified in config file. allow_overwrite: Whether to allow files to be overwritten. plain_text: Whether the operator should be saved to a - plain-text format for manual analysis + plain-text format for manual analysis. Raises: OperatorUtilsError: Not saved, file already exists. @@ -360,6 +392,55 @@ def save_operator( f.write(operator_type + ":\n" + str(operator)) else: tm = operator.terms - serializable_terms = {str(key): (value.real, value.imag) for key, value in tm.items()} - with open(file_path, 'w') as f: - json.dump((operator_type, serializable_terms), f) + with open(file_path, 'wb') as f: + marshal.dump((operator_type, dict(zip(tm.keys(), map(complex, tm.values())))), f) + + +def _validate_operator_data(raw_data): + """Validates the structure and types of data loaded using marshal. + + The file is expected to contain a tuple of (type, data), where the + "type" is one of the currently-supported operators, and "data" is a dict. + + Args: + raw_data: text or binary data. + + Returns: + tuple(str, dict) where the 0th element is the name of the operator + type (e.g., 'FermionOperator') and the dict is the operator data. + + Raises: + TypeError: raw_data did not contain a tuple of length 2. + TypeError: the first element of the tuple is not a string. + TypeError: the second element of the tuple is not a dict. + TypeError: the given operator type is not supported. + """ + + if not isinstance(raw_data, tuple) or len(raw_data) != 2: + raise TypeError( + f"Invalid marshaled structure: Expected a tuple " + "of length 2, but got {type(raw_data)} instead." + ) + + operator_type, operator_terms = raw_data + + if not isinstance(operator_type, str): + raise TypeError( + f"Invalid type for operator_type: Expected str but " + "got type {type(operator_type)} instead." + ) + + allowed = {'FermionOperator', 'BosonOperator', 'QubitOperator', 'QuadOperator'} + if operator_type not in allowed: + raise TypeError( + f"Operator type '{operator_type}' is not supported. " + "The operator must be one of {allowed}." + ) + + if not isinstance(operator_terms, dict): + raise TypeError( + f"Invalid type for operator_terms: Expected dict " + "but got type {type(operator_terms)} instead." + ) + + return operator_type, operator_terms diff --git a/src/openfermion/utils/operator_utils_test.py b/src/openfermion/utils/operator_utils_test.py index ecd0d791f..e8921773a 100644 --- a/src/openfermion/utils/operator_utils_test.py +++ b/src/openfermion/utils/operator_utils_test.py @@ -12,7 +12,6 @@ """Tests for operator_utils.""" import os -import json import itertools @@ -587,53 +586,97 @@ def test_overwrite_flag_save_on_top_of_existing_operator(self): self.assertEqual(fermion_operator, self.fermion_operator) - def test_load_bad_type(self): - with self.assertRaises(TypeError): - _ = load_operator('bad_type_operator') + def test_load_nonexistent_file_raises_error(self): + with self.assertRaises(FileNotFoundError): + load_operator('non_existent_file_for_testing') def test_save_bad_type(self): with self.assertRaises(TypeError): save_operator('ping', 'somewhere') - def test_save_and_load_complex_json(self): - fermion_op = FermionOperator('1^ 2', 1 + 2j) - boson_op = BosonOperator('1^ 2', 1 + 2j) - qubit_op = QubitOperator('X1 Y2', 1 + 2j) - quad_op = QuadOperator('q1 p2', 1 + 2j) - - save_operator(fermion_op, self.file_name) - loaded_op = load_operator(self.file_name) - self.assertEqual(fermion_op, loaded_op) - - save_operator(boson_op, self.file_name, allow_overwrite=True) - loaded_op = load_operator(self.file_name) - self.assertEqual(boson_op, loaded_op) - - save_operator(qubit_op, self.file_name, allow_overwrite=True) - loaded_op = load_operator(self.file_name) - self.assertEqual(qubit_op, loaded_op) - - save_operator(quad_op, self.file_name, allow_overwrite=True) - loaded_op = load_operator(self.file_name) - self.assertEqual(quad_op, loaded_op) - def test_saved_json_content(self): - import json - - qubit_op = QubitOperator('X1 Y2', 1 + 2j) - save_operator(qubit_op, self.file_name) - - file_path = get_file_path(self.file_name, None) - with open(file_path, 'r') as f: - data = json.load(f) - - self.assertEqual(len(data), 2) - self.assertEqual(data[0], 'QubitOperator') +class LoadOperatorTest(unittest.TestCase): + def setUp(self): + self.file_name = "test_load_operator_file.data" + self.file_path = os.path.join(DATA_DIRECTORY, self.file_name) - # The key is stringified tuple - # The value is a list [real, imag] - expected_terms = {"((1, 'X'), (2, 'Y'))": [1.0, 2.0]} - self.assertEqual(data[1], expected_terms) + def tearDown(self): + if os.path.isfile(self.file_path): + os.remove(self.file_path) + + def test_load_plain_text_invalid_format_raises_value_error(self): + with open(self.file_path, 'w') as f: + f.write("some text without the required separator") + with self.assertRaises(ValueError) as cm: + load_operator(self.file_name, plain_text=True) + self.assertIn("expected 'TYPE:\\nTERMS'", str(cm.exception)) + + def test_load_binary_too_large_raises_value_error(self): + with open(self.file_path, 'wb') as f: + f.write(b'data') + + original_getsize = os.path.getsize + + def mock_getsize(path): + from openfermion.utils.operator_utils import _MAX_BINARY_OPERATOR_DATA + if path == self.file_path: + return _MAX_BINARY_OPERATOR_DATA + 1 + return original_getsize(path) + + os.path.getsize = mock_getsize + try: + with self.assertRaises(ValueError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("exceeds maximum allowed", str(cm.exception)) + finally: + os.path.getsize = original_getsize + + def test_load_binary_corrupted_data_raises_value_error(self): + with open(self.file_path, 'wb') as f: + f.write(b"corrupted marshal data") + with self.assertRaises(ValueError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("Failed to load marshaled data", str(cm.exception)) + + def test_load_binary_invalid_structure_not_tuple_raises_type_error(self): + import marshal + with open(self.file_path, 'wb') as f: + marshal.dump("this is not a tuple", f) + with self.assertRaises(TypeError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("Expected a tuple", str(cm.exception)) + + def test_load_binary_invalid_structure_wrong_len_tuple_raises_type_error(self): + import marshal + with open(self.file_path, 'wb') as f: + marshal.dump(("one", "two", "three"), f) + with self.assertRaises(TypeError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("Expected a tuple of length 2", str(cm.exception)) + + def test_load_binary_invalid_operator_type_not_string_raises_type_error(self): + import marshal + with open(self.file_path, 'wb') as f: + marshal.dump((123, {}), f) + with self.assertRaises(TypeError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("Expected str but got type", str(cm.exception)) + + def test_load_binary_invalid_terms_type_not_dict_raises_type_error(self): + import marshal + with open(self.file_path, 'wb') as f: + marshal.dump(("FermionOperator", ["a", "list"]), f) + with self.assertRaises(TypeError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("Expected dict but got type", str(cm.exception)) + + def test_load_binary_unsupported_operator_type_raises_type_error(self): + import marshal + with open(self.file_path, 'wb') as f: + marshal.dump(("UnsupportedOperator", {}), f) + with self.assertRaises(TypeError) as cm: + load_operator(self.file_name, plain_text=False) + self.assertIn("is not supported", str(cm.exception)) class GetFileDirTest(unittest.TestCase): From bb1967811180d09205eabe86759edfd715df40da Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 26 Sep 2025 04:13:48 +0000 Subject: [PATCH 3/6] Move imports of marshal to the top of the file --- src/openfermion/utils/operator_utils_test.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/openfermion/utils/operator_utils_test.py b/src/openfermion/utils/operator_utils_test.py index e8921773a..e3a76dfbd 100644 --- a/src/openfermion/utils/operator_utils_test.py +++ b/src/openfermion/utils/operator_utils_test.py @@ -11,16 +11,13 @@ # limitations under the License. """Tests for operator_utils.""" -import os - import itertools - +import marshal +import os import unittest import numpy - import sympy - from scipy.sparse import csc_matrix from openfermion.config import DATA_DIRECTORY @@ -619,6 +616,7 @@ def test_load_binary_too_large_raises_value_error(self): def mock_getsize(path): from openfermion.utils.operator_utils import _MAX_BINARY_OPERATOR_DATA + if path == self.file_path: return _MAX_BINARY_OPERATOR_DATA + 1 return original_getsize(path) @@ -639,7 +637,6 @@ def test_load_binary_corrupted_data_raises_value_error(self): self.assertIn("Failed to load marshaled data", str(cm.exception)) def test_load_binary_invalid_structure_not_tuple_raises_type_error(self): - import marshal with open(self.file_path, 'wb') as f: marshal.dump("this is not a tuple", f) with self.assertRaises(TypeError) as cm: @@ -647,7 +644,6 @@ def test_load_binary_invalid_structure_not_tuple_raises_type_error(self): self.assertIn("Expected a tuple", str(cm.exception)) def test_load_binary_invalid_structure_wrong_len_tuple_raises_type_error(self): - import marshal with open(self.file_path, 'wb') as f: marshal.dump(("one", "two", "three"), f) with self.assertRaises(TypeError) as cm: @@ -655,7 +651,6 @@ def test_load_binary_invalid_structure_wrong_len_tuple_raises_type_error(self): self.assertIn("Expected a tuple of length 2", str(cm.exception)) def test_load_binary_invalid_operator_type_not_string_raises_type_error(self): - import marshal with open(self.file_path, 'wb') as f: marshal.dump((123, {}), f) with self.assertRaises(TypeError) as cm: @@ -663,7 +658,6 @@ def test_load_binary_invalid_operator_type_not_string_raises_type_error(self): self.assertIn("Expected str but got type", str(cm.exception)) def test_load_binary_invalid_terms_type_not_dict_raises_type_error(self): - import marshal with open(self.file_path, 'wb') as f: marshal.dump(("FermionOperator", ["a", "list"]), f) with self.assertRaises(TypeError) as cm: @@ -671,7 +665,6 @@ def test_load_binary_invalid_terms_type_not_dict_raises_type_error(self): self.assertIn("Expected dict but got type", str(cm.exception)) def test_load_binary_unsupported_operator_type_raises_type_error(self): - import marshal with open(self.file_path, 'wb') as f: marshal.dump(("UnsupportedOperator", {}), f) with self.assertRaises(TypeError) as cm: From 7edf089ecbbbcbb9105d464a08e649a04fc4d22f Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 26 Sep 2025 04:14:52 +0000 Subject: [PATCH 4/6] Remove unneeded import of built-in functions zip and map --- src/openfermion/utils/operator_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openfermion/utils/operator_utils.py b/src/openfermion/utils/operator_utils.py index 055d608e8..e631da3dc 100644 --- a/src/openfermion/utils/operator_utils.py +++ b/src/openfermion/utils/operator_utils.py @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """This module provides generic tools for classes in ops/""" -from builtins import map, zip + import marshal import os From 0648c39cf32116643c8046fec671199e0bfe13fc Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 26 Sep 2025 04:15:33 +0000 Subject: [PATCH 5/6] Fix formatting of imports --- src/openfermion/utils/operator_utils.py | 5 ++--- src/openfermion/utils/operator_utils_test.py | 18 +++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/openfermion/utils/operator_utils.py b/src/openfermion/utils/operator_utils.py index e631da3dc..6798f3407 100644 --- a/src/openfermion/utils/operator_utils.py +++ b/src/openfermion/utils/operator_utils.py @@ -22,20 +22,19 @@ from openfermion.ops.operators import ( BosonOperator, FermionOperator, + IsingOperator, MajoranaOperator, QuadOperator, QubitOperator, - IsingOperator, ) from openfermion.ops.representations import ( - PolynomialTensor, DiagonalCoulombHamiltonian, InteractionOperator, InteractionRDM, + PolynomialTensor, ) from openfermion.transforms.opconversions.term_reordering import normal_ordered - # Maximum size allowed for data files read by load_operator(). This is a (weak) safety # measure against corrupted or insecure files. _MAX_TEXT_OPERATOR_DATA = 5 * 1024 * 1024 diff --git a/src/openfermion/utils/operator_utils_test.py b/src/openfermion/utils/operator_utils_test.py index e3a76dfbd..181d9d486 100644 --- a/src/openfermion/utils/operator_utils_test.py +++ b/src/openfermion/utils/operator_utils_test.py @@ -23,26 +23,26 @@ from openfermion.config import DATA_DIRECTORY from openfermion.hamiltonians import fermi_hubbard from openfermion.ops.operators import ( + BosonOperator, FermionOperator, + IsingOperator, MajoranaOperator, - BosonOperator, - QubitOperator, QuadOperator, - IsingOperator, + QubitOperator, ) from openfermion.ops.representations import InteractionOperator -from openfermion.transforms.opconversions import jordan_wigner, bravyi_kitaev -from openfermion.transforms.repconversions import get_interaction_operator from openfermion.testing.testing_utils import random_interaction_operator +from openfermion.transforms.opconversions import bravyi_kitaev, jordan_wigner +from openfermion.transforms.repconversions import get_interaction_operator from openfermion.utils.operator_utils import ( + OperatorUtilsError, count_qubits, + get_file_path, hermitian_conjugated, - is_identity, - save_operator, - OperatorUtilsError, is_hermitian, + is_identity, load_operator, - get_file_path, + save_operator, ) From a5c7098357970a87ad93c9454d7b5937495d7b39 Mon Sep 17 00:00:00 2001 From: mhucka Date: Fri, 26 Sep 2025 19:41:51 +0000 Subject: [PATCH 6/6] Silence a difficult coverage warning Python pytest coverage complains about a line not being covered where a function is being mocked. I don't see a way to actually test this, so for now I added a pragma no cover. --- src/openfermion/utils/operator_utils_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openfermion/utils/operator_utils_test.py b/src/openfermion/utils/operator_utils_test.py index 181d9d486..6d436565d 100644 --- a/src/openfermion/utils/operator_utils_test.py +++ b/src/openfermion/utils/operator_utils_test.py @@ -619,7 +619,7 @@ def mock_getsize(path): if path == self.file_path: return _MAX_BINARY_OPERATOR_DATA + 1 - return original_getsize(path) + return original_getsize(path) # pragma: no cover os.path.getsize = mock_getsize try: