cea-hpc · qa-cea · Aug 26, 2025 · Aug 26, 2025 · Aug 26, 2025 · valeriyoann
diff --git a/lib/rift/Annex.py b/lib/rift/Annex.py
@@ -90,7 +90,7 @@ def is_binary(filepath, blocksize=65536):
 
 def hashfile(filepath, iosize=65536):
     """Compute a digest of filepath content."""
-    hasher = hashlib.md5()
+    hasher = hashlib.sha3_256()
     with open(filepath, 'rb') as srcfile:
         buf = srcfile.read(iosize)
         while len(buf) > 0:
@@ -123,10 +123,20 @@ def is_pointer(cls, filepath):
         identifier.
         """
         meta = os.stat(filepath)
+
+        # MD5
         if meta.st_size == 32:
+            logging.warning("Using deprecated hash algorithm (MD5)")
             with open(filepath, encoding='utf-8') as fh:
                 identifier = fh.read(32)
             return all(byte in string.hexdigits for byte in identifier)
+
+        # SHA3 256
+        elif meta.st_size == 64:
+            with open(filepath, encoding='utf-8') as fh:
+                identifier = fh.read(64)
+            return all(byte in string.hexdigits for byte in identifier)
+
         return False
 
     def get(self, identifier, destpath):
@@ -226,13 +236,29 @@ def list(self):
             if not filename.endswith('.info'):
                 info = self._load_metadata(filename)
                 names = info.get('filenames', [])
-                for annexed_file in names.values():
-                    insertion_time = annexed_file['date']
-                    insertion_time = datetime.datetime.strptime(insertion_time, "%c").timestamp()
-
-                #The file size must come from the filesystem
-                meta = os.stat(os.path.join(self.path, filename))
-                yield filename, meta.st_size, insertion_time, names
+                for annexed_file, details in names.items():
+                    insertion_time = details['date']
+
+                    # Handle different date formats (old method)
+                    if isinstance(insertion_time, str):
+                        for fmt in ('%a %b %d %H:%M:%S %Y', '%a %d %b %Y %H:%M:%S %p %Z'):
+                            try:
+                                insertion_time = datetime.datetime.strptime(insertion_time, fmt).timestamp()
+                                break
+                            except ValueError:
+                                continue
+                        else:
+                            raise ValueError(f"Invalid date format in metadata: {insertion_time}")
+
+                    # UNIX timestamp
+                    elif isinstance(insertion_time, (int, float)):
+                        insertion_time = insertion_time
+                    else:
+                        raise ValueError("Invalid date format in metadata")
+
+                    # The file size must come from the filesystem
+                    meta = os.stat(os.path.join(self.path, filename))
+                    yield filename, meta.st_size, insertion_time, [annexed_file]
 
     def push(self, filepath):
         """
@@ -254,21 +280,21 @@ def push(self, filepath):
         destinfo = None
         if os.path.exists(destpath):
             destinfo = os.stat(destpath)
-        if destinfo and destinfo.st_size == originfo.st_size and \
-          filename in metadata.get('filenames', {}):
-            logging.debug('%s is already into annex, skipping it', filename)
-
-        else:
-            # Update them and write them back
-            fileset = metadata.setdefault('filenames', {})
-            fileset.setdefault(filename, {})
-            fileset[filename]['date'] = time.strftime("%c")
-            self._save_metadata(digest, metadata)
-
-            # Move binary file to annex
-            logging.debug('Importing %s into annex (%s)', filepath, digest)
-            shutil.copyfile(filepath, destpath)
-            os.chmod(destpath, self.WMODE)
+            if destinfo and destinfo.st_size == originfo.st_size and \
+            filename in metadata.get('filenames', {}):
+                logging.debug('%s is already into annex, skipping it', filename)
+                return
+
+        # Update them and write them back
+        fileset = metadata.setdefault('filenames', {})
+        fileset.setdefault(filename, {})
+        fileset[filename]['date'] = time.time()  # Unix timestamp
+        self._save_metadata(digest, metadata)
+
+        # Move binary file to annex
+        logging.debug('Importing %s into annex (%s)', filepath, digest)
+        shutil.copyfile(filepath, destpath)
+        os.chmod(destpath, self.WMODE)
 
         # Verify permission are correct before copying
         os.chmod(filepath, self.RMODE)

diff --git a/tests/Annex.py b/tests/Annex.py
@@ -192,22 +192,17 @@ def test_delete(self):
             self.annex.get_by_path(source_file.name, '/dev/null')
 
     def test_list(self):
-        """ Test list method """
+        """Test the list method"""
 
         source_size = os.stat(self.source.name).st_size
-        source_insertion_time =  datetime.datetime.strptime(time.strftime('%c'), '%c').timestamp()
-        # Get the current time with the %c format and convert it to unix timestamp to
-        # have the same method as annex.list (in terms of precision)
+        source_insertion_time = time.time()
         self.annex.push(self.source.name)
 
-        # Check if the file pointer is present in the annex list output
-        # by checking it's attributes
         for filename, size, insertion_time, names in self.annex.list():
             self.assertEqual(get_digest_from_path(self.source.name), filename)
             self.assertEqual(source_size, size)
-            # As tests can take time to run, accept less or equal 1 second shift
-            self.assertAlmostEqual(source_insertion_time, insertion_time, delta=1)
-            self.assertTrue(os.path.basename(self.source.name) in names.keys())
+            self.assertAlmostEqual(source_insertion_time, insertion_time, delta=1) # delta for potentials delay
+        self.assertTrue(os.path.basename(self.source.name) in names)
 
     def test_push(self):
         """ Test push method """