Small performance improvements

cdgriffith · May 21, 2024 · e0d70ed · e0d70ed
1 parent f445e2e
commit e0d70ed
Show file tree

Hide file tree

Showing 7 changed files with 21 additions and 29 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,7 +17,6 @@ repos:
     - id: check-docstring-first
     - id: debug-statements
     - id: requirements-txt-fixer
-    - id: fix-encoding-pragma
     - id: fix-byte-order-marker
     # General quality checks
     - id: mixed-line-ending

diff --git a/puremagic/__init__.py b/puremagic/__init__.py
@@ -1,4 +1,3 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 from puremagic.main import __version__, __author__
 from puremagic.main import *
diff --git a/puremagic/__main__.py b/puremagic/__main__.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 from puremagic.main import command_line_entry
 
 command_line_entry()
diff --git a/puremagic/main.py b/puremagic/main.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 """
 puremagic is a pure python module that will identify a file based off its
 magic numbers. It is designed to be minimalistic and inherently cross platform
@@ -14,12 +13,11 @@
 """
 from __future__ import annotations
 
-import os
 import json
-import binascii
-from itertools import chain
+import os
+from binascii import unhexlify
 from collections import namedtuple
-from typing import Union, Tuple, List, Dict, Optional
+from itertools import chain
 
 __author__ = "Chris Griffith"
 __version__ = "1.23"
@@ -82,15 +80,13 @@ def _magic_data(
     extensions = [_create_puremagic(x) for x in data["extension_only"]]
     multi_part_extensions = {}
     for file_match, option_list in data["multi-part"].items():
-        multi_part_extensions[binascii.unhexlify(file_match.encode("ascii"))] = [
-            _create_puremagic(x) for x in option_list
-        ]
+        multi_part_extensions[unhexlify(file_match.encode("ascii"))] = [_create_puremagic(x) for x in option_list]
     return headers, footers, extensions, multi_part_extensions
 
 
 def _create_puremagic(x: list) -> PureMagic:
     return PureMagic(
-        byte_match=binascii.unhexlify(x[0].encode("ascii")),
+        byte_match=unhexlify(x[0].encode("ascii")),
         offset=x[1],
         extension=x[2],
         mime_type=x[3],
@@ -120,15 +116,17 @@ def _confidence(matches, ext=None) -> list[PureMagicWithConfidence]:
     """Rough confidence based on string length and file extension"""
     results = []
     for match in matches:
-        con = 0.8 if len(match.byte_match) >= 9 else float("0.{0}".format(len(match.byte_match)))
+        con = 0.8 if len(match.byte_match) >= 9 else float(f"0.{len(match.byte_match)}")
         if con >= 0.1 and ext and ext == match.extension:
             con = 0.9
         results.append(PureMagicWithConfidence(confidence=con, **match._asdict()))
 
     if not results and ext:
-        for magic_row in extension_only_array:
-            if ext == magic_row.extension:
-                results.append(PureMagicWithConfidence(confidence=0.1, **magic_row._asdict()))
+        results = [
+            PureMagicWithConfidence(confidence=0.1, **magic_row._asdict())
+            for magic_row in extension_only_array
+            if ext == magic_row.extension
+        ]
 
     if not results:
         raise PureError("Could not identify file")
@@ -141,7 +139,7 @@ def _identify_all(header: bytes, footer: bytes, ext=None) -> list[PureMagicWithC
 
     # Capture the length of the data
     # That way we do not try to identify bytes that don't exist
-    matches = list()
+    matches = []
     for magic_row in magic_header_array:
         start = magic_row.offset
         end = magic_row.offset + len(magic_row.byte_match)
@@ -210,7 +208,7 @@ def _file_details(filename: os.PathLike | str) -> tuple[bytes, bytes]:
         head = fin.read(max_head)
         try:
             fin.seek(-max_foot, os.SEEK_END)
-        except IOError:
+        except OSError:
             fin.seek(0)
         foot = fin.read()
     return head, foot
@@ -242,7 +240,7 @@ def ext_from_filename(filename: os.PathLike | str) -> str:
         base, ext = str(filename).lower().rsplit(".", 1)
     except ValueError:
         return ""
-    ext = ".{0}".format(ext)
+    ext = f".{ext}"
     all_exts = [x.extension for x in chain(magic_header_array, magic_footer_array)]
 
     if base[-4:].startswith("."):
@@ -379,12 +377,12 @@ def command_line_entry(*args):
 
     for fn in args.files:
         if not os.path.exists(fn):
-            print("File '{0}' does not exist!".format(fn))
+            print(f"File '{fn}' does not exist!")
             continue
         try:
-            print("'{0}' : {1}".format(fn, from_file(fn, args.mime)))
+            print(f"'{fn}' : {from_file(fn, args.mime)}")
         except PureError:
-            print("'{0}' : could not be Identified".format(fn))
+            print(f"'{fn}' : could not be Identified")
 
 
 if __name__ == "__main__":

diff --git a/scripts/parse_ftk_kessler_sigs.py b/scripts/parse_ftk_kessler_sigs.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
 This is a very ugly helper script to keep up to date with file types in
 Gary C. Kessler's FTK_sigs_GCK archive.
@@ -60,6 +59,6 @@
     elif sig["SIG"] not in known_sigs:
         for ext in sig["EXT_NAME"]:
             if ext != "(none)":
-                print("\t\t{},".format(json.dumps([sig["SIG"], offset, ".{}".format(ext), "", sig["DESCRIPTION"]])))
+                print("\t\t{},".format(json.dumps([sig["SIG"], offset, f".{ext}", "", sig["DESCRIPTION"]])))
             else:
                 print("\t\t{},".format(json.dumps([sig["SIG"], offset, "", "", sig["DESCRIPTION"]])))
diff --git a/setup.py b/setup.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
 
 from setuptools import setup
 import os
@@ -8,12 +7,12 @@
 
 root = os.path.abspath(os.path.dirname(__file__))
 
-with open(os.path.join(root, "puremagic", "main.py"), "r") as reuse_file:
+with open(os.path.join(root, "puremagic", "main.py")) as reuse_file:
     reuse_content = reuse_file.read()
 
 attrs = dict(re.findall(r"__([a-z]+)__ *= *['\"](.+)['\"]", reuse_content))
 
-with open("README.rst", "r") as readme_file:
+with open("README.rst") as readme_file:
     long_description = readme_file.read()
 
 setup(

diff --git a/test/test_common_extensions.py b/test/test_common_extensions.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import unittest
 from tempfile import NamedTemporaryFile
 import os
@@ -55,7 +54,7 @@ def group_test(self, directory):
         if ext_failures:
             raise AssertionError(
                 "The following files did not have the expected extensions: {}".format(
-                    ", ".join(['"{}" expected "{}"'.format(item, ext) for item, ext in ext_failures])
+                    ", ".join([f'"{item}" expected "{ext}"' for item, ext in ext_failures])
                 )
             )
         if mime_failures: