Commit
Merge branch 'master' into master
lafncow authored May 10, 2017
2 parents 6ad5c71 + 5bed9ac commit e6dc5f7
Showing 7 changed files with 198 additions and 94 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,7 +1,9 @@
*.egg-info/
*.pyc
*.pyo
.coverage
.DS_Store
MANIFEST
build/
dist/
htmlcov/
17 changes: 17 additions & 0 deletions .travis.yml
@@ -0,0 +1,17 @@
sudo: false
language: python
python:
- "2.7"
# Coverage doesn't support Python 3.2
- "3.3"
- "3.4"
- "3.5"
- "3.6"
install: pip install coverage coveralls flake8
script:
- flake8 humanhash.py
- coverage run humanhash.py
after_script:
- coverage report
- coverage combine
- coveralls
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,2 @@
include humanhash.py
include README.rst
74 changes: 0 additions & 74 deletions README.md

This file was deleted.

69 changes: 69 additions & 0 deletions README.rst
@@ -0,0 +1,69 @@
humanhash
=========

humanhash provides human-readable representations of digests.

.. image:: https://img.shields.io/travis/blag/humanhash.svg
:target: https://travis-ci.org/blag/humanhash

.. image:: https://img.shields.io/coveralls/blag/humanhash.svg
:target: https://coveralls.io/github/blag/humanhash

.. image:: https://img.shields.io/pypi/v/humanhash3.svg
:target: https://pypi.python.org/pypi/humanhash3

.. image:: https://img.shields.io/pypi/l/humanhash3.svg
:target: https://github.com/blag/humanhash/blob/master/UNLICENSE

.. image:: https://img.shields.io/pypi/pyversions/humanhash3.svg
:target: https://github.com/blag/humanhash/blob/master/.travis.yml

Example
-------

.. code-block:: python

    >>> import humanhash
    >>> digest = '7528880a986c40e78c38115e640da2a1'
    >>> humanhash.humanize(digest)
    'three-georgia-xray-jig'
    >>> humanhash.humanize(digest, words=6)
    'high-mango-white-oregon-purple-charlie'
    >>> humanhash.uuid()
    ('potato-oranges-william-friend', '9d2278759ae24698b1345525bd53358b')

Caveats
-------

Don’t store the humanhash output, as its statistical uniqueness is only
around 1 in 4.3 billion. Its intended use is as a human-readable (and,
most importantly, **memorable**) representation of a longer digest,
unique enough for display in a user interface, where a user may need to
remember or verbally communicate the identity of a hash, without having
to remember a 40-character hexadecimal sequence. Nevertheless, you
should keep original digests around, then pass them through
``humanize()`` only as you’re displaying them.
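The 1-in-4.3-billion figure follows directly from the default output size: four bytes, each mapped to one of 256 words. A quick sanity check (a standalone sketch, not library code):

```python
import math

# Default humanhash output: 4 words, each drawn from a 256-entry wordlist,
# so the output space is 256**4 = 2**32 distinct phrases.
output_space = 256 ** 4
print(output_space)  # 4294967296, i.e. ~4.3 billion

# Birthday bound: expect a ~50% chance of at least one collision after
# roughly sqrt(output_space) distinct digests have been humanized.
print(int(math.sqrt(output_space)))  # 65536
```

This is why the README advises keeping the original digest and humanizing only at display time.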

How It Works
------------

The procedure for generating a humanhash involves compressing the input
to a fixed length (default: 4 bytes), then mapping each of these bytes
to a word in a pre-defined wordlist (a default wordlist is supplied with
the library). This algorithm is consistent, so the same input, given the
same wordlist, will always give the same output. You can also use your
own wordlist, and specify a different number of words for output.
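The procedure above can be sketched end to end. This is a simplified reconstruction rather than the library's exact code, and the 256-entry ``WORDLIST`` here is a hypothetical stand-in for the bundled default (so the output words differ from the real library's):

```python
# Hypothetical stand-in for the library's bundled 256-word list.
WORDLIST = ['word%03d' % i for i in range(256)]

def humanize(hexdigest, words=4, separator='-'):
    # 1. Parse the hex digest into byte values (0-255), two hex chars each.
    byte_vals = [int(hexdigest[i:i + 2], 16)
                 for i in range(0, len(hexdigest), 2)]
    # 2. Compress to `words` bytes: XOR together the bytes of each segment,
    #    clamping any leftover trailing bytes into the final segment.
    seg_size = max(len(byte_vals) // words, 1)
    segments = [0] * words
    for i, b in enumerate(byte_vals):
        segments[min(i // seg_size, words - 1)] ^= b
    # 3. Map each compressed byte through the wordlist.
    return separator.join(WORDLIST[b] for b in segments)

print(humanize('7528880a986c40e78c38115e640da2a1'))
# → word223-word083-word251-word106
```

The output is deterministic: the same digest and wordlist always yield the same phrase, and `words`/`separator` can be varied just as in the library.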

Inspiration
-----------

- `Chroma-Hash`_ - A human-viewable representation of a hash (albeit not
one that can be output on a terminal, or shouted down a hallway).
- `The NATO Phonetic Alphabet`_ - A great example of the trade-off
between clarity of human communication and byte-wise efficiency of
representation.

.. _Chroma-Hash: http://mattt.github.com/Chroma-Hash/
.. _The NATO Phonetic Alphabet: http://en.wikipedia.org/wiki/NATO_phonetic_alphabet
96 changes: 79 additions & 17 deletions humanhash.py
@@ -8,6 +8,21 @@
import operator
import uuid as uuidlib
import math
import sys

if sys.version_info.major == 3:
# Map returns an iterator in PY3K
py3_map = map

def map(*args, **kwargs):
return [i for i in py3_map(*args, **kwargs)]

# Functionality of xrange is in range now
xrange = range

# Reduce moved to functools
# http://www.artima.com/weblogs/viewpost.jsp?thread=98196
from functools import reduce


DEFAULT_WORDLIST = (
@@ -49,6 +64,12 @@
'zulu')


# Use a simple XOR checksum-like function for compression.
# checksum = lambda _bytes: reduce(operator.xor, _bytes, 0)
def checksum(checksum_bytes):
return reduce(operator.xor, checksum_bytes, 0)


class HumanHasher(object):

"""
@@ -65,12 +86,35 @@ class HumanHasher(object):
"""

def __init__(self, wordlist=DEFAULT_WORDLIST):
"""
>>> HumanHasher(wordlist=[])
Traceback (most recent call last):
...
ValueError: Wordlist must have exactly 256 items
"""
if len(wordlist) != 256:
raise ArgumentError("Wordlist must have exactly 256 items")
raise ValueError("Wordlist must have exactly 256 items")
self.wordlist = wordlist

def humanize(self, hexdigest, words=4, separator='-'):
def humanize_list(self, hexdigest, words=4):
"""
Humanize a given hexadecimal digest, returning a list of words.
Change the number of words output by specifying `words`.
>>> digest = '60ad8d0d871b6095808297'
>>> HumanHasher().humanize_list(digest)
['sodium', 'magnesium', 'nineteen', 'hydrogen']
"""
# Gets a list of byte values between 0-255.
bytes_ = map(lambda x: int(x, 16),
map(''.join, zip(hexdigest[::2], hexdigest[1::2])))
# Compress an arbitrary number of bytes to `words`.
compressed = self.compress(bytes_, words)

return [str(self.wordlist[byte]) for byte in compressed]

def humanize(self, hexdigest, words=4, separator='-'):
"""
Humanize a given hexadecimal digest.
@@ -80,33 +124,33 @@ def humanize(self, hexdigest, words=4, separator='-'):
>>> digest = '60ad8d0d871b6095808297'
>>> HumanHasher().humanize(digest)
'sodium-magnesium-nineteen-hydrogen'
>>> HumanHasher().humanize(digest, words=6)
'hydrogen-pasta-mississippi-august-may-lithium'
>>> HumanHasher().humanize(digest, separator='*')
'sodium*magnesium*nineteen*hydrogen'
"""

# Gets a list of byte values between 0-255.
bytes = map(lambda x: int(x, 16),
map(''.join, zip(hexdigest[::2], hexdigest[1::2])))
# Compress an arbitrary number of bytes to `words`.
compressed = self.compress(bytes, words)
# Map the compressed byte values through the word list.
return separator.join(self.wordlist[byte] for byte in compressed)
return separator.join(self.humanize_list(hexdigest, words))

@staticmethod
def compress(bytes, target):
def compress(bytes_, target):

"""
Compress a list of byte values to a fixed target length.
>>> bytes = [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151]
>>> HumanHasher.compress(bytes, 4)
>>> bytes_ = [96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151]
>>> list(HumanHasher.compress(bytes_, 4))
[205, 128, 156, 96]
If there are fewer than the target number of bytes, the input bytes will be returned
>>> HumanHasher.compress(bytes, 15)
>>> HumanHasher.compress(bytes_, 15) # doctest: +ELLIPSIS
[96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151]
"""

length = len(bytes)
bytes_list = list(bytes_)

length = len(bytes_list)
# If there are fewer than the target number of bytes, the input bytes will be returned
if target >= length:
return bytes
@@ -119,7 +163,7 @@ def compress(bytes, target):
seg_num = 0

# Use a simple XOR checksum-like function for compression
for i, byte in enumerate(bytes):
for i, byte in enumerate(bytes_list):
# Divide the byte index by the segment size to determine which segment to place it in
# Floor to create a valid segment index
# Min to ensure the index is within `target`
@@ -136,12 +180,30 @@ def uuid(self, **params):
Returns `(human_repr, full_digest)`. Accepts the same keyword arguments
as :meth:`humanize` (they'll be passed straight through).
>>> import re
>>> hh = HumanHasher()
>>> result = hh.uuid()
>>> type(result) == tuple
True
>>> bool(re.match(r'^(\w+-){3}\w+$', result[0]))
True
>>> bool(re.match(r'^[0-9a-f]{32}$', result[1]))
True
"""
digest = str(uuidlib.uuid4()).replace('-', '')
return self.humanize(digest, **params), digest


DEFAULT_HASHER = HumanHasher()
uuid = DEFAULT_HASHER.uuid
humanize = DEFAULT_HASHER.humanize

humanize = DEFAULT_HASHER.humanize
humanize_list = DEFAULT_HASHER.humanize_list

if __name__ == "__main__":
import doctest
# http://stackoverflow.com/a/25691978/6461688
# This will force Python to exit with the number of failing tests as the
# exit code, which should be interpreted as a failing test by Travis.
sys.exit(doctest.testmod())
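The compression step this diff reworks can be isolated into a standalone sketch. `compress` below is a simplified reconstruction of the XOR-fold described by the comments in the diff, exercised with the same byte values as its doctest:

```python
def compress(byte_vals, target):
    """XOR-fold a list of byte values down to `target` bytes."""
    length = len(byte_vals)
    # Fewer bytes than requested: nothing to compress, return the input.
    if target >= length:
        return list(byte_vals)
    seg_size = length // target
    segments = [0] * target
    for i, byte in enumerate(byte_vals):
        # Floor-divide the index to pick a segment; the min() clamp folds
        # any leftover trailing bytes into the final segment.
        segments[min(i // seg_size, target - 1)] ^= byte
    return segments

print(compress([96, 173, 141, 13, 135, 27, 96, 149, 128, 130, 151], 4))
# → [205, 128, 156, 96], matching the doctest in the diff above
```

With 11 input bytes and `target=4`, each segment holds two bytes and the final segment absorbs the remaining five, so the result stays exactly `target` long.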
32 changes: 29 additions & 3 deletions setup.py
@@ -3,12 +3,38 @@

from distutils.core import setup

with open('README.rst', 'r') as f:
long_description = f.read()

setup(
name='humanhash',
version='0.0.1',
name='humanhash3',
version='0.0.5',
description='Human-readable representations of digests.',
long_description=long_description,
author='Zachary Voase',
author_email='[email protected]',
url='http://github.com/zacharyvoase/humanhash',
url='https://github.com/blag/humanhash',
py_modules=['humanhash'],
license='Public Domain',
classifiers=[
'Development Status :: 3 - Alpha',
'Intended Audience :: Developers',
'Intended Audience :: End Users/Desktop',
'Topic :: Security',
'Topic :: Utilities',

# Pick your license as you wish (should match "license" above)
'License :: Public Domain',

# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
# 'Programming Language :: Python :: 3.2', # Not tested
'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],
)
