google · mattyclarkson · Oct 31, 2013 · Oct 17, 2013
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+*.pyc
diff --git a/cpplint/cpplint.py b/cpplint/cpplint.py
@@ -52,6 +52,21 @@
 import sys
 import unicodedata
 
+Py3k = (sys.version_info[0] == 3)
+"""A boolean to check if we are running Python3000"""
+
+try:
+    xrange(0,1)
+except NameError:
+    xrange = range
+try:
+    unicode
+except NameError:
+    basestring = unicode = str
+try:
+    long
+except NameError:
+    long = int
 
 _USAGE = """
 Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...]
@@ -840,7 +855,11 @@ def IncrementErrorCount(self, category):
 
   def PrintErrorCounts(self):
     """Print a summary of errors by category, and the total."""
-    for category, count in self.errors_by_category.iteritems():
+    try:
+        items = self.errors_by_category.iteritems()
+    except AttributeError:
+        items = self.errors_by_category.items()
+    for category, count in items:
       sys.stderr.write('Category \'%s\' errors found: %d\n' %
                        (category, count))
     sys.stderr.write('Total errors found: %d\n' % self.error_count)
@@ -1815,7 +1834,7 @@ def CheckForBadCharacters(filename, lines, error):
     error: The function to call with any errors found.
   """
   for linenum, line in enumerate(lines):
-    if u'\ufffd' in line:
+    if unicode(b'\xef\xbf\xbd', 'utf-8') in line:
       error(filename, linenum, 'readability/utf8', 5,
             'Line contains invalid UTF-8 (or Unicode replacement character).')
     if '\0' in line:
@@ -4701,7 +4720,10 @@ def _GetTextInside(text, start_pattern):
 
   # Give opening punctuations to get the matching close-punctuations.
   matching_punctuation = {'(': ')', '{': '}', '[': ']'}
-  closing_punctuation = set(matching_punctuation.itervalues())
+  try:
+    closing_punctuation = set(matching_punctuation.values())
+  except AttributeError:
+    closing_punctuation = set(matching_punctuation.itervalues())
 
   # Find the position to start extracting text.
   match = re.search(start_pattern, text, re.M)
@@ -5671,7 +5693,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
 
   # include_dict is modified during iteration, so we iterate over a copy of
   # the keys.
-  header_keys = include_dict.keys()
+  header_keys = list(include_dict.keys())
   for header in header_keys:
     (same_module, common_path) = FilesBelongToSameModule(abs_filename, header)
     fullpath = common_path + header
@@ -6306,10 +6328,11 @@ def main():
 
   # Change stderr to write with replacement characters so we don't die
   # if we try to print something containing non-ASCII characters.
-  sys.stderr = codecs.StreamReaderWriter(sys.stderr,
-                                         codecs.getreader('utf8'),
-                                         codecs.getwriter('utf8'),
-                                         'replace')
+  if not Py3k:
+      sys.stderr = codecs.StreamReaderWriter(sys.stderr,
+                                             codecs.getreader('utf8'),
+                                             codecs.getwriter('utf8'),
+                                             'replace')
 
   _cpplint_state.ResetErrorCounts()
   for filename in filenames:
@@ -6321,3 +6344,4 @@ def main():
 
 if __name__ == '__main__':
   main()
+
diff --git a/cpplint/cpplint_unittest.py b/cpplint/cpplint_unittest.py
@@ -41,7 +41,26 @@
 import unittest
 
 import cpplint
+import sys
 
+try:
+    xrange(0,1)
+except NameError:
+    xrange = range
+try:
+    unicode
+except NameError:
+    basestring = unicode = str
+try:
+    long
+except NameError:
+    long = int
+Py3k = (sys.version_info[0] == 3)
+if Py3k:
+  chrstr = bytes
+else:
+  def chrstr(l):
+    return ''.join([chr(x) for x in l])
 
 # This class works as an error collector and replaces cpplint.Error
 # function for the unit tests.  We also verify each category we see
@@ -305,8 +324,11 @@ def testFalsePositivesNoError(self):
   # Test get line width.
   def testGetLineWidth(self):
     self.assertEquals(0, cpplint.GetLineWidth(''))
-    self.assertEquals(10, cpplint.GetLineWidth(u'x' * 10))
-    self.assertEquals(16, cpplint.GetLineWidth(u'都|道|府|県|支庁'))
+    self.assertEquals(10, cpplint.GetLineWidth(unicode('x') * 10))
+    try:
+      self.assertEquals(16, cpplint.GetLineWidth('都|道|府|県|支庁'.decode('utf-8')))
+    except AttributeError:
+      self.assertEquals(16, cpplint.GetLineWidth('都|道|府|県|支庁'))
 
   def testGetTextInside(self):
     self.assertEquals('', cpplint._GetTextInside('fun()', r'fun\('))
@@ -2928,7 +2950,7 @@ def DoTest(self, raw_bytes, has_invalid_utf8):
       error_collector = ErrorCollector(self.assert_)
       cpplint.ProcessFileData(
           'foo.cc', 'cc',
-          unicode(raw_bytes, 'utf8', 'replace').split('\n'),
+          raw_bytes.decode('utf-8', 'replace').split('\n'),
           error_collector)
       # The warning appears only once.
       self.assertEquals(
@@ -2938,12 +2960,19 @@ def DoTest(self, raw_bytes, has_invalid_utf8):
               ' (or Unicode replacement character).'
               '  [readability/utf8] [5]'))
 
-    DoTest(self, 'Hello world\n', False)
-    DoTest(self, '\xe9\x8e\xbd\n', False)
-    DoTest(self, '\xe9x\x8e\xbd\n', True)
+    # For Python 2/3 compatibility we must use the chrstr shim to create
+    # the byte strings because Python3 automatically tries to encode them to
+    # UTF-8. Normal strings must be encoded to ascii to make the DoTest
+    # function work correctly on Python3.
+    DoTest(self, 'Hello world\n'.encode('ascii'), False)
+    #                  '\xe9 \x8e \xbd  \n'
+    DoTest(self, chrstr([233, 142, 189, 10]), False)
+    #                  '\xe9   x  \x8e \xbd  \n'
+    DoTest(self, chrstr([233, 120, 142, 189, 10]), True)
     # This is the encoding of the replacement character itself (which
     # you can see by evaluating codecs.getencoder('utf8')(u'\ufffd')).
-    DoTest(self, '\xef\xbf\xbd\n', True)
+    #                  '\xef \xbf \xbd  \n'
+    DoTest(self, chrstr([239, 191, 189, 10]), True)
 
   def testBadCharacters(self):
     # Test for NUL bytes only
@@ -2961,7 +2990,7 @@ def testBadCharacters(self):
     cpplint.ProcessFileData(
         'nul_utf8.cc', 'cc',
         ['// Copyright 2014 Your Company.',
-         unicode('\xe9x\0', 'utf8', 'replace'), ''],
+         chrstr([233, 120, 0]).decode('utf-8', 'replace'), ''],
         error_collector)
     self.assertEquals(
         error_collector.Results(),