diff --git a/nltk_contrib/align/__init__.py b/nltk_contrib/align/__init__.py
index 8929336..2eed021 100644
--- a/nltk_contrib/align/__init__.py
+++ b/nltk_contrib/align/__init__.py
@@ -9,8 +9,8 @@
Classes and interfaces for aligning text.
"""
-from api import *
-from gale_church import *
+from .api import *
+from .gale_church import *
__all__ = []
diff --git a/nltk_contrib/align/align.py b/nltk_contrib/align/align.py
index c0e85dc..0ba4b09 100644
--- a/nltk_contrib/align/align.py
+++ b/nltk_contrib/align/align.py
@@ -7,16 +7,16 @@
# For license information, see LICENSE.TXT
import sys
-from itertools import izip
+
from nltk.metrics import scores
## --NLTK--
## Import the nltk.aligner module, which defines the aligner interface
-from api import *
+from .api import *
-import distance_measures
-import align_util
+from . import distance_measures
+from . import align_util
# Based on Gale & Church 1993, "A Program for Aligning Sentences in Bilingual Corpora"
# This is a Python version of the C implementation by Mike Riley presented in the appendix
@@ -82,10 +82,10 @@ def get_delimited_regions(self, base_type, input_file1, input_file2, hard_delimi
hard_regions2 = align_util.get_paragraphs_sentences(lines2, hard_delimiter, soft_delimiter)
if (len(hard_regions1) != len(hard_regions2)):
- print "align_regions: input files do not contain the same number of hard regions" + '\n'
- print "%s" % hard_delimiter + '\n'
- print "%s has %d and %s has %d" % (input_file1, len(hard_regions1), \
- input_file2, len(hard_regions2) + '\n')
+            print("align_regions: input files do not contain the same number of hard regions" + '\n')
+            print("%s" % hard_delimiter + '\n')
+            print("%s has %d and %s has %d" % (input_file1, len(hard_regions1), \
+                                     input_file2, len(hard_regions2)) + '\n')
return ([],[])
return (hard_regions1, hard_regions2)
@@ -154,7 +154,7 @@ def _seq_align(self, x, y, nx, ny):
path_x = [[0] * second_len for c in range(first_len)]
path_y = [[0] * second_len for c in range(first_len)]
- d1 = d2 = d3 = d4 = d5 = d6 = sys.maxint
+ d1 = d2 = d3 = d4 = d5 = d6 = sys.maxsize
for j in range(0, ny + 1):
for i in range(0, nx + 1):
@@ -163,46 +163,46 @@ def _seq_align(self, x, y, nx, ny):
d1 = distances[i-1][j-1] + \
self.dist_funct(x[i-1], y[j-1], 0, 0)
else:
- d1 = sys.maxint
+ d1 = sys.maxsize
if (i > 0):
#/* deletion */
d2 = distances[i-1][j] + \
self.dist_funct(x[i-1], 0, 0, 0)
else:
- d2 = sys.maxint
+ d2 = sys.maxsize
if (j > 0):
#/* insertion */
d3 = distances[i][j-1] + \
self.dist_funct(0, y[j-1], 0, 0)
else:
- d3 = sys.maxint
+ d3 = sys.maxsize
if (i > 1 and j > 0):
#/* contraction */
d4 = distances[i-2][j-1] + \
self.dist_funct(x[i-2], y[j-1], x[i-1], 0)
else:
- d4 = sys.maxint
+ d4 = sys.maxsize
if (i > 0 and j > 1):
#/* expansion */
d5 = distances[i-1][j-2] + \
self.dist_funct(x[i-1], y[j-2], 0, y[j-1])
else:
- d5 = sys.maxint
+ d5 = sys.maxsize
if (i > 1 and j > 1):
#/* melding */
d6 = distances[i-2][j-2] + \
self.dist_funct(x[i-2], y[j-2], x[i-1], y[j-1])
else:
- d6 = sys.maxint
+ d6 = sys.maxsize
dmin = min(d1, d2, d3, d4, d5, d6)
- if (dmin == sys.maxint):
+ if (dmin == sys.maxsize):
distances[i][j] = 0
elif (dmin == d1):
distances[i][j] = d1
@@ -341,7 +341,7 @@ def _seq_align_extended(self, x, y, nx, ny):
path_x = [[0] * second_len for c in range(first_len)]
path_y = [[0] * second_len for c in range(first_len)]
- d1 = d2 = d3 = d4 = d5 = d6 = d7 = d8 = d9 = d10 = d11 = sys.maxint
+ d1 = d2 = d3 = d4 = d5 = d6 = d7 = d8 = d9 = d10 = d11 = sys.maxsize
for j in range(0, ny + 1):
for i in range(0, nx + 1):
@@ -350,81 +350,81 @@ def _seq_align_extended(self, x, y, nx, ny):
d1 = distances[i-1][j-1] + \
self.dist_funct(x[i-1], y[j-1], 0, 0, 0, 0)
else:
- d1 = sys.maxint
+ d1 = sys.maxsize
if (i > 0):
#/* deletion */ /* 1-0 */
d2 = distances[i-1][j] + \
self.dist_funct(x[i-1], 0, 0, 0, 0, 0)
else:
- d2 = sys.maxint
+ d2 = sys.maxsize
if (j > 0):
#/* insertion */ /* 0-1 */
d3 = distances[i][j-1] + \
self.dist_funct(0, y[j-1], 0, 0, 0, 0)
else:
- d3 = sys.maxint
+ d3 = sys.maxsize
if (i > 1 and j > 0):
#/* contraction */ /* 2-1 */
d4 = distances[i-2][j-1] + \
self.dist_funct(x[i-2], y[j-1], x[i-1], 0, 0, 0)
else:
- d4 = sys.maxint
+ d4 = sys.maxsize
if (i > 0 and j > 1):
#/* expansion */ /* 1-2 */
d5 = distances[i-1][j-2] + \
self.dist_funct(x[i-1], y[j-2], 0, y[j-1], 0, 0)
else:
- d5 = sys.maxint
+ d5 = sys.maxsize
if (i > 1 and j > 1):
#/* melding */ /* 2-2 */
d6 = distances[i-2][j-2] + \
self.dist_funct(x[i-2], y[j-2], x[i-1], y[j-1], 0, 0)
else:
- d6 = sys.maxint
+ d6 = sys.maxsize
if (i > 2 and j > 0):
#/* contraction */ /* 3-1 */
d7 = distances[i-3][j-1] + \
self.dist_funct(x[i-3], y[j-1], x[i-2], 0, x[i-1], 0)
else:
- d7 = sys.maxint
+ d7 = sys.maxsize
if (i > 2 and j > 1):
#/* contraction */ /* 3-2 */
d8 = distances[i-3][j-2] + \
self.dist_funct(x[i-3], y[j-1], x[i-2], y[j-2], x[i-1], 0)
else:
- d8 = sys.maxint
+ d8 = sys.maxsize
if (i > 0 and j > 2):
#/* expansion */ /* 1-3 */
d9 = distances[i-1][j-3] + \
self.dist_funct(x[i-1], y[j-3], 0, y[j-2], 0, y[j-1])
else:
- d9 = sys.maxint
+ d9 = sys.maxsize
if (i > 1 and j > 2):
#/* expansion */ /* 2-3 */
d10 = distances[i-2][j-3] + \
self.dist_funct(x[i-3], y[j-3], x[i-2], y[j-2], 0, y[j-1])
else:
- d10 = sys.maxint
+ d10 = sys.maxsize
if (i > 2 and j > 2):
#/* melding */ /* 3-3 */
d11 = distances[i-3][j-3] + \
self.dist_funct(x[i-3], y[j-3], x[i-2], y[j-2], x[i-1], y[j-1])
else:
- d11 = sys.maxint
+ d11 = sys.maxsize
dmin = min(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11)
- if (dmin == sys.maxint):
+ if (dmin == sys.maxsize):
distances[i][j] = 0
elif (dmin == d1):
distances[i][j] = d1
@@ -619,13 +619,13 @@ def demo_eval(alignments, gold_file):
"""
alignment_mappings = align_util2.get_alignment_links(alignments)
- print "Alignment mappings: %s" % alignment_mappings
+ print(("Alignment mappings: %s" % alignment_mappings))
#test_values = align_util.get_test_values(alignments)
reference_values = align_util2.get_reference_values(gold_file)
- print "Reference values: %s" % reference_values
+ print(("Reference values: %s" % reference_values))
#accuracy = scores.accuracy(reference_values, test_values)
@@ -653,7 +653,7 @@ def demo():
gc_alignment = gc.batch_align(regions1, regions2)
- print "Alignment0: %s" % gc_alignment
+ print(("Alignment0: %s" % gc_alignment))
demo_eval(gc_alignment, gold_file)
@@ -675,7 +675,7 @@ def demo():
gc_alignment = gc.batch_align(regions1, regions2)
- print "Alignment1: %s" % gc_alignment
+ print(("Alignment1: %s" % gc_alignment))
demo_eval(gc_alignment, gold_file)
@@ -694,7 +694,7 @@ def demo():
standard_alignment2 = std.batch_align(s2, t2)
- print "Alignment2: %s" % standard_alignment2
+ print(("Alignment2: %s" % standard_alignment2))
# demo 4
@@ -703,14 +703,14 @@ def demo():
standard_alignment3 = std.align(s3, t3)
- print "Alignment3: %s" % standard_alignment3
+ print(("Alignment3: %s" % standard_alignment3))
# demo 5
top_down_alignments = std.recursive_align(s3, t3)
for alignment in top_down_alignments:
- print "Top down align: %s" % alignment
+ print(("Top down align: %s" % alignment))
if __name__=='__main__':
demo()
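
The hunks above replace the Python 2 sentinel sys.maxint with sys.maxsize throughout the Gale-Church dynamic-programming recurrence. A minimal sketch of the sentinel pattern, under the assumption that it only marks "no admissible move yet" (names and costs below are made up, not part of the patch):

import sys

NO_CANDIDATE = sys.maxsize   # Python 3 stand-in for the removed sys.maxint

def best_cost(*candidates):
    # Return the cheapest admissible candidate, or None when every move
    # was ruled out (all candidates still hold the sentinel).
    dmin = min(candidates)
    return None if dmin == NO_CANDIDATE else dmin

d1, d2, d3 = 10, NO_CANDIDATE, 7       # made-up costs for three of the six moves
assert best_cost(d1, d2, d3) == 7
assert best_cost(NO_CANDIDATE, NO_CANDIDATE) is None
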
diff --git a/nltk_contrib/align/align_regions.py b/nltk_contrib/align/align_regions.py
index bfaf51b..ba2876b 100755
--- a/nltk_contrib/align/align_regions.py
+++ b/nltk_contrib/align/align_regions.py
@@ -7,8 +7,8 @@
from nltk.metrics import scores
-import distance_measures
-import alignment_util
+from . import distance_measures
+from . import alignment_util
##//////////////////////////////////////////////////////
## Alignment
@@ -81,7 +81,7 @@ def set_alignment_mappings(self):
self.soft_regions_index)
self.alignment_mappings.append(align_triple)
else:
- print "not supported alignment type"
+ print("not supported alignment type")
##//////////////////////////////////////////////////////
## Aligner
@@ -132,10 +132,10 @@ def align_regions(self, dist_funct, debug=False, verbose=False):
(hard_regions2, number_of_hard_regions2) = tmp.find_sub_regions(self.hard_delimiter)
if (number_of_hard_regions1 != number_of_hard_regions2):
- print "align_regions: input files do not contain the same number of hard regions" + '\n'
- print "%s" % hard_delimiter + '\n'
- print "%s has %d and %s has %d" % (self.input_file1, number_of_hard_regions1, \
- self.input_file2, number_of_hard_regions2) + '\n'
+ print(("align_regions: input files do not contain the same number of hard regions" + '\n'))
+ print(("%s" % hard_delimiter + '\n'))
+ print(("%s has %d and %s has %d" % (self.input_file1, number_of_hard_regions1, \
+ self.input_file2, number_of_hard_regions2) + '\n'))
return
@@ -225,7 +225,7 @@ def seq_align(self, x, y, nx, ny, dist_funct, hard_regions_index):
path_x = [[0] * second_len for c in range(first_len)]
path_y = [[0] * second_len for c in range(first_len)]
- d1 = d2 = d3 = d4 = d5 = d6 = sys.maxint
+ d1 = d2 = d3 = d4 = d5 = d6 = sys.maxsize
for j in range(0, ny + 1):
for i in range(0, nx + 1):
@@ -234,46 +234,46 @@ def seq_align(self, x, y, nx, ny, dist_funct, hard_regions_index):
d1 = distances[i-1][j-1] + \
dist_funct(x[i-1], y[j-1], 0, 0)
else:
- d1 = sys.maxint
+ d1 = sys.maxsize
if (i > 0):
#/* deletion */
d2 = distances[i-1][j] + \
dist_funct(x[i-1], 0, 0, 0)
else:
- d2 = sys.maxint
+ d2 = sys.maxsize
if (j > 0):
#/* insertion */
d3 = distances[i][j-1] + \
dist_funct(0, y[j-1], 0, 0)
else:
- d3 = sys.maxint
+ d3 = sys.maxsize
if (i > 1 and j > 0):
#/* contraction */
d4 = distances[i-2][j-1] + \
dist_funct(x[i-2], y[j-1], x[i-1], 0)
else:
- d4 = sys.maxint
+ d4 = sys.maxsize
if (i > 0 and j > 1):
#/* expansion */
d5 = distances[i-1][j-2] + \
dist_funct(x[i-1], y[j-2], 0, y[j-1])
else:
- d5 = sys.maxint
+ d5 = sys.maxsize
if (i > 1 and j > 1):
#/* melding */
d6 = distances[i-2][j-2] + \
dist_funct(x[i-2], y[j-2], x[i-1], y[j-1])
else:
- d6 = sys.maxint
+ d6 = sys.maxsize
dmin = min(d1, d2, d3, d4, d5, d6)
- if (dmin == sys.maxint):
+ if (dmin == sys.maxsize):
distances[i][j] = 0
elif (dmin == d1):
distances[i][j] = d1
@@ -502,7 +502,7 @@ def demo_eval(alignments, gold_file):
accuracy = scores.accuracy(reference_values, test_values)
- print "accuracy: %.2f" % accuracy
+ print(("accuracy: %.2f" % accuracy))
def demo():
"""
diff --git a/nltk_contrib/align/align_util.py b/nltk_contrib/align/align_util.py
index 2e0271e..e0f1f9e 100644
--- a/nltk_contrib/align/align_util.py
+++ b/nltk_contrib/align/align_util.py
@@ -78,132 +78,132 @@ def get_character_lengths(region):
def print_alignment_text_mapping(alignment_mapping):
entry_num = 0
for entry in alignment_mapping:
- print "--------------------------------"
- print "Entry: %d" % entry_num
+ print("--------------------------------")
+ print(("Entry: %d" % entry_num))
entry_num = entry_num + 1
- print "%s" % str(entry[0])
- print "%s" % str(entry[1])
+ print(("%s" % str(entry[0])))
+ print(("%s" % str(entry[1])))
def print_alignment_index_mapping(alignment_mapping_indices):
entry_num = 0
for entry in alignment_mapping_indices:
- print "--------------------------------"
- print "Indices Entry: %d" % entry_num
+ print("--------------------------------")
+ print(("Indices Entry: %d" % entry_num))
entry_num = entry_num + 1
source = entry[0]
target = entry[1]
- print "%s" % str(source)
- print "%s" % str(target)
+ print(("%s" % str(source)))
+ print(("%s" % str(target)))
def print_alignments(alignments, hard_region1, hard_region2):
hard1_key = 0
hard2_key = 0
- for soft_key in alignments.keys():
+ for soft_key in list(alignments.keys()):
alignment = alignments[soft_key]
if (alignment.category == '1 - 1'):
- print "1-1: %s" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region2[hard2_key]
- print "--------------------------"
+ print(("1-1: %s" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region2[hard2_key]))
+ print("--------------------------")
hard1_key = hard1_key + 1
hard2_key = hard2_key + 1
elif (alignment.category == '1 - 0'):
- print "1-0: %s" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "--------------------------"
+ print(("1-0: %s" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print("--------------------------")
hard1_key = hard1_key + 1
elif (alignment.category == '0 - 1'):
- print "0-1: %s" % alignment.d
- print "--------------------------"
- print "%s" % hard_region2[hard2_key]
- print "--------------------------"
+ print(("0-1: %s" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region2[hard2_key]))
+ print("--------------------------")
hard2_key = hard2_key + 1
elif (alignment.category == '2 - 1'):
- print "2-1: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region1[hard1_key + 1]
- print "%s" % hard_region2[hard2_key]
- print "--------------------------"
+ print(("2-1: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region1[hard1_key + 1]))
+ print(("%s" % hard_region2[hard2_key]))
+ print("--------------------------")
hard1_key = hard1_key + 2
hard2_key = hard2_key + 1
elif (alignment.category == '1 - 2'):
- print "1-2: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region2[hard2_key]
- print "%s" % hard_region2[hard2_key + 1]
- print "--------------------------"
+ print(("1-2: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region2[hard2_key]))
+ print(("%s" % hard_region2[hard2_key + 1]))
+ print("--------------------------")
hard1_key = hard1_key + 1
hard2_key = hard2_key + 2
elif (alignment.category == '2 - 2'):
- print "2-2: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region1[hard1_key + 1]
- print "%s" % hard_region2[hard2_key]
- print "%s" % hard_region2[hard2_key + 1]
- print "--------------------------"
+ print(("2-2: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region1[hard1_key + 1]))
+ print(("%s" % hard_region2[hard2_key]))
+ print(("%s" % hard_region2[hard2_key + 1]))
+ print("--------------------------")
hard1_key = hard1_key + 2
hard2_key = hard2_key + 2
elif (alignment.category == '3 - 1'):
- print "3-1: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region1[hard1_key + 1]
- print "%s" % hard_region1[hard1_key + 2]
- print "%s" % hard_region2[hard2_key]
- print "--------------------------"
+ print(("3-1: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region1[hard1_key + 1]))
+ print(("%s" % hard_region1[hard1_key + 2]))
+ print(("%s" % hard_region2[hard2_key]))
+ print("--------------------------")
hard1_key = hard1_key + 3
hard2_key = hard2_key + 1
elif (alignment.category == '3 - 2'):
- print "3-2: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region1[hard1_key + 1]
- print "%s" % hard_region1[hard1_key + 2]
- print "%s" % hard_region2[hard2_key]
- print "%s" % hard_region2[hard2_key + 1]
- print "--------------------------"
+ print(("3-2: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region1[hard1_key + 1]))
+ print(("%s" % hard_region1[hard1_key + 2]))
+ print(("%s" % hard_region2[hard2_key]))
+ print(("%s" % hard_region2[hard2_key + 1]))
+ print("--------------------------")
hard1_key = hard1_key + 3
hard2_key = hard2_key + 2
elif (alignment.category == '1 - 3'):
- print "1-3: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region2[hard2_key]
- print "%s" % hard_region2[hard2_key + 1]
- print "%s" % hard_region2[hard2_key + 2]
- print "--------------------------"
+ print(("1-3: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region2[hard2_key]))
+ print(("%s" % hard_region2[hard2_key + 1]))
+ print(("%s" % hard_region2[hard2_key + 2]))
+ print("--------------------------")
hard1_key = hard1_key + 1
hard2_key = hard2_key + 3
elif (alignment.category == '2 - 3'):
- print "2-3: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region1[hard1_key + 1]
- print "%s" % hard_region2[hard2_key]
- print "%s" % hard_region2[hard2_key + 1]
- print "%s" % hard_region2[hard2_key + 2]
- print "--------------------------"
+ print(("2-3: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region1[hard1_key + 1]))
+ print(("%s" % hard_region2[hard2_key]))
+ print(("%s" % hard_region2[hard2_key + 1]))
+ print(("%s" % hard_region2[hard2_key + 2]))
+ print("--------------------------")
hard1_key = hard1_key + 2
hard2_key = hard2_key + 3
elif (alignment.category == '3 - 3'):
- print "3-3: %.2f" % alignment.d
- print "--------------------------"
- print "%s" % hard_region1[hard1_key]
- print "%s" % hard_region1[hard1_key + 1]
- print "%s" % hard_region1[hard1_key + 2]
- print "%s" % hard_region2[hard2_key]
- print "%s" % hard_region2[hard2_key + 1]
- print "%s" % hard_region2[hard2_key + 2]
- print "--------------------------"
+ print(("3-3: %.2f" % alignment.d))
+ print("--------------------------")
+ print(("%s" % hard_region1[hard1_key]))
+ print(("%s" % hard_region1[hard1_key + 1]))
+ print(("%s" % hard_region1[hard1_key + 2]))
+ print(("%s" % hard_region2[hard2_key]))
+ print(("%s" % hard_region2[hard2_key + 1]))
+ print(("%s" % hard_region2[hard2_key + 2]))
+ print("--------------------------")
hard1_key = hard1_key + 3
hard2_key = hard2_key + 3
else:
- print "not supported alignment type"
+ print("not supported alignment type")
def list_to_str(input_list):
return input_list
@@ -214,7 +214,7 @@ def convert_bead_to_tuples(alignments, hard_region1, hard_region2):
alignment_mapping_indices = []
hard1_key = 0
hard2_key = 0
- for soft_key in alignments.keys():
+ for soft_key in list(alignments.keys()):
alignment = alignments[soft_key]
if (alignment.category == '1 - 1'):
align_tuple = (list_to_str(hard_region1[hard1_key]), list_to_str(hard_region2[hard2_key]))
@@ -311,7 +311,7 @@ def convert_bead_to_tuples(alignments, hard_region1, hard_region2):
hard1_key = hard1_key + 3
hard2_key = hard2_key + 3
else:
- print "not supported alignment type"
+ print("not supported alignment type")
return (alignment_mapping, alignment_mapping_indices)
@@ -320,7 +320,7 @@ def get_alignment_links(alignments):
hard_key = 0
for hard_list in alignments:
for alignment_dict in hard_list:
- for align_key in alignment_dict.keys():
+ for align_key in list(alignment_dict.keys()):
alignment = alignment_dict[align_key]
if (alignment.category == '1 - 1'):
@@ -366,15 +366,15 @@ def get_alignment_links(alignments):
align_key)
alignment_mappings.append(align_triple)
else:
- print "not supported alignment type"
+ print("not supported alignment type")
return alignment_mappings
def get_test_values(alignments):
test_values = []
- for hard_regions_index in alignments.keys():
+ for hard_regions_index in list(alignments.keys()):
soft_regions_list = []
- for soft_regions_index in alignments[hard_regions_index].keys():
+ for soft_regions_index in list(alignments[hard_regions_index].keys()):
soft_regions_list.extend(alignments[hard_regions_index][soft_regions_index].alignment_mappings)
soft_regions_list.reverse()
test_values.extend(soft_regions_list)
diff --git a/nltk_contrib/align/alignment_util.py b/nltk_contrib/align/alignment_util.py
index f33f917..1a61c7b 100755
--- a/nltk_contrib/align/alignment_util.py
+++ b/nltk_contrib/align/alignment_util.py
@@ -41,9 +41,9 @@ def get_test_values(alignments):
"""
test_values = []
- for hard_regions_index in alignments.keys():
+ for hard_regions_index in list(alignments.keys()):
soft_regions_list = []
- for soft_regions_index in alignments[hard_regions_index].keys():
+ for soft_regions_index in list(alignments[hard_regions_index].keys()):
soft_regions_list.extend(alignments[hard_regions_index][soft_regions_index].alignment_mappings)
soft_regions_list.reverse()
test_values.extend(soft_regions_list)
diff --git a/nltk_contrib/align/api.py b/nltk_contrib/align/api.py
index 1a2566a..1edecd5 100644
--- a/nltk_contrib/align/api.py
+++ b/nltk_contrib/align/api.py
@@ -7,7 +7,7 @@
"""
from nltk.internals import deprecated, overridden
-from itertools import izip
+
##//////////////////////////////////////////////////////
# Alignment Interfaces
@@ -53,7 +53,7 @@ def batch_align(self, source, target):
@rtype: C{list} of I{alignments}
"""
- return [self.align(st, tt) for (st, tt) in izip(source, target)]
+ return [self.align(st, tt) for (st, tt) in zip(source, target)]
def recursive_align(self, source, target, alignments):
"""
@@ -70,7 +70,7 @@ def recursive_align(self, source, target, alignments):
if (self.output_format == 'text_tuples'):
alignment_mapping = standard_alignment
- import align_util
+ from . import align_util
if (self.output_format == 'bead_objects'):
(alignment_mapping, alignment_mapping_indices) = align_util.convert_bead_to_tuples(standard_alignment, source, target)
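
batch_align above drops itertools.izip in favour of the built-in zip, which is already lazy in Python 3. A small sketch with made-up source/target lists, showing that the comprehension behaves the same either way:

source = ["s1", "s2", "s3"]   # made-up region lists
target = ["t1", "t2"]

# zip() is an iterator in Python 3 and stops at the shorter input, matching izip.
pairs = [(st, tt) for (st, tt) in zip(source, target)]
assert pairs == [("s1", "t1"), ("s2", "t2")]
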
diff --git a/nltk_contrib/align/gale_church.py b/nltk_contrib/align/gale_church.py
index 4374b3b..ba10045 100644
--- a/nltk_contrib/align/gale_church.py
+++ b/nltk_contrib/align/gale_church.py
@@ -7,10 +7,10 @@
# URL:
# For license information, see LICENSE.TXT
-from __future__ import division
+
import math
-from util import *
+from .util import *
# Based on Gale & Church 1993,
# "A Program for Aligning Sentences in Bilingual Corpora"
@@ -182,10 +182,10 @@ def _chunk_iterator(first):
v = first
while v != split_value:
yield v
- v = it.next()
+ v = next(it)
while True:
- yield _chunk_iterator(it.next())
+ yield _chunk_iterator(next(it))
def parse_token_stream(stream, soft_delimiter, hard_delimiter):
@@ -205,4 +205,4 @@ def parse_token_stream(stream, soft_delimiter, hard_delimiter):
with nested(open(sys.argv[1], "r"), open(sys.argv[2], "r")) as (s, t):
source = parse_token_stream((l.strip() for l in s), ".EOS", ".EOP")
target = parse_token_stream((l.strip() for l in t), ".EOS", ".EOP")
- print align_texts(source, target)
+ print((align_texts(source, target)))
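
_chunk_iterator now calls the next() built-in instead of the removed it.next() method. A minimal sketch of the same iterator-protocol pattern, with a made-up token stream:

tokens = iter(["a", "b", ".EOS", "c"])   # made-up token stream

v = next(tokens)              # Python 3 spelling of the old tokens.next()
chunk = []
while v != ".EOS":
    chunk.append(v)
    v = next(tokens)          # raises StopIteration once the stream is exhausted
assert chunk == ["a", "b"]
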
diff --git a/nltk_contrib/align/test.py b/nltk_contrib/align/test.py
index b02885a..5f02cca 100644
--- a/nltk_contrib/align/test.py
+++ b/nltk_contrib/align/test.py
@@ -1,7 +1,7 @@
-import align_util
-import align
-import distance_measures
+from . import align_util
+from . import align
+from . import distance_measures
import sys
@@ -56,7 +56,7 @@ def demo():
gc_alignment = gc.batch_align(regions1, regions2)
- print "Alignment0: %s" % gc_alignment
+ print(("Alignment0: %s" % gc_alignment))
#demo_eval(gc_alignment, gold_file)
@@ -78,7 +78,7 @@ def demo():
gc_alignment = gc.batch_align(regions1, regions2)
- print "Alignment1: %s" % gc_alignment
+ print(("Alignment1: %s" % gc_alignment))
#demo_eval(gc_alignment, gold_file)
@@ -97,7 +97,7 @@ def demo():
standard_alignment2 = std.batch_align(s2, t2)
- print "Alignment2: %s" % standard_alignment2
+ print(("Alignment2: %s" % standard_alignment2))
# demo 4
@@ -109,14 +109,14 @@ def demo():
standard_alignment3 = std.align(s3, t3)
- print "Alignment3: %s" % standard_alignment3
+ print(("Alignment3: %s" % standard_alignment3))
# demo 5
top_down_alignments = std.recursive_align(s3, t3, [])
for alignment in top_down_alignments:
- print "Top down align: %s" % alignment
+ print(("Top down align: %s" % alignment))
def madame_bovary_test(source_file, target_file, source_pickle_file, target_pickle_file):
diff --git a/nltk_contrib/bioreader/__init__.py b/nltk_contrib/bioreader/__init__.py
index e7a7dee..d693f0e 100644
--- a/nltk_contrib/bioreader/__init__.py
+++ b/nltk_contrib/bioreader/__init__.py
@@ -7,7 +7,7 @@
# For license information, see LICENSE.TXT
#
-from bioreader import *
+from .bioreader import *
__all__ = [
'Reader',
diff --git a/nltk_contrib/bioreader/bioreader.py b/nltk_contrib/bioreader/bioreader.py
index e0dd539..8b394e1 100644
--- a/nltk_contrib/bioreader/bioreader.py
+++ b/nltk_contrib/bioreader/bioreader.py
@@ -283,18 +283,18 @@ def __init__(self,file,format="medline"):
elif format.lower() == "pubmed":
self.rerecord = re.compile(r'\'r'(?P.+?)'r'\',re.DOTALL)
else:
- print "Unrecognized format"
+ print("Unrecognized format")
self.RecordsList = re.findall(self.rerecord,whole)
whole = ""
self.RecordsList = [""+x.rstrip()+"" for x in self.RecordsList]
self.dictRecords = self.Createdict()
self.RecordsList = []
- self.howmany = len(self.dictRecords.keys())
- self.keys = self.dictRecords.keys()
+ self.howmany = len(list(self.dictRecords.keys()))
+ self.keys = list(self.dictRecords.keys())
tfinal = time.time()
self.repository = None
- print "finished loading at ",time.ctime(tfinal)
- print "loaded in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes"
+        print("finished loading at ", time.ctime(tfinal))
+        print("loaded in", tfinal-tinicial, " seconds, or", ((tfinal-tinicial)/60), " minutes")
def __repr__(self):
return ""
@@ -355,7 +355,7 @@ def Search(self,cadena,where=None):
tinicial = time.time()
resultlist = []
if where:
- for cadapmid in self.dictRecords.keys():
+ for cadapmid in list(self.dictRecords.keys()):
d = self.Read(cadapmid)
if where == 'title':
tosearch = d.title
@@ -374,7 +374,7 @@ def Search(self,cadena,where=None):
if self.repository:
pass
else:
- print "No full text repository has been defined...."
+ print("No full text repository has been defined....")
return None
elif where == 'pmid':
tosearch = d.pmid
@@ -385,16 +385,16 @@ def Search(self,cadena,where=None):
pass
if len(resultlist)!= 0:
tfinal = time.time()
- print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes"
- print "Found a total of ",str(len(resultlist))," hits for your query, in the ",where," field"
+                print("Searched in", tfinal-tinicial, " seconds, or", ((tfinal-tinicial)/60), " minutes")
+                print("Found a total of ", str(len(resultlist)), " hits for your query, in the ", where, " field")
return resultlist
else:
- print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes"
- print "Query not found"
+                print("Searched in", tfinal-tinicial, " seconds, or", ((tfinal-tinicial)/60), " minutes")
+ print("Query not found")
return None
else:
tosearch = ''
- for cadapmid in self.dictRecords.keys():
+ for cadapmid in list(self.dictRecords.keys()):
tosearch = self.dictRecords[cadapmid]
hit = re.search(cadena,tosearch)
if hit:
@@ -403,13 +403,13 @@ def Search(self,cadena,where=None):
pass
if len(resultlist)!= 0:
tfinal = time.time()
- print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes"
- print "Found a total of ",str(len(resultlist))," hits for your query, in all fields"
+                print("Searched in", tfinal-tinicial, " seconds, or", ((tfinal-tinicial)/60), " minutes")
+                print("Found a total of ", str(len(resultlist)), " hits for your query, in all fields")
return resultlist
else:
tfinal = time.time()
- print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes"
- print "Query not found"
+                print("Searched in", tfinal-tinicial, " seconds, or", ((tfinal-tinicial)/60), " minutes")
+ print("Query not found")
return None
@@ -432,15 +432,15 @@ class CreateXML:
"""
def __init__(self):
#global urllib,time,string,random
- import urllib,time,string,random
+ import urllib.request, urllib.parse, urllib.error,time,string,random
def getXml(self,s):
- pedir = urllib.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml")
+ pedir = urllib.request.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml")
stringxml = pedir.read()
self.salida.write(stringxml[:-20]+"\n")
def getXmlString(self,s):
- pedir = urllib.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml")
+ pedir = urllib.request.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml")
stringxml = pedir.read()
return stringxml[:-20]+"\n"
@@ -463,7 +463,7 @@ def GenerateFile(self,inputfile,outputfile):
cientos = self.listafin[:100]
- print "new length self.listacorr", len(self.listafin)
+            print("new length self.listacorr", len(self.listafin))
if len(self.listafin) <= 0:
break
else:
@@ -471,7 +471,7 @@ def GenerateFile(self,inputfile,outputfile):
nueva = self.listastring(cientos)
self.getXml(nueva)
for c in cientos:
- print c
+ print(c)
self.listafin.remove(c)
self.salida.close()
@@ -489,7 +489,7 @@ def Generate2String(self,inputfile):
cientos = self.listafin[:100]
- print "new length self.listacorr", len(self.listafin)
+            print("new length self.listacorr", len(self.listafin))
if len(self.listafin) <= 0:
break
else:
@@ -498,6 +498,6 @@ def Generate2String(self,inputfile):
newX = self.getXmlString(nueva)
self.AllXML = self.AllXML + newX
for c in cientos:
- print c
+ print(c)
self.listafin.remove(c)
return self.AllXML
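
CreateXML above moves from urllib.urlopen to urllib.request.urlopen. One caveat the patch does not address: in Python 3, read() returns bytes, so code that slices the result and concatenates it with str (as getXml and getXmlString do) generally needs an explicit decode. A hedged sketch with a placeholder URL (the real code assembles an NCBI efetch query for a batch of PMIDs):

import urllib.request

url = "https://example.org/efetch?db=pubmed&id=12345&retmode=xml"  # placeholder

with urllib.request.urlopen(url) as response:
    raw = response.read()            # bytes in Python 3
    text = raw.decode("utf-8")       # decode before slicing or concatenating with str
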
diff --git a/nltk_contrib/classifier/__init__.py b/nltk_contrib/classifier/__init__.py
index 8e8e478..a83c7c3 100644
--- a/nltk_contrib/classifier/__init__.py
+++ b/nltk_contrib/classifier/__init__.py
@@ -19,7 +19,7 @@ def __init__(self, training, attributes, klass):
self.attributes = attributes
self.training = training
self.convert_continuous_values_to_numbers(self.training)
- sorted_klass_freqs = self.training.class_freq_dist().keys()
+ sorted_klass_freqs = list(self.training.class_freq_dist().keys())
sorted_klass_values = [each for each in sorted_klass_freqs]
sorted_klass_values.extend([each for each in klass if not sorted_klass_values.__contains__(each)])
self.klass = sorted_klass_values
@@ -86,7 +86,7 @@ def entropy(values):
def entropy_of_key_counts(dictionary):
freq_dist = prob.FreqDist()
- klasses = dictionary.keys()
+ klasses = list(dictionary.keys())
for klass in klasses:
freq_dist.inc(klass, dictionary[klass])
return entropy_of_freq_dist(freq_dist)
diff --git a/nltk_contrib/classifier/attribute.py b/nltk_contrib/classifier/attribute.py
index a5c0c68..5ce22e9 100644
--- a/nltk_contrib/classifier/attribute.py
+++ b/nltk_contrib/classifier/attribute.py
@@ -8,7 +8,7 @@
from nltk_contrib.classifier.exceptions import systemerror as se
from nltk_contrib.classifier import autoclass as ac, cfile, decisionstump as ds
from nltk import probability as prob
-import UserList
+import collections
CONTINUOUS = 'continuous'
DISCRETE = 'discrete'
@@ -58,7 +58,7 @@ def empty_freq_dists(self):
def __hash__(self):
return hash(self.name) + hash(self.index)
-class Attributes(UserList.UserList):
+class Attributes(collections.UserList):
def __init__(self, attributes = []):
self.data = attributes
@@ -84,7 +84,7 @@ def discretise(self, discretised_attributes):
self.data[disc_attr.index] = disc_attr
def empty_decision_stumps(self, ignore_attributes, klass):
- filtered = filter(lambda attribute: attribute not in ignore_attributes, self.data)
+ filtered = [attribute for attribute in self.data if attribute not in ignore_attributes]
return [ds.DecisionStump(attribute, klass) for attribute in filtered]
def remove_attributes(self, attributes):
diff --git a/nltk_contrib/classifier/autoclass.py b/nltk_contrib/classifier/autoclass.py
index f41e80b..555c0e1 100644
--- a/nltk_contrib/classifier/autoclass.py
+++ b/nltk_contrib/classifier/autoclass.py
@@ -10,7 +10,7 @@ class AutoClass:
def __init__(self, name):
self.name = name
- def next(self):
+ def __next__(self):
base26 = self.base26()
base26 += 1
return AutoClass(string(base26))
diff --git a/nltk_contrib/classifier/classify.py b/nltk_contrib/classifier/classify.py
index c57bddc..9c54432 100644
--- a/nltk_contrib/classifier/classify.py
+++ b/nltk_contrib/classifier/classify.py
@@ -67,7 +67,7 @@
IB1 = 'IB1'
ALGORITHM_MAPPINGS = {ZERO_R:zeror.ZeroR, ONE_R:oner.OneR, DECISION_TREE:decisiontree.DecisionTree, NAIVE_BAYES:naivebayes.NaiveBayes, IB1:knn.IB1}
-ALL_ALGORITHMS = ALGORITHM_MAPPINGS.keys()
+ALL_ALGORITHMS = list(ALGORITHM_MAPPINGS.keys())
VERIFY='verify'
ACCURACY='accuracy'
@@ -80,7 +80,7 @@
class Classify(cl.CommandLineInterface):
def __init__(self):
- cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), ONE_R, a_help, f_help, t_help, T_help, g_help, o_help)
+ cl.CommandLineInterface.__init__(self, list(ALGORITHM_MAPPINGS.keys()), ONE_R, a_help, f_help, t_help, T_help, g_help, o_help)
self.add_option("-v", "--verify", dest=VERIFY, action="store_true", default=False, help=v_help)
self.add_option("-A", "--accuracy", dest=ACCURACY, action="store_false", default=True, help=A_help)
self.add_option("-e", "--error", dest=ERROR, action="store_true", default=False, help=e_help)
@@ -103,7 +103,7 @@ def execute(self):
self.error('Invalid arguments. Cannot verify classification for test data.')
file_strategy = get_file_strategy(self.files, self.training_path, self.test_path, self.gold_path, self.get_value(VERIFY))
- self.training_path, self.test_path, self.gold_path = file_strategy.values()
+ self.training_path, self.test_path, self.gold_path = list(file_strategy.values())
training, attributes, klass, test, gold = self.get_instances(self.training_path, self.test_path, self.gold_path, cross_validation_fold is not None)
classifier = ALGORITHM_MAPPINGS[self.algorithm](training, attributes, klass)
@@ -165,14 +165,14 @@ def __print_value(self, log, is_true, attribute, str_repn):
total = 0
for each in self.confusion_matrices:
total += getattr(each, attribute)()
- print >>log, str_repn + ': ' + str(float(total)/len(self.confusion_matrices))
+ print(str_repn + ': ' + str(float(total)/len(self.confusion_matrices)), file=log)
def write(self, log, should_write, data_format, suffix):
if should_write:
for index in range(len(self.gold_instances)):
new_path = self.training_path + str(index + 1) + suffix
data_format.write_gold(self.gold_instances[index], new_path)
- print >>log, 'Gold classification written to ' + new_path + ' file.'
+ print('Gold classification written to ' + new_path + ' file.', file=log)
def train(self):
#do Nothing
@@ -198,7 +198,7 @@ def write(self, log, should_write, data_format, suffix):
Will always write in the case of test files
"""
data_format.write_test(self.test, self.test_path + suffix)
- print >>log, 'Test classification written to ' + self.test_path + suffix + ' file.'
+ print('Test classification written to ' + self.test_path + suffix + ' file.', file=log)
def train(self):
self.classifier.train()
@@ -223,12 +223,12 @@ def print_results(self, log, accuracy, error, fscore, precision, recall):
def __print_value(self, log, is_true, attribute, str_repn):
if is_true:
- print >>log, str_repn + ': ' + getattr(self.confusion_matrix, attribute)().__str__()
+ print(str_repn + ': ' + getattr(self.confusion_matrix, attribute)().__str__(), file=log)
def write(self, log, should_write, data_format, suffix):
if should_write:
data_format.write_gold(self.gold, self.gold_path + suffix)
- print >>log, 'Gold classification written to ' + self.gold_path + suffix + ' file.'
+ print('Gold classification written to ' + self.gold_path + suffix + ' file.', file=log)
def train(self):
self.classifier.train()
diff --git a/nltk_contrib/classifier/commandline.py b/nltk_contrib/classifier/commandline.py
index 261ce69..feac9d9 100644
--- a/nltk_contrib/classifier/commandline.py
+++ b/nltk_contrib/classifier/commandline.py
@@ -38,7 +38,7 @@ def __init__(self, alg_choices, alg_default, a_help, f_help, t_help, T_help, g_h
self.add_option("-T", "--test-file", dest=TEST, type="string", help=T_help)
self.add_option("-g", "--gold-file", dest=GOLD, type="string", help=g_help)
- self.add_option("-D", "--data-format", dest=DATA_FORMAT, type="choice", choices=DATA_FORMAT_MAPPINGS.keys(), \
+ self.add_option("-D", "--data-format", dest=DATA_FORMAT, type="choice", choices=list(DATA_FORMAT_MAPPINGS.keys()), \
default=C45_FORMAT, help=D_help)
self.add_option("-l", "--log-file", dest=LOG_FILE, type="string", help=l_help)
self.add_option("-o", "--options", dest=OPTIONS, type="string", help=o_help)
@@ -67,8 +67,8 @@ def execute(self):
self.log = None
if log_file is not None:
self.log = open(log_file, 'a')
- print >>self.log, '-' * 40
- print >>self.log, 'DateTime: ' + time.strftime('%c', time.localtime())
+ print('-' * 40, file=self.log)
+ print('DateTime: ' + time.strftime('%c', time.localtime()), file=self.log)
def run(self, args):
"""
@@ -117,22 +117,22 @@ def write_to_file(self, suffix, training, attributes, klass, test, gold, include
def log_common_params(self, name):
if self.log is not None:
- print >>self.log, 'Operation: ' + name
- print >>self.log, '\nAlgorithm: ' + str(self.algorithm) + '\nTraining: ' + str(self.training_path) + \
- '\nTest: ' + str(self.test_path) + '\nGold: ' + str(self.gold_path) + '\nOptions: ' + str(self.options)
+ print('Operation: ' + name, file=self.log)
+ print('\nAlgorithm: ' + str(self.algorithm) + '\nTraining: ' + str(self.training_path) + \
+ '\nTest: ' + str(self.test_path) + '\nGold: ' + str(self.gold_path) + '\nOptions: ' + str(self.options), file=self.log)
def log_created_files(self, files_names, message):
if self.log is None:
- print message
+ print(message)
else:
- print >>self.log, "NumberOfFilesCreated: " + str(len(files_names))
+ print("NumberOfFilesCreated: " + str(len(files_names)), file=self.log)
count = 0
for file_name in files_names:
if self.log is None:
- print file_name
+ print(file_name)
else:
- print >>self.log, "CreatedFile" + str(count) + ": " + file_name
+ print("CreatedFile" + str(count) + ": " + file_name, file=self.log)
count += 1
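
These hunks rewrite the Python 2 "print >>self.log, ..." statements as calls to the print function with a file= keyword. A minimal sketch of the equivalent pattern, using sys.stderr as a stand-in for the log file opened with open(log_file, 'a'):

import sys
import time

log = sys.stderr    # stand-in for the real log file object

print('-' * 40, file=log)
print('DateTime: ' + time.strftime('%c', time.localtime()), file=log)
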
diff --git a/nltk_contrib/classifier/decisionstump.py b/nltk_contrib/classifier/decisionstump.py
index d4a8973..a8639ab 100644
--- a/nltk_contrib/classifier/decisionstump.py
+++ b/nltk_contrib/classifier/decisionstump.py
@@ -37,10 +37,10 @@ def update_count(self, instance):
self.root[instance.klass_value] += 1
def error(self):
- count_for_each_attr_value = self.counts.values()
+ count_for_each_attr_value = list(self.counts.values())
total, errors = 0, 0
for class_count in count_for_each_attr_value:
- subtotal, counts = 0, class_count.values()
+ subtotal, counts = 0, list(class_count.values())
counts.sort()
for count in counts: subtotal += count
errors += (subtotal - counts[-1])
@@ -56,7 +56,7 @@ def klass(self, instance):
def majority_klass(self, attr_value):
klass_values_with_count = self.counts[attr_value]
_max, klass_value = 0, self.safe_default() # will consider safe default because at times the test will have an attribute value not present in the stump(can happen in cross validation as well)
- for klass, count in klass_values_with_count.items():
+ for klass, count in list(klass_values_with_count.items()):
if count > _max:
_max, klass_value = count, klass
return klass_value
@@ -67,7 +67,7 @@ def safe_default(self):
"""
if self.__safe_default == None:
max_occurance, klass = -1, None
- for klass_element in self.root.keys():
+ for klass_element in list(self.root.keys()):
if self.root[klass_element] > max_occurance:
max_occurance = self.root[klass_element]
klass = klass_element
@@ -110,14 +110,14 @@ def split_info(self):
def __str__(self):
_str = 'Decision stump for attribute ' + self.attribute.name
- for key, value in self.counts.items():
+ for key, value in list(self.counts.items()):
_str += '\nAttr value: ' + key + '; counts: ' + value.__str__()
for child in self.children:
_str += child.__str__()
return _str
def total_counts(dictionary_of_klass_freq):
- return sum([count for count in dictionary_of_klass_freq.values()])
+ return sum([count for count in list(dictionary_of_klass_freq.values())])
def dictionary_of_values(klass):
return dict([(value, 0) for value in klass])
diff --git a/nltk_contrib/classifier/discretise.py b/nltk_contrib/classifier/discretise.py
index 4c7bddb..9625bad 100644
--- a/nltk_contrib/classifier/discretise.py
+++ b/nltk_contrib/classifier/discretise.py
@@ -52,7 +52,7 @@
class Discretise(cl.CommandLineInterface):
def __init__(self):
- cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), UNSUPERVISED_EQUAL_WIDTH, a_help, f_help, t_help, T_help, g_help, o_help)
+ cl.CommandLineInterface.__init__(self, list(ALGORITHM_MAPPINGS.keys()), UNSUPERVISED_EQUAL_WIDTH, a_help, f_help, t_help, T_help, g_help, o_help)
self.add_option("-A", "--attributes", dest="attributes", type="string", help=A_help)
def execute(self):
@@ -185,7 +185,7 @@ def create_and_run(algorithm, path, indices, log_path, options):
params.extend(['-o', options])
if log_path is not None:
params.extend(['-l', log_path])
- print "Params " + str(params)
+ print(("Params " + str(params)))
disc.run(params)
return disc.get_suffix()
diff --git a/nltk_contrib/classifier/discretisedattribute.py b/nltk_contrib/classifier/discretisedattribute.py
index 666a90c..ff4c573 100644
--- a/nltk_contrib/classifier/discretisedattribute.py
+++ b/nltk_contrib/classifier/discretisedattribute.py
@@ -14,7 +14,7 @@ def __init__(self, name, ranges, index):
self.values, klass_value = [], autoclass.FIRST
for i in range(len(ranges)):
self.values.append(klass_value.name)
- klass_value = klass_value.next()
+ klass_value = next(klass_value)
self.index = index
self.type = attribute.DISCRETE
self.ranges = ranges
diff --git a/nltk_contrib/classifier/featureselect.py b/nltk_contrib/classifier/featureselect.py
index e4695f9..05c872e 100644
--- a/nltk_contrib/classifier/featureselect.py
+++ b/nltk_contrib/classifier/featureselect.py
@@ -58,7 +58,7 @@
class FeatureSelect(cl.CommandLineInterface):
def __init__(self):
- cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), RANK, a_help, f_help, t_help, T_help, g_help, o_help)
+ cl.CommandLineInterface.__init__(self, list(ALGORITHM_MAPPINGS.keys()), RANK, a_help, f_help, t_help, T_help, g_help, o_help)
def execute(self):
cl.CommandLineInterface.execute(self)
@@ -221,7 +221,7 @@ def isfloat(stringval):
try:
float(stringval)
return True
- except (ValueError, TypeError), e: return False
+ except (ValueError, TypeError) as e: return False
def batch_filter_select(base_path, suffixes, number_of_attributes, log_path, has_continuous):
filter_suffixes = []
@@ -229,7 +229,7 @@ def batch_filter_select(base_path, suffixes, number_of_attributes, log_path, has
for selection_criteria in [INFORMATION_GAIN, GAIN_RATIO]:
feat_sel = FeatureSelect()
params = ['-a', RANK, '-f', base_path + each, '-o', selection_criteria + ',' + str(number_of_attributes), '-l', log_path]
- print "Params " + str(params)
+ print(("Params " + str(params)))
feat_sel.run(params)
filter_suffixes.append(each + feat_sel.get_suffix())
return filter_suffixes
@@ -240,7 +240,7 @@ def batch_wrapper_select(base_path, suffixes, classifier, fold, delta, log_path)
for alg in [FORWARD_SELECTION, BACKWARD_ELIMINATION]:
feat_sel = FeatureSelect()
params = ['-a', alg, '-f', base_path + each, '-o', classifier + ',' + str(fold) + ',' + str(delta), '-l', log_path]
- print "Params " + str(params)
+ print(("Params " + str(params)))
feat_sel.run(params)
wrapper_suffixes.append(each + feat_sel.get_suffix())
return wrapper_suffixes
diff --git a/nltk_contrib/classifier/instances.py b/nltk_contrib/classifier/instances.py
index 0d2d42a..8825f68 100644
--- a/nltk_contrib/classifier/instances.py
+++ b/nltk_contrib/classifier/instances.py
@@ -9,11 +9,11 @@
from nltk_contrib.classifier import instance as ins, item, cfile, confusionmatrix as cm, numrange as r, util
from nltk_contrib.classifier.exceptions import systemerror as system, invaliddataerror as inv
from nltk import probability as prob
-import operator, UserList, UserDict, math
+import operator, collections, math
-class Instances(UserList.UserList):
+class Instances(collections.UserList):
def __init__(self, instances):
- UserList.UserList.__init__(self, instances)
+ collections.UserList.__init__(self, instances)
def are_valid(self, klass, attributes):
for instance in self.data:
@@ -122,7 +122,7 @@ def posterior_probablities(self, attributes, klass_values):
for klass_value in klass_values:
freq_dists[attribute][value].inc(klass_value) #Laplacian smoothing
stat_list_values = {}
- cont_attrs = filter(lambda attr: attr.is_continuous(), attributes)
+ cont_attrs = [attr for attr in attributes if attr.is_continuous()]
if attributes.has_continuous():
for attribute in cont_attrs:
stat_list_values[attribute] = {}
@@ -160,12 +160,12 @@ def confusion_matrix(self, klass):
matrix.count(i.klass_value, i.classified_klass)
return matrix
-class SupervisedBreakpoints(UserList.UserList):
+class SupervisedBreakpoints(collections.UserList):
"""
Used to find breakpoints for discretisation
"""
def __init__(self, klass_values, attr_values):
- UserList.UserList.__init__(self, [])
+ collections.UserList.__init__(self, [])
self.attr_values = attr_values
self.klass_values = klass_values
diff --git a/nltk_contrib/classifier/knn.py b/nltk_contrib/classifier/knn.py
index 37988f9..03325bd 100644
--- a/nltk_contrib/classifier/knn.py
+++ b/nltk_contrib/classifier/knn.py
@@ -41,7 +41,7 @@ def distance(self, value, instance):
self.distances[value] = [instance]
def minimum_distance_instances(self):
- keys = self.distances.keys()
+ keys = list(self.distances.keys())
keys.sort()
return self.distances[keys[0]]
diff --git a/nltk_contrib/classifier/naivebayes.py b/nltk_contrib/classifier/naivebayes.py
index 68e32c6..9ee8fa3 100644
--- a/nltk_contrib/classifier/naivebayes.py
+++ b/nltk_contrib/classifier/naivebayes.py
@@ -30,7 +30,7 @@ def estimate_klass(self, instance):
for klass_value in self.klass:
class_conditional_probability = self.class_conditional_probability(instance, klass_value)
estimates_using_prob[class_conditional_probability] = klass_value
- keys = estimates_using_prob.keys()
+ keys = list(estimates_using_prob.keys())
keys.sort()#find the one with max conditional prob
return estimates_using_prob[keys[-1]]
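
estimate_klass keeps the original idiom of sorting the probability keys and indexing the last one, now wrapping keys() in list() because Python 3 dictionary views cannot be sorted in place. A sketch (not part of the patch) with made-up probabilities, including an equivalent max()-based form:

estimates_using_prob = {0.2: "yes", 0.7: "no", 0.1: "maybe"}   # made-up values

# The patch's idiom: materialise the view, sort it, index the largest key.
keys = list(estimates_using_prob.keys())
keys.sort()
assert estimates_using_prob[keys[-1]] == "no"

# Same result without building and sorting the whole key list.
assert estimates_using_prob[max(estimates_using_prob)] == "no"
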
diff --git a/nltk_contrib/classifier/util.py b/nltk_contrib/classifier/util.py
index 4400c61..078644d 100644
--- a/nltk_contrib/classifier/util.py
+++ b/nltk_contrib/classifier/util.py
@@ -3,11 +3,11 @@
#
# URL:
# This software is distributed under GPL, for license information see LICENSE.TXT
-import UserList, math
+import collections, math
-class StatList(UserList.UserList):
+class StatList(collections.UserList):
def __init__(self, values=None):
- UserList.UserList.__init__(self, values)
+ collections.UserList.__init__(self, values)
def mean(self):
if len(self.data) == 0: return 0
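
StatList now derives from collections.UserList, the Python 3 home of the class that previously lived in the standalone UserList module. A minimal, hypothetical subclass just to illustrate the base-class move:

import collections

class MinimalStatList(collections.UserList):
    # Hypothetical reduced version of StatList, only to show the new base class.
    def mean(self):
        return sum(self.data) / len(self.data) if self.data else 0

assert MinimalStatList([1, 2, 3]).mean() == 2
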
diff --git a/nltk_contrib/classifier/zeror.py b/nltk_contrib/classifier/zeror.py
index 6b3c6f8..5de0632 100644
--- a/nltk_contrib/classifier/zeror.py
+++ b/nltk_contrib/classifier/zeror.py
@@ -36,7 +36,7 @@ def update_count(self, instance):
def __max(self):
max, klass_value = 0, None
- for key in self.__klassCount.keys():
+ for key in list(self.__klassCount.keys()):
value = self.__klassCount[key]
if value > max:
max = value
diff --git a/nltk_contrib/classifier_tests/alltests.py b/nltk_contrib/classifier_tests/alltests.py
index e166309..a0d4c86 100644
--- a/nltk_contrib/classifier_tests/alltests.py
+++ b/nltk_contrib/classifier_tests/alltests.py
@@ -13,10 +13,10 @@ def allTestsSuite():
for dn,d,f in os.walk('.'):
if dn is not '.': continue
testfilenames = [filename for filename in f if re.search('tests\.py$', filename) is not None]
- modulenames = map(lambda f: re.sub('\.py$', '', f), testfilenames)
- modules = map(__import__, modulenames)
+ modulenames = [re.sub('\.py$', '', f) for f in testfilenames]
+ modules = list(map(__import__, modulenames))
load = unittest.defaultTestLoader.loadTestsFromModule
- return unittest.TestSuite(map(load, modules))
+ return unittest.TestSuite(list(map(load, modules)))
if __name__ == '__main__':
runner = unittest.TextTestRunner()
diff --git a/nltk_contrib/classifier_tests/autoclasstests.py b/nltk_contrib/classifier_tests/autoclasstests.py
index 03afeeb..e0acf4c 100644
--- a/nltk_contrib/classifier_tests/autoclasstests.py
+++ b/nltk_contrib/classifier_tests/autoclasstests.py
@@ -25,9 +25,9 @@ def test_string(self):
def test_next(self):
a = autoclass.FIRST
- b = a.next()
+ b = next(a)
self.assertEqual('b', str(b))
- self.assertEqual('c', str(b.next()))
+ self.assertEqual('c', str(next(b)))
self.assertEqual('z', self.next('y'))
self.assertEqual('ba', self.next('z'))
self.assertEqual('bb', self.next('ba'))
@@ -36,4 +36,4 @@ def test_next(self):
self.assertEqual('baa', self.next('zz'))
def next(self, current):
- return str(autoclass.AutoClass(current).next())
+ return str(next(autoclass.AutoClass(current)))
diff --git a/nltk_contrib/classifier_tests/classifytests.py b/nltk_contrib/classifier_tests/classifytests.py
index 30570c8..339a120 100644
--- a/nltk_contrib/classifier_tests/classifytests.py
+++ b/nltk_contrib/classifier_tests/classifytests.py
@@ -126,28 +126,28 @@ def test_does_not_throw_error_if_only_file_option_present(self):
def test_get_file_strategy(self):
strategy = c.get_file_strategy('files', None, None, None, True)
self.assertEqual(c.CommonBaseNameStrategy, strategy.__class__)
- values = strategy.values()
+ values = list(strategy.values())
self.assertEqual(values[0], 'files')
self.assertEqual(values[1], None)
self.assertEqual(values[2], 'files')
strategy = c.get_file_strategy('files', None, None, None, False)
self.assertEqual(c.CommonBaseNameStrategy, strategy.__class__)
- values = strategy.values()
+ values = list(strategy.values())
self.assertEqual(values[0], 'files')
self.assertEqual(values[1], 'files')
self.assertEqual(values[2], None)
strategy = c.get_file_strategy(None, 'train', 'test', None, False)
self.assertEqual(c.ExplicitNamesStrategy, strategy.__class__)
- values = strategy.values()
+ values = list(strategy.values())
self.assertEqual(values[0], 'train')
self.assertEqual(values[1], 'test')
self.assertEqual(values[2], None)
strategy = c.get_file_strategy(None, 'train', None, 'gold', False)
self.assertEqual(c.ExplicitNamesStrategy, strategy.__class__)
- values = strategy.values()
+ values = list(strategy.values())
self.assertEqual(values[0], 'train')
self.assertEqual(values[1], None)
self.assertEqual(values[2], 'gold')
diff --git a/nltk_contrib/classifier_tests/decisionstumptests.py b/nltk_contrib/classifier_tests/decisionstumptests.py
index 0a50a46..51a7712 100644
--- a/nltk_contrib/classifier_tests/decisionstumptests.py
+++ b/nltk_contrib/classifier_tests/decisionstumptests.py
@@ -112,7 +112,7 @@ def test_dictionary_of_all_values_with_count_0(self):
values = ds.dictionary_of_values(phoney);
self.assertEqual(3, len(values))
for i in ['a', 'b', 'c']:
- self.assertTrue(values.has_key(i))
+ self.assertTrue(i in values)
self.assertEqual(0, values[i])
def test_gain_ratio(self):
diff --git a/nltk_contrib/classifier_tests/instancestests.py b/nltk_contrib/classifier_tests/instancestests.py
index d3397c1..0f35f91 100644
--- a/nltk_contrib/classifier_tests/instancestests.py
+++ b/nltk_contrib/classifier_tests/instancestests.py
@@ -285,7 +285,7 @@ def test_class_freq_dist_in_reverse_to_store_classes(self):
path = datasetsDir(self) + 'numerical' + SEP + 'person'
_training = training(path)
class_freq_dist = _training.class_freq_dist()
- self.assertEqual(['yes','no'], class_freq_dist.keys())
+ self.assertEqual(['yes','no'], list(class_freq_dist.keys()))
def test_posterior_probablities_with_discrete_values(self):
diff --git a/nltk_contrib/classifier_tests/numrangetests.py b/nltk_contrib/classifier_tests/numrangetests.py
index 7c82ff6..f40217e 100644
--- a/nltk_contrib/classifier_tests/numrangetests.py
+++ b/nltk_contrib/classifier_tests/numrangetests.py
@@ -57,14 +57,14 @@ def test_include_expands_range(self):
def test_split_returns_none_when_lower_eq_upper(self):
_range = r.Range()
- self.assertEquals(None, _range.split(2))
+ self.assertEqual(None, _range.split(2))
def test_split_returns_none_if_size_of_each_split_is_less_than_delta(self):
try:
_range = r.Range(0, 0.000005)
_range.split(7)
- except (se.SystemError), e:
- self.assertEquals('Splitting of range resulted in elements smaller than delta 1e-06.', e.message)
+ except (se.SystemError) as e:
+ self.assertEqual('Splitting of range resulted in elements smaller than delta 1e-06.', e.message)
def test_split_includes_the_highest_and_lowest(self):
_range = r.Range()
diff --git a/nltk_contrib/classifier_tests/utilities/batchtest.py b/nltk_contrib/classifier_tests/utilities/batchtest.py
index da65b04..751a5b6 100644
--- a/nltk_contrib/classifier_tests/utilities/batchtest.py
+++ b/nltk_contrib/classifier_tests/utilities/batchtest.py
@@ -15,7 +15,7 @@ def run(root_path, log_path):
print('in run')
for dir_name, dirs, files in os.walk(root_path):
data = set([])
- print('Dir name ' + str(dir_name) + ' dirs ' + str(dirs) + ' files ' + str(files))
+ print(('Dir name ' + str(dir_name) + ' dirs ' + str(dirs) + ' files ' + str(files)))
for file in files:
index = file.rfind('.')
if index != -1:
@@ -65,7 +65,7 @@ def process(path, log_path):
for suffix in all:
params = ['-a', algorithm, '-f', path + suffix, '-l', log_path, '-c', 5]
- print "Params " + str(params)
+ print(("Params " + str(params)))
c.Classify().run(params)
def to_str_array(value, times):
@@ -91,13 +91,13 @@ def delete_generated_files(path):
resp = 0
while(resp != 1 and resp != 2):
try:
- resp = int(raw_input("Select one of following options:\n1. Run all tests\n2. Delete generated files\n"))
+            resp = int(input("Select one of following options:\n1. Run all tests\n2. Delete generated files\n"))
except ValueError:
pass
if resp == 1:
- dir_tree_path = raw_input("Enter directory tree path")
- log_file = raw_input("Enter log file")
+        dir_tree_path = input("Enter directory tree path")
+        log_file = input("Enter log file")
run(dir_tree_path, log_file)
elif resp == 2:
- dir_path = raw_input("Enter directory path")
+        dir_path = input("Enter directory path")
delete_generated_files(dir_path)
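
The interactive prompts above use the Python 3 input(), which, like the old raw_input, returns the typed line as a string; only the numeric menu choice needs an int() conversion. A short sketch of the pattern (prompts are illustrative):

# input() returns the typed line as a string (the old raw_input behaviour).
resp = 0
while resp not in (1, 2):
    try:
        resp = int(input("1. Run all tests\n2. Delete generated files\n"))
    except ValueError:
        pass
if resp == 1:
    dir_tree_path = input("Enter directory tree path: ")   # plain string, no eval
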
diff --git a/nltk_contrib/classifier_tests/utilities/convert.py b/nltk_contrib/classifier_tests/utilities/convert.py
index 23cd3b2..9147635 100644
--- a/nltk_contrib/classifier_tests/utilities/convert.py
+++ b/nltk_contrib/classifier_tests/utilities/convert.py
@@ -49,7 +49,7 @@ def values(file_path, index, sep = " "):
for line in f:
words = line.split(sep)
if not index < len(words):
- print "Warning! omitting line " + str(line)
+ print("Warning! omitting line " + str(line))
continue
values.add(words[index])
return ','.join(values)
@@ -65,7 +65,7 @@ def convert(path):
ind = path.rfind('.')
if ind == -1: ind = len(path)
nf = open(path[:ind] + 'conv' + path[ind:], 'w')
- for l in converted:print >>nf, l
+ for l in converted:print(l, file=nf)
nf.close()
def convert_log_to_csv(path):
@@ -73,7 +73,7 @@ def convert_log_to_csv(path):
csvf = open(path + '.csv', 'w')
for each in classifications:
- print >>csvf, each.algorithm + ',' + each.training + ',' + each.test + ',' + each.gold + ',' + each.accuracy + ',' + each.f_score
+ print(each.algorithm + ',' + each.training + ',' + each.test + ',' + each.gold + ',' + each.accuracy + ',' + each.f_score, file=csvf)
def get_classification_log_entries(path):
f = open(path)
@@ -215,15 +215,15 @@ def convert_log_to_tex_tables(path):
texf = open(path + '-acc.tex', 'w')
for table in accuracy_tables:
- print >>texf, table
+ print(table, file=texf)
texf = open(path + '-fs.tex', 'w')
for table in f_score_tables:
- print >>texf, table
+ print(table, file=texf)
texf = open(path + '-macc.tex', 'w')
for table in mean_accuracy_tables:
- print >>texf, table
+ print(table, file=texf)
texf = open(path + '-mdatasets.tex', 'w')
- print >>texf, mean_datasets
+ print(mean_datasets, file=texf)
def get_stat_lists(cols):
return dict([(each, util.StatList()) for each in cols])
diff --git a/nltk_contrib/classify/__init__.py b/nltk_contrib/classify/__init__.py
index 16025db..77f227b 100755
--- a/nltk_contrib/classify/__init__.py
+++ b/nltk_contrib/classify/__init__.py
@@ -110,6 +110,6 @@ def classifier_accuracy(classifier, gold):
return float(correct) / len(gold)
-from cosine import *
-from naivebayes import *
-from spearman import *
+from .cosine import *
+from .naivebayes import *
+from .spearman import *
diff --git a/nltk_contrib/classify/cosine.py b/nltk_contrib/classify/cosine.py
index cde6979..26950df 100755
--- a/nltk_contrib/classify/cosine.py
+++ b/nltk_contrib/classify/cosine.py
@@ -144,7 +144,7 @@ def demo():
result = classifier.get_class_dict("a")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
@@ -181,7 +181,7 @@ def demo2():
result = classifier.get_class_dict("aaababb")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
class a: 'aa' = 5
@@ -220,7 +220,7 @@ def demo3():
result = classifier.get_class_dict("aaababb")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
@@ -270,9 +270,9 @@ def demo4():
result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200)))
- print 'english-kjv :', result.prob('english-kjv')
- print 'french :', result.prob('french')
- print 'finnish :', result.prob('finnish')
+    print('english-kjv :', result.prob('english-kjv'))
+    print('french :', result.prob('french'))
+    print('finnish :', result.prob('finnish'))
if __name__ == '__main__':
diff --git a/nltk_contrib/classify/naivebayes.py b/nltk_contrib/classify/naivebayes.py
index 92dd5d0..d2cd3ba 100755
--- a/nltk_contrib/classify/naivebayes.py
+++ b/nltk_contrib/classify/naivebayes.py
@@ -82,7 +82,8 @@ def train(self, gold):
self._cls_prob_dist = GoodTuringProbDist(cls_freq_dist, cls_freq_dist.B())
# for features
- def make_probdist(freqdist, (cls, fname)):
+        def make_probdist(freqdist, cls_fname):
+            (cls, fname) = cls_fname
return GoodTuringProbDist(freqdist, len(feature_values[fname]))
self._feat_prob_dist = ConditionalProbDist(feat_freq_dist, make_probdist, True)
@@ -149,7 +150,7 @@ def demo():
result = classifier.get_class_dict("a")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
@@ -180,7 +181,7 @@ def demo2():
result = classifier.get_class_dict("aababb")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
class_probs a = 0.5
@@ -215,7 +216,7 @@ def demo3():
result = classifier.get_class_dict("aaababb")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
@@ -260,9 +261,9 @@ def demo4():
result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200)))
- print 'english-kjv :', result.prob('english-kjv')
- print 'french :', result.prob('french')
- print 'finnish :', result.prob('finnish')
+    print('english-kjv :', result.prob('english-kjv'))
+    print('french :', result.prob('french'))
+    print('finnish :', result.prob('finnish'))
if __name__ == '__main__':
demo2()
diff --git a/nltk_contrib/classify/spearman.py b/nltk_contrib/classify/spearman.py
index db81523..5c788b0 100644
--- a/nltk_contrib/classify/spearman.py
+++ b/nltk_contrib/classify/spearman.py
@@ -162,7 +162,7 @@ def demo():
result = classifier.get_class_dict("a")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
class a: 'a' = 1
@@ -190,7 +190,7 @@ def demo2():
result = classifier.get_class_dict("aaababb")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
class a: 'aa' = 1
@@ -224,7 +224,7 @@ def demo3():
result = classifier.get_class_dict("aaababb")
for cls in result:
- print cls, ':', result[cls]
+        print(cls, ':', result[cls])
"""
expected values:
@@ -268,9 +268,9 @@ def demo4():
result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200)))
- print 'english-kjv :', result.prob('english-kjv')
- print 'french :', result.prob('french')
- print 'finnish :', result.prob('finnish')
+    print('english-kjv :', result.prob('english-kjv'))
+    print('french :', result.prob('french'))
+    print('finnish :', result.prob('finnish'))
if __name__ == '__main__':
diff --git a/nltk_contrib/combined.py b/nltk_contrib/combined.py
index fe47c93..d83b870 100644
--- a/nltk_contrib/combined.py
+++ b/nltk_contrib/combined.py
@@ -96,7 +96,7 @@ def unmarshal (self, basepath):
self._brill = Brill(self._tagger[-1], [])
self._brill.unmarshal(tagger_file)
else:
- print "error, tagger type not recognized."
+ print("error, tagger type not recognized.")
def exemple_train (self, train_sents, verbose=False):
self._append_default("N")
@@ -124,8 +124,8 @@ def create_tagger (train_sents):
ct.unmarshal("tresoldi")
tokens = "Mauro viu o livro sobre a mesa".split()
- print list(ct.tag(tokens))
+ print((list(ct.tag(tokens))))
# tests
acc = tag.accuracy(ct, [train_sents])
- print 'Accuracy = %4.2f%%' % (100 * acc)
+ print(('Accuracy = %4.2f%%' % (100 * acc)))
diff --git a/nltk_contrib/concord.py b/nltk_contrib/concord.py
index f5bfeac..5c401a5 100644
--- a/nltk_contrib/concord.py
+++ b/nltk_contrib/concord.py
@@ -225,16 +225,16 @@ def raw(self, leftRegexp=None, middleRegexp=".*", rightRegexp=None,
reg = re.compile(middleRegexp)
if verbose:
- print "Matching the following target words:"
+ print("Matching the following target words:")
wordLocs = []
# get list of (sentence, word) pairs to get context for
- for item in self.index.getIndex().iteritems():
+ for item in list(self.index.getIndex().items()):
if reg.match("/".join([item[0][0].lower(), item[0][1]])):
if verbose:
- print "/".join(item[0])
+ print(("/".join(item[0])))
wordLocs.append(item[1])
- print ""
+ print("")
items = []
# if context lengths are specified in words:
@@ -358,24 +358,24 @@ def raw(self, leftRegexp=None, middleRegexp=".*", rightRegexp=None,
items.append((left, target, right, sentenceNum))
if verbose:
- print "Found %d matches for target word..." % len(items)
+ print(("Found %d matches for target word..." % len(items)))
# sort the concordance
if sort == self.SORT_WORD:
if verbose:
- print "Sorting by target word..."
+ print("Sorting by target word...")
items.sort(key=lambda i:i[1][0].lower())
elif sort == self.SORT_POS:
if verbose:
- print "Sorting by target word POS tag..."
+ print("Sorting by target word POS tag...")
items.sort(key=lambda i:i[1][1].lower())
elif sort == self.SORT_NUM:
if verbose:
- print "Sorting by sentence number..."
+ print("Sorting by sentence number...")
items.sort(key=lambda i:i[3])
elif sort == self.SORT_RIGHT_CONTEXT:
if verbose:
- print "Sorting by first word of right context..."
+ print("Sorting by first word of right context...")
items.sort(key=lambda i:i[2][0][0])
# if any regular expressions have been given for the context, filter
@@ -390,11 +390,11 @@ def raw(self, leftRegexp=None, middleRegexp=".*", rightRegexp=None,
rightRe=None
if leftRegexp != None:
if verbose:
- print "Filtering on left context..."
+ print("Filtering on left context...")
leftRe = re.compile(leftRegexp)
if rightRegexp != None:
if verbose:
- print "Filtering on right context..."
+ print("Filtering on right context...")
rightRe = re.compile(rightRegexp)
for item in items:
@@ -515,11 +515,11 @@ def format(self, source, contextChars=55, maxKeyLength=0, showWord=True,
rPad = int(floor(max(maxMiddleLength - len(middle), 0) / 2.0))
middle = " "*lPad + middle + " "*rPad
- print left + "| " + middle + " | " + right + " "
+ print((left + "| " + middle + " | " + right + " "))
count += 1
if verbose:
- print "\n" + repr(count) + " lines"
+ print(("\n" + repr(count) + " lines"))
def _matches(self, item, leftRe, rightRe):
""" Private method that runs the given regexps over a raw concordance
@@ -798,10 +798,10 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1,
x = 0
other = 0
total = 0
- print name
- print "-"*(maxKeyLength + 7)
+ print(name)
+ print(("-"*(maxKeyLength + 7)))
# for each key:
- for key in dist.keys():
+ for key in list(dist.keys()):
# keep track of how many samples shown, if using the showFirstX
# option
#if showFirstX > 0 and x >= showFirstX:
@@ -823,7 +823,7 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1,
if count < threshold or (showFirstX > 0 and x >= showFirstX):
other += count
else:
- print key + " "*(maxKeyLength - len(key) + 1) + countString
+ print((key + " "*(maxKeyLength - len(key) + 1) + countString))
x += 1
if countOther:
@@ -833,7 +833,7 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1,
else:
count = other
countString = str(count)
- print self._OTHER_TEXT + " "*(maxKeyLength - len(self._OTHER_TEXT) + 1) + countString
+ print((self._OTHER_TEXT + " "*(maxKeyLength - len(self._OTHER_TEXT) + 1) + countString))
if showTotal:
if normalise:
count = 1.0 * total
@@ -841,21 +841,21 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1,
else:
count = total
countString = str(count)
- print self._TOTAL_TEXT + " "*(maxKeyLength - len(self._TOTAL_TEXT) + 1) + countString
- print ""
+ print((self._TOTAL_TEXT + " "*(maxKeyLength - len(self._TOTAL_TEXT) + 1) + countString))
+ print("")
def demo():
"""
Demonstrates how to use IndexConcordance and Aggregator.
"""
- print "Reading Brown Corpus into memory..."
+ print("Reading Brown Corpus into memory...")
corpus = brown.tagged_sents('a')
- print "Generating index..."
+ print("Generating index...")
ic = IndexConcordance(corpus)
- print "Showing all occurences of 'plasma' in the Brown Corpus..."
+ print("Showing all occurences of 'plasma' in the Brown Corpus...")
ic.formatted(middleRegexp="^plasma/.*", verbose=True)
- print "Investigating the collocates of 'deal' and derivatives..."
+ print("Investigating the collocates of 'deal' and derivatives...")
agg = Aggregator()
agg.add(ic.raw(middleRegexp="^deal", leftContextLength=1, rightContextLength=0,
leftRegexp="^(\w|\s|/)*$"), "Brown Corpus 'deal' left collocates")
diff --git a/nltk_contrib/coref/__init__.py b/nltk_contrib/coref/__init__.py
index 898a163..5fec319 100644
--- a/nltk_contrib/coref/__init__.py
+++ b/nltk_contrib/coref/__init__.py
@@ -31,7 +31,7 @@
# Import top-level functionality into top-level namespace
# Processing packages -- these all define __all__ carefully.
-from api import *
+from .api import *
import nltk.data
from nltk.corpus.util import LazyCorpusLoader
@@ -39,6 +39,6 @@
if os.environ.get('NLTK_DATA_MUC6') \
and os.environ.get('NLTK_DATA_MUC6') not in nltk.data.path:
nltk.data.path.insert(0, os.environ.get('NLTK_DATA_MUC6'))
-from muc import MUCCorpusReader
+from .muc import MUCCorpusReader
muc6 = LazyCorpusLoader('muc6/',
MUCCorpusReader, r'.*\.ne\..*\.sgm')
\ No newline at end of file
diff --git a/nltk_contrib/coref/ace2.py b/nltk_contrib/coref/ace2.py
index f1b6b5d..7c3459a 100644
--- a/nltk_contrib/coref/ace2.py
+++ b/nltk_contrib/coref/ace2.py
@@ -243,17 +243,17 @@ def _demo(root, file):
try:
reader = ACE2CorpusReader(root, file)
- print 'Sentences for %s:' % (file)
+ print(('Sentences for %s:' % (file)))
for sent in reader.sents():
- print ' %s' % (sent)
- print
- print 'Words for %s:' % (file)
+ print((' %s' % (sent)))
+ print()
+ print(('Words for %s:' % (file)))
for word in reader.words():
- print ' %s' % (word)
- print
- except Exception, e:
- print 'Error encountered while running demo for %s: %s' % (file, e)
- print
+ print((' %s' % (word)))
+ print()
+ except Exception as e:
+ print(('Error encountered while running demo for %s: %s' % (file, e)))
+ print()
def demo():
"""
diff --git a/nltk_contrib/coref/api.py b/nltk_contrib/coref/api.py
index 9fefc3b..16a92ea 100644
--- a/nltk_contrib/coref/api.py
+++ b/nltk_contrib/coref/api.py
@@ -27,7 +27,7 @@ class TrainableI(object):
"""
def __init__(self):
if self.__class__ == TrainableI:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
def train(self, labeled_sequence, test_sequence=None,
unlabeled_sequence=None, **kwargs):
@@ -54,7 +54,7 @@ class HiddenMarkovModelChunkTaggerTransformI(HiddenMarkovModelTaggerTransformI):
# Inherit the superclass documentation.
def __init__(self):
if self.__class__ == HiddenMarkovModelChunkTaggerTransformI:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
def path2tags(self, path):
"""
@@ -78,7 +78,7 @@ class CorpusReaderDecoratorI(CorpusReader):
"""
def __init__(self):
if self.__class__ == CorpusReaderDecorator:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
def reader(self):
"""
@@ -115,7 +115,7 @@ def __new__(self, s, **kwargs):
def __init__(self, s, **kwargs):
if self.__class__ == NamedEntityI:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
self._iob_tag = kwargs.get('iob_tag', self.BEGINS)
def iob_in(self):
@@ -159,7 +159,7 @@ class ChunkTaggerI(TaggerI):
"""
def __init__(self):
if self.__class__ == ChunkTaggerI:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
@@ -172,7 +172,7 @@ class CorefResolverI(object):
"""
def __init__(self):
if self.__class__ == CorefResolverI:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
def mentions(self, sentences):
"""
@@ -255,7 +255,7 @@ def resolve(self, sentences):
class ChunkTaggerI(TaggerI, ChunkParserI):
def __init__(self):
if self.__class__ == ChunkTaggerI:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
def tag(self, sent):
"""
@@ -310,7 +310,7 @@ def __init__(self, feature_detector, labeled_sequence, classifier_builder):
@type classifier_builder: C{function}
"""
if self.__class__ == AbstractClassifierBasedTagger:
- raise AssertionError, "Interfaces can't be instantiated"
+ raise AssertionError("Interfaces can't be instantiated")
ClassifierBasedTagger.__init__(self, feature_detector,
labeled_sequence, classifier_builder)
diff --git a/nltk_contrib/coref/chunk.py b/nltk_contrib/coref/chunk.py
index 2ed9ef3..329a8e0 100644
--- a/nltk_contrib/coref/chunk.py
+++ b/nltk_contrib/coref/chunk.py
@@ -71,13 +71,13 @@ def __init__(self, tokens, index=0, history=None, **kwargs):
if window > 0 and index > 0:
prev_feats = \
self.__class__(tokens, index - 1, history, window=window - 1)
- for key, val in prev_feats.items():
+ for key, val in list(prev_feats.items()):
if not key.startswith('next_') and key != 'word':
self['prev_%s' % key] = val
if window > 0 and index < len(tokens) - 1:
next_feats = self.__class__(tokens, index + 1, window=window - 1)
- for key, val in next_feats.items():
+ for key, val in list(next_feats.items()):
if not key.startswith('prev_') and key != 'word':
self['next_%s' % key] = val
@@ -99,16 +99,16 @@ def parse(self, sent):
return self.__iob2tree(self.tag(sent))
def batch_parse(self, sents):
- return map(self.__iob2tree, self.batch_tag(sents))
+ return list(map(self.__iob2tree, self.batch_tag(sents)))
def chunk(self, sent):
return self.__tree2chunks(self.parse(sent))
def batch_chunk(self, sents):
- return map(self.__tree2chunks, self.batch_parse(sents))
+ return list(map(self.__tree2chunks, self.batch_parse(sents)))
def __iob2tree(self, tagged_sent):
- return tokens2tree(map(flatten, tagged_sent), self.chunk_types)
+ return tokens2tree(list(map(flatten, tagged_sent)), self.chunk_types)
def __tree2chunks(self, tree):
chunks = []
@@ -132,7 +132,7 @@ class NaiveBayesChunkTagger(ClassifierBasedTagger, AbstractChunkTagger):
def train(cls, iob_sents, **kwargs):
fd = kwargs.get('feature_detector', ChunkTaggerFeatureDetector)
chunk_types = kwargs.get('chunk_types', _DEFAULT_CHUNK_TYPES)
- train = LazyMap(lambda sent: map(unflatten, sent), iob_sents)
+ train = LazyMap(lambda sent: list(map(unflatten, sent)), iob_sents)
chunker = cls(fd, train, NaiveBayesClassifier.train)
chunker.chunk_types = chunk_types
return chunker
@@ -157,7 +157,7 @@ def __maxent_train(fs):
count_cutoff=count_cutoff,
min_lldelta=min_lldelta,
trace=trace)
- train = LazyMap(lambda sent: map(unflatten, sent), iob_sents)
+ train = LazyMap(lambda sent: list(map(unflatten, sent)), iob_sents)
chunker = cls(fd, train, __maxent_train)
chunker.chunk_types = chunk_types
return chunker
@@ -182,7 +182,7 @@ def train(cls, iob_sents, **kwargs):
else:
trace = 0
- train = LazyMap(lambda sent: map(unflatten, sent), iob_sents)
+ train = LazyMap(lambda sent: list(map(unflatten, sent)), iob_sents)
mallet_home = os.environ.get('MALLET_HOME', '/usr/local/mallet-0.4')
nltk.classify.mallet.config_mallet(mallet_home)
@@ -205,7 +205,7 @@ def tokens2tree(tokens, chunk_types=_DEFAULT_CHUNK_TYPES, top_node='S'):
for token in tokens:
token, tag = unflatten(token)
- if isinstance(token, basestring):
+ if isinstance(token, str):
word = token
pos = None
elif isinstance(token, tuple):
@@ -254,32 +254,32 @@ def unflatten(token):
def test_chunk_tagger(chunk_tagger, iob_sents, **kwargs):
chunk_types = chunk_tagger.chunk_types
- correct = map(lambda sent: tokens2tree(sent, chunk_types), iob_sents)
- guesses = chunk_tagger.batch_parse(map(lambda c: c.leaves(), correct))
+ correct = [tokens2tree(sent, chunk_types) for sent in iob_sents]
+ guesses = chunk_tagger.batch_parse([c.leaves() for c in correct])
chunkscore = ChunkScore()
for c, g in zip(correct, guesses):
chunkscore.score(c, g)
if kwargs.get('verbose'):
- guesses = chunk_tagger.batch_tag(map(lambda c: c.leaves(), correct))
+ guesses = chunk_tagger.batch_tag([c.leaves() for c in correct])
correct = iob_sents
- print
+ print()
for c, g in zip(correct, guesses):
- for tokc, tokg in zip(map(flatten, c), map(flatten, g)):
+ for tokc, tokg in zip(list(map(flatten, c)), list(map(flatten, g))):
word = tokc[0]
iobc = tokc[-1]
iobg = tokg[-1]
star = ''
if iobg != iobc: star = '*'
- print '%3s %20s %20s %20s' % (star, word, iobc, iobg)
- print
+ print(('%3s %20s %20s %20s' % (star, word, iobc, iobg)))
+ print()
- print 'Precision: %.2f' % chunkscore.precision()
- print 'Recall: %.2f' % chunkscore.recall()
- print 'Accuracy: %.2f' % chunkscore.accuracy()
- print 'F-measure: %.2f' % chunkscore.f_measure()
+ print(('Precision: %.2f' % chunkscore.precision()))
+ print(('Recall: %.2f' % chunkscore.recall()))
+ print(('Accuracy: %.2f' % chunkscore.accuracy()))
+ print(('F-measure: %.2f' % chunkscore.f_measure()))
return chunkscore
@@ -287,11 +287,11 @@ def unittest(verbose=False):
import doctest
failed, tested = doctest.testfile('test/chunk.doctest', verbose)
if not verbose:
- print '%d passed and %d failed.' % (tested - failed, failed)
+ print(('%d passed and %d failed.' % (tested - failed, failed)))
if failed == 0:
- print 'Test passed.'
+ print('Test passed.')
else:
- print '***Test Failed*** %d failures.' % failed
+ print(('***Test Failed*** %d failures.' % failed))
return (tested - failed), failed
def demo():
@@ -304,7 +304,7 @@ def demo():
import optparse
try:
- import cPickle as pickle
+    import pickle
except:
import pickle
@@ -342,12 +342,12 @@ def demo():
num_test = int(m.group('test') or 0)
options.numsents = (num_train, num_test)
else:
- raise ValueError, "malformed argument for option -n"
+ raise ValueError("malformed argument for option -n")
else:
options.numsents = (None, None)
- except ValueError, v:
- print 'error: %s' % v.message
+ except ValueError as v:
+        print('error: %s' % v)
parser.print_help()
if options.unittest:
@@ -369,8 +369,8 @@ def demo():
trainer = eval(options.trainer)
if options.verbose:
- print 'Training %s with %d sentences' % \
- (options.trainer, num_train)
+ print(('Training %s with %d sentences' % \
+ (options.trainer, num_train)))
chunker = trainer(train, verbose=options.verbose)
if options.model:
@@ -388,12 +388,12 @@ def demo():
stream.close()
chunker = pickle.load(_open(options.model, 'r'))
if options.verbose:
- print 'Model saved as %s' % options.model
- except Exception, e:
- print "error: %s" % e
+ print(('Model saved as %s' % options.model))
+ except Exception as e:
+ print(("error: %s" % e))
if test:
if options.verbose:
- print 'Testing %s on %d sentences' % \
- (options.trainer, num_test)
+ print(('Testing %s on %d sentences' % \
+ (options.trainer, num_test)))
chunker.test(test, verbose=options.verbose)
\ No newline at end of file
diff --git a/nltk_contrib/coref/data.py b/nltk_contrib/coref/data.py
index f399fe3..e3bf631 100644
--- a/nltk_contrib/coref/data.py
+++ b/nltk_contrib/coref/data.py
@@ -9,14 +9,14 @@
from gzip import GzipFile, READ as GZ_READ, WRITE as GZ_WRITE
try:
- import cPickle as pickle
+    import pickle
except:
import pickle
try:
- from cStringIO import StringIO
+ from io import StringIO
except:
- from StringIO import StringIO
+ from io import StringIO
class BufferedGzipFile(GzipFile):
"""
diff --git a/nltk_contrib/coref/features.py b/nltk_contrib/coref/features.py
index 899c729..9695580 100644
--- a/nltk_contrib/coref/features.py
+++ b/nltk_contrib/coref/features.py
@@ -432,7 +432,7 @@ def demo():
wt = word_type(word)
if len(wt) == 0: wt = None
if '*' in word: continue
- print "%-20s\t%s" % (word, wt)
+ print(("%-20s\t%s" % (word, wt)))
if __name__ == '__main__':
demo()
diff --git a/nltk_contrib/coref/freiburg.py b/nltk_contrib/coref/freiburg.py
index 598b907..18bc4dd 100644
--- a/nltk_contrib/coref/freiburg.py
+++ b/nltk_contrib/coref/freiburg.py
@@ -238,21 +238,21 @@ def _demo(root, file):
try:
reader = FreiburgCorpusReader(root, file)
- print 'Paragraphs for %s:' % (file)
+ print(('Paragraphs for %s:' % (file)))
for para in reader.paras():
- print ' %s' % (para)
- print
- print 'Sentences for %s:' % (file)
+ print((' %s' % (para)))
+ print()
+ print(('Sentences for %s:' % (file)))
for sent in reader.sents():
- print ' %s' % (sent)
- print
- print 'Words for %s:' % (file)
+ print((' %s' % (sent)))
+ print()
+ print(('Words for %s:' % (file)))
for word in reader.words():
- print ' %s/%s' % (word, word.pos())
- print
- except Exception, e:
- print 'Error encountered while running demo for %s: %s' % (file, e)
- print
+ print((' %s/%s' % (word, word.pos())))
+ print()
+ except Exception as e:
+ print(('Error encountered while running demo for %s: %s' % (file, e)))
+ print()
def demo():
"""
diff --git a/nltk_contrib/coref/muc.py b/nltk_contrib/coref/muc.py
index 528acbc..14d7992 100644
--- a/nltk_contrib/coref/muc.py
+++ b/nltk_contrib/coref/muc.py
@@ -99,10 +99,10 @@ class MUCDocument:
# def __init__(self, text, docno=None, dateline=None, headline=''):
def __init__(self, **text):
self.text = None
- if isinstance(text, basestring):
+ if isinstance(text, str):
self.text = text
elif isinstance(text, dict):
- for key, val in text.items():
+ for key, val in list(text.items()):
setattr(self, key, val)
else:
raise
@@ -154,7 +154,7 @@ def raw(self, fileids=None):
"""
if fileids is None:
fileids = self._fileids
- elif isinstance(fileids, basestring):
+ elif isinstance(fileids, str):
fileids = [fileids]
return concat([self.open(f).read() for f in fileids])
@@ -221,7 +221,7 @@ def __chunked_sent(sent):
chunks.append([(word, None) for word in token[0]])
# If the token's contents is a string, append it as a
# word/tag tuple.
- elif isinstance(token[0], basestring):
+ elif isinstance(token[0], str):
chunks.append((token[0], None))
# Something bad happened.
else:
@@ -416,7 +416,7 @@ def __chunked_sent(sent):
def _read_parsed_block(self, stream):
# TODO: LazyMap but StreamBackedCorpusView doesn't support
# AbstractLazySequence currently.
- return map(self._parse, self._read_block(stream))
+ return list(map(self._parse, self._read_block(stream)))
def _parse(self, doc):
"""
@@ -488,7 +488,7 @@ def tree2tuple(tree):
# Get the leaves.
s = (tree.leaves(),)
# Get the label
- if isinstance(tree.node, basestring):
+ if isinstance(tree.node, str):
node = (tree.node,)
elif isinstance(tree.node, tuple):
node = tree.node
@@ -497,7 +497,7 @@ def tree2tuple(tree):
# Merge the leaves and the label.
return s + node
# If the tree is a string just convert it to a tuple.
- elif isinstance(tree, basestring):
+ elif isinstance(tree, str):
return (tree, None)
# Something bad happened.
else:
@@ -513,7 +513,7 @@ def __fix_tokenization(sents):
sents[index] += sents[index + next]
sents[index + next] = ''
next += 1
- sents = filter(None, sents)
+ sents = [_f for _f in sents if _f]
return sents
if s:
tree = Tree(top_node, [])
@@ -554,7 +554,7 @@ def _muc_read_words(s, top_node):
else:
stack[-1].extend(_WORD_TOKENIZER.tokenize(word))
if len(stack) != 1:
- print stack
+ print(stack)
assert len(stack) == 1
return stack[0]
@@ -567,25 +567,25 @@ def demo(**kwargs):
muc6 = LazyCorpusLoader('muc6/', MUCCorpusReader, muc6_documents)
for sent in muc6.iob_sents()[:]:
for word in sent:
- print word
- print
- print
+ print(word)
+ print()
+ print()
for sent in muc6.mentions(depth=None):
for mention in sent:
- print mention
- if sent: print
- print
+ print(mention)
+ if sent: print()
+ print()
muc7 = LazyCorpusLoader('muc7/', MUCCorpusReader, muc7_documents)
for sent in muc7.iob_sents()[:]:
for word in sent:
- print word
- print
- print
+ print(word)
+ print()
+ print()
for sent in muc7.mentions(depth=None):
for mention in sent:
- print mention
- if sent: print
- print
+ print(mention)
+ if sent: print()
+ print()
if __name__ == '__main__':
demo()
diff --git a/nltk_contrib/coref/muc7.py b/nltk_contrib/coref/muc7.py
index 524e9b1..c8fd5af 100644
--- a/nltk_contrib/coref/muc7.py
+++ b/nltk_contrib/coref/muc7.py
@@ -273,21 +273,21 @@ def _demo(root, file):
try:
reader = MUC7CorpusReader(root, file)
- print 'Paragraphs for %s:' % (file)
+ print(('Paragraphs for %s:' % (file)))
for para in reader.paras():
- print ' %s' % (para)
- print
- print 'Sentences for %s:' % (file)
+ print((' %s' % (para)))
+ print()
+ print(('Sentences for %s:' % (file)))
for sent in reader.sents():
- print ' %s' % (sent)
- print
- print 'Words for %s:' % (file)
+ print((' %s' % (sent)))
+ print()
+ print(('Words for %s:' % (file)))
for word in reader.words():
- print ' %s' % (word)
- print
- except Exception, e:
- print 'Error encountered while running demo for %s: %s' % (file, e)
- print
+ print((' %s' % (word)))
+ print()
+ except Exception as e:
+ print(('Error encountered while running demo for %s: %s' % (file, e)))
+ print()
def demo():
"""
diff --git a/nltk_contrib/coref/ne.py b/nltk_contrib/coref/ne.py
index 67f0abd..bb12b1c 100644
--- a/nltk_contrib/coref/ne.py
+++ b/nltk_contrib/coref/ne.py
@@ -159,13 +159,13 @@ def __init__(self, tokens, index=0, history=None, **kwargs):
if window > 0 and index > 0:
prev_feats = \
self.__class__(tokens, index - 1, history, window=window - 1)
- for key, val in prev_feats.items():
+ for key, val in list(prev_feats.items()):
if not key.startswith('next_') and not key == 'word':
self['prev_%s' % key] = val
if window > 0 and index < len(tokens) - 1:
next_feats = self.__class__(tokens, index + 1, window=window - 1)
- for key, val in next_feats.items():
+ for key, val in list(next_feats.items()):
if not key.startswith('prev_') and not key == 'word':
self['next_%s' % key] = val
@@ -184,11 +184,11 @@ def unittest(verbose=False):
import doctest
failed, passed = doctest.testfile('test/ne.doctest', verbose)
if not verbose:
- print '%d passed and %d failed.' % (failed, passed)
+        print('%d passed and %d failed.' % (passed - failed, failed))
if failed == 0:
- print 'Test passed.'
+ print('Test passed.')
else:
- print '***Test Failed*** %d failures.' % failed
+ print(('***Test Failed*** %d failures.' % failed))
return failed, passed
_NE_CHUNK_TYPES = ('PERSON', 'LOCATION', 'ORGANIZATION', 'MONEY')
@@ -199,7 +199,7 @@ def unittest(verbose=False):
import optparse
try:
- import cPickle as pickle
+    import pickle
except:
import pickle
@@ -244,7 +244,7 @@ def unittest(verbose=False):
num_test = int(m.group('test') or 0)
options.numsents = (num_train, num_test)
else:
- raise ValueError, "malformed argument for option -n"
+ raise ValueError("malformed argument for option -n")
else:
options.numsents = (None, None)
@@ -256,10 +256,10 @@ def unittest(verbose=False):
file_test = m.group('test')
options.extract = (file_train, file_test)
else:
- raise ValueError, "malformed argument for option -e"
+ raise ValueError("malformed argument for option -e")
- except ValueError, v:
- print 'error: %s' % v.message
+ except ValueError as v:
+        print('error: %s' % v)
parser.print_help()
if options.unittest:
@@ -292,9 +292,9 @@ def unittest(verbose=False):
for index in range(len(tokens)):
tag = tokens[index][-1]
feats = feature_detector(tokens, index, history)
- keys.update(feats.keys())
+ keys.update(list(feats.keys()))
stream.write('%s %s\n' % (tag, ' '.join(['%s=%s' % (k, re.escape(str(v)))
- for k, v in feats.items()])))
+ for k, v in list(feats.items())])))
history.append(tag)
history = []
stream.close()
@@ -306,9 +306,9 @@ def unittest(verbose=False):
for index in range(len(tokens)):
tag = tokens[index][-1]
feats = feature_detector(tokens, index, history)
- keys.update(feats.keys())
+ keys.update(list(feats.keys()))
stream.write('%s %s\n' % (tag, ' '.join(['%s=%s' % (k, re.escape(str(v)))
- for k, v in feats.items()])))
+ for k, v in list(feats.items())])))
history.append(tag)
history = []
stream.close()
@@ -343,9 +343,9 @@ def unittest(verbose=False):
reader = MXPostTaggerCorpusReader(eval(options.corpus))
iob_sents = reader.iob_sents()
tagged_sents = reader.tagged_sents()
- corpus = LazyMap(lambda (iob_sent, tagged_sent):
+ corpus = LazyMap(lambda iob_sent_tagged_sent:
[(iw, tt, iob) for ((iw, iob), (tw, tt))
- in zip(iob_sent, tagged_sent)],
+ in zip(iob_sent_tagged_sent[0], iob_sent_tagged_sent[1])],
LazyZip(iob_sents, tagged_sents))
else:
iob_sents = eval(options.corpus).iob_sents()
@@ -360,8 +360,8 @@ def unittest(verbose=False):
trainer = eval(options.trainer)
if options.verbose:
- print 'Training %s with %d sentences' % \
- (options.trainer, num_train)
+ print(('Training %s with %d sentences' % \
+ (options.trainer, num_train)))
ner = trainer(train,
feature_detector=NERChunkTaggerFeatureDetector,
chunk_types=_NE_CHUNK_TYPES,
@@ -382,12 +382,12 @@ def unittest(verbose=False):
stream.close()
ner = pickle.load(_open(options.model, 'r'))
if options.verbose:
- print 'Model saved as %s' % options.model
- except Exception, e:
- print "error: %s" % e
+ print(('Model saved as %s' % options.model))
+ except Exception as e:
+ print(("error: %s" % e))
if test:
if options.verbose:
- print 'Testing %s on %d sentences' % \
- (options.trainer, num_test)
+ print(('Testing %s on %d sentences' % \
+ (options.trainer, num_test)))
ner.test(test, verbose=options.verbose)
diff --git a/nltk_contrib/coref/resolve.py b/nltk_contrib/coref/resolve.py
index 805ee38..329ea22 100644
--- a/nltk_contrib/coref/resolve.py
+++ b/nltk_contrib/coref/resolve.py
@@ -10,14 +10,14 @@
import optparse
try:
- import cPickle as pickle
+    import pickle
except:
import pickle
try:
- from cStringIO import StringIO
+ from io import StringIO
except:
- from StringIO import StringIO
+ from io import StringIO
from nltk.util import LazyMap, LazyZip, LazyConcatenation, LazyEnumerate
@@ -129,23 +129,23 @@ def baseline_coref_resolver_demo():
resolved_mentions = resolver.resolve_mentions(mentions)
resolved_discourse = resolver.resolve(sents)
- print 'Baseline coref resolver demo...'
- print 'Mentions:'
+ print('Baseline coref resolver demo...')
+ print('Mentions:')
for mention in mentions:
- print mention
- print
- print 'Resolved mentions:'
+ print(mention)
+ print()
+ print('Resolved mentions:')
for mention in resolved_mentions:
- print mention
- print
- print 'Resolved discourse:'
+ print(mention)
+ print()
+ print('Resolved discourse:')
for sent in resolved_discourse:
- print sent
- print
- print
+ print(sent)
+ print()
+ print()
def demo():
- print 'Demo...'
+ print('Demo...')
baseline_coref_resolver_demo()
# muc6_test = LazyCorpusLoader(
# 'muc6', MUC6CorpusReader,
@@ -184,7 +184,7 @@ def demo():
# print
if __name__ == '__main__':
- print time.ctime(time.time())
+ print((time.ctime(time.time())))
parser = optparse.OptionParser()
parser.add_option('-d', '--demo', action='store_true', dest='demo',
@@ -322,9 +322,9 @@ def join(chunk):
pred_tags = model.tag(words)
for x, y, z in zip(pred_tags, gold_tags, words):
if x == y:
- print ' ', (x, y, z)
+                print(' ', (x, y, z))
else:
- print '* ', (x, y, z)
+                print('* ', (x, y, z))
elif options.train_ner == 'classifier2':
muc6_train = LazyCorpusLoader(
@@ -352,11 +352,11 @@ def join(chunk):
pred_tags = model.tag(words)
for x, y, z in zip(pred_tags, gold_tags, words):
if x == y:
- print ' ', (x, y, z)
+                print(' ', (x, y, z))
else:
- print '* ', (x, y, z)
+                print('* ', (x, y, z))
elif options.demo:
demo()
- print time.ctime(time.time())
+ print((time.ctime(time.time())))
diff --git a/nltk_contrib/coref/tag.py b/nltk_contrib/coref/tag.py
index f78b179..291bb8d 100644
--- a/nltk_contrib/coref/tag.py
+++ b/nltk_contrib/coref/tag.py
@@ -3,9 +3,9 @@
import subprocess
try:
- from cStringIO import StringIO
+ from io import StringIO
except:
- from StringIO import StringIO
+ from io import StringIO
from nltk.util import LazyMap, LazyConcatenation
from nltk.internals import find_binary, java
@@ -48,7 +48,7 @@ def __init__(self, reader, **kwargs):
def tagged_sents(self):
sents = self.sents()
- batch_indices = range(len(sents) / 1024 + 1)
+        batch_indices = list(range(len(sents) // 1024 + 1))
return LazyConcatenation(LazyMap(lambda i:
self._tagger.batch_tag(sents[i * 1024: i * 1024 + 1024]),
batch_indices))
@@ -67,7 +67,7 @@ def batch_tag(self, sents):
def config_mxpost(mxpost_home=None):
global _mxpost_classpath, _mxpost_home
classpath = os.environ.get('CLASSPATH', '').split(':')
- mxpost_jar = filter(lambda c: c.endswith('mxpost.jar'), classpath)
+ mxpost_jar = [c for c in classpath if c.endswith('mxpost.jar')]
if mxpost_jar:
_mxpost_home = os.path.dirname(mxpost_jar[0])
_mxpost_classpath = mxpost_jar[0]
@@ -83,7 +83,7 @@ def config_mxpost(mxpost_home=None):
else:
_mxpost_home = None
_mxpost_classpath = None
- raise Exception, "can't find mxpost.jar"
+ raise Exception("can't find mxpost.jar")
def call_mxpost(classpath=None, stdin=None, stdout=None, stderr=None,
blocking=False):
@@ -103,14 +103,14 @@ def call_mxpost(classpath=None, stdin=None, stdout=None, stderr=None,
def mxpost_parse_output(mxpost_output):
result = []
mxpost_output = mxpost_output.strip()
- for sent in filter(None, mxpost_output.split('\n')):
- tokens = filter(None, re.split(r'\s+', sent))
+ for sent in [_f for _f in mxpost_output.split('\n') if _f]:
+ tokens = [_f for _f in re.split(r'\s+', sent) if _f]
if tokens:
result.append([])
for token in tokens:
m = _MXPOST_OUTPUT_RE.match(token)
if not m:
- raise Exception, "invalid mxpost tag pattern: %s, %s" % (token, tokens)
+ raise Exception("invalid mxpost tag pattern: %s, %s" % (token, tokens))
word = m.group('word')
tag = m.group('tag')
result[-1].append((word, tag))
@@ -122,7 +122,7 @@ def mxpost_tag(sents, **kwargs):
p.communicate('\n'.join([' '.join(sent) for sent in sents]))
rc = p.returncode
if rc != 0:
- raise Exception, 'exited with non-zero status %s' % rc
+ raise Exception('exited with non-zero status %s' % rc)
if kwargs.get('verbose'):
- print 'warning: %s' % stderr
+ print(('warning: %s' % stderr))
return mxpost_parse_output(stdout)
\ No newline at end of file
diff --git a/nltk_contrib/coref/train.py b/nltk_contrib/coref/train.py
index bd36e44..99c1273 100644
--- a/nltk_contrib/coref/train.py
+++ b/nltk_contrib/coref/train.py
@@ -19,22 +19,22 @@
from nltk_contrib.coref.data import BufferedGzipFile
try:
- import cPickle as pickle
+    import pickle
except:
import pickle
try:
- from cStringIO import StringIO
+ from io import StringIO
except:
- from StringIO import StringIO
+ from io import StringIO
class LidstoneProbDistFactory(LidstoneProbDist):
def __init__(self, fd, *args, **kwargs):
LidstoneProbDist.__init__(self, fd, 0.01, args[-1])
samples = fd.samples()
- self._probs = dict(zip([0]*len(samples), samples))
- self._logprobs = dict(zip([0]*len(samples), samples))
+ self._probs = dict(list(zip([0]*len(samples), samples)))
+ self._logprobs = dict(list(zip([0]*len(samples), samples)))
for sample in samples:
self._logprobs[sample] = LidstoneProbDist.logprob(self, sample)
self._probs[sample] = LidstoneProbDist.prob(self, sample)
@@ -84,7 +84,7 @@ def __featurize(tagged_token):
untagged_sequence = LazyMap(__untag, LazyMap(__featurize, test_sequence))
predicted_tags = LazyMap(self.classify, untagged_sequence)
acc = accuracy(correct_tags, predicted_tags)
- print 'accuracy over %d tokens: %.2f' % (count, acc)
+ print(('accuracy over %d tokens: %.2f' % (count, acc)))
class MaxentClassifierFactory(object):
@@ -125,37 +125,37 @@ def train_model(train_class, labeled_sequence, test_sequence, pickle_file,
verbose or include printed output.
@type verbose: C{bool}
"""
- print 'Training ', train_class
- print 'Loading training data (supervised)...'
+    print('Training ', train_class)
+ print('Loading training data (supervised)...')
labeled_sequence = labeled_sequence[:num_train_sents]
sent_count = len(labeled_sequence)
word_count = sum([len(sent) for sent in labeled_sequence])
- print '%s sentences' % (sent_count)
- print '%s words' % (word_count)
+ print(('%s sentences' % (sent_count)))
+ print(('%s words' % (word_count)))
- print 'Training...'
+ print('Training...')
start = time.time()
model = train_class.train(labeled_sequence, **kwargs)
end = time.time()
- print 'Training time: %.3fs' % (end - start)
- print 'Training time per sentence: %.3fs' % (float(end - start) / sent_count)
- print 'Training time per word: %.3fs' % (float(end - start) / word_count)
+ print(('Training time: %.3fs' % (end - start)))
+ print(('Training time per sentence: %.3fs' % (float(end - start) / sent_count)))
+ print(('Training time per word: %.3fs' % (float(end - start) / word_count)))
- print 'Loading test data...'
+ print('Loading test data...')
test_sequence = test_sequence[:num_test_sents]
sent_count = len(test_sequence)
word_count = sum([len(sent) for sent in test_sequence])
- print '%s sentences' % (sent_count)
- print '%s words' % (word_count)
+ print(('%s sentences' % (sent_count)))
+ print(('%s words' % (word_count)))
try:
- print 'Saving model...'
+ print('Saving model...')
if isinstance(pickle_file, str):
if pickle_file.endswith('.gz'):
_open = BufferedGzipFile
@@ -165,23 +165,23 @@ def train_model(train_class, labeled_sequence, test_sequence, pickle_file,
pickle.dump(model, stream)
stream.close()
model = pickle.load(_open(pickle_file, 'rb'))
- print 'Model saved as %s' % pickle_file
+ print(('Model saved as %s' % pickle_file))
else:
stream = StringIO()
pickle.dump(model, stream)
stream = StringIO(stream.getvalue())
model = pickle.load(stream)
- except Exception, e:
- print 'Error saving model, %s' % str(e)
+ except Exception as e:
+ print(('Error saving model, %s' % str(e)))
- print 'Testing...'
+ print('Testing...')
start = time.time()
model.test(test_sequence, **kwargs)
end = time.time()
- print 'Test time: %.3fs' % (end - start)
- print 'Test time per sentence: %.3fs' % (float(end - start) / sent_count)
- print 'Test time per word: %.3fs' % (float(end - start) / word_count)
+ print(('Test time: %.3fs' % (end - start)))
+ print(('Test time per sentence: %.3fs' % (float(end - start) / sent_count)))
+ print(('Test time per word: %.3fs' % (float(end - start) / word_count)))
return model
diff --git a/nltk_contrib/coref/util.py b/nltk_contrib/coref/util.py
index 75f0d8a..0e3b9be 100644
--- a/nltk_contrib/coref/util.py
+++ b/nltk_contrib/coref/util.py
@@ -8,14 +8,14 @@
import time
try:
- import cPickle as pickle
+    import pickle
except:
import pickle
try:
- from cStringIO import StringIO
+ from io import StringIO
except:
- from StringIO import StringIO
+ from io import StringIO
from nltk.data import load, find
from nltk.corpus import CorpusReader, BracketParseCorpusReader
@@ -114,7 +114,7 @@ def __init__(self, reader):
def zipzip(*lists):
- return LazyMap(lambda lst: zip(*lst), LazyZip(*lists))
+ return LazyMap(lambda lst: list(zip(*lst)), LazyZip(*lists))
def load_treebank(sections):
treebank_path = os.environ.get('NLTK_TREEBANK', 'treebank/combined')
@@ -133,16 +133,16 @@ def treebank_tagger_demo():
'state_union', PlaintextCorpusReader, r'(?!\.svn).*\.txt')
state_union = TreebankTaggerCorpusReader(state_union)
- print 'Treebank tagger demo...'
- print 'Tagged sentences:'
+ print('Treebank tagger demo...')
+ print('Tagged sentences:')
for sent in state_union.tagged_sents()[500:505]:
- print sent
- print
- print
- print 'Tagged words:'
+ print(sent)
+ print()
+ print()
+ print('Tagged words:')
for word in state_union.tagged_words()[500:505]:
- print word
- print
+ print(word)
+ print()
def treebank_chunk_tagger_demo():
from nltk.corpus.util import LazyCorpusLoader
@@ -153,17 +153,17 @@ def treebank_chunk_tagger_demo():
'state_union', PlaintextCorpusReader, r'(?!\.svn).*\.txt')
state_union = TreebankChunkTaggerCorpusReader(state_union)
- print 'Treebank chunker demo...'
- print 'Chunked sentences:'
+ print('Treebank chunker demo...')
+ print('Chunked sentences:')
for sent in state_union.chunked_sents()[500:505]:
- print sent
- print
- print
- print 'Parsed sentences:'
+ print(sent)
+ print()
+ print()
+ print('Parsed sentences:')
for tree in state_union.parsed_sents()[500:505]:
- print tree
- print
- print
+ print(tree)
+ print()
+ print()
def muc6_chunk_tagger_demo():
from nltk.corpus.util import LazyCorpusLoader
@@ -172,12 +172,12 @@ def muc6_chunk_tagger_demo():
treebank = MUC6NamedEntityChunkTaggerCorpusReader(load_treebank('0[12]'))
- print 'MUC6 named entity chunker demo...'
- print 'Chunked sentences:'
+ print('MUC6 named entity chunker demo...')
+ print('Chunked sentences:')
for sent in treebank.chunked_sents()[:10]:
- print sent
- print
- print
+ print(sent)
+ print()
+ print()
def baseline_chunk_tagger_demo():
from nltk.corpus.util import LazyCorpusLoader
@@ -186,16 +186,16 @@ def baseline_chunk_tagger_demo():
chunker = BaselineNamedEntityChunkTagger()
treebank = load_treebank('0[12]')
- print 'Baseline named entity chunker demo...'
- print 'Chunked sentences:'
+ print('Baseline named entity chunker demo...')
+ print('Chunked sentences:')
for sent in treebank.sents()[:10]:
- print chunker.chunk(sent)
- print
- print 'IOB-tagged sentences:'
+ print((chunker.chunk(sent)))
+ print()
+ print('IOB-tagged sentences:')
for sent in treebank.sents()[:10]:
- print chunker.tag(sent)
- print
- print
+ print((chunker.tag(sent)))
+ print()
+ print()
def demo():
from nltk_contrib.coref.util import treebank_tagger_demo, \
diff --git a/nltk_contrib/dependency/__init__.py b/nltk_contrib/dependency/__init__.py
index 1d3f444..b01c902 100644
--- a/nltk_contrib/dependency/__init__.py
+++ b/nltk_contrib/dependency/__init__.py
@@ -4,4 +4,4 @@
# URL:
# For license information, see LICENSE.TXT
-from deptree import *
+from .deptree import *
diff --git a/nltk_contrib/dependency/deptree.py b/nltk_contrib/dependency/deptree.py
index 8cfd110..857bb8d 100644
--- a/nltk_contrib/dependency/deptree.py
+++ b/nltk_contrib/dependency/deptree.py
@@ -35,7 +35,7 @@ def __init__(self):
def __str__(self):
# return '\n'.join([str(n) for n in self.nodelist])
- return '\n'.join([', '.join(['%s: %15s'%item for item in n.iteritems()]) for n in self.nodelist])
+ return '\n'.join([', '.join(['%s: %15s'%item for item in list(n.items())]) for n in self.nodelist])
def load(self, file):
"""
@@ -151,7 +151,7 @@ def nx_graph(self):
labeled directed graph.
@rtype: C{XDigraph}
"""
- nx_nodelist = range(1, len(self.nodelist))
+ nx_nodelist = list(range(1, len(self.nodelist)))
nx_edgelist = [(n, self._hd(n), self._rel(n))
for n in nx_nodelist if self._hd(n)]
self.nx_labels = {}
@@ -191,7 +191,7 @@ def demo(nx=False):
. . 9 VMOD
""")
tree = dg.deptree()
- print tree.pprint()
+ print((tree.pprint()))
if nx:
#currently doesn't work
try:
diff --git a/nltk_contrib/dependency/ptbconv.py b/nltk_contrib/dependency/ptbconv.py
index 4e3e6ae..ddf066f 100644
--- a/nltk_contrib/dependency/ptbconv.py
+++ b/nltk_contrib/dependency/ptbconv.py
@@ -18,6 +18,7 @@
import math
from nltk.internals import find_binary
import os
+from functools import reduce
OUTPUT_FORMAT = '%s\t%s\t_\t%s\t_\t_\t%s\t%s\t_\t_\n'
@@ -84,7 +85,7 @@ def _run_ptbconv(num, format='D', verbose=False):
(stdout, stderr) = p.communicate()
if verbose:
- print stderr.strip()
+ print(stderr.strip())
return stdout
@@ -94,10 +95,10 @@ def _treebank_path():
[os.environ['NLTK_DATA'], 'corpora', 'treebank'])
def convert_all():
- for i in xrange(199):
- print '%s:' % (i+1),
+ for i in range(199):
+ print('%s:' % (i+1), end=' ')
convert(i+1, 'D', True, True)
if __name__ == '__main__':
- print convert(1, 'D')
+ print(convert(1, 'D'))
\ No newline at end of file
diff --git a/nltk_contrib/dependency/util.py b/nltk_contrib/dependency/util.py
index 6695530..a4f9c6b 100644
--- a/nltk_contrib/dependency/util.py
+++ b/nltk_contrib/dependency/util.py
@@ -5,7 +5,7 @@
from nltk import tokenize
from itertools import islice
import os
-from deptree import DepGraph
+from .deptree import DepGraph
from nltk.stem.wordnet import WordNetLemmatizer
def tag2tab(s, sep='/'):
@@ -60,8 +60,8 @@ def conll_to_depgraph(input_str, stem=False, verbose=False):
assert depgraph_input, 'depgraph_input is empty'
if verbose:
- print 'Begin DepGraph creation'
- print 'depgraph_input=\n%s' % depgraph_input
+ print('Begin DepGraph creation')
+ print('depgraph_input=\n%s' % depgraph_input)
return DepGraph().read(depgraph_input)
@@ -79,7 +79,7 @@ def demo():
#s = ''
for sent in islice(tabtagged(), 3):
for line in sent:
- print line,
+ print(line, end=' ')
#s += ''.join(sent)
#print >>f, s
#f.close()
diff --git a/nltk_contrib/featuredemo.py b/nltk_contrib/featuredemo.py
index e94d685..e06b406 100755
--- a/nltk_contrib/featuredemo.py
+++ b/nltk_contrib/featuredemo.py
@@ -13,7 +13,7 @@
def text_parse(grammar, sent, trace=2, drawtrees=False, latex=False):
parser = grammar.earley_parser(trace=trace)
- print parser._grammar
+ print((parser._grammar))
tokens = sent.split()
trees = parser.get_parse_list(tokens)
if drawtrees:
@@ -21,8 +21,8 @@ def text_parse(grammar, sent, trace=2, drawtrees=False, latex=False):
TreeView(trees)
else:
for tree in trees:
- if latex: print tree.latex_qtree()
- else: print tree
+ if latex: print((tree.latex_qtree()))
+ else: print(tree)
def main():
import sys
@@ -83,7 +83,7 @@ def main():
sentence = line.strip()
if sentence == '': continue
if sentence[0] == '#': continue
- print "Sentence: %s" % sentence
+ print(("Sentence: %s" % sentence))
text_parse(grammar, sentence, trace, False, options.latex)
if __name__ == '__main__':
diff --git a/nltk_contrib/fst/draw_graph.py b/nltk_contrib/fst/draw_graph.py
index abbeaa8..e3c56bc 100644
--- a/nltk_contrib/fst/draw_graph.py
+++ b/nltk_contrib/fst/draw_graph.py
@@ -133,7 +133,9 @@ def _label_coords(self):
labely = (y1+y2)*0.5 - (x2-x1)*(self._curve/2 + 8/r)
return (int(labelx), int(labely))
- def _line_coords(self, (startx, starty), (endx, endy)):
+    def _line_coords(self, start, end):
+        (startx, starty) = start
+        (endx, endy) = end
(x1, y1) = int(startx), int(starty)
(x2, y2) = int(endx), int(endy)
radius1 = 0
@@ -253,7 +255,7 @@ def remove_edge(self, edge):
Remove an edge from the graph (but don't destroy it).
@type edge: L{GraphEdgeWidget}
"""
- print 'remove', edge
+        print('remove', edge)
# Get the edge's start & end nodes.
start, end = self._startnode[edge], self._endnode[edge]
@@ -315,9 +317,9 @@ def destroy_node(self, node):
"""
Remove a node from the graph, and destroy the node.
"""
- print 'removing', node
+        print('removing', node)
for widget in self.remove_node(node):
- print 'destroying', widget
+            print('destroying', widget)
widget.destroy()
def _tags(self): return []
@@ -467,7 +469,7 @@ def _arrange_level(self, levelnum):
while len(nodes) > 0:
best = (None, None, -1) # node, position, score.
for pos in range(len(scores)):
- for (node, score) in scores[pos].items():
+ for (node, score) in list(scores[pos].items()):
if (score > best[2] and level[pos] is None and
node in nodes):
best = (node, pos, score)
@@ -526,9 +528,9 @@ def _reachable(self, node, reached=None):
"""
How many *unexpanded* nodes can be reached from the given node?
"""
- if self._nodelevel.has_key(node): return 0
+ if node in self._nodelevel: return 0
if reached is None: reached = {}
- if not reached.has_key(node):
+ if node not in reached:
reached[node] = 1
for edge in self._outedges.get(node, []):
self._reachable(self._endnode[edge], reached)
@@ -551,14 +553,14 @@ def _add_descendants_dfs(self, parent_level, levelnum):
if levelnum >= len(self._levels): self._levels.append([])
for parent_node in parent_level:
# Add the parent node
- if not self._nodelevel.has_key(parent_node):
+ if parent_node not in self._nodelevel:
self._levels[levelnum-1].append(parent_node)
self._nodelevel[parent_node] = levelnum-1
# Recurse to its children
child_nodes = [self._endnode[edge]
for edge in self._outedges.get(parent_node, [])
- if not self._nodelevel.has_key(self._endnode[edge])]
+ if self._endnode[edge] not in self._nodelevel]
if len(child_nodes) > 0:
self._add_descendants_dfs(child_nodes, levelnum+1)
@@ -569,7 +571,7 @@ def _add_descendants_bfs(self, parent_level, levelnum):
child_nodes = [self._endnode[edge]
for edge in self._outedges.get(parent_node, [])]
for node in child_nodes:
- if not self._nodelevel.has_key(node):
+ if node not in self._nodelevel:
self._levels[levelnum].append(node)
self._nodelevel[node] = levelnum
frontier_nodes.append(node)
@@ -585,7 +587,7 @@ def _add_descendants_bfs2(self, parent_level, levelnum):
child_nodes += [self._startnode[edge]
for edge in self._inedges.get(parent_node, [])]
for node in child_nodes:
- if not self._nodelevel.has_key(node):
+ if node not in self._nodelevel:
self._levels[levelnum].append(node)
self._nodelevel[node] = levelnum
frontier_nodes.append(node)
diff --git a/nltk_contrib/fst/fst.py b/nltk_contrib/fst/fst.py
index 20c2988..222fb4d 100644
--- a/nltk_contrib/fst/fst.py
+++ b/nltk_contrib/fst/fst.py
@@ -1363,7 +1363,7 @@ def step(self, *e):
if self.stepper is None: return
# Perform one step.
- try: result, val = self.stepper.next()
+ try: result, val = next(self.stepper)
except StopIteration: return
if result == 'fail':
@@ -1377,7 +1377,7 @@ def step(self, *e):
self.out_text.insert('end', ' (Finished!)')
elif result == 'backtrack':
self.out_text.insert('end', ' (Backtrack)')
- for state, widget in self.graph.state_widgets.items():
+ for state, widget in list(self.graph.state_widgets.items()):
if state == val: self.graph.mark_state(state, '#f0b0b0')
else: self.graph.unmark_state(state)
else:
@@ -1408,7 +1408,7 @@ def step(self, *e):
self.state_descr.insert('end', state_descr or '')
# Highlight the new dst state.
- for state, widget in self.graph.state_widgets.items():
+ for state, widget in list(self.graph.state_widgets.items()):
if state == fst.dst(arc):
self.graph.mark_state(state, '#00ff00')
elif state == fst.src(arc):
@@ -1416,7 +1416,7 @@ def step(self, *e):
else: self.graph.unmark_state(state)
# Highlight the new arc.
- for a, widget in self.graph.arc_widgets.items():
+ for a, widget in list(self.graph.arc_widgets.items()):
if a == arc: self.graph.mark_arc(a)
else: self.graph.unmark_arc(a)
@@ -1467,11 +1467,11 @@ def mainloop(self, *args, **kwargs):
end ->
""")
- print "john eats the bread ->"
- print ' '+ ' '.join(fst.transduce("john eats the bread".split()))
+ print("john eats the bread ->")
+ print((' '+ ' '.join(fst.transduce("john eats the bread".split()))))
rev = fst.inverted()
- print "la vache mange de l'herbe ->"
- print ' '+' '.join(rev.transduce("la vache mange de l'herbe".split()))
+ print("la vache mange de l'herbe ->")
+ print((' '+' '.join(rev.transduce("la vache mange de l'herbe".split()))))
demo = FSTDemo(fst)
demo.transduce("the cow eats the bread".split())
diff --git a/nltk_contrib/fst/fst2.py b/nltk_contrib/fst/fst2.py
index 75188e0..6107bdb 100644
--- a/nltk_contrib/fst/fst2.py
+++ b/nltk_contrib/fst/fst2.py
@@ -965,8 +965,8 @@ def dotgraph(self, mergeEdges=True, multiEdgesToNodesColoringThreshold=2.5,
uniqueArcs[(src,dst)] += [(in_str,out_str)]
else:
uniqueArcs[(src,dst)] = [(in_str,out_str)]
- ratio = float(len(uniqueArcs.keys())) / float(stateCount)
- for src,dst in uniqueArcs.keys():
+        ratio = float(len(uniqueArcs)) / float(stateCount)
+ for src,dst in list(uniqueArcs.keys()):
uniqueArcs[(src,dst)].sort()
sortedArcs = FST.mergeRuns(uniqueArcs[(src,dst)],minRun)
label = ""
@@ -1467,7 +1467,7 @@ def step(self, *e):
if self.stepper is None: return
# Perform one step.
- try: result, val = self.stepper.next()
+ try: result, val = next(self.stepper)
except StopIteration: return
if result == 'fail':
@@ -1481,7 +1481,7 @@ def step(self, *e):
self.out_text.insert('end', ' (Finished!)')
elif result == 'backtrack':
self.out_text.insert('end', ' (Backtrack)')
- for state, widget in self.graph.state_widgets.items():
+ for state, widget in list(self.graph.state_widgets.items()):
if state == val: self.graph.mark_state(state, '#f0b0b0')
else: self.graph.unmark_state(state)
else:
@@ -1512,7 +1512,7 @@ def step(self, *e):
self.state_descr.insert('end', state_descr or '')
# Highlight the new dst state.
- for state, widget in self.graph.state_widgets.items():
+ for state, widget in list(self.graph.state_widgets.items()):
if state == fst.dst(arc):
self.graph.mark_state(state, '#00ff00')
elif state == fst.src(arc):
@@ -1520,7 +1520,7 @@ def step(self, *e):
else: self.graph.unmark_state(state)
# Highlight the new arc.
- for a, widget in self.graph.arc_widgets.items():
+ for a, widget in list(self.graph.arc_widgets.items()):
if a == arc: self.graph.mark_arc(a)
else: self.graph.unmark_arc(a)
@@ -1571,11 +1571,11 @@ def mainloop(self, *args, **kwargs):
end ->
""")
- print "john eats the bread ->"
- print ' '+ ' '.join(fst.transduce("john eats the bread".split()))
+ print("john eats the bread ->")
+ print((' '+ ' '.join(fst.transduce("john eats the bread".split()))))
rev = fst.inverted()
- print "la vache mange de l'herbe ->"
- print ' '+' '.join(rev.transduce("la vache mange de l'herbe".split()))
+ print("la vache mange de l'herbe ->")
+ print((' '+' '.join(rev.transduce("la vache mange de l'herbe".split()))))
demo = FSTDemo(fst)
demo.transduce("the cow eats the bread".split())
diff --git a/nltk_contrib/fuf/__init__.py b/nltk_contrib/fuf/__init__.py
index 9df32ca..3bf45a4 100644
--- a/nltk_contrib/fuf/__init__.py
+++ b/nltk_contrib/fuf/__init__.py
@@ -49,12 +49,12 @@
syntax to C{nltk.featstruct.FeatStruct}.
"""
-from fufconvert import *
-from fuf import *
-from linearizer import *
-from fstypes import *
-from link import *
-from util import *
+from .fufconvert import *
+from .fuf import *
+from .linearizer import *
+from .fstypes import *
+from .link import *
+from .util import *
__all__ = [
# Unifier
diff --git a/nltk_contrib/fuf/fstypes.py b/nltk_contrib/fuf/fstypes.py
index 073f229..f41a3a6 100644
--- a/nltk_contrib/fuf/fstypes.py
+++ b/nltk_contrib/fuf/fstypes.py
@@ -2,7 +2,7 @@
C{fstypes.py} module contains the implementation of feature
value types as defined in the FUF manual (v5.2)
"""
-from sexp import *
+from .sexp import *
from nltk.featstruct import CustomFeatureValue, UnificationFailure
class FeatureTypeTable(object):
@@ -28,9 +28,9 @@ def define_type(self, name, children):
@type children: single string or list of strings
"""
- if name not in self.table.keys():
+        if name not in self.table:
self.table[name] = []
- if isinstance(children, basestring):
+ if isinstance(children, str):
children = [children]
for child in children:
self.table[name].append(child)
@@ -48,14 +48,14 @@ def subsume(self, name, specialization):
# quick check if the specialization is the immediate one
spec = specialization
if name == spec: return True
- if not self.table.has_key(name): return False
+ if name not in self.table: return False
if spec in self.table[name]:
return True
return any(self.subsume(item, spec) for item in self.table[name])
def __repr__(self):
output = ""
- for key, value in self.table.items():
+ for key, value in list(self.table.items()):
output += "%s <--- %s\n" % (key, value)
return output
@@ -141,16 +141,16 @@ def assign_types(table, fs):
"""
def assign_types_helper(fs, type_table, flat_type_table):
# go through the feature structure and convert the typed values
- for fkey, fval in fs.items():
+ for fkey, fval in list(fs.items()):
if isinstance(fval, nltk.FeatStruct):
assign_types_helper(fval, type_table, flat_type_table)
- elif isinstance(fval, basestring) and (fval in flat_type_table):
+ elif isinstance(fval, str) and (fval in flat_type_table):
newval = TypedFeatureValue(fval, table)
fs[fkey] = newval
# flattten the table
flat_type_table = list()
- for tkey, tvalue in table.table.items():
+ for tkey, tvalue in list(table.table.items()):
flat_type_table.append(tkey)
for tval in tvalue:
flat_type_table.append(tval)
@@ -165,9 +165,9 @@ def assign_types_helper(fs, type_table, flat_type_table):
sexp = SexpListParser().parse(typedef)
type_table.define_type(sexp[1], sexp[2])
- print type_table
- print type_table.subsume('np', 'common')
- print type_table.subsume('mood', 'imperative')
+ print(type_table)
+ print((type_table.subsume('np', 'common')))
+ print((type_table.subsume('mood', 'imperative')))
diff --git a/nltk_contrib/fuf/fuf.py b/nltk_contrib/fuf/fuf.py
index c748e17..8e04779 100644
--- a/nltk_contrib/fuf/fuf.py
+++ b/nltk_contrib/fuf/fuf.py
@@ -1,10 +1,10 @@
import os
import nltk
-from fufconvert import *
-from link import *
-from linearizer import *
-from util import output_html, flatten
+from .fufconvert import *
+from .link import *
+from .linearizer import *
+from .util import output_html, flatten
class GrammarPathResolver(object):
@@ -41,7 +41,7 @@ def filter_for_alt(grammar):
alts = list()
fs = nltk.FeatStruct()
- for gkey, gvalue in grammar.items():
+ for gkey, gvalue in list(grammar.items()):
if gkey != "alt" and not gkey.startswith("alt_"):
#if isinstance(gvalue, basestring):
fs[gkey] = gvalue
@@ -63,7 +63,7 @@ def alt_to_list(fs, altname):
@return: list
"""
- altkeys = fs[altname].keys()
+ altkeys = list(fs[altname].keys())
- altkeys = sorted([int(key) for key in altkeys if key != "_index_"], cmp)
+ altkeys = sorted([int(key) for key in altkeys if key != "_index_"])
altkeys = [str(key) for key in altkeys]
@@ -107,7 +107,7 @@ def _copy_vals(table, fs, pack):
"""
if isinstance(pack, list):
for subpack in pack:
- for fkey, fvalue in fs.items():
+ for fkey, fvalue in list(fs.items()):
if (fkey in subpack) and \
GrammarPathResolver._is_subsumed_val(table, fs, fkey, subpack):
pass
@@ -120,7 +120,7 @@ def _copy_vals(table, fs, pack):
subpack[fkey] = fvalue
else:
assert isinstance(pack, nltk.FeatStruct)
- for fkey, fvalue in fs.items():
+ for fkey, fvalue in list(fs.items()):
if (fkey in pack) and \
GrammarPathResolver._is_subsumed_val(table, fs, fkey, pack):
pass
@@ -138,7 +138,7 @@ def resolve(self, fstruct):
path through the alternations.
"""
- if isinstance(fstruct, basestring):
+ if isinstance(fstruct, str):
return fstruct
fs, alts = GrammarPathResolver.filter_for_alt(fstruct)
@@ -148,7 +148,7 @@ def resolve(self, fstruct):
toplevel_pack = GrammarPathResolver.alt_to_list(fstruct, altname)
subpack = list()
for item in toplevel_pack:
- if isinstance(item, nltk.FeatStruct) and len(item.keys()) == 0:
+ if isinstance(item, nltk.FeatStruct) and len(list(item.keys())) == 0:
# empty feature - result of having opts
pass
elif isinstance(item, nltk.FeatStruct):
@@ -162,7 +162,7 @@ def resolve(self, fstruct):
return result
else:
total_packs = list()
- for fkey, fvalue in fstruct.items():
+ for fkey, fvalue in list(fstruct.items()):
if isinstance(fvalue, nltk.FeatStruct):
subpack = list()
fs, alts = GrammarPathResolver.filter_for_alt(fvalue)
@@ -170,7 +170,7 @@ def resolve(self, fstruct):
for item in self.resolve(fvalue):
newfs = nltk.FeatStruct()
newfs[fkey] = item
- for key, value in fvalue.items():
+ for key, value in list(fvalue.items()):
if not ('alt' in value):
newfs[key] = value
subpack.append(newfs)
@@ -319,7 +319,7 @@ def _isconstituent(fstruct, subfs_key, subfs_val):
return True
if ('pattern' in fstruct):
- for fkey in subfs_val.keys():
+ for fkey in list(subfs_val.keys()):
if fkey in fstruct['pattern']:
return True
return False
@@ -332,7 +332,7 @@ def _unify(fs, grs, resolver=None, trace=False):
unifs = fs.unify(gr)
if unifs:
resolver.resolve(unifs)
- for fname, fval in unifs.items():
+ for fname, fval in list(unifs.items()):
if Unifier._isconstituent(unifs, fname, fval):
newval = Unifier._unify(fval, grs, resolver)
if newval:
@@ -366,24 +366,24 @@ def unify(self):
input_files = ['ir2.fuf']
for ifile, gfile in zip(input_files, grammar_files):
if ifile == 'ir3.fuf' and gfile == 'gr3.fuf':
- print 'gr3.fuf doesn\'t work because of the (* focus) s-expression in the feature structure'
+ print('gr3.fuf doesn\'t work because of the (* focus) s-expression in the feature structure')
continue
# input files contain more than one definition of input
output = None
result = None
- print "\nINPUT FILE: %s, GRAMMAR FILE: %s" % (ifile, gfile)
+ print("\nINPUT FILE: %s, GRAMMAR FILE: %s" % (ifile, gfile))
gfs = fuf_to_featstruct(open('tests/%s' % gfile).read())
for i, iline in enumerate(open('tests/%s' % ifile).readlines()):
try:
ifs = fuf_to_featstruct(iline)
- except Exception, e:
- print 'Failed to convert %s to nltk.FeatStruct' % iline
+ except Exception as e:
+ print('Failed to convert %s to nltk.FeatStruct' % iline)
exit()
fuf = Unifier(ifs, gfs)
result = fuf.unify()
if result:
output = " ".join(linearize(result))
- print output_html([ifs, gfs, result, output])
- print i, "result:", output
+ print(output_html([ifs, gfs, result, output]))
+ print(i, "result:", output)
else:
- print i, 'result: failed'
+ print(i, 'result: failed')
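
Note: when a Python 2 print statement carried several comma-separated items, the Python 3 call needs them as separate arguments; wrapping them in one extra pair of parentheses would print a tuple instead of the old space-separated output. A standalone check (values are illustrative only):

    i, output = 3, "the cow eats the bread"
    print(i, "result:", output)     # 3 result: the cow eats the bread
    print((i, "result:", output))   # (3, 'result:', 'the cow eats the bread')
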
diff --git a/nltk_contrib/fuf/fufconvert.py b/nltk_contrib/fuf/fufconvert.py
index 5978634..6c1e56c 100644
--- a/nltk_contrib/fuf/fufconvert.py
+++ b/nltk_contrib/fuf/fufconvert.py
@@ -1,10 +1,10 @@
import re
import os
import nltk
-from sexp import *
-from link import *
-from specialfs import *
-from fstypes import *
+from .sexp import *
+from .link import *
+from .specialfs import *
+from .fstypes import *
def fuf_to_featstruct(fuf):
"""
@@ -23,7 +23,7 @@ def _convert_fuf_featstruct(sexp):
assert sexp.lparen == '('
fs = nltk.FeatStruct()
for child in sexp:
- if isinstance(child, basestring):
+ if isinstance(child, str):
feat, val = _convert_fuf_feature(sexp)
fs[feat] = val
break
@@ -55,11 +55,11 @@ def _convert_fuf_feature(sexp):
del sexp[1]
result = _list_convert(sexp[1])
sexp[1] = result
- print sexp[1]
+ print(sexp[1])
feat, val = sexp
else:
assert len(sexp) == 2, sexp[1]
- assert isinstance(sexp[0], basestring), sexp
+ assert isinstance(sexp[0], str), sexp
feat, val = sexp
# Special handling for pattern feature
@@ -72,7 +72,7 @@ def _convert_fuf_feature(sexp):
assert isinstance(val, SexpList) and val.lparen == '('
choices = list()
for c in val:
- if isinstance(c, basestring):
+ if isinstance(c, str):
choices.append(c)
else:
choices.append(_convert_fuf_featstruct(c))
@@ -124,7 +124,7 @@ def fuf_file_to_featstruct(fuf_filename):
# process the type defs and the grammar
for sexp in lsexp:
- if isinstance(sexp[0], basestring) and sexp[0] == 'define-feature-type':
+ if isinstance(sexp[0], str) and sexp[0] == 'define-feature-type':
assert len(sexp) == 3
name, children = sexp[1], sexp[2]
type_table.define_type(name, children)
@@ -166,7 +166,7 @@ def _list_convert(lst):
#test the alt feature
- print 'START LIST TEST'
+ print('START LIST TEST')
#listlines = open('tests/list.fuf').readlines()
#for line in listlines:
#print 'INPUTS:', line
@@ -198,19 +198,19 @@ def _list_convert(lst):
# test the example grammars
grammar_files = [gfile for gfile in os.listdir('tests/') if gfile.startswith('gr')]
- print grammar_files
+ print(grammar_files)
for gfile in grammar_files:
- print "FILE: %s" % gfile
+ print("FILE: %s" % gfile)
text = open('tests/%s' % gfile).read()
- print text
- print fuf_to_featstruct(text)
- print
+ print(text)
+ print(fuf_to_featstruct(text))
+ print()
1/0
type_table, grammar = fuf_file_to_featstruct('tests/typed_gr4.fuf')
- print type_table
- print grammar
+ print(type_table)
+ print(grammar)
gr5 = fuf_to_featstruct(open('tests/gr5.fuf').read())
- print gr5
+ print(gr5)
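
Note: the basestring -> str replacements in these modules assume all feature values are text. Python 3 has no basestring, and bytes no longer count as strings, so anything read in binary mode would have to be decoded before these isinstance checks accept it. A minimal standalone illustration:

    for value in ["np", b"np"]:
        print(type(value).__name__, isinstance(value, str))   # str True, then bytes False
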
diff --git a/nltk_contrib/fuf/linearizer.py b/nltk_contrib/fuf/linearizer.py
index 6e125e6..a8724d4 100644
--- a/nltk_contrib/fuf/linearizer.py
+++ b/nltk_contrib/fuf/linearizer.py
@@ -4,8 +4,8 @@
"""
import nltk
-from link import *
-from util import output_html
+from .link import *
+from .util import output_html
def linearize(fstruct):
"""
@@ -25,9 +25,9 @@ def lin_helper(fs, pattern, output):
else:
if isinstance(fs[item], ReentranceLink):
LinkResolver().resolve(fs)
- if fs[item].has_key('pattern'):
+ if 'pattern' in fs[item]:
lin_helper(fs[item], fs[item]['pattern'], output)
- elif fs[item].has_key('lex'):
+ elif 'lex' in fs[item]:
output.append(fs[item]['lex'])
assert isinstance(fstruct, nltk.FeatStruct)
@@ -37,15 +37,15 @@ def lin_helper(fs, pattern, output):
return output
if __name__ == '__main__':
- from fufconvert import *
- from fuf import *
+ from .fufconvert import *
+ from .fuf import *
gfs = fuf_to_featstruct(open('tests/gr0.fuf').read())
itext = open('tests/ir0.fuf').readlines()[3]
ifs = fuf_to_featstruct(itext)
result = unify_with_grammar(ifs, gfs)
- print result
- print linearize(result)
+ print(result)
+ print(linearize(result))
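
Note: the explicit relative imports inside the if __name__ == '__main__' blocks (here and in link.py below) only resolve when the module runs as part of its package, e.g. via "python -m nltk_contrib.fuf.linearizer" with the package on the path; invoking the file directly raises "ImportError: attempted relative import with no known parent package".
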
diff --git a/nltk_contrib/fuf/link.py b/nltk_contrib/fuf/link.py
index 1df86d8..c62a5b0 100644
--- a/nltk_contrib/fuf/link.py
+++ b/nltk_contrib/fuf/link.py
@@ -80,7 +80,7 @@ def resolve(self, fstruct):
def resolve_helper(fs, ancestors):
# start looking for links
- for feat, val in fs.items():
+ for feat, val in list(fs.items()):
# add to path and recurse
if isinstance(val, nltk.FeatStruct):
ancestors.append(val)
@@ -144,8 +144,8 @@ def __repr__(self):
if __name__ == '__main__':
# testing the link resolution using gr0.fuf grammar and ir0.fuf inputs
import os
- from fufconvert import *
- from fuf import *
+ from .fufconvert import *
+ from .fuf import *
gfs = fuf_to_featstruct(open('tests/gr0.fuf').read())
itext = open('tests/ir0.fuf').readlines()[2]
@@ -153,4 +153,4 @@ def __repr__(self):
ifs = fuf_to_featstruct(itext)
result = unify_with_grammar(ifs, gfs)
- print output_html([ifs, gfs, result])
+ print(output_html([ifs, gfs, result]))
diff --git a/nltk_contrib/fuf/morphology.py b/nltk_contrib/fuf/morphology.py
index f3c363b..6d966fa 100644
--- a/nltk_contrib/fuf/morphology.py
+++ b/nltk_contrib/fuf/morphology.py
@@ -4,7 +4,7 @@
- morph_numeric: integer number to text
"""
-import lexicon
+from . import lexicon
def _is_vowel(char):
return char in ['o', 'e', 'i', 'a', 'y']
@@ -24,7 +24,7 @@ def pluralize(word):
"""
assert word
- assert isinstance(word, basestring)
+ assert isinstance(word, str)
assert len(word) > 0
second_last = word[-2]
@@ -90,7 +90,7 @@ def form_past(word):
last = word[-1]
assert word
- assert isinstance(word, basestring)
+ assert isinstance(word, str)
if last == 'e':
return word + 'd'
@@ -132,7 +132,7 @@ def form_present_verb(word, number, person):
Forms the suffix for the present tense of the verb WORD
"""
assert word
- assert isinstance(word, basestring)
+ assert isinstance(word, str)
if _is_first_person(person) or _is_second_person(person):
return word
elif _is_third_person(person):
@@ -253,7 +253,7 @@ def morph_pronoun(lex, pronoun_type, case, gender, number, distance, animate,
"""
Returns the correct pronoun given the features
"""
- if lex and isinstance(lex, basestring) and not (lex in ['none', 'nil']):
+ if lex and isinstance(lex, str) and not (lex in ['none', 'nil']):
return lex
if pronoun_type == 'personal':
# start with the 'he' then augmen by person, then, by number,
diff --git a/nltk_contrib/fuf/sexp.py b/nltk_contrib/fuf/sexp.py
index ecac870..a7a8290 100644
--- a/nltk_contrib/fuf/sexp.py
+++ b/nltk_contrib/fuf/sexp.py
@@ -6,7 +6,7 @@
import os
-from statemachine import PushDownMachine
+from .statemachine import PushDownMachine
class SexpList(list):
"""
@@ -39,7 +39,7 @@ def pp(self):
for i, val in enumerate(self):
if isinstance(val, SexpList):
s += val.pp()
- elif isinstance(val, basestring):
+ elif isinstance(val, str):
s += val
else:
s += repr(val)
@@ -71,8 +71,8 @@ def __init__(self):
# set up the parenthesis
self.parens = {'(':')', '[':']', '{':'}'}
- self.lparens = self.parens.keys()
- self.rparens = self.parens.values()
+ self.lparens = list(self.parens.keys())
+ self.rparens = list(self.parens.values())
self._build_machine()
self.machine.stack = [[]]
@@ -90,8 +90,8 @@ def _tokenizer(self, to_tokenize):
"""
Return a tokenizer
"""
- lparen_res = ''.join([re.escape(lparen) for lparen in self.parens.keys()])
- rparen_res = ''.join([re.escape(rparen) for rparen in self.parens.values()])
+ lparen_res = ''.join([re.escape(lparen) for lparen in list(self.parens.keys())])
+ rparen_res = ''.join([re.escape(rparen) for rparen in list(self.parens.values())])
tok_re = re.compile('[%s]|[%s]|[^%s%s\s]+' %
(lparen_res, rparen_res, lparen_res, rparen_res))
@@ -239,16 +239,16 @@ def parse(self):
lines = open('tests/sexp.txt').readlines()
for test in lines:
try:
- print '%s' % test
+ print('%s' % test)
l = SexpListParser().parse(test)
- print '==>', SexpListParser().parse(test)
- print
- except Exception, e:
- print 'Exception:', e
+ print('==>', SexpListParser().parse(test))
+ print()
+ except Exception as e:
+ print('Exception:', e)
# testing the SexpFileParser
sfp = SexpFileParser('tests/typed_gr4.fuf')
- print sfp.parse()
+ print(sfp.parse())
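
Note: the tokenizer pattern kept unchanged above embeds \s in a normal string literal, which newer Python 3 releases flag as an invalid escape sequence; a raw string avoids the warning while matching the same tokens. A standalone sketch with illustrative parenthesis sets:

    import re
    lparen_res, rparen_res = re.escape('(['), re.escape(')]')
    tok_re = re.compile(r'[%s]|[%s]|[^%s%s\s]+' %
                        (lparen_res, rparen_res, lparen_res, rparen_res))
    print(tok_re.findall("(a (b c) [d])"))
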
diff --git a/nltk_contrib/fuf/specialfs.py b/nltk_contrib/fuf/specialfs.py
index 0ff8eee..5c56ee9 100644
--- a/nltk_contrib/fuf/specialfs.py
+++ b/nltk_contrib/fuf/specialfs.py
@@ -2,7 +2,7 @@
Handling for special feature names during parsing
"""
-from sexp import *
+from .sexp import *
def parse_alt(sexpl):
"""
@@ -17,7 +17,7 @@ def parse_alt(sexpl):
feat, name, index, val = ('', '', '', '')
# named alt
- if isinstance(sexpl[1], basestring):
+ if isinstance(sexpl[1], str):
# alt with index
if len(sexpl) == 4:
feat, name, index, val = sexpl
diff --git a/nltk_contrib/fuf/util.py b/nltk_contrib/fuf/util.py
index dbad459..87598fb 100644
--- a/nltk_contrib/fuf/util.py
+++ b/nltk_contrib/fuf/util.py
@@ -37,7 +37,7 @@ def draw(fstruct, filename=None):
"""
def draw_helper(output, fstruct, pcount, ccount):
output += 'fs%d [label=" " style="filled" fillcolor="white"];\n' % (pcount)
- for fs, val in fstruct.items():
+ for fs, val in list(fstruct.items()):
if isinstance(val, nltk.FeatStruct):
output += 'fs%d -> fs%d [label="%s"];\n' % (pcount, ccount, fs)
output, ccount = draw_helper(output, val, ccount,
diff --git a/nltk_contrib/hadoop/EM/EM_mapper.py b/nltk_contrib/hadoop/EM/EM_mapper.py
index a1d7e6d..34e0376 100644
--- a/nltk_contrib/hadoop/EM/EM_mapper.py
+++ b/nltk_contrib/hadoop/EM/EM_mapper.py
@@ -63,8 +63,8 @@ def read_params(self):
# get initial state probability p (state)
Pi = DictionaryProbDist(d)
- A_keys = A.keys()
- B_keys = B.keys()
+ A_keys = list(A.keys())
+ B_keys = list(B.keys())
states = set()
symbols = set()
for e in A_keys:
diff --git a/nltk_contrib/hadoop/EM/runStreaming.py b/nltk_contrib/hadoop/EM/runStreaming.py
index 630f0d5..17c6ffd 100644
--- a/nltk_contrib/hadoop/EM/runStreaming.py
+++ b/nltk_contrib/hadoop/EM/runStreaming.py
@@ -14,8 +14,8 @@
# while not converged or not reach maximum iteration number
while (abs(newlog - oldlog) > diff and i <= iter):
- print "oldlog", oldlog
- print "newlog", newlog
+ print("oldlog", oldlog)
+ print("newlog", newlog)
i += 1
oldlog = newlog
@@ -25,7 +25,7 @@
userdir = '/home/mxf/nltknew/nltk_contrib/hadoop/EM/'
p = Popen([userdir + 'runStreaming.sh' ], shell=True, stdout=sys.stdout)
p.wait()
- print "returncode", p.returncode
+ print("returncode", p.returncode)
# open the parameter output from finished iteration
# and get the new loglikelihood
@@ -36,5 +36,5 @@
newlog = float(li[1])
f.close()
-print "oldlog", oldlog
-print "newlog", newlog
+print("oldlog", oldlog)
+print("newlog", newlog)
diff --git a/nltk_contrib/hadoop/hadooplib/mapper.py b/nltk_contrib/hadoop/hadooplib/mapper.py
index 1e8ec8d..0835a1b 100644
--- a/nltk_contrib/hadoop/hadooplib/mapper.py
+++ b/nltk_contrib/hadoop/hadooplib/mapper.py
@@ -1,5 +1,5 @@
-from inputformat import TextLineInput
-from outputcollector import LineOutput
+from .inputformat import TextLineInput
+from .outputcollector import LineOutput
class MapperBase:
diff --git a/nltk_contrib/hadoop/hadooplib/outputcollector.py b/nltk_contrib/hadoop/hadooplib/outputcollector.py
index 3c1a16a..8133d71 100644
--- a/nltk_contrib/hadoop/hadooplib/outputcollector.py
+++ b/nltk_contrib/hadoop/hadooplib/outputcollector.py
@@ -20,4 +20,4 @@ def collect(key, value, separator = '\t'):
keystr = str(key)
valuestr = str(value)
- print '%s%s%s' % (keystr, separator, valuestr)
+ print('%s%s%s' % (keystr, separator, valuestr))
diff --git a/nltk_contrib/hadoop/hadooplib/reducer.py b/nltk_contrib/hadoop/hadooplib/reducer.py
index aeb39b6..e23829a 100644
--- a/nltk_contrib/hadoop/hadooplib/reducer.py
+++ b/nltk_contrib/hadoop/hadooplib/reducer.py
@@ -1,8 +1,8 @@
from itertools import groupby
from operator import itemgetter
-from inputformat import KeyValueInput
-from outputcollector import LineOutput
+from .inputformat import KeyValueInput
+from .outputcollector import LineOutput
class ReducerBase:
"""
@@ -44,7 +44,7 @@ def group_data(self, data):
"""
for key, group in groupby(data, itemgetter(0)):
- values = map(itemgetter(1), group)
+ values = list(map(itemgetter(1), group))
yield key, values
def reduce(self, key, values):
diff --git a/nltk_contrib/hadoop/hadooplib/util.py b/nltk_contrib/hadoop/hadooplib/util.py
index 9bb6e5c..082d131 100644
--- a/nltk_contrib/hadoop/hadooplib/util.py
+++ b/nltk_contrib/hadoop/hadooplib/util.py
@@ -31,7 +31,7 @@ def tuple2str(t, separator = ' '):
return s
else:
- raise ValueError, "The first parameter must be a tuple"
+ raise ValueError("The first parameter must be a tuple")
def str2tuple(s, separator = ' '):
"""
@@ -55,7 +55,7 @@ def str2tuple(s, separator = ' '):
t = s.strip().split(separator)
return tuple(t)
else:
- raise ValueError, "the first parameter must be a string"
+ raise ValueError("the first parameter must be a string")
if __name__ == "__main__":
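
Note: Python 3 drops the two-part raise statement, so "raise ValueError, ..." becomes a normal constructor call as above. A self-contained sketch of the converted check, using a hypothetical str2tuple_checked stand-in for the str2tuple helper shown in this hunk:

    def str2tuple_checked(s, separator=' '):
        if not isinstance(s, str):
            raise ValueError("the first parameter must be a string")
        return tuple(s.strip().split(separator))

    print(str2tuple_checked("dog NN"))   # ('dog', 'NN')
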
diff --git a/nltk_contrib/hadoop/tf_idf/sort.py b/nltk_contrib/hadoop/tf_idf/sort.py
index bab0ebc..1f948c2 100644
--- a/nltk_contrib/hadoop/tf_idf/sort.py
+++ b/nltk_contrib/hadoop/tf_idf/sort.py
@@ -11,4 +11,4 @@
li.sort()
for e in li:
- print e,
+ print(e, end=' ')
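
Note: end=' ' reproduces the Python 2 trailing-comma form "print e,", which separated items with spaces rather than newlines. A standalone check with illustrative data:

    for e in sorted(["b", "a", "c"]):
        print(e, end=' ')
    print()   # finish the line
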
diff --git a/nltk_contrib/lambek/lambek.py b/nltk_contrib/lambek/lambek.py
index e16e9a8..8eb7b36 100755
--- a/nltk_contrib/lambek/lambek.py
+++ b/nltk_contrib/lambek/lambek.py
@@ -18,9 +18,9 @@
_VAR_NAMES = 1
_SHOW_VARMAP = not _VAR_NAMES
-from term import *
-from typedterm import *
-from lexicon import *
+from .term import *
+from .typedterm import *
+from .lexicon import *
import sys, re
class Sequent:
@@ -30,8 +30,8 @@ class Sequent:
def __init__(self, left, right):
# Check types, because we're paranoid.
- if type(left) not in [types.ListType, types.TupleType] or \
- type(right) not in [types.ListType, types.TupleType]:
+ if type(left) not in [list, tuple] or \
+ type(right) not in [list, tuple]:
raise TypeError('Expected lists of TypedTerms')
for elt in left+right:
if not isinstance(elt, TypedTerm):
@@ -41,8 +41,8 @@ def __init__(self, left, right):
self.right = right
def __repr__(self):
- left_str = `self.left`[1:-1]
- right_str = `self.right`[1:-1]
+ left_str = repr(self.left)[1:-1]
+ right_str = repr(self.right)[1:-1]
return left_str + ' => ' + right_str
def to_latex(self, pp_varmap=None):
@@ -86,8 +86,8 @@ def __init__(self, rule, assumptions, conclusion, varmap):
self.varmap = varmap
def __repr__(self):
- return self.rule+' '+`self.assumptions`+' -> '\
- +`self.conclusion`
+ return self.rule+' '+repr(self.assumptions)+' -> '\
+ +repr(self.conclusion)
def simplify(self, varmap=None):
if varmap == None:
@@ -157,7 +157,7 @@ def pp(self, left=0, toplevel=1, pp_varmap=None):
if _VAR_NAMES:
concl = self.conclusion.pp(pp_varmap)
else:
- concl = `self.conclusion`
+ concl = repr(self.conclusion)
# Draw assumptions
for assumption in self.assumptions:
@@ -175,7 +175,7 @@ def pp(self, left=0, toplevel=1, pp_varmap=None):
if toplevel:
if _SHOW_VARMAP:
- return str+'\nVarmap: '+ `self.varmap`+'\n'
+ return str+'\nVarmap: '+ repr(self.varmap)+'\n'
else:
return str
else:
@@ -225,7 +225,7 @@ def prove(sequent, short_circuit=0):
def _prove(sequent, varmap, short_circuit, depth):
if _VERBOSE:
- print (' '*depth)+'Trying to prove', sequent
+ print((' '*depth)+'Trying to prove', sequent)
proofs = []
@@ -245,7 +245,7 @@ def _prove(sequent, varmap, short_circuit, depth):
proofs = proofs + dot_r(sequent, varmap, short_circuit, depth+1)
if _VERBOSE:
- print ' '*depth+'Found '+`len(proofs)`+' proof(s)'
+ print(' '*depth+'Found '+repr(len(proofs))+' proof(s)')
return proofs
@@ -506,14 +506,14 @@ def find_proof(left, right, short_circuit=1):
sq = Sequent(left, right)
proofs = prove(sq, short_circuit)
if proofs:
- print '#'*60
- print "## Proof(s) for", sq.pp()
+ print('#'*60)
+ print("## Proof(s) for", sq.pp())
for proof in proofs:
- print
- print proof.to_latex()
+ print()
+ print(proof.to_latex())
else:
- print '#'*60
- print "## Can't prove", sq.pp()
+ print('#'*60)
+ print("## Can't prove", sq.pp())
def test_lambek():
lex = Lexicon()
@@ -573,70 +573,70 @@ def mainloop(input, out, lex, latexmode, shortcircuit):
if str.lower().endswith('off'): latexmode = 0
elif str.lower().endswith('on'): latexmode = 1
else: latexmode = not latexmode
- if latexmode: print >>out, '% latexmode on'
- else: print >>out, 'latexmode off'
+ if latexmode: print('% latexmode on', file=out)
+ else: print('latexmode off', file=out)
elif str.lower().startswith('short'):
if str.lower().endswith('off'): shortcircuit = 0
elif str.lower().endswith('on'): shortcircuit = 1
else: shortcircuit = not shortcircuit
- if shortcircuit: print >>out, '%shortcircuit on'
- else: print >>out, '% shortcircuit off'
+ if shortcircuit: print('%shortcircuit on', file=out)
+ else: print('% shortcircuit off', file=out)
elif str.lower().startswith('lex'):
words = lex.words()
- print >>out, '% Lexicon: '
+ print('% Lexicon: ', file=out)
for word in words:
- print >>out, '% ' + word + ':', \
- ' '*(14-len(word)) + lex[word].pp()
+ print('% ' + word + ':', \
+ ' '*(14-len(word)) + lex[word].pp(), file=out)
elif str.lower().startswith('q'): return
elif str.lower().startswith('x'): return
else:
- print >>out, HELP
+ print(HELP, file=out)
else:
try:
(left, right) = str.split('=>')
seq = Sequent(lex.parse(left), lex.parse(right))
proofs = prove(seq, shortcircuit)
- print >>out
- print >>out, '%'*60
+ print(file=out)
+ print('%'*60, file=out)
if proofs:
- print >>out, "%% Proof(s) for", seq.pp()
+ print("%% Proof(s) for", seq.pp(), file=out)
for proof in proofs:
- print >>out
- if latexmode: print >>out, proof.to_latex()
- else: print >>out, proof.pp()
+ print(file=out)
+ if latexmode: print(proof.to_latex(), file=out)
+ else: print(proof.pp(), file=out)
else:
- print >>out, "%% Can't prove", seq.pp()
- except KeyError, e:
- print 'Mal-formatted sequent'
- print 'Key error (unknown lexicon entry?)'
- print e
- except ValueError, e:
- print 'Mal-formatted sequent'
- print e
+ print("%% Can't prove", seq.pp(), file=out)
+ except KeyError as e:
+ print('Mal-formatted sequent')
+ print('Key error (unknown lexicon entry?)')
+ print(e)
+ except ValueError as e:
+ print('Mal-formatted sequent')
+ print(e)
# Usage: argv[0] lexiconfile
def main(argv):
if (len(argv) != 2) and (len(argv) != 4):
- print 'Usage:', argv[0], ''
- print 'Usage:', argv[0], '