diff --git a/nltk_contrib/align/__init__.py b/nltk_contrib/align/__init__.py index 8929336..2eed021 100644 --- a/nltk_contrib/align/__init__.py +++ b/nltk_contrib/align/__init__.py @@ -9,8 +9,8 @@ Classes and interfaces for aligning text. """ -from api import * -from gale_church import * +from .api import * +from .gale_church import * __all__ = [] diff --git a/nltk_contrib/align/align.py b/nltk_contrib/align/align.py index c0e85dc..0ba4b09 100644 --- a/nltk_contrib/align/align.py +++ b/nltk_contrib/align/align.py @@ -7,16 +7,16 @@ # For license information, see LICENSE.TXT import sys -from itertools import izip + from nltk.metrics import scores ## --NLTK-- ## Import the nltk.aligner module, which defines the aligner interface -from api import * +from .api import * -import distance_measures -import align_util +from . import distance_measures +from . import align_util # Based on Gale & Church 1993, "A Program for Aligning Sentences in Bilingual Corpora" # This is a Python version of the C implementation by Mike Riley presented in the appendix @@ -82,10 +82,10 @@ def get_delimited_regions(self, base_type, input_file1, input_file2, hard_delimi hard_regions2 = align_util.get_paragraphs_sentences(lines2, hard_delimiter, soft_delimiter) if (len(hard_regions1) != len(hard_regions2)): - print "align_regions: input files do not contain the same number of hard regions" + '\n' - print "%s" % hard_delimiter + '\n' - print "%s has %d and %s has %d" % (input_file1, len(hard_regions1), \ - input_file2, len(hard_regions2) + '\n') + print(("align_regions: input files do not contain the same number of hard regions" + '\n')) + print(("%s" % hard_delimiter + '\n')) + print(("%s has %d and %s has %d" % (input_file1, len(hard_regions1), \ + input_file2, len(hard_regions2) + '\n'))) return ([],[]) return (hard_regions1, hard_regions2) @@ -154,7 +154,7 @@ def _seq_align(self, x, y, nx, ny): path_x = [[0] * second_len for c in range(first_len)] path_y = [[0] * second_len for c in range(first_len)] - d1 = d2 = d3 = d4 = d5 = d6 = sys.maxint + d1 = d2 = d3 = d4 = d5 = d6 = sys.maxsize for j in range(0, ny + 1): for i in range(0, nx + 1): @@ -163,46 +163,46 @@ def _seq_align(self, x, y, nx, ny): d1 = distances[i-1][j-1] + \ self.dist_funct(x[i-1], y[j-1], 0, 0) else: - d1 = sys.maxint + d1 = sys.maxsize if (i > 0): #/* deletion */ d2 = distances[i-1][j] + \ self.dist_funct(x[i-1], 0, 0, 0) else: - d2 = sys.maxint + d2 = sys.maxsize if (j > 0): #/* insertion */ d3 = distances[i][j-1] + \ self.dist_funct(0, y[j-1], 0, 0) else: - d3 = sys.maxint + d3 = sys.maxsize if (i > 1 and j > 0): #/* contraction */ d4 = distances[i-2][j-1] + \ self.dist_funct(x[i-2], y[j-1], x[i-1], 0) else: - d4 = sys.maxint + d4 = sys.maxsize if (i > 0 and j > 1): #/* expansion */ d5 = distances[i-1][j-2] + \ self.dist_funct(x[i-1], y[j-2], 0, y[j-1]) else: - d5 = sys.maxint + d5 = sys.maxsize if (i > 1 and j > 1): #/* melding */ d6 = distances[i-2][j-2] + \ self.dist_funct(x[i-2], y[j-2], x[i-1], y[j-1]) else: - d6 = sys.maxint + d6 = sys.maxsize dmin = min(d1, d2, d3, d4, d5, d6) - if (dmin == sys.maxint): + if (dmin == sys.maxsize): distances[i][j] = 0 elif (dmin == d1): distances[i][j] = d1 @@ -341,7 +341,7 @@ def _seq_align_extended(self, x, y, nx, ny): path_x = [[0] * second_len for c in range(first_len)] path_y = [[0] * second_len for c in range(first_len)] - d1 = d2 = d3 = d4 = d5 = d6 = d7 = d8 = d9 = d10 = d11 = sys.maxint + d1 = d2 = d3 = d4 = d5 = d6 = d7 = d8 = d9 = d10 = d11 = sys.maxsize for j in range(0, ny + 1): for i in range(0, 
nx + 1): @@ -350,81 +350,81 @@ def _seq_align_extended(self, x, y, nx, ny): d1 = distances[i-1][j-1] + \ self.dist_funct(x[i-1], y[j-1], 0, 0, 0, 0) else: - d1 = sys.maxint + d1 = sys.maxsize if (i > 0): #/* deletion */ /* 1-0 */ d2 = distances[i-1][j] + \ self.dist_funct(x[i-1], 0, 0, 0, 0, 0) else: - d2 = sys.maxint + d2 = sys.maxsize if (j > 0): #/* insertion */ /* 0-1 */ d3 = distances[i][j-1] + \ self.dist_funct(0, y[j-1], 0, 0, 0, 0) else: - d3 = sys.maxint + d3 = sys.maxsize if (i > 1 and j > 0): #/* contraction */ /* 2-1 */ d4 = distances[i-2][j-1] + \ self.dist_funct(x[i-2], y[j-1], x[i-1], 0, 0, 0) else: - d4 = sys.maxint + d4 = sys.maxsize if (i > 0 and j > 1): #/* expansion */ /* 1-2 */ d5 = distances[i-1][j-2] + \ self.dist_funct(x[i-1], y[j-2], 0, y[j-1], 0, 0) else: - d5 = sys.maxint + d5 = sys.maxsize if (i > 1 and j > 1): #/* melding */ /* 2-2 */ d6 = distances[i-2][j-2] + \ self.dist_funct(x[i-2], y[j-2], x[i-1], y[j-1], 0, 0) else: - d6 = sys.maxint + d6 = sys.maxsize if (i > 2 and j > 0): #/* contraction */ /* 3-1 */ d7 = distances[i-3][j-1] + \ self.dist_funct(x[i-3], y[j-1], x[i-2], 0, x[i-1], 0) else: - d7 = sys.maxint + d7 = sys.maxsize if (i > 2 and j > 1): #/* contraction */ /* 3-2 */ d8 = distances[i-3][j-2] + \ self.dist_funct(x[i-3], y[j-1], x[i-2], y[j-2], x[i-1], 0) else: - d8 = sys.maxint + d8 = sys.maxsize if (i > 0 and j > 2): #/* expansion */ /* 1-3 */ d9 = distances[i-1][j-3] + \ self.dist_funct(x[i-1], y[j-3], 0, y[j-2], 0, y[j-1]) else: - d9 = sys.maxint + d9 = sys.maxsize if (i > 1 and j > 2): #/* expansion */ /* 2-3 */ d10 = distances[i-2][j-3] + \ self.dist_funct(x[i-3], y[j-3], x[i-2], y[j-2], 0, y[j-1]) else: - d10 = sys.maxint + d10 = sys.maxsize if (i > 2 and j > 2): #/* melding */ /* 3-3 */ d11 = distances[i-3][j-3] + \ self.dist_funct(x[i-3], y[j-3], x[i-2], y[j-2], x[i-1], y[j-1]) else: - d11 = sys.maxint + d11 = sys.maxsize dmin = min(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11) - if (dmin == sys.maxint): + if (dmin == sys.maxsize): distances[i][j] = 0 elif (dmin == d1): distances[i][j] = d1 @@ -619,13 +619,13 @@ def demo_eval(alignments, gold_file): """ alignment_mappings = align_util2.get_alignment_links(alignments) - print "Alignment mappings: %s" % alignment_mappings + print(("Alignment mappings: %s" % alignment_mappings)) #test_values = align_util.get_test_values(alignments) reference_values = align_util2.get_reference_values(gold_file) - print "Reference values: %s" % reference_values + print(("Reference values: %s" % reference_values)) #accuracy = scores.accuracy(reference_values, test_values) @@ -653,7 +653,7 @@ def demo(): gc_alignment = gc.batch_align(regions1, regions2) - print "Alignment0: %s" % gc_alignment + print(("Alignment0: %s" % gc_alignment)) demo_eval(gc_alignment, gold_file) @@ -675,7 +675,7 @@ def demo(): gc_alignment = gc.batch_align(regions1, regions2) - print "Alignment1: %s" % gc_alignment + print(("Alignment1: %s" % gc_alignment)) demo_eval(gc_alignment, gold_file) @@ -694,7 +694,7 @@ def demo(): standard_alignment2 = std.batch_align(s2, t2) - print "Alignment2: %s" % standard_alignment2 + print(("Alignment2: %s" % standard_alignment2)) # demo 4 @@ -703,14 +703,14 @@ def demo(): standard_alignment3 = std.align(s3, t3) - print "Alignment3: %s" % standard_alignment3 + print(("Alignment3: %s" % standard_alignment3)) # demo 5 top_down_alignments = std.recursive_align(s3, t3) for alignment in top_down_alignments: - print "Top down align: %s" % alignment + print(("Top down align: %s" % alignment)) if 
__name__=='__main__': demo() diff --git a/nltk_contrib/align/align_regions.py b/nltk_contrib/align/align_regions.py index bfaf51b..ba2876b 100755 --- a/nltk_contrib/align/align_regions.py +++ b/nltk_contrib/align/align_regions.py @@ -7,8 +7,8 @@ from nltk.metrics import scores -import distance_measures -import alignment_util +from . import distance_measures +from . import alignment_util ##////////////////////////////////////////////////////// ## Alignment @@ -81,7 +81,7 @@ def set_alignment_mappings(self): self.soft_regions_index) self.alignment_mappings.append(align_triple) else: - print "not supported alignment type" + print("not supported alignment type") ##////////////////////////////////////////////////////// ## Aligner @@ -132,10 +132,10 @@ def align_regions(self, dist_funct, debug=False, verbose=False): (hard_regions2, number_of_hard_regions2) = tmp.find_sub_regions(self.hard_delimiter) if (number_of_hard_regions1 != number_of_hard_regions2): - print "align_regions: input files do not contain the same number of hard regions" + '\n' - print "%s" % hard_delimiter + '\n' - print "%s has %d and %s has %d" % (self.input_file1, number_of_hard_regions1, \ - self.input_file2, number_of_hard_regions2) + '\n' + print(("align_regions: input files do not contain the same number of hard regions" + '\n')) + print(("%s" % hard_delimiter + '\n')) + print(("%s has %d and %s has %d" % (self.input_file1, number_of_hard_regions1, \ + self.input_file2, number_of_hard_regions2) + '\n')) return @@ -225,7 +225,7 @@ def seq_align(self, x, y, nx, ny, dist_funct, hard_regions_index): path_x = [[0] * second_len for c in range(first_len)] path_y = [[0] * second_len for c in range(first_len)] - d1 = d2 = d3 = d4 = d5 = d6 = sys.maxint + d1 = d2 = d3 = d4 = d5 = d6 = sys.maxsize for j in range(0, ny + 1): for i in range(0, nx + 1): @@ -234,46 +234,46 @@ def seq_align(self, x, y, nx, ny, dist_funct, hard_regions_index): d1 = distances[i-1][j-1] + \ dist_funct(x[i-1], y[j-1], 0, 0) else: - d1 = sys.maxint + d1 = sys.maxsize if (i > 0): #/* deletion */ d2 = distances[i-1][j] + \ dist_funct(x[i-1], 0, 0, 0) else: - d2 = sys.maxint + d2 = sys.maxsize if (j > 0): #/* insertion */ d3 = distances[i][j-1] + \ dist_funct(0, y[j-1], 0, 0) else: - d3 = sys.maxint + d3 = sys.maxsize if (i > 1 and j > 0): #/* contraction */ d4 = distances[i-2][j-1] + \ dist_funct(x[i-2], y[j-1], x[i-1], 0) else: - d4 = sys.maxint + d4 = sys.maxsize if (i > 0 and j > 1): #/* expansion */ d5 = distances[i-1][j-2] + \ dist_funct(x[i-1], y[j-2], 0, y[j-1]) else: - d5 = sys.maxint + d5 = sys.maxsize if (i > 1 and j > 1): #/* melding */ d6 = distances[i-2][j-2] + \ dist_funct(x[i-2], y[j-2], x[i-1], y[j-1]) else: - d6 = sys.maxint + d6 = sys.maxsize dmin = min(d1, d2, d3, d4, d5, d6) - if (dmin == sys.maxint): + if (dmin == sys.maxsize): distances[i][j] = 0 elif (dmin == d1): distances[i][j] = d1 @@ -502,7 +502,7 @@ def demo_eval(alignments, gold_file): accuracy = scores.accuracy(reference_values, test_values) - print "accuracy: %.2f" % accuracy + print(("accuracy: %.2f" % accuracy)) def demo(): """ diff --git a/nltk_contrib/align/align_util.py b/nltk_contrib/align/align_util.py index 2e0271e..e0f1f9e 100644 --- a/nltk_contrib/align/align_util.py +++ b/nltk_contrib/align/align_util.py @@ -78,132 +78,132 @@ def get_character_lengths(region): def print_alignment_text_mapping(alignment_mapping): entry_num = 0 for entry in alignment_mapping: - print "--------------------------------" - print "Entry: %d" % entry_num + 
print("--------------------------------") + print(("Entry: %d" % entry_num)) entry_num = entry_num + 1 - print "%s" % str(entry[0]) - print "%s" % str(entry[1]) + print(("%s" % str(entry[0]))) + print(("%s" % str(entry[1]))) def print_alignment_index_mapping(alignment_mapping_indices): entry_num = 0 for entry in alignment_mapping_indices: - print "--------------------------------" - print "Indices Entry: %d" % entry_num + print("--------------------------------") + print(("Indices Entry: %d" % entry_num)) entry_num = entry_num + 1 source = entry[0] target = entry[1] - print "%s" % str(source) - print "%s" % str(target) + print(("%s" % str(source))) + print(("%s" % str(target))) def print_alignments(alignments, hard_region1, hard_region2): hard1_key = 0 hard2_key = 0 - for soft_key in alignments.keys(): + for soft_key in list(alignments.keys()): alignment = alignments[soft_key] if (alignment.category == '1 - 1'): - print "1-1: %s" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region2[hard2_key] - print "--------------------------" + print(("1-1: %s" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region2[hard2_key])) + print("--------------------------") hard1_key = hard1_key + 1 hard2_key = hard2_key + 1 elif (alignment.category == '1 - 0'): - print "1-0: %s" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "--------------------------" + print(("1-0: %s" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print("--------------------------") hard1_key = hard1_key + 1 elif (alignment.category == '0 - 1'): - print "0-1: %s" % alignment.d - print "--------------------------" - print "%s" % hard_region2[hard2_key] - print "--------------------------" + print(("0-1: %s" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region2[hard2_key])) + print("--------------------------") hard2_key = hard2_key + 1 elif (alignment.category == '2 - 1'): - print "2-1: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region1[hard1_key + 1] - print "%s" % hard_region2[hard2_key] - print "--------------------------" + print(("2-1: %.2f" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region1[hard1_key + 1])) + print(("%s" % hard_region2[hard2_key])) + print("--------------------------") hard1_key = hard1_key + 2 hard2_key = hard2_key + 1 elif (alignment.category == '1 - 2'): - print "1-2: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region2[hard2_key] - print "%s" % hard_region2[hard2_key + 1] - print "--------------------------" + print(("1-2: %.2f" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region2[hard2_key])) + print(("%s" % hard_region2[hard2_key + 1])) + print("--------------------------") hard1_key = hard1_key + 1 hard2_key = hard2_key + 2 elif (alignment.category == '2 - 2'): - print "2-2: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region1[hard1_key + 1] - print "%s" % hard_region2[hard2_key] - print "%s" % hard_region2[hard2_key + 1] - print "--------------------------" + print(("2-2: %.2f" % alignment.d)) + 
print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region1[hard1_key + 1])) + print(("%s" % hard_region2[hard2_key])) + print(("%s" % hard_region2[hard2_key + 1])) + print("--------------------------") hard1_key = hard1_key + 2 hard2_key = hard2_key + 2 elif (alignment.category == '3 - 1'): - print "3-1: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region1[hard1_key + 1] - print "%s" % hard_region1[hard1_key + 2] - print "%s" % hard_region2[hard2_key] - print "--------------------------" + print(("3-1: %.2f" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region1[hard1_key + 1])) + print(("%s" % hard_region1[hard1_key + 2])) + print(("%s" % hard_region2[hard2_key])) + print("--------------------------") hard1_key = hard1_key + 3 hard2_key = hard2_key + 1 elif (alignment.category == '3 - 2'): - print "3-2: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region1[hard1_key + 1] - print "%s" % hard_region1[hard1_key + 2] - print "%s" % hard_region2[hard2_key] - print "%s" % hard_region2[hard2_key + 1] - print "--------------------------" + print(("3-2: %.2f" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region1[hard1_key + 1])) + print(("%s" % hard_region1[hard1_key + 2])) + print(("%s" % hard_region2[hard2_key])) + print(("%s" % hard_region2[hard2_key + 1])) + print("--------------------------") hard1_key = hard1_key + 3 hard2_key = hard2_key + 2 elif (alignment.category == '1 - 3'): - print "1-3: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region2[hard2_key] - print "%s" % hard_region2[hard2_key + 1] - print "%s" % hard_region2[hard2_key + 2] - print "--------------------------" + print(("1-3: %.2f" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region2[hard2_key])) + print(("%s" % hard_region2[hard2_key + 1])) + print(("%s" % hard_region2[hard2_key + 2])) + print("--------------------------") hard1_key = hard1_key + 1 hard2_key = hard2_key + 3 elif (alignment.category == '2 - 3'): - print "2-3: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region1[hard1_key + 1] - print "%s" % hard_region2[hard2_key] - print "%s" % hard_region2[hard2_key + 1] - print "%s" % hard_region2[hard2_key + 2] - print "--------------------------" + print(("2-3: %.2f" % alignment.d)) + print("--------------------------") + print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region1[hard1_key + 1])) + print(("%s" % hard_region2[hard2_key])) + print(("%s" % hard_region2[hard2_key + 1])) + print(("%s" % hard_region2[hard2_key + 2])) + print("--------------------------") hard1_key = hard1_key + 2 hard2_key = hard2_key + 3 elif (alignment.category == '3 - 3'): - print "3-3: %.2f" % alignment.d - print "--------------------------" - print "%s" % hard_region1[hard1_key] - print "%s" % hard_region1[hard1_key + 1] - print "%s" % hard_region1[hard1_key + 2] - print "%s" % hard_region2[hard2_key] - print "%s" % hard_region2[hard2_key + 1] - print "%s" % hard_region2[hard2_key + 2] - print "--------------------------" + print(("3-3: %.2f" % alignment.d)) + print("--------------------------") 
+ print(("%s" % hard_region1[hard1_key])) + print(("%s" % hard_region1[hard1_key + 1])) + print(("%s" % hard_region1[hard1_key + 2])) + print(("%s" % hard_region2[hard2_key])) + print(("%s" % hard_region2[hard2_key + 1])) + print(("%s" % hard_region2[hard2_key + 2])) + print("--------------------------") hard1_key = hard1_key + 3 hard2_key = hard2_key + 3 else: - print "not supported alignment type" + print("not supported alignment type") def list_to_str(input_list): return input_list @@ -214,7 +214,7 @@ def convert_bead_to_tuples(alignments, hard_region1, hard_region2): alignment_mapping_indices = [] hard1_key = 0 hard2_key = 0 - for soft_key in alignments.keys(): + for soft_key in list(alignments.keys()): alignment = alignments[soft_key] if (alignment.category == '1 - 1'): align_tuple = (list_to_str(hard_region1[hard1_key]), list_to_str(hard_region2[hard2_key])) @@ -311,7 +311,7 @@ def convert_bead_to_tuples(alignments, hard_region1, hard_region2): hard1_key = hard1_key + 3 hard2_key = hard2_key + 3 else: - print "not supported alignment type" + print("not supported alignment type") return (alignment_mapping, alignment_mapping_indices) @@ -320,7 +320,7 @@ def get_alignment_links(alignments): hard_key = 0 for hard_list in alignments: for alignment_dict in hard_list: - for align_key in alignment_dict.keys(): + for align_key in list(alignment_dict.keys()): alignment = alignment_dict[align_key] if (alignment.category == '1 - 1'): @@ -366,15 +366,15 @@ def get_alignment_links(alignments): align_key) alignment_mappings.append(align_triple) else: - print "not supported alignment type" + print("not supported alignment type") return alignment_mappings def get_test_values(alignments): test_values = [] - for hard_regions_index in alignments.keys(): + for hard_regions_index in list(alignments.keys()): soft_regions_list = [] - for soft_regions_index in alignments[hard_regions_index].keys(): + for soft_regions_index in list(alignments[hard_regions_index].keys()): soft_regions_list.extend(alignments[hard_regions_index][soft_regions_index].alignment_mappings) soft_regions_list.reverse() test_values.extend(soft_regions_list) diff --git a/nltk_contrib/align/alignment_util.py b/nltk_contrib/align/alignment_util.py index f33f917..1a61c7b 100755 --- a/nltk_contrib/align/alignment_util.py +++ b/nltk_contrib/align/alignment_util.py @@ -41,9 +41,9 @@ def get_test_values(alignments): """ test_values = [] - for hard_regions_index in alignments.keys(): + for hard_regions_index in list(alignments.keys()): soft_regions_list = [] - for soft_regions_index in alignments[hard_regions_index].keys(): + for soft_regions_index in list(alignments[hard_regions_index].keys()): soft_regions_list.extend(alignments[hard_regions_index][soft_regions_index].alignment_mappings) soft_regions_list.reverse() test_values.extend(soft_regions_list) diff --git a/nltk_contrib/align/api.py b/nltk_contrib/align/api.py index 1a2566a..1edecd5 100644 --- a/nltk_contrib/align/api.py +++ b/nltk_contrib/align/api.py @@ -7,7 +7,7 @@ """ from nltk.internals import deprecated, overridden -from itertools import izip + ##////////////////////////////////////////////////////// # Alignment Interfaces @@ -53,7 +53,7 @@ def batch_align(self, source, target): @rtype: C{list} of I{alignments} """ - return [self.align(st, tt) for (st, tt) in izip(source, target)] + return [self.align(st, tt) for (st, tt) in zip(source, target)] def recursive_align(self, source, target, alignments): """ @@ -70,7 +70,7 @@ def recursive_align(self, source, target, alignments): if 
(self.output_format == 'text_tuples'): alignment_mapping = standard_alignment - import align_util + from . import align_util if (self.output_format == 'bead_objects'): (alignment_mapping, alignment_mapping_indices) = align_util.convert_bead_to_tuples(standard_alignment, source, target) diff --git a/nltk_contrib/align/gale_church.py b/nltk_contrib/align/gale_church.py index 4374b3b..ba10045 100644 --- a/nltk_contrib/align/gale_church.py +++ b/nltk_contrib/align/gale_church.py @@ -7,10 +7,10 @@ # URL: # For license information, see LICENSE.TXT -from __future__ import division + import math -from util import * +from .util import * # Based on Gale & Church 1993, # "A Program for Aligning Sentences in Bilingual Corpora" @@ -182,10 +182,10 @@ def _chunk_iterator(first): v = first while v != split_value: yield v - v = it.next() + v = next(it) while True: - yield _chunk_iterator(it.next()) + yield _chunk_iterator(next(it)) def parse_token_stream(stream, soft_delimiter, hard_delimiter): @@ -205,4 +205,4 @@ def parse_token_stream(stream, soft_delimiter, hard_delimiter): with nested(open(sys.argv[1], "r"), open(sys.argv[2], "r")) as (s, t): source = parse_token_stream((l.strip() for l in s), ".EOS", ".EOP") target = parse_token_stream((l.strip() for l in t), ".EOS", ".EOP") - print align_texts(source, target) + print((align_texts(source, target))) diff --git a/nltk_contrib/align/test.py b/nltk_contrib/align/test.py index b02885a..5f02cca 100644 --- a/nltk_contrib/align/test.py +++ b/nltk_contrib/align/test.py @@ -1,7 +1,7 @@ -import align_util -import align -import distance_measures +from . import align_util +from . import align +from . import distance_measures import sys @@ -56,7 +56,7 @@ def demo(): gc_alignment = gc.batch_align(regions1, regions2) - print "Alignment0: %s" % gc_alignment + print(("Alignment0: %s" % gc_alignment)) #demo_eval(gc_alignment, gold_file) @@ -78,7 +78,7 @@ def demo(): gc_alignment = gc.batch_align(regions1, regions2) - print "Alignment1: %s" % gc_alignment + print(("Alignment1: %s" % gc_alignment)) #demo_eval(gc_alignment, gold_file) @@ -97,7 +97,7 @@ def demo(): standard_alignment2 = std.batch_align(s2, t2) - print "Alignment2: %s" % standard_alignment2 + print(("Alignment2: %s" % standard_alignment2)) # demo 4 @@ -109,14 +109,14 @@ def demo(): standard_alignment3 = std.align(s3, t3) - print "Alignment3: %s" % standard_alignment3 + print(("Alignment3: %s" % standard_alignment3)) # demo 5 top_down_alignments = std.recursive_align(s3, t3, []) for alignment in top_down_alignments: - print "Top down align: %s" % alignment + print(("Top down align: %s" % alignment)) def madame_bovary_test(source_file, target_file, source_pickle_file, target_pickle_file): diff --git a/nltk_contrib/bioreader/__init__.py b/nltk_contrib/bioreader/__init__.py index e7a7dee..d693f0e 100644 --- a/nltk_contrib/bioreader/__init__.py +++ b/nltk_contrib/bioreader/__init__.py @@ -7,7 +7,7 @@ # For license information, see LICENSE.TXT # -from bioreader import * +from .bioreader import * __all__ = [ 'Reader', diff --git a/nltk_contrib/bioreader/bioreader.py b/nltk_contrib/bioreader/bioreader.py index e0dd539..8b394e1 100644 --- a/nltk_contrib/bioreader/bioreader.py +++ b/nltk_contrib/bioreader/bioreader.py @@ -283,18 +283,18 @@ def __init__(self,file,format="medline"): elif format.lower() == "pubmed": self.rerecord = re.compile(r'\'r'(?P.+?)'r'\',re.DOTALL) else: - print "Unrecognized format" + print("Unrecognized format") self.RecordsList = re.findall(self.rerecord,whole) whole = "" self.RecordsList = 
[""+x.rstrip()+"" for x in self.RecordsList] self.dictRecords = self.Createdict() self.RecordsList = [] - self.howmany = len(self.dictRecords.keys()) - self.keys = self.dictRecords.keys() + self.howmany = len(list(self.dictRecords.keys())) + self.keys = list(self.dictRecords.keys()) tfinal = time.time() self.repository = None - print "finished loading at ",time.ctime(tfinal) - print "loaded in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes" + print(("finished loading at ",time.ctime(tfinal))) + print(("loaded in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes")) def __repr__(self): return "" @@ -355,7 +355,7 @@ def Search(self,cadena,where=None): tinicial = time.time() resultlist = [] if where: - for cadapmid in self.dictRecords.keys(): + for cadapmid in list(self.dictRecords.keys()): d = self.Read(cadapmid) if where == 'title': tosearch = d.title @@ -374,7 +374,7 @@ def Search(self,cadena,where=None): if self.repository: pass else: - print "No full text repository has been defined...." + print("No full text repository has been defined....") return None elif where == 'pmid': tosearch = d.pmid @@ -385,16 +385,16 @@ def Search(self,cadena,where=None): pass if len(resultlist)!= 0: tfinal = time.time() - print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes" - print "Found a total of ",str(len(resultlist))," hits for your query, in the ",where," field" + print(("Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes")) + print(("Found a total of ",str(len(resultlist))," hits for your query, in the ",where," field")) return resultlist else: - print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes" - print "Query not found" + print(("Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes")) + print("Query not found") return None else: tosearch = '' - for cadapmid in self.dictRecords.keys(): + for cadapmid in list(self.dictRecords.keys()): tosearch = self.dictRecords[cadapmid] hit = re.search(cadena,tosearch) if hit: @@ -403,13 +403,13 @@ def Search(self,cadena,where=None): pass if len(resultlist)!= 0: tfinal = time.time() - print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes" - print "Found a total of ",str(len(resultlist))," hits for your query, in all fields" + print(("Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes")) + print(("Found a total of ",str(len(resultlist))," hits for your query, in all fields")) return resultlist else: tfinal = time.time() - print "Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes" - print "Query not found" + print(("Searched in", tfinal-tinicial," seconds, or",((tfinal-tinicial)/60)," minutes")) + print("Query not found") return None @@ -432,15 +432,15 @@ class CreateXML: """ def __init__(self): #global urllib,time,string,random - import urllib,time,string,random + import urllib.request, urllib.parse, urllib.error,time,string,random def getXml(self,s): - pedir = urllib.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml") + pedir = urllib.request.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml") stringxml = pedir.read() self.salida.write(stringxml[:-20]+"\n") def getXmlString(self,s): - pedir = urllib.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml") + pedir = 
urllib.request.urlopen("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id="+s+"&retmode=xml") stringxml = pedir.read() return stringxml[:-20]+"\n" @@ -463,7 +463,7 @@ def GenerateFile(self,inputfile,outputfile): cientos = self.listafin[:100] - print "new length self.listacorr", len(self.listafin) + print(("new length self.listacorr", len(self.listafin))) if len(self.listafin) <= 0: break else: @@ -471,7 +471,7 @@ def GenerateFile(self,inputfile,outputfile): nueva = self.listastring(cientos) self.getXml(nueva) for c in cientos: - print c + print(c) self.listafin.remove(c) self.salida.close() @@ -489,7 +489,7 @@ def Generate2String(self,inputfile): cientos = self.listafin[:100] - print "new length self.listacorr", len(self.listafin) + print(("new length self.listacorr", len(self.listafin))) if len(self.listafin) <= 0: break else: @@ -498,6 +498,6 @@ def Generate2String(self,inputfile): newX = self.getXmlString(nueva) self.AllXML = self.AllXML + newX for c in cientos: - print c + print(c) self.listafin.remove(c) return self.AllXML diff --git a/nltk_contrib/classifier/__init__.py b/nltk_contrib/classifier/__init__.py index 8e8e478..a83c7c3 100644 --- a/nltk_contrib/classifier/__init__.py +++ b/nltk_contrib/classifier/__init__.py @@ -19,7 +19,7 @@ def __init__(self, training, attributes, klass): self.attributes = attributes self.training = training self.convert_continuous_values_to_numbers(self.training) - sorted_klass_freqs = self.training.class_freq_dist().keys() + sorted_klass_freqs = list(self.training.class_freq_dist().keys()) sorted_klass_values = [each for each in sorted_klass_freqs] sorted_klass_values.extend([each for each in klass if not sorted_klass_values.__contains__(each)]) self.klass = sorted_klass_values @@ -86,7 +86,7 @@ def entropy(values): def entropy_of_key_counts(dictionary): freq_dist = prob.FreqDist() - klasses = dictionary.keys() + klasses = list(dictionary.keys()) for klass in klasses: freq_dist.inc(klass, dictionary[klass]) return entropy_of_freq_dist(freq_dist) diff --git a/nltk_contrib/classifier/attribute.py b/nltk_contrib/classifier/attribute.py index a5c0c68..5ce22e9 100644 --- a/nltk_contrib/classifier/attribute.py +++ b/nltk_contrib/classifier/attribute.py @@ -8,7 +8,7 @@ from nltk_contrib.classifier.exceptions import systemerror as se from nltk_contrib.classifier import autoclass as ac, cfile, decisionstump as ds from nltk import probability as prob -import UserList +import collections CONTINUOUS = 'continuous' DISCRETE = 'discrete' @@ -58,7 +58,7 @@ def empty_freq_dists(self): def __hash__(self): return hash(self.name) + hash(self.index) -class Attributes(UserList.UserList): +class Attributes(collections.UserList): def __init__(self, attributes = []): self.data = attributes @@ -84,7 +84,7 @@ def discretise(self, discretised_attributes): self.data[disc_attr.index] = disc_attr def empty_decision_stumps(self, ignore_attributes, klass): - filtered = filter(lambda attribute: attribute not in ignore_attributes, self.data) + filtered = [attribute for attribute in self.data if attribute not in ignore_attributes] return [ds.DecisionStump(attribute, klass) for attribute in filtered] def remove_attributes(self, attributes): diff --git a/nltk_contrib/classifier/autoclass.py b/nltk_contrib/classifier/autoclass.py index f41e80b..555c0e1 100644 --- a/nltk_contrib/classifier/autoclass.py +++ b/nltk_contrib/classifier/autoclass.py @@ -10,7 +10,7 @@ class AutoClass: def __init__(self, name): self.name = name - def next(self): + def __next__(self): base26 = 
self.base26() base26 += 1 return AutoClass(string(base26)) diff --git a/nltk_contrib/classifier/classify.py b/nltk_contrib/classifier/classify.py index c57bddc..9c54432 100644 --- a/nltk_contrib/classifier/classify.py +++ b/nltk_contrib/classifier/classify.py @@ -67,7 +67,7 @@ IB1 = 'IB1' ALGORITHM_MAPPINGS = {ZERO_R:zeror.ZeroR, ONE_R:oner.OneR, DECISION_TREE:decisiontree.DecisionTree, NAIVE_BAYES:naivebayes.NaiveBayes, IB1:knn.IB1} -ALL_ALGORITHMS = ALGORITHM_MAPPINGS.keys() +ALL_ALGORITHMS = list(ALGORITHM_MAPPINGS.keys()) VERIFY='verify' ACCURACY='accuracy' @@ -80,7 +80,7 @@ class Classify(cl.CommandLineInterface): def __init__(self): - cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), ONE_R, a_help, f_help, t_help, T_help, g_help, o_help) + cl.CommandLineInterface.__init__(self, list(ALGORITHM_MAPPINGS.keys()), ONE_R, a_help, f_help, t_help, T_help, g_help, o_help) self.add_option("-v", "--verify", dest=VERIFY, action="store_true", default=False, help=v_help) self.add_option("-A", "--accuracy", dest=ACCURACY, action="store_false", default=True, help=A_help) self.add_option("-e", "--error", dest=ERROR, action="store_true", default=False, help=e_help) @@ -103,7 +103,7 @@ def execute(self): self.error('Invalid arguments. Cannot verify classification for test data.') file_strategy = get_file_strategy(self.files, self.training_path, self.test_path, self.gold_path, self.get_value(VERIFY)) - self.training_path, self.test_path, self.gold_path = file_strategy.values() + self.training_path, self.test_path, self.gold_path = list(file_strategy.values()) training, attributes, klass, test, gold = self.get_instances(self.training_path, self.test_path, self.gold_path, cross_validation_fold is not None) classifier = ALGORITHM_MAPPINGS[self.algorithm](training, attributes, klass) @@ -165,14 +165,14 @@ def __print_value(self, log, is_true, attribute, str_repn): total = 0 for each in self.confusion_matrices: total += getattr(each, attribute)() - print >>log, str_repn + ': ' + str(float(total)/len(self.confusion_matrices)) + print(str_repn + ': ' + str(float(total)/len(self.confusion_matrices)), file=log) def write(self, log, should_write, data_format, suffix): if should_write: for index in range(len(self.gold_instances)): new_path = self.training_path + str(index + 1) + suffix data_format.write_gold(self.gold_instances[index], new_path) - print >>log, 'Gold classification written to ' + new_path + ' file.' + print('Gold classification written to ' + new_path + ' file.', file=log) def train(self): #do Nothing @@ -198,7 +198,7 @@ def write(self, log, should_write, data_format, suffix): Will always write in the case of test files """ data_format.write_test(self.test, self.test_path + suffix) - print >>log, 'Test classification written to ' + self.test_path + suffix + ' file.' + print('Test classification written to ' + self.test_path + suffix + ' file.', file=log) def train(self): self.classifier.train() @@ -223,12 +223,12 @@ def print_results(self, log, accuracy, error, fscore, precision, recall): def __print_value(self, log, is_true, attribute, str_repn): if is_true: - print >>log, str_repn + ': ' + getattr(self.confusion_matrix, attribute)().__str__() + print(str_repn + ': ' + getattr(self.confusion_matrix, attribute)().__str__(), file=log) def write(self, log, should_write, data_format, suffix): if should_write: data_format.write_gold(self.gold, self.gold_path + suffix) - print >>log, 'Gold classification written to ' + self.gold_path + suffix + ' file.' 
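For reference alongside the `print >>log` rewrites in classify.py here, a small self-contained sketch of the Python 3 form these hunks move to; `log` is just an example name for any writable file-like object:

import sys

log = sys.stderr   # any object open for writing
print("Gold classification written.", file=log)
# equivalent to the removed Python 2 form:  print >>log, 'Gold classification written.'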
+ print('Gold classification written to ' + self.gold_path + suffix + ' file.', file=log) def train(self): self.classifier.train() diff --git a/nltk_contrib/classifier/commandline.py b/nltk_contrib/classifier/commandline.py index 261ce69..feac9d9 100644 --- a/nltk_contrib/classifier/commandline.py +++ b/nltk_contrib/classifier/commandline.py @@ -38,7 +38,7 @@ def __init__(self, alg_choices, alg_default, a_help, f_help, t_help, T_help, g_h self.add_option("-T", "--test-file", dest=TEST, type="string", help=T_help) self.add_option("-g", "--gold-file", dest=GOLD, type="string", help=g_help) - self.add_option("-D", "--data-format", dest=DATA_FORMAT, type="choice", choices=DATA_FORMAT_MAPPINGS.keys(), \ + self.add_option("-D", "--data-format", dest=DATA_FORMAT, type="choice", choices=list(DATA_FORMAT_MAPPINGS.keys()), \ default=C45_FORMAT, help=D_help) self.add_option("-l", "--log-file", dest=LOG_FILE, type="string", help=l_help) self.add_option("-o", "--options", dest=OPTIONS, type="string", help=o_help) @@ -67,8 +67,8 @@ def execute(self): self.log = None if log_file is not None: self.log = open(log_file, 'a') - print >>self.log, '-' * 40 - print >>self.log, 'DateTime: ' + time.strftime('%c', time.localtime()) + print('-' * 40, file=self.log) + print('DateTime: ' + time.strftime('%c', time.localtime()), file=self.log) def run(self, args): """ @@ -117,22 +117,22 @@ def write_to_file(self, suffix, training, attributes, klass, test, gold, include def log_common_params(self, name): if self.log is not None: - print >>self.log, 'Operation: ' + name - print >>self.log, '\nAlgorithm: ' + str(self.algorithm) + '\nTraining: ' + str(self.training_path) + \ - '\nTest: ' + str(self.test_path) + '\nGold: ' + str(self.gold_path) + '\nOptions: ' + str(self.options) + print('Operation: ' + name, file=self.log) + print('\nAlgorithm: ' + str(self.algorithm) + '\nTraining: ' + str(self.training_path) + \ + '\nTest: ' + str(self.test_path) + '\nGold: ' + str(self.gold_path) + '\nOptions: ' + str(self.options), file=self.log) def log_created_files(self, files_names, message): if self.log is None: - print message + print(message) else: - print >>self.log, "NumberOfFilesCreated: " + str(len(files_names)) + print("NumberOfFilesCreated: " + str(len(files_names)), file=self.log) count = 0 for file_name in files_names: if self.log is None: - print file_name + print(file_name) else: - print >>self.log, "CreatedFile" + str(count) + ": " + file_name + print("CreatedFile" + str(count) + ": " + file_name, file=self.log) count += 1 diff --git a/nltk_contrib/classifier/decisionstump.py b/nltk_contrib/classifier/decisionstump.py index d4a8973..a8639ab 100644 --- a/nltk_contrib/classifier/decisionstump.py +++ b/nltk_contrib/classifier/decisionstump.py @@ -37,10 +37,10 @@ def update_count(self, instance): self.root[instance.klass_value] += 1 def error(self): - count_for_each_attr_value = self.counts.values() + count_for_each_attr_value = list(self.counts.values()) total, errors = 0, 0 for class_count in count_for_each_attr_value: - subtotal, counts = 0, class_count.values() + subtotal, counts = 0, list(class_count.values()) counts.sort() for count in counts: subtotal += count errors += (subtotal - counts[-1]) @@ -56,7 +56,7 @@ def klass(self, instance): def majority_klass(self, attr_value): klass_values_with_count = self.counts[attr_value] _max, klass_value = 0, self.safe_default() # will consider safe default because at times the test will have an attribute value not present in the stump(can happen in cross validation as well) - 
for klass, count in klass_values_with_count.items(): + for klass, count in list(klass_values_with_count.items()): if count > _max: _max, klass_value = count, klass return klass_value @@ -67,7 +67,7 @@ def safe_default(self): """ if self.__safe_default == None: max_occurance, klass = -1, None - for klass_element in self.root.keys(): + for klass_element in list(self.root.keys()): if self.root[klass_element] > max_occurance: max_occurance = self.root[klass_element] klass = klass_element @@ -110,14 +110,14 @@ def split_info(self): def __str__(self): _str = 'Decision stump for attribute ' + self.attribute.name - for key, value in self.counts.items(): + for key, value in list(self.counts.items()): _str += '\nAttr value: ' + key + '; counts: ' + value.__str__() for child in self.children: _str += child.__str__() return _str def total_counts(dictionary_of_klass_freq): - return sum([count for count in dictionary_of_klass_freq.values()]) + return sum([count for count in list(dictionary_of_klass_freq.values())]) def dictionary_of_values(klass): return dict([(value, 0) for value in klass]) diff --git a/nltk_contrib/classifier/discretise.py b/nltk_contrib/classifier/discretise.py index 4c7bddb..9625bad 100644 --- a/nltk_contrib/classifier/discretise.py +++ b/nltk_contrib/classifier/discretise.py @@ -52,7 +52,7 @@ class Discretise(cl.CommandLineInterface): def __init__(self): - cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), UNSUPERVISED_EQUAL_WIDTH, a_help, f_help, t_help, T_help, g_help, o_help) + cl.CommandLineInterface.__init__(self, list(ALGORITHM_MAPPINGS.keys()), UNSUPERVISED_EQUAL_WIDTH, a_help, f_help, t_help, T_help, g_help, o_help) self.add_option("-A", "--attributes", dest="attributes", type="string", help=A_help) def execute(self): @@ -185,7 +185,7 @@ def create_and_run(algorithm, path, indices, log_path, options): params.extend(['-o', options]) if log_path is not None: params.extend(['-l', log_path]) - print "Params " + str(params) + print(("Params " + str(params))) disc.run(params) return disc.get_suffix() diff --git a/nltk_contrib/classifier/discretisedattribute.py b/nltk_contrib/classifier/discretisedattribute.py index 666a90c..ff4c573 100644 --- a/nltk_contrib/classifier/discretisedattribute.py +++ b/nltk_contrib/classifier/discretisedattribute.py @@ -14,7 +14,7 @@ def __init__(self, name, ranges, index): self.values, klass_value = [], autoclass.FIRST for i in range(len(ranges)): self.values.append(klass_value.name) - klass_value = klass_value.next() + klass_value = next(klass_value) self.index = index self.type = attribute.DISCRETE self.ranges = ranges diff --git a/nltk_contrib/classifier/featureselect.py b/nltk_contrib/classifier/featureselect.py index e4695f9..05c872e 100644 --- a/nltk_contrib/classifier/featureselect.py +++ b/nltk_contrib/classifier/featureselect.py @@ -58,7 +58,7 @@ class FeatureSelect(cl.CommandLineInterface): def __init__(self): - cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), RANK, a_help, f_help, t_help, T_help, g_help, o_help) + cl.CommandLineInterface.__init__(self, list(ALGORITHM_MAPPINGS.keys()), RANK, a_help, f_help, t_help, T_help, g_help, o_help) def execute(self): cl.CommandLineInterface.execute(self) @@ -221,7 +221,7 @@ def isfloat(stringval): try: float(stringval) return True - except (ValueError, TypeError), e: return False + except (ValueError, TypeError) as e: return False def batch_filter_select(base_path, suffixes, number_of_attributes, log_path, has_continuous): filter_suffixes = [] @@ -229,7 +229,7 @@ def 
batch_filter_select(base_path, suffixes, number_of_attributes, log_path, has for selection_criteria in [INFORMATION_GAIN, GAIN_RATIO]: feat_sel = FeatureSelect() params = ['-a', RANK, '-f', base_path + each, '-o', selection_criteria + ',' + str(number_of_attributes), '-l', log_path] - print "Params " + str(params) + print(("Params " + str(params))) feat_sel.run(params) filter_suffixes.append(each + feat_sel.get_suffix()) return filter_suffixes @@ -240,7 +240,7 @@ def batch_wrapper_select(base_path, suffixes, classifier, fold, delta, log_path) for alg in [FORWARD_SELECTION, BACKWARD_ELIMINATION]: feat_sel = FeatureSelect() params = ['-a', alg, '-f', base_path + each, '-o', classifier + ',' + str(fold) + ',' + str(delta), '-l', log_path] - print "Params " + str(params) + print(("Params " + str(params))) feat_sel.run(params) wrapper_suffixes.append(each + feat_sel.get_suffix()) return wrapper_suffixes diff --git a/nltk_contrib/classifier/instances.py b/nltk_contrib/classifier/instances.py index 0d2d42a..8825f68 100644 --- a/nltk_contrib/classifier/instances.py +++ b/nltk_contrib/classifier/instances.py @@ -9,11 +9,11 @@ from nltk_contrib.classifier import instance as ins, item, cfile, confusionmatrix as cm, numrange as r, util from nltk_contrib.classifier.exceptions import systemerror as system, invaliddataerror as inv from nltk import probability as prob -import operator, UserList, UserDict, math +import operator, collections, UserDict, math -class Instances(UserList.UserList): +class Instances(collections.UserList): def __init__(self, instances): - UserList.UserList.__init__(self, instances) + collections.UserList.__init__(self, instances) def are_valid(self, klass, attributes): for instance in self.data: @@ -122,7 +122,7 @@ def posterior_probablities(self, attributes, klass_values): for klass_value in klass_values: freq_dists[attribute][value].inc(klass_value) #Laplacian smoothing stat_list_values = {} - cont_attrs = filter(lambda attr: attr.is_continuous(), attributes) + cont_attrs = [attr for attr in attributes if attr.is_continuous()] if attributes.has_continuous(): for attribute in cont_attrs: stat_list_values[attribute] = {} @@ -160,12 +160,12 @@ def confusion_matrix(self, klass): matrix.count(i.klass_value, i.classified_klass) return matrix -class SupervisedBreakpoints(UserList.UserList): +class SupervisedBreakpoints(collections.UserList): """ Used to find breakpoints for discretisation """ def __init__(self, klass_values, attr_values): - UserList.UserList.__init__(self, []) + collections.UserList.__init__(self, []) self.attr_values = attr_values self.klass_values = klass_values diff --git a/nltk_contrib/classifier/knn.py b/nltk_contrib/classifier/knn.py index 37988f9..03325bd 100644 --- a/nltk_contrib/classifier/knn.py +++ b/nltk_contrib/classifier/knn.py @@ -41,7 +41,7 @@ def distance(self, value, instance): self.distances[value] = [instance] def minimum_distance_instances(self): - keys = self.distances.keys() + keys = list(self.distances.keys()) keys.sort() return self.distances[keys[0]] diff --git a/nltk_contrib/classifier/naivebayes.py b/nltk_contrib/classifier/naivebayes.py index 68e32c6..9ee8fa3 100644 --- a/nltk_contrib/classifier/naivebayes.py +++ b/nltk_contrib/classifier/naivebayes.py @@ -30,7 +30,7 @@ def estimate_klass(self, instance): for klass_value in self.klass: class_conditional_probability = self.class_conditional_probability(instance, klass_value) estimates_using_prob[class_conditional_probability] = klass_value - keys = estimates_using_prob.keys() + keys = 
list(estimates_using_prob.keys()) keys.sort()#find the one with max conditional prob return estimates_using_prob[keys[-1]] diff --git a/nltk_contrib/classifier/util.py b/nltk_contrib/classifier/util.py index 4400c61..078644d 100644 --- a/nltk_contrib/classifier/util.py +++ b/nltk_contrib/classifier/util.py @@ -3,11 +3,11 @@ # # URL: # This software is distributed under GPL, for license information see LICENSE.TXT -import UserList, math +import collections, math -class StatList(UserList.UserList): +class StatList(collections.UserList): def __init__(self, values=None): - UserList.UserList.__init__(self, values) + collections.UserList.__init__(self, values) def mean(self): if len(self.data) == 0: return 0 diff --git a/nltk_contrib/classifier/zeror.py b/nltk_contrib/classifier/zeror.py index 6b3c6f8..5de0632 100644 --- a/nltk_contrib/classifier/zeror.py +++ b/nltk_contrib/classifier/zeror.py @@ -36,7 +36,7 @@ def update_count(self, instance): def __max(self): max, klass_value = 0, None - for key in self.__klassCount.keys(): + for key in list(self.__klassCount.keys()): value = self.__klassCount[key] if value > max: max = value diff --git a/nltk_contrib/classifier_tests/alltests.py b/nltk_contrib/classifier_tests/alltests.py index e166309..a0d4c86 100644 --- a/nltk_contrib/classifier_tests/alltests.py +++ b/nltk_contrib/classifier_tests/alltests.py @@ -13,10 +13,10 @@ def allTestsSuite(): for dn,d,f in os.walk('.'): if dn is not '.': continue testfilenames = [filename for filename in f if re.search('tests\.py$', filename) is not None] - modulenames = map(lambda f: re.sub('\.py$', '', f), testfilenames) - modules = map(__import__, modulenames) + modulenames = [re.sub('\.py$', '', f) for f in testfilenames] + modules = list(map(__import__, modulenames)) load = unittest.defaultTestLoader.loadTestsFromModule - return unittest.TestSuite(map(load, modules)) + return unittest.TestSuite(list(map(load, modules))) if __name__ == '__main__': runner = unittest.TextTestRunner() diff --git a/nltk_contrib/classifier_tests/autoclasstests.py b/nltk_contrib/classifier_tests/autoclasstests.py index 03afeeb..e0acf4c 100644 --- a/nltk_contrib/classifier_tests/autoclasstests.py +++ b/nltk_contrib/classifier_tests/autoclasstests.py @@ -25,9 +25,9 @@ def test_string(self): def test_next(self): a = autoclass.FIRST - b = a.next() + b = next(a) self.assertEqual('b', str(b)) - self.assertEqual('c', str(b.next())) + self.assertEqual('c', str(next(b))) self.assertEqual('z', self.next('y')) self.assertEqual('ba', self.next('z')) self.assertEqual('bb', self.next('ba')) @@ -36,4 +36,4 @@ def test_next(self): self.assertEqual('baa', self.next('zz')) def next(self, current): - return str(autoclass.AutoClass(current).next()) + return str(next(autoclass.AutoClass(current))) diff --git a/nltk_contrib/classifier_tests/classifytests.py b/nltk_contrib/classifier_tests/classifytests.py index 30570c8..339a120 100644 --- a/nltk_contrib/classifier_tests/classifytests.py +++ b/nltk_contrib/classifier_tests/classifytests.py @@ -126,28 +126,28 @@ def test_does_not_throw_error_if_only_file_option_present(self): def test_get_file_strategy(self): strategy = c.get_file_strategy('files', None, None, None, True) self.assertEqual(c.CommonBaseNameStrategy, strategy.__class__) - values = strategy.values() + values = list(strategy.values()) self.assertEqual(values[0], 'files') self.assertEqual(values[1], None) self.assertEqual(values[2], 'files') strategy = c.get_file_strategy('files', None, None, None, False) self.assertEqual(c.CommonBaseNameStrategy, 
strategy.__class__) - values = strategy.values() + values = list(strategy.values()) self.assertEqual(values[0], 'files') self.assertEqual(values[1], 'files') self.assertEqual(values[2], None) strategy = c.get_file_strategy(None, 'train', 'test', None, False) self.assertEqual(c.ExplicitNamesStrategy, strategy.__class__) - values = strategy.values() + values = list(strategy.values()) self.assertEqual(values[0], 'train') self.assertEqual(values[1], 'test') self.assertEqual(values[2], None) strategy = c.get_file_strategy(None, 'train', None, 'gold', False) self.assertEqual(c.ExplicitNamesStrategy, strategy.__class__) - values = strategy.values() + values = list(strategy.values()) self.assertEqual(values[0], 'train') self.assertEqual(values[1], None) self.assertEqual(values[2], 'gold') diff --git a/nltk_contrib/classifier_tests/decisionstumptests.py b/nltk_contrib/classifier_tests/decisionstumptests.py index 0a50a46..51a7712 100644 --- a/nltk_contrib/classifier_tests/decisionstumptests.py +++ b/nltk_contrib/classifier_tests/decisionstumptests.py @@ -112,7 +112,7 @@ def test_dictionary_of_all_values_with_count_0(self): values = ds.dictionary_of_values(phoney); self.assertEqual(3, len(values)) for i in ['a', 'b', 'c']: - self.assertTrue(values.has_key(i)) + self.assertTrue(i in values) self.assertEqual(0, values[i]) def test_gain_ratio(self): diff --git a/nltk_contrib/classifier_tests/instancestests.py b/nltk_contrib/classifier_tests/instancestests.py index d3397c1..0f35f91 100644 --- a/nltk_contrib/classifier_tests/instancestests.py +++ b/nltk_contrib/classifier_tests/instancestests.py @@ -285,7 +285,7 @@ def test_class_freq_dist_in_reverse_to_store_classes(self): path = datasetsDir(self) + 'numerical' + SEP + 'person' _training = training(path) class_freq_dist = _training.class_freq_dist() - self.assertEqual(['yes','no'], class_freq_dist.keys()) + self.assertEqual(['yes','no'], list(class_freq_dist.keys())) def test_posterior_probablities_with_discrete_values(self): diff --git a/nltk_contrib/classifier_tests/numrangetests.py b/nltk_contrib/classifier_tests/numrangetests.py index 7c82ff6..f40217e 100644 --- a/nltk_contrib/classifier_tests/numrangetests.py +++ b/nltk_contrib/classifier_tests/numrangetests.py @@ -57,14 +57,14 @@ def test_include_expands_range(self): def test_split_returns_none_when_lower_eq_upper(self): _range = r.Range() - self.assertEquals(None, _range.split(2)) + self.assertEqual(None, _range.split(2)) def test_split_returns_none_if_size_of_each_split_is_less_than_delta(self): try: _range = r.Range(0, 0.000005) _range.split(7) - except (se.SystemError), e: - self.assertEquals('Splitting of range resulted in elements smaller than delta 1e-06.', e.message) + except (se.SystemError) as e: + self.assertEqual('Splitting of range resulted in elements smaller than delta 1e-06.', e.message) def test_split_includes_the_highest_and_lowest(self): _range = r.Range() diff --git a/nltk_contrib/classifier_tests/utilities/batchtest.py b/nltk_contrib/classifier_tests/utilities/batchtest.py index da65b04..751a5b6 100644 --- a/nltk_contrib/classifier_tests/utilities/batchtest.py +++ b/nltk_contrib/classifier_tests/utilities/batchtest.py @@ -15,7 +15,7 @@ def run(root_path, log_path): print('in run') for dir_name, dirs, files in os.walk(root_path): data = set([]) - print('Dir name ' + str(dir_name) + ' dirs ' + str(dirs) + ' files ' + str(files)) + print(('Dir name ' + str(dir_name) + ' dirs ' + str(dirs) + ' files ' + str(files))) for file in files: index = file.rfind('.') if index != -1: @@ 
-65,7 +65,7 @@ def process(path, log_path): for suffix in all: params = ['-a', algorithm, '-f', path + suffix, '-l', log_path, '-c', 5] - print "Params " + str(params) + print(("Params " + str(params))) c.Classify().run(params) def to_str_array(value, times): @@ -91,13 +91,13 @@ def delete_generated_files(path): resp = 0 while(resp != 1 and resp != 2): try: - resp = int(raw_input("Select one of following options:\n1. Run all tests\n2. Delete generated files\n")) + resp = int(input("Select one of following options:\n1. Run all tests\n2. Delete generated files\n")) except ValueError: pass if resp == 1: - dir_tree_path = raw_input("Enter directory tree path") - log_file = raw_input("Enter log file") + dir_tree_path = input("Enter directory tree path") + log_file = input("Enter log file") run(dir_tree_path, log_file) elif resp == 2: - dir_path = raw_input("Enter directory path") + dir_path = input("Enter directory path") delete_generated_files(dir_path) diff --git a/nltk_contrib/classifier_tests/utilities/convert.py b/nltk_contrib/classifier_tests/utilities/convert.py index 23cd3b2..9147635 100644 --- a/nltk_contrib/classifier_tests/utilities/convert.py +++ b/nltk_contrib/classifier_tests/utilities/convert.py @@ -49,7 +49,7 @@ def values(file_path, index, sep = " "): for line in f: words = line.split(sep) if not index < len(words): - print "Warning! omitting line " + str(line) + print("Warning! omitting line " + str(line)) continue values.add(words[index]) return ','.join(values) @@ -65,7 +65,7 @@ def convert(path): ind = path.rfind('.') if ind == -1: ind = len(path) nf = open(path[:ind] + 'conv' + path[ind:], 'w') - for l in converted:print >>nf, l + for l in converted:print(l, file=nf) nf.close() def convert_log_to_csv(path): @@ -73,7 +73,7 @@ def convert_log_to_csv(path): csvf = open(path + '.csv', 'w') for each in classifications: - print >>csvf, each.algorithm + ',' + each.training + ',' + each.test + ',' + each.gold + ',' + each.accuracy + ',' + each.f_score + print(each.algorithm + ',' + each.training + ',' + each.test + ',' + each.gold + ',' + each.accuracy + ',' + each.f_score, file=csvf) def get_classification_log_entries(path): f = open(path) @@ -215,15 +215,15 @@ def convert_log_to_tex_tables(path): texf = open(path + '-acc.tex', 'w') for table in accuracy_tables: - print >>texf, table + print(table, file=texf) texf = open(path + '-fs.tex', 'w') for table in f_score_tables: - print >>texf, table + print(table, file=texf) texf = open(path + '-macc.tex', 'w') for table in mean_accuracy_tables: - print >>texf, table + print(table, file=texf) texf = open(path + '-mdatasets.tex', 'w') - print >>texf, mean_datasets + print(mean_datasets, file=texf) def get_stat_lists(cols): return dict([(each, util.StatList()) for each in cols]) diff --git a/nltk_contrib/classify/__init__.py b/nltk_contrib/classify/__init__.py index 16025db..77f227b 100755 --- a/nltk_contrib/classify/__init__.py +++ b/nltk_contrib/classify/__init__.py @@ -110,6 +110,6 @@ def classifier_accuracy(classifier, gold): return float(correct) / len(gold) -from cosine import * -from naivebayes import * -from spearman import * +from .cosine import * +from .naivebayes import * +from .spearman import * diff --git a/nltk_contrib/classify/cosine.py b/nltk_contrib/classify/cosine.py index cde6979..26950df 100755 --- a/nltk_contrib/classify/cosine.py +++ b/nltk_contrib/classify/cosine.py @@ -144,7 +144,7 @@ def demo(): result = classifier.get_class_dict("a") for cls in result: - print cls, ':', result[cls] +
print((cls, ':', result[cls])) """ expected values: @@ -181,7 +181,7 @@ def demo2(): result = classifier.get_class_dict("aaababb") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: class a: 'aa' = 5 @@ -220,7 +220,7 @@ def demo3(): result = classifier.get_class_dict("aaababb") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: @@ -270,9 +270,9 @@ def demo4(): result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200))) - print 'english-kjv :', result.prob('english-kjv') - print 'french :', result.prob('french') - print 'finnish :', result.prob('finnish') + print(('english-kjv :', result.prob('english-kjv'))) + print(('french :', result.prob('french'))) + print(('finnish :', result.prob('finnish'))) if __name__ == '__main__': diff --git a/nltk_contrib/classify/naivebayes.py b/nltk_contrib/classify/naivebayes.py index 92dd5d0..d2cd3ba 100755 --- a/nltk_contrib/classify/naivebayes.py +++ b/nltk_contrib/classify/naivebayes.py @@ -82,7 +82,8 @@ def train(self, gold): self._cls_prob_dist = GoodTuringProbDist(cls_freq_dist, cls_freq_dist.B()) # for features - def make_probdist(freqdist, (cls, fname)): + def make_probdist(freqdist, xxx_todo_changeme): + (cls, fname) = xxx_todo_changeme return GoodTuringProbDist(freqdist, len(feature_values[fname])) self._feat_prob_dist = ConditionalProbDist(feat_freq_dist, make_probdist, True) @@ -149,7 +150,7 @@ def demo(): result = classifier.get_class_dict("a") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: @@ -180,7 +181,7 @@ def demo2(): result = classifier.get_class_dict("aababb") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: class_probs a = 0.5 @@ -215,7 +216,7 @@ def demo3(): result = classifier.get_class_dict("aaababb") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: @@ -260,9 +261,9 @@ def demo4(): result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200))) - print 'english-kjv :', result.prob('english-kjv') - print 'french :', result.prob('french') - print 'finnish :', result.prob('finnish') + print(('english-kjv :', result.prob('english-kjv'))) + print(('french :', result.prob('french'))) + print(('finnish :', result.prob('finnish'))) if __name__ == '__main__': demo2() diff --git a/nltk_contrib/classify/spearman.py b/nltk_contrib/classify/spearman.py index db81523..5c788b0 100644 --- a/nltk_contrib/classify/spearman.py +++ b/nltk_contrib/classify/spearman.py @@ -162,7 +162,7 @@ def demo(): result = classifier.get_class_dict("a") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: class a: 'a' = 1 @@ -190,7 +190,7 @@ def demo2(): result = classifier.get_class_dict("aaababb") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: class a: 'aa' = 1 @@ -224,7 +224,7 @@ def demo3(): result = classifier.get_class_dict("aaababb") for cls in result: - print cls, ':', result[cls] + print((cls, ':', result[cls])) """ expected values: @@ -268,9 +268,9 @@ def demo4(): result = classifier.get_class_probs(list(islice(genesis.raw("english-kjv"), 150, 200))) - print 'english-kjv :', result.prob('english-kjv') - print 'french :', result.prob('french') - print 'finnish :', result.prob('finnish') + print(('english-kjv :', result.prob('english-kjv'))) + 
print(('french :', result.prob('french'))) + print(('finnish :', result.prob('finnish'))) if __name__ == '__main__': diff --git a/nltk_contrib/combined.py b/nltk_contrib/combined.py index fe47c93..d83b870 100644 --- a/nltk_contrib/combined.py +++ b/nltk_contrib/combined.py @@ -96,7 +96,7 @@ def unmarshal (self, basepath): self._brill = Brill(self._tagger[-1], []) self._brill.unmarshal(tagger_file) else: - print "error, tagger type not recognized." + print("error, tagger type not recognized.") def exemple_train (self, train_sents, verbose=False): self._append_default("N") @@ -124,8 +124,8 @@ def create_tagger (train_sents): ct.unmarshal("tresoldi") tokens = "Mauro viu o livro sobre a mesa".split() - print list(ct.tag(tokens)) + print((list(ct.tag(tokens)))) # tests acc = tag.accuracy(ct, [train_sents]) - print 'Accuracy = %4.2f%%' % (100 * acc) + print(('Accuracy = %4.2f%%' % (100 * acc))) diff --git a/nltk_contrib/concord.py b/nltk_contrib/concord.py index f5bfeac..5c401a5 100644 --- a/nltk_contrib/concord.py +++ b/nltk_contrib/concord.py @@ -225,16 +225,16 @@ def raw(self, leftRegexp=None, middleRegexp=".*", rightRegexp=None, reg = re.compile(middleRegexp) if verbose: - print "Matching the following target words:" + print("Matching the following target words:") wordLocs = [] # get list of (sentence, word) pairs to get context for - for item in self.index.getIndex().iteritems(): + for item in list(self.index.getIndex().items()): if reg.match("/".join([item[0][0].lower(), item[0][1]])): if verbose: - print "/".join(item[0]) + print(("/".join(item[0]))) wordLocs.append(item[1]) - print "" + print("") items = [] # if context lengths are specified in words: @@ -358,24 +358,24 @@ def raw(self, leftRegexp=None, middleRegexp=".*", rightRegexp=None, items.append((left, target, right, sentenceNum)) if verbose: - print "Found %d matches for target word..." % len(items) + print(("Found %d matches for target word..." % len(items))) # sort the concordance if sort == self.SORT_WORD: if verbose: - print "Sorting by target word..." + print("Sorting by target word...") items.sort(key=lambda i:i[1][0].lower()) elif sort == self.SORT_POS: if verbose: - print "Sorting by target word POS tag..." + print("Sorting by target word POS tag...") items.sort(key=lambda i:i[1][1].lower()) elif sort == self.SORT_NUM: if verbose: - print "Sorting by sentence number..." + print("Sorting by sentence number...") items.sort(key=lambda i:i[3]) elif sort == self.SORT_RIGHT_CONTEXT: if verbose: - print "Sorting by first word of right context..." + print("Sorting by first word of right context...") items.sort(key=lambda i:i[2][0][0]) # if any regular expressions have been given for the context, filter @@ -390,11 +390,11 @@ def raw(self, leftRegexp=None, middleRegexp=".*", rightRegexp=None, rightRe=None if leftRegexp != None: if verbose: - print "Filtering on left context..." + print("Filtering on left context...") leftRe = re.compile(leftRegexp) if rightRegexp != None: if verbose: - print "Filtering on right context..." 
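Note: the converted demo printers in these classify and combined.py hunks wrap the old comma-separated arguments in an extra pair of parentheses, so under Python 3 they print a single tuple (with quotes and commas) instead of the space-separated text the Python 2 statements produced. A minimal sketch of the difference, using a throwaway result dict that is not part of the patch:

    result = {'a': 5}
    for cls in result:
        print((cls, ':', result[cls]))   # prints the tuple: ('a', ':', 5)
        print(cls, ':', result[cls])     # prints: a : 5  (the old Python 2 style output)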
+ print("Filtering on right context...") rightRe = re.compile(rightRegexp) for item in items: @@ -515,11 +515,11 @@ def format(self, source, contextChars=55, maxKeyLength=0, showWord=True, rPad = int(floor(max(maxMiddleLength - len(middle), 0) / 2.0)) middle = " "*lPad + middle + " "*rPad - print left + "| " + middle + " | " + right + " " + print((left + "| " + middle + " | " + right + " ")) count += 1 if verbose: - print "\n" + repr(count) + " lines" + print(("\n" + repr(count) + " lines")) def _matches(self, item, leftRe, rightRe): """ Private method that runs the given regexps over a raw concordance @@ -798,10 +798,10 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1, x = 0 other = 0 total = 0 - print name - print "-"*(maxKeyLength + 7) + print(name) + print(("-"*(maxKeyLength + 7))) # for each key: - for key in dist.keys(): + for key in list(dist.keys()): # keep track of how many samples shown, if using the showFirstX # option #if showFirstX > 0 and x >= showFirstX: @@ -823,7 +823,7 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1, if count < threshold or (showFirstX > 0 and x >= showFirstX): other += count else: - print key + " "*(maxKeyLength - len(key) + 1) + countString + print((key + " "*(maxKeyLength - len(key) + 1) + countString)) x += 1 if countOther: @@ -833,7 +833,7 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1, else: count = other countString = str(count) - print self._OTHER_TEXT + " "*(maxKeyLength - len(self._OTHER_TEXT) + 1) + countString + print((self._OTHER_TEXT + " "*(maxKeyLength - len(self._OTHER_TEXT) + 1) + countString)) if showTotal: if normalise: count = 1.0 * total @@ -841,21 +841,21 @@ def format(self, output, maxKeyLength=20, threshold=-1, showFirstX=-1, else: count = total countString = str(count) - print self._TOTAL_TEXT + " "*(maxKeyLength - len(self._TOTAL_TEXT) + 1) + countString - print "" + print((self._TOTAL_TEXT + " "*(maxKeyLength - len(self._TOTAL_TEXT) + 1) + countString)) + print("") def demo(): """ Demonstrates how to use IndexConcordance and Aggregator. """ - print "Reading Brown Corpus into memory..." + print("Reading Brown Corpus into memory...") corpus = brown.tagged_sents('a') - print "Generating index..." + print("Generating index...") ic = IndexConcordance(corpus) - print "Showing all occurences of 'plasma' in the Brown Corpus..." + print("Showing all occurences of 'plasma' in the Brown Corpus...") ic.formatted(middleRegexp="^plasma/.*", verbose=True) - print "Investigating the collocates of 'deal' and derivatives..." + print("Investigating the collocates of 'deal' and derivatives...") agg = Aggregator() agg.add(ic.raw(middleRegexp="^deal", leftContextLength=1, rightContextLength=0, leftRegexp="^(\w|\s|/)*$"), "Brown Corpus 'deal' left collocates") diff --git a/nltk_contrib/coref/__init__.py b/nltk_contrib/coref/__init__.py index 898a163..5fec319 100644 --- a/nltk_contrib/coref/__init__.py +++ b/nltk_contrib/coref/__init__.py @@ -31,7 +31,7 @@ # Import top-level functionality into top-level namespace # Processing packages -- these all define __all__ carefully. 
-from api import * +from .api import * import nltk.data from nltk.corpus.util import LazyCorpusLoader @@ -39,6 +39,6 @@ if os.environ.get('NLTK_DATA_MUC6') \ and os.environ.get('NLTK_DATA_MUC6') not in nltk.data.path: nltk.data.path.insert(0, os.environ.get('NLTK_DATA_MUC6')) -from muc import MUCCorpusReader +from .muc import MUCCorpusReader muc6 = LazyCorpusLoader('muc6/', MUCCorpusReader, r'.*\.ne\..*\.sgm') \ No newline at end of file diff --git a/nltk_contrib/coref/ace2.py b/nltk_contrib/coref/ace2.py index f1b6b5d..7c3459a 100644 --- a/nltk_contrib/coref/ace2.py +++ b/nltk_contrib/coref/ace2.py @@ -243,17 +243,17 @@ def _demo(root, file): try: reader = ACE2CorpusReader(root, file) - print 'Sentences for %s:' % (file) + print(('Sentences for %s:' % (file))) for sent in reader.sents(): - print ' %s' % (sent) - print - print 'Words for %s:' % (file) + print((' %s' % (sent))) + print() + print(('Words for %s:' % (file))) for word in reader.words(): - print ' %s' % (word) - print - except Exception, e: - print 'Error encountered while running demo for %s: %s' % (file, e) - print + print((' %s' % (word))) + print() + except Exception as e: + print(('Error encountered while running demo for %s: %s' % (file, e))) + print() def demo(): """ diff --git a/nltk_contrib/coref/api.py b/nltk_contrib/coref/api.py index 9fefc3b..16a92ea 100644 --- a/nltk_contrib/coref/api.py +++ b/nltk_contrib/coref/api.py @@ -27,7 +27,7 @@ class TrainableI(object): """ def __init__(self): if self.__class__ == TrainableI: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") def train(self, labeled_sequence, test_sequence=None, unlabeled_sequence=None, **kwargs): @@ -54,7 +54,7 @@ class HiddenMarkovModelChunkTaggerTransformI(HiddenMarkovModelTaggerTransformI): # Inherit the superclass documentation. 
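Note: the coref/api.py hunks that follow replace the Python 2 form raise AssertionError, "..." with the call form, which is the only raise syntax Python 3 accepts; the interface-guard pattern itself is unchanged. A stripped-down version of that guard, runnable on both Python 2 and 3 (class name copied from the patch for illustration):

    class TrainableI(object):
        def __init__(self):
            if self.__class__ == TrainableI:
                raise AssertionError("Interfaces can't be instantiated")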
def __init__(self): if self.__class__ == HiddenMarkovModelChunkTaggerTransformI: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") def path2tags(self, path): """ @@ -78,7 +78,7 @@ class CorpusReaderDecoratorI(CorpusReader): """ def __init__(self): if self.__class__ == CorpusReaderDecorator: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") def reader(self): """ @@ -115,7 +115,7 @@ def __new__(self, s, **kwargs): def __init__(self, s, **kwargs): if self.__class__ == NamedEntityI: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") self._iob_tag = kwargs.get('iob_tag', self.BEGINS) def iob_in(self): @@ -159,7 +159,7 @@ class ChunkTaggerI(TaggerI): """ def __init__(self): if self.__class__ == ChunkTaggerI: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") @@ -172,7 +172,7 @@ class CorefResolverI(object): """ def __init__(self): if self.__class__ == CorefResolverI: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") def mentions(self, sentences): """ @@ -255,7 +255,7 @@ def resolve(self, sentences): class ChunkTaggerI(TaggerI, ChunkParserI): def __init__(self): if self.__class__ == ChunkTaggerI: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") def tag(self, sent): """ @@ -310,7 +310,7 @@ def __init__(self, feature_detector, labeled_sequence, classifier_builder): @type classifier_builder: C{function} """ if self.__class__ == AbstractClassifierBasedTagger: - raise AssertionError, "Interfaces can't be instantiated" + raise AssertionError("Interfaces can't be instantiated") ClassifierBasedTagger.__init__(self, feature_detector, labeled_sequence, classifier_builder) diff --git a/nltk_contrib/coref/chunk.py b/nltk_contrib/coref/chunk.py index 2ed9ef3..329a8e0 100644 --- a/nltk_contrib/coref/chunk.py +++ b/nltk_contrib/coref/chunk.py @@ -71,13 +71,13 @@ def __init__(self, tokens, index=0, history=None, **kwargs): if window > 0 and index > 0: prev_feats = \ self.__class__(tokens, index - 1, history, window=window - 1) - for key, val in prev_feats.items(): + for key, val in list(prev_feats.items()): if not key.startswith('next_') and key != 'word': self['prev_%s' % key] = val if window > 0 and index < len(tokens) - 1: next_feats = self.__class__(tokens, index + 1, window=window - 1) - for key, val in next_feats.items(): + for key, val in list(next_feats.items()): if not key.startswith('prev_') and key != 'word': self['next_%s' % key] = val @@ -99,16 +99,16 @@ def parse(self, sent): return self.__iob2tree(self.tag(sent)) def batch_parse(self, sents): - return map(self.__iob2tree, self.batch_tag(sents)) + return list(map(self.__iob2tree, self.batch_tag(sents))) def chunk(self, sent): return self.__tree2chunks(self.parse(sent)) def batch_chunk(self, sents): - return map(self.__tree2chunks, self.batch_parse(sents)) + return list(map(self.__tree2chunks, self.batch_parse(sents))) def __iob2tree(self, tagged_sent): - return tokens2tree(map(flatten, tagged_sent), self.chunk_types) + return tokens2tree(list(map(flatten, tagged_sent)), self.chunk_types) def __tree2chunks(self, tree): chunks = [] @@ -132,7 +132,7 @@ class NaiveBayesChunkTagger(ClassifierBasedTagger, AbstractChunkTagger): def train(cls, iob_sents, 
**kwargs): fd = kwargs.get('feature_detector', ChunkTaggerFeatureDetector) chunk_types = kwargs.get('chunk_types', _DEFAULT_CHUNK_TYPES) - train = LazyMap(lambda sent: map(unflatten, sent), iob_sents) + train = LazyMap(lambda sent: list(map(unflatten, sent)), iob_sents) chunker = cls(fd, train, NaiveBayesClassifier.train) chunker.chunk_types = chunk_types return chunker @@ -157,7 +157,7 @@ def __maxent_train(fs): count_cutoff=count_cutoff, min_lldelta=min_lldelta, trace=trace) - train = LazyMap(lambda sent: map(unflatten, sent), iob_sents) + train = LazyMap(lambda sent: list(map(unflatten, sent)), iob_sents) chunker = cls(fd, train, __maxent_train) chunker.chunk_types = chunk_types return chunker @@ -182,7 +182,7 @@ def train(cls, iob_sents, **kwargs): else: trace = 0 - train = LazyMap(lambda sent: map(unflatten, sent), iob_sents) + train = LazyMap(lambda sent: list(map(unflatten, sent)), iob_sents) mallet_home = os.environ.get('MALLET_HOME', '/usr/local/mallet-0.4') nltk.classify.mallet.config_mallet(mallet_home) @@ -205,7 +205,7 @@ def tokens2tree(tokens, chunk_types=_DEFAULT_CHUNK_TYPES, top_node='S'): for token in tokens: token, tag = unflatten(token) - if isinstance(token, basestring): + if isinstance(token, str): word = token pos = None elif isinstance(token, tuple): @@ -254,32 +254,32 @@ def unflatten(token): def test_chunk_tagger(chunk_tagger, iob_sents, **kwargs): chunk_types = chunk_tagger.chunk_types - correct = map(lambda sent: tokens2tree(sent, chunk_types), iob_sents) - guesses = chunk_tagger.batch_parse(map(lambda c: c.leaves(), correct)) + correct = [tokens2tree(sent, chunk_types) for sent in iob_sents] + guesses = chunk_tagger.batch_parse([c.leaves() for c in correct]) chunkscore = ChunkScore() for c, g in zip(correct, guesses): chunkscore.score(c, g) if kwargs.get('verbose'): - guesses = chunk_tagger.batch_tag(map(lambda c: c.leaves(), correct)) + guesses = chunk_tagger.batch_tag([c.leaves() for c in correct]) correct = iob_sents - print + print() for c, g in zip(correct, guesses): - for tokc, tokg in zip(map(flatten, c), map(flatten, g)): + for tokc, tokg in zip(list(map(flatten, c)), list(map(flatten, g))): word = tokc[0] iobc = tokc[-1] iobg = tokg[-1] star = '' if iobg != iobc: star = '*' - print '%3s %20s %20s %20s' % (star, word, iobc, iobg) - print + print(('%3s %20s %20s %20s' % (star, word, iobc, iobg))) + print() - print 'Precision: %.2f' % chunkscore.precision() - print 'Recall: %.2f' % chunkscore.recall() - print 'Accuracy: %.2f' % chunkscore.accuracy() - print 'F-measure: %.2f' % chunkscore.f_measure() + print(('Precision: %.2f' % chunkscore.precision())) + print(('Recall: %.2f' % chunkscore.recall())) + print(('Accuracy: %.2f' % chunkscore.accuracy())) + print(('F-measure: %.2f' % chunkscore.f_measure())) return chunkscore @@ -287,11 +287,11 @@ def unittest(verbose=False): import doctest failed, tested = doctest.testfile('test/chunk.doctest', verbose) if not verbose: - print '%d passed and %d failed.' % (tested - failed, failed) + print(('%d passed and %d failed.' % (tested - failed, failed))) if failed == 0: - print 'Test passed.' + print('Test passed.') else: - print '***Test Failed*** %d failures.' % failed + print(('***Test Failed*** %d failures.' 
% failed)) return (tested - failed), failed def demo(): @@ -304,7 +304,7 @@ def demo(): import optparse try: - import cPickle as pickle + import pickle as pickle except: import pickle @@ -342,12 +342,12 @@ def demo(): num_test = int(m.group('test') or 0) options.numsents = (num_train, num_test) else: - raise ValueError, "malformed argument for option -n" + raise ValueError("malformed argument for option -n") else: options.numsents = (None, None) - except ValueError, v: - print 'error: %s' % v.message + except ValueError as v: + print(('error: %s' % v.message)) parser.print_help() if options.unittest: @@ -369,8 +369,8 @@ def demo(): trainer = eval(options.trainer) if options.verbose: - print 'Training %s with %d sentences' % \ - (options.trainer, num_train) + print(('Training %s with %d sentences' % \ + (options.trainer, num_train))) chunker = trainer(train, verbose=options.verbose) if options.model: @@ -388,12 +388,12 @@ def demo(): stream.close() chunker = pickle.load(_open(options.model, 'r')) if options.verbose: - print 'Model saved as %s' % options.model - except Exception, e: - print "error: %s" % e + print(('Model saved as %s' % options.model)) + except Exception as e: + print(("error: %s" % e)) if test: if options.verbose: - print 'Testing %s on %d sentences' % \ - (options.trainer, num_test) + print(('Testing %s on %d sentences' % \ + (options.trainer, num_test))) chunker.test(test, verbose=options.verbose) \ No newline at end of file diff --git a/nltk_contrib/coref/data.py b/nltk_contrib/coref/data.py index f399fe3..e3bf631 100644 --- a/nltk_contrib/coref/data.py +++ b/nltk_contrib/coref/data.py @@ -9,14 +9,14 @@ from gzip import GzipFile, READ as GZ_READ, WRITE as GZ_WRITE try: - import cPickle as pickle + import pickle as pickle except: import pickle try: - from cStringIO import StringIO + from io import StringIO except: - from StringIO import StringIO + from io import StringIO class BufferedGzipFile(GzipFile): """ diff --git a/nltk_contrib/coref/features.py b/nltk_contrib/coref/features.py index 899c729..9695580 100644 --- a/nltk_contrib/coref/features.py +++ b/nltk_contrib/coref/features.py @@ -432,7 +432,7 @@ def demo(): wt = word_type(word) if len(wt) == 0: wt = None if '*' in word: continue - print "%-20s\t%s" % (word, wt) + print(("%-20s\t%s" % (word, wt))) if __name__ == '__main__': demo() diff --git a/nltk_contrib/coref/freiburg.py b/nltk_contrib/coref/freiburg.py index 598b907..18bc4dd 100644 --- a/nltk_contrib/coref/freiburg.py +++ b/nltk_contrib/coref/freiburg.py @@ -238,21 +238,21 @@ def _demo(root, file): try: reader = FreiburgCorpusReader(root, file) - print 'Paragraphs for %s:' % (file) + print(('Paragraphs for %s:' % (file))) for para in reader.paras(): - print ' %s' % (para) - print - print 'Sentences for %s:' % (file) + print((' %s' % (para))) + print() + print(('Sentences for %s:' % (file))) for sent in reader.sents(): - print ' %s' % (sent) - print - print 'Words for %s:' % (file) + print((' %s' % (sent))) + print() + print(('Words for %s:' % (file))) for word in reader.words(): - print ' %s/%s' % (word, word.pos()) - print - except Exception, e: - print 'Error encountered while running demo for %s: %s' % (file, e) - print + print((' %s/%s' % (word, word.pos()))) + print() + except Exception as e: + print(('Error encountered while running demo for %s: %s' % (file, e))) + print() def demo(): """ diff --git a/nltk_contrib/coref/muc.py b/nltk_contrib/coref/muc.py index 528acbc..14d7992 100644 --- a/nltk_contrib/coref/muc.py +++ b/nltk_contrib/coref/muc.py @@ 
-99,10 +99,10 @@ class MUCDocument: # def __init__(self, text, docno=None, dateline=None, headline=''): def __init__(self, **text): self.text = None - if isinstance(text, basestring): + if isinstance(text, str): self.text = text elif isinstance(text, dict): - for key, val in text.items(): + for key, val in list(text.items()): setattr(self, key, val) else: raise @@ -154,7 +154,7 @@ def raw(self, fileids=None): """ if fileids is None: fileids = self._fileids - elif isinstance(fileids, basestring): + elif isinstance(fileids, str): fileids = [fileids] return concat([self.open(f).read() for f in fileids]) @@ -221,7 +221,7 @@ def __chunked_sent(sent): chunks.append([(word, None) for word in token[0]]) # If the token's contents is a string, append it as a # word/tag tuple. - elif isinstance(token[0], basestring): + elif isinstance(token[0], str): chunks.append((token[0], None)) # Something bad happened. else: @@ -416,7 +416,7 @@ def __chunked_sent(sent): def _read_parsed_block(self, stream): # TODO: LazyMap but StreamBackedCorpusView doesn't support # AbstractLazySequence currently. - return map(self._parse, self._read_block(stream)) + return list(map(self._parse, self._read_block(stream))) def _parse(self, doc): """ @@ -488,7 +488,7 @@ def tree2tuple(tree): # Get the leaves. s = (tree.leaves(),) # Get the label - if isinstance(tree.node, basestring): + if isinstance(tree.node, str): node = (tree.node,) elif isinstance(tree.node, tuple): node = tree.node @@ -497,7 +497,7 @@ def tree2tuple(tree): # Merge the leaves and the label. return s + node # If the tree is a string just convert it to a tuple. - elif isinstance(tree, basestring): + elif isinstance(tree, str): return (tree, None) # Something bad happened. else: @@ -513,7 +513,7 @@ def __fix_tokenization(sents): sents[index] += sents[index + next] sents[index + next] = '' next += 1 - sents = filter(None, sents) + sents = [_f for _f in sents if _f] return sents if s: tree = Tree(top_node, []) @@ -554,7 +554,7 @@ def _muc_read_words(s, top_node): else: stack[-1].extend(_WORD_TOKENIZER.tokenize(word)) if len(stack) != 1: - print stack + print(stack) assert len(stack) == 1 return stack[0] @@ -567,25 +567,25 @@ def demo(**kwargs): muc6 = LazyCorpusLoader('muc6/', MUCCorpusReader, muc6_documents) for sent in muc6.iob_sents()[:]: for word in sent: - print word - print - print + print(word) + print() + print() for sent in muc6.mentions(depth=None): for mention in sent: - print mention - if sent: print - print + print(mention) + if sent: print() + print() muc7 = LazyCorpusLoader('muc7/', MUCCorpusReader, muc7_documents) for sent in muc7.iob_sents()[:]: for word in sent: - print word - print - print + print(word) + print() + print() for sent in muc7.mentions(depth=None): for mention in sent: - print mention - if sent: print - print + print(mention) + if sent: print() + print() if __name__ == '__main__': demo() diff --git a/nltk_contrib/coref/muc7.py b/nltk_contrib/coref/muc7.py index 524e9b1..c8fd5af 100644 --- a/nltk_contrib/coref/muc7.py +++ b/nltk_contrib/coref/muc7.py @@ -273,21 +273,21 @@ def _demo(root, file): try: reader = MUC7CorpusReader(root, file) - print 'Paragraphs for %s:' % (file) + print(('Paragraphs for %s:' % (file))) for para in reader.paras(): - print ' %s' % (para) - print - print 'Sentences for %s:' % (file) + print((' %s' % (para))) + print() + print(('Sentences for %s:' % (file))) for sent in reader.sents(): - print ' %s' % (sent) - print - print 'Words for %s:' % (file) + print((' %s' % (sent))) + print() + print(('Words for %s:' 
% (file))) for word in reader.words(): - print ' %s' % (word) - print - except Exception, e: - print 'Error encountered while running demo for %s: %s' % (file, e) - print + print((' %s' % (word))) + print() + except Exception as e: + print(('Error encountered while running demo for %s: %s' % (file, e))) + print() def demo(): """ diff --git a/nltk_contrib/coref/ne.py b/nltk_contrib/coref/ne.py index 67f0abd..bb12b1c 100644 --- a/nltk_contrib/coref/ne.py +++ b/nltk_contrib/coref/ne.py @@ -159,13 +159,13 @@ def __init__(self, tokens, index=0, history=None, **kwargs): if window > 0 and index > 0: prev_feats = \ self.__class__(tokens, index - 1, history, window=window - 1) - for key, val in prev_feats.items(): + for key, val in list(prev_feats.items()): if not key.startswith('next_') and not key == 'word': self['prev_%s' % key] = val if window > 0 and index < len(tokens) - 1: next_feats = self.__class__(tokens, index + 1, window=window - 1) - for key, val in next_feats.items(): + for key, val in list(next_feats.items()): if not key.startswith('prev_') and not key == 'word': self['next_%s' % key] = val @@ -184,11 +184,11 @@ def unittest(verbose=False): import doctest failed, passed = doctest.testfile('test/ne.doctest', verbose) if not verbose: - print '%d passed and %d failed.' % (failed, passed) + print(('%d passed and %d failed.' % (failed, passed))) if failed == 0: - print 'Test passed.' + print('Test passed.') else: - print '***Test Failed*** %d failures.' % failed + print(('***Test Failed*** %d failures.' % failed)) return failed, passed _NE_CHUNK_TYPES = ('PERSON', 'LOCATION', 'ORGANIZATION', 'MONEY') @@ -199,7 +199,7 @@ def unittest(verbose=False): import optparse try: - import cPickle as pickle + import pickle as pickle except: import pickle @@ -244,7 +244,7 @@ def unittest(verbose=False): num_test = int(m.group('test') or 0) options.numsents = (num_train, num_test) else: - raise ValueError, "malformed argument for option -n" + raise ValueError("malformed argument for option -n") else: options.numsents = (None, None) @@ -256,10 +256,10 @@ def unittest(verbose=False): file_test = m.group('test') options.extract = (file_train, file_test) else: - raise ValueError, "malformed argument for option -e" + raise ValueError("malformed argument for option -e") - except ValueError, v: - print 'error: %s' % v.message + except ValueError as v: + print(('error: %s' % v.message)) parser.print_help() if options.unittest: @@ -292,9 +292,9 @@ def unittest(verbose=False): for index in range(len(tokens)): tag = tokens[index][-1] feats = feature_detector(tokens, index, history) - keys.update(feats.keys()) + keys.update(list(feats.keys())) stream.write('%s %s\n' % (tag, ' '.join(['%s=%s' % (k, re.escape(str(v))) - for k, v in feats.items()]))) + for k, v in list(feats.items())]))) history.append(tag) history = [] stream.close() @@ -306,9 +306,9 @@ def unittest(verbose=False): for index in range(len(tokens)): tag = tokens[index][-1] feats = feature_detector(tokens, index, history) - keys.update(feats.keys()) + keys.update(list(feats.keys())) stream.write('%s %s\n' % (tag, ' '.join(['%s=%s' % (k, re.escape(str(v))) - for k, v in feats.items()]))) + for k, v in list(feats.items())]))) history.append(tag) history = [] stream.close() @@ -343,9 +343,9 @@ def unittest(verbose=False): reader = MXPostTaggerCorpusReader(eval(options.corpus)) iob_sents = reader.iob_sents() tagged_sents = reader.tagged_sents() - corpus = LazyMap(lambda (iob_sent, tagged_sent): + corpus = LazyMap(lambda iob_sent_tagged_sent: [(iw, tt, iob) 
for ((iw, iob), (tw, tt)) - in zip(iob_sent, tagged_sent)], + in zip(iob_sent_tagged_sent[0], iob_sent_tagged_sent[1])], LazyZip(iob_sents, tagged_sents)) else: iob_sents = eval(options.corpus).iob_sents() @@ -360,8 +360,8 @@ def unittest(verbose=False): trainer = eval(options.trainer) if options.verbose: - print 'Training %s with %d sentences' % \ - (options.trainer, num_train) + print(('Training %s with %d sentences' % \ + (options.trainer, num_train))) ner = trainer(train, feature_detector=NERChunkTaggerFeatureDetector, chunk_types=_NE_CHUNK_TYPES, @@ -382,12 +382,12 @@ def unittest(verbose=False): stream.close() ner = pickle.load(_open(options.model, 'r')) if options.verbose: - print 'Model saved as %s' % options.model - except Exception, e: - print "error: %s" % e + print(('Model saved as %s' % options.model)) + except Exception as e: + print(("error: %s" % e)) if test: if options.verbose: - print 'Testing %s on %d sentences' % \ - (options.trainer, num_test) + print(('Testing %s on %d sentences' % \ + (options.trainer, num_test))) ner.test(test, verbose=options.verbose) diff --git a/nltk_contrib/coref/resolve.py b/nltk_contrib/coref/resolve.py index 805ee38..329ea22 100644 --- a/nltk_contrib/coref/resolve.py +++ b/nltk_contrib/coref/resolve.py @@ -10,14 +10,14 @@ import optparse try: - import cPickle as pickle + import pickle as pickle except: import pickle try: - from cStringIO import StringIO + from io import StringIO except: - from StringIO import StringIO + from io import StringIO from nltk.util import LazyMap, LazyZip, LazyConcatenation, LazyEnumerate @@ -129,23 +129,23 @@ def baseline_coref_resolver_demo(): resolved_mentions = resolver.resolve_mentions(mentions) resolved_discourse = resolver.resolve(sents) - print 'Baseline coref resolver demo...' - print 'Mentions:' + print('Baseline coref resolver demo...') + print('Mentions:') for mention in mentions: - print mention - print - print 'Resolved mentions:' + print(mention) + print() + print('Resolved mentions:') for mention in resolved_mentions: - print mention - print - print 'Resolved discourse:' + print(mention) + print() + print('Resolved discourse:') for sent in resolved_discourse: - print sent - print - print + print(sent) + print() + print() def demo(): - print 'Demo...' 
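Note: the resolve.py imports just below fall back from cPickle/cStringIO to pickle and io.StringIO. One caveat, offered as an observation rather than as part of the patch: pickle under Python 3 writes bytes, so an in-memory round trip needs io.BytesIO, and io.StringIO will raise a TypeError. A minimal sketch:

    import pickle
    from io import BytesIO

    buf = BytesIO()
    pickle.dump({'mention': 'he'}, buf)    # pickle emits bytes, so BytesIO rather than StringIO
    buf.seek(0)
    restored = pickle.load(buf)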
+ print('Demo...') baseline_coref_resolver_demo() # muc6_test = LazyCorpusLoader( # 'muc6', MUC6CorpusReader, @@ -184,7 +184,7 @@ def demo(): # print if __name__ == '__main__': - print time.ctime(time.time()) + print((time.ctime(time.time()))) parser = optparse.OptionParser() parser.add_option('-d', '--demo', action='store_true', dest='demo', @@ -322,9 +322,9 @@ def join(chunk): pred_tags = model.tag(words) for x, y, z in zip(pred_tags, gold_tags, words): if x == y: - print ' ', (x, y, z) + print((' ', (x, y, z))) else: - print '* ', (x, y, z) + print(('* ', (x, y, z))) elif options.train_ner == 'classifier2': muc6_train = LazyCorpusLoader( @@ -352,11 +352,11 @@ def join(chunk): pred_tags = model.tag(words) for x, y, z in zip(pred_tags, gold_tags, words): if x == y: - print ' ', (x, y, z) + print((' ', (x, y, z))) else: - print '* ', (x, y, z) + print(('* ', (x, y, z))) elif options.demo: demo() - print time.ctime(time.time()) + print((time.ctime(time.time()))) diff --git a/nltk_contrib/coref/tag.py b/nltk_contrib/coref/tag.py index f78b179..291bb8d 100644 --- a/nltk_contrib/coref/tag.py +++ b/nltk_contrib/coref/tag.py @@ -3,9 +3,9 @@ import subprocess try: - from cStringIO import StringIO + from io import StringIO except: - from StringIO import StringIO + from io import StringIO from nltk.util import LazyMap, LazyConcatenation from nltk.internals import find_binary, java @@ -48,7 +48,7 @@ def __init__(self, reader, **kwargs): def tagged_sents(self): sents = self.sents() - batch_indices = range(len(sents) / 1024 + 1) + batch_indices = list(range(len(sents) / 1024 + 1)) return LazyConcatenation(LazyMap(lambda i: self._tagger.batch_tag(sents[i * 1024: i * 1024 + 1024]), batch_indices)) @@ -67,7 +67,7 @@ def batch_tag(self, sents): def config_mxpost(mxpost_home=None): global _mxpost_classpath, _mxpost_home classpath = os.environ.get('CLASSPATH', '').split(':') - mxpost_jar = filter(lambda c: c.endswith('mxpost.jar'), classpath) + mxpost_jar = [c for c in classpath if c.endswith('mxpost.jar')] if mxpost_jar: _mxpost_home = os.path.dirname(mxpost_jar[0]) _mxpost_classpath = mxpost_jar[0] @@ -83,7 +83,7 @@ def config_mxpost(mxpost_home=None): else: _mxpost_home = None _mxpost_classpath = None - raise Exception, "can't find mxpost.jar" + raise Exception("can't find mxpost.jar") def call_mxpost(classpath=None, stdin=None, stdout=None, stderr=None, blocking=False): @@ -103,14 +103,14 @@ def call_mxpost(classpath=None, stdin=None, stdout=None, stderr=None, def mxpost_parse_output(mxpost_output): result = [] mxpost_output = mxpost_output.strip() - for sent in filter(None, mxpost_output.split('\n')): - tokens = filter(None, re.split(r'\s+', sent)) + for sent in [_f for _f in mxpost_output.split('\n') if _f]: + tokens = [_f for _f in re.split(r'\s+', sent) if _f] if tokens: result.append([]) for token in tokens: m = _MXPOST_OUTPUT_RE.match(token) if not m: - raise Exception, "invalid mxpost tag pattern: %s, %s" % (token, tokens) + raise Exception("invalid mxpost tag pattern: %s, %s" % (token, tokens)) word = m.group('word') tag = m.group('tag') result[-1].append((word, tag)) @@ -122,7 +122,7 @@ def mxpost_tag(sents, **kwargs): p.communicate('\n'.join([' '.join(sent) for sent in sents])) rc = p.returncode if rc != 0: - raise Exception, 'exited with non-zero status %s' % rc + raise Exception('exited with non-zero status %s' % rc) if kwargs.get('verbose'): - print 'warning: %s' % stderr + print(('warning: %s' % stderr)) return mxpost_parse_output(stdout) \ No newline at end of file diff --git 
a/nltk_contrib/coref/train.py b/nltk_contrib/coref/train.py index bd36e44..99c1273 100644 --- a/nltk_contrib/coref/train.py +++ b/nltk_contrib/coref/train.py @@ -19,22 +19,22 @@ from nltk_contrib.coref.data import BufferedGzipFile try: - import cPickle as pickle + import pickle as pickle except: import pickle try: - from cStringIO import StringIO + from io import StringIO except: - from StringIO import StringIO + from io import StringIO class LidstoneProbDistFactory(LidstoneProbDist): def __init__(self, fd, *args, **kwargs): LidstoneProbDist.__init__(self, fd, 0.01, args[-1]) samples = fd.samples() - self._probs = dict(zip([0]*len(samples), samples)) - self._logprobs = dict(zip([0]*len(samples), samples)) + self._probs = dict(list(zip([0]*len(samples), samples))) + self._logprobs = dict(list(zip([0]*len(samples), samples))) for sample in samples: self._logprobs[sample] = LidstoneProbDist.logprob(self, sample) self._probs[sample] = LidstoneProbDist.prob(self, sample) @@ -84,7 +84,7 @@ def __featurize(tagged_token): untagged_sequence = LazyMap(__untag, LazyMap(__featurize, test_sequence)) predicted_tags = LazyMap(self.classify, untagged_sequence) acc = accuracy(correct_tags, predicted_tags) - print 'accuracy over %d tokens: %.2f' % (count, acc) + print(('accuracy over %d tokens: %.2f' % (count, acc))) class MaxentClassifierFactory(object): @@ -125,37 +125,37 @@ def train_model(train_class, labeled_sequence, test_sequence, pickle_file, verbose or include printed output. @type verbose: C{bool} """ - print 'Training ', train_class - print 'Loading training data (supervised)...' + print(('Training ', train_class)) + print('Loading training data (supervised)...') labeled_sequence = labeled_sequence[:num_train_sents] sent_count = len(labeled_sequence) word_count = sum([len(sent) for sent in labeled_sequence]) - print '%s sentences' % (sent_count) - print '%s words' % (word_count) + print(('%s sentences' % (sent_count))) + print(('%s words' % (word_count))) - print 'Training...' + print('Training...') start = time.time() model = train_class.train(labeled_sequence, **kwargs) end = time.time() - print 'Training time: %.3fs' % (end - start) - print 'Training time per sentence: %.3fs' % (float(end - start) / sent_count) - print 'Training time per word: %.3fs' % (float(end - start) / word_count) + print(('Training time: %.3fs' % (end - start))) + print(('Training time per sentence: %.3fs' % (float(end - start) / sent_count))) + print(('Training time per word: %.3fs' % (float(end - start) / word_count))) - print 'Loading test data...' + print('Loading test data...') test_sequence = test_sequence[:num_test_sents] sent_count = len(test_sequence) word_count = sum([len(sent) for sent in test_sequence]) - print '%s sentences' % (sent_count) - print '%s words' % (word_count) + print(('%s sentences' % (sent_count))) + print(('%s words' % (word_count))) try: - print 'Saving model...' 
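Note: the tagged_sents batching hunk in coref/tag.py a few files above keeps len(sents) / 1024 unchanged; under Python 3 that is true division, and range() rejects the resulting float. If the old floor behaviour is intended, // is the likely fix, sketched here as a suggestion only:

    sents = ['a sentence'] * 3000
    batch_indices = list(range(len(sents) // 1024 + 1))   # floor division gives [0, 1, 2]
    # range(len(sents) / 1024 + 1) raises TypeError: 'float' object cannot be interpreted as an integer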
+ print('Saving model...') if isinstance(pickle_file, str): if pickle_file.endswith('.gz'): _open = BufferedGzipFile @@ -165,23 +165,23 @@ def train_model(train_class, labeled_sequence, test_sequence, pickle_file, pickle.dump(model, stream) stream.close() model = pickle.load(_open(pickle_file, 'rb')) - print 'Model saved as %s' % pickle_file + print(('Model saved as %s' % pickle_file)) else: stream = StringIO() pickle.dump(model, stream) stream = StringIO(stream.getvalue()) model = pickle.load(stream) - except Exception, e: - print 'Error saving model, %s' % str(e) + except Exception as e: + print(('Error saving model, %s' % str(e))) - print 'Testing...' + print('Testing...') start = time.time() model.test(test_sequence, **kwargs) end = time.time() - print 'Test time: %.3fs' % (end - start) - print 'Test time per sentence: %.3fs' % (float(end - start) / sent_count) - print 'Test time per word: %.3fs' % (float(end - start) / word_count) + print(('Test time: %.3fs' % (end - start))) + print(('Test time per sentence: %.3fs' % (float(end - start) / sent_count))) + print(('Test time per word: %.3fs' % (float(end - start) / word_count))) return model diff --git a/nltk_contrib/coref/util.py b/nltk_contrib/coref/util.py index 75f0d8a..0e3b9be 100644 --- a/nltk_contrib/coref/util.py +++ b/nltk_contrib/coref/util.py @@ -8,14 +8,14 @@ import time try: - import cPickle as pickle + import pickle as pickle except: import pickle try: - from cStringIO import StringIO + from io import StringIO except: - from StringIO import StringIO + from io import StringIO from nltk.data import load, find from nltk.corpus import CorpusReader, BracketParseCorpusReader @@ -114,7 +114,7 @@ def __init__(self, reader): def zipzip(*lists): - return LazyMap(lambda lst: zip(*lst), LazyZip(*lists)) + return LazyMap(lambda lst: list(zip(*lst)), LazyZip(*lists)) def load_treebank(sections): treebank_path = os.environ.get('NLTK_TREEBANK', 'treebank/combined') @@ -133,16 +133,16 @@ def treebank_tagger_demo(): 'state_union', PlaintextCorpusReader, r'(?!\.svn).*\.txt') state_union = TreebankTaggerCorpusReader(state_union) - print 'Treebank tagger demo...' - print 'Tagged sentences:' + print('Treebank tagger demo...') + print('Tagged sentences:') for sent in state_union.tagged_sents()[500:505]: - print sent - print - print - print 'Tagged words:' + print(sent) + print() + print() + print('Tagged words:') for word in state_union.tagged_words()[500:505]: - print word - print + print(word) + print() def treebank_chunk_tagger_demo(): from nltk.corpus.util import LazyCorpusLoader @@ -153,17 +153,17 @@ def treebank_chunk_tagger_demo(): 'state_union', PlaintextCorpusReader, r'(?!\.svn).*\.txt') state_union = TreebankChunkTaggerCorpusReader(state_union) - print 'Treebank chunker demo...' - print 'Chunked sentences:' + print('Treebank chunker demo...') + print('Chunked sentences:') for sent in state_union.chunked_sents()[500:505]: - print sent - print - print - print 'Parsed sentences:' + print(sent) + print() + print() + print('Parsed sentences:') for tree in state_union.parsed_sents()[500:505]: - print tree - print - print + print(tree) + print() + print() def muc6_chunk_tagger_demo(): from nltk.corpus.util import LazyCorpusLoader @@ -172,12 +172,12 @@ def muc6_chunk_tagger_demo(): treebank = MUC6NamedEntityChunkTaggerCorpusReader(load_treebank('0[12]')) - print 'MUC6 named entity chunker demo...' 
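Note: the coref/util.py zipzip helper below and the surrounding hunks wrap zip() and map() in list() because both return one-shot iterators in Python 3; the wrapping matters whenever the result is indexed, reused, or measured with len(). A short sketch with illustrative data:

    words = ['Plasma', 'levels']
    tags = ['NN', 'NNS']
    pairs = zip(words, tags)           # Python 3: a one-shot iterator
    pairs = list(zip(words, tags))     # materialised: safe to index, reuse, or len()
    print(pairs[0], len(pairs))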
- print 'Chunked sentences:' + print('MUC6 named entity chunker demo...') + print('Chunked sentences:') for sent in treebank.chunked_sents()[:10]: - print sent - print - print + print(sent) + print() + print() def baseline_chunk_tagger_demo(): from nltk.corpus.util import LazyCorpusLoader @@ -186,16 +186,16 @@ def baseline_chunk_tagger_demo(): chunker = BaselineNamedEntityChunkTagger() treebank = load_treebank('0[12]') - print 'Baseline named entity chunker demo...' - print 'Chunked sentences:' + print('Baseline named entity chunker demo...') + print('Chunked sentences:') for sent in treebank.sents()[:10]: - print chunker.chunk(sent) - print - print 'IOB-tagged sentences:' + print((chunker.chunk(sent))) + print() + print('IOB-tagged sentences:') for sent in treebank.sents()[:10]: - print chunker.tag(sent) - print - print + print((chunker.tag(sent))) + print() + print() def demo(): from nltk_contrib.coref.util import treebank_tagger_demo, \ diff --git a/nltk_contrib/dependency/__init__.py b/nltk_contrib/dependency/__init__.py index 1d3f444..b01c902 100644 --- a/nltk_contrib/dependency/__init__.py +++ b/nltk_contrib/dependency/__init__.py @@ -4,4 +4,4 @@ # URL: # For license information, see LICENSE.TXT -from deptree import * +from .deptree import * diff --git a/nltk_contrib/dependency/deptree.py b/nltk_contrib/dependency/deptree.py index 8cfd110..857bb8d 100644 --- a/nltk_contrib/dependency/deptree.py +++ b/nltk_contrib/dependency/deptree.py @@ -35,7 +35,7 @@ def __init__(self): def __str__(self): # return '\n'.join([str(n) for n in self.nodelist]) - return '\n'.join([', '.join(['%s: %15s'%item for item in n.iteritems()]) for n in self.nodelist]) + return '\n'.join([', '.join(['%s: %15s'%item for item in list(n.items())]) for n in self.nodelist]) def load(self, file): """ @@ -151,7 +151,7 @@ def nx_graph(self): labeled directed graph. @rtype: C{XDigraph} """ - nx_nodelist = range(1, len(self.nodelist)) + nx_nodelist = list(range(1, len(self.nodelist))) nx_edgelist = [(n, self._hd(n), self._rel(n)) for n in nx_nodelist if self._hd(n)] self.nx_labels = {} @@ -191,7 +191,7 @@ def demo(nx=False): . . 
9 VMOD """) tree = dg.deptree() - print tree.pprint() + print((tree.pprint())) if nx: #currently doesn't work try: diff --git a/nltk_contrib/dependency/ptbconv.py b/nltk_contrib/dependency/ptbconv.py index 4e3e6ae..ddf066f 100644 --- a/nltk_contrib/dependency/ptbconv.py +++ b/nltk_contrib/dependency/ptbconv.py @@ -18,6 +18,7 @@ import math from nltk.internals import find_binary import os +from functools import reduce OUTPUT_FORMAT = '%s\t%s\t_\t%s\t_\t_\t%s\t%s\t_\t_\n' @@ -84,7 +85,7 @@ def _run_ptbconv(num, format='D', verbose=False): (stdout, stderr) = p.communicate() if verbose: - print stderr.strip() + print(stderr.strip()) return stdout @@ -94,10 +95,10 @@ def _treebank_path(): [os.environ['NLTK_DATA'], 'corpora', 'treebank']) def convert_all(): - for i in xrange(199): - print '%s:' % (i+1), + for i in range(199): + print('%s:' % (i+1), end=' ') convert(i+1, 'D', True, True) if __name__ == '__main__': - print convert(1, 'D') + print(convert(1, 'D')) \ No newline at end of file diff --git a/nltk_contrib/dependency/util.py b/nltk_contrib/dependency/util.py index 6695530..a4f9c6b 100644 --- a/nltk_contrib/dependency/util.py +++ b/nltk_contrib/dependency/util.py @@ -5,7 +5,7 @@ from nltk import tokenize from itertools import islice import os -from deptree import DepGraph +from .deptree import DepGraph from nltk.stem.wordnet import WordNetLemmatizer def tag2tab(s, sep='/'): @@ -60,8 +60,8 @@ def conll_to_depgraph(input_str, stem=False, verbose=False): assert depgraph_input, 'depgraph_input is empty' if verbose: - print 'Begin DepGraph creation' - print 'depgraph_input=\n%s' % depgraph_input + print('Begin DepGraph creation') + print('depgraph_input=\n%s' % depgraph_input) return DepGraph().read(depgraph_input) @@ -79,7 +79,7 @@ def demo(): #s = '' for sent in islice(tabtagged(), 3): for line in sent: - print line, + print(line, end=' ') #s += ''.join(sent) #print >>f, s #f.close() diff --git a/nltk_contrib/featuredemo.py b/nltk_contrib/featuredemo.py index e94d685..e06b406 100755 --- a/nltk_contrib/featuredemo.py +++ b/nltk_contrib/featuredemo.py @@ -13,7 +13,7 @@ def text_parse(grammar, sent, trace=2, drawtrees=False, latex=False): parser = grammar.earley_parser(trace=trace) - print parser._grammar + print((parser._grammar)) tokens = sent.split() trees = parser.get_parse_list(tokens) if drawtrees: @@ -21,8 +21,8 @@ def text_parse(grammar, sent, trace=2, drawtrees=False, latex=False): TreeView(trees) else: for tree in trees: - if latex: print tree.latex_qtree() - else: print tree + if latex: print((tree.latex_qtree())) + else: print(tree) def main(): import sys @@ -83,7 +83,7 @@ def main(): sentence = line.strip() if sentence == '': continue if sentence[0] == '#': continue - print "Sentence: %s" % sentence + print(("Sentence: %s" % sentence)) text_parse(grammar, sentence, trace, False, options.latex) if __name__ == '__main__': diff --git a/nltk_contrib/fst/draw_graph.py b/nltk_contrib/fst/draw_graph.py index abbeaa8..e3c56bc 100644 --- a/nltk_contrib/fst/draw_graph.py +++ b/nltk_contrib/fst/draw_graph.py @@ -133,7 +133,9 @@ def _label_coords(self): labely = (y1+y2)*0.5 - (x2-x1)*(self._curve/2 + 8/r) return (int(labelx), int(labely)) - def _line_coords(self, (startx, starty), (endx, endy)): + def _line_coords(self, xxx_todo_changeme, xxx_todo_changeme1): + (startx, starty) = xxx_todo_changeme + (endx, endy) = xxx_todo_changeme1 (x1, y1) = int(startx), int(starty) (x2, y2) = int(endx), int(endy) radius1 = 0 @@ -253,7 +255,7 @@ def remove_edge(self, edge): Remove an edge from the graph (but 
don't destroy it). @type edge: L{GraphEdgeWidget} """ - print 'remove', edge + print(('remove', edge)) # Get the edge's start & end nodes. start, end = self._startnode[edge], self._endnode[edge] @@ -315,9 +317,9 @@ def destroy_node(self, node): """ Remove a node from the graph, and destroy the node. """ - print 'removing', node + print(('removing', node)) for widget in self.remove_node(node): - print 'destroying', widget + print(('destroying', widget)) widget.destroy() def _tags(self): return [] @@ -467,7 +469,7 @@ def _arrange_level(self, levelnum): while len(nodes) > 0: best = (None, None, -1) # node, position, score. for pos in range(len(scores)): - for (node, score) in scores[pos].items(): + for (node, score) in list(scores[pos].items()): if (score > best[2] and level[pos] is None and node in nodes): best = (node, pos, score) @@ -526,9 +528,9 @@ def _reachable(self, node, reached=None): """ How many *unexpanded* nodes can be reached from the given node? """ - if self._nodelevel.has_key(node): return 0 + if node in self._nodelevel: return 0 if reached is None: reached = {} - if not reached.has_key(node): + if node not in reached: reached[node] = 1 for edge in self._outedges.get(node, []): self._reachable(self._endnode[edge], reached) @@ -551,14 +553,14 @@ def _add_descendants_dfs(self, parent_level, levelnum): if levelnum >= len(self._levels): self._levels.append([]) for parent_node in parent_level: # Add the parent node - if not self._nodelevel.has_key(parent_node): + if parent_node not in self._nodelevel: self._levels[levelnum-1].append(parent_node) self._nodelevel[parent_node] = levelnum-1 # Recurse to its children child_nodes = [self._endnode[edge] for edge in self._outedges.get(parent_node, []) - if not self._nodelevel.has_key(self._endnode[edge])] + if self._endnode[edge] not in self._nodelevel] if len(child_nodes) > 0: self._add_descendants_dfs(child_nodes, levelnum+1) @@ -569,7 +571,7 @@ def _add_descendants_bfs(self, parent_level, levelnum): child_nodes = [self._endnode[edge] for edge in self._outedges.get(parent_node, [])] for node in child_nodes: - if not self._nodelevel.has_key(node): + if node not in self._nodelevel: self._levels[levelnum].append(node) self._nodelevel[node] = levelnum frontier_nodes.append(node) @@ -585,7 +587,7 @@ def _add_descendants_bfs2(self, parent_level, levelnum): child_nodes += [self._startnode[edge] for edge in self._inedges.get(parent_node, [])] for node in child_nodes: - if not self._nodelevel.has_key(node): + if node not in self._nodelevel: self._levels[levelnum].append(node) self._nodelevel[node] = levelnum frontier_nodes.append(node) diff --git a/nltk_contrib/fst/fst.py b/nltk_contrib/fst/fst.py index 20c2988..222fb4d 100644 --- a/nltk_contrib/fst/fst.py +++ b/nltk_contrib/fst/fst.py @@ -1363,7 +1363,7 @@ def step(self, *e): if self.stepper is None: return # Perform one step. - try: result, val = self.stepper.next() + try: result, val = next(self.stepper) except StopIteration: return if result == 'fail': @@ -1377,7 +1377,7 @@ def step(self, *e): self.out_text.insert('end', ' (Finished!)') elif result == 'backtrack': self.out_text.insert('end', ' (Backtrack)') - for state, widget in self.graph.state_widgets.items(): + for state, widget in list(self.graph.state_widgets.items()): if state == val: self.graph.mark_state(state, '#f0b0b0') else: self.graph.unmark_state(state) else: @@ -1408,7 +1408,7 @@ def step(self, *e): self.state_descr.insert('end', state_descr or '') # Highlight the new dst state. 
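Note: the draw_graph.py and fst.py hunks around here swap dict.has_key(k) for the k in d test and iterator.next() for the next(iterator) builtin; both of the old spellings are gone in Python 3. A compact sketch of the replacements, with invented sample values:

    levels = {'n0': 1}
    if 'n0' in levels:                 # replaces levels.has_key('n0')
        pass
    stepper = iter([('step', 1), ('fail', None)])
    result, val = next(stepper)        # replaces stepper.next()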
- for state, widget in self.graph.state_widgets.items(): + for state, widget in list(self.graph.state_widgets.items()): if state == fst.dst(arc): self.graph.mark_state(state, '#00ff00') elif state == fst.src(arc): @@ -1416,7 +1416,7 @@ def step(self, *e): else: self.graph.unmark_state(state) # Highlight the new arc. - for a, widget in self.graph.arc_widgets.items(): + for a, widget in list(self.graph.arc_widgets.items()): if a == arc: self.graph.mark_arc(a) else: self.graph.unmark_arc(a) @@ -1467,11 +1467,11 @@ def mainloop(self, *args, **kwargs): end -> """) - print "john eats the bread ->" - print ' '+ ' '.join(fst.transduce("john eats the bread".split())) + print("john eats the bread ->") + print((' '+ ' '.join(fst.transduce("john eats the bread".split())))) rev = fst.inverted() - print "la vache mange de l'herbe ->" - print ' '+' '.join(rev.transduce("la vache mange de l'herbe".split())) + print("la vache mange de l'herbe ->") + print((' '+' '.join(rev.transduce("la vache mange de l'herbe".split())))) demo = FSTDemo(fst) demo.transduce("the cow eats the bread".split()) diff --git a/nltk_contrib/fst/fst2.py b/nltk_contrib/fst/fst2.py index 75188e0..6107bdb 100644 --- a/nltk_contrib/fst/fst2.py +++ b/nltk_contrib/fst/fst2.py @@ -965,8 +965,8 @@ def dotgraph(self, mergeEdges=True, multiEdgesToNodesColoringThreshold=2.5, uniqueArcs[(src,dst)] += [(in_str,out_str)] else: uniqueArcs[(src,dst)] = [(in_str,out_str)] - ratio = float(len(uniqueArcs.keys())) / float(stateCount) - for src,dst in uniqueArcs.keys(): + ratio = float(len(list(uniqueArcs.keys()))) / float(stateCount) + for src,dst in list(uniqueArcs.keys()): uniqueArcs[(src,dst)].sort() sortedArcs = FST.mergeRuns(uniqueArcs[(src,dst)],minRun) label = "" @@ -1467,7 +1467,7 @@ def step(self, *e): if self.stepper is None: return # Perform one step. - try: result, val = self.stepper.next() + try: result, val = next(self.stepper) except StopIteration: return if result == 'fail': @@ -1481,7 +1481,7 @@ def step(self, *e): self.out_text.insert('end', ' (Finished!)') elif result == 'backtrack': self.out_text.insert('end', ' (Backtrack)') - for state, widget in self.graph.state_widgets.items(): + for state, widget in list(self.graph.state_widgets.items()): if state == val: self.graph.mark_state(state, '#f0b0b0') else: self.graph.unmark_state(state) else: @@ -1512,7 +1512,7 @@ def step(self, *e): self.state_descr.insert('end', state_descr or '') # Highlight the new dst state. - for state, widget in self.graph.state_widgets.items(): + for state, widget in list(self.graph.state_widgets.items()): if state == fst.dst(arc): self.graph.mark_state(state, '#00ff00') elif state == fst.src(arc): @@ -1520,7 +1520,7 @@ def step(self, *e): else: self.graph.unmark_state(state) # Highlight the new arc. 
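Note: the _line_coords rewrite in draw_graph.py earlier in this patch shows the mechanical removal of tuple parameters (PEP 3113): the converter injects xxx_todo_changeme placeholders and unpacks inside the body. The placeholders are valid but unreadable; a hand-tidied equivalent might look like the following sketch, with names chosen only for illustration:

    def line_coords(start, end):
        (startx, starty) = start       # explicit unpacking replaces the Python 2 tuple parameter
        (endx, endy) = end
        return int(startx), int(starty), int(endx), int(endy)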
- for a, widget in self.graph.arc_widgets.items(): + for a, widget in list(self.graph.arc_widgets.items()): if a == arc: self.graph.mark_arc(a) else: self.graph.unmark_arc(a) @@ -1571,11 +1571,11 @@ def mainloop(self, *args, **kwargs): end -> """) - print "john eats the bread ->" - print ' '+ ' '.join(fst.transduce("john eats the bread".split())) + print("john eats the bread ->") + print((' '+ ' '.join(fst.transduce("john eats the bread".split())))) rev = fst.inverted() - print "la vache mange de l'herbe ->" - print ' '+' '.join(rev.transduce("la vache mange de l'herbe".split())) + print("la vache mange de l'herbe ->") + print((' '+' '.join(rev.transduce("la vache mange de l'herbe".split())))) demo = FSTDemo(fst) demo.transduce("the cow eats the bread".split()) diff --git a/nltk_contrib/fuf/__init__.py b/nltk_contrib/fuf/__init__.py index 9df32ca..3bf45a4 100644 --- a/nltk_contrib/fuf/__init__.py +++ b/nltk_contrib/fuf/__init__.py @@ -49,12 +49,12 @@ syntax to C{nltk.featstruct.FeatStruct}. """ -from fufconvert import * -from fuf import * -from linearizer import * -from fstypes import * -from link import * -from util import * +from .fufconvert import * +from .fuf import * +from .linearizer import * +from .fstypes import * +from .link import * +from .util import * __all__ = [ # Unifier diff --git a/nltk_contrib/fuf/fstypes.py b/nltk_contrib/fuf/fstypes.py index 073f229..f41a3a6 100644 --- a/nltk_contrib/fuf/fstypes.py +++ b/nltk_contrib/fuf/fstypes.py @@ -2,7 +2,7 @@ C{fstypes.py} module contains the implementation of feature value types as defined in the FUF manual (v5.2) """ -from sexp import * +from .sexp import * from nltk.featstruct import CustomFeatureValue, UnificationFailure class FeatureTypeTable(object): @@ -28,9 +28,9 @@ def define_type(self, name, children): @type children: single string or list of strings """ - if name not in self.table.keys(): + if name not in list(self.table.keys()): self.table[name] = [] - if isinstance(children, basestring): + if isinstance(children, str): children = [children] for child in children: self.table[name].append(child) @@ -48,14 +48,14 @@ def subsume(self, name, specialization): # quick check if the specialization is the immediate one spec = specialization if name == spec: return True - if not self.table.has_key(name): return False + if name not in self.table: return False if spec in self.table[name]: return True return any(self.subsume(item, spec) for item in self.table[name]) def __repr__(self): output = "" - for key, value in self.table.items(): + for key, value in list(self.table.items()): output += "%s <--- %s\n" % (key, value) return output @@ -141,16 +141,16 @@ def assign_types(table, fs): """ def assign_types_helper(fs, type_table, flat_type_table): # go through the feature structure and convert the typed values - for fkey, fval in fs.items(): + for fkey, fval in list(fs.items()): if isinstance(fval, nltk.FeatStruct): assign_types_helper(fval, type_table, flat_type_table) - elif isinstance(fval, basestring) and (fval in flat_type_table): + elif isinstance(fval, str) and (fval in flat_type_table): newval = TypedFeatureValue(fval, table) fs[fkey] = newval # flattten the table flat_type_table = list() - for tkey, tvalue in table.table.items(): + for tkey, tvalue in list(table.table.items()): flat_type_table.append(tkey) for tval in tvalue: flat_type_table.append(tval) @@ -165,9 +165,9 @@ def assign_types_helper(fs, type_table, flat_type_table): sexp = SexpListParser().parse(typedef) type_table.define_type(sexp[1], sexp[2]) - print 
type_table - print type_table.subsume('np', 'common') - print type_table.subsume('mood', 'imperative') + print(type_table) + print((type_table.subsume('np', 'common'))) + print((type_table.subsume('mood', 'imperative'))) diff --git a/nltk_contrib/fuf/fuf.py b/nltk_contrib/fuf/fuf.py index c748e17..8e04779 100644 --- a/nltk_contrib/fuf/fuf.py +++ b/nltk_contrib/fuf/fuf.py @@ -1,10 +1,10 @@ import os import nltk -from fufconvert import * -from link import * -from linearizer import * -from util import output_html, flatten +from .fufconvert import * +from .link import * +from .linearizer import * +from .util import output_html, flatten class GrammarPathResolver(object): @@ -41,7 +41,7 @@ def filter_for_alt(grammar): alts = list() fs = nltk.FeatStruct() - for gkey, gvalue in grammar.items(): + for gkey, gvalue in list(grammar.items()): if gkey != "alt" and not gkey.startswith("alt_"): #if isinstance(gvalue, basestring): fs[gkey] = gvalue @@ -63,7 +63,7 @@ def alt_to_list(fs, altname): @return: list """ - altkeys = fs[altname].keys() + altkeys = list(fs[altname].keys()) altkeys = sorted([int(key) for key in altkeys if key != "_index_"], cmp) altkeys = [str(key) for key in altkeys] @@ -107,7 +107,7 @@ def _copy_vals(table, fs, pack): """ if isinstance(pack, list): for subpack in pack: - for fkey, fvalue in fs.items(): + for fkey, fvalue in list(fs.items()): if (fkey in subpack) and \ GrammarPathResolver._is_subsumed_val(table, fs, fkey, subpack): pass @@ -120,7 +120,7 @@ def _copy_vals(table, fs, pack): subpack[fkey] = fvalue else: assert isinstance(pack, nltk.FeatStruct) - for fkey, fvalue in fs.items(): + for fkey, fvalue in list(fs.items()): if (fkey in pack) and \ GrammarPathResolver._is_subsumed_val(table, fs, fkey, pack): pass @@ -138,7 +138,7 @@ def resolve(self, fstruct): path through the alternations. 
""" - if isinstance(fstruct, basestring): + if isinstance(fstruct, str): return fstruct fs, alts = GrammarPathResolver.filter_for_alt(fstruct) @@ -148,7 +148,7 @@ def resolve(self, fstruct): toplevel_pack = GrammarPathResolver.alt_to_list(fstruct, altname) subpack = list() for item in toplevel_pack: - if isinstance(item, nltk.FeatStruct) and len(item.keys()) == 0: + if isinstance(item, nltk.FeatStruct) and len(list(item.keys())) == 0: # empty feature - result of having opts pass elif isinstance(item, nltk.FeatStruct): @@ -162,7 +162,7 @@ def resolve(self, fstruct): return result else: total_packs = list() - for fkey, fvalue in fstruct.items(): + for fkey, fvalue in list(fstruct.items()): if isinstance(fvalue, nltk.FeatStruct): subpack = list() fs, alts = GrammarPathResolver.filter_for_alt(fvalue) @@ -170,7 +170,7 @@ def resolve(self, fstruct): for item in self.resolve(fvalue): newfs = nltk.FeatStruct() newfs[fkey] = item - for key, value in fvalue.items(): + for key, value in list(fvalue.items()): if not ('alt' in value): newfs[key] = value subpack.append(newfs) @@ -319,7 +319,7 @@ def _isconstituent(fstruct, subfs_key, subfs_val): return True if ('pattern' in fstruct): - for fkey in subfs_val.keys(): + for fkey in list(subfs_val.keys()): if fkey in fstruct['pattern']: return True return False @@ -332,7 +332,7 @@ def _unify(fs, grs, resolver=None, trace=False): unifs = fs.unify(gr) if unifs: resolver.resolve(unifs) - for fname, fval in unifs.items(): + for fname, fval in list(unifs.items()): if Unifier._isconstituent(unifs, fname, fval): newval = Unifier._unify(fval, grs, resolver) if newval: @@ -366,24 +366,24 @@ def unify(self): input_files = ['ir2.fuf'] for ifile, gfile in zip(input_files, grammar_files): if ifile == 'ir3.fuf' and gfile == 'gr3.fuf': - print 'gr3.fuf doesn\'t work because of the (* focus) s-expression in the feature structure' + print('gr3.fuf doesn\'t work because of the (* focus) s-expression in the feature structure') continue # input files contain more than one definition of input output = None result = None - print "\nINPUT FILE: %s, GRAMMAR FILE: %s" % (ifile, gfile) + print(("\nINPUT FILE: %s, GRAMMAR FILE: %s" % (ifile, gfile))) gfs = fuf_to_featstruct(open('tests/%s' % gfile).read()) for i, iline in enumerate(open('tests/%s' % ifile).readlines()): try: ifs = fuf_to_featstruct(iline) - except Exception, e: - print 'Failed to convert %s to nltk.FeatStruct' % iline + except Exception as e: + print(('Failed to convert %s to nltk.FeatStruct' % iline)) exit() fuf = Unifier(ifs, gfs) result = fuf.unify() if result: output = " ".join(linearize(result)) - print output_html([ifs, gfs, result, output]) - print i, "result:", output + print((output_html([ifs, gfs, result, output]))) + print((i, "result:", output)) else: - print i, 'result: failed' + print((i, 'result: failed')) diff --git a/nltk_contrib/fuf/fufconvert.py b/nltk_contrib/fuf/fufconvert.py index 5978634..6c1e56c 100644 --- a/nltk_contrib/fuf/fufconvert.py +++ b/nltk_contrib/fuf/fufconvert.py @@ -1,10 +1,10 @@ import re import os import nltk -from sexp import * -from link import * -from specialfs import * -from fstypes import * +from .sexp import * +from .link import * +from .specialfs import * +from .fstypes import * def fuf_to_featstruct(fuf): """ @@ -23,7 +23,7 @@ def _convert_fuf_featstruct(sexp): assert sexp.lparen == '(' fs = nltk.FeatStruct() for child in sexp: - if isinstance(child, basestring): + if isinstance(child, str): feat, val = _convert_fuf_feature(sexp) fs[feat] = val break @@ -55,11 +55,11 @@ 
def _convert_fuf_feature(sexp): del sexp[1] result = _list_convert(sexp[1]) sexp[1] = result - print sexp[1] + print((sexp[1])) feat, val = sexp else: assert len(sexp) == 2, sexp[1] - assert isinstance(sexp[0], basestring), sexp + assert isinstance(sexp[0], str), sexp feat, val = sexp # Special handling for pattern feature @@ -72,7 +72,7 @@ def _convert_fuf_feature(sexp): assert isinstance(val, SexpList) and val.lparen == '(' choices = list() for c in val: - if isinstance(c, basestring): + if isinstance(c, str): choices.append(c) else: choices.append(_convert_fuf_featstruct(c)) @@ -124,7 +124,7 @@ def fuf_file_to_featstruct(fuf_filename): # process the type defs and the grammar for sexp in lsexp: - if isinstance(sexp[0], basestring) and sexp[0] == 'define-feature-type': + if isinstance(sexp[0], str) and sexp[0] == 'define-feature-type': assert len(sexp) == 3 name, children = sexp[1], sexp[2] type_table.define_type(name, children) @@ -166,7 +166,7 @@ def _list_convert(lst): #test the alt feature - print 'START LIST TEST' + print('START LIST TEST') #listlines = open('tests/list.fuf').readlines() #for line in listlines: #print 'INPUTS:', line @@ -198,19 +198,19 @@ def _list_convert(lst): # test the example grammars grammar_files = [gfile for gfile in os.listdir('tests/') if gfile.startswith('gr')] - print grammar_files + print(grammar_files) for gfile in grammar_files: - print "FILE: %s" % gfile + print(("FILE: %s" % gfile)) text = open('tests/%s' % gfile).read() - print text - print fuf_to_featstruct(text) - print + print(text) + print((fuf_to_featstruct(text))) + print() 1/0 type_table, grammar = fuf_file_to_featstruct('tests/typed_gr4.fuf') - print type_table - print grammar + print(type_table) + print(grammar) gr5 = fuf_to_featstruct(open('tests/gr5.fuf').read()) - print gr5 + print(gr5) diff --git a/nltk_contrib/fuf/linearizer.py b/nltk_contrib/fuf/linearizer.py index 6e125e6..a8724d4 100644 --- a/nltk_contrib/fuf/linearizer.py +++ b/nltk_contrib/fuf/linearizer.py @@ -4,8 +4,8 @@ """ import nltk -from link import * -from util import output_html +from .link import * +from .util import output_html def linearize(fstruct): """ @@ -25,9 +25,9 @@ def lin_helper(fs, pattern, output): else: if isinstance(fs[item], ReentranceLink): LinkResolver().resolve(fs) - if fs[item].has_key('pattern'): + if 'pattern' in fs[item]: lin_helper(fs[item], fs[item]['pattern'], output) - elif fs[item].has_key('lex'): + elif 'lex' in fs[item]: output.append(fs[item]['lex']) assert isinstance(fstruct, nltk.FeatStruct) @@ -37,15 +37,15 @@ def lin_helper(fs, pattern, output): return output if __name__ == '__main__': - from fufconvert import * - from fuf import * + from .fufconvert import * + from .fuf import * gfs = fuf_to_featstruct(open('tests/gr0.fuf').read()) itext = open('tests/ir0.fuf').readlines()[3] ifs = fuf_to_featstruct(itext) result = unify_with_grammar(ifs, gfs) - print result - print linearize(result) + print(result) + print((linearize(result))) diff --git a/nltk_contrib/fuf/link.py b/nltk_contrib/fuf/link.py index 1df86d8..c62a5b0 100644 --- a/nltk_contrib/fuf/link.py +++ b/nltk_contrib/fuf/link.py @@ -80,7 +80,7 @@ def resolve(self, fstruct): def resolve_helper(fs, ancestors): # start looking for links - for feat, val in fs.items(): + for feat, val in list(fs.items()): # add to path and recurse if isinstance(val, nltk.FeatStruct): ancestors.append(val) @@ -144,8 +144,8 @@ def __repr__(self): if __name__ == '__main__': # testing the link resolution using gr0.fuf grammar and ir0.fuf inputs import os - 
from fufconvert import * - from fuf import * + from .fufconvert import * + from .fuf import * gfs = fuf_to_featstruct(open('tests/gr0.fuf').read()) itext = open('tests/ir0.fuf').readlines()[2] @@ -153,4 +153,4 @@ def __repr__(self): ifs = fuf_to_featstruct(itext) result = unify_with_grammar(ifs, gfs) - print output_html([ifs, gfs, result]) + print((output_html([ifs, gfs, result]))) diff --git a/nltk_contrib/fuf/morphology.py b/nltk_contrib/fuf/morphology.py index f3c363b..6d966fa 100644 --- a/nltk_contrib/fuf/morphology.py +++ b/nltk_contrib/fuf/morphology.py @@ -4,7 +4,7 @@ - morph_numeric: integer number to text """ -import lexicon +from . import lexicon def _is_vowel(char): return char in ['o', 'e', 'i', 'a', 'y'] @@ -24,7 +24,7 @@ def pluralize(word): """ assert word - assert isinstance(word, basestring) + assert isinstance(word, str) assert len(word) > 0 second_last = word[-2] @@ -90,7 +90,7 @@ def form_past(word): last = word[-1] assert word - assert isinstance(word, basestring) + assert isinstance(word, str) if last == 'e': return word + 'd' @@ -132,7 +132,7 @@ def form_present_verb(word, number, person): Forms the suffix for the present tense of the verb WORD """ assert word - assert isinstance(word, basestring) + assert isinstance(word, str) if _is_first_person(person) or _is_second_person(person): return word elif _is_third_person(person): @@ -253,7 +253,7 @@ def morph_pronoun(lex, pronoun_type, case, gender, number, distance, animate, """ Returns the correct pronoun given the features """ - if lex and isinstance(lex, basestring) and not (lex in ['none', 'nil']): + if lex and isinstance(lex, str) and not (lex in ['none', 'nil']): return lex if pronoun_type == 'personal': # start with the 'he' then augmen by person, then, by number, diff --git a/nltk_contrib/fuf/sexp.py b/nltk_contrib/fuf/sexp.py index ecac870..a7a8290 100644 --- a/nltk_contrib/fuf/sexp.py +++ b/nltk_contrib/fuf/sexp.py @@ -6,7 +6,7 @@ import os -from statemachine import PushDownMachine +from .statemachine import PushDownMachine class SexpList(list): """ @@ -39,7 +39,7 @@ def pp(self): for i, val in enumerate(self): if isinstance(val, SexpList): s += val.pp() - elif isinstance(val, basestring): + elif isinstance(val, str): s += val else: s += repr(val) @@ -71,8 +71,8 @@ def __init__(self): # set up the parenthesis self.parens = {'(':')', '[':']', '{':'}'} - self.lparens = self.parens.keys() - self.rparens = self.parens.values() + self.lparens = list(self.parens.keys()) + self.rparens = list(self.parens.values()) self._build_machine() self.machine.stack = [[]] @@ -90,8 +90,8 @@ def _tokenizer(self, to_tokenize): """ Return a tokenizer """ - lparen_res = ''.join([re.escape(lparen) for lparen in self.parens.keys()]) - rparen_res = ''.join([re.escape(rparen) for rparen in self.parens.values()]) + lparen_res = ''.join([re.escape(lparen) for lparen in list(self.parens.keys())]) + rparen_res = ''.join([re.escape(rparen) for rparen in list(self.parens.values())]) tok_re = re.compile('[%s]|[%s]|[^%s%s\s]+' % (lparen_res, rparen_res, lparen_res, rparen_res)) @@ -239,16 +239,16 @@ def parse(self): lines = open('tests/sexp.txt').readlines() for test in lines: try: - print '%s' % test + print(('%s' % test)) l = SexpListParser().parse(test) - print '==>', SexpListParser().parse(test) - print - except Exception, e: - print 'Exception:', e + print(('==>', SexpListParser().parse(test))) + print() + except Exception as e: + print(('Exception:', e)) # testing the SexpFileParser sfp = SexpFileParser('tests/typed_gr4.fuf') - print 
sfp.parse() + print((sfp.parse())) diff --git a/nltk_contrib/fuf/specialfs.py b/nltk_contrib/fuf/specialfs.py index 0ff8eee..5c56ee9 100644 --- a/nltk_contrib/fuf/specialfs.py +++ b/nltk_contrib/fuf/specialfs.py @@ -2,7 +2,7 @@ Handling for special feature names during parsing """ -from sexp import * +from .sexp import * def parse_alt(sexpl): """ @@ -17,7 +17,7 @@ def parse_alt(sexpl): feat, name, index, val = ('', '', '', '') # named alt - if isinstance(sexpl[1], basestring): + if isinstance(sexpl[1], str): # alt with index if len(sexpl) == 4: feat, name, index, val = sexpl diff --git a/nltk_contrib/fuf/util.py b/nltk_contrib/fuf/util.py index dbad459..87598fb 100644 --- a/nltk_contrib/fuf/util.py +++ b/nltk_contrib/fuf/util.py @@ -37,7 +37,7 @@ def draw(fstruct, filename=None): """ def draw_helper(output, fstruct, pcount, ccount): output += 'fs%d [label=" " style="filled" fillcolor="white"];\n' % (pcount) - for fs, val in fstruct.items(): + for fs, val in list(fstruct.items()): if isinstance(val, nltk.FeatStruct): output += 'fs%d -> fs%d [label="%s"];\n' % (pcount, ccount, fs) output, ccount = draw_helper(output, val, ccount, diff --git a/nltk_contrib/hadoop/EM/EM_mapper.py b/nltk_contrib/hadoop/EM/EM_mapper.py index a1d7e6d..34e0376 100644 --- a/nltk_contrib/hadoop/EM/EM_mapper.py +++ b/nltk_contrib/hadoop/EM/EM_mapper.py @@ -63,8 +63,8 @@ def read_params(self): # get initial state probability p (state) Pi = DictionaryProbDist(d) - A_keys = A.keys() - B_keys = B.keys() + A_keys = list(A.keys()) + B_keys = list(B.keys()) states = set() symbols = set() for e in A_keys: diff --git a/nltk_contrib/hadoop/EM/runStreaming.py b/nltk_contrib/hadoop/EM/runStreaming.py index 630f0d5..17c6ffd 100644 --- a/nltk_contrib/hadoop/EM/runStreaming.py +++ b/nltk_contrib/hadoop/EM/runStreaming.py @@ -14,8 +14,8 @@ # while not converged or not reach maximum iteration number while (abs(newlog - oldlog) > diff and i <= iter): - print "oldlog", oldlog - print "newlog", newlog + print(("oldlog", oldlog)) + print(("newlog", newlog)) i += 1 oldlog = newlog @@ -25,7 +25,7 @@ userdir = '/home/mxf/nltknew/nltk_contrib/hadoop/EM/' p = Popen([userdir + 'runStreaming.sh' ], shell=True, stdout=sys.stdout) p.wait() - print "returncode", p.returncode + print(("returncode", p.returncode)) # open the parameter output from finished iteration # and get the new loglikelihood @@ -36,5 +36,5 @@ newlog = float(li[1]) f.close() -print "oldlog", oldlog -print "newlog", newlog +print(("oldlog", oldlog)) +print(("newlog", newlog)) diff --git a/nltk_contrib/hadoop/hadooplib/mapper.py b/nltk_contrib/hadoop/hadooplib/mapper.py index 1e8ec8d..0835a1b 100644 --- a/nltk_contrib/hadoop/hadooplib/mapper.py +++ b/nltk_contrib/hadoop/hadooplib/mapper.py @@ -1,5 +1,5 @@ -from inputformat import TextLineInput -from outputcollector import LineOutput +from .inputformat import TextLineInput +from .outputcollector import LineOutput class MapperBase: diff --git a/nltk_contrib/hadoop/hadooplib/outputcollector.py b/nltk_contrib/hadoop/hadooplib/outputcollector.py index 3c1a16a..8133d71 100644 --- a/nltk_contrib/hadoop/hadooplib/outputcollector.py +++ b/nltk_contrib/hadoop/hadooplib/outputcollector.py @@ -20,4 +20,4 @@ def collect(key, value, separator = '\t'): keystr = str(key) valuestr = str(value) - print '%s%s%s' % (keystr, separator, valuestr) + print(('%s%s%s' % (keystr, separator, valuestr))) diff --git a/nltk_contrib/hadoop/hadooplib/reducer.py b/nltk_contrib/hadoop/hadooplib/reducer.py index aeb39b6..e23829a 100644 --- 
a/nltk_contrib/hadoop/hadooplib/reducer.py +++ b/nltk_contrib/hadoop/hadooplib/reducer.py @@ -1,8 +1,8 @@ from itertools import groupby from operator import itemgetter -from inputformat import KeyValueInput -from outputcollector import LineOutput +from .inputformat import KeyValueInput +from .outputcollector import LineOutput class ReducerBase: """ @@ -44,7 +44,7 @@ def group_data(self, data): """ for key, group in groupby(data, itemgetter(0)): - values = map(itemgetter(1), group) + values = list(map(itemgetter(1), group)) yield key, values def reduce(self, key, values): diff --git a/nltk_contrib/hadoop/hadooplib/util.py b/nltk_contrib/hadoop/hadooplib/util.py index 9bb6e5c..082d131 100644 --- a/nltk_contrib/hadoop/hadooplib/util.py +++ b/nltk_contrib/hadoop/hadooplib/util.py @@ -31,7 +31,7 @@ def tuple2str(t, separator = ' '): return s else: - raise ValueError, "The first parameter must be a tuple" + raise ValueError("The first parameter must be a tuple") def str2tuple(s, separator = ' '): """ @@ -55,7 +55,7 @@ def str2tuple(s, separator = ' '): t = s.strip().split(separator) return tuple(t) else: - raise ValueError, "the first parameter must be a string" + raise ValueError("the first parameter must be a string") if __name__ == "__main__": diff --git a/nltk_contrib/hadoop/tf_idf/sort.py b/nltk_contrib/hadoop/tf_idf/sort.py index bab0ebc..1f948c2 100644 --- a/nltk_contrib/hadoop/tf_idf/sort.py +++ b/nltk_contrib/hadoop/tf_idf/sort.py @@ -11,4 +11,4 @@ li.sort() for e in li: - print e, + print(e, end=' ') diff --git a/nltk_contrib/lambek/lambek.py b/nltk_contrib/lambek/lambek.py index e16e9a8..8eb7b36 100755 --- a/nltk_contrib/lambek/lambek.py +++ b/nltk_contrib/lambek/lambek.py @@ -18,9 +18,9 @@ _VAR_NAMES = 1 _SHOW_VARMAP = not _VAR_NAMES -from term import * -from typedterm import * -from lexicon import * +from .term import * +from .typedterm import * +from .lexicon import * import sys, re class Sequent: @@ -30,8 +30,8 @@ class Sequent: def __init__(self, left, right): # Check types, because we're paranoid. 
- if type(left) not in [types.ListType, types.TupleType] or \ - type(right) not in [types.ListType, types.TupleType]: + if type(left) not in [list, tuple] or \ + type(right) not in [list, tuple]: raise TypeError('Expected lists of TypedTerms') for elt in left+right: if not isinstance(elt, TypedTerm): @@ -41,8 +41,8 @@ def __init__(self, left, right): self.right = right def __repr__(self): - left_str = `self.left`[1:-1] - right_str = `self.right`[1:-1] + left_str = repr(self.left)[1:-1] + right_str = repr(self.right)[1:-1] return left_str + ' => ' + right_str def to_latex(self, pp_varmap=None): @@ -86,8 +86,8 @@ def __init__(self, rule, assumptions, conclusion, varmap): self.varmap = varmap def __repr__(self): - return self.rule+' '+`self.assumptions`+' -> '\ - +`self.conclusion` + return self.rule+' '+repr(self.assumptions)+' -> '\ + +repr(self.conclusion) def simplify(self, varmap=None): if varmap == None: @@ -157,7 +157,7 @@ def pp(self, left=0, toplevel=1, pp_varmap=None): if _VAR_NAMES: concl = self.conclusion.pp(pp_varmap) else: - concl = `self.conclusion` + concl = repr(self.conclusion) # Draw assumptions for assumption in self.assumptions: @@ -175,7 +175,7 @@ def pp(self, left=0, toplevel=1, pp_varmap=None): if toplevel: if _SHOW_VARMAP: - return str+'\nVarmap: '+ `self.varmap`+'\n' + return str+'\nVarmap: '+ repr(self.varmap)+'\n' else: return str else: @@ -225,7 +225,7 @@ def prove(sequent, short_circuit=0): def _prove(sequent, varmap, short_circuit, depth): if _VERBOSE: - print (' '*depth)+'Trying to prove', sequent + print((' '*depth)+'Trying to prove', sequent) proofs = [] @@ -245,7 +245,7 @@ def _prove(sequent, varmap, short_circuit, depth): proofs = proofs + dot_r(sequent, varmap, short_circuit, depth+1) if _VERBOSE: - print ' '*depth+'Found '+`len(proofs)`+' proof(s)' + print(' '*depth+'Found '+repr(len(proofs))+' proof(s)') return proofs @@ -506,14 +506,14 @@ def find_proof(left, right, short_circuit=1): sq = Sequent(left, right) proofs = prove(sq, short_circuit) if proofs: - print '#'*60 - print "## Proof(s) for", sq.pp() + print('#'*60) + print("## Proof(s) for", sq.pp()) for proof in proofs: - print - print proof.to_latex() + print() + print(proof.to_latex()) else: - print '#'*60 - print "## Can't prove", sq.pp() + print('#'*60) + print("## Can't prove", sq.pp()) def test_lambek(): lex = Lexicon() @@ -573,70 +573,70 @@ def mainloop(input, out, lex, latexmode, shortcircuit): if str.lower().endswith('off'): latexmode = 0 elif str.lower().endswith('on'): latexmode = 1 else: latexmode = not latexmode - if latexmode: print >>out, '% latexmode on' - else: print >>out, 'latexmode off' + if latexmode: print('% latexmode on', file=out) + else: print('latexmode off', file=out) elif str.lower().startswith('short'): if str.lower().endswith('off'): shortcircuit = 0 elif str.lower().endswith('on'): shortcircuit = 1 else: shortcircuit = not shortcircuit - if shortcircuit: print >>out, '%shortcircuit on' - else: print >>out, '% shortcircuit off' + if shortcircuit: print('%shortcircuit on', file=out) + else: print('% shortcircuit off', file=out) elif str.lower().startswith('lex'): words = lex.words() - print >>out, '% Lexicon: ' + print('% Lexicon: ', file=out) for word in words: - print >>out, '% ' + word + ':', \ - ' '*(14-len(word)) + lex[word].pp() + print('% ' + word + ':', \ + ' '*(14-len(word)) + lex[word].pp(), file=out) elif str.lower().startswith('q'): return elif str.lower().startswith('x'): return else: - print >>out, HELP + print(HELP, file=out) else: try: (left, right) = 
str.split('=>') seq = Sequent(lex.parse(left), lex.parse(right)) proofs = prove(seq, shortcircuit) - print >>out - print >>out, '%'*60 + print(file=out) + print('%'*60, file=out) if proofs: - print >>out, "%% Proof(s) for", seq.pp() + print("%% Proof(s) for", seq.pp(), file=out) for proof in proofs: - print >>out - if latexmode: print >>out, proof.to_latex() - else: print >>out, proof.pp() + print(file=out) + if latexmode: print(proof.to_latex(), file=out) + else: print(proof.pp(), file=out) else: - print >>out, "%% Can't prove", seq.pp() - except KeyError, e: - print 'Mal-formatted sequent' - print 'Key error (unknown lexicon entry?)' - print e - except ValueError, e: - print 'Mal-formatted sequent' - print e + print("%% Can't prove", seq.pp(), file=out) + except KeyError as e: + print('Mal-formatted sequent') + print('Key error (unknown lexicon entry?)') + print(e) + except ValueError as e: + print('Mal-formatted sequent') + print(e) # Usage: argv[0] lexiconfile def main(argv): if (len(argv) != 2) and (len(argv) != 4): - print 'Usage:', argv[0], '' - print 'Usage:', argv[0], ' ' + print('Usage:', argv[0], '') + print('Usage:', argv[0], ' ') return lex = Lexicon() try: lex.load(open(argv[1], 'r')) except: - print "Error loading lexicon file" + print("Error loading lexicon file") return if len(argv) == 2: mainloop(sys.stdin, sys.stdout, lex, 0, 1) else: out = open(argv[3], 'w') - print >>out, '\documentclass{article}' - print >>out, '\usepackage{fullpage}' - print >>out, '\\begin{document}' - print >>out + print('\documentclass{article}', file=out) + print('\\usepackage{fullpage}', file=out) + print('\\begin{document}', file=out) + print(file=out) mainloop(open(argv[2], 'r'), out, lex, 1, 1) - print >>out - print >>out, '\\end{document}' + print(file=out) + print('\\end{document}', file=out) if __name__ == '__main__': main(sys.argv) diff --git a/nltk_contrib/lambek/lexicon.py b/nltk_contrib/lambek/lexicon.py index 1846279..1544650 100644 --- a/nltk_contrib/lambek/lexicon.py +++ b/nltk_contrib/lambek/lexicon.py @@ -13,8 +13,8 @@ """ -from term import * -from typedterm import * +from .term import * +from .typedterm import * # Map from word to TypedTerm class Lexicon: @@ -29,16 +29,16 @@ def load(self, file): (word, term, type) = line.split(':') te = TypedTerm(parse_term(term), parse_type(type)) except ValueError: - print 'Bad line:', line + print(('Bad line:', line)) continue word = word.strip().lower() - if self._map.has_key(word): - print 'Duplicate definitions for', word + if word in self._map: + print(('Duplicate definitions for', word)) self._map[word] = te def words(self): - return self._map.keys() + return list(self._map.keys()) def __getitem__(self, word): word = word.strip().lower() diff --git a/nltk_contrib/lambek/term.py b/nltk_contrib/lambek/term.py index f3c466b..9420d89 100644 --- a/nltk_contrib/lambek/term.py +++ b/nltk_contrib/lambek/term.py @@ -25,7 +25,7 @@ def __init__(self): Var._max_id += 1 self.id = Var._max_id def __repr__(self): - return '?' + `self.id` + return '?' 
+ repr(self.id)
     def pp(self, pp_varmap=None):
         if pp_varmap == None: pp_varmap = make_pp_varmap(self)
         return pp_varmap[self]
@@ -40,7 +40,7 @@ def __cmp__(self, other):
 
 class Const(Term):
     def __init__(self, name):
-        if type(name) != types.StringType:
+        if type(name) != str:
             raise TypeError("Expected a string name")
         self.name = name
     def __repr__(self):
@@ -64,9 +64,9 @@ def __init__(self, func, arg):
     def __repr__(self):
         if isinstance(self.func, Appl) or \
            isinstance(self.func, Abstr):
-            return '('+`self.func` + ')(' + `self.arg` + ')'
+            return '('+repr(self.func) + ')(' + repr(self.arg) + ')'
         else:
-            return `self.func` + '(' + `self.arg` + ')'
+            return repr(self.func) + '(' + repr(self.arg) + ')'
     def pp(self, pp_varmap=None):
         if pp_varmap == None: pp_varmap = make_pp_varmap(self)
         if isinstance(self.func, Appl) or \
@@ -101,9 +101,9 @@ def __init__(self, var, body):
     def __repr__(self):
         if isinstance(self.body, Abstr) or \
            isinstance(self.body, Appl):
-            return '(\\' + `self.var` + '.' + `self.body`+')'
+            return '(\\' + repr(self.var) + '.' + repr(self.body)+')'
         else:
-            return '\\' + `self.var` + '.' + `self.body`
+            return '\\' + repr(self.var) + '.' + repr(self.body)
     def pp(self, pp_varmap=None):
         if pp_varmap == None: pp_varmap = make_pp_varmap(self)
         if isinstance(self.body, Abstr) or \
@@ -136,7 +136,7 @@ def __init__(self, left, right):
            not isinstance(self.right, Term):
             raise TypeError('Expected Term arguments')
     def __repr__(self):
-        return '<'+`self.left`+', '+`self.right`+'>'
+        return '<'+repr(self.left)+', '+repr(self.right)+'>'
     def pp(self, pp_varmap=None):
         if pp_varmap == None: pp_varmap = make_pp_varmap(self)
         return '<'+self.left.pp(pp_varmap)+', '+\
@@ -160,20 +160,20 @@ def extend_pp_varmap(pp_varmap, term):
 
     # Get the remaining names.
     freenames = [n for n in Term.FREEVAR_NAME \
-                 if n not in pp_varmap.values()]
+                 if n not in list(pp_varmap.values())]
     boundnames = Term.BOUNDVAR_NAME[:]
 
     for fv in free:
-        if not pp_varmap.has_key(fv):
+        if fv not in pp_varmap:
             if freenames == []:
-                pp_varmap[fv] = `fv`
+                pp_varmap[fv] = repr(fv)
             else:
                 pp_varmap[fv] = freenames.pop()
 
     for bv in bound:
-        if not pp_varmap.has_key(bv):
+        if bv not in pp_varmap:
             if boundnames == []:
-                pp_varmap[bv] = `bv`
+                pp_varmap[bv] = repr(bv)
             else:
                 pp_varmap[bv] = boundnames.pop()
 
@@ -183,7 +183,7 @@ class VarMap:
     def __init__(self):
         self._map = {}
     def add(self, var, term):
-        if self._map.has_key(var):
+        if var in self._map:
            if term != None and term != self._map[var]:
                # Unclear what I should do here -- for now, just pray
                # for the best. :)
@@ -191,7 +191,7 @@ def add(self, var, term):
         else:
             self._map[var] = term
     def __repr__(self):
-        return `self._map`
+        return repr(self._map)
     def _get(self, var, orig, getNone=1):
         val = self._map[var]
         if not getNone and val == None: return var
@@ -201,17 +201,17 @@ def _get(self, var, orig, getNone=1):
            # Break the loop at an arbitrary point.
del(self._map[val]) return val - elif self._map.has_key(val): + elif val in self._map: return(self._get(val, orig, getNone)) else: return val def __getitem__(self, var): - if self._map.has_key(var): + if var in self._map: return self._get(var, var, 1) else: return var def simplify(self, var): - if self._map.has_key(var): + if var in self._map: return self._get(var, var, 0) else: return var @@ -221,7 +221,7 @@ def copy(self): return result def __add__(self, other): result = self.copy() - for var in other._map.keys(): + for var in list(other._map.keys()): result.add(var, other[var]) return result def copy_from(self, other): @@ -251,7 +251,7 @@ def simplify(term, varmap): _VERBOSE = 0 def unify(term1, term2, varmap=None, depth=0): - if _VERBOSE: print ' '*depth+'>> unify', term1, term2, varmap + if _VERBOSE: print((' '*depth+'>> unify', term1, term2, varmap)) term1 = reduce(term1) term2 = reduce(term2) if varmap == None: varmap = VarMap() @@ -260,18 +260,18 @@ def unify(term1, term2, varmap=None, depth=0): result = unify_oneway(term1, term2, varmap, depth+1) if result: if _VERBOSE: - print ' '*depth+'<', result + print((' '*depth+'<', result)) return result varmap.copy_from(old_varmap) result = unify_oneway(term2, term1, varmap, depth+1) if result: if _VERBOSE: - print ' '*depth+'<', result + print((' '*depth+'<', result)) return result #raise(ValueError("can't unify", term1, term2, varmap)) if _VERBOSE: - print ' '*depth+'unify', term1, term2, varmap, '=>', None + print((' '*depth+'unify', term1, term2, varmap, '=>', None)) return None @@ -514,7 +514,7 @@ def parse_term(str, varmap=None): var = re.match(r'\?(.*)', str) if var: varname = var.groups()[0] - if varmap.has_key(varname): + if varname in varmap: return varmap[varname] else: var = Var() @@ -535,22 +535,22 @@ def test(): f3 = Abstr(x, Appl(c, x)) f4 = Abstr(y, Appl(c, y)) - print f1, '=>', reduce(f1) - print f2, '=>', reduce(f2) - print f3, '=>', reduce(f3) - - print f1.pp() - print f2.pp() - print f3.pp() - - print - print unify(x, y) - print unify(x, c) - print unify(x, f1) - print unify(f3, f4) - print unify(Abstr(x,Appl(x,x)), Abstr(y,Appl(y,y))) - - print parse_term('<(\?var.(?var))(?other_var),?x>').pp() + print((f1, '=>', reduce(f1))) + print((f2, '=>', reduce(f2))) + print((f3, '=>', reduce(f3))) + + print((f1.pp())) + print((f2.pp())) + print((f3.pp())) + + print() + print((unify(x, y))) + print((unify(x, c))) + print((unify(x, f1))) + print((unify(f3, f4))) + print((unify(Abstr(x,Appl(x,x)), Abstr(y,Appl(y,y))))) + + print((parse_term('<(\?var.(?var))(?other_var),?x>').pp())) reduce(parse_term('')) diff --git a/nltk_contrib/lambek/typedterm.py b/nltk_contrib/lambek/typedterm.py index 0cdd262..7232bb0 100644 --- a/nltk_contrib/lambek/typedterm.py +++ b/nltk_contrib/lambek/typedterm.py @@ -9,7 +9,7 @@ """CG-style types""" import types -from term import * +from .term import * ##################################### # TYPEDTERM @@ -26,14 +26,14 @@ def __init__(self, term, type): self.type = type def __repr__(self): - return `self.term`+': '+`self.type` + return repr(self.term)+': '+repr(self.type) def pp(self, pp_varmap=None): - return self.term.pp(pp_varmap)+': '+`self.type` + return self.term.pp(pp_varmap)+': '+repr(self.type) def to_latex(self, pp_varmap=None): term = self.term.to_latex(pp_varmap) - type = `self.type` + type = repr(self.type) type = re.sub(r'\\', r'$\\backslash$', type) type = re.sub(r'\*', r'$\\cdot$', type) return term+': \\textrm{'+type+'}' @@ -72,11 +72,11 @@ def __init__(self, arg, result): def 
__repr__(self):
         if isinstance(self.result, RSlash) or \
            isinstance(self.result, LSlash):
-            right = '('+`self.result`+')'
-        else: right = `self.result`
+            right = '('+repr(self.result)+')'
+        else: right = repr(self.result)
         if isinstance(self.arg, RSlash):
-            left = '('+`self.arg`+')'
-        else: left = `self.arg`
+            left = '('+repr(self.arg)+')'
+        else: left = repr(self.arg)
         return left + '\\' + right
     def __cmp__(self, other):
         if isinstance(other, LSlash) and self.arg == other.arg and \
@@ -95,15 +95,15 @@ def __init__(self, result, arg):
             raise TypeError('Expected Type arguments')
     def __repr__(self):
         if isinstance(self.result, RSlash):
-            left = '('+`self.result`+')'
-        else: left = `self.result`
-        return left + '/' + `self.arg`
+            left = '('+repr(self.result)+')'
+        else: left = repr(self.result)
+        return left + '/' + repr(self.arg)
         #return '('+`self.result`+'/'+`self.arg`+')'
         if isinstance(self.arg, LSlash):
-            return `self.result`+'/('+`self.arg`+')'
+            return repr(self.result)+'/('+repr(self.arg)+')'
         else:
-            return `self.result`+'/'+`self.arg`
+            return repr(self.result)+'/'+repr(self.arg)
     def __cmp__(self, other):
         if isinstance(other, RSlash) and self.arg == other.arg and \
            self.result == other.result:
@@ -113,7 +113,7 @@ def __cmp__(self, other):
 
 class BaseType(Type):
     def __init__(self, name):
-        if type(name) != types.StringType:
+        if type(name) != str:
             raise TypeError("Expected a string name")
         self.name = name
     def __repr__(self):
@@ -131,7 +131,7 @@ def __init__(self, left, right):
         if not isinstance(right, Type) or not isinstance(left, Type):
             raise TypeError('Expected Type arguments')
     def __repr__(self):
-        return '('+`self.left`+'*'+`self.right`+')'
+        return '('+repr(self.left)+'*'+repr(self.right)+')'
     def __cmp__(self, other):
         if isinstance(other, Dot) and self.left == other.left and \
            self.right == other.right:
@@ -205,7 +205,7 @@ def parse_type(str):
         else:
             i += 1
     if len(segments) != 1:
-        print 'Ouch!!', segments, ops
+        print(('Ouch!!', segments, ops))
 
     return segments[0]
 
@@ -219,16 +219,16 @@ def test():
     vp = LSlash(np, s)
     v2 = RSlash(vp, np)
     AB = Dot(A, B)
-    print v2
-    print AB
-    print LSlash(AB, v2)
-    print Dot(v2, AB)
-
-    print parse_type('A / B')
-    print parse_type('A \\ B')
-    print parse_type('A / B / C')
-    print parse_type('A * B')
-    print parse_type('A \\ B \\ C')
-    print parse_type('A \\ (B / C)')
-    print parse_type('(A / B) \\ C')
-    print parse_type('(A / B) \\ C')
+    print(v2)
+    print(AB)
+    print((LSlash(AB, v2)))
+    print((Dot(v2, AB)))
+
+    print((parse_type('A / B')))
+    print((parse_type('A \\ B')))
+    print((parse_type('A / B / C')))
+    print((parse_type('A * B')))
+    print((parse_type('A \\ B \\ C')))
+    print((parse_type('A \\ (B / C)')))
+    print((parse_type('(A / B) \\ C')))
+    print((parse_type('(A / B) \\ C')))
diff --git a/nltk_contrib/lpath/__init__.py b/nltk_contrib/lpath/__init__.py
index 33013e0..029ac92 100644
--- a/nltk_contrib/lpath/__init__.py
+++ b/nltk_contrib/lpath/__init__.py
@@ -1 +1 @@
-from lpath import *
+from .lpath import *
diff --git a/nltk_contrib/lpath/at_lite/__init__.py b/nltk_contrib/lpath/at_lite/__init__.py
index 41fe91b..1a5dcad 100644
--- a/nltk_contrib/lpath/at_lite/__init__.py
+++ b/nltk_contrib/lpath/at_lite/__init__.py
@@ -2,9 +2,9 @@
 """
 """
 
-from tree_qt import *
-from treeedit_qlistview import *
-from table_qt import *
-from tableedit_qtable import *
+from .tree_qt import *
+from .treeedit_qlistview import *
+from .table_qt import *
+from .tableedit_qtable import *
diff --git a/nltk_contrib/lpath/at_lite/myaccel.py
b/nltk_contrib/lpath/at_lite/myaccel.py index 1e48771..231cbb6 100644 --- a/nltk_contrib/lpath/at_lite/myaccel.py +++ b/nltk_contrib/lpath/at_lite/myaccel.py @@ -69,7 +69,7 @@ def setKeyBindings(self, keyBindings): """ bindings = {} - for keyseq,binding in keyBindings.items(): + for keyseq,binding in list(keyBindings.items()): seq = [] for subkeyseq in keyseq.split(','): a = [] diff --git a/nltk_contrib/lpath/at_lite/table.py b/nltk_contrib/lpath/at_lite/table.py index 0157003..c5e9307 100644 --- a/nltk_contrib/lpath/at_lite/table.py +++ b/nltk_contrib/lpath/at_lite/table.py @@ -1,4 +1,4 @@ -from tableio import TableIo +from .tableio import TableIo import bisect __all__ = ['TableModel'] @@ -177,7 +177,7 @@ def bisect_left(self, col, val): """ if type(col) != int: col = self.str2col[col] - return bisect.bisect_left(map(lambda x:x[col],self.table),val) + return bisect.bisect_left([x[col] for x in self.table],val) def bisect_right(self, col, val): """ @@ -185,7 +185,7 @@ def bisect_right(self, col, val): """ if type(col) != int: col = self.str2col[col] - return bisect.bisect_right(map(lambda x:x[col],self.table),val) + return bisect.bisect_right([x[col] for x in self.table],val) def setMetadata(self, nam, val): if type(val) != str: @@ -210,24 +210,24 @@ def getMetadata(self, nam, evl=False): tab[1][2] = 3 tab.printTable() - print + print() tab.insertColumn(1,["extra",10,9]) tab.printTable() - print + print() c = tab.takeColumn(1) tab.insertColumn(3,c) tab.printTable() - print + print() r = tab.takeRow(0) tab.insertRow(1,r) tab.printTable() - print + print() tab.sort(2,3) tab.printTable() diff --git a/nltk_contrib/lpath/at_lite/table_qt.py b/nltk_contrib/lpath/at_lite/table_qt.py index 0e0bbcd..0ff0bbe 100644 --- a/nltk_contrib/lpath/at_lite/table_qt.py +++ b/nltk_contrib/lpath/at_lite/table_qt.py @@ -1,5 +1,5 @@ -import tableproxy -from table import TableModel +from . 
import tableproxy +from .table import TableModel __all__ = ['TableModel'] @@ -17,24 +17,24 @@ tab[1][2] = 3 tab.printTable() - print + print() tab.insertColumn(1,[("extra",int),10,9]) tab.printTable() - print + print() c = tab.takeColumn(1) tab.insertColumn(3,c) tab.printTable() - print + print() r = tab.takeRow(0) tab.insertRow(1,r) tab.printTable() - print + print() tab.sort(1,2) tab.printTable() diff --git a/nltk_contrib/lpath/at_lite/tableedit_qtable.py b/nltk_contrib/lpath/at_lite/tableedit_qtable.py index 56cdab7..63609e5 100644 --- a/nltk_contrib/lpath/at_lite/tableedit_qtable.py +++ b/nltk_contrib/lpath/at_lite/tableedit_qtable.py @@ -7,8 +7,8 @@ def __init__(self, parent=None): self.data = None def setData(self, data): - self.removeColumns(range(self.numCols())) - self.removeRows(range(self.numRows())) + self.removeColumns(list(range(self.numCols()))) + self.removeRows(list(range(self.numRows()))) self.setNumCols(len(data.header)) for j,(h,t) in enumerate(data.header): @@ -17,7 +17,7 @@ def setData(self, data): for i,row in enumerate(data): for j,h in enumerate(row): if h is not None: - if type(h)==str or type(h)==unicode: + if type(h)==str or type(h)==str: self.setText(i,j,h) else: self.setText(i,j,str(h)) @@ -36,7 +36,7 @@ def _cellChanged(self, i, j, val): if val is None: val = '' self.disconnect(self,SIGNAL("valueChanged(int,int)"),self.__cellChanged) - self.setText(i,j,unicode(val)) + self.setText(i,j,str(val)) self.connect(self,SIGNAL("valueChanged(int,int)"),self.__cellChanged) def _insertRow(self, i, row): @@ -70,7 +70,7 @@ def __cellChanged(self, row, col): if __name__ == '__main__': import qt - from table_qt import TableModel + from .table_qt import TableModel class Demo(qt.QVBox): def __init__(self): diff --git a/nltk_contrib/lpath/at_lite/tableio.py b/nltk_contrib/lpath/at_lite/tableio.py index 6a582ad..9c2f329 100644 --- a/nltk_contrib/lpath/at_lite/tableio.py +++ b/nltk_contrib/lpath/at_lite/tableio.py @@ -27,7 +27,7 @@ import codecs import re -from error import * +from .error import * __all__ = ['TableIo'] @@ -38,7 +38,7 @@ def printTable(self): size = [len(str(x)) for x,t in self.header] for row in self.table: for i,c in enumerate(row): - if type(c)==str or type(c)==unicode: + if type(c)==str or type(c)==str: n = len(c) else: n = len(str(c)) @@ -52,7 +52,7 @@ def printRow(row,bar=True): s += "%%%ds|" % size[i] % str(c) else: s += "%%-%ds|" % size[i] % c - print s[:-1] + print((s[:-1])) printRow([s for s,t in self.header]) for row in self.table: @@ -73,7 +73,7 @@ def exportTdf(self, filename): f = writer(file(filename,'w')) f.write("\t".join([a[0]+';'+a[1].__name__ for a in self.header]) + "\n") - for item in self.metadata.items(): + for item in list(self.metadata.items()): f.write(";;MM %s\t%s\n" % item) for row in self.table: for c in row[:-1]: @@ -81,7 +81,7 @@ def exportTdf(self, filename): f.write("\t") else: t = type(c) - if t==str or t==unicode: + if t==str or t==str: f.write(c+"\t") else: f.write(str(c)+"\t") @@ -89,18 +89,18 @@ def exportTdf(self, filename): f.write("\n") else: t = type(row[-1]) - if t==str or t==unicode: + if t==str or t==str: f.write(row[-1]+"\n") else: f.write(str(row[-1])+"\n") - except IOError, e: + except IOError as e: raise Error(ERR_TDF_EXPORT, str(e)) def importTdf(cls, filename): _,_,reader,_ = codecs.lookup('utf-8') try: f = reader(file(filename)) - except IOError, e: + except IOError as e: raise Error(ERR_TDF_IMPORT, e) head = [] for h in f.readline().rstrip("\r\n").split("\t"): @@ -125,10 +125,10 @@ def importTdf(cls, 
filename): try: for i,cell in enumerate(l.rstrip("\n").split("\t")): row.append(head[i][1](cell)) - except ValueError, e: + except ValueError as e: raise Error(ERR_TDF_IMPORT, "[%d:%d] %s" % (lno,i,str(e))) - except IndexError, e: + except IndexError as e: msg = "record has too many fields" raise Error(ERR_TDF_IMPORT, "[%d:%d] %s" % (lno,i,msg)) diff --git a/nltk_contrib/lpath/at_lite/tableproxy.py b/nltk_contrib/lpath/at_lite/tableproxy.py index 80496ca..39e7f1f 100644 --- a/nltk_contrib/lpath/at_lite/tableproxy.py +++ b/nltk_contrib/lpath/at_lite/tableproxy.py @@ -19,7 +19,7 @@ def prev(self): else: return None - def next(self): + def __next__(self): if self._limit > self._top: self._top += 1 return self._stack[self._top] @@ -149,7 +149,7 @@ def undo(self, n=1): def redo(self, n=1): for m in range(n): try: - op, arg1, arg2 = self.undoStack.next() + op, arg1, arg2 = next(self.undoStack) #print "redo", op, arg1, arg2 #print len(self.undoStack._stack) except TypeError: diff --git a/nltk_contrib/lpath/at_lite/tree.py b/nltk_contrib/lpath/at_lite/tree.py index 3c4812d..78ec7c3 100644 --- a/nltk_contrib/lpath/at_lite/tree.py +++ b/nltk_contrib/lpath/at_lite/tree.py @@ -1,4 +1,4 @@ -from treeio import TreeIo +from .treeio import TreeIo __all__ = ['TreeModel'] @@ -149,4 +149,4 @@ def follows(self, n): s = "(S (NP (N I)) (VP (VP (V saw) (NP (DT the) (N man))) (PP (P with) (NP (DT a) (N telescope)))))" t = bracket_parse(s) root = TreeModel.importNltkLiteTree(t) - print root.treebankString("label") + print((root.treebankString("label"))) diff --git a/nltk_contrib/lpath/at_lite/tree_qt.py b/nltk_contrib/lpath/at_lite/tree_qt.py index 5ee3867..d8a8709 100644 --- a/nltk_contrib/lpath/at_lite/tree_qt.py +++ b/nltk_contrib/lpath/at_lite/tree_qt.py @@ -1,5 +1,5 @@ from qt import QObject, PYSIGNAL -from tree import TreeModel as PureTree +from .tree import TreeModel as PureTree __all__ = ['TreeModel'] diff --git a/nltk_contrib/lpath/at_lite/treeedit_qlistview.py b/nltk_contrib/lpath/at_lite/treeedit_qlistview.py index e44c4f1..9961b34 100644 --- a/nltk_contrib/lpath/at_lite/treeedit_qlistview.py +++ b/nltk_contrib/lpath/at_lite/treeedit_qlistview.py @@ -1,5 +1,5 @@ from qt import QListView, QListViewItem, PYSIGNAL -from myaccel import AccelKeyHandler +from .myaccel import AccelKeyHandler __all__ = ['TreeEdit'] @@ -91,7 +91,7 @@ def accel_new(self): if self.data is None: return n = self.data.__class__() x = [self] + [None] * len(self.col2str) - item = apply(TreeEditItem,x) + item = TreeEditItem(*x) for sig in ("attach","insertLeft","insertRight","prune","splice"): n.connect(n,PYSIGNAL(sig),eval("item._%s"%sig)) self.takeItem(item) @@ -147,7 +147,7 @@ def setData(self, data, fields=[]): x = [T[-1],n.data[fields[0][0]]] for f,v in fields[1:]: x.append(str(n.data[f])) - e = apply(TreeEditItem, x) + e = TreeEditItem(*x) for sig in ("attach","insertLeft","insertRight","prune","splice"): n.connect(n,PYSIGNAL(sig),eval("e._%s"%sig)) e.treenode = n @@ -159,7 +159,7 @@ def setData(self, data, fields=[]): if __name__ == "__main__": - from tree_qt import TreeModel + from .tree_qt import TreeModel import qt class Demo(qt.QVBox): diff --git a/nltk_contrib/lpath/at_lite/treeio.py b/nltk_contrib/lpath/at_lite/treeio.py index e652e90..cd3d1ad 100644 --- a/nltk_contrib/lpath/at_lite/treeio.py +++ b/nltk_contrib/lpath/at_lite/treeio.py @@ -14,10 +14,10 @@ def treebankString(self, p): continue c = n.children if c: - s += ' (' + unicode(n.data[p]) + s += ' (' + str(n.data[p]) L = c + [None] + L[1:] else: - s += ' ' + 
unicode(n.data[p]) + s += ' ' + str(n.data[p]) L = L[1:] return s[1:] @@ -118,7 +118,7 @@ def exportLPathTable(self, TableModel, sid=0, tid=0): # Make sure all the node's application-specific attributes are recorded. r['attributes'] = [] if n.data != None: - for attr, value in n.data.iteritems(): + for attr, value in list(n.data.items()): if attr == 'label': r['name'] = value else: diff --git a/nltk_contrib/lpath/axis.py b/nltk_contrib/lpath/axis.py index a750651..1842f80 100644 --- a/nltk_contrib/lpath/axis.py +++ b/nltk_contrib/lpath/axis.py @@ -137,7 +137,7 @@ def drawLineHead(self, painter): self._drawNegationHead(painter) def drawShape(self, painter): - apply(painter.drawLine, self.points) + painter.drawLine(*self.points) self.drawLineHead(painter) def toggleHeadType(self): diff --git a/nltk_contrib/lpath/db.py b/nltk_contrib/lpath/db.py index c81cb22..469eeeb 100644 --- a/nltk_contrib/lpath/db.py +++ b/nltk_contrib/lpath/db.py @@ -4,14 +4,14 @@ import time from qt import * from threading import Thread, Lock -import lpath -import at_lite as at +from . import lpath +from . import at_lite as at #from pyPgSQL import PgSQL try: from sqlite3 import dbapi2 as sqlite except ImportError: from pysqlite2 import dbapi2 as sqlite -from lpathtree_qt import * +from .lpathtree_qt import * __all__ = ["LPathDB", "LPathDbI", "LPathPgSqlDB", "LPathOracleDB", "LPathMySQLDB"] @@ -89,7 +89,7 @@ class LPathDbI: LPATH_TABLE_HEADER = [ ('sid',int),('tid',int),('id',int),('pid',int), ('left',int),('right',int),('depth',int), - ('type',unicode),('name',unicode),('value',unicode) + ('type',str),('name',str),('value',str) ] EVENT_MORE_TREE = QEvent.User diff --git a/nltk_contrib/lpath/dbdialog.py b/nltk_contrib/lpath/dbdialog.py index a338809..22c6e3d 100644 --- a/nltk_contrib/lpath/dbdialog.py +++ b/nltk_contrib/lpath/dbdialog.py @@ -1,5 +1,5 @@ from qt import * -from db import * +from .db import * import os try: from pyPgSQL import PgSQL @@ -67,7 +67,7 @@ def connect(self): conn = PgSQL.connect(**conninfo) conn2 = PgSQL.connect(**conninfo) return LPathPgSqlDB(conn, conn2, conninfo["user"].ascii()) - except PgSQL.libpq.DatabaseError, e: + except PgSQL.libpq.DatabaseError as e: try: enc = os.environ['LANG'].split('.')[-1] msg = e.message.decode(enc) @@ -111,7 +111,7 @@ def connect(self): try: conn = cx_Oracle.connect(user+'/'+pw+service) conn2 = cx_Oracle.connect(user+'/'+pw+service) - except cx_Oracle.DatabaseError, e: + except cx_Oracle.DatabaseError as e: try: enc = os.environ['LANG'].split('.')[-1] msg = e.__str__().decode(enc) @@ -157,7 +157,7 @@ def connect(self): try: conn = MySQLdb.connect(**conninfo) return LPathMySQLDB(conn) - except MySQLdb.DatabaseError, e: + except MySQLdb.DatabaseError as e: try: enc = os.environ['LANG'].split('.')[-1] msg = e.message.decode(enc) @@ -235,7 +235,7 @@ def connectToDb(self, *args): try: self.db = self.wstack.visibleWidget().connect() self.accept() - except ConnectionError, e: + except ConnectionError as e: QMessageBox.critical(self, "Connection Error", "Unable to connect to database:\n" + e.__str__()) @@ -276,7 +276,7 @@ def __init__(self, tableNames, parent=None, name=None, def _okClicked(self): sel = self.listbox.selectedItem() if sel is not None: - self.tab = unicode(sel.text()) + self.tab = str(sel.text()) self.accept() else: QMessageBox.critical(self, "Error", "You didn't select a table.") diff --git a/nltk_contrib/lpath/lpath/__init__.py b/nltk_contrib/lpath/lpath/__init__.py index 33013e0..029ac92 100644 --- a/nltk_contrib/lpath/lpath/__init__.py +++ 
b/nltk_contrib/lpath/lpath/__init__.py @@ -1 +1 @@ -from lpath import * +from .lpath import * diff --git a/nltk_contrib/lpath/lpath/lpath.py b/nltk_contrib/lpath/lpath/lpath.py index a62027f..1ac5d58 100644 --- a/nltk_contrib/lpath/lpath/lpath.py +++ b/nltk_contrib/lpath/lpath/lpath.py @@ -142,11 +142,11 @@ def __unicode__(self): L = [] for x in self: if isinstance(x, str): - L.append(unicode(x)) - elif isinstance(x, unicode): + L.append(str(x)) + elif isinstance(x, str): L.append(x) elif isinstance(x, AND) or isinstance(x, OR) or isinstance(x, NOT): - L.append(unicode(x)) + L.append(str(x)) elif isinstance(x, flatten): for e in x: L.append("%s%s%s" % tuple(e)) @@ -155,7 +155,7 @@ def __unicode__(self): elif isinstance(x, Trans): L.append("exists (%s)" % x.getSql()) else: - L.append(unicode(x)) + L.append(str(x)) L.append(self.joiner) return "(" + " ".join(L[:-1]) + ")" @@ -182,7 +182,7 @@ def __str__(self): return "not " + str(self.lst) def __unicode__(self): - return "not " + unicode(self.lst) + return "not " + str(self.lst) class flatten(list): @@ -219,7 +219,7 @@ def __getattr__(self, k): if hasattr(self, k): eval('self.' + k) else: - raise(AttributeError("Step instance has no attribute '%s'" % k)) + raise AttributeError class Trans: @@ -286,7 +286,7 @@ def getSql(self): s2 = self.steps[i+1] self._interpreteAxis(s, s2.axis, s2) - w = unicode(self.WHERE).strip() + w = str(self.WHERE).strip() if w: sql += "where %s" % w return sql @@ -295,7 +295,7 @@ def _expand(self, t): name = "_" + t.node for c in t: name += "_" - if isinstance(c,str) or isinstance(c,unicode): + if isinstance(c,str) or isinstance(c,str): name += self.TR[c] else: name += c.node @@ -357,7 +357,7 @@ def _interpreteAxis(self, step1, axis, step2): [step1.left, "<=", step2.left], [step1.right, ">=", step2.right], [step1.depth, "<", step2.depth], - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) ] elif step2.conditional == '*': self.WHERE += [ @@ -366,7 +366,7 @@ def _interpreteAxis(self, step1, axis, step2): AND([step1.left, "<=", step2.left], [step1.right, ">=", step2.right], [step1.depth, "<", step2.depth], - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE))) )) ] @@ -408,7 +408,7 @@ def _interpreteAxis(self, step1, axis, step2): [step1.left, ">=", step2.left], [step1.right, "<=", step2.right], [step1.depth, ">", step2.depth], - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) ] elif step2.conditional == '*': self.WHERE += [ @@ -417,7 +417,7 @@ def _interpreteAxis(self, step1, axis, step2): AND([step1.left, ">=", step2.left], [step1.right, "<=", step2.right], [step1.depth, ">", step2.depth], - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE))) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE))) )) ] @@ -449,8 +449,8 @@ def _interpreteAxis(self, step1, axis, step2): ["z.left", ">=", step1.right], ["z.right", "<=", step2.left], NOT(GRP(flatten(step2.getConstraints()))), - "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)), - "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE)) + "not exists (select 1 from %s c where %s)" % (self.tname,str(cWHERE)), + "not exists (select 1 from %s w where %s)" % (self.tname,str(wWHERE)) ) self.WHERE += [ @@ -470,7 
+470,7 @@ def _interpreteAxis(self, step1, axis, step2): self.WHERE += [ [step1.right, "<=", step2.left], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) ] elif step2.conditional == '*': self.WHERE += [ @@ -479,7 +479,7 @@ def _interpreteAxis(self, step1, axis, step2): GRP(AND( [step1.right, "<=", step2.left], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) )))) ] @@ -511,8 +511,8 @@ def _interpreteAxis(self, step1, axis, step2): ["z.left", ">=", step2.right], ["z.right", "<=", step1.left], NOT(GRP(flatten(step2.getConstraints()))), - "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)), - "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE)) + "not exists (select 1 from %s c where %s)" % (self.tname,str(cWHERE)), + "not exists (select 1 from %s w where %s)" % (self.tname,str(wWHERE)) ) self.WHERE += [ @@ -532,7 +532,7 @@ def _interpreteAxis(self, step1, axis, step2): self.WHERE += [ [step1.left, ">=", step2.right], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) ] elif step2.conditional == '*': self.WHERE += [ @@ -541,7 +541,7 @@ def _interpreteAxis(self, step1, axis, step2): GRP(AND( [step1.left, ">=", step2.right], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) )))) ] @@ -573,8 +573,8 @@ def _interpreteAxis(self, step1, axis, step2): ["z.left", ">=", step1.right], ["z.right", "<=", step2.left], NOT(GRP(flatten(step2.getConstraints()))), - "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)), - "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE)) + "not exists (select 1 from %s c where %s)" % (self.tname,str(cWHERE)), + "not exists (select 1 from %s w where %s)" % (self.tname,str(wWHERE)) ) self.WHERE += [ @@ -596,7 +596,7 @@ def _interpreteAxis(self, step1, axis, step2): [step1.right, "<=", step2.left], [step1.pid, "=", step2.pid], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) ] elif step2.conditional == '*': self.WHERE += [ @@ -606,7 +606,7 @@ def _interpreteAxis(self, step1, axis, step2): [step1.right, "<=", step2.left], [step1.pid, "=", step2.pid], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) )))) ] @@ -638,8 +638,8 @@ def _interpreteAxis(self, step1, axis, step2): ["z.left", ">=", step2.right], ["z.right", "<=", step1.left], NOT(GRP(flatten(step2.getConstraints()))), - "not exists (select 1 from %s c where %s)" % (self.tname,unicode(cWHERE)), - "not exists (select 1 from %s w where %s)" % (self.tname,unicode(wWHERE)) + "not exists (select 1 from %s c where %s)" % (self.tname,str(cWHERE)), + "not exists (select 1 from %s w where %s)" % (self.tname,str(wWHERE)) ) self.WHERE += [ @@ -661,7 +661,7 @@ def _interpreteAxis(self, step1, axis, step2): [step1.left, ">=", step2.right], [step1.pid, "=", step2.pid], 
flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) ] elif step2.conditional == '*': self.WHERE += [ @@ -671,7 +671,7 @@ def _interpreteAxis(self, step1, axis, step2): [step1.left, ">=", step2.right], [step1.pid, "=", step2.pid], flatten(step2.getConstraints()), - "not exists (select 1 from %s z where %s)" % (self.tname,unicode(zWHERE)) + "not exists (select 1 from %s z where %s)" % (self.tname,str(zWHERE)) )))) ] @@ -936,7 +936,7 @@ def getSql(self): s2 = self.steps[i+1] self._interpreteAxis(s, s2.axis, s2) - w = unicode(self.WHERE).strip() + w = str(self.WHERE).strip() if w: sql += "where %s" % w return sql @@ -950,7 +950,7 @@ def _R_P(self, t): for i,s in enumerate(tr.steps[:-1]): s2 = tr.steps[i+1] tr._interpreteAxis(s, s2.axis, s2) - self.WHERE.append(unicode(tr.WHERE).strip()) + self.WHERE.append(str(tr.WHERE).strip()) def translate2(q,tname='T'): global T2, T3, T4, T5, T6, GR @@ -998,13 +998,13 @@ def translate(q,tname='T'): def print_profile(): - print - print " python startup: %6.3fs" % (T1-T0) - print " query tokenization: %6.3fs" % (T3-T2) - print " grammar parsing: %6.3fs" % (T4-T3) - print " chart parsing: %6.3fs" % (T5-T4) - print " translation: %6.3fs" % (T6-T5) - print + print() + print((" python startup: %6.3fs" % (T1-T0))) + print((" query tokenization: %6.3fs" % (T3-T2))) + print((" grammar parsing: %6.3fs" % (T4-T3))) + print((" chart parsing: %6.3fs" % (T5-T4))) + print((" translation: %6.3fs" % (T6-T5))) + print() def get_profile(): # tok/grammar/parsing/trans times @@ -1038,6 +1038,6 @@ def get_base_grammar(): #l = tokenize('//VP[{//^V->NP->PP$}]') #l = tokenize('//A//B//C') - print translate2(sys.argv[1])[1] + print((translate2(sys.argv[1])[1])) print_profile() #print get_grammar() diff --git a/nltk_contrib/lpath/lpath/tb2tbl.py b/nltk_contrib/lpath/lpath/tb2tbl.py index 020b502..b143306 100644 --- a/nltk_contrib/lpath/lpath/tb2tbl.py +++ b/nltk_contrib/lpath/lpath/tb2tbl.py @@ -13,7 +13,7 @@ def tb2tbl(tree,a,b): #conn.begin() #cursor.execute("begin") for r in tree.exportLPathTable(TableModel,a,b): - print r + print(r) cursor.execute(SQL1, tuple(r)) #cursor.execute("commit") conn.commit() @@ -25,8 +25,8 @@ def connectdb(opts): conn = PgSQL.connect( host=opts.host, port=opts.port, database=opts.db, user=opts.user, password=opts.passwd) - except PgSQL.libpq.DatabaseError, e: - print e + except PgSQL.libpq.DatabaseError as e: + print(e) sys.exit(1) return conn elif opts.servertype == 'oracle': @@ -43,12 +43,12 @@ def connectdb(opts): try: conn = MySQLdb.connect(host=opts.host, port=opts.port, db=opts.db, user=opts.user, passwd=opts.passwd) - except DatabaseError, e: - print e + except DatabaseError as e: + print(e) sys.exit(1) return conn - except ImportError, e: - print e + except ImportError as e: + print(e) sys.exit(1) def limit(servertype, sql, num): @@ -154,7 +154,7 @@ def getpass(): optpar.error("user name is missing") if opts.passwd is None: - print "Password:", + print("Password:", end=' ') opts.passwd = getpass() else: passwd = opts.passwd @@ -186,20 +186,20 @@ def getpass(): conn = connectdb(opts) cursor = conn.cursor() - print os.path.join('',os.path.dirname(sys.argv[0])) + print(os.path.join('',os.path.dirname(sys.argv[0]))) # check if table exists try: sql = limit(opts.servertype, "select * from "+opts.table, 1) cursor.execute(sql) - except DatabaseError, e: + except DatabaseError as e: if opts.create: p = 
os.path.join(os.path.dirname(sys.argv[0]),'lpath-schema.sql') for line in file(p).read().replace("TABLE",opts.table).split(';'): if line.strip(): cursor.execute(line) else: - print "table %s doesn't exist" % `opts.table` + print("table %s doesn't exist" % repr(opts.table)) sys.exit(1) # set correct table name in the insertion SQL @@ -232,20 +232,20 @@ def do(tree): reader = codecs.getreader('utf-8') if tbdir == '-': for tree in TreeModel.importTreebank(reader(sys.stdin)): - print tree + print(tree) do(tree) count -= 1 if count == 0: break else: for root, dirs, files in os.walk(tbdir): for f in files: - print f, + print(f, end=' ') if filter.match(f): p = os.path.join(root,f) for tree in TreeModel.importTreebank(reader(file(p))): do(tree) count -= 1 if count == 0: sys.exit(0) # done - print sid + print(sid) else: - print 'skipped' + print('skipped') diff --git a/nltk_contrib/lpath/lpathtree.py b/nltk_contrib/lpath/lpathtree.py index 792ef92..4cc2343 100644 --- a/nltk_contrib/lpath/lpathtree.py +++ b/nltk_contrib/lpath/lpathtree.py @@ -1,5 +1,5 @@ -import at_lite as at -from at_lite.tree import TreeModel as PureTree +from . import at_lite as at +from .at_lite.tree import TreeModel as PureTree __all__ = ['LPathTreeModel'] @@ -380,7 +380,7 @@ def lpScopeSiblings(self, filter=lambda x:True): L = [] if self.lpScope is not None: def f(node): - if node.lpScope == self.lpScope and filter(node): + if node.lpScope == self.lpScope and list(filter(node)): L.append(node) self.root.dfs(f) return L diff --git a/nltk_contrib/lpath/lpathtree_qt.py b/nltk_contrib/lpath/lpathtree_qt.py index 18284d6..7ac12a1 100644 --- a/nltk_contrib/lpath/lpathtree_qt.py +++ b/nltk_contrib/lpath/lpathtree_qt.py @@ -1,5 +1,5 @@ -from lpathtree import LPathTreeModel as PureLPathTree -from axis import * +from .lpathtree import LPathTreeModel as PureLPathTree +from .axis import * from qt import QObject __all__ = ['LPathTreeModel'] @@ -55,7 +55,7 @@ def redrawAxis(self): self.axis = cls(self.gui.canvas()) self.axis.target = target self.axis.root = root - apply(self.axis.setPoints, coords) + self.axis.setPoints(*coords) if self.getNot(): self.axis.setHeadType(Axis.HeadNegation) elif not self.lpOnMainTrunk(): @@ -71,7 +71,7 @@ def _newAxis(self, node): node.axis.target = node #coords = node.gui.connectingLine(self.gui) coords = self.gui.connectingLine(node.gui) - apply(node.axis.setPoints, coords) + node.axis.setPoints(*coords) if node.getNot(): node.axis.setHeadType(Axis.HeadNegation) elif not node.lpOnMainTrunk(): diff --git a/nltk_contrib/lpath/nodefeaturedialog.py b/nltk_contrib/lpath/nodefeaturedialog.py index 682901e..819a531 100644 --- a/nltk_contrib/lpath/nodefeaturedialog.py +++ b/nltk_contrib/lpath/nodefeaturedialog.py @@ -1,6 +1,6 @@ from qt import * -from at_lite import TableModel, TableEdit -import lpath +from .at_lite import TableModel, TableEdit +from . 
import lpath class NodeFeatureDialog(QDialog): def __init__(self, node, parent): @@ -8,7 +8,7 @@ def __init__(self, node, parent): self.setCaption('Node Attribute Dialog') self.resize(320,240) - tab = TableModel([("Name",unicode),("Value",unicode)]) + tab = TableModel([("Name",str),("Value",str)]) tab.insertRow(None, ['label',node.data['label']]) if '@func' in node.data: for v in node.data['@func']: diff --git a/nltk_contrib/lpath/overlay.py b/nltk_contrib/lpath/overlay.py index 9375844..809000e 100644 --- a/nltk_contrib/lpath/overlay.py +++ b/nltk_contrib/lpath/overlay.py @@ -1,5 +1,5 @@ import re -from translator import translate_sub +from .translator import translate_sub __all__ = ["find_overlays", "Overlay"]; @@ -138,7 +138,7 @@ def g(t, L): M = [] for match in TAB: - m = match.items() + m = list(match.items()) m.sort() L = [] for sym,tup in m: diff --git a/nltk_contrib/lpath/parselpath.py b/nltk_contrib/lpath/parselpath.py index 8b8a83d..a6c3389 100644 --- a/nltk_contrib/lpath/parselpath.py +++ b/nltk_contrib/lpath/parselpath.py @@ -1,6 +1,6 @@ -from lpath import tokenize -from lpathtree import LPathTreeModel -from translator import translate +from .lpath import tokenize +from .lpathtree import LPathTreeModel +from .translator import translate SCOPE = ['{','}'] BRANCH = ['[',']'] @@ -129,17 +129,17 @@ def parse_lpath(q): def f(t, n): if t is not None: - print (" "*n) + t.data['label'] + print(((" "*n) + t.data['label'])) for c in t.children: f(c, n+4) def g(t, n): if t is not None: - print (" "*n) + t.data['label'] + print(((" "*n) + t.data['label'])) for c in t.lpChildren: g(c, n+4) else: - print " "*n + "None" + print((" "*n + "None")) g(t,0) - print translate(t) + print((translate(t))) diff --git a/nltk_contrib/lpath/qba.py b/nltk_contrib/lpath/qba.py index 8628a09..10bf076 100644 --- a/nltk_contrib/lpath/qba.py +++ b/nltk_contrib/lpath/qba.py @@ -2,17 +2,17 @@ import os from qt import * from qtcanvas import * -from treecanvas import * -from treecanvasview import * -from lpathtree_qt import * -from axis import * -from db import * -from dbdialog import * -from sqlviewdialog import * -from overlay import * -from translator import translate -from parselpath import parse_lpath -from lpath import tokenize +from .treecanvas import * +from .treecanvasview import * +from .lpathtree_qt import * +from .axis import * +from .db import * +from .dbdialog import * +from .sqlviewdialog import * +from .overlay import * +from .translator import translate +from .parselpath import parse_lpath +from .lpath import tokenize class QBA(QMainWindow): def __init__(self, tree=None): @@ -171,7 +171,7 @@ def menu_File_SaveImage(self): "XPM (*.xpm)") if d.exec_loop() == QDialog.Rejected: return filenam = d.selectedFile() - filenam = unicode(filenam) + filenam = str(filenam) self._saveImageDir = os.path.dirname(filenam) if os.path.exists(filenam): res = QMessageBox.question( @@ -262,7 +262,7 @@ def main(): app.setMainWidget(w) if len(sys.argv) == 2: generator = LPathTreeModel.importTreebank(file(sys.argv[1])) - w.setTree(generator.next()) + w.setTree(next(generator)) w.show() w.setCaption('LPath QBA') # this is only necessary on windows app.exec_loop() diff --git a/nltk_contrib/lpath/sqlviewdialog.py b/nltk_contrib/lpath/sqlviewdialog.py index 294d55a..009f738 100644 --- a/nltk_contrib/lpath/sqlviewdialog.py +++ b/nltk_contrib/lpath/sqlviewdialog.py @@ -1,5 +1,5 @@ from qt import * -import lpath +from . 
import lpath class SqlViewDialog(QDialog): def __init__(self, lpql=None, parent=None, name=None, diff --git a/nltk_contrib/lpath/translator.py b/nltk_contrib/lpath/translator.py index cef9227..f7f106e 100644 --- a/nltk_contrib/lpath/translator.py +++ b/nltk_contrib/lpath/translator.py @@ -1,5 +1,5 @@ -from StringIO import StringIO -import at_lite as at +from io import StringIO +from . import at_lite as at __all__ = ["translate", "translate_sub"] diff --git a/nltk_contrib/lpath/treecanvas.py b/nltk_contrib/lpath/treecanvas.py index 5b56e11..757f515 100644 --- a/nltk_contrib/lpath/treecanvas.py +++ b/nltk_contrib/lpath/treecanvas.py @@ -1,6 +1,6 @@ from qtcanvas import * from qt import * -from treecanvasnode import * +from .treecanvasnode import * __all__ = ["TreeCanvas"] @@ -105,7 +105,7 @@ def redraw(self): item = node.gui item2 = node.parent.gui coords = item.connectingLine(item2) - apply(node.line.setPoints, coords) + node.line.setPoints(*coords) node.show() self.collapse(self._data) @@ -143,7 +143,7 @@ def _layout(self): line = QCanvasLine(self) line.setPen(pen) node.line = line - apply(line.setPoints, coords) + line.setPoints(*coords) node.show() self._w = self._width[self._data] diff --git a/nltk_contrib/lpath/treecanvasnode.py b/nltk_contrib/lpath/treecanvasnode.py index eaddda3..95b871f 100644 --- a/nltk_contrib/lpath/treecanvasnode.py +++ b/nltk_contrib/lpath/treecanvasnode.py @@ -1,6 +1,6 @@ from qt import * from qtcanvas import * -from lpathtree_qt import * +from .lpathtree_qt import * class Point: def __init__(self, *args): diff --git a/nltk_contrib/lpath/treecanvasview.py b/nltk_contrib/lpath/treecanvasview.py index 171a758..f7661fd 100644 --- a/nltk_contrib/lpath/treecanvasview.py +++ b/nltk_contrib/lpath/treecanvasview.py @@ -1,10 +1,10 @@ from qt import * from qtcanvas import * -from treecanvasnode import * -from nodefeaturedialog import * -from translator import translate -from axis import * -import lpath +from .treecanvasnode import * +from .nodefeaturedialog import * +from .translator import translate +from .axis import * +from . 
import lpath import math class FilterExpressionPopup(QLabel): @@ -59,7 +59,7 @@ def mousePressEvent(self, e): s,ans = QInputDialog.getText('Edit Filter Expression','Enter new filter expression', QLineEdit.Normal,self.text(),self) if ans: - s = unicode(s).strip() + s = str(s).strip() if s: self.node.filterExpression = s else: @@ -132,7 +132,7 @@ def processRightClickPopup(self, *args): s,ans = QInputDialog.getText('New Filter Expression','Enter filter expression', QLineEdit.Normal,s,self) if ans: - s = unicode(s).strip() + s = str(s).strip() if s: if lpath.translate("//A[%s]"%s) is None: QMessageBox.critical(self,"Error","Invalid filter expression.") @@ -147,7 +147,7 @@ def processRightClickPopup(self, *args): s,ans = QInputDialog.getText('Edit Label','Enter new label', QLineEdit.Normal,item.node.label,self) if ans: - s = unicode(s).strip() + s = str(s).strip() if s: if 'originalLabel' not in item.node.data: item.node.data['originalLabel'] = item.node.label diff --git a/nltk_contrib/misc/annotationgraph.py b/nltk_contrib/misc/annotationgraph.py index d841ea2..e5a072b 100644 --- a/nltk_contrib/misc/annotationgraph.py +++ b/nltk_contrib/misc/annotationgraph.py @@ -12,7 +12,7 @@ class AnnotationGraph(object): def __init__(self, t): self._edges = [] self._len = len(t.leaves()) - self._nodes = range(self._len) + self._nodes = list(range(self._len)) self._convert(t, 0) self._index = Index((start, (end, label)) for (start, end, label) in self._edges) @@ -75,7 +75,7 @@ def demo(): t = Tree(s) ag = AnnotationGraph(t) for p in ag.pas2([]): - print p + print(p) if __name__ == '__main__': demo() diff --git a/nltk_contrib/misc/didyoumean.py b/nltk_contrib/misc/didyoumean.py index 8018765..a0784a0 100644 --- a/nltk_contrib/misc/didyoumean.py +++ b/nltk_contrib/misc/didyoumean.py @@ -31,7 +31,7 @@ def specialhash(self, s): def test(self, token): hashed = self.specialhash(token) if hashed in self.learned: - words = self.learned[hashed].items() + words = list(self.learned[hashed].items()) sortby(words, 1, reverse=1) if token in [i[0] for i in words]: return 'This word seems OK' @@ -59,7 +59,7 @@ def demo(): d.learn() # choice of words to be relevant related to the brown corpus for i in "birdd, oklaoma, emphasise, bird, carot".split(", "): - print i, "-", d.test(i) + print((i, "-", d.test(i))) if __name__ == "__main__": demo() diff --git a/nltk_contrib/misc/fsa.py b/nltk_contrib/misc/fsa.py index 7105336..dd9a29f 100644 --- a/nltk_contrib/misc/fsa.py +++ b/nltk_contrib/misc/fsa.py @@ -64,8 +64,8 @@ def generate_transitions(self): A generator that yields each transition arrow in the FSA in the form (source, label, target). """ - for (state, map) in self._transitions.items(): - for (symbol, targets) in map.items(): + for (state, map) in list(self._transitions.items()): + for (symbol, targets) in list(map.items()): for target in targets: yield (state, symbol, target) @@ -74,7 +74,7 @@ def labels(self, s1, s2): A generator for all possible labels taking state s1 to state s2. """ map = self._transitions.get(s1, {}) - for (symbol, targets) in map.items(): + for (symbol, targets) in list(map.items()): if s2 in targets: yield symbol def sigma(self): @@ -127,7 +127,7 @@ def states(self): @returns: a list of all states in the FSA. 
@rtype: list """ - return self._transitions.keys() + return list(self._transitions.keys()) def add_final(self, state): """ @@ -177,9 +177,9 @@ def insert(self, s1, label, s2): @param s2: the destination of the transition """ if s1 not in self.states(): - raise ValueError, "State %s does not exist" % s1 + raise ValueError("State %s does not exist" % s1) if s2 not in self.states(): - raise ValueError, "State %s does not exist" % s1 + raise ValueError("State %s does not exist" % s1) self._add_transition(self._transitions, s1, label, s2) self._add_transition(self._reverse, s2, label, s1) @@ -203,16 +203,16 @@ def delete(self, s1, label, s2): @param s2: the destination of the transition """ if s1 not in self.states(): - raise ValueError, "State %s does not exist" % s1 + raise ValueError("State %s does not exist" % s1) if s2 not in self.states(): - raise ValueError, "State %s does not exist" % s1 + raise ValueError("State %s does not exist" % s1) self._del_transition(self._transitions, s1, label, s2) self._del_transition(self._reverse, s2, label, s1) def delete_state(self, state): "Removes a state and all its transitions from the FSA." if state not in self.states(): - raise ValueError, "State %s does not exist" % state + raise ValueError("State %s does not exist" % state) for (s1, label, s2) in self.incident_transitions(state): self.delete(s1, label, s2) del self._transitions[state] @@ -226,10 +226,10 @@ def incident_transitions(self, state): result = set() forward = self._transitions[state] backward = self._reverse[state] - for label, targets in forward.items(): + for label, targets in list(forward.items()): for target in targets: result.add((state, label, target)) - for label, targets in backward.items(): + for label, targets in list(backward.items()): for target in targets: result.add((target, label, state)) return result @@ -239,9 +239,9 @@ def relabel_state(self, old, new): Assigns a state a new identifier. """ if old not in self.states(): - raise ValueError, "State %s does not exist" % old + raise ValueError("State %s does not exist" % old) if new in self.states(): - raise ValueError, "State %s already exists" % new + raise ValueError("State %s already exists" % new) changes = [] for (s1, symbol, s2) in self.generate_transitions(): if s1 == old and s2 == old: @@ -274,8 +274,8 @@ def is_deterministic(self): Return whether this is a DFA (every symbol leads from a state to at most one target state). """ - for map in self._transitions.values(): - for targets in map.values(): + for map in list(self._transitions.values()): + for targets in list(map.values()): if len(targets) > 1: return False return True @@ -287,14 +287,14 @@ def nextState(self, state, symbol): """ next = self.next(state, symbol) if len(next) > 1: - raise ValueError, "This FSA is nondeterministic -- use nextStates instead." + raise ValueError("This FSA is nondeterministic -- use nextStates instead.") elif len(next) == 1: return list(next)[0] else: return None def forward_traverse(self, state): "All states reachable by following transitions from a given state." 
result = set() - for (symbol, targets) in self._transitions[state].items(): + for (symbol, targets) in list(self._transitions[state].items()): result = result.union(targets) return result @@ -302,7 +302,7 @@ def reverse_traverse(self, state): """All states from which a given state is reachable by following transitions.""" result = set() - for (symbol, targets) in self._reverse[state].items(): + for (symbol, targets) in list(self._reverse[state].items()): result = result.union(targets) return result @@ -334,7 +334,7 @@ def prune(self): self._clean_map(self._reverse[state]) def _clean_map(self, map): - for (key, value) in map.items(): + for (key, value) in list(map.items()): if len(value) == 0: del map[key] @@ -396,7 +396,7 @@ def dfa(self): for label in self.sigma(): nfa_next = tuple(self.e_closure(self.move(map[dfa_state], label))) - if map.has_key(nfa_next): + if nfa_next in map: dfa_next = map[nfa_next] else: dfa_next = dfa.new_state() @@ -412,7 +412,7 @@ def generate(self, maxlen, state=0, prefix=""): "Generate all accepting sequences of length at most maxlen." if maxlen > 0: if state in self._finals: - print prefix + print(prefix) for (s1, labels, s2) in self.outgoing_transitions(state): for label in labels(): self.generate(maxlen-1, s2, prefix+label) @@ -421,14 +421,14 @@ def pp(self): """ Print a representation of this FSA (in human-readable YAML format). """ - print yaml.dump(self) + print((yaml.dump(self))) @classmethod def from_yaml(cls, loader, node): map = loader.construct_mapping(node) result = cls(map.get('sigma', []), {}, map.get('finals', [])) - for (s1, map1) in map['transitions'].items(): - for (symbol, targets) in map1.items(): + for (s1, map1) in list(map['transitions'].items()): + for (symbol, targets) in list(map1.items()): for s2 in targets: result.insert(s1, symbol, s2) return result @@ -551,19 +551,19 @@ def demo(): # Use a regular expression to initialize the FSA. re = 'abcd' - print 'Regular Expression:', re + print(('Regular Expression:', re)) re2nfa(fsa, re) - print "NFA:" + print("NFA:") fsa.pp() # Convert the (nondeterministic) FSA to a deterministic FSA. dfa = fsa.dfa() - print "DFA:" + print("DFA:") dfa.pp() # Prune the DFA dfa.prune() - print "PRUNED DFA:" + print("PRUNED DFA:") dfa.pp() # Use the FSA to generate all strings of length less than 3 diff --git a/nltk_contrib/misc/huffman.py b/nltk_contrib/misc/huffman.py index fce7409..3d3ad0f 100644 --- a/nltk_contrib/misc/huffman.py +++ b/nltk_contrib/misc/huffman.py @@ -5,7 +5,7 @@ from operator import itemgetter def huffman_tree(text): - coding = nltk.FreqDist(text).items() + coding = list(nltk.FreqDist(text).items()) coding.sort(key=itemgetter(1)) while len(coding) > 1: a, b = coding[:2] @@ -67,8 +67,8 @@ def trial(train, texts): text_len = len(text) comp_len = len(encode(code_tree, text)) / 8.0 compression = (text_len - comp_len) / text_len - print compression, - print + print(compression, end=' ') + print() trial(train1, [test1, test2, test3]) trial(train2, [test1, test2, test3]) diff --git a/nltk_contrib/misc/kimmo.py b/nltk_contrib/misc/kimmo.py index 1e407e4..5685923 100644 --- a/nltk_contrib/misc/kimmo.py +++ b/nltk_contrib/misc/kimmo.py @@ -16,7 +16,7 @@ # TODO: remove Unix dependencies -import Tkinter +import tkinter import os, re, sys, types, string, glob, time, md5 from nltk_contrib.fsa import * @@ -37,7 +37,7 @@ and we want batch mode, big file, or big input test with output. 
""" ########################################################################### -from ScrolledText import ScrolledText +from tkinter.scrolledtext import ScrolledText class KimmoGUI: def __init__(self, grammar, text, title='Kimmo Interface v1.78'): @@ -58,46 +58,46 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): self.helpFilename = 'kimmo.help' - self._root = Tkinter.Tk() + self._root = tkinter.Tk() self._root.title(title) - ctlbuttons = Tkinter.Frame(self._root) + ctlbuttons = tkinter.Frame(self._root) ctlbuttons.pack(side='top', fill='x') - level1 = Tkinter.Frame(self._root) + level1 = tkinter.Frame(self._root) level1.pack(side='top', fill='none') - Tkinter.Frame(self._root).pack(side='top', fill='none') - level2 = Tkinter.Frame(self._root) + tkinter.Frame(self._root).pack(side='top', fill='none') + level2 = tkinter.Frame(self._root) level2.pack(side='top', fill='x') - buttons = Tkinter.Frame(self._root) + buttons = tkinter.Frame(self._root) buttons.pack(side='top', fill='none') - batchFrame = Tkinter.Frame(self._root) + batchFrame = tkinter.Frame(self._root) batchFrame.pack(side='top', fill='x') - self.batchpath = Tkinter.StringVar() - Tkinter.Label(batchFrame, text="Batch File:").pack(side='left') - Tkinter.Entry(batchFrame, background='white', foreground='black', + self.batchpath = tkinter.StringVar() + tkinter.Label(batchFrame, text="Batch File:").pack(side='left') + tkinter.Entry(batchFrame, background='white', foreground='black', width=30, textvariable=self.batchpath).pack(side='left') - Tkinter.Button(batchFrame, text='Go!', + tkinter.Button(batchFrame, text='Go!', background='#a0c0c0', foreground='black', command=self.batch).pack(side='left') - self.debugWin = Tkinter.StringVar() # change to a window and field eventually. - Tkinter.Entry(batchFrame, background='grey', foreground='red', + self.debugWin = tkinter.StringVar() # change to a window and field eventually. 
+ tkinter.Entry(batchFrame, background='grey', foreground='red', width=30, textvariable=self.debugWin).pack(side='right') - self.wordIn = Tkinter.StringVar() - Tkinter.Label(level2, text="Generate or Recognize:").pack(side='left') - Tkinter.Entry(level2, background='white', foreground='black', + self.wordIn = tkinter.StringVar() + tkinter.Label(level2, text="Generate or Recognize:").pack(side='left') + tkinter.Entry(level2, background='white', foreground='black', width=30, textvariable=self.wordIn).pack(side='left') - lexiconFrame = Tkinter.Frame(level1) - Tkinter.Label(lexiconFrame, text="Lexicon & Alternations").pack(side='top', + lexiconFrame = tkinter.Frame(level1) + tkinter.Label(lexiconFrame, text="Lexicon & Alternations").pack(side='top', fill='x') self.lexicon = ScrolledText(lexiconFrame, background='white', foreground='black', width=50, height=36, wrap='none') # setup the scrollbar - scroll = Tkinter.Scrollbar(lexiconFrame, orient='horizontal',command=self.lexicon.xview) + scroll = tkinter.Scrollbar(lexiconFrame, orient='horizontal',command=self.lexicon.xview) scroll.pack(side='bottom', fill='x') self.lexicon.configure(xscrollcommand = scroll.set) @@ -105,36 +105,36 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): self.lexicon.pack(side='top') - midFrame = Tkinter.Frame(level1) - rulesFrame = Tkinter.Frame(midFrame) + midFrame = tkinter.Frame(level1) + rulesFrame = tkinter.Frame(midFrame) rulesFrame.pack(side='top', fill='x') - Tkinter.Label(rulesFrame, text="Rules/Subsets").pack(side='top', + tkinter.Label(rulesFrame, text="Rules/Subsets").pack(side='top', fill='x') self.rules = ScrolledText(rulesFrame, background='white', foreground='black', width=60, height=19, wrap='none') # setup the scrollbar - scroll = Tkinter.Scrollbar(rulesFrame, orient='horizontal',command=self.rules.xview) + scroll = tkinter.Scrollbar(rulesFrame, orient='horizontal',command=self.rules.xview) scroll.pack(side='bottom', fill='x') self.rules.configure(xscrollcommand = scroll.set) self.rules.pack(side='top') - midbetweenFrame = Tkinter.Frame(midFrame) + midbetweenFrame = tkinter.Frame(midFrame) midbetweenFrame.pack(side='top', fill='x') - Tkinter.Button(midbetweenFrame, text='clear', + tkinter.Button(midbetweenFrame, text='clear', background='#f0f0f0', foreground='black', - command= lambda start=1.0, end=Tkinter.END : self.results.delete(start,end) + command= lambda start=1.0, end=tkinter.END : self.results.delete(start,end) ).pack(side='right') - Tkinter.Label(midbetweenFrame, + tkinter.Label(midbetweenFrame, text="Results ").pack(side='right') self.results = ScrolledText(midFrame, background='white', foreground='black', width=60, height=13, wrap='none') # setup the scrollbar - scroll = Tkinter.Scrollbar(midFrame, orient='horizontal',command=self.results.xview) + scroll = tkinter.Scrollbar(midFrame, orient='horizontal',command=self.results.xview) scroll.pack(side='bottom', fill='x') self.results.configure(xscrollcommand = scroll.set) @@ -151,13 +151,13 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): self.alternation.pack(side='top') """ - Tkinter.Button(ctlbuttons, text='Quit', + tkinter.Button(ctlbuttons, text='Quit', background='#a0c0c0', foreground='black', command=self.destroy).pack(side='left') - self.loadMenuButton = Tkinter.Menubutton(ctlbuttons, text='Load', background='#a0c0c0', foreground='black', relief='raised') + self.loadMenuButton = tkinter.Menubutton(ctlbuttons, text='Load', background='#a0c0c0', foreground='black', relief='raised') 
self.loadMenuButton.pack(side='left') - self.loadMenu=Tkinter.Menu(self.loadMenuButton,tearoff=0) + self.loadMenu=tkinter.Menu(self.loadMenuButton,tearoff=0) self.loadMenu.add_command(label='Load Lexicon', underline=0,command = lambda filetype='.lex', targetWindow = self.lexicon, tf = 'l' : self.loadTypetoTarget(filetype, targetWindow, tf)) self.loadMenu.add_command(label='Load Rules', underline=0,command = lambda filetype='.rul', targetWindow = self.rules, tf = 'r' : self.loadTypetoTarget(filetype, targetWindow, tf)) @@ -166,9 +166,9 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): # - self.saveMenuButton = Tkinter.Menubutton(ctlbuttons, text='Save',background='#a0c0c0', foreground='black', relief='raised') + self.saveMenuButton = tkinter.Menubutton(ctlbuttons, text='Save',background='#a0c0c0', foreground='black', relief='raised') self.saveMenuButton.pack(side='left') - self.saveMenu=Tkinter.Menu(self.saveMenuButton,tearoff=0) + self.saveMenu=tkinter.Menu(self.saveMenuButton,tearoff=0) self.saveMenu.add_command(label='Save Lexicon', underline=0,command = lambda filename=self.lexfilename, sourceWindow = self.lexicon : self.writeToFilefromWindow(filename, sourceWindow,'w',0,'l')) self.saveMenu.add_command(label='Save Rules', underline=0,command = lambda filename=self.rulfilename, sourceWindow = self.rules : self.writeToFilefromWindow(filename, sourceWindow,'w',0,'r')) self.saveMenu.add_command(label='Save Results', underline=0,command = lambda filename='.results', sourceWindow = self.results : self.writeToFilefromWindow(filename, sourceWindow,'w',0)) @@ -176,12 +176,12 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): self.saveMenuButton["menu"]=self.saveMenu - Tkinter.Label(ctlbuttons, text=" Preset:").pack(side='left') + tkinter.Label(ctlbuttons, text=" Preset:").pack(side='left') - self.configValue = Tkinter.StringVar() - self.configsMenuButton = Tkinter.Menubutton(ctlbuttons, text='Configs', background='#a0c0c0', foreground='black', relief='raised') + self.configValue = tkinter.StringVar() + self.configsMenuButton = tkinter.Menubutton(ctlbuttons, text='Configs', background='#a0c0c0', foreground='black', relief='raised') self.configsMenuButton.pack(side='left') - self.configsMenu=Tkinter.Menu(self.configsMenuButton,tearoff=0) + self.configsMenu=tkinter.Menu(self.configsMenuButton,tearoff=0) # read the directory for cfgs, add them to the menu # add path expander, to expand ~ & given home dirs. 
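NOTE (not part of the patch): the hunks in this file track the Python 3 reorganization of the Tk bindings. For reference, the renames applied throughout this diff follow the pattern below; only modules that appear in the surrounding hunks are listed.

    # Python 2                                    # Python 3
    import Tkinter                                import tkinter
    from ScrolledText import ScrolledText         from tkinter.scrolledtext import ScrolledText
    from tkFileDialog import askopenfilename      from tkinter.filedialog import askopenfilename
    from tkFileDialog import asksaveasfilename    from tkinter.filedialog import asksaveasfilename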
@@ -210,21 +210,21 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): # background='#b0f0d0', foreground='#008b45', # command=self.generate).pack(side='right') - self.tracingbtn = Tkinter.Button(ctlbuttons, text='Tracing', + self.tracingbtn = tkinter.Button(ctlbuttons, text='Tracing', background='#fff0f0', foreground='black', command=lambda : self.create_destroyDebugTracing()).pack(side='right') - self.graphMenuButton = Tkinter.Menubutton(ctlbuttons, text='Graph', background='#d0d0e8', foreground='black', relief='raised') + self.graphMenuButton = tkinter.Menubutton(ctlbuttons, text='Graph', background='#d0d0e8', foreground='black', relief='raised') self.graphMenuButton.pack(side='right') - self.graphMenu=Tkinter.Menu(self.graphMenuButton,tearoff=0) + self.graphMenu=tkinter.Menu(self.graphMenuButton,tearoff=0) self.graphMenu.add_command(label='Graph Lexicon', underline=0,command = lambda which = 'l' : self.graph(which)) self.graphMenu.add_command(label='Graph FSA Rules', underline=0,command = lambda which = 'r' : self.graph(which)) # self.loadMenu.add_command(label='Load Lexicon', underline=0,command = lambda filetype='.lex', targetWindow = self.lexicon : loadTypetoTarget(self, filetype, targetWindow)) self.graphMenuButton["menu"]=self.graphMenu - self.helpbtn = Tkinter.Button(ctlbuttons, text='Help', + self.helpbtn = tkinter.Button(ctlbuttons, text='Help', background='#f0fff0', foreground='black', command=self.kimmoHelp).pack(side='right') @@ -233,10 +233,10 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): midFrame.pack(side='left') # alternationFrame.pack(side='left') - Tkinter.Button(level2, text='Generate', + tkinter.Button(level2, text='Generate', background='#a0c0c0', foreground='black', command=self.generate).pack(side='left') - Tkinter.Button(level2, text='Recognize', + tkinter.Button(level2, text='Recognize', background='#a0c0c0', foreground='black', command=self.recognize).pack(side='left') @@ -267,16 +267,16 @@ def __init__(self, grammar, text, title='Kimmo Interface v1.78'): # Enter mainloop. 
- Tkinter.mainloop() + tkinter.mainloop() except: - print 'Error creating Tree View' + print('Error creating Tree View') self.destroy() raise def init_menubar(self): - menubar = Tkinter.Menu(self._root) + menubar = tkinter.Menu(self._root) - filemenu = Tkinter.Menu(menubar, tearoff=0) + filemenu = tkinter.Menu(menubar, tearoff=0) filemenu.add_command(label='Save Rules', underline=0, command=self.save, accelerator='Ctrl-s') self._root.bind('', self.save) @@ -308,26 +308,26 @@ def create_destroyDebugTracing(self, *args): else: try: # have in its own special di decial class - self.dbgTracing = Tkinter.Toplevel() + self.dbgTracing = tkinter.Toplevel() self.dbgTracing.title("Tracing/Debug") - dbgTraceFrame2 = Tkinter.Frame(self.dbgTracing) + dbgTraceFrame2 = tkinter.Frame(self.dbgTracing) dbgTraceFrame2.pack(side='top', fill='x') - dbgTraceFrame = Tkinter.Frame(self.dbgTracing) + dbgTraceFrame = tkinter.Frame(self.dbgTracing) dbgTraceFrame.pack(side='top', fill='x',expand='yes') self.traceWindow = ScrolledText(dbgTraceFrame, background='#f4f4f4', foreground='#aa0000', width=45, height=24, wrap='none') - Tkinter.Button(dbgTraceFrame2, text='clear', + tkinter.Button(dbgTraceFrame2, text='clear', background='#a0c0c0', foreground='black', - command= lambda start=1.0, end=Tkinter.END : self.traceWindow.delete(start,end) + command= lambda start=1.0, end=tkinter.END : self.traceWindow.delete(start,end) ).pack(side='right') - Tkinter.Button(dbgTraceFrame2, text='Save', + tkinter.Button(dbgTraceFrame2, text='Save', background='#a0c0c0', foreground='black', command= lambda file=self.kimmoResultFile,windowName=self.traceWindow,mode='w',auto=0 : self.writeToFilefromWindow(file,windowName,mode,auto) ).pack(side='left') - scroll = Tkinter.Scrollbar(dbgTraceFrame, orient='horizontal',command=self.traceWindow.xview) + scroll = tkinter.Scrollbar(dbgTraceFrame, orient='horizontal',command=self.traceWindow.xview) scroll.pack(side='bottom', fill='x') self.traceWindow.configure(xscrollcommand = scroll.set) @@ -340,7 +340,7 @@ def create_destroyDebugTracing(self, *args): self.dbgTracing.protocol("WM_DELETE_WINDOW", self.create_destroyDebugTracing) except: - print 'Error creating Tree View' + print('Error creating Tree View') self.dbgTracing.destroy() self.dbgTracing = None self.debug = False @@ -355,7 +355,7 @@ def writeToFilefromWindow(self, filename, windowName, mode, auto, wt=None): if not (auto and windowName and filename): - from tkFileDialog import asksaveasfilename + from tkinter.filedialog import asksaveasfilename ftypes = [('Text file', '.txt'),('Rule file', '.rul'),('Lexicon file', '.lex'),('Alternations file', '.alt'), ('All files', '*')] filename = asksaveasfilename(filetypes=ftypes, @@ -365,7 +365,7 @@ def writeToFilefromWindow(self, filename, windowName, mode, auto, wt=None): self.guiError('Need File Name') return f = open(filename, 'w') - f.write(windowName.get(1.0,Tkinter.END)) + f.write(windowName.get(1.0,tkinter.END)) f.close() if filename: @@ -401,7 +401,7 @@ def save(self, *args): """ def configLoader(self,*args): - print args[0] + print(args[0]) filename = args[0] # if arg is a valid file, load by line. 
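NOTE (not part of the patch): the conversion below keeps several calls that have no Python 3 equivalent under the same name: the md5 module (md5.new in initKimmo), string.join (in capturePrint) and string.atoi (in kimmoHelp) were all removed. A minimal sketch of replacements, assuming the variables used in those hunks (tmprmd5 is taken from the hunk; the other left-hand names are illustrative only):

    import hashlib

    # was: tmprmd5 = md5.new(self.rules.get(1.0, tkinter.END))
    # hexdigest() yields a string, so the later != comparison compares values
    tmprmd5 = hashlib.md5(self.rules.get(1.0, tkinter.END).encode('utf-8')).hexdigest()

    # was: string.join(args, " ")
    joined = " ".join(str(a) for a in args)

    # was: string.atoi(matchIdxFields[1], 10)
    column = int(matchIdxFields[1], 10)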
@@ -471,7 +471,7 @@ def loadIntoWindow(self, filename, windowField): text.append(line) # empty the window now that the file was valid - windowField.delete(1.0, Tkinter.END) + windowField.delete(1.0, tkinter.END) windowField.insert(1.0, '\n'.join(text)) @@ -483,7 +483,7 @@ def loadTypetoTarget(self, fileType, targetWindow, ftype = None): if not (fileType and targetWindow): return - from tkFileDialog import askopenfilename + from tkinter.filedialog import askopenfilename ftypes = [(fileType, fileType)] filename = askopenfilename(filetypes=ftypes, defaultextension=fileType) @@ -502,7 +502,7 @@ def load(self, *args): # graphical interface to file loading. "Load rule/lexicon set from a text file" - from tkFileDialog import askopenfilename + from tkinter.filedialog import askopenfilename ftypes = [('Text file', '.txt'), ('All files', '*')] # filename = askopenfilename(filetypes=ftypes, defaultextension='.txt') @@ -556,10 +556,10 @@ def load(self, *args): def clear(self, *args): "Clears the grammar and lexical and sentence inputs" - self.lexicon.delete(1.0, Tkinter.END) - self.rules.delete(1.0, Tkinter.END) + self.lexicon.delete(1.0, tkinter.END) + self.rules.delete(1.0, tkinter.END) # self.alternation.delete(1.0, Tkinter.END) - self.results.delete(1.0, Tkinter.END) + self.results.delete(1.0, tkinter.END) def destroy(self, *args): if self._root is None: return @@ -570,10 +570,10 @@ def destroy(self, *args): # for single stepping through a trace. # need to make the kimmo class capable of being interrupted & resumed. def step(self, *args): - print 'a' + print('a') def singlestep(self, *args): - print 'a' + print('a') def batch(self, *args): filename = self.batchpath.get() @@ -704,10 +704,10 @@ def graph(self, which): # check & set path, if necessary, need read and write access to path path = '' pathstatus = os.stat('./') # 0600 is r/w, binary evaluation - if not ((pathstatus[0] & 0600) == 0600): + if not ((pathstatus[0] & 0o600) == 0o600): path = '/tmp/' + str(os.environ.get("USER")) + '/' # need terminating / if not os.path.exists(path): - os.mkdir(path,0777) + os.mkdir(path,0o777) pathre = re.compile(r"^.*\/") @@ -779,7 +779,7 @@ def highlightMatches(self, word, window,color): matchIdx = '1.0' matchRight = '1.0' while matchIdx != '': - matchIdx = window.search(word,matchRight,count=1,stopindex=Tkinter.END) + matchIdx = window.search(word,matchRight,count=1,stopindex=tkinter.END) if matchIdx == '': break strptr = matchIdx.split(".") @@ -799,11 +799,11 @@ def initKimmo(self, *args): or recognize. (i.e. 
loading all rules, lexicon, and alternations """ # only initialize Kimmo if the contents of the *rules* have changed - tmprmd5 = md5.new(self.rules.get(1.0, Tkinter.END)) - tmplmd5 = md5.new(self.lexicon.get(1.0, Tkinter.END)) + tmprmd5 = md5.new(self.rules.get(1.0, tkinter.END)) + tmplmd5 = md5.new(self.lexicon.get(1.0, tkinter.END)) if (not self.kimmoinstance) or (self.rulemd5 != tmprmd5) or (self.lexmd5 != tmplmd5): self.guiError("Creating new Kimmo instance") - self.kimmoinstance = KimmoControl(self.lexicon.get(1.0, Tkinter.END),self.rules.get(1.0, Tkinter.END),'','',self.debug) + self.kimmoinstance = KimmoControl(self.lexicon.get(1.0, tkinter.END),self.rules.get(1.0, tkinter.END),'','',self.debug) self.guiError("") self.rulemd5 = tmprmd5 self.lexmd5 = tmplmd5 @@ -820,7 +820,7 @@ def initKimmo(self, *args): def refresh(self, *args): if self._root is None: return - print self.wordIn.get() + print(self.wordIn.get()) # CAPTURE PYTHON-KIMMO OUTPUT @@ -830,8 +830,8 @@ def capturePrint(self,*args): # if there is a trace/debug window if self.dbgTracing: - self.traceWindow.insert(Tkinter.END, string.join(args," ")) - self.traceWindow.see(Tkinter.END) + self.traceWindow.insert(tkinter.END, string.join(args," ")) + self.traceWindow.see(tkinter.END) # otherwise, just drop the output. @@ -858,7 +858,7 @@ def kimmoHelp(self,*args): # helpText = Tkinter.StringVar() helpText = '' try: f = open(self.helpFilename, 'r') - except IOError, e: + except IOError as e: self.guiError("HelpFile not loaded") return @@ -873,7 +873,7 @@ def kimmoHelp(self,*args): helpText = re.sub("\r","",helpText) - helpWindow = Tkinter.Toplevel() + helpWindow = tkinter.Toplevel() helpWindow.title("PyKimmo Documentation & Help") # help = Tkinter.Label(helpWindow,textvariable=helpText, justify='left' ) # @@ -884,14 +884,14 @@ def kimmoHelp(self,*args): help.pack(side='top') help.insert(1.0, helpText) # setup the scrollbar - scroll = Tkinter.Scrollbar(helpWindow, orient='horizontal',command=help.xview) + scroll = tkinter.Scrollbar(helpWindow, orient='horizontal',command=help.xview) scroll.pack(side='bottom', fill='x') help.configure(xscrollcommand = scroll.set) # now highlight up the file - matchIdx = Tkinter.END - matchRight = Tkinter.END - matchLen = Tkinter.IntVar() + matchIdx = tkinter.END + matchRight = tkinter.END + matchLen = tkinter.IntVar() tagId = 1 while 1: matchIdx = help.search(r"::[^\n]*::",matchIdx, stopindex=1.0, backwards=True, regexp=True, count=matchLen ) @@ -900,7 +900,7 @@ def kimmoHelp(self,*args): matchIdxFields = matchIdx.split(".") matchLenStr = matchIdxFields[0] + "." 
+ str(string.atoi(matchIdxFields[1],10) + matchLen.get()) - print (matchIdx, matchLenStr) + print((matchIdx, matchLenStr)) help.tag_add(tagId, matchIdx, matchLenStr ) help.tag_configure(tagId, background='aquamarine', foreground='blue', underline=True) tagId += 1 @@ -974,11 +974,11 @@ def __getattr__(self, name): class tkImageView: def __init__(self, imagefileName, title): - self._root = Tkinter.Toplevel() + self._root = tkinter.Toplevel() self._root.title(title + ' (' + imagefileName + ')') - self.image = Tkinter.PhotoImage("LGraph",file=imagefileName) + self.image = tkinter.PhotoImage("LGraph",file=imagefileName) - Tkinter.Label(self._root, image=self.image).pack(side='top',fill='x') + tkinter.Label(self._root, image=self.image).pack(side='top',fill='x') # self._root.mainloop() def destroy(self, *args): @@ -989,11 +989,11 @@ def destroy(self, *args): ######################### Dialog Boxes ############################## -class ListDialog(Tkinter.Toplevel): +class ListDialog(tkinter.Toplevel): def __init__(self, parent, listOptions, title = None): - Tkinter.Toplevel.__init__(self, parent) + tkinter.Toplevel.__init__(self, parent) self.transient(parent) if title: @@ -1003,13 +1003,13 @@ def __init__(self, parent, listOptions, title = None): self.result = None - body = Tkinter.Frame(self) + body = tkinter.Frame(self) self.initial_focus = self.body(body) body.pack(padx=5, pady=5) - box = Tkinter.Frame(self) - Tkinter.Label(box,text="Select an FSA to graph").pack(side='top',fill='x') + box = tkinter.Frame(self) + tkinter.Label(box,text="Select an FSA to graph").pack(side='top',fill='x') box.pack() @@ -1043,13 +1043,13 @@ def body(self, master): def listbox(self, listOptions): - box = Tkinter.Frame(self) - self.lb = Tkinter.Listbox(box,height=len(listOptions),width=30,background='#f0f0ff', selectbackground='#c0e0ff' + box = tkinter.Frame(self) + self.lb = tkinter.Listbox(box,height=len(listOptions),width=30,background='#f0f0ff', selectbackground='#c0e0ff' ,selectmode='single') self.lb.pack() for x in listOptions: - self.lb.insert(Tkinter.END,x) + self.lb.insert(tkinter.END,x) box.pack() @@ -1057,11 +1057,11 @@ def buttonbox(self): # add standard button box. override if you don't want the # standard buttons - box = Tkinter.Frame(self) + box = tkinter.Frame(self) - w = Tkinter.Button(box, text="OK", width=10, command=self.ok, default="active") + w = tkinter.Button(box, text="OK", width=10, command=self.ok, default="active") w.pack(side="left", padx=5, pady=5) - w = Tkinter.Button(box, text="Cancel", width=10, command=self.cancel) + w = tkinter.Button(box, text="Cancel", width=10, command=self.cancel) w.pack(side="left", padx=5, pady=5) self.bind("<Return>", self.ok) @@ -1245,15 +1245,15 @@ def __init__(self, lexicon_string, rule_string, lexicon_file, rule_file, debug): self.s = KimmoRuleSet(self.ksubsets, self.kdefaults, self.krules) self.s.debug = debug self.ok = 1 - except RuntimeError, e: + except RuntimeError as e: self.errors = ('Caught:' + str(e) + ' ' + self.errors) - print 'Caught:', e - print "Setup of the kimmoinstance failed. Most likely cause" - print "is infinite recursion due to self-referential lexicon" - print "For instance:" - print "Begin: Begin Noun End" - print "Begin is pointing to itself. Simple example, but check" - print "to insure no directed loops" + print('Caught:', e) + print("Setup of the kimmoinstance failed. 
Most likely cause") + print("is infinite recursion due to self-referential lexicon") + print("For instance:") + print("Begin: Begin Noun End") + print("Begin is pointing to itself. Simple example, but check") + print("to insure no directed loops") self.ok = 0 @@ -1313,8 +1313,8 @@ def batch(self, filename): results_string += (batch_result_str) # place a separator between results - print '----- '+ time.strftime("%a, %d %b %Y %I:%M %p", time.gmtime()) +' -----\n' - print results_string + print('----- '+ time.strftime("%a, %d %b %Y %I:%M %p", time.gmtime()) +' -----\n') + print(results_string) @@ -2213,7 +2213,7 @@ def __init__(self, name, pair_description, state_descriptions): def name(self): return self._name def pairs(self): return self._pairs def start(self): return self._state_descriptions[0][0] - def is_state(self, index): return self.transitions.has_key(index) + def is_state(self, index): return index in self.transitions def contains_final(self, indices): @@ -2283,7 +2283,7 @@ def right_advance(self, current_states, input, output, subsets): # print 'any state match' # {col num, next state num (0 if fail), is final state} # if transition row is valid - if self.transitions.has_key(self.transitions[index][i]): ft = self.is_final[self.transitions[index][i]] + if self.transitions[index][i] in self.transitions: ft = self.is_final[self.transitions[index][i]] else : ft = '' any_next_states_ary.append([ i, self.transitions[index][i], ft, pair.__repr__() ] ) if not any_next_state: @@ -2297,7 +2297,7 @@ def right_advance(self, current_states, input, output, subsets): # times? (i.e. our state is already in next_state next_state_isset = 1 next_state = self.transitions[index][i] - if self.transitions.has_key(next_state): + if next_state in self.transitions: if not(next_state in next_states): next_states.append(next_state) @@ -2349,12 +2349,12 @@ def build_trie(self, words, word_position=0): for w in words: if len(w.letters()) <= word_position: continue fc = w.letters()[word_position] - if first_chars.has_key(fc): + if fc in first_chars: first_chars[fc].append(w) else: first_chars[fc] = [ w ] sub_tries = [] - for c, sub_words in first_chars.items(): + for c, sub_words in list(first_chars.items()): sub_tries.append( (c, self.build_trie(sub_words, word_position+1)) ) return ( [w for w in words if len(w.letters()) == word_position], sub_tries ) @@ -2410,12 +2410,12 @@ def _collect(self, name): # print 'current alternation: ' + name if name == None: return [] - elif self.alternations.has_key(name): + elif name in self.alternations: result = [] for ln in self.alternations[name].lexicon_names(): result.extend(self._collect(ln)) return result - elif self.lexicons.has_key(name): + elif name in self.lexicons: return [ self.lexicons[name] ] else: # raise ValueError('no lexicon or alternation named ' + name) @@ -2502,21 +2502,21 @@ def _debug_print_input_and_output(self, position, rule_states, morphological_sta padstring = '' for x in range(position): padstring = padstring + ' ' - print '%s%d %s:%s \n' % (padstring, position, this_input, this_output), - print '%s%d: Input: ' % (padstring, position,), + print('%s%d %s:%s \n' % (padstring, position, this_input, this_output), end=' ') + print('%s%d: Input: ' % (padstring, position,), end=' ') for i in input: - print ' ' + i + ' ', + print(' ' + i + ' ', end=' ') if this_input: - print '[' + this_input + ']...', - print + print('[' + this_input + ']...', end=' ') + print() - print '%s%d> Output: ' % (padstring, position,), + print('%s%d> Output: ' % (padstring, 
position,), end=' ') for o in output: - print ' ' + o + ' ', + print(' ' + o + ' ', end=' ') if this_output: - print '<' + this_output + '>...', - print + print('<' + this_output + '>...', end=' ') + print() # for (start, rule, fsa_states, required_truth_value) in rule_states: @@ -2524,7 +2524,7 @@ def _debug_print_input_and_output(self, position, rule_states, morphological_sta if False: # morphological_state: - print ' possible input chars = %s' % invert.possible_next_characters(morphological_state) + print(' possible input chars = %s' % invert.possible_next_characters(morphological_state)) # print morphological_state @@ -2548,7 +2548,7 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in if ((position >= len(input_tokens)) ): # and (not morphological_state) - if (self.debug) : print ' AT END OF WORD' + if (self.debug) : print(' AT END OF WORD') # FOR RECOGNIZER # this will yield some words twice, not all # also, recognizer is failing to put on the added information like "+genetive" @@ -2596,16 +2596,16 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in if (required_truth_value != truth_value): if (self.debug): - print ' BLOCKED by rule {%d %s %s}' % (start, rule, required_truth_value) - print fsa_states + print(' BLOCKED by rule {%d %s %s}' % (start, rule, required_truth_value)) + print(fsa_states) break else: if 0: # (self.debug): - print ' passed rule {%d %s %s}' % (start, rule, required_truth_value) + print(' passed rule {%d %s %s}' % (start, rule, required_truth_value)) else: if (self.debug): - print ' SUCCESS!' + print(' SUCCESS!') yield result_str, result_words else: if morphological_state: # recognizer; get the next possible surface chars that can result in @@ -2666,7 +2666,7 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in break else: if (0): # (self.debug): - print ' passed rule {%d %s %s}' % (start, rule, required_truth_value) + print(' passed rule {%d %s %s}' % (start, rule, required_truth_value)) elif (len(next_fsa_state_set) == 0): # if it isn't true, then it will have to fail, bcs we are at # the end of the state set. 
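NOTE (not part of the patch): in the debug-printing hunks above and below, each Python 2 print statement ending in a trailing comma (which suppresses the newline) becomes print(..., end=' '), and a bare print becomes print(). For this code the two spellings produce the same output, e.g.:

    # Python 2                      # Python 3
    print ' ' + o + ' ',            print(' ' + o + ' ', end=' ')
    print                           print()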
@@ -2676,15 +2676,15 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in break else: if (0): # (self.debug): - print ' passed rule {%d %s %s}' % (start, rule, required_truth_value) + print(' passed rule {%d %s %s}' % (start, rule, required_truth_value)) else: next_rule_states.append( (start, rule, next_fsa_state_set, required_truth_value) ) - if (self.debug) : print rule_state_debug + if (self.debug) : print(rule_state_debug) if (fail): if (self.debug): - print ' BLOCKED by rule %s' % (fail,) + print(' BLOCKED by rule %s' % (fail,)) continue @@ -2703,7 +2703,7 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in if (rule.rightFSA()): if (self.debug): - print ' adding rule {%d %s %s}' % (position, rule, required_truth_value) + print(' adding rule {%d %s %s}' % (position, rule, required_truth_value)) next_rule_states.append( (position, rule, [ rule.rightFSA().start() ], required_truth_value) ) else: if (required_truth_value == False): @@ -2711,7 +2711,7 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in continue else: if (0): # (self.debug): - print ' passed rule ' + str(rule) + print(' passed rule ' + str(rule)) # if did not fail, call recursively on next chars if (fail == None): @@ -2748,7 +2748,7 @@ def _generate(self, input_tokens, position, rule_states, morphological_state, in yield o else: if (self.debug): - print ' BLOCKED by rule ' + str(fail) + print(' BLOCKED by rule ' + str(fail)) def _initial_rule_states(self): return [ (0, rule, [ rule.start() ], True) for rule in self.rules() if isinstance(rule, KimmoFSARule)] @@ -2771,9 +2771,9 @@ def recognize(self, input_tokens, morphology=None): if not morphology_state: - print "Bad Morphological State, failing recognition" + print("Bad Morphological State, failing recognition") return - if (self.debug) : print 'recognize: ' + input_tokens + if (self.debug) : print('recognize: ' + input_tokens) # print output_words for o in self._generate(input_tokens, 0, self._initial_rule_states(), morphology_state, [], [], '', output_words, invert): @@ -2828,18 +2828,18 @@ def read_kimmo_file(filename, gui=None): path = os.path.expanduser(filename) try: f = open(path, 'r') - except IOError, e: + except IOError as e: path = find_corpus_file("kimmo", filename) try: f = open(path, 'r') - except IOError, e: + except IOError as e: if gui: gui.guiError(str(e)) else: - print str(e) - print "FAILURE" + print(str(e)) + print("FAILURE") return "" - print "Loaded:", path + print("Loaded:", path) return f # MAIN @@ -2866,20 +2866,20 @@ def read_kimmo_file(filename, gui=None): elif x == "debug": console_debug = 1 - print 'Tips:' - print 'kimmo.cfg is loaded by default, so if you name your project that, ' - print "it will be loaded at startup\n" + print('Tips:') + print('kimmo.cfg is loaded by default, so if you name your project that, ') + print("it will be loaded at startup\n") - print 'For commandline operation:' - print ' (for instance if you want to use a different editor)' - print "To Recognize:" - print " % python kimmo.py english.lex english.rul -r:cats" - print "To Generate:" - print " % python kimmo.py english.lex english.rul -g:cat+s" - print "To Batch Test:" - print " % python kimmo.py english.lex english.rul english.batch_test" - print "With Debug and Tracing:" - print " % python kimmo.py english.lex english.rul -r:cats debug\n" + print('For commandline operation:') + print(' (for instance if you want to use a different editor)') + print("To Recognize:") + 
print(" % python kimmo.py english.lex english.rul -r:cats") + print("To Generate:") + print(" % python kimmo.py english.lex english.rul -g:cat+s") + print("To Batch Test:") + print(" % python kimmo.py english.lex english.rul english.batch_test") + print("With Debug and Tracing:") + print(" % python kimmo.py english.lex english.rul -r:cats debug\n") # print filename_lex @@ -2894,17 +2894,17 @@ def read_kimmo_file(filename, gui=None): # creation failed, stop if not kimmoinstance.ok : - print kimmoinstance.errors + print(kimmoinstance.errors) sys.exit() if recognize_string: recognize_results = kimmoinstance.recognize(recognize_string) - print recognize_results + print(recognize_results) if generate_string: generate_results = kimmoinstance.generate(generate_string) - print generate_results # remember to format + print(generate_results) # remember to format if filename_batch_test: # run a batch kimmoinstance.batch(filename_batch_test) diff --git a/nltk_contrib/misc/langid.py b/nltk_contrib/misc/langid.py index 2211a17..d71419a 100644 --- a/nltk_contrib/misc/langid.py +++ b/nltk_contrib/misc/langid.py @@ -25,7 +25,7 @@ def run(classifier, training_data, gold_data): cls = classifier.get_class(gold_data[lang]) if cls == lang: correct += 1 - print correct, "in", len(gold_data), "correct" + print(correct, "in", len(gold_data), "correct") # features: character bigrams fd = detect.feature({"char-bigrams" : lambda t: [string.join(t)[n:n+2] for n in range(len(t)-1)]}) @@ -36,11 +36,11 @@ def run(classifier, training_data, gold_data): gold_data[lang] = training_data[lang][:50] training_data[lang] = training_data[lang][100:200] -print "Cosine classifier: ", +print("Cosine classifier: ", end=' ') run(classify.Cosine(fd), training_data, gold_data) -print "Naivebayes classifier: ", +print("Naivebayes classifier: ", end=' ') run(classify.NaiveBayes(fd), training_data, gold_data) -print "Spearman classifier: ", +print("Spearman classifier: ", end=' ') run(classify.Spearman(fd), training_data, gold_data) diff --git a/nltk_contrib/misc/lex.py b/nltk_contrib/misc/lex.py index 19b14b3..4631210 100644 --- a/nltk_contrib/misc/lex.py +++ b/nltk_contrib/misc/lex.py @@ -31,7 +31,7 @@ def phon_representer(dumper, data): """ Output 'phon' values in 'stem + affix' notation. 
""" - return dumper.represent_scalar(u'!phon', u'%s + %s' % \ + return dumper.represent_scalar('!phon', '%s + %s' % \ (data['stem'], data['affix'])) yaml.add_representer(Phon, phon_representer) @@ -61,7 +61,7 @@ def phon_constructor(loader, node): stem, affix = [normalize(s) for s in value.split('+')] return Phon(stem, affix) -yaml.add_constructor(u'!phon', phon_constructor) +yaml.add_constructor('!phon', phon_constructor) #following causes YAML to barf for some reason: #pattern = re.compile(r'^(\?)?\w+\s*\+\s*(\?)?\w+$') diff --git a/nltk_contrib/misc/marshal.py b/nltk_contrib/misc/marshal.py index 73226e6..19d845a 100644 --- a/nltk_contrib/misc/marshal.py +++ b/nltk_contrib/misc/marshal.py @@ -56,7 +56,7 @@ def marshal (self, filename): """ handler = file(filename, "w") - for text, tag in self._model.iteritems(): + for text, tag in list(self._model.items()): handler.write("%s:%s\n" % (text, tag)) handler.close() @@ -97,7 +97,7 @@ def marshal (self, filename): handler.write("length %i\n" % self._length) handler.write("minlength %i\n" % self._minlength) - for text, tag in self._model.iteritems(): + for text, tag in list(self._model.items()): handler.write("%s:%s\n" % (text, tag)) handler.close() @@ -203,4 +203,4 @@ def demo (): #tagger.marshal("ngram.test") tagger.unmarshal("ngram.test") - print tagger._model + print((tagger._model)) diff --git a/nltk_contrib/misc/marshalbrill.py b/nltk_contrib/misc/marshalbrill.py index 740504c..fa14d49 100644 --- a/nltk_contrib/misc/marshalbrill.py +++ b/nltk_contrib/misc/marshalbrill.py @@ -187,7 +187,7 @@ def apply_to(self, tokens): rule. @rtype: C{list} of C{int} """ - return self.apply_at(tokens, range(len(tokens))) + return self.apply_at(tokens, list(range(len(tokens)))) def apply_at(self, tokens, positions): """ @@ -373,7 +373,7 @@ def __hash__(self): # Needs to include extract_property in order to distinguish subclasses # A nicer way would be welcome. return hash( (self._original, self._replacement, self._conditions, - self.extract_property.func_code) ) + self.extract_property.__code__) ) def __repr__(self): conditions = ' and '.join(['%s in %d...%d' % (v,s,e) @@ -456,7 +456,7 @@ class BrillTemplateI(object): C{Brill} training algorithms to generate candidate rules. """ def __init__(self): - raise AssertionError, "BrillTemplateI is an abstract interface" + raise AssertionError("BrillTemplateI is an abstract interface") def applicable_rules(self, tokens, i, correctTag): """ @@ -478,7 +478,7 @@ def applicable_rules(self, tokens, i, correctTag): @type correctTag: (any) @rtype: C{list} of L{BrillRuleI} """ - raise AssertionError, "BrillTemplateI is an abstract interface" + raise AssertionError("BrillTemplateI is an abstract interface") def get_neighborhood(self, token, index): """ @@ -494,7 +494,7 @@ def get_neighborhood(self, token, index): @type index: C{int} @rtype: C{Set} """ - raise AssertionError, "BrillTemplateI is an abstract interface" + raise AssertionError("BrillTemplateI is an abstract interface") class ProximateTokensTemplate(BrillTemplateI): """ @@ -671,8 +671,8 @@ def train(self, train_tokens, max_rules=200, min_score=2): @param min_score: The minimum acceptable net error reduction that each transformation must produce in the corpus. """ - if self._trace > 0: print ("Training Brill tagger on %d tokens..." % - len(train_tokens)) + if self._trace > 0: print(("Training Brill tagger on %d tokens..." % + len(train_tokens))) # Create a new copy of the training token, and run the initial # tagger on this. 
We will progressively update this test @@ -691,7 +691,7 @@ def train(self, train_tokens, max_rules=200, min_score=2): train_tokens) if rule is None or score < min_score: if self._trace > 1: - print 'Insufficient improvement; stopping' + print('Insufficient improvement; stopping') break else: # Add the rule to our list of rules. @@ -746,7 +746,7 @@ def _best_rule(self, test_tokens, train_tokens): # once for each tag that the rule changes to an incorrect # value. score = fixscore - if correct_indices.has_key(rule.original_tag()): + if rule.original_tag() in correct_indices: for i in correct_indices[rule.original_tag()]: if rule.applies(test_tokens, i): score -= 1 @@ -791,7 +791,7 @@ def _find_rules(self, test_tokens, train_tokens): # Convert the dictionary into a list of (rule, score) tuples, # sorted in descending order of score. - rule_score_items = rule_score_dict.items() + rule_score_items = list(rule_score_dict.items()) temp = [(-score, rule) for (rule, score) in rule_score_items] temp.sort() return [(rule, -negscore) for (negscore, rule) in temp] @@ -818,7 +818,7 @@ def _find_rules_at(self, test_tokens, train_tokens, i): #//////////////////////////////////////////////////////////// def _trace_header(self): - print """ + print(""" B | S F r O | Score = Fixed - Broken c i o t | R Fixed = num tags changed incorrect -> correct @@ -826,13 +826,13 @@ def _trace_header(self): r e e e | l Other = num tags changed incorrect -> incorrect e d n r | e ------------------+------------------------------------------------------- - """.rstrip() + """.rstrip()) def _trace_rule(self, rule, score, fixscore, numchanges): if self._trace > 2: - print ('%4d%4d%4d%4d ' % (score, fixscore, fixscore-score, - numchanges-fixscore*2+score)), '|', - print rule + print(('%4d%4d%4d%4d ' % (score, fixscore, fixscore-score, + numchanges-fixscore*2+score)), '|', end=' ') + print(rule) ###################################################################### ## Fast Brill Tagger Trainer @@ -899,7 +899,7 @@ def _updateRuleApplies (rule, i): # If the rule is already known to apply here, ignore. # (This only happens if the position's tag hasn't changed.) - if positionsByRule[rule].has_key(i): + if i in positionsByRule[rule]: return if rule.replacement_tag() == train_tokens[i][1]: @@ -912,7 +912,7 @@ def _updateRuleApplies (rule, i): # Update rules in the other dictionaries del rulesByScore[ruleScores[rule]][rule] ruleScores[rule] += positionsByRule[rule][i] - if not rulesByScore.has_key(ruleScores[rule]): + if ruleScores[rule] not in rulesByScore: rulesByScore[ruleScores[rule]] = {} rulesByScore[ruleScores[rule]][rule] = None rulesByPosition[i].add(rule) @@ -922,7 +922,7 @@ def _updateRuleApplies (rule, i): def _updateRuleNotApplies (rule, i): del rulesByScore[ruleScores[rule]][rule] ruleScores[rule] -= positionsByRule[rule][i] - if not rulesByScore.has_key(ruleScores[rule]): + if ruleScores[rule] not in rulesByScore: rulesByScore[ruleScores[rule]] = {} rulesByScore[ruleScores[rule]][rule] = None @@ -939,22 +939,22 @@ def _updateRuleNotApplies (rule, i): tag = tagged_tokens[i][1] if tag != train_tokens[i][1]: errorIndices.append(i) - if not tagIndices.has_key(tag): + if tag not in tagIndices: tagIndices[tag] = [] tagIndices[tag].append(i) - print "Finding useful rules..." + print("Finding useful rules...") # Collect all rules that fix any errors, with their positive scores. for i in errorIndices: for template in self._templates: # Find the templated rules that could fix the error. 
for rule in template.applicable_rules(tagged_tokens, i, train_tokens[i][1]): - if not positionsByRule.has_key(rule): + if rule not in positionsByRule: _initRule(rule) _updateRuleApplies(rule, i) - print "Done initializing %i useful rules." %len(positionsByRule) + print("Done initializing %i useful rules." %len(positionsByRule)) if TESTING: after = -1 # bug-check only @@ -973,7 +973,7 @@ def _updateRuleNotApplies (rule, i): # best rule. bestRule = None - bestRules = rulesByScore[maxScore].keys() + bestRules = list(rulesByScore[maxScore].keys()) for rule in bestRules: # Find the first relevant index at or following the first @@ -990,7 +990,7 @@ def _updateRuleNotApplies (rule, i): # If we checked all remaining indices and found no more errors: if ruleScores[rule] == maxScore: firstUnknownIndex[rule] = len(tagged_tokens) # i.e., we checked them all - print "%i) %s (score: %i)" %(len(rules)+1, rule, maxScore) + print("%i) %s (score: %i)" %(len(rules)+1, rule, maxScore)) bestRule = rule break @@ -1002,29 +1002,29 @@ def _updateRuleNotApplies (rule, i): # bug-check only if TESTING: before = len(_errorPositions(tagged_tokens, train_tokens)) - print "There are %i errors before applying this rule." %before + print("There are %i errors before applying this rule." %before) assert after == -1 or before == after, \ "after=%i but before=%i" %(after,before) - print "Applying best rule at %i locations..." \ - %len(positionsByRule[bestRule].keys()) + print("Applying best rule at %i locations..." \ + %len(list(positionsByRule[bestRule].keys()))) # If we reach this point, we've found a new best rule. # Apply the rule at the relevant sites. # (apply_at is a little inefficient here, since we know the rule applies # and don't actually need to test it again.) rules.append(bestRule) - bestRule.apply_at(tagged_tokens, positionsByRule[bestRule].keys()) + bestRule.apply_at(tagged_tokens, list(positionsByRule[bestRule].keys())) # Update the tag index accordingly. - for i in positionsByRule[bestRule].keys(): # where it applied + for i in list(positionsByRule[bestRule].keys()): # where it applied # Update positions of tags # First, find and delete the index for i from the old tag. oldIndex = bisect.bisect_left(tagIndices[bestRule.original_tag()], i) del tagIndices[bestRule.original_tag()][oldIndex] # Then, insert i into the index list of the new tag. - if not tagIndices.has_key(bestRule.replacement_tag()): + if bestRule.replacement_tag() not in tagIndices: tagIndices[bestRule.replacement_tag()] = [] newIndex = bisect.bisect_left(tagIndices[bestRule.replacement_tag()], i) tagIndices[bestRule.replacement_tag()].insert(newIndex, i) @@ -1037,11 +1037,11 @@ def _updateRuleNotApplies (rule, i): # # If a template now generates a different set of rules, we have # to update our indices to reflect that. - print "Updating neighborhoods of changed sites.\n" + print("Updating neighborhoods of changed sites.\n") # First, collect all the indices that might get new rules. 
neighbors = set() - for i in positionsByRule[bestRule].keys(): # sites changed + for i in list(positionsByRule[bestRule].keys()): # sites changed for template in self._templates: neighbors.update(template.get_neighborhood(tagged_tokens, i)) @@ -1062,21 +1062,21 @@ def _updateRuleNotApplies (rule, i): # Update rules only now generated by this template for newRule in siteRules - rulesByPosition[i]: d += 1 - if not positionsByRule.has_key(newRule): + if newRule not in positionsByRule: e += 1 _initRule(newRule) # make a new rule w/score=0 _updateRuleApplies(newRule, i) # increment score, etc. if TESTING: after = before - maxScore - print "%i obsolete rule applications, %i new ones, " %(c,d)+ \ - "using %i previously-unseen rules." %e + print("%i obsolete rule applications, %i new ones, " %(c,d)+ \ + "using %i previously-unseen rules." %e) maxScore = max(rulesByScore.keys()) # may have gone up - if self._trace > 0: print ("Training Brill tagger on %d tokens..." % - len(train_tokens)) + if self._trace > 0: print(("Training Brill tagger on %d tokens..." % + len(train_tokens))) # Maintain a list of the rules that apply at each position. rules_by_position = [{} for tok in train_tokens] @@ -1164,7 +1164,7 @@ def demo(num_sents=100, max_rules=200, min_score=2, error_output = "errors.out", # train is the proportion of data used in training; the rest is reserved # for testing. - print "Loading tagged data..." + print("Loading tagged data...") sents = [] for item in treebank.items: sents.extend(treebank.tagged(item)) @@ -1182,13 +1182,13 @@ def demo(num_sents=100, max_rules=200, min_score=2, error_output = "errors.out", # Unigram tagger - print "Training unigram tagger:", + print("Training unigram tagger:", end=' ') u = tag.Unigram(backoff=NN_CD_tagger) # NB training and testing are required to use a list-of-lists structure, # so we wrap the flattened corpus data with the extra list structure. u.train([training_data]) - print("[accuracy: %f]" % tag.accuracy(u, [gold_data])) + print(("[accuracy: %f]" % tag.accuracy(u, [gold_data]))) # Brill tagger @@ -1209,13 +1209,13 @@ def demo(num_sents=100, max_rules=200, min_score=2, error_output = "errors.out", trainer = brill.BrillTrainer(u, templates, trace) b = trainer.train(training_data, max_rules, min_score) - print - print("Brill accuracy: %f" % tag.accuracy(b, [gold_data])) + print() + print(("Brill accuracy: %f" % tag.accuracy(b, [gold_data]))) print("\nRules: ") printRules = file(rule_output, 'w') for rule in b.rules(): - print(str(rule)) + print((str(rule))) printRules.write(str(rule)+"\n\n") testing_data = list(b.tag(testing_data)) @@ -1225,7 +1225,7 @@ def demo(num_sents=100, max_rules=200, min_score=2, error_output = "errors.out", for e in el: errorFile.write(e+"\n\n") errorFile.close() - print "Done; rules and errors saved to %s and %s." % (rule_output, error_output) + print("Done; rules and errors saved to %s and %s." 
% (rule_output, error_output)) if __name__ == '__main__': demo() diff --git a/nltk_contrib/misc/paradigm.py b/nltk_contrib/misc/paradigm.py index d3fcb3f..34861dd 100644 --- a/nltk_contrib/misc/paradigm.py +++ b/nltk_contrib/misc/paradigm.py @@ -22,7 +22,7 @@ # a.setOutput('term') # output is sent to terminal from xml.dom.ext.reader import Sax2 -from paradigmquery import ParadigmQuery +from .paradigmquery import ParadigmQuery import re, os class Paradigm(object): @@ -73,9 +73,9 @@ def prompt(self): s = "" while s != "exit": s = "exit" - try: s = raw_input(">") + try: s = input(">") except EOFError: - print s + print(s) if s == "exit": return if s == "quit": @@ -93,7 +93,7 @@ def show(self, p_string): # parse the query parse = ParadigmQuery(p_string) except: - print "Could not parse query." + print("Could not parse query.") return try: @@ -103,7 +103,7 @@ def show(self, p_string): if result == None: raise Error except: - print "Sorry, no result can be returned" + print("Sorry, no result can be returned") return try: @@ -111,7 +111,7 @@ def show(self, p_string): if self.format == "html": output = '\n' # Include CSS if we need to - if self.css <> None: + if self.css != None: output += '\n' @@ -124,14 +124,14 @@ def show(self, p_string): output = result.getText() except: output = None - print "--no output--" + print("--no output--") return # Print to terminal if output is set, otherwise to file if self.output == "term": - print output + print(output) else: - print "Output written to file:", self.output + print("Output written to file:", self.output) f = open(self.output, 'w') f.write(output) @@ -151,9 +151,9 @@ def setFormat(self, p_string=None): elif p_string == "text": self.format = "text" else: - print "Unknown format:", p_string - print "Valid formats are: text, html" - print "Setting format = text" + print("Unknown format:", p_string) + print("Valid formats are: text, html") + print("Setting format = text") self.format = "text" def setCSS(self, p_string=None): @@ -161,8 +161,8 @@ def setCSS(self, p_string=None): Set the file location for a Cascading Stylesheet: None or filename This allows for simple formatting """ - if p_string <> None: - print "Using CSS file:", p_string + if p_string != None: + print("Using CSS file:", p_string) self.output = p_string def setOutput(self, p_string=None): @@ -174,9 +174,9 @@ def setOutput(self, p_string=None): p_string = "term" # set to term if requested, otherwise filename if p_string == "term": - print "Directing output to terminal" + print("Directing output to terminal") else: - print "Directing output to file:", p_string + print("Directing output to file:", p_string) self.output = p_string @@ -201,7 +201,7 @@ def loadParadigm(self, p_filename ): f = open(try_filename) p_filename = try_filename except IOError: - print "Cannot find file" + print("Cannot find file") return None f.close() @@ -241,14 +241,14 @@ def loadParadigm(self, p_filename ): self.data.append(tmp_dict) # Talk to the user - print "Paradigm information successfully loaded from file:", p_filename + print("Paradigm information successfully loaded from file:", p_filename) # State the number and print out a list of attributes - print " "*4 + str(len(self.attributes)) + " attributes imported:", + print(" "*4 + str(len(self.attributes)) + " attributes imported:", end=' ') for att in self.attributes: - print att, - print + print(att, end=' ') + print() # State the number of paradigm objects imported - print " "*4 + str(len(self.data)) + " paradigm objects imported." 
+ print(" "*4 + str(len(self.data)) + " paradigm objects imported.") return @@ -360,7 +360,7 @@ def __init__(self, p_paradigm, p_tree): self.paradigm.attributes[self.attribute] except KeyError: self.error = "I couldn't find this attribute: " + self.attribute - print self.error + print(self.error) def __getitem__(self, p_index): return self.paradigm.attributes[self.attribute][p_index] @@ -616,10 +616,10 @@ def getHTML(self): vertical_header_rows = vertical_header.split('') cell_rows = str_cells.replace('','').split('') # Join two lists - zipped = zip(vertical_header_rows, cell_rows) + zipped = list(zip(vertical_header_rows, cell_rows)) str_zipped = "" for (header,cells) in zipped: - if header <> '': + if header != '': str_zipped += header + cells + "\n" # Return all the elements @@ -629,22 +629,22 @@ def getHorizontalHTML(self,p_parentSpan=1): """ Return a horizontal html table (?) """ - print "?: getHorizontalHTML() called on a table." + print("?: getHorizontalHTML() called on a table.") return None def getText(self): """ Return text for this table (?) """ - print "?: getText() for a table? HAHAHAHAHA" - print "call setFormat('html') if you want to run queries like that" + print("?: getText() for a table? HAHAHAHAHA") + print("call setFormat('html') if you want to run queries like that") return def getConditions(self): """ Return conditions for this table (?) """ - print "?: getConditions() called on a table. I don't think so." + print("?: getConditions() called on a table. I don't think so.") return None def getMaxWidth(self): @@ -658,7 +658,7 @@ def getSpan(self): """ Return span for this table (?) """ - print "WTF: getSpan() called on a table." + print("WTF: getSpan() called on a table.") return None def getData(self, p_return, p_attDict): @@ -676,7 +676,7 @@ def getData(self, p_return, p_attDict): for datum in self.paradigm.data: inc = True # For each given attribute requirement - for att in p_attDict.keys(): + for att in list(p_attDict.keys()): # If the data object fails the requirement do not include if datum[att] != p_attDict[att]: inc = False @@ -704,74 +704,74 @@ def dictJoin(dict1,dict2): If there is any key overlap, dict1 wins! 
(just make sure this doesn't happen) """ - for key in dict1.keys(): + for key in list(dict1.keys()): dict2[key] = dict1[key] return dict2 def demo(): # Print the query - print """ + print(""" ================================================================================ Load: Paradigm(file) ================================================================================ -""" - print - print ">>> a = Paradigm('german.xml')" - print +""") + print() + print(">>> a = Paradigm('german.xml')") + print() a = Paradigm('german.xml') - print - print ">>> a.setOutput('term')" - print + print() + print(">>> a.setOutput('term')") + print() a.setOutput('term') - print - print ">>> a.setFormat('text')" - print + print() + print(">>> a.setFormat('text')") + print() a.setFormat('text') # Print a domain - print """ + print(""" ================================================================================ Domain: case ================================================================================ -""" - print - print ">>> a.show('case')" - print +""") + print() + print(">>> a.show('case')") + print() a.show('case') # Print a hierarchy - print """ + print(""" ================================================================================ Hierarchy: case/gender ================================================================================ -""" - print - print ">>> a.show('case/gender')" - print +""") + print() + print(">>> a.show('case/gender')") + print() a.show('case/gender') # Print a table - print """ + print(""" ================================================================================ Table: table(case/number,gender,content) ================================================================================ -""" - print - print ">>> a.setOutput('demo.html')" - print +""") + print() + print(">>> a.setOutput('demo.html')") + print() a.setOutput('demo.html') - print - print ">>> a.setFormat('html')" - print + print() + print(">>> a.setFormat('html')") + print() a.setFormat('html') - print - print ">>> a.show('table(case/number,gender,content)')" - print + print() + print(">>> a.show('table(case/number,gender,content)')") + print() a.show('table(case/number,gender,content)') # Some space - print + print() if __name__ == '__main__': demo() diff --git a/nltk_contrib/misc/paradigmquery.py b/nltk_contrib/misc/paradigmquery.py index 240c321..cfb50c0 100644 --- a/nltk_contrib/misc/paradigmquery.py +++ b/nltk_contrib/misc/paradigmquery.py @@ -47,7 +47,7 @@ def __init__(self, p_string=None): self.xml = None # If p_string was given, parse it - if p_string <> None: + if p_string != None: self.parse(p_string) def parse(self, p_string): @@ -124,7 +124,7 @@ def parse(self, p_string): try: self.parseList = rd_parser.get_parse_list(toklist)[0] except IndexError: - print "Could not parse query." + print("Could not parse query.") return # Set the nltk.parse.tree tree for this query to the global sentence @@ -142,13 +142,13 @@ def getTree(self): Returns the results from the CFG parsing """ if self.string == None: - print "No string has been parsed. Please use parse(string)." + print("No string has been parsed. Please use parse(string).") return None return self.nltktree def getXML(self): if self.string == None: - print "No string has been parsed. Please use parse(string)." + print("No string has been parsed. 
Please use parse(string).") return None return '\n' + self.xml \ + "" @@ -279,16 +279,16 @@ def demo(): query = r'table(one/two/three, four, five)' # Print the query - print """ + print(""" ================================================================================ Query: ParadigmQuery(query) ================================================================================ -""" +""") a = ParadigmQuery(query) - print query + print(query) # Print the Tree representation - print """ + print(""" ================================================================================ Tree: getTree() O is an operator @@ -296,19 +296,19 @@ def demo(): H is a hierarchy D is a domain ================================================================================ -""" - print a.getTree() +""") + print((a.getTree())) # Print the XML representation - print """ + print(""" ================================================================================ XML: getXML() ================================================================================ -""" - print a.getXML() +""") + print((a.getXML())) # Some space - print + print() if __name__ == '__main__': diff --git a/nltk_contrib/mit/six863/kimmo/__init__.py b/nltk_contrib/mit/six863/kimmo/__init__.py index 357aaf3..072f792 100644 --- a/nltk_contrib/mit/six863/kimmo/__init__.py +++ b/nltk_contrib/mit/six863/kimmo/__init__.py @@ -1 +1 @@ -from kimmo import * +from .kimmo import * diff --git a/nltk_contrib/mit/six863/kimmo/draw.py b/nltk_contrib/mit/six863/kimmo/draw.py index 5156742..b788f66 100644 --- a/nltk_contrib/mit/six863/kimmo/draw.py +++ b/nltk_contrib/mit/six863/kimmo/draw.py @@ -1,10 +1,10 @@ -import Tkinter as tk -from morphology import KimmoMorphology -from fsa import FSA +import tkinter as tk +from .morphology import KimmoMorphology +from .fsa import FSA class KimmoGUI(object): def __init__(self, ruleset, startTk=False): - import Tkinter as tk + import tkinter as tk if startTk: self._root = tk.Tk() else: self._root = tk.Toplevel() @@ -131,7 +131,7 @@ def wrap_pygraph(self, rule): def highlight_states(self, states, morph): select = self.listbox.curselection() or 0 self.listbox.delete(0, tk.END) - for (index, stored) in self.widget_store.items(): + for (index, stored) in list(self.widget_store.items()): graph, widget = stored if index == -1: state = morph else: state = states[index] diff --git a/nltk_contrib/mit/six863/kimmo/featurelite.py b/nltk_contrib/mit/six863/kimmo/featurelite.py index 7f5ca9c..c29d37c 100644 --- a/nltk_contrib/mit/six863/kimmo/featurelite.py +++ b/nltk_contrib/mit/six863/kimmo/featurelite.py @@ -91,7 +91,7 @@ class _FORWARD(object): instantiated. """ def __init__(self): - raise TypeError, "The _FORWARD class is not meant to be instantiated" + raise TypeError("The _FORWARD class is not meant to be instantiated") class Variable(object): """ @@ -237,7 +237,7 @@ def show(data): def variable_representer(dumper, var): "Output variables in YAML as ?name." 
- return dumper.represent_scalar(u'!var', u'?%s' % var.name()) + return dumper.represent_scalar('!var', '?%s' % var.name()) yaml.add_representer(Variable, variable_representer) def variable_constructor(loader, node): @@ -245,8 +245,8 @@ def variable_constructor(loader, node): value = loader.construct_scalar(node) name = value[1:] return Variable(name) -yaml.add_constructor(u'!var', variable_constructor) -yaml.add_implicit_resolver(u'!var', re.compile(r'^\?\w+$')) +yaml.add_constructor('!var', variable_constructor) +yaml.add_implicit_resolver('!var', re.compile(r'^\?\w+$')) def _copy_and_bind(feature, bindings, memo=None): """ @@ -258,14 +258,14 @@ def _copy_and_bind(feature, bindings, memo=None): if memo is None: memo = {} if id(feature) in memo: return memo[id(feature)] if isinstance(feature, Variable) and bindings is not None: - if not bindings.has_key(feature.name()): + if feature.name() not in bindings: bindings[feature.name()] = feature.copy() result = _copy_and_bind(bindings[feature.name()], None, memo) else: if isMapping(feature): # Construct a new object of the same class result = feature.__class__() - for (key, value) in feature.items(): + for (key, value) in list(feature.items()): result[key] = _copy_and_bind(value, bindings, memo) else: result = feature memo[id(feature)] = result @@ -576,7 +576,7 @@ def failerror(f1, f2): copy2 = _copy_and_bind(feature2, bindings2, copymemo) # Preserve links between bound variables and the two feature structures. for b in (bindings1, bindings2): - for (vname, value) in b.items(): + for (vname, value) in list(b.items()): value_id = id(value) if value_id in copymemo: b[vname] = copymemo[value_id] @@ -602,7 +602,7 @@ def _destructively_unify(feature1, feature2, bindings1, bindings2, memo, fail): UnificationFailure is raised, and the values of C{self} and C{other} are undefined. """ - if memo.has_key((id(feature1), id(feature2))): + if (id(feature1), id(feature2)) in memo: return memo[id(feature1), id(feature2)] unified = _do_unify(feature1, feature2, bindings1, bindings2, memo, fail) memo[id(feature1), id(feature2)] = unified @@ -643,9 +643,9 @@ def _do_unify(feature1, feature2, bindings1, bindings2, memo, fail): # At this point, we know they're both mappings. # Do the destructive part of unification. - while feature2.has_key(_FORWARD): feature2 = feature2[_FORWARD] + while _FORWARD in feature2: feature2 = feature2[_FORWARD] feature2[_FORWARD] = feature1 - for (fname, val2) in feature2.items(): + for (fname, val2) in list(feature2.items()): if fname == _FORWARD: continue val1 = feature1.get(fname) feature1[fname] = _destructively_unify(val1, val2, bindings1, @@ -658,12 +658,12 @@ def _apply_forwards(feature, visited): the target of its forward pointer (to preserve reentrance). 
""" if not isMapping(feature): return - if visited.has_key(id(feature)): return + if id(feature) in visited: return visited[id(feature)] = True - for fname, fval in feature.items(): + for fname, fval in list(feature.items()): if isMapping(fval): - while fval.has_key(_FORWARD): + while _FORWARD in fval: fval = fval[_FORWARD] feature[fname] = fval _apply_forwards(fval, visited) @@ -695,10 +695,10 @@ def _lookup_values(mapping, visited, remove=False): else: return var.forwarded_self() if not isMapping(mapping): return mapping - if visited.has_key(id(mapping)): return mapping + if id(mapping) in visited: return mapping visited[id(mapping)] = True - for fname, fval in mapping.items(): + for fname, fval in list(mapping.items()): if isMapping(fval): _lookup_values(fval, visited) elif isinstance(fval, Variable): @@ -719,9 +719,9 @@ def _apply_forwards_to_bindings(bindings): Replace any feature structures that have been forwarded by their new identities. """ - for (key, value) in bindings.items(): - if isMapping(value) and value.has_key(_FORWARD): - while value.has_key(_FORWARD): + for (key, value) in list(bindings.items()): + if isMapping(value) and _FORWARD in value: + while _FORWARD in value: value = value[_FORWARD] bindings[key] = value diff --git a/nltk_contrib/mit/six863/kimmo/fsa.py b/nltk_contrib/mit/six863/kimmo/fsa.py index 8a01e1b..f928c95 100644 --- a/nltk_contrib/mit/six863/kimmo/fsa.py +++ b/nltk_contrib/mit/six863/kimmo/fsa.py @@ -63,8 +63,8 @@ def generate_transitions(self): A generator that yields each transition arrow in the FSA in the form (source, label, target). """ - for (state, map) in self._transitions.items(): - for (symbol, targets) in map.items(): + for (state, map) in list(self._transitions.items()): + for (symbol, targets) in list(map.items()): for target in targets: yield (state, symbol, target) @@ -73,7 +73,7 @@ def labels(self, s1, s2): A generator for all possible labels taking state s1 to state s2. """ map = self._transitions.get(s1, {}) - for (symbol, targets) in map.items(): + for (symbol, targets) in list(map.items()): if s2 in targets: yield symbol def sigma(self): @@ -134,7 +134,7 @@ def states(self): @returns: a list of all states in the FSA. @rtype: list """ - return self._transitions.keys() + return list(self._transitions.keys()) def add_final(self, state): """ @@ -184,11 +184,11 @@ def insert(self, s1, label, s2): @param s2: the destination of the transition """ if s1 not in self.states(): - raise ValueError, "State %s does not exist in %s" % (s1, - self.states()) + raise ValueError("State %s does not exist in %s" % (s1, + self.states())) if s2 not in self.states(): - raise ValueError, "State %s does not exist in %s" % (s2, - self.states()) + raise ValueError("State %s does not exist in %s" % (s2, + self.states())) self._add_transition(self._transitions, s1, label, s2) self._add_transition(self._reverse, s2, label, s1) @@ -212,16 +212,16 @@ def delete(self, s1, label, s2): @param s2: the destination of the transition """ if s1 not in self.states(): - raise ValueError, "State %s does not exist" % s1 + raise ValueError("State %s does not exist" % s1) if s2 not in self.states(): - raise ValueError, "State %s does not exist" % s1 + raise ValueError("State %s does not exist" % s1) self._del_transition(self._transitions, s1, label, s2) self._del_transition(self._reverse, s2, label, s1) def delete_state(self, state): "Removes a state and all its transitions from the FSA." 
if state not in self.states(): - raise ValueError, "State %s does not exist" % state + raise ValueError("State %s does not exist" % state) for (s1, label, s2) in self.incident_transitions(state): self.delete(s1, label, s2) del self._transitions[state] @@ -235,10 +235,10 @@ def incident_transitions(self, state): result = set() forward = self._transitions[state] backward = self._reverse[state] - for label, targets in forward.items(): + for label, targets in list(forward.items()): for target in targets: result.add((state, label, target)) - for label, targets in backward.items(): + for label, targets in list(backward.items()): for target in targets: result.add((target, label, state)) return result @@ -248,9 +248,9 @@ def relabel_state(self, old, new): Assigns a state a new identifier. """ if old not in self.states(): - raise ValueError, "State %s does not exist" % old + raise ValueError("State %s does not exist" % old) if new in self.states(): - raise ValueError, "State %s already exists" % new + raise ValueError("State %s already exists" % new) changes = [] for (s1, symbol, s2) in self.generate_transitions(): if s1 == old and s2 == old: @@ -261,7 +261,7 @@ def relabel_state(self, old, new): changes.append((s1, symbol, s2, s1, symbol, new)) for (leftstate, symbol, rightstate, newleft, newsym, newright)\ in changes: - print leftstate, symbol, rightstate, newleft, newsym, newright + print((leftstate, symbol, rightstate, newleft, newsym, newright)) self.delete(leftstate, symbol, rightstate) self.insert_safe(newleft, newsym, newright) del self._transitions[old] @@ -284,8 +284,8 @@ def is_deterministic(self): Return whether this is a DFA (every symbol leads from a state to at most one target state). """ - for map in self._transitions.values(): - for targets in map.values(): + for map in list(self._transitions.values()): + for targets in list(map.values()): if len(targets) > 1: return False return True @@ -297,14 +297,14 @@ def nextState(self, state, symbol): """ next = self.next(state, symbol) if len(next) > 1: - raise ValueError, "This FSA is nondeterministic -- use nextStates instead." + raise ValueError("This FSA is nondeterministic -- use nextStates instead.") elif len(next) == 1: return list(next)[0] else: return None def forward_traverse(self, state): "All states reachable by following transitions from a given state." result = set() - for (symbol, targets) in self._transitions[state].items(): + for (symbol, targets) in list(self._transitions[state].items()): result = result.union(targets) return result @@ -312,7 +312,7 @@ def reverse_traverse(self, state): """All states from which a given state is reachable by following transitions.""" result = set() - for (symbol, targets) in self._reverse[state].items(): + for (symbol, targets) in list(self._reverse[state].items()): result = result.union(targets) return result @@ -344,7 +344,7 @@ def prune(self): self._clean_map(self._reverse[state]) def _clean_map(self, map): - for (key, value) in map.items(): + for (key, value) in list(map.items()): if len(value) == 0: del map[key] @@ -406,7 +406,7 @@ def dfa(self): for label in self.sigma(): nfa_next = tuple(self.e_closure(self.move(map[dfa_state], label))) - if map.has_key(nfa_next): + if nfa_next in map: dfa_next = map[nfa_next] else: dfa_next = dfa.new_state() @@ -422,7 +422,7 @@ def generate(self, maxlen, state=0, prefix=""): "Generate all accepting sequences of length at most maxlen." 
if maxlen > 0: if state in self._finals: - print prefix + print(prefix) for (s1, labels, s2) in self.outgoing_transitions(state): for label in labels(): self.generate(maxlen-1, s2, prefix+label) @@ -431,14 +431,14 @@ def pp(self): """ Print a representation of this FSA (in human-readable YAML format). """ - print yaml.dump(self) + print((yaml.dump(self))) @classmethod def from_yaml(cls, loader, node): map = loader.construct_mapping(node) result = cls(map.get('sigma', []), {}, map.get('finals', [])) - for (s1, map1) in map['transitions'].items(): - for (symbol, targets) in map1.items(): + for (s1, map1) in list(map['transitions'].items()): + for (symbol, targets) in list(map1.items()): for s2 in targets: result.insert(s1, symbol, s2) return result @@ -590,19 +590,19 @@ def demo(): # Use a regular expression to initialize the FSA. re = 'abcd' - print 'Regular Expression:', re + print(('Regular Expression:', re)) re2nfa(fsa, re) - print "NFA:" + print("NFA:") fsa.pp() # Convert the (nondeterministic) FSA to a deterministic FSA. dfa = fsa.dfa() - print "DFA:" + print("DFA:") dfa.pp() # Prune the DFA dfa.prune() - print "PRUNED DFA:" + print("PRUNED DFA:") dfa.pp() # Use the FSA to generate all strings of length less than 3 diff --git a/nltk_contrib/mit/six863/kimmo/kimmo.py b/nltk_contrib/mit/six863/kimmo/kimmo.py index 648ba39..d5a5917 100644 --- a/nltk_contrib/mit/six863/kimmo/kimmo.py +++ b/nltk_contrib/mit/six863/kimmo/kimmo.py @@ -2,15 +2,15 @@ # by Rob Speer (rspeer@mit.edu) # based on code from Carl de Marcken, Beracah Yankama, and Rob Speer -from rules import KimmoArrowRule, KimmoFSARule -from pairs import KimmoPair, sort_subsets -from morphology import * -from fsa import FSA +from .rules import KimmoArrowRule, KimmoFSARule +from .pairs import KimmoPair, sort_subsets +from .morphology import * +from .fsa import FSA import yaml def _pairify(state): newstate = {} - for label, targets in state.items(): + for label, targets in list(state.items()): newstate[KimmoPair.make(label)] = targets return newstate @@ -191,7 +191,7 @@ def recognize(self, surface, log=None): def _advance_rule(self, rule, state, pair): trans = rule.fsa()._transitions[state] - expected_pairs = sort_subsets(trans.keys(), self._subsets) + expected_pairs = sort_subsets(list(trans.keys()), self._subsets) for comppair in expected_pairs: if comppair.includes(pair, self._subsets): return rule.fsa().nextState(state, comppair) @@ -200,16 +200,16 @@ def _advance_rule(self, rule, state, pair): def _test_case(self, input, outputs, arrow, method): outputs.sort() if arrow == '<=': - print '%s %s %s' % (', '.join(outputs), arrow, input) + print('%s %s %s' % (', '.join(outputs), arrow, input)) else: - print '%s %s %s' % (input, arrow, ', '.join(outputs)) + print('%s %s %s' % (input, arrow, ', '.join(outputs))) value = method(input) if len(value) and isinstance(value[0], tuple): results = [v[0] for v in value] else: results = value results.sort() if outputs != results: - print ' Failed: got %s' % (', '.join(results) or 'no results') + print(' Failed: got %s' % (', '.join(results) or 'no results')) return False else: return True @@ -244,7 +244,7 @@ def batch_test(self, filename): arrow = arrow_to_try break if arrow is None: - raise ValueError, "Can't find arrow in line: %s" % line + raise ValueError("Can't find arrow in line: %s" % line) lexicals = lexicals.strip().split(', ') surfaces = surfaces.strip().split(', ') if lexicals == ['']: lexicals = [] @@ -348,28 +348,28 @@ def _from_yaml_dict(cls, map): if lexicon: lexicon = 
KimmoMorphology.load(lexicon) subsets = map['subsets'] - for key, value in subsets.items(): - if isinstance(value, basestring): + for key, value in list(subsets.items()): + if isinstance(value, str): subsets[key] = value.split() defaults = map['defaults'] - if isinstance(defaults, basestring): + if isinstance(defaults, str): defaults = defaults.split() defaults = [KimmoPair.make(text) for text in defaults] ruledic = map['rules'] rules = [] - for (name, rule) in ruledic.items(): + for (name, rule) in list(ruledic.items()): if isinstance(rule, dict): rules.append(KimmoFSARule.from_dfa_dict(name, rule, subsets)) - elif isinstance(rule, basestring): + elif isinstance(rule, str): if rule.strip().startswith('FSA'): rules.append(KimmoFSARule.parse_table(name, rule, subsets)) else: rules.append(KimmoArrowRule(name, rule, subsets)) else: - raise ValueError, "Can't recognize the data structure in '%s' as a rule: %s" % (name, rule) + raise ValueError("Can't recognize the data structure in '%s' as a rule: %s" % (name, rule)) return cls(subsets, defaults, rules, lexicon) def gui(self, startTk=True): - import draw + from . import draw return draw.KimmoGUI(self, startTk) draw_graphs = gui @@ -392,50 +392,50 @@ def step(self, pairs, curr, rules, prev_states, states, surface = ''.join(p.output() for p in pairs) indent = ' '*len(lexical) if self.verbosity > 2: - print '%s%s<%s>' % (indent, lexical, curr.input()) - print '%s%s<%s>' % (indent, surface, curr.output()) + print('%s%s<%s>' % (indent, lexical, curr.input())) + print('%s%s<%s>' % (indent, surface, curr.output())) for rule, state1, state2 in zip(rules, prev_states, states): - print '%s%s: %s => %s' % (indent, rule.name(), state1, state2) + print('%s%s: %s => %s' % (indent, rule.name(), state1, state2)) if morphology_state: - print '%sMorphology: %r => %s' % (indent, word, morphology_state) - print + print('%sMorphology: %r => %s' % (indent, word, morphology_state)) + print() elif self.verbosity > 1: - print '%s%s<%s>' % (indent, lexical, curr.input()) - print '%s%s<%s>' % (indent, surface, curr.output()) - z = zip(prev_states, states) + print('%s%s<%s>' % (indent, lexical, curr.input())) + print('%s%s<%s>' % (indent, surface, curr.output())) + z = list(zip(prev_states, states)) if morphology_state: z.append((word, morphology_state)) - print indent + (" ".join('%s>%s' % (old, new) for old, new in z)) + print(indent + (" ".join('%s>%s' % (old, new) for old, new in z))) blocked = [] for rule, state in zip(rules, states): if str(state).lower() in ['0', 'reject']: blocked.append(rule.name()) if blocked: - print '%s[blocked by %s]' % (indent, ", ".join(blocked)) - print + print('%s[blocked by %s]' % (indent, ", ".join(blocked))) + print() else: - print '%s%s<%s> | %s<%s>' % (indent, lexical, curr.input(), - surface, curr.output()), + print('%s%s<%s> | %s<%s>' % (indent, lexical, curr.input(), + surface, curr.output()), end=' ') if morphology_state: - print '\t%r => %s' % (word, morphology_state), + print('\t%r => %s' % (word, morphology_state), end=' ') blocked = [] for rule, state in zip(rules, states): if str(state).lower() in ['0', 'reject']: blocked.append(rule.name()) if blocked: - print ' [blocked by %s]' % (", ".join(blocked)), - print + print(' [blocked by %s]' % (", ".join(blocked)), end=' ') + print() def succeed(self, pairs): lexical = ''.join(p.input() for p in pairs) surface = ''.join(p.output() for p in pairs) indent = ' '*len(lexical) - print '%s%s' % (indent, lexical) - print '%s%s' % (indent, surface) - print '%sSUCCESS: %s <=> %s' % 
(indent, lexical, surface) - print - print + print('%s%s' % (indent, lexical)) + print('%s%s' % (indent, surface)) + print('%sSUCCESS: %s <=> %s' % (indent, lexical, surface)) + print() + print() def load(filename): """ diff --git a/nltk_contrib/mit/six863/kimmo/kimmotest.py b/nltk_contrib/mit/six863/kimmo/kimmotest.py index 0b0e33d..333ec66 100644 --- a/nltk_contrib/mit/six863/kimmo/kimmotest.py +++ b/nltk_contrib/mit/six863/kimmo/kimmotest.py @@ -1,4 +1,4 @@ -from kimmo import * +from .kimmo import * k = KimmoRuleSet.load('english.yaml') -print list(k.generate('`slip+ed', TextTrace(3))) -print list(k.recognize('slipped', TextTrace(1))) +print((list(k.generate('`slip+ed', TextTrace(3))))) +print((list(k.recognize('slipped', TextTrace(1))))) diff --git a/nltk_contrib/mit/six863/kimmo/morphology.py b/nltk_contrib/mit/six863/kimmo/morphology.py index 2e9283c..3ceac80 100644 --- a/nltk_contrib/mit/six863/kimmo/morphology.py +++ b/nltk_contrib/mit/six863/kimmo/morphology.py @@ -1,6 +1,6 @@ -from fsa import FSA +from .fsa import FSA import yaml -from featurelite import unify +from .featurelite import unify def startswith(stra, strb): return stra[:len(strb)] == strb @@ -44,14 +44,14 @@ def __init__(self, fsa): def fsa(self): return self._fsa def valid_lexical(self, state, word, alphabet): trans = self.fsa()._transitions[state] - for label in trans.keys(): + for label in list(trans.keys()): if label is not None and startswith(label[0], word) and len(label[0]) > len(word): next = label[0][len(word):] for pair in alphabet: if startswith(next, pair.input()): yield pair.input() def next_states(self, state, word): choices = self.fsa()._transitions[state] - for (key, value) in choices.items(): + for (key, value) in list(choices.items()): if key is None: if word == '': for next in value: yield (next, None) @@ -102,11 +102,11 @@ def from_text(text): word = '' fsa.insert_safe(state, (word, features), next) else: - print "Ignoring line in morphology: %r" % line + print(("Ignoring line in morphology: %r" % line)) return KimmoMorphology(fsa) def demo(): - print KimmoMorphology.load('english.lex') + print((KimmoMorphology.load('english.lex'))) if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/kimmo/pairs.py b/nltk_contrib/mit/six863/kimmo/pairs.py index eb09a4b..da7ffb3 100644 --- a/nltk_contrib/mit/six863/kimmo/pairs.py +++ b/nltk_contrib/mit/six863/kimmo/pairs.py @@ -66,5 +66,5 @@ def make(text): parts = text.split(':') if len(parts) == 1: return KimmoPair(text, text) elif len(parts) == 2: return KimmoPair(parts[0], parts[1]) - else: raise ValueError, "Bad format for pair: %s" % text + else: raise ValueError("Bad format for pair: %s" % text) diff --git a/nltk_contrib/mit/six863/kimmo/rules.py b/nltk_contrib/mit/six863/kimmo/rules.py index e56d9ec..1947a9c 100644 --- a/nltk_contrib/mit/six863/kimmo/rules.py +++ b/nltk_contrib/mit/six863/kimmo/rules.py @@ -1,7 +1,7 @@ from nltk.parse import Tree -from fsa import FSA +from .fsa import FSA from nltk import tokenize -from pairs import KimmoPair, sort_subsets +from .pairs import KimmoPair, sort_subsets from copy import deepcopy import re, yaml @@ -65,13 +65,11 @@ def complete_fsa(self, fsa, fail_state=None): def parse_table(name, table, subsets): lines = table.split('\n') if len(lines) < 4: - raise ValueError,\ - "Rule %s has too few lines to be an FSA table." % name + raise ValueError("Rule %s has too few lines to be an FSA table." 
% name) pairs1 = lines[1].strip().split() pairs2 = lines[2].strip().split() if len(pairs1) != len(pairs2): - raise ValueError,\ - "Rule %s has pair definitions that don't line up." % name + raise ValueError("Rule %s has pair definitions that don't line up." % name) pairs = [KimmoPair(p1, p2) for p1, p2 in zip(pairs1, pairs2)] finals = [] fsa = FSA() @@ -80,18 +78,16 @@ def parse_table(name, table, subsets): if not line: continue groups = re.match(r'(\w+)(\.|:)\s*(.*)', line) if groups is None: - raise ValueError,\ - "Can't parse this line of the state table for rule %s:\n%s"\ - % (name, line) + raise ValueError("Can't parse this line of the state table for rule %s:\n%s"\ + % (name, line)) state, char, morestates = groups.groups() if fsa.start() == 0: fsa.set_start(state) if char == ':': finals.append(state) fsa.add_state(state) morestates = morestates.split() if len(morestates) != len(pairs): - raise ValueError,\ - "Rule %s has a row of the wrong length:\n%s\ngot %d items, should be %d"\ - % (name, line, len(morestates), len(pairs)) + raise ValueError("Rule %s has a row of the wrong length:\n%s\ngot %d items, should be %d"\ + % (name, line, len(morestates), len(pairs))) for pair, nextstate in zip(pairs, morestates): fsa.insert_safe(state, pair, nextstate) fsa.set_final(finals) @@ -101,11 +97,11 @@ def parse_table(name, table, subsets): def from_dfa_dict(name, states, subsets): fsa = FSA() pairs = set([KimmoPair.make('@')]) - for (statename, trans) in states.items(): + for (statename, trans) in list(states.items()): for label in trans: if label != 'others': pairs.add(KimmoPair.make(label)) - for (statename, trans) in states.items(): + for (statename, trans) in list(states.items()): parts = statename.split() source = parts[-1] if not parts[0].startswith('rej'): @@ -120,7 +116,7 @@ def from_dfa_dict(name, states, subsets): for label in trans: if label != 'others': used_pairs.add(KimmoPair.make(label)) - for label, target in trans.items(): + for label, target in list(trans.items()): if label.lower() == 'others': fsa.insert_safe(source, KimmoPair.make('@'), target) for pair in pairs.difference(used_pairs): @@ -366,11 +362,11 @@ def left_arrow(self): def demo(): rule = KimmoArrowRule("elision-e", "e:0 <== CN u _ +:@ VO", {'@': 'aeiouhklmnpw', 'VO': 'aeiou', 'CN': 'hklmnpw'}) - print rule - print rule._left_fsa - print rule._right_fsa - print - print rule._fsa + print(rule) + print((rule._left_fsa)) + print((rule._right_fsa)) + print() + print((rule._fsa)) if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/parse/__init__.py b/nltk_contrib/mit/six863/parse/__init__.py index dcd1202..875dd56 100644 --- a/nltk_contrib/mit/six863/parse/__init__.py +++ b/nltk_contrib/mit/six863/parse/__init__.py @@ -131,7 +131,7 @@ def __init__(self): """ # Make sure we're not directly instantiated: if self.__class__ == AbstractParse: - raise AssertionError, "Abstract classes can't be instantiated" + raise AssertionError("Abstract classes can't be instantiated") def parse(self, sentence): return self.get_parse_list(sentence.split()) @@ -155,11 +155,11 @@ def batch_test(self, filename): line = line.strip() if not line: continue if line.startswith('#'): - print line + print(line) continue - print "Sentence:", line + print(("Sentence:", line)) parses = self.parse(line) - print "%d parses." % len(parses) - for tree in parses: print tree + print(("%d parses." 
% len(parses))) + for tree in parses: print(tree) from nltk.parse import * diff --git a/nltk_contrib/mit/six863/parse/category.py b/nltk_contrib/mit/six863/parse/category.py index cffda16..50a3afd 100644 --- a/nltk_contrib/mit/six863/parse/category.py +++ b/nltk_contrib/mit/six863/parse/category.py @@ -11,10 +11,10 @@ # $Id: category.py 4162 2007-03-01 00:46:05Z stevenbird $ from nltk.semantics import logic -from cfg import * +from .cfg import * from kimmo import kimmo -from featurelite import * +from .featurelite import * from copy import deepcopy import yaml # import nltk.yamltags @@ -130,16 +130,16 @@ def __setitem__(self, key, value): self._features[key] = value def items(self): - return self._features.items() + return list(self._features.items()) def keys(self): - return self._features.keys() + return list(self._features.keys()) def values(self): - return self._features.values() + return list(self._features.values()) def has_key(self, key): - return self._features.has_key(key) + return key in self._features def symbol(self): """ @@ -168,7 +168,7 @@ def feature_names(self): """ @return: a list of all features that have values. """ - return self._features.keys() + return list(self._features.keys()) has_feature = has_key @@ -183,7 +183,7 @@ def remove_unbound_vars(self): @staticmethod def _remove_unbound_vars(obj): - for (key, value) in obj.items(): + for (key, value) in list(obj.items()): if isinstance(value, Variable): del obj[key] elif isinstance(value, (Category, dict)): @@ -210,7 +210,7 @@ def __str__(self): def _str(cls, obj, reentrances, reentrance_ids): segments = [] - keys = obj.keys() + keys = list(obj.keys()) keys.sort() for fname in keys: if fname == cls.headname: continue @@ -391,14 +391,14 @@ def _parseval(cls, s, position, reentrances): # Semantic value of the form '; return an ApplicationExpression match = _PARSE_RE['application'].match(s, position) if match is not None: - fun = ParserSubstitute(match.group(2)).next() - arg = ParserSubstitute(match.group(3)).next() + fun = next(ParserSubstitute(match.group(2))) + arg = next(ParserSubstitute(match.group(3))) return ApplicationExpressionSubst(fun, arg), match.end() # other semantic value enclosed by '< >'; return value given by the lambda expr parser match = _PARSE_RE['semantics'].match(s, position) if match is not None: - return ParserSubstitute(match.group(1)).next(), match.end() + return next(ParserSubstitute(match.group(1))), match.end() # String value if s[position] in "'\"": @@ -457,11 +457,11 @@ def parse_rules(cls, s): try: lhs, position = cls.inner_parse(s, position) lhs = cls(lhs) - except ValueError, e: + except ValueError as e: estr = ('Error parsing field structure\n\n\t' + s + '\n\t' + ' '*e.args[1] + '^ ' + 'Expected %s\n' % e.args[0]) - raise ValueError, estr + raise ValueError(estr) lhs.freeze() match = _PARSE_RE['arrow'].match(s, position) @@ -475,11 +475,11 @@ def parse_rules(cls, s): try: val, position = cls.inner_parse(s, position, {}) if isinstance(val, dict): val = cls(val) - except ValueError, e: + except ValueError as e: estr = ('Error parsing field structure\n\n\t' + s + '\n\t' + ' '*e.args[1] + '^ ' + 'Expected %s\n' % e.args[0]) - raise ValueError, estr + raise ValueError(estr) if isinstance(val, Category): val.freeze() rhs.append(val) position = _PARSE_RE['whitespace'].match(s, position).end() @@ -521,7 +521,7 @@ class GrammarCategory(Category): def _str(cls, obj, reentrances, reentrance_ids): segments = [] - keys = obj.keys() + keys = list(obj.keys()) keys.sort() for fname in keys: if fname == 
cls.headname: continue @@ -576,9 +576,9 @@ def inner_parse(cls, s, position, reentrances=None): if slash_match is not None: position = slash_match.end() slash, position = GrammarCategory._parseval(s, position, reentrances) - if isinstance(slash, basestring): slash = {'pos': slash} + if isinstance(slash, str): slash = {'pos': slash} body['/'] = unify(body.get('/'), slash) - elif not body.has_key('/'): + elif '/' not in body: body['/'] = False return cls(body), position @@ -652,7 +652,7 @@ def lookup(word): return lookup def earley_parser(self, trace=1): - from featurechart import FeatureEarleyChartParse + from .featurechart import FeatureEarleyChartParse if self.kimmo is None: lexicon = self.earley_lexicon() else: lexicon = self.kimmo_lexicon() @@ -706,28 +706,28 @@ def read_file(filename): yaml.add_representer(GrammarCategory, GrammarCategory.to_yaml) def demo(): - print "Category(pos='n', agr=dict(number='pl', gender='f')):" - print - print Category(pos='n', agr=dict(number='pl', gender='f')) - print repr(Category(pos='n', agr=dict(number='pl', gender='f'))) - print - print "GrammarCategory.parse('NP/NP'):" - print - print GrammarCategory.parse('NP/NP') - print repr(GrammarCategory.parse('NP/NP')) - print - print "GrammarCategory.parse('?x/?x'):" - print - print GrammarCategory.parse('?x/?x') - print repr(GrammarCategory.parse('?x/?x')) - print - print "GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'):" - print - print GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]') - print repr(GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')) - print + print("Category(pos='n', agr=dict(number='pl', gender='f')):") + print() + print((Category(pos='n', agr=dict(number='pl', gender='f')))) + print((repr(Category(pos='n', agr=dict(number='pl', gender='f'))))) + print() + print("GrammarCategory.parse('NP/NP'):") + print() + print((GrammarCategory.parse('NP/NP'))) + print((repr(GrammarCategory.parse('NP/NP')))) + print() + print("GrammarCategory.parse('?x/?x'):") + print() + print((GrammarCategory.parse('?x/?x'))) + print((repr(GrammarCategory.parse('?x/?x')))) + print() + print("GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'):") + print() + print((GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'))) + print((repr(GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')))) + print() g = GrammarFile.read_file("speer.cfg") - print g.grammar() + print((g.grammar())) if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/parse/cfg.py b/nltk_contrib/mit/six863/parse/cfg.py index ad27ac8..725d62b 100644 --- a/nltk_contrib/mit/six863/parse/cfg.py +++ b/nltk_contrib/mit/six863/parse/cfg.py @@ -226,8 +226,8 @@ def __init__(self, lhs, rhs): @param rhs: The right-hand side of the new C{Production}. @type rhs: sequence of (C{Nonterminal} and (terminal)) """ - if isinstance(rhs, (str, unicode)): - raise TypeError, 'production right hand side should be a list, not a string' + if isinstance(rhs, str): + raise TypeError('production right hand side should be a list, not a string') self._lhs = lhs self._rhs = tuple(rhs) self._hash = hash((self._lhs, self._rhs)) @@ -385,7 +385,7 @@ def parse_production(s): """ # Use _PARSE_RE to check that it's valid. if not _PARSE_RE.match(s): - raise ValueError, 'Bad production string' + raise ValueError('Bad production string') # Use _SPLIT_RE to process it. 
pieces = _SPLIT_RE.split(s) pieces = [p for i,p in enumerate(pieces) if i%2==1] @@ -407,9 +407,9 @@ def parse_grammar(s): if line.startswith('#') or line=='': continue try: productions += parse_production(line) except ValueError: - raise ValueError, 'Unable to parse line %s' % linenum + raise ValueError('Unable to parse line %s' % linenum) if len(productions) == 0: - raise ValueError, 'No productions found!' + raise ValueError('No productions found!') start = productions[0].lhs() return Grammar(start, productions) @@ -429,11 +429,11 @@ def demo(): N, V, P, Det = cfg.nonterminals('N, V, P, Det') VP_slash_NP = VP/NP - print 'Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP/NP] - print ' S.symbol() =>', `S.symbol()` - print + print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP/NP]) + print(' S.symbol() =>', repr(S.symbol())) + print() - print cfg.Production(S, [NP]) + print(cfg.Production(S, [NP])) # Create some Grammar Productions grammar = cfg.parse_grammar(""" @@ -453,11 +453,11 @@ def demo(): P -> 'in' """) - print 'A Grammar:', `grammar` - print ' grammar.start() =>', `grammar.start()` - print ' grammar.productions() =>', + print('A Grammar:', repr(grammar)) + print(' grammar.start() =>', repr(grammar.start())) + print(' grammar.productions() =>', end=' ') # Use string.replace(...) is to line-wrap the output. - print `grammar.productions()`.replace(',', ',\n'+' '*25) - print + print(repr(grammar.productions()).replace(',', ',\n'+' '*25)) + print() if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/parse/chart.py b/nltk_contrib/mit/six863/parse/chart.py index 009cbe1..6e0fcd9 100644 --- a/nltk_contrib/mit/six863/parse/chart.py +++ b/nltk_contrib/mit/six863/parse/chart.py @@ -9,8 +9,8 @@ # # $Id: chart.py 4157 2007-02-28 09:56:25Z stevenbird $ -from __init__ import * -from tree import Tree +from .__init__ import * +from .tree import Tree from nltk import cfg """ @@ -163,7 +163,7 @@ def dot(self): """ raise AssertionError('EdgeI is an abstract interface') - def next(self): + def __next__(self): """ @return: The element of this edge's right-hand side that immediately follows its dot. @@ -272,7 +272,7 @@ def rhs(self): return self._rhs def dot(self): return self._dot def is_complete(self): return self._dot == len(self._rhs) def is_incomplete(self): return self._dot != len(self._rhs) - def next(self): + def __next__(self): if self._dot >= len(self._rhs): return None else: return self._rhs[self._dot] @@ -335,7 +335,7 @@ def rhs(self): return () def dot(self): return 0 def is_complete(self): return True def is_incomplete(self): return False - def next(self): return None + def __next__(self): return None # Comparisons & hashing def __cmp__(self, other): @@ -488,12 +488,12 @@ def select(self, **restrictions): if restrictions=={}: return iter(self._edges) # Find the index corresponding to the given restrictions. - restr_keys = restrictions.keys() + restr_keys = list(restrictions.keys()) restr_keys.sort() restr_keys = tuple(restr_keys) # If it doesn't exist, then create it. - if not self._indexes.has_key(restr_keys): + if restr_keys not in self._indexes: self._add_index(restr_keys) vals = [restrictions[k] for k in restr_keys] @@ -507,7 +507,7 @@ def _add_index(self, restr_keys): # Make sure it's a valid index. for k in restr_keys: if not hasattr(EdgeI, k): - raise ValueError, 'Bad restriction: %s' % k + raise ValueError('Bad restriction: %s' % k) # Create the index. 
self._indexes[restr_keys] = {} @@ -539,12 +539,12 @@ def insert(self, edge, child_pointer_list): C{child_pointer_list} with C{edge}. """ # Is it a new edge? - if not self._edge_to_cpls.has_key(edge): + if edge not in self._edge_to_cpls: # Add it to the list of edges. self._edges.append(edge) # Register with indexes - for (restr_keys, index) in self._indexes.items(): + for (restr_keys, index) in list(self._indexes.items()): vals = [getattr(edge, k)() for k in restr_keys] index = self._indexes[restr_keys] index.setdefault(tuple(vals),[]).append(edge) @@ -553,7 +553,7 @@ def insert(self, edge, child_pointer_list): cpls = self._edge_to_cpls.setdefault(edge,{}) child_pointer_list = tuple(child_pointer_list) - if cpls.has_key(child_pointer_list): + if child_pointer_list in cpls: # We've already got this CPL; return false. return False else: @@ -600,7 +600,7 @@ def _trees(self, edge, complete, memo, tree_class): than once, we can reuse the same trees. """ # If we've seen this edge before, then reuse our old answer. - if memo.has_key(edge): return memo[edge] + if edge in memo: return memo[edge] trees = [] @@ -676,7 +676,7 @@ def child_pointer_lists(self, edge): been used to form this edge. """ # Make a copy, in case they modify it. - return self._edge_to_cpls.get(edge, {}).keys() + return list(self._edge_to_cpls.get(edge, {}).keys()) #//////////////////////////////////////////////////////////// # Display @@ -838,7 +838,7 @@ def apply(self, chart, grammar, *edges): @rtype: C{list} of L{EdgeI} @return: A list of the edges that were added. """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') def apply_iter(self, chart, grammar, *edges): """ @@ -853,7 +853,7 @@ def apply_iter(self, chart, grammar, *edges): that should be passed to C{apply} is specified by the L{NUM_EDGES} class variable. """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') def apply_everywhere(self, chart, grammar): """ @@ -863,7 +863,7 @@ def apply_everywhere(self, chart, grammar): @rtype: C{list} of L{EdgeI} @return: A list of the edges that were added. """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') def apply_everywhere_iter(self, chart, grammar): """ @@ -874,7 +874,7 @@ def apply_everywhere_iter(self, chart, grammar): return. @rtype: C{iter} of L{EdgeI} """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') class AbstractChartRule(object): """ @@ -892,7 +892,7 @@ class AbstractChartRule(object): # Subclasses must define apply_iter. def apply_iter(self, chart, grammar, *edges): - raise AssertionError, 'AbstractChartRule is an abstract class' + raise AssertionError('AbstractChartRule is an abstract class') # Default: loop through the given number of edges, and call # self.apply() for each set of edges. @@ -920,7 +920,7 @@ def apply_everywhere_iter(self, chart, grammar): yield new_edge else: - raise AssertionError, 'NUM_EDGES>3 is not currently supported' + raise AssertionError('NUM_EDGES>3 is not currently supported') # Default: delegate to apply_iter. def apply(self, chart, grammar, *edges): @@ -952,7 +952,7 @@ class FundamentalRule(AbstractChartRule): def apply_iter(self, chart, grammar, left_edge, right_edge): # Make sure the rule is applicable. 
if not (left_edge.end() == right_edge.start() and - left_edge.next() == right_edge.lhs() and + next(left_edge) == right_edge.lhs() and left_edge.is_incomplete() and right_edge.is_complete()): return @@ -992,7 +992,7 @@ def apply_iter(self, chart, grammar, edge1): if edge1.is_incomplete(): # edge1 = left_edge; edge2 = right_edge for edge2 in chart.select(start=edge1.end(), is_complete=True, - lhs=edge1.next()): + lhs=next(edge1)): for new_edge in fr.apply_iter(chart, grammar, edge1, edge2): yield new_edge else: @@ -1051,7 +1051,7 @@ class TopDownExpandRule(AbstractChartRule): NUM_EDGES = 1 def apply_iter(self, chart, grammar, edge): if edge.is_complete(): return - for prod in grammar.productions(lhs=edge.next()): + for prod in grammar.productions(lhs=next(edge)): new_edge = TreeEdge.from_production(prod, edge.end()) if chart.insert(new_edge, ()): yield new_edge @@ -1070,7 +1070,7 @@ def apply_iter(self, chart, grammar, edge): if edge.is_complete() or edge.end() >= chart.num_leaves(): return index = edge.end() leaf = chart.leaf(index) - if edge.next() == leaf: + if next(edge) == leaf: new_edge = LeafEdge(leaf, index) if chart.insert(new_edge, ()): yield new_edge @@ -1118,7 +1118,7 @@ def apply_iter(self, chart, grammar, edge): # If we've already applied this rule to an edge with the same # next & end, and the chart & grammar have not changed, then # just return (no new edges to add). - done = self._done.get((edge.next(), edge.end()), (None,None)) + done = self._done.get((next(edge), edge.end()), (None,None)) if done[0] is chart and done[1] is grammar: return # Add all the edges indicated by the top down expand rule. @@ -1126,7 +1126,7 @@ def apply_iter(self, chart, grammar, edge): yield e # Record the fact that we've applied this rule. - self._done[edge.next(), edge.end()] = (chart, grammar) + self._done[next(edge), edge.end()] = (chart, grammar) def __str__(self): return 'Top Down Expand Rule' @@ -1218,11 +1218,11 @@ def apply_iter(self, chart, gramar, edge): if edge.is_complete() or edge.end()>=chart.num_leaves(): return index = edge.end() leaf = chart.leaf(index) - if edge.next() in self._word_to_pos.get(leaf, []): + if next(edge) in self._word_to_pos.get(leaf, []): new_leaf_edge = LeafEdge(leaf, index) if chart.insert(new_leaf_edge, ()): yield new_leaf_edge - new_pos_edge = TreeEdge((index,index+1), edge.next(), + new_pos_edge = TreeEdge((index,index+1), next(edge), [leaf], 1) if chart.insert(new_pos_edge, (new_leaf_edge,)): yield new_pos_edge @@ -1283,7 +1283,7 @@ def get_parse_list(self, tokens, tree_class=Tree): # Width, for printing trace edges. w = 50/(chart.num_leaves()+1) - if self._trace > 0: print ' ', chart.pp_leaves(w) + if self._trace > 0: print(' ', chart.pp_leaves(w)) # Initialize the chart with a special "starter" edge. 
root = cfg.Nonterminal('[INIT]') @@ -1296,20 +1296,20 @@ def get_parse_list(self, tokens, tree_class=Tree): scanner = ScannerRule(self._lexicon) for end in range(chart.num_leaves()+1): - if self._trace > 1: print 'Processing queue %d' % end + if self._trace > 1: print('Processing queue %d' % end) for edge in chart.select(end=end): if edge.is_incomplete(): for e in predictor.apply(chart, grammar, edge): if self._trace > 0: - print 'Predictor', chart.pp_edge(e,w) + print('Predictor', chart.pp_edge(e,w)) if edge.is_incomplete(): for e in scanner.apply(chart, grammar, edge): if self._trace > 0: - print 'Scanner ', chart.pp_edge(e,w) + print('Scanner ', chart.pp_edge(e,w)) if edge.is_complete(): for e in completer.apply(chart, grammar, edge): if self._trace > 0: - print 'Completer', chart.pp_edge(e,w) + print('Completer', chart.pp_edge(e,w)) # Output a list of complete parses. return chart.parses(grammar.start(), tree_class=tree_class) @@ -1362,7 +1362,7 @@ def get_parse_list(self, tokens, tree_class=Tree): # Width, for printing trace edges. w = 50/(chart.num_leaves()+1) - if self._trace > 0: print chart.pp_leaves(w) + if self._trace > 0: print(chart.pp_leaves(w)) edges_added = 1 while edges_added > 0: @@ -1371,11 +1371,11 @@ def get_parse_list(self, tokens, tree_class=Tree): edges_added_by_rule = 0 for e in rule.apply_everywhere(chart, grammar): if self._trace > 0 and edges_added_by_rule == 0: - print '%s:' % rule + print('%s:' % rule) edges_added_by_rule += 1 - if self._trace > 1: print chart.pp_edge(e,w) + if self._trace > 1: print(chart.pp_edge(e,w)) if self._trace == 1 and edges_added_by_rule > 0: - print ' - Added %d edges' % edges_added_by_rule + print(' - Added %d edges' % edges_added_by_rule) edges_added += edges_added_by_rule # Return a list of complete parses. @@ -1437,14 +1437,14 @@ def step(self): added with the current strategy and grammar. """ if self._chart is None: - raise ValueError, 'Parser must be initialized first' + raise ValueError('Parser must be initialized first') while 1: self._restart = False w = 50/(self._chart.num_leaves()+1) for e in self._parse(): - if self._trace > 1: print self._current_chartrule - if self._trace > 0: print self._chart.pp_edge(e,w) + if self._trace > 1: print(self._current_chartrule) + if self._trace > 0: print(self._chart.pp_edge(e,w)) yield e if self._restart: break else: @@ -1578,23 +1578,23 @@ def demo(): # Tokenize a sample sentence. sent = 'I saw John with a dog with my cookie' - print "Sentence:\n", sent + print("Sentence:\n", sent) from nltk import tokenize tokens = list(tokenize.whitespace(sent)) - print tokens + print(tokens) # Ask the user which parser to test - print ' 1: Top-down chart parser' - print ' 2: Bottom-up chart parser' - print ' 3: Earley parser' - print ' 4: Stepping chart parser (alternating top-down & bottom-up)' - print ' 5: All parsers' - print '\nWhich parser (1-5)? ', + print(' 1: Top-down chart parser') + print(' 2: Bottom-up chart parser') + print(' 3: Earley parser') + print(' 4: Stepping chart parser (alternating top-down & bottom-up)') + print(' 5: All parsers') + print('\nWhich parser (1-5)? ', end=' ') choice = sys.stdin.readline().strip() - print + print() if choice not in '12345': - print 'Bad parser number' + print('Bad parser number') return # Keep track of how long each parser takes. 
@@ -1607,7 +1607,7 @@ def demo(): parses = cp.get_parse_list(tokens) times['top down'] = time.time()-t assert len(parses)==5, 'Not all parses found' - for tree in parses: print tree + for tree in parses: print(tree) # Run the bottom-up parser, if requested. if choice in ('2', '5'): @@ -1616,7 +1616,7 @@ def demo(): parses = cp.get_parse_list(tokens) times['bottom up'] = time.time()-t assert len(parses)==5, 'Not all parses found' - for tree in parses: print tree + for tree in parses: print(tree) # Run the earley, if requested. if choice in ('3', '5'): @@ -1625,7 +1625,7 @@ def demo(): parses = cp.get_parse_list(tokens) times['Earley parser'] = time.time()-t assert len(parses)==5, 'Not all parses found' - for tree in parses: print tree + for tree in parses: print(tree) # Run the stepping parser, if requested. if choice in ('4', '5'): @@ -1633,24 +1633,24 @@ def demo(): cp = SteppingChartParse(grammar, trace=1) cp.initialize(tokens) for i in range(5): - print '*** SWITCH TO TOP DOWN' + print('*** SWITCH TO TOP DOWN') cp.set_strategy(TD_STRATEGY) for j, e in enumerate(cp.step()): if j>20 or e is None: break - print '*** SWITCH TO BOTTOM UP' + print('*** SWITCH TO BOTTOM UP') cp.set_strategy(BU_STRATEGY) for j, e in enumerate(cp.step()): if j>20 or e is None: break times['stepping'] = time.time()-t assert len(cp.parses())==5, 'Not all parses found' - for parse in cp.parses(): print parse + for parse in cp.parses(): print(parse) # Print the times of all parsers: - maxlen = max(len(key) for key in times.keys()) - format = '%' + `maxlen` + 's parser: %6.3fsec' - times_items = times.items() + maxlen = max(len(key) for key in list(times.keys())) + format = '%' + repr(maxlen) + 's parser: %6.3fsec' + times_items = list(times.items()) times_items.sort(lambda a,b:cmp(a[1], b[1])) for (parser, t) in times_items: - print format % (parser, t) + print(format % (parser, t)) if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/parse/featurechart.py b/nltk_contrib/mit/six863/parse/featurechart.py index 75bfe27..ab2b137 100644 --- a/nltk_contrib/mit/six863/parse/featurechart.py +++ b/nltk_contrib/mit/six863/parse/featurechart.py @@ -18,7 +18,7 @@ #from category import * from nltk import cfg -from featurelite import * +from .featurelite import * def load_earley(filename, trace=1): """ @@ -112,7 +112,7 @@ def lhs(self): @return: the value of the left-hand side with variables set. @rtype: C{Category} """ - return apply(TreeEdge.lhs(self), self._vars) + return TreeEdge.lhs(self)(*self._vars) def orig_lhs(self): """ @@ -126,7 +126,7 @@ def rhs(self): @return: the value of the right-hand side with variables set. 
@rtype: C{Category} """ - return tuple(apply(x, self._vars) for x in TreeEdge.rhs(self)) + return tuple(x(*self._vars) for x in TreeEdge.rhs(self)) def orig_rhs(self): """ @@ -161,7 +161,7 @@ def apply_iter(self, chart, grammar, left_edge, right_edge): left_bindings = left_edge.vars().copy() right_bindings = right_edge.vars().copy() try: - unified = unify(left_edge.next(), right_edge.lhs(), left_bindings, + unified = unify(next(left_edge), right_edge.lhs(), left_bindings, right_bindings, memo=self.unify_memo, trace=self.trace-2) if isinstance(unified, Category): unified.freeze() except UnificationFailure: return @@ -211,7 +211,7 @@ def apply_iter(self, chart, grammar, edge): for prod in grammar.productions(): bindings = edge.vars().copy() try: - unified = unify(edge.next(), prod.lhs(), bindings, {}, + unified = unify(next(edge), prod.lhs(), bindings, {}, memo=self.unify_memo, trace=self.trace-2) if isinstance(unified, Category): unified.freeze() except UnificationFailure: @@ -256,7 +256,7 @@ def get_parse_list(self, tokens): # Width, for printing trace edges. #w = 40/(chart.num_leaves()+1) w = 2 - if self._trace > 0: print ' '*9, chart.pp_leaves(w) + if self._trace > 0: print((' '*9, chart.pp_leaves(w))) # Initialize the chart with a special "starter" edge. root = GrammarCategory(pos='[INIT]') @@ -270,7 +270,7 @@ def get_parse_list(self, tokens): #scanner = FeatureScannerRule(self._lexicon) for end in range(chart.num_leaves()+1): - if self._trace > 1: print 'Processing queue %d' % end + if self._trace > 1: print(('Processing queue %d' % end)) # Scanner rule substitute, i.e. this is being used in place # of a proper FeatureScannerRule at the moment. @@ -283,14 +283,14 @@ def get_parse_list(self, tokens): {}) chart.insert(new_pos_edge, (new_leaf_edge,)) if self._trace > 0: - print 'Scanner ', chart.pp_edge(new_pos_edge,w) + print(('Scanner ', chart.pp_edge(new_pos_edge,w))) for edge in chart.select(end=end): if edge.is_incomplete(): for e in predictor.apply(chart, grammar, edge): if self._trace > 1: - print 'Predictor', chart.pp_edge(e,w) + print(('Predictor', chart.pp_edge(e,w))) #if edge.is_incomplete(): # for e in scanner.apply(chart, grammar, edge): # if self._trace > 0: @@ -298,7 +298,7 @@ def get_parse_list(self, tokens): if edge.is_complete(): for e in completer.apply(chart, grammar, edge): if self._trace > 0: - print 'Completer', chart.pp_edge(e,w) + print(('Completer', chart.pp_edge(e,w))) # Output a list of complete parses. 
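The lhs()/rhs() hunks above replace the apply() builtin, which was removed in Python 3, with a direct call using argument unpacking. A small self-contained illustration of the rewrite (the function and data here are made up for the example, not the real edge API):

    # Python 2:  apply(func, args)   /  apply(func, args, kwargs)
    # Python 3:  func(*args)         /  func(*args, **kwargs)

    def bind(category, bindings):
        """Toy stand-in for the callable returned by the edge's lhs()/rhs()."""
        return '%s%r' % (category, sorted(bindings.items()))

    args = ('NP', {'num': 'pl'})
    # old spelling: apply(bind, args)
    print(bind(*args))          # -> NP[('num', 'pl')]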
return chart.parses(root) @@ -346,14 +346,14 @@ def lexicon(word): return earley_lexicon.get(word.upper(), []) sent = 'I saw John with a dog with my cookie' - print "Sentence:\n", sent + print(("Sentence:\n", sent)) from nltk import tokenize tokens = list(tokenize.whitespace(sent)) t = time.time() cp = FeatureEarleyChartParse(earley_grammar, lexicon, trace=1) trees = cp.get_parse_list(tokens) - print "Time: %s" % (time.time() - t) - for tree in trees: print tree + print(("Time: %s" % (time.time() - t))) + for tree in trees: print(tree) def run_profile(): import profile diff --git a/nltk_contrib/mit/six863/parse/featurelite.py b/nltk_contrib/mit/six863/parse/featurelite.py index fccb84d..74d8bb0 100644 --- a/nltk_contrib/mit/six863/parse/featurelite.py +++ b/nltk_contrib/mit/six863/parse/featurelite.py @@ -84,7 +84,7 @@ def isMapping(obj): class FeatureI(object): def __init__(self): - raise TypeError, "FeatureI is an abstract interface" + raise TypeError("FeatureI is an abstract interface") class _FORWARD(object): """ @@ -95,7 +95,7 @@ class _FORWARD(object): instantiated. """ def __init__(self): - raise TypeError, "The _FORWARD class is not meant to be instantiated" + raise TypeError("The _FORWARD class is not meant to be instantiated") class Variable(object): """ @@ -241,7 +241,7 @@ def show(data): def variable_representer(dumper, var): "Output variables in YAML as ?name." - return dumper.represent_scalar(u'!var', u'?%s' % var.name()) + return dumper.represent_scalar('!var', '?%s' % var.name()) yaml.add_representer(Variable, variable_representer) def variable_constructor(loader, node): @@ -249,8 +249,8 @@ def variable_constructor(loader, node): value = loader.construct_scalar(node) name = value[1:] return Variable(name) -yaml.add_constructor(u'!var', variable_constructor) -yaml.add_implicit_resolver(u'!var', re.compile(r'^\?\w+$')) +yaml.add_constructor('!var', variable_constructor) +yaml.add_implicit_resolver('!var', re.compile(r'^\?\w+$')) def _copy_and_bind(feature, bindings, memo=None): """ @@ -262,14 +262,14 @@ def _copy_and_bind(feature, bindings, memo=None): if memo is None: memo = {} if id(feature) in memo: return memo[id(feature)] if isinstance(feature, Variable) and bindings is not None: - if not bindings.has_key(feature.name()): + if feature.name() not in bindings: bindings[feature.name()] = feature.copy() result = _copy_and_bind(bindings[feature.name()], None, memo) else: if isMapping(feature): # Construct a new object of the same class result = feature.__class__() - for (key, value) in feature.items(): + for (key, value) in list(feature.items()): result[key] = _copy_and_bind(value, bindings, memo) else: result = feature memo[id(feature)] = result @@ -579,19 +579,19 @@ def failerror(f1, f2): if memo is None: memo = {} copymemo = {} - if memo.has_key((id(feature1), id(feature2))): + if (id(feature1), id(feature2)) in memo: result = memo[id(feature1), id(feature2)] if result is UnificationFailure: if trace > 2: - print '(cached) Unifying: %r + %r --> [fail]' % (feature1, feature2) + print('(cached) Unifying: %r + %r --> [fail]' % (feature1, feature2)) raise result() if trace > 2: - print '(cached) Unifying: %r + %r --> ' % (feature1, feature2), - print repr(result) + print('(cached) Unifying: %r + %r --> ' % (feature1, feature2), end=' ') + print(repr(result)) return result if trace > 1: - print 'Unifying: %r + %r --> ' % (feature1, feature2), + print('Unifying: %r + %r --> ' % (feature1, feature2), end=' ') # Make copies of the two structures (since the unification algorithm is 
# destructive). Use the same memo, to preserve reentrance links between @@ -600,7 +600,7 @@ def failerror(f1, f2): copy2 = _copy_and_bind(feature2, bindings2, copymemo) # Preserve links between bound variables and the two feature structures. for b in (bindings1, bindings2): - for (vname, value) in b.items(): + for (vname, value) in list(b.items()): value_id = id(value) if value_id in copymemo: b[vname] = copymemo[value_id] @@ -610,7 +610,7 @@ def failerror(f1, f2): unified = _destructively_unify(copy1, copy2, bindings1, bindings2, memo, fail) except UnificationFailure: - if trace > 1: print '[fail]' + if trace > 1: print('[fail]') memo[id(feature1), id(feature2)] = UnificationFailure raise @@ -622,9 +622,9 @@ def failerror(f1, f2): _lookup_values(bindings2, {}, remove=True) if trace > 1: - print repr(unified) + print(repr(unified)) elif trace > 0: - print 'Unifying: %r + %r --> %r' % (feature1, feature2, repr(unified)) + print('Unifying: %r + %r --> %r' % (feature1, feature2, repr(unified))) memo[id(feature1), id(feature2)] = unified return unified @@ -640,11 +640,11 @@ def _destructively_unify(feature1, feature2, bindings1, bindings2, memo, fail, and C{other} are undefined. """ if depth > 50: - print "Infinite recursion in this unification:" - print show(dict(feature1=feature1, feature2=feature2, - bindings1=bindings1, bindings2=bindings2, memo=memo)) - raise ValueError, "Infinite recursion in unification" - if memo.has_key((id(feature1), id(feature2))): + print("Infinite recursion in this unification:") + print(show(dict(feature1=feature1, feature2=feature2, + bindings1=bindings1, bindings2=bindings2, memo=memo))) + raise ValueError("Infinite recursion in unification") + if (id(feature1), id(feature2)) in memo: result = memo[id(feature1), id(feature2)] if result is UnificationFailure: raise result() unified = _do_unify(feature1, feature2, bindings1, bindings2, memo, fail, @@ -687,9 +687,9 @@ def _do_unify(feature1, feature2, bindings1, bindings2, memo, fail, depth=0): # At this point, we know they're both mappings. # Do the destructive part of unification. - while feature2.has_key(_FORWARD): feature2 = feature2[_FORWARD] + while _FORWARD in feature2: feature2 = feature2[_FORWARD] if feature1 is not feature2: feature2[_FORWARD] = feature1 - for (fname, val2) in feature2.items(): + for (fname, val2) in list(feature2.items()): if fname == _FORWARD: continue val1 = feature1.get(fname) feature1[fname] = _destructively_unify(val1, val2, bindings1, @@ -702,12 +702,12 @@ def _apply_forwards(feature, visited): the target of its forward pointer (to preserve reentrance). 
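Throughout featurelite.py the conversion replaces dict.has_key(k), which was removed in Python 3, with the in operator; the in form also works on Python 2 and avoids a method lookup. A quick illustration (the bindings contents are invented):

    bindings = {'x': 'john', 'y': 'fido'}

    # Python 2 only:   if bindings.has_key('x'): ...
    # Python 2 and 3:
    if 'x' in bindings:
        print('x is bound to', bindings['x'])
    if 'z' not in bindings:
        print('z is unbound')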
""" if not isMapping(feature): return - if visited.has_key(id(feature)): return + if id(feature) in visited: return visited[id(feature)] = True - for fname, fval in feature.items(): + for fname, fval in list(feature.items()): if isMapping(fval): - while fval.has_key(_FORWARD): + while _FORWARD in fval: fval = fval[_FORWARD] feature[fname] = fval _apply_forwards(fval, visited) @@ -739,10 +739,10 @@ def _lookup_values(mapping, visited, remove=False): else: return var.forwarded_self() if not isMapping(mapping): return mapping - if visited.has_key(id(mapping)): return mapping + if id(mapping) in visited: return mapping visited[id(mapping)] = True - for fname, fval in mapping.items(): + for fname, fval in list(mapping.items()): if isMapping(fval): _lookup_values(fval, visited) elif isinstance(fval, Variable): @@ -763,9 +763,9 @@ def _apply_forwards_to_bindings(bindings): Replace any feature structures that have been forwarded by their new identities. """ - for (key, value) in bindings.items(): - if isMapping(value) and value.has_key(_FORWARD): - while value.has_key(_FORWARD): + for (key, value) in list(bindings.items()): + if isMapping(value) and _FORWARD in value: + while _FORWARD in value: value = value[_FORWARD] bindings[key] = value diff --git a/nltk_contrib/mit/six863/parse/test.py b/nltk_contrib/mit/six863/parse/test.py index 6ea88fd..9348056 100644 --- a/nltk_contrib/mit/six863/parse/test.py +++ b/nltk_contrib/mit/six863/parse/test.py @@ -1,10 +1,10 @@ -from featurechart import * -from treeview import * +from .featurechart import * +from .treeview import * def demo(): cp = load_earley('gazdar6.cfg', trace=2) trees = cp.parse('the man who chased Fido returned') - for tree in trees: print tree + for tree in trees: print(tree) #run_profile() if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/parse/treeview.py b/nltk_contrib/mit/six863/parse/treeview.py index b932d41..2608d54 100644 --- a/nltk_contrib/mit/six863/parse/treeview.py +++ b/nltk_contrib/mit/six863/parse/treeview.py @@ -1,4 +1,4 @@ -import Tkinter +import tkinter from nltk.draw import TreeWidget from nltk.draw import CanvasFrame @@ -7,32 +7,32 @@ class TreeView: def __init__(self, trees, root=None): if len(trees) == 0: - print "No trees to display." 
+ print("No trees to display.") return newroot = False if root is None: - root = Tkinter.Tk() + root = tkinter.Tk() window = root newroot = True else: - window = Tkinter.Toplevel(root) + window = tkinter.Toplevel(root) window.title("Parse Tree") window.geometry("600x400") self.cf = CanvasFrame(window) self.cf.pack(side='top', expand=1, fill='both') - buttons = Tkinter.Frame(window) + buttons = tkinter.Frame(window) buttons.pack(side='bottom', fill='x') - self.spin = Tkinter.Spinbox(buttons, from_=1, to=len(trees), + self.spin = tkinter.Spinbox(buttons, from_=1, to=len(trees), command=self.showtree, width=3) if len(trees) > 1: self.spin.pack(side='left') - self.label = Tkinter.Label(buttons, text="of %d" % len(trees)) + self.label = tkinter.Label(buttons, text="of %d" % len(trees)) if len(trees) > 1: self.label.pack(side='left') - self.done = Tkinter.Button(buttons, text="Done", command=window.destroy) + self.done = tkinter.Button(buttons, text="Done", command=window.destroy) self.done.pack(side='right') - self.printps = Tkinter.Button(buttons, text="Print to Postscript", command=self.cf.print_to_file) + self.printps = tkinter.Button(buttons, text="Print to Postscript", command=self.cf.print_to_file) self.printps.pack(side='right') self.trees = trees diff --git a/nltk_contrib/mit/six863/semantics/__init__.py b/nltk_contrib/mit/six863/semantics/__init__.py index 51e3021..effaa19 100644 --- a/nltk_contrib/mit/six863/semantics/__init__.py +++ b/nltk_contrib/mit/six863/semantics/__init__.py @@ -131,7 +131,7 @@ def __init__(self): """ # Make sure we're not directly instantiated: if self.__class__ == AbstractParse: - raise AssertionError, "Abstract classes can't be instantiated" + raise AssertionError("Abstract classes can't be instantiated") def parse(self, sentence): return self.get_parse_list(sentence.split()) @@ -155,9 +155,9 @@ def batch_test(self, filename): line = line.strip() if not line: continue if line.startswith('#'): - print line + print(line) continue - print "Sentence:", line + print(("Sentence:", line)) parses = self.parse(line) - print "%d parses." % len(parses) - for tree in parses: print tree + print(("%d parses." % len(parses))) + for tree in parses: print(tree) diff --git a/nltk_contrib/mit/six863/semantics/batchtest.py b/nltk_contrib/mit/six863/semantics/batchtest.py index e662a64..2db2cab 100644 --- a/nltk_contrib/mit/six863/semantics/batchtest.py +++ b/nltk_contrib/mit/six863/semantics/batchtest.py @@ -1,5 +1,5 @@ -from featurechart import * -from treeview import * +from .featurechart import * +from .treeview import * def demo(): cp = load_earley('gazdar6.cfg', trace=2) diff --git a/nltk_contrib/mit/six863/semantics/category.py b/nltk_contrib/mit/six863/semantics/category.py index ea34939..0da86cb 100644 --- a/nltk_contrib/mit/six863/semantics/category.py +++ b/nltk_contrib/mit/six863/semantics/category.py @@ -10,11 +10,11 @@ # # $Id: category.py 4162 2007-03-01 00:46:05Z stevenbird $ -import logic +from . 
import logic from nltk.cfg import * #from kimmo import kimmo -from featurelite import * +from .featurelite import * from copy import deepcopy import yaml # import nltk.yamltags @@ -123,16 +123,16 @@ def __setitem__(self, key, value): self._features[key] = value def items(self): - return self._features.items() + return list(self._features.items()) def keys(self): - return self._features.keys() + return list(self._features.keys()) def values(self): - return self._features.values() + return list(self._features.values()) def has_key(self, key): - return self._features.has_key(key) + return key in self._features def symbol(self): """ @@ -161,7 +161,7 @@ def feature_names(self): """ @return: a list of all features that have values. """ - return self._features.keys() + return list(self._features.keys()) has_feature = has_key @@ -179,7 +179,7 @@ def substitute_bindings(self, bindings): @staticmethod def _remove_unbound_vars(obj): - for (key, value) in obj.items(): + for (key, value) in list(obj.items()): if isinstance(value, Variable): del obj[key] elif isinstance(value, (Category, dict)): @@ -206,7 +206,7 @@ def __str__(self): def _str(cls, obj, reentrances, reentrance_ids, normalize=False): segments = [] - keys = obj.keys() + keys = list(obj.keys()) keys.sort() for fname in keys: if fname == cls.headname: continue @@ -389,14 +389,14 @@ def _parseval(cls, s, position, reentrances): # Semantic value of the form '; return an ApplicationExpression match = _PARSE_RE['application'].match(s, position) if match is not None: - fun = ParserSubstitute(match.group(2)).next() - arg = ParserSubstitute(match.group(3)).next() + fun = next(ParserSubstitute(match.group(2))) + arg = next(ParserSubstitute(match.group(3))) return logic.ApplicationExpressionSubst(fun, arg), match.end() # other semantic value enclosed by '< >'; return value given by the lambda expr parser match = _PARSE_RE['semantics'].match(s, position) if match is not None: - return ParserSubstitute(match.group(1)).next(), match.end() + return next(ParserSubstitute(match.group(1))), match.end() # String value if s[position] in "'\"": @@ -455,11 +455,11 @@ def parse_rules(cls, s): try: lhs, position = cls.inner_parse(s, position) lhs = cls(lhs) - except ValueError, e: + except ValueError as e: estr = ('Error parsing field structure\n\n\t' + s + '\n\t' + ' '*e.args[1] + '^ ' + 'Expected %s\n' % e.args[0]) - raise ValueError, estr + raise ValueError(estr) lhs.freeze() match = _PARSE_RE['arrow'].match(s, position) @@ -473,11 +473,11 @@ def parse_rules(cls, s): try: val, position = cls.inner_parse(s, position, {}) if isinstance(val, dict): val = cls(val) - except ValueError, e: + except ValueError as e: estr = ('Error parsing field structure\n\n\t' + s + '\n\t' + ' '*e.args[1] + '^ ' + 'Expected %s\n' % e.args[0]) - raise ValueError, estr + raise ValueError(estr) if isinstance(val, Category): val.freeze() rhs.append(val) position = _PARSE_RE['whitespace'].match(s, position).end() @@ -519,7 +519,7 @@ class GrammarCategory(Category): def _str(cls, obj, reentrances, reentrance_ids, normalize=False): segments = [] - keys = obj.keys() + keys = list(obj.keys()) keys.sort() for fname in keys: if fname == cls.headname: continue @@ -576,9 +576,9 @@ def inner_parse(cls, s, position, reentrances=None): if slash_match is not None: position = slash_match.end() slash, position = GrammarCategory._parseval(s, position, reentrances) - if isinstance(slash, basestring): slash = {'pos': slash} + if isinstance(slash, str): slash = {'pos': slash} body['/'] = 
unify(body.get('/'), slash) - elif not body.has_key('/'): + elif '/' not in body: body['/'] = False return cls(body), position @@ -632,7 +632,7 @@ def lookup(word): return lookup def earley_parser(self, trace=1): - from featurechart import FeatureEarleyChartParse + from .featurechart import FeatureEarleyChartParse if self.kimmo is None: lexicon = self.earley_lexicon() else: lexicon = self.kimmo_lexicon() @@ -686,28 +686,28 @@ def read_file(filename): yaml.add_representer(GrammarCategory, GrammarCategory.to_yaml) def demo(): - print "Category(pos='n', agr=dict(number='pl', gender='f')):" - print - print Category(pos='n', agr=dict(number='pl', gender='f')) - print repr(Category(pos='n', agr=dict(number='pl', gender='f'))) - print - print "GrammarCategory.parse('NP[sem=/NP'):" - print - print GrammarCategory.parse(r'NP[sem=]/NP') - print repr(GrammarCategory.parse(r'NP[sem=]/NP')) - print - print "GrammarCategory.parse('?x/?x'):" - print - print GrammarCategory.parse('?x/?x') - print repr(GrammarCategory.parse('?x/?x')) - print - print "GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'):" - print - print GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]') - print repr(GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')) - print + print("Category(pos='n', agr=dict(number='pl', gender='f')):") + print() + print((Category(pos='n', agr=dict(number='pl', gender='f')))) + print((repr(Category(pos='n', agr=dict(number='pl', gender='f'))))) + print() + print("GrammarCategory.parse('NP[sem=/NP'):") + print() + print((GrammarCategory.parse(r'NP[sem=]/NP'))) + print((repr(GrammarCategory.parse(r'NP[sem=]/NP')))) + print() + print("GrammarCategory.parse('?x/?x'):") + print() + print((GrammarCategory.parse('?x/?x'))) + print((repr(GrammarCategory.parse('?x/?x')))) + print() + print("GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'):") + print() + print((GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]'))) + print((repr(GrammarCategory.parse('VP[+fin, agr=?x, tense=past]/NP[+pl, agr=?x]')))) + print() g = GrammarFile.read_file("speer.cfg") - print g.grammar() + print((g.grammar())) if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/semantics/cfg.py b/nltk_contrib/mit/six863/semantics/cfg.py index ad27ac8..725d62b 100644 --- a/nltk_contrib/mit/six863/semantics/cfg.py +++ b/nltk_contrib/mit/six863/semantics/cfg.py @@ -226,8 +226,8 @@ def __init__(self, lhs, rhs): @param rhs: The right-hand side of the new C{Production}. @type rhs: sequence of (C{Nonterminal} and (terminal)) """ - if isinstance(rhs, (str, unicode)): - raise TypeError, 'production right hand side should be a list, not a string' + if isinstance(rhs, str): + raise TypeError('production right hand side should be a list, not a string') self._lhs = lhs self._rhs = tuple(rhs) self._hash = hash((self._lhs, self._rhs)) @@ -385,7 +385,7 @@ def parse_production(s): """ # Use _PARSE_RE to check that it's valid. if not _PARSE_RE.match(s): - raise ValueError, 'Bad production string' + raise ValueError('Bad production string') # Use _SPLIT_RE to process it. 
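The category.py and cfg.py hunks above cover two further syntax changes: `raise Exc, msg` and `except Exc, e` become `raise Exc(msg)` and `except Exc as e`, and the Python 2 string types (str, unicode, basestring) collapse into the single str type. A hedged sketch of both patterns, independent of the grammar code (the toy parser below is illustrative only):

    def parse_production(line):
        # Python 2 accepted str or unicode; in Python 3 there is only str.
        # Python 2 spelling:  if not isinstance(line, basestring): ...
        if not isinstance(line, str):
            raise TypeError('expected a string, got %r' % type(line))
        # Python 2 spelling:  raise ValueError, 'Bad production string'
        if '->' not in line:
            raise ValueError('Bad production string')
        lhs, rhs = line.split('->', 1)
        return lhs.strip(), rhs.split()

    try:
        parse_production('S NP VP')          # no arrow, so this raises
    except ValueError as e:                  # Python 2 spelling: except ValueError, e
        print('parse failed:', e)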
pieces = _SPLIT_RE.split(s) pieces = [p for i,p in enumerate(pieces) if i%2==1] @@ -407,9 +407,9 @@ def parse_grammar(s): if line.startswith('#') or line=='': continue try: productions += parse_production(line) except ValueError: - raise ValueError, 'Unable to parse line %s' % linenum + raise ValueError('Unable to parse line %s' % linenum) if len(productions) == 0: - raise ValueError, 'No productions found!' + raise ValueError('No productions found!') start = productions[0].lhs() return Grammar(start, productions) @@ -429,11 +429,11 @@ def demo(): N, V, P, Det = cfg.nonterminals('N, V, P, Det') VP_slash_NP = VP/NP - print 'Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP/NP] - print ' S.symbol() =>', `S.symbol()` - print + print('Some nonterminals:', [S, NP, VP, PP, N, V, P, Det, VP/NP]) + print(' S.symbol() =>', repr(S.symbol())) + print() - print cfg.Production(S, [NP]) + print(cfg.Production(S, [NP])) # Create some Grammar Productions grammar = cfg.parse_grammar(""" @@ -453,11 +453,11 @@ def demo(): P -> 'in' """) - print 'A Grammar:', `grammar` - print ' grammar.start() =>', `grammar.start()` - print ' grammar.productions() =>', + print('A Grammar:', repr(grammar)) + print(' grammar.start() =>', repr(grammar.start())) + print(' grammar.productions() =>', end=' ') # Use string.replace(...) is to line-wrap the output. - print `grammar.productions()`.replace(',', ',\n'+' '*25) - print + print(repr(grammar.productions()).replace(',', ',\n'+' '*25)) + print() if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/semantics/chart.py b/nltk_contrib/mit/six863/semantics/chart.py index 8ed93f4..6cda1ff 100644 --- a/nltk_contrib/mit/six863/semantics/chart.py +++ b/nltk_contrib/mit/six863/semantics/chart.py @@ -9,7 +9,7 @@ # # $Id: chart.py 4157 2007-02-28 09:56:25Z stevenbird $ -from __init__ import * +from .__init__ import * from nltk import cfg, Tree """ @@ -162,7 +162,7 @@ def dot(self): """ raise AssertionError('EdgeI is an abstract interface') - def next(self): + def __next__(self): """ @return: The element of this edge's right-hand side that immediately follows its dot. @@ -271,7 +271,7 @@ def rhs(self): return self._rhs def dot(self): return self._dot def is_complete(self): return self._dot == len(self._rhs) def is_incomplete(self): return self._dot != len(self._rhs) - def next(self): + def __next__(self): if self._dot >= len(self._rhs): return None else: return self._rhs[self._dot] @@ -334,7 +334,7 @@ def rhs(self): return () def dot(self): return 0 def is_complete(self): return True def is_incomplete(self): return False - def next(self): return None + def __next__(self): return None # Comparisons & hashing def __cmp__(self, other): @@ -487,12 +487,12 @@ def select(self, **restrictions): if restrictions=={}: return iter(self._edges) # Find the index corresponding to the given restrictions. - restr_keys = restrictions.keys() + restr_keys = list(restrictions.keys()) restr_keys.sort() restr_keys = tuple(restr_keys) # If it doesn't exist, then create it. - if not self._indexes.has_key(restr_keys): + if restr_keys not in self._indexes: self._add_index(restr_keys) vals = [restrictions[k] for k in restr_keys] return iter(self._indexes[restr_keys].get(tuple(vals), [])) @@ -505,7 +505,7 @@ def _add_index(self, restr_keys): # Make sure it's a valid index. for k in restr_keys: if not hasattr(EdgeI, k): - raise ValueError, 'Bad restriction: %s' % k + raise ValueError('Bad restriction: %s' % k) # Create the index. 
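The EdgeI/TreeEdge/LeafEdge hunks above rename the next() method to __next__(), which is the name the Python 3 next() builtin looks up, and rewrite call sites from edge.next() to next(edge). A minimal sketch of the pattern with a toy edge class (nothing here is the real TreeEdge API):

    class ToyEdge:
        """Dotted rule: lhs -> rhs[:dot] . rhs[dot:]"""
        def __init__(self, lhs, rhs, dot=0):
            self.lhs, self.rhs, self.dot = lhs, rhs, dot

        def __next__(self):
            # Element of the right-hand side just after the dot, or None if complete.
            if self.dot >= len(self.rhs):
                return None
            return self.rhs[self.dot]

    edge = ToyEdge('S', ('NP', 'VP'), dot=1)
    # Python 2 call site:  edge.next()
    print(next(edge))          # -> VP

Strictly, __next__ is meant for iterators (objects that also define __iter__); reusing it here simply lets the old edge.next() call sites go through the next() builtin.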
self._indexes[restr_keys] = {} @@ -537,12 +537,12 @@ def insert(self, edge, child_pointer_list): C{child_pointer_list} with C{edge}. """ # Is it a new edge? - if not self._edge_to_cpls.has_key(edge): + if edge not in self._edge_to_cpls: # Add it to the list of edges. self._edges.append(edge) # Register with indexes - for (restr_keys, index) in self._indexes.items(): + for (restr_keys, index) in list(self._indexes.items()): vals = [getattr(edge, k)() for k in restr_keys] index = self._indexes[restr_keys] index.setdefault(tuple(vals),[]).append(edge) @@ -551,7 +551,7 @@ def insert(self, edge, child_pointer_list): cpls = self._edge_to_cpls.setdefault(edge,{}) child_pointer_list = tuple(child_pointer_list) - if cpls.has_key(child_pointer_list): + if child_pointer_list in cpls: # We've already got this CPL; return false. return False else: @@ -601,7 +601,7 @@ def _trees(self, edge, complete, memo, tree_class): than once, we can reuse the same trees. """ # If we've seen this edge before, then reuse our old answer. - if memo.has_key(edge): return memo[edge] + if edge in memo: return memo[edge] trees = [] @@ -677,7 +677,7 @@ def child_pointer_lists(self, edge): been used to form this edge. """ # Make a copy, in case they modify it. - return self._edge_to_cpls.get(edge, {}).keys() + return list(self._edge_to_cpls.get(edge, {}).keys()) #//////////////////////////////////////////////////////////// # Display @@ -839,7 +839,7 @@ def apply(self, chart, grammar, *edges): @rtype: C{list} of L{EdgeI} @return: A list of the edges that were added. """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') def apply_iter(self, chart, grammar, *edges): """ @@ -854,7 +854,7 @@ def apply_iter(self, chart, grammar, *edges): that should be passed to C{apply} is specified by the L{NUM_EDGES} class variable. """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') def apply_everywhere(self, chart, grammar): """ @@ -864,7 +864,7 @@ def apply_everywhere(self, chart, grammar): @rtype: C{list} of L{EdgeI} @return: A list of the edges that were added. """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') def apply_everywhere_iter(self, chart, grammar): """ @@ -875,7 +875,7 @@ def apply_everywhere_iter(self, chart, grammar): return. @rtype: C{iter} of L{EdgeI} """ - raise AssertionError, 'ChartRuleI is an abstract interface' + raise AssertionError('ChartRuleI is an abstract interface') class AbstractChartRule(object): """ @@ -893,7 +893,7 @@ class AbstractChartRule(object): # Subclasses must define apply_iter. def apply_iter(self, chart, grammar, *edges): - raise AssertionError, 'AbstractChartRule is an abstract class' + raise AssertionError('AbstractChartRule is an abstract class') # Default: loop through the given number of edges, and call # self.apply() for each set of edges. @@ -921,7 +921,7 @@ def apply_everywhere_iter(self, chart, grammar): yield new_edge else: - raise AssertionError, 'NUM_EDGES>3 is not currently supported' + raise AssertionError('NUM_EDGES>3 is not currently supported') # Default: delegate to apply_iter. def apply(self, chart, grammar, *edges): @@ -953,7 +953,7 @@ class FundamentalRule(AbstractChartRule): def apply_iter(self, chart, grammar, left_edge, right_edge): # Make sure the rule is applicable. 
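Several hunks above wrap .keys()/.items() in list(). In Python 3 these methods return live views rather than lists, so the wrapper matters wherever the code mutates the dict while looping, sorts the result in place, or hands back a snapshot the caller may modify (the "Make a copy, in case they modify it" comment in child_pointer_lists is exactly that case). A short illustration with invented contents:

    index = {'NP': ['edge1'], 'VP': ['edge2'], 'PP': []}

    # Snapshot first, so deleting entries inside the loop is safe.
    # Iterating index.items() directly would raise RuntimeError here.
    for key, edges in list(index.items()):
        if not edges:
            del index[key]

    # A view cannot be sorted in place; take a list, or better, use sorted().
    keys = sorted(index)           # replaces: keys = index.keys(); keys.sort()
    print(keys)                    # -> ['NP', 'VP']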
if not (left_edge.end() == right_edge.start() and - left_edge.next() == right_edge.lhs() and + next(left_edge) == right_edge.lhs() and left_edge.is_incomplete() and right_edge.is_complete()): return @@ -993,7 +993,7 @@ def apply_iter(self, chart, grammar, edge1): if edge1.is_incomplete(): # edge1 = left_edge; edge2 = right_edge for edge2 in chart.select(start=edge1.end(), is_complete=True, - lhs=edge1.next()): + lhs=next(edge1)): for new_edge in fr.apply_iter(chart, grammar, edge1, edge2): yield new_edge else: @@ -1052,7 +1052,7 @@ class TopDownExpandRule(AbstractChartRule): NUM_EDGES = 1 def apply_iter(self, chart, grammar, edge): if edge.is_complete(): return - for prod in grammar.productions(lhs=edge.next()): + for prod in grammar.productions(lhs=next(edge)): new_edge = TreeEdge.from_production(prod, edge.end()) if chart.insert(new_edge, ()): yield new_edge @@ -1071,7 +1071,7 @@ def apply_iter(self, chart, grammar, edge): if edge.is_complete() or edge.end() >= chart.num_leaves(): return index = edge.end() leaf = chart.leaf(index) - if edge.next() == leaf: + if next(edge) == leaf: new_edge = LeafEdge(leaf, index) if chart.insert(new_edge, ()): yield new_edge @@ -1119,7 +1119,7 @@ def apply_iter(self, chart, grammar, edge): # If we've already applied this rule to an edge with the same # next & end, and the chart & grammar have not changed, then # just return (no new edges to add). - done = self._done.get((edge.next(), edge.end()), (None,None)) + done = self._done.get((next(edge), edge.end()), (None,None)) if done[0] is chart and done[1] is grammar: return # Add all the edges indicated by the top down expand rule. @@ -1127,7 +1127,7 @@ def apply_iter(self, chart, grammar, edge): yield e # Record the fact that we've applied this rule. - self._done[edge.next(), edge.end()] = (chart, grammar) + self._done[next(edge), edge.end()] = (chart, grammar) def __str__(self): return 'Top Down Expand Rule' @@ -1219,11 +1219,11 @@ def apply_iter(self, chart, gramar, edge): if edge.is_complete() or edge.end()>=chart.num_leaves(): return index = edge.end() leaf = chart.leaf(index) - if edge.next() in self._word_to_pos.get(leaf, []): + if next(edge) in self._word_to_pos.get(leaf, []): new_leaf_edge = LeafEdge(leaf, index) if chart.insert(new_leaf_edge, ()): yield new_leaf_edge - new_pos_edge = TreeEdge((index,index+1), edge.next(), + new_pos_edge = TreeEdge((index,index+1), next(edge), [leaf], 1) if chart.insert(new_pos_edge, (new_leaf_edge,)): yield new_pos_edge @@ -1284,7 +1284,7 @@ def get_parse_list(self, tokens, tree_class=Tree): # Width, for printing trace edges. w = 50/(chart.num_leaves()+1) - if self._trace > 0: print ' ', chart.pp_leaves(w) + if self._trace > 0: print(' ', chart.pp_leaves(w)) # Initialize the chart with a special "starter" edge. 
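One thing the conversion does not touch in this region is the trace-width computation w = 50/(chart.num_leaves()+1). Under Python 2 that was integer division; under Python 3 the same expression yields a float, and a float width will break any ' '*w style padding, which is what chart pretty-printers typically do. This is an observation about an unchanged context line, not something the patch asserts; if it applies here, the Python 3 spelling would be floor division:

    num_leaves = 9
    w = 50 // (num_leaves + 1)     # floor division keeps w an int (5), as Python 2's / did
    print('.' * w)                 # '.....' ; with a float width this line raises TypeError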
root = cfg.Nonterminal('[INIT]') @@ -1297,20 +1297,20 @@ def get_parse_list(self, tokens, tree_class=Tree): scanner = ScannerRule(self._lexicon) for end in range(chart.num_leaves()+1): - if self._trace > 1: print 'Processing queue %d' % end + if self._trace > 1: print('Processing queue %d' % end) for edge in chart.select(end=end): if edge.is_incomplete(): for e in predictor.apply(chart, grammar, edge): if self._trace > 0: - print 'Predictor', chart.pp_edge(e,w) + print('Predictor', chart.pp_edge(e,w)) if edge.is_incomplete(): for e in scanner.apply(chart, grammar, edge): if self._trace > 0: - print 'Scanner ', chart.pp_edge(e,w) + print('Scanner ', chart.pp_edge(e,w)) if edge.is_complete(): for e in completer.apply(chart, grammar, edge): if self._trace > 0: - print 'Completer', chart.pp_edge(e,w) + print('Completer', chart.pp_edge(e,w)) # Output a list of complete parses. return chart.parses(grammar.start(), tree_class=tree_class) @@ -1363,7 +1363,7 @@ def get_parse_list(self, tokens, tree_class=Tree): # Width, for printing trace edges. w = 50/(chart.num_leaves()+1) - if self._trace > 0: print chart.pp_leaves(w) + if self._trace > 0: print(chart.pp_leaves(w)) edges_added = 1 while edges_added > 0: @@ -1372,11 +1372,11 @@ def get_parse_list(self, tokens, tree_class=Tree): edges_added_by_rule = 0 for e in rule.apply_everywhere(chart, grammar): if self._trace > 0 and edges_added_by_rule == 0: - print '%s:' % rule + print('%s:' % rule) edges_added_by_rule += 1 - if self._trace > 1: print chart.pp_edge(e,w) + if self._trace > 1: print(chart.pp_edge(e,w)) if self._trace == 1 and edges_added_by_rule > 0: - print ' - Added %d edges' % edges_added_by_rule + print(' - Added %d edges' % edges_added_by_rule) edges_added += edges_added_by_rule # Return a list of complete parses. @@ -1438,14 +1438,14 @@ def step(self): added with the current strategy and grammar. """ if self._chart is None: - raise ValueError, 'Parser must be initialized first' + raise ValueError('Parser must be initialized first') while 1: self._restart = False w = 50/(self._chart.num_leaves()+1) for e in self._parse(): - if self._trace > 1: print self._current_chartrule - if self._trace > 0: print self._chart.pp_edge(e,w) + if self._trace > 1: print(self._current_chartrule) + if self._trace > 0: print(self._chart.pp_edge(e,w)) yield e if self._restart: break else: @@ -1579,23 +1579,23 @@ def demo(): # Tokenize a sample sentence. sent = 'I saw John with a dog with my cookie' - print "Sentence:\n", sent + print("Sentence:\n", sent) from nltk import tokenize tokens = list(tokenize.whitespace(sent)) - print tokens + print(tokens) # Ask the user which parser to test - print ' 1: Top-down chart parser' - print ' 2: Bottom-up chart parser' - print ' 3: Earley parser' - print ' 4: Stepping chart parser (alternating top-down & bottom-up)' - print ' 5: All parsers' - print '\nWhich parser (1-5)? ', + print(' 1: Top-down chart parser') + print(' 2: Bottom-up chart parser') + print(' 3: Earley parser') + print(' 4: Stepping chart parser (alternating top-down & bottom-up)') + print(' 5: All parsers') + print('\nWhich parser (1-5)? ', end=' ') choice = sys.stdin.readline().strip() - print + print() if choice not in '12345': - print 'Bad parser number' + print('Bad parser number') return # Keep track of how long each parser takes. 
@@ -1608,7 +1608,7 @@ def demo(): parses = cp.get_parse_list(tokens) times['top down'] = time.time()-t assert len(parses)==5, 'Not all parses found' - for tree in parses: print tree + for tree in parses: print(tree) # Run the bottom-up parser, if requested. if choice in ('2', '5'): @@ -1617,7 +1617,7 @@ def demo(): parses = cp.get_parse_list(tokens) times['bottom up'] = time.time()-t assert len(parses)==5, 'Not all parses found' - for tree in parses: print tree + for tree in parses: print(tree) # Run the earley, if requested. if choice in ('3', '5'): @@ -1626,7 +1626,7 @@ def demo(): parses = cp.get_parse_list(tokens) times['Earley parser'] = time.time()-t assert len(parses)==5, 'Not all parses found' - for tree in parses: print tree + for tree in parses: print(tree) # Run the stepping parser, if requested. if choice in ('4', '5'): @@ -1634,24 +1634,24 @@ def demo(): cp = SteppingChartParse(grammar, trace=1) cp.initialize(tokens) for i in range(5): - print '*** SWITCH TO TOP DOWN' + print('*** SWITCH TO TOP DOWN') cp.set_strategy(TD_STRATEGY) for j, e in enumerate(cp.step()): if j>20 or e is None: break - print '*** SWITCH TO BOTTOM UP' + print('*** SWITCH TO BOTTOM UP') cp.set_strategy(BU_STRATEGY) for j, e in enumerate(cp.step()): if j>20 or e is None: break times['stepping'] = time.time()-t assert len(cp.parses())==5, 'Not all parses found' - for parse in cp.parses(): print parse + for parse in cp.parses(): print(parse) # Print the times of all parsers: - maxlen = max(len(key) for key in times.keys()) - format = '%' + `maxlen` + 's parser: %6.3fsec' - times_items = times.items() + maxlen = max(len(key) for key in list(times.keys())) + format = '%' + repr(maxlen) + 's parser: %6.3fsec' + times_items = list(times.items()) times_items.sort(lambda a,b:cmp(a[1], b[1])) for (parser, t) in times_items: - print format % (parser, t) + print(format % (parser, t)) if __name__ == '__main__': demo() diff --git a/nltk_contrib/mit/six863/semantics/featurechart.py b/nltk_contrib/mit/six863/semantics/featurechart.py index 9b73066..731b10c 100644 --- a/nltk_contrib/mit/six863/semantics/featurechart.py +++ b/nltk_contrib/mit/six863/semantics/featurechart.py @@ -13,11 +13,11 @@ """ import yaml -from chart import * -from category import * -import cfg +from .chart import * +from .category import * +from . import cfg -from featurelite import * +from .featurelite import * def load_earley(filename, trace=1): """ @@ -125,7 +125,7 @@ def rhs(self): @return: the value of the right-hand side with variables set. 
@rtype: C{Category} """ - return tuple(apply(x, self._vars) for x in TreeEdge.rhs(self)) + return tuple(x(*self._vars) for x in TreeEdge.rhs(self)) def orig_rhs(self): """ @@ -160,7 +160,7 @@ def apply_iter(self, chart, grammar, left_edge, right_edge): left_bindings = left_edge.vars().copy() right_bindings = right_edge.vars().copy() try: - unified = unify(left_edge.next(), right_edge.lhs(), left_bindings, + unified = unify(next(left_edge), right_edge.lhs(), left_bindings, right_bindings, memo=self.unify_memo, trace=self.trace-2) if isinstance(unified, Category): unified.freeze() except UnificationFailure: return @@ -213,7 +213,7 @@ def apply_iter(self, chart, grammar, edge): for prod in grammar.productions(): bindings = edge.vars().copy() try: - unified = unify(edge.next(), prod.lhs(), bindings, {}, + unified = unify(next(edge), prod.lhs(), bindings, {}, memo=self.unify_memo, trace=self.trace-2) if isinstance(unified, Category): unified.freeze() except UnificationFailure: @@ -258,7 +258,7 @@ def get_parse_list(self, tokens): # Width, for printing trace edges. #w = 40/(chart.num_leaves()+1) w = 2 - if self._trace > 0: print ' '*9, chart.pp_leaves(w) + if self._trace > 0: print((' '*9, chart.pp_leaves(w))) # Initialize the chart with a special "starter" edge. root = GrammarCategory(pos='[INIT]') @@ -272,7 +272,7 @@ def get_parse_list(self, tokens): #scanner = FeatureScannerRule(self._lexicon) for end in range(chart.num_leaves()+1): - if self._trace > 1: print 'Processing queue %d' % end + if self._trace > 1: print(('Processing queue %d' % end)) # Scanner rule substitute, i.e. this is being used in place # of a proper FeatureScannerRule at the moment. @@ -285,14 +285,14 @@ def get_parse_list(self, tokens): {}) chart.insert(new_pos_edge, (new_leaf_edge,)) if self._trace > 0: - print 'Scanner ', chart.pp_edge(new_pos_edge,w) + print(('Scanner ', chart.pp_edge(new_pos_edge,w))) for edge in chart.select(end=end): if edge.is_incomplete(): for e in predictor.apply(chart, grammar, edge): if self._trace > 1: - print 'Predictor', chart.pp_edge(e,w) + print(('Predictor', chart.pp_edge(e,w))) #if edge.is_incomplete(): # for e in scanner.apply(chart, grammar, edge): # if self._trace > 0: @@ -300,7 +300,7 @@ def get_parse_list(self, tokens): if edge.is_complete(): for e in completer.apply(chart, grammar, edge): if self._trace > 0: - print 'Completer', chart.pp_edge(e,w) + print(('Completer', chart.pp_edge(e,w))) # Output a list of complete parses. 
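A cosmetic wart worth noting in the FeatureEarleyChartParse tracing above: the mechanical conversion wraps the old multi-argument print statements in an extra pair of parentheses, e.g. print(('Scanner  ', chart.pp_edge(new_pos_edge,w))). That prints a single tuple, so the trace output gains parentheses, quotes and commas compared with the Python 2 version. Passing separate arguments restores the old formatting; a tiny comparison with stand-in strings:

    label, edge_repr = 'Scanner  ', '[--------] NP -> Det N *'   # invented example values

    print((label, edge_repr))    # ('Scanner  ', '[--------] NP -> Det N *')
    print(label, edge_repr)      # Scanner   [--------] NP -> Det N *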
return chart.parses(root) @@ -348,14 +348,14 @@ def lexicon(word): return earley_lexicon.get(word.upper(), []) sent = 'I saw John with a dog with my cookie' - print "Sentence:\n", sent + print(("Sentence:\n", sent)) from nltk import tokenize tokens = list(tokenize.whitespace(sent)) t = time.time() cp = FeatureEarleyChartParse(earley_grammar, lexicon, trace=1) trees = cp.get_parse_list(tokens) - print "Time: %s" % (time.time() - t) - for tree in trees: print tree + print(("Time: %s" % (time.time() - t))) + for tree in trees: print(tree) def run_profile(): import profile diff --git a/nltk_contrib/mit/six863/semantics/featurelite.py b/nltk_contrib/mit/six863/semantics/featurelite.py index 60f8c0e..3dbb6dd 100644 --- a/nltk_contrib/mit/six863/semantics/featurelite.py +++ b/nltk_contrib/mit/six863/semantics/featurelite.py @@ -91,7 +91,7 @@ def isMapping(obj): class FeatureI(object): def __init__(self): - raise TypeError, "FeatureI is an abstract interface" + raise TypeError("FeatureI is an abstract interface") class _FORWARD(object): """ @@ -102,7 +102,7 @@ class _FORWARD(object): instantiated. """ def __init__(self): - raise TypeError, "The _FORWARD class is not meant to be instantiated" + raise TypeError("The _FORWARD class is not meant to be instantiated") class Variable(object): """ @@ -260,7 +260,7 @@ def substitute_bindings(self, bindings): # discard Variables which don't look like FeatureVariables if varstr.startswith('?'): var = makevar(varstr) - if bindings.has_key(var.name()): + if var.name() in bindings: newval = newval.replace(semvar, bindings[var.name()]) return newval @@ -278,13 +278,13 @@ def object_to_features(obj): if isMapping(obj): return obj dict = {} dict['__class__'] = obj.__class__.__name__ - for (key, value) in obj.__dict__.items(): + for (key, value) in list(obj.__dict__.items()): dict[key] = object_to_features(value) return dict def variable_representer(dumper, var): "Output variables in YAML as ?name." 
- return dumper.represent_scalar(u'!var', u'?%s' % var.name()) + return dumper.represent_scalar('!var', '?%s' % var.name()) yaml.add_representer(Variable, variable_representer) def variable_constructor(loader, node): @@ -292,8 +292,8 @@ def variable_constructor(loader, node): value = loader.construct_scalar(node) name = value[1:] return Variable(name) -yaml.add_constructor(u'!var', variable_constructor) -yaml.add_implicit_resolver(u'!var', re.compile(r'^\?\w+$')) +yaml.add_constructor('!var', variable_constructor) +yaml.add_implicit_resolver('!var', re.compile(r'^\?\w+$')) def _copy_and_bind(feature, bindings, memo=None): """ @@ -305,14 +305,14 @@ def _copy_and_bind(feature, bindings, memo=None): if memo is None: memo = {} if id(feature) in memo: return memo[id(feature)] if isinstance(feature, Variable) and bindings is not None: - if not bindings.has_key(feature.name()): + if feature.name() not in bindings: bindings[feature.name()] = feature.copy() result = _copy_and_bind(bindings[feature.name()], None, memo) else: if isMapping(feature): # Construct a new object of the same class result = feature.__class__() - for (key, value) in feature.items(): + for (key, value) in list(feature.items()): result[key] = _copy_and_bind(value, bindings, memo) elif isinstance(feature, SubstituteBindingsI): if bindings is not None: @@ -629,19 +629,19 @@ def failerror(f1, f2): if memo is None: memo = {} copymemo = {} - if memo.has_key((id(feature1), id(feature2))): + if (id(feature1), id(feature2)) in memo: result = memo[id(feature1), id(feature2)] if result is UnificationFailure: if trace > 2: - print '(cached) Unifying: %r + %r --> [fail]' % (feature1, feature2) + print('(cached) Unifying: %r + %r --> [fail]' % (feature1, feature2)) raise result() if trace > 2: - print '(cached) Unifying: %r + %r --> ' % (feature1, feature2), - print repr(result) + print('(cached) Unifying: %r + %r --> ' % (feature1, feature2), end=' ') + print(repr(result)) return result if trace > 1: - print 'Unifying: %r + %r --> ' % (feature1, feature2), + print('Unifying: %r + %r --> ' % (feature1, feature2), end=' ') # Make copies of the two structures (since the unification algorithm is # destructive). Use the same memo, to preserve reentrance links between @@ -650,7 +650,7 @@ def failerror(f1, f2): copy2 = _copy_and_bind(feature2, bindings2, copymemo) # Preserve links between bound variables and the two feature structures. for b in (bindings1, bindings2): - for (vname, value) in b.items(): + for (vname, value) in list(b.items()): value_id = id(value) if value_id in copymemo: b[vname] = copymemo[value_id] @@ -660,7 +660,7 @@ def failerror(f1, f2): unified = _destructively_unify(copy1, copy2, bindings1, bindings2, memo, fail) except UnificationFailure: - if trace > 1: print '[fail]' + if trace > 1: print('[fail]') memo[id(feature1), id(feature2)] = UnificationFailure raise @@ -672,9 +672,9 @@ def failerror(f1, f2): _lookup_values(bindings2, {}, remove=True) if trace > 1: - print repr(unified) + print(repr(unified)) elif trace > 0: - print 'Unifying: %r + %r --> %r' % (feature1, feature2, repr(unified)) + print('Unifying: %r + %r --> %r' % (feature1, feature2, repr(unified))) memo[id(feature1), id(feature2)] = unified return unified @@ -690,11 +690,11 @@ def _destructively_unify(feature1, feature2, bindings1, bindings2, memo, fail, and C{other} are undefined. 
""" if depth > 50: - print "Infinite recursion in this unification:" - print show(dict(feature1=feature1, feature2=feature2, - bindings1=bindings1, bindings2=bindings2, memo=memo)) - raise ValueError, "Infinite recursion in unification" - if memo.has_key((id(feature1), id(feature2))): + print("Infinite recursion in this unification:") + print(show(dict(feature1=feature1, feature2=feature2, + bindings1=bindings1, bindings2=bindings2, memo=memo))) + raise ValueError("Infinite recursion in unification") + if (id(feature1), id(feature2)) in memo: result = memo[id(feature1), id(feature2)] if result is UnificationFailure: raise result() unified = _do_unify(feature1, feature2, bindings1, bindings2, memo, fail, @@ -737,9 +737,9 @@ def _do_unify(feature1, feature2, bindings1, bindings2, memo, fail, depth=0): # At this point, we know they're both mappings. # Do the destructive part of unification. - while feature2.has_key(_FORWARD): feature2 = feature2[_FORWARD] + while _FORWARD in feature2: feature2 = feature2[_FORWARD] if feature1 is not feature2: feature2[_FORWARD] = feature1 - for (fname, val2) in feature2.items(): + for (fname, val2) in list(feature2.items()): if fname == _FORWARD: continue val1 = feature1.get(fname) feature1[fname] = _destructively_unify(val1, val2, bindings1, @@ -752,12 +752,12 @@ def _apply_forwards(feature, visited): the target of its forward pointer (to preserve reentrance). """ if not isMapping(feature): return - if visited.has_key(id(feature)): return + if id(feature) in visited: return visited[id(feature)] = True - for fname, fval in feature.items(): + for fname, fval in list(feature.items()): if isMapping(fval): - while fval.has_key(_FORWARD): + while _FORWARD in fval: fval = fval[_FORWARD] feature[fname] = fval _apply_forwards(fval, visited) @@ -789,10 +789,10 @@ def _lookup_values(mapping, visited, remove=False): else: return var.forwarded_self() if not isMapping(mapping): return mapping - if visited.has_key(id(mapping)): return mapping + if id(mapping) in visited: return mapping visited[id(mapping)] = True - for fname, fval in mapping.items(): + for fname, fval in list(mapping.items()): if isMapping(fval): _lookup_values(fval, visited) elif isinstance(fval, Variable): @@ -813,9 +813,9 @@ def _apply_forwards_to_bindings(bindings): Replace any feature structures that have been forwarded by their new identities. """ - for (key, value) in bindings.items(): - if isMapping(value) and value.has_key(_FORWARD): - while value.has_key(_FORWARD): + for (key, value) in list(bindings.items()): + if isMapping(value) and _FORWARD in value: + while _FORWARD in value: value = value[_FORWARD] bindings[key] = value diff --git a/nltk_contrib/mit/six863/semantics/interact.py b/nltk_contrib/mit/six863/semantics/interact.py index 2ca0f9a..234bd18 100644 --- a/nltk_contrib/mit/six863/semantics/interact.py +++ b/nltk_contrib/mit/six863/semantics/interact.py @@ -1,5 +1,5 @@ -from featurechart import * -from logic import Counter +from .featurechart import * +from .logic import Counter import sys def interact(grammar_filename, trace=2): @@ -14,10 +14,10 @@ def interact(grammar_filename, trace=2): # Read a line and parse it. trees = cp.parse(line) if len(trees) == 0: - print "I don't understand." + print("I don't understand.") continue elif len(trees) > 1: - print "That was ambiguous, but I'll guess at what you meant." + print("That was ambiguous, but I'll guess at what you meant.") # Extract semantic information from the parse tree. 
tree = trees[0] @@ -36,13 +36,13 @@ def interact(grammar_filename, trace=2): skolem = skolem.replace_unique(var, counter) if trace > 0: - print tree - print 'Semantic value:', skolem + print(tree) + print(('Semantic value:', skolem)) clauses = skolem.clauses() if trace > 1: - print "Got these clauses:" + print("Got these clauses:") for clause in clauses: - print '\t', clause + print(('\t', clause)) if pos == 'S': # Handle statements @@ -68,11 +68,11 @@ def interact(grammar_filename, trace=2): if success: # answer answer = bindings.get('wh', 'Yes.') - print answer['variable']['name'] + print((answer['variable']['name'])) else: # This is an open world without negation, so negative answers # aren't possible. - print "I don't know." + print("I don't know.") def demo(): interact('lab3-slash.cfg', trace=2) diff --git a/nltk_contrib/mit/six863/semantics/logic.py b/nltk_contrib/mit/six863/semantics/logic.py index 4fe71b8..430fae8 100644 --- a/nltk_contrib/mit/six863/semantics/logic.py +++ b/nltk_contrib/mit/six863/semantics/logic.py @@ -1,7 +1,7 @@ # Natural Language Toolkit: Logic from nltk.utilities import Counter -from featurelite import SubstituteBindingsMixin, FeatureI -from featurelite import Variable as FeatureVariable +from .featurelite import SubstituteBindingsMixin, FeatureI +from .featurelite import Variable as FeatureVariable _counter = Counter() def unique_variable(counter=None): @@ -137,7 +137,7 @@ def __repr__(self): raise NotImplementedError def __hash__(self): - raise NotImplementedError, self.__class__ + raise NotImplementedError(self.__class__) def normalize(self): if hasattr(self, '_normalized'): return self._normalized @@ -612,7 +612,7 @@ def parse(self, data): @returns: a parsed Expression """ self.feed(data) - result = self.next() + result = next(self) return result def process(self): @@ -629,7 +629,7 @@ def token(self, destructive=1): whether the token will be removed from the buffer; setting it to 0 gives lookahead capability.""" if self.buffer == '': - raise Error, "end of stream" + raise Error("end of stream") tok = None buffer = self.buffer while not tok: @@ -654,7 +654,7 @@ def isVariable(self, token): TOKENS.extend(Parser.BOOL) return token not in TOKENS - def next(self): + def __next__(self): """Parse the next complete expression from the stream and return it.""" tok = self.token() @@ -678,8 +678,8 @@ def next(self): tok = self.token() if tok != Parser.DOT: - raise Error, "parse error, unexpected token: %s" % tok - term = self.next() + raise Error("parse error, unexpected token: %s" % tok) + term = next(self) accum = factory(Variable(vars.pop()), term) while vars: accum = factory(Variable(vars.pop()), accum) @@ -687,12 +687,12 @@ def next(self): elif tok == Parser.OPEN: # Expression is an application expression: (M N) - first = self.next() - second = self.next() + first = next(self) + second = next(self) exps = [] while self.token(0) != Parser.CLOSE: # Support expressions like: (M N P) == ((M N) P) - exps.append(self.next()) + exps.append(next(self)) tok = self.token() # swallow the close token assert tok == Parser.CLOSE if isinstance(second, Operator): @@ -721,7 +721,7 @@ def next(self): # Expression is a simple variable expression: x return VariableExpression(Variable(tok)) else: - raise Error, "parse error, unexpected token: %s" % tok + raise Error("parse error, unexpected token: %s" % tok) # This is intended to be overridden, so that you can derive a Parser class # that constructs expressions using your subclasses. 
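The logic.py hunks rename Parser.next() to Parser.__next__() and rewrite the internal call sites as next(self). Any external code that still calls parser.next() directly would now fail with AttributeError; if that matters, a common straddling idiom is to keep an alias. A hedged sketch with a toy class, not the real Parser:

    class TokenStream:
        def __init__(self, tokens):
            self._tokens = list(tokens)

        def __next__(self):
            # Pop and return the next token; parsing code calls next(stream).
            if not self._tokens:
                raise StopIteration
            return self._tokens.pop(0)

        next = __next__        # optional: keeps legacy stream.next() callers working

    stream = TokenStream(['\\', 'x', '.', '(', 'x', 'y', ')'])
    print(next(stream))        # -> \
    print(stream.next())       # -> x, via the legacy spelling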
So far we only need @@ -762,7 +762,7 @@ def expressions(): ApplicationExpression(XZ, Y)))) O = LambdaExpression(x, LambdaExpression(y, XY)) N = ApplicationExpression(LambdaExpression(x, XA), I) - T = Parser('\\x y.(x y z)').next() + T = next(Parser('\\x y.(x y z)')) return [X, XZ, XYZ, I, K, L, S, B, C, O, N, T] def demo(): @@ -771,21 +771,21 @@ def demo(): P = VariableExpression(p) Q = VariableExpression(q) for l in expressions(): - print "Expression:", l - print "Variables:", l.variables() - print "Free:", l.free() - print "Subterms:", l.subterms() - print "Simplify:",l.simplify() + print(("Expression:", l)) + print(("Variables:", l.variables())) + print(("Free:", l.free())) + print(("Subterms:", l.subterms())) + print(("Simplify:",l.simplify())) la = ApplicationExpression(ApplicationExpression(l, P), Q) las = la.simplify() - print "Apply and simplify: %s -> %s" % (la, las) - ll = Parser(str(l)).next() - print 'l is:', l - print 'll is:', ll + print(("Apply and simplify: %s -> %s" % (la, las))) + ll = next(Parser(str(l))) + print(('l is:', l)) + print(('ll is:', ll)) assert l.equals(ll) - print "Serialize and reparse: %s -> %s" % (l, ll) - print "Variables:", ll.variables() - print "Normalize: %s" % ll.normalize() + print(("Serialize and reparse: %s -> %s" % (l, ll))) + print(("Variables:", ll.variables())) + print(("Normalize: %s" % ll.normalize())) if __name__ == '__main__': diff --git a/nltk_contrib/mit/six863/semantics/test.py b/nltk_contrib/mit/six863/semantics/test.py index 5aac91e..45d6068 100644 --- a/nltk_contrib/mit/six863/semantics/test.py +++ b/nltk_contrib/mit/six863/semantics/test.py @@ -1,14 +1,14 @@ -from featurechart import * -from treeview import * +from .featurechart import * +from .treeview import * def demo(): cp = load_earley('lab3-slash.cfg', trace=0) trees = cp.parse('Mary sees a dog in Noosa') for tree in trees: - print tree + print(tree) sem = tree[0].node['sem'] - print sem - print sem.skolemise().clauses() + print(sem) + print((sem.skolemise().clauses())) return sem.skolemise().clauses() #run_profile() diff --git a/nltk_contrib/mit/six863/semantics/testw.py b/nltk_contrib/mit/six863/semantics/testw.py index 1893945..3e3b243 100644 --- a/nltk_contrib/mit/six863/semantics/testw.py +++ b/nltk_contrib/mit/six863/semantics/testw.py @@ -1,14 +1,14 @@ -from featurechart import * -from treeview import * +from .featurechart import * +from .treeview import * def demo(): cp = load_earley('lab3-slash.cfg', trace=1) trees = cp.parse('Mary walks') for tree in trees: - print tree + print(tree) sem = tree[0].node['sem'] - print sem - print sem.skolemise().clauses() + print(sem) + print((sem.skolemise().clauses())) return sem.skolemise().clauses() #run_profile() diff --git a/nltk_contrib/mit/six863/semantics/treeview.py b/nltk_contrib/mit/six863/semantics/treeview.py index b932d41..2608d54 100644 --- a/nltk_contrib/mit/six863/semantics/treeview.py +++ b/nltk_contrib/mit/six863/semantics/treeview.py @@ -1,4 +1,4 @@ -import Tkinter +import tkinter from nltk.draw import TreeWidget from nltk.draw import CanvasFrame @@ -7,32 +7,32 @@ class TreeView: def __init__(self, trees, root=None): if len(trees) == 0: - print "No trees to display." 
+ print("No trees to display.") return newroot = False if root is None: - root = Tkinter.Tk() + root = tkinter.Tk() window = root newroot = True else: - window = Tkinter.Toplevel(root) + window = tkinter.Toplevel(root) window.title("Parse Tree") window.geometry("600x400") self.cf = CanvasFrame(window) self.cf.pack(side='top', expand=1, fill='both') - buttons = Tkinter.Frame(window) + buttons = tkinter.Frame(window) buttons.pack(side='bottom', fill='x') - self.spin = Tkinter.Spinbox(buttons, from_=1, to=len(trees), + self.spin = tkinter.Spinbox(buttons, from_=1, to=len(trees), command=self.showtree, width=3) if len(trees) > 1: self.spin.pack(side='left') - self.label = Tkinter.Label(buttons, text="of %d" % len(trees)) + self.label = tkinter.Label(buttons, text="of %d" % len(trees)) if len(trees) > 1: self.label.pack(side='left') - self.done = Tkinter.Button(buttons, text="Done", command=window.destroy) + self.done = tkinter.Button(buttons, text="Done", command=window.destroy) self.done.pack(side='right') - self.printps = Tkinter.Button(buttons, text="Print to Postscript", command=self.cf.print_to_file) + self.printps = tkinter.Button(buttons, text="Print to Postscript", command=self.cf.print_to_file) self.printps.pack(side='right') self.trees = trees diff --git a/nltk_contrib/mit/six863/tagging/drawchart.py b/nltk_contrib/mit/six863/tagging/drawchart.py index 7a050c4..5aeb577 100644 --- a/nltk_contrib/mit/six863/tagging/drawchart.py +++ b/nltk_contrib/mit/six863/tagging/drawchart.py @@ -39,8 +39,8 @@ # widget system. import pickle -from tkFileDialog import asksaveasfilename, askopenfilename -import Tkinter, tkFont, tkMessageBox +from tkinter.filedialog import asksaveasfilename, askopenfilename +import tkinter, tkinter.font, tkinter.messagebox import math import os.path @@ -103,12 +103,12 @@ def __init__(self, parent, chart, toplevel=True, title='Chart Matrix', self._selected_cell = None if toplevel: - self._root = Tkinter.Toplevel(parent) + self._root = tkinter.Toplevel(parent) self._root.title(title) self._root.bind('', self.destroy) self._init_quit(self._root) else: - self._root = Tkinter.Frame(parent) + self._root = tkinter.Frame(parent) self._init_matrix(self._root) self._init_list(self._root) @@ -124,18 +124,18 @@ def __init__(self, parent, chart, toplevel=True, title='Chart Matrix', self.draw() def _init_quit(self, root): - quit = Tkinter.Button(root, text='Quit', command=self.destroy) + quit = tkinter.Button(root, text='Quit', command=self.destroy) quit.pack(side='bottom', expand=0, fill='none') def _init_matrix(self, root): - cframe = Tkinter.Frame(root, border=2, relief='sunken') + cframe = tkinter.Frame(root, border=2, relief='sunken') cframe.pack(expand=0, fill='none', padx=1, pady=3, side='top') - self._canvas = Tkinter.Canvas(cframe, width=200, height=200, + self._canvas = tkinter.Canvas(cframe, width=200, height=200, background='white') self._canvas.pack(expand=0, fill='none') def _init_numedges(self, root): - self._numedges_label = Tkinter.Label(root, text='0 edges') + self._numedges_label = tkinter.Label(root, text='0 edges') self._numedges_label.pack(expand=0, fill='none', side='top') def _init_list(self, root): @@ -212,8 +212,8 @@ def remove_callback(self, event, func=None): except: pass def _fire_callbacks(self, event, *args): - if not self._callbacks.has_key(event): return - for cb_func in self._callbacks[event].keys(): cb_func(*args) + if event not in self._callbacks: return + for cb_func in list(self._callbacks[event].keys()): cb_func(*args) def select_cell(self, i, 
j): if self._root is None: return @@ -274,9 +274,9 @@ def draw(self): # Labels and dotted lines for i in range(N): c.create_text(LEFT_MARGIN-2, i*dy+dy/2+TOP_MARGIN, - text=`i`, anchor='e') + text=repr(i), anchor='e') c.create_text(i*dx+dx/2+LEFT_MARGIN, N*dy+TOP_MARGIN+1, - text=`i`, anchor='n') + text=repr(i), anchor='n') c.create_line(LEFT_MARGIN, dy*(i+1)+TOP_MARGIN, dx*N+LEFT_MARGIN, dy*(i+1)+TOP_MARGIN, dash='.') c.create_line(dx*i+LEFT_MARGIN, TOP_MARGIN, @@ -327,21 +327,21 @@ def __init__(self, parent, chart, grammar, toplevel=True): self._selectbox = None if toplevel: - self._root = Tkinter.Toplevel(parent) + self._root = tkinter.Toplevel(parent) self._root.title('Chart Parsing Demo: Results') self._root.bind('', self.destroy) else: - self._root = Tkinter.Frame(parent) + self._root = tkinter.Frame(parent) # Buttons if toplevel: - buttons = Tkinter.Frame(self._root) + buttons = tkinter.Frame(self._root) buttons.pack(side='bottom', expand=0, fill='x') - Tkinter.Button(buttons, text='Quit', + tkinter.Button(buttons, text='Quit', command=self.destroy).pack(side='right') - Tkinter.Button(buttons, text='Print All', + tkinter.Button(buttons, text='Print All', command=self.print_all).pack(side='left') - Tkinter.Button(buttons, text='Print Selection', + tkinter.Button(buttons, text='Print Selection', command=self.print_selection).pack(side='left') # Canvas frame. @@ -404,7 +404,7 @@ def print_all(self, *e): def print_selection(self, *e): if self._root is None: return if self._selection is None: - tkMessageBox.showerror('Print Error', 'No tree selected') + tkinter.messagebox.showerror('Print Error', 'No tree selected') else: c = self._cframe.canvas() for widget in self._treewidgets: @@ -509,7 +509,7 @@ def __init__(self, *chart_filenames): self._operator = None # Set up the root window. - self._root = Tkinter.Tk() + self._root = tkinter.Tk() self._root.title('Chart Comparison') self._root.bind('', self.destroy) self._root.bind('', self.destroy) @@ -540,10 +540,10 @@ def mainloop(self, *args, **kwargs): #//////////////////////////////////////////////////////////// def _init_menubar(self, root): - menubar = Tkinter.Menu(root) + menubar = tkinter.Menu(root) # File menu - filemenu = Tkinter.Menu(menubar, tearoff=0) + filemenu = tkinter.Menu(menubar, tearoff=0) filemenu.add_command(label='Load Chart', accelerator='Ctrl-o', underline=0, command=self.load_chart_dialog) filemenu.add_command(label='Save Output', accelerator='Ctrl-s', @@ -554,7 +554,7 @@ def _init_menubar(self, root): menubar.add_cascade(label='File', underline=0, menu=filemenu) # Compare menu - opmenu = Tkinter.Menu(menubar, tearoff=0) + opmenu = tkinter.Menu(menubar, tearoff=0) opmenu.add_command(label='Intersection', command=self._intersection, accelerator='+') @@ -573,21 +573,21 @@ def _init_menubar(self, root): self._root.config(menu=menubar) def _init_divider(self, root): - divider = Tkinter.Frame(root, border=2, relief='sunken') + divider = tkinter.Frame(root, border=2, relief='sunken') divider.pack(side='top', fill='x', ipady=2) def _init_chartviews(self, root): opfont=('symbol', -36) # Font for operator. eqfont=('helvetica', -36) # Font for equals sign. - frame = Tkinter.Frame(root, background='#c0c0c0') + frame = tkinter.Frame(root, background='#c0c0c0') frame.pack(side='top', expand=1, fill='both') # The left matrix. 
- cv1_frame = Tkinter.Frame(frame, border=3, relief='groove') + cv1_frame = tkinter.Frame(frame, border=3, relief='groove') cv1_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') self._left_selector = MutableOptionMenu( - cv1_frame, self._charts.keys(), command=self._select_left) + cv1_frame, list(self._charts.keys()), command=self._select_left) self._left_selector.pack(side='top', pady=5, fill='x') self._left_matrix = ChartMatrixView(cv1_frame, self._emptychart, toplevel=False, @@ -599,15 +599,15 @@ def _init_chartviews(self, root): self._left_matrix.inactivate() # The operator. - self._op_label = Tkinter.Label(frame, text=' ', width=3, + self._op_label = tkinter.Label(frame, text=' ', width=3, background='#c0c0c0', font=opfont) self._op_label.pack(side='left', padx=5, pady=5) # The right matrix. - cv2_frame = Tkinter.Frame(frame, border=3, relief='groove') + cv2_frame = tkinter.Frame(frame, border=3, relief='groove') cv2_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') self._right_selector = MutableOptionMenu( - cv2_frame, self._charts.keys(), command=self._select_right) + cv2_frame, list(self._charts.keys()), command=self._select_right) self._right_selector.pack(side='top', pady=5, fill='x') self._right_matrix = ChartMatrixView(cv2_frame, self._emptychart, toplevel=False, @@ -619,13 +619,13 @@ def _init_chartviews(self, root): self._right_matrix.inactivate() # The equals sign - Tkinter.Label(frame, text='=', width=3, background='#c0c0c0', + tkinter.Label(frame, text='=', width=3, background='#c0c0c0', font=eqfont).pack(side='left', padx=5, pady=5) # The output matrix. - out_frame = Tkinter.Frame(frame, border=3, relief='groove') + out_frame = tkinter.Frame(frame, border=3, relief='groove') out_frame.pack(side='left', padx=8, pady=7, expand=1, fill='both') - self._out_label = Tkinter.Label(out_frame, text='Output') + self._out_label = tkinter.Label(out_frame, text='Output') self._out_label.pack(side='top', pady=9) self._out_matrix = ChartMatrixView(out_frame, self._emptychart, toplevel=False, @@ -637,19 +637,19 @@ def _init_chartviews(self, root): self._out_matrix.inactivate() def _init_buttons(self, root): - buttons = Tkinter.Frame(root) + buttons = tkinter.Frame(root) buttons.pack(side='bottom', pady=5, fill='x', expand=0) - Tkinter.Button(buttons, text='Intersection', + tkinter.Button(buttons, text='Intersection', command=self._intersection).pack(side='left') - Tkinter.Button(buttons, text='Union', + tkinter.Button(buttons, text='Union', command=self._union).pack(side='left') - Tkinter.Button(buttons, text='Difference', + tkinter.Button(buttons, text='Difference', command=self._difference).pack(side='left') - Tkinter.Frame(buttons, width=20).pack(side='left') - Tkinter.Button(buttons, text='Swap Charts', + tkinter.Frame(buttons, width=20).pack(side='left') + tkinter.Button(buttons, text='Swap Charts', command=self._swapcharts).pack(side='left') - Tkinter.Button(buttons, text='Detatch Output', + tkinter.Button(buttons, text='Detatch Output', command=self._detatch_out).pack(side='right') def _init_bindings(self, root): @@ -692,8 +692,8 @@ def save_chart_dialog(self, *args): defaultextension='.pickle') if not filename: return try: pickle.dump((self._out_chart), open(filename, 'w')) - except Exception, e: - tkMessageBox.showerror('Error Saving Chart', + except Exception as e: + tkinter.messagebox.showerror('Error Saving Chart', 'Unable to open file: %r\n%s' % (filename, e)) @@ -702,8 +702,8 @@ def load_chart_dialog(self, *args): defaultextension='.pickle') if not 
filename: return try: self.load_chart(filename) - except Exception, e: - tkMessageBox.showerror('Error Loading Chart', + except Exception as e: + tkinter.messagebox.showerror('Error Loading Chart', 'Unable to open file: %r\n%s' % (filename, e)) @@ -925,12 +925,12 @@ def __init__(self, chart, root=None, **kw): # If they didn't provide a main window, then set one up. if root is None: - top = Tkinter.Tk() + top = tkinter.Tk() top.title('Chart View') def destroy1(e, top=top): top.destroy() def destroy2(top=top): top.destroy() top.bind('q', destroy1) - b = Tkinter.Button(top, text='Done', command=destroy2) + b = tkinter.Button(top, text='Done', command=destroy2) b.pack(side='bottom') self._root = top else: @@ -947,9 +947,9 @@ def destroy2(top=top): top.destroy() # Create the sentence canvas. if draw_sentence: - cframe = Tkinter.Frame(self._root, relief='sunk', border=2) + cframe = tkinter.Frame(self._root, relief='sunk', border=2) cframe.pack(fill='both', side='bottom') - self._sentence_canvas = Tkinter.Canvas(cframe, height=50) + self._sentence_canvas = tkinter.Canvas(cframe, height=50) self._sentence_canvas['background'] = '#e0e0e0' self._sentence_canvas.pack(fill='both') #self._sentence_canvas['height'] = self._sentence_height @@ -976,12 +976,12 @@ def destroy2(top=top): top.destroy() def _init_fonts(self, root): - self._boldfont = tkFont.Font(family='helvetica', weight='bold', + self._boldfont = tkinter.font.Font(family='helvetica', weight='bold', size=self._fontsize) - self._font = tkFont.Font(family='helvetica', + self._font = tkinter.font.Font(family='helvetica', size=self._fontsize) # See: - self._sysfont = tkFont.Font(font=Tkinter.Button()["font"]) + self._sysfont = tkinter.font.Font(font=tkinter.Button()["font"]) root.option_add("*Font", self._sysfont) def _sb_canvas(self, root, expand='y', @@ -989,12 +989,12 @@ def _sb_canvas(self, root, expand='y', """ Helper for __init__: construct a canvas with a scrollbar. """ - cframe =Tkinter.Frame(root, relief='sunk', border=2) + cframe =tkinter.Frame(root, relief='sunk', border=2) cframe.pack(fill=fill, expand=expand, side=side) - canvas = Tkinter.Canvas(cframe, background='#e0e0e0') + canvas = tkinter.Canvas(cframe, background='#e0e0e0') # Give the canvas a scrollbar. - sb = Tkinter.Scrollbar(cframe, orient='vertical') + sb = tkinter.Scrollbar(cframe, orient='vertical') sb.pack(side='right', fill='y') canvas.pack(side='left', fill=fill, expand='yes') @@ -1079,7 +1079,7 @@ def update(self, chart=None): self._resize() else: for edge in self._chart: - if not self._edgetags.has_key(edge): + if edge not in self._edgetags: self._add_edge(edge) self._resize() @@ -1139,7 +1139,7 @@ def _add_edge(self, edge, minlvl=0): - Find an available level - Call _draw_edge """ - if self._edgetags.has_key(edge): return + if edge in self._edgetags: return self._analyze_edge(edge) self._grow() @@ -1246,11 +1246,11 @@ def _color_edge(self, edge, linecolor=None, textcolor=None): If no colors are specified, use intelligent defaults (dependant on selection, etc.) 
""" - if not self._edgetags.has_key(edge): return + if edge not in self._edgetags: return c = self._chart_canvas if linecolor is not None and textcolor is not None: - if self._marks.has_key(edge): + if edge in self._marks: linecolor = self._marks[edge] tags = self._edgetags[edge] c.itemconfig(tags[0], fill=linecolor) @@ -1262,7 +1262,7 @@ def _color_edge(self, edge, linecolor=None, textcolor=None): return else: N = self._chart.num_leaves() - if self._marks.has_key(edge): + if edge in self._marks: self._color_edge(self._marks[edge]) if (edge.is_complete() and edge.span() == (0, N)): self._color_edge(edge, '#084', '#042') @@ -1283,7 +1283,7 @@ def unmark_edge(self, edge=None): Unmark an edge (or all edges) """ if edge == None: - old_marked_edges = self._marks.keys() + old_marked_edges = list(self._marks.keys()) self._marks = {} for edge in old_marked_edges: self._color_edge(edge) @@ -1379,7 +1379,7 @@ def _draw_loclines(self): c2.tag_lower(t2) t3=c3.create_line(x, 0, x, BOTTOM) c3.tag_lower(t3) - t4=c3.create_text(x+2, 0, text=`i`, anchor='nw', + t4=c3.create_text(x+2, 0, text=repr(i), anchor='nw', font=self._font) c3.tag_lower(t4) #if i % 4 == 0: @@ -1574,8 +1574,8 @@ def remove_callback(self, event, func=None): except: pass def _fire_callbacks(self, event, *args): - if not self._callbacks.has_key(event): return - for cb_func in self._callbacks[event].keys(): cb_func(*args) + if event not in self._callbacks: return + for cb_func in list(self._callbacks[event].keys()): cb_func(*args) ####################################################################### # Pseudo Earley Rule @@ -1659,14 +1659,14 @@ def __init__(self, grammar, tokens, title='Chart Parsing Demo', self._root = None try: # Create the root window. - self._root = Tkinter.Tk() + self._root = tkinter.Tk() self._root.title(title) self._root.bind('', self.destroy) # Set up some frames. - frame3 = Tkinter.Frame(self._root) - frame2 = Tkinter.Frame(self._root) - frame1 = Tkinter.Frame(self._root) + frame3 = tkinter.Frame(self._root) + frame2 = tkinter.Frame(self._root) + frame1 = tkinter.Frame(self._root) frame3.pack(side='bottom', fill='none') frame2.pack(side='bottom', fill='x') frame1.pack(side='bottom', fill='both', expand=1) @@ -1687,7 +1687,7 @@ def __init__(self, grammar, tokens, title='Chart Parsing Demo', self.reset() except: - print 'Error creating Tree View' + print('Error creating Tree View') self.destroy() raise @@ -1725,25 +1725,25 @@ def _init_parser(self, grammar, tokens): def _init_fonts(self, root): # See: - self._sysfont = tkFont.Font(font=Tkinter.Button()["font"]) + self._sysfont = tkinter.font.Font(font=tkinter.Button()["font"]) root.option_add("*Font", self._sysfont) # TWhat's our font size (default=same as sysfont) - self._size = Tkinter.IntVar(root) + self._size = tkinter.IntVar(root) self._size.set(self._sysfont.cget('size')) - self._boldfont = tkFont.Font(family='helvetica', weight='bold', + self._boldfont = tkinter.font.Font(family='helvetica', weight='bold', size=self._size.get()) - self._font = tkFont.Font(family='helvetica', + self._font = tkinter.font.Font(family='helvetica', size=self._size.get()) def _init_animation(self): # Are we stepping? (default=yes) - self._step = Tkinter.IntVar(self._root) + self._step = tkinter.IntVar(self._root) self._step.set(1) # What's our animation speed (default=fast) - self._animate = Tkinter.IntVar(self._root) + self._animate = tkinter.IntVar(self._root) self._animate.set(3) # Default speed = fast # Are we currently animating? 
@@ -1757,60 +1757,60 @@ def _init_chartview(self, parent): def _init_rulelabel(self, parent): ruletxt = 'Last edge generated by:' - self._rulelabel1 = Tkinter.Label(parent,text=ruletxt, + self._rulelabel1 = tkinter.Label(parent,text=ruletxt, font=self._boldfont) - self._rulelabel2 = Tkinter.Label(parent, width=40, + self._rulelabel2 = tkinter.Label(parent, width=40, relief='groove', anchor='w', font=self._boldfont) self._rulelabel1.pack(side='left') self._rulelabel2.pack(side='left') - step = Tkinter.Checkbutton(parent, variable=self._step, + step = tkinter.Checkbutton(parent, variable=self._step, text='Step') step.pack(side='right') def _init_buttons(self, parent): - frame1 = Tkinter.Frame(parent) - frame2 = Tkinter.Frame(parent) + frame1 = tkinter.Frame(parent) + frame2 = tkinter.Frame(parent) frame1.pack(side='bottom', fill='x') frame2.pack(side='top', fill='none') - Tkinter.Button(frame1, text='Reset\nParser', + tkinter.Button(frame1, text='Reset\nParser', background='#90c0d0', foreground='black', command=self.reset).pack(side='right') #Tkinter.Button(frame1, text='Pause', # background='#90c0d0', foreground='black', # command=self.pause).pack(side='left') - Tkinter.Button(frame1, text='Top Down\nStrategy', + tkinter.Button(frame1, text='Top Down\nStrategy', background='#90c0d0', foreground='black', command=self.top_down_strategy).pack(side='left') - Tkinter.Button(frame1, text='Bottom Up\nStrategy', + tkinter.Button(frame1, text='Bottom Up\nStrategy', background='#90c0d0', foreground='black', command=self.bottom_up_strategy).pack(side='left') - Tkinter.Button(frame1, text='Earley\nAlgorithm', + tkinter.Button(frame1, text='Earley\nAlgorithm', background='#90c0d0', foreground='black', command=self.earley_algorithm).pack(side='left') - Tkinter.Button(frame2, text='Top Down Init\nRule', + tkinter.Button(frame2, text='Top Down Init\nRule', background='#90f090', foreground='black', command=self.top_down_init).pack(side='left') - Tkinter.Button(frame2, text='Top Down Expand\nRule', + tkinter.Button(frame2, text='Top Down Expand\nRule', background='#90f090', foreground='black', command=self.top_down_expand).pack(side='left') - Tkinter.Button(frame2, text='Top Down Match\nRule', + tkinter.Button(frame2, text='Top Down Match\nRule', background='#90f090', foreground='black', command=self.top_down_match).pack(side='left') - Tkinter.Frame(frame2, width=20).pack(side='left') + tkinter.Frame(frame2, width=20).pack(side='left') - Tkinter.Button(frame2, text='Bottom Up Init\nRule', + tkinter.Button(frame2, text='Bottom Up Init\nRule', background='#90f090', foreground='black', command=self.bottom_up_init).pack(side='left') - Tkinter.Button(frame2, text='Bottom Up Predict\nRule', + tkinter.Button(frame2, text='Bottom Up Predict\nRule', background='#90f090', foreground='black', command=self.bottom_up).pack(side='left') - Tkinter.Frame(frame2, width=20).pack(side='left') + tkinter.Frame(frame2, width=20).pack(side='left') - Tkinter.Button(frame2, text='Fundamental\nRule', + tkinter.Button(frame2, text='Fundamental\nRule', background='#90f090', foreground='black', command=self.fundamental).pack(side='left') @@ -1844,9 +1844,9 @@ def _init_bindings(self): self._root.bind('s', lambda e,s=self._step:s.set(not s.get())) def _init_menubar(self): - menubar = Tkinter.Menu(self._root) + menubar = tkinter.Menu(self._root) - filemenu = Tkinter.Menu(menubar, tearoff=0) + filemenu = tkinter.Menu(menubar, tearoff=0) filemenu.add_command(label='Save Chart', underline=0, command=self.save_chart, accelerator='Ctrl-s') 
filemenu.add_command(label='Load Chart', underline=0, @@ -1863,7 +1863,7 @@ def _init_menubar(self): command=self.destroy, accelerator='Ctrl-x') menubar.add_cascade(label='File', underline=0, menu=filemenu) - editmenu = Tkinter.Menu(menubar, tearoff=0) + editmenu = tkinter.Menu(menubar, tearoff=0) editmenu.add_command(label='Edit Grammar', underline=5, command=self.edit_grammar, accelerator='Ctrl-g') @@ -1872,14 +1872,14 @@ def _init_menubar(self): accelerator='Ctrl-t') menubar.add_cascade(label='Edit', underline=0, menu=editmenu) - viewmenu = Tkinter.Menu(menubar, tearoff=0) + viewmenu = tkinter.Menu(menubar, tearoff=0) viewmenu.add_command(label='Chart Matrix', underline=6, command=self.view_matrix) viewmenu.add_command(label='Results', underline=0, command=self.view_results) menubar.add_cascade(label='View', underline=0, menu=viewmenu) - rulemenu = Tkinter.Menu(menubar, tearoff=0) + rulemenu = tkinter.Menu(menubar, tearoff=0) rulemenu.add_command(label='Top Down Strategy', underline=0, command=self.top_down_strategy, accelerator='t') @@ -1904,7 +1904,7 @@ def _init_menubar(self): command=self.fundamental) menubar.add_cascade(label='Apply', underline=0, menu=rulemenu) - animatemenu = Tkinter.Menu(menubar, tearoff=0) + animatemenu = tkinter.Menu(menubar, tearoff=0) animatemenu.add_checkbutton(label="Step", underline=0, variable=self._step, accelerator='s') @@ -1922,7 +1922,7 @@ def _init_menubar(self): accelerator='+') menubar.add_cascade(label="Animate", underline=1, menu=animatemenu) - zoommenu = Tkinter.Menu(menubar, tearoff=0) + zoommenu = tkinter.Menu(menubar, tearoff=0) zoommenu.add_radiobutton(label='Tiny', variable=self._size, underline=0, value=10, command=self.resize) zoommenu.add_radiobutton(label='Small', variable=self._size, @@ -1935,7 +1935,7 @@ def _init_menubar(self): underline=0, value=24, command=self.resize) menubar.add_cascade(label='Zoom', underline=0, menu=zoommenu) - helpmenu = Tkinter.Menu(menubar, tearoff=0) + helpmenu = tkinter.Menu(menubar, tearoff=0) helpmenu.add_command(label='About', underline=0, command=self.about) helpmenu.add_command(label='Instructions', underline=0, @@ -2010,7 +2010,7 @@ def help(self, *e): def about(self, *e): ABOUT = ("NLTK Chart Parser Demo\n"+ "Written by Edward Loper") - tkMessageBox.showinfo('About: Chart Parser Demo', ABOUT) + tkinter.messagebox.showinfo('About: Chart Parser Demo', ABOUT) #//////////////////////////////////////////////////////////// # File Menu @@ -2035,9 +2035,9 @@ def load_chart(self, *args): if self._matrix: self._matrix.deselect_cell() if self._results: self._results.set_chart(chart) self._cp.set_chart(chart) - except Exception, e: + except Exception as e: raise - tkMessageBox.showerror('Error Loading Chart', + tkinter.messagebox.showerror('Error Loading Chart', 'Unable to open file: %r' % filename) def save_chart(self, *args): @@ -2047,9 +2047,9 @@ def save_chart(self, *args): if not filename: return try: pickle.dump(self._chart, open(filename, 'w')) - except Exception, e: + except Exception as e: raise - tkMessageBox.showerror('Error Saving Chart', + tkinter.messagebox.showerror('Error Saving Chart', 'Unable to open file: %r' % filename) def load_grammar(self, *args): @@ -2063,8 +2063,8 @@ def load_grammar(self, *args): else: grammar = cfg.parse_grammar(open(filename, 'r').read()) self.set_grammar(grammar) - except Exception, e: - tkMessageBox.showerror('Error Loading Grammar', + except Exception as e: + tkinter.messagebox.showerror('Error Loading Grammar', 'Unable to open file: %r' % filename) def 
save_grammar(self, *args): @@ -2082,8 +2082,8 @@ def save_grammar(self, *args): for prod in start: file.write('%s\n' % prod) for prod in rest: file.write('%s\n' % prod) file.close() - except Exception, e: - tkMessageBox.showerror('Error Saving Grammar', + except Exception as e: + tkinter.messagebox.showerror('Error Saving Grammar', 'Unable to open file: %r' % filename) def reset(self, *args): @@ -2209,7 +2209,7 @@ def _animate_strategy(self, speed=1): self._root.after(20, self._animate_strategy) def _apply_strategy(self): - new_edge = self._cpstep.next() + new_edge = next(self._cpstep) if new_edge is not None: self._show_new_edge(new_edge) @@ -2281,12 +2281,12 @@ def demo(): sent = 'John ate the cake on the table' tokens = list(tokenize.whitespace(sent)) - print 'grammar= (' + print('grammar= (') for rule in grammar.productions(): - print ' ', repr(rule)+',' - print ')' - print 'tokens = %r' % tokens - print 'Calling "ChartDemo(grammar, tokens)"...' + print((' ', repr(rule)+',')) + print(')') + print(('tokens = %r' % tokens)) + print('Calling "ChartDemo(grammar, tokens)"...') ChartDemo(grammar, tokens).mainloop() if __name__ == '__main__': diff --git a/nltk_contrib/mit/six863/tagging/tagparse.py b/nltk_contrib/mit/six863/tagging/tagparse.py index b59e522..4264356 100644 --- a/nltk_contrib/mit/six863/tagging/tagparse.py +++ b/nltk_contrib/mit/six863/tagging/tagparse.py @@ -1,6 +1,6 @@ from nltk.parse import chart from nltk import cfg -from drawchart import ChartDemo +from .drawchart import ChartDemo from nltk.tokenize.regexp import wordpunct #from nltk_contrib.mit.six863.kimmo import * import re, pickle @@ -27,7 +27,7 @@ def insert_tags(thechart, tokens): match = re.match(r"PREFIX\('.*?'\)(.*?)\(.*", feat) if match: pos = match.groups()[0] else: pos = feat.split('(')[0] - print surface, pos + print((surface, pos)) leafedge = chart.LeafEdge(word, i) thechart.insert(chart.TreeEdge((i, i+1), cfg.Nonterminal(pos), [word], dot=1), [leafedge]) diff --git a/nltk_contrib/rdf/rdf.py b/nltk_contrib/rdf/rdf.py index 1077f9b..370cc37 100644 --- a/nltk_contrib/rdf/rdf.py +++ b/nltk_contrib/rdf/rdf.py @@ -27,7 +27,7 @@ def make_rdf(ns, reldict, relsym=None, verbose=False): object = sym2uri(ns, reldict['objclass'], reldict['objsym']) triple = (subject, predicate, object) if verbose: - print triple + print(triple) return triple def make_rdfs(ns, reldict): @@ -47,7 +47,7 @@ def sym2uri(base, rdfclass, sym): """ Build a URI out of a base, a class term, and a symbol. 
""" - from urllib import quote + from urllib.parse import quote from rdflib import Namespace rdfclass = class_abbrev(rdfclass) rdfclass = rdfclass.lower() diff --git a/nltk_contrib/rdf/rdfquery.py b/nltk_contrib/rdf/rdfquery.py index 4535491..e62e54b 100644 --- a/nltk_contrib/rdf/rdfquery.py +++ b/nltk_contrib/rdf/rdfquery.py @@ -86,7 +86,7 @@ def demo(): semrep = sem.root_semrep(tree) trans = SPARQLTranslator() trans.translate(semrep) - print trans.query + print((trans.query)) if __name__ == '__main__': diff --git a/nltk_contrib/rdf/rdfvizualize.py b/nltk_contrib/rdf/rdfvizualize.py index 5ace10b..323d5d2 100644 --- a/nltk_contrib/rdf/rdfvizualize.py +++ b/nltk_contrib/rdf/rdfvizualize.py @@ -66,7 +66,7 @@ def graph2dot(self, filter_edges=False): # add subjects and objects as nodes in the Dot instance for s, o in self.graph.subject_objects(): for uri in s, o: - if uri not in nodes.keys(): + if uri not in list(nodes.keys()): # generate a new node identifier node_id = "n%03d" % count nodes[uri] = node_id @@ -121,9 +121,9 @@ def serialize_demo(): try: store = ConjunctiveGraph() store.parse(FILE, format='xml') - print store.serialize(format='xml') + print((store.serialize(format='xml'))) except OSError: - print "Cannot read file '%s'" % FILE + print(("Cannot read file '%s'" % FILE)) def make_dot_demo(infile): try: @@ -133,13 +133,13 @@ def make_dot_demo(infile): v = Visualizer(store) g = v.graph2dot(filter_edges=True) g.write('%s.dot' % basename) - print "Wrote '%s.dot'" % basename + print(("Wrote '%s.dot'" % basename)) g.write_png('%s.png' % basename, prog='dot') - print "Wrote '%s.png'" % basename + print(("Wrote '%s.png'" % basename)) g.write_svg('%s.svg' % basename, prog='dot') - print "Wrote '%s.svg'" % basename + print(("Wrote '%s.svg'" % basename)) except OSError: - print "Cannot read file '%s'" % FILE + print(("Cannot read file '%s'" % FILE)) def main(): @@ -169,9 +169,9 @@ def main(): #print '*' * 30 #serialize_demo() - print - print "Visualise an rdf graph with Graphviz" - print '*' * 30 + print() + print("Visualise an rdf graph with Graphviz") + print(('*' * 30)) make_dot_demo(infile) if __name__ == '__main__': diff --git a/nltk_contrib/readability/crawler.py b/nltk_contrib/readability/crawler.py index 555088f..33ecaf1 100644 --- a/nltk_contrib/readability/crawler.py +++ b/nltk_contrib/readability/crawler.py @@ -3,7 +3,7 @@ import random import os,re -from urlextracter import * +from .urlextracter import * from sgmllib import * class Crawler: @@ -13,13 +13,13 @@ class Crawler: def crawl(self,url): self.current = url - print "Crawling " + url + print(("Crawling " + url)) try: ue = URLextracter(url) except SGMLParseError: - print "This URL contains error that can't be handled by this app.\nSorry!" - print "=" * 30 - print "Trying new random URL" + print("This URL contains error that can't be handled by this app.\nSorry!") + print(("=" * 30)) + print("Trying new random URL") self.crawl(self.urls[random.randint(1,len(self.urls))]) return @@ -30,7 +30,7 @@ def crawl(self,url): filename += part + "." 
filename += "txt" - print "Stored as: " + filename + print(("Stored as: " + filename)) urls = "" try: # Set the path of where to store your data @@ -41,7 +41,7 @@ def crawl(self,url): if len(content) > 2: # Minimum 3 words try: - textToWrite = unicode("".join(content)) + textToWrite = str("".join(content)) except UnicodeDecodeError: textToWrite = str("".join(content)) f.write(textToWrite) @@ -50,9 +50,9 @@ def crawl(self,url): # Set this path to same as storage path os.remove("/path/to/saved/data/lang/%s" % filename) urls = ue.linklist - print "" + url + " mined!" + print(("" + url + " mined!")) except IOError: - print "Mined, but failed to store as file.\nSkipping this, going on to next!" + print("Mined, but failed to store as file.\nSkipping this, going on to next!") urls = self.urls ok_urls = [] for i in urls: @@ -68,12 +68,12 @@ def crawl(self,url): if len(ok_urls) < 2: ok_urls = self.crawled unique = True # Fake true - print str(len(ok_urls)) + print((str(len(ok_urls)))) else: unique = False next = random.randint(1,len(ok_urls)-1) - print next + print(next) new_url = ok_urls[next] while not unique: next = random.randint(1,len(ok_urls)-1) @@ -86,7 +86,7 @@ def crawl(self,url): new_url = ok_urls[next] unique = True else: - print "Already crawled " + new_url + print(("Already crawled " + new_url)) ok_urls.remove(new_url) if len(ok_urls) < 2: ok_urls = self.crawled diff --git a/nltk_contrib/readability/languageclassifier.py b/nltk_contrib/readability/languageclassifier.py index 3b13d90..7de1015 100644 --- a/nltk_contrib/readability/languageclassifier.py +++ b/nltk_contrib/readability/languageclassifier.py @@ -11,7 +11,7 @@ from nltk.corpus import stopwords -from urlextracter import URLextracter +from .urlextracter import URLextracter from sgmllib import * class NaiveBayes(object): @@ -72,12 +72,12 @@ def train(self, path): values = file.split('/') lang = values[-2] - if not self.p_lang.has_key(lang): + if lang not in self.p_lang: self.p_lang[lang] = 0.0 self.p_lang[lang] += 1.0 - if not self.files.has_key(lang): + if lang not in self.files: self.files[lang] = [] f = open(file, 'r') @@ -85,35 +85,35 @@ def train(self, path): f.close() # Calculate probabilities - for lang in self.p_lang.keys(): + for lang in list(self.p_lang.keys()): self.p_lang[lang] /= len(self.training_files) self.vocabulary = self.__createVocabulary(self.files) # Calculate P(O | H) p_word_given_lang = self.p_word_given_lang - for lang in self.files.keys(): + for lang in list(self.files.keys()): p_word_given_lang[lang] = {} - for word in self.vocabulary[lang].keys(): + for word in list(self.vocabulary[lang].keys()): p_word_given_lang[lang][word] = 1.0 for word in self.files[lang]: - if self.vocabulary[lang].has_key(word): + if word in self.vocabulary[lang]: p_word_given_lang[lang][word] += 1.0 - for word in self.vocabulary[lang].keys(): + for word in list(self.vocabulary[lang].keys()): p_word_given_lang[lang][word] /= len(self.files[lang]) + len(self.vocabulary[lang]) - print "Training finished...(training-set of size %d)" % len(self.training_files) + print(("Training finished...(training-set of size %d)" % len(self.training_files))) self.p_word_given_lang = p_word_given_lang - self.candidate_languages = self.files.keys() + self.candidate_languages = list(self.files.keys()) # Save result as a file output = open(os.path.join("files","lang_data.pickle"),'wb') data = {} data["p_word_given_lang"] = p_word_given_lang - data["canidate_languages"] = self.files.keys() + data["canidate_languages"] = list(self.files.keys()) 
data["p_lang"] = self.p_lang data["vocabulary"] = self.vocabulary pickler = pickle.dump(data, output, -1) @@ -128,16 +128,16 @@ def __createVocabulary(self, files): """ # Count number of occurance of each word word_count = {} - for lang in files.keys(): + for lang in list(files.keys()): for word in files[lang]: - if not word_count.has_key(word): + if word not in word_count: word_count[word] = 0 word_count[word] += 1 vocabulary = {} vocabulary['eng'] = {} vocabulary['no'] = {} - for word in word_count.keys(): + for word in list(word_count.keys()): if word_count[word] > 2: if word != '': if not word in self.nor_stopwords: @@ -155,7 +155,7 @@ def testAccuracy(self,test_files = ""): """ if test_files == "": - print "No test files given" + print("No test files given") return elif os.path.isdir(str(test_files)): self.test_files = glob.glob(test_files + "/*/*") @@ -186,7 +186,7 @@ def testAccuracy(self,test_files = ""): # Calculates P(O | H) * P(H) for candidate group p = math.log(self.p_lang[candidate_lang]) for word in file_to_be_classified: - if self.vocabulary[candidate_lang].has_key(word): + if word in self.vocabulary[candidate_lang]: p += math.log(self.p_word_given_lang[candidate_lang][word]) if p > max_p or max_p == 1: @@ -196,10 +196,10 @@ def testAccuracy(self,test_files = ""): total += 1.0 if true_lang != max_lang: errors += 1.0 - print "Classifying finished...(test-set of size %d)" % len(self.test_files) - print "Errors %d" % errors - print "Total %d" % total - print "Accuracy: %.3f" % (1.0 - errors/total) + print(("Classifying finished...(test-set of size %d)" % len(self.test_files))) + print(("Errors %d" % errors)) + print(("Total %d" % total)) + print(("Accuracy: %.3f" % (1.0 - errors/total))) def classifyText(self, text): """ @@ -219,7 +219,7 @@ def classifyText(self, text): unknown_words = [] known_words = [] for word in words: - if self.vocabulary[candidate_lang].has_key(word): + if word in self.vocabulary[candidate_lang]: p += math.log(self.p_word_given_lang[candidate_lang][word]) if word not in known_words: known_words.append(word) @@ -241,7 +241,7 @@ def classifyText(self, text): def classifyURL(self, url): ue = URLextracter(url) - print 'Classifying %s' % url + print(('Classifying %s' % url)) content = ue.output() content = re.sub(r"[^a-zA-ZæøåÆØÅ]", " ", content) content = content.strip() @@ -254,46 +254,46 @@ def report_unbalanced(self,tag): pass def demo(self): - print "Demo of language classifier" - print "=" * 40 + print("Demo of language classifier") + print(("=" * 40)) nb = NaiveBayes() nb.load(os.path.join("files","lang_data.pickle")) - print "Classifying plain text(10 first sentences from \"nltk.corpus.abc.sents\")" - print "=" * 40 + print("Classifying plain text(10 first sentences from \"nltk.corpus.abc.sents\")") + print(("=" * 40)) text = "" import nltk.corpus sents = nltk.corpus.abc.sents() for words in sents[0:10]: text+= " ".join(words) + "\n" - print text - print "=" * 40 - print "Languages is: %s" % nb.classifyText(text) + print(text) + print(("=" * 40)) + print(("Languages is: %s" % nb.classifyText(text))) - print "\n" - print "Classifying 10 URLs" - print "=" * 40 + print("\n") + print("Classifying 10 URLs") + print(("=" * 40)) lang = nb.classifyURL("http://harvardscience.harvard.edu/") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://vg.no") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://bbc.co.uk") - print "-->language: %s \n" % lang + 
print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://startsiden.no") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://news.com") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://www.munimadrid.es") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://www.welt.de/") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://www.news.pl/") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://www.ekstrabladet.dk/") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) lang = nb.classifyURL("http://www.gazzetta.it/") - print "-->language: %s \n" % lang + print(("-->language: %s \n" % lang)) demo = classmethod(demo) def demo(): diff --git a/nltk_contrib/readability/readabilitytests.py b/nltk_contrib/readability/readabilitytests.py index 7354269..76a574c 100644 --- a/nltk_contrib/readability/readabilitytests.py +++ b/nltk_contrib/readability/readabilitytests.py @@ -1,4 +1,4 @@ -from textanalyzer import * +from .textanalyzer import * import math class ReadabilityTool: @@ -196,17 +196,17 @@ def getReportAll(self, text = ''): # print ' RIX : %.1f' % rix # print '*' * 70 - print "=" * 100 - print "Recommended tests for lang: %s" % self.lang - print "=" * 100 - for testname in self.tests_given_lang[self.lang].keys(): - print testname + " : %.2f" % self.tests_given_lang[self.lang][testname](text) - print "=" * 100 - print "Other tests: (Warning! Use with care)" - print "=" * 100 - for testname in self.tests_given_lang["all"].keys(): - if not self.tests_given_lang[self.lang].has_key(testname): - print testname + " : %.2f" % self.tests_given_lang["all"][testname](text) + print(("=" * 100)) + print(("Recommended tests for lang: %s" % self.lang)) + print(("=" * 100)) + for testname in list(self.tests_given_lang[self.lang].keys()): + print((testname + " : %.2f" % self.tests_given_lang[self.lang][testname](text))) + print(("=" * 100)) + print("Other tests: (Warning! 
Use with care)") + print(("=" * 100)) + for testname in list(self.tests_given_lang["all"].keys()): + if testname not in self.tests_given_lang[self.lang]: + print((testname + " : %.2f" % self.tests_given_lang["all"][testname](text))) def demo(self): diff --git a/nltk_contrib/readability/syllables_no.py b/nltk_contrib/readability/syllables_no.py index 3f43222..ec920d3 100644 --- a/nltk_contrib/readability/syllables_no.py +++ b/nltk_contrib/readability/syllables_no.py @@ -78,10 +78,10 @@ def _stripWord(word): if line: toks = line.split() assert len(toks) == 2 - syllablesInFile[_stripWord(unicode(toks[0],"latin-1").encode("utf-8"))] = int(toks[1]) + syllablesInFile[_stripWord(str(toks[0],"latin-1").encode("utf-8"))] = int(toks[1]) def count(word): - word = unicode(word,"utf-8").encode("utf-8") + word = str(word,"utf-8").encode("utf-8") word = _stripWord(word) if not word: @@ -96,7 +96,7 @@ def count(word): # Count vowel groups count = 0 prev_was_vowel = 0 - vowels = [u"a", u"e", u"i", u"o", u"u", u"y", u"æ", u"ø", u"å"] + vowels = ["a", "e", "i", "o", "u", "y", "æ", "ø", "å"] #for vow in vowels: #vow = vow.decode("utf-8") for c in word.decode("utf-8"): diff --git a/nltk_contrib/readability/textanalyzer.py b/nltk_contrib/readability/textanalyzer.py index 127e736..fd4d220 100644 --- a/nltk_contrib/readability/textanalyzer.py +++ b/nltk_contrib/readability/textanalyzer.py @@ -3,9 +3,9 @@ import nltk.data from nltk.tokenize import * -import syllables_en -import syllables_no -from languageclassifier import * +from . import syllables_en +from . import syllables_no +from .languageclassifier import * import logging class textanalyzer(object): @@ -28,13 +28,13 @@ def analyzeText(self, text=''): syllablesCount = self.countSyllables(words) complexwordsCount = self.countComplexWords(text) averageWordsPerSentence = wordCount/sentenceCount - print ' Language: ' + self.lang - print ' Number of characters: ' + str(charCount) - print ' Number of words: ' + str(wordCount) - print ' Number of sentences: ' + str(sentenceCount) - print ' Number of syllables: ' + str(syllablesCount) - print ' Number of complex words: ' + str(complexwordsCount) - print ' Average words per sentence: ' + str(averageWordsPerSentence) + print((' Language: ' + self.lang)) + print((' Number of characters: ' + str(charCount))) + print((' Number of words: ' + str(wordCount))) + print((' Number of sentences: ' + str(sentenceCount))) + print((' Number of syllables: ' + str(syllablesCount))) + print((' Number of complex words: ' + str(complexwordsCount))) + print((' Average words per sentence: ' + str(averageWordsPerSentence))) #analyzeText = classmethod(analyzeText) @@ -126,14 +126,14 @@ def countComplexWords(self, text=''): #countComplexWords = classmethod(countComplexWords) def _setEncoding(self,text): - if not isinstance(text, unicode): + if not isinstance(text, str): try: - text = unicode(text, "utf8") + text = str(text, "utf8") except UnicodeError: try: - text = unicode(text, "iso8859_1") + text = str(text, "iso8859_1") except UnicodeError: - text = unicode(text, "ascii", "replace") + text = str(text, "ascii", "replace") return text #_setEncoding = classmethod(_setEncoding) @@ -154,9 +154,9 @@ def demo(self): # \nthe people, for the people, shall not perish from this earth." text = "Den 10. desember 1948 vedtok og kunngjorde De Forente Nasjoners tredje Generalforsamling Verdenserklæringen om Menneskerettighetene. Erklæringen ble vedtatt med 48 lands ja-stemmer. Ingen land stemte mot. 8 land avsto. 
Umiddelbart etter denne historiske begivenhet henstilte Generalforsamlingen til alle medlemsstater å bekjentgjøre Erklæringens tekst og sørge for at den blir distribuert, framvist, lest og forklart spesielt i skoler og andre læreinstitusjoner, uten hensyn til de forskjellige lands eller områders politiske status. Erklæringens offisielle tekst foreligger på FNs seks arbeidsspråk: arabisk, engelsk, fransk, kinesisk, russisk og spansk. En lang rekke av FNs medlemsstater har fulgt Generalforsamlingens oppfordring og oversatt Erklæringen til de nasjonale språk. Denne oversettelsen til norsk er utarbeidet i Utenriksdepartementet. På henvendelse til FNs nordiske informasjonskontor i København kan en få gratis eksemplarer av Erklæringen på FNs offisielle språk, de øvrige nordiske språk og et begrenset antall andre språk. VERDENSERKLÆRINGEN OM MENNESKERETTIGHETENE INNLEDNING Da anerkjennelsen av menneskeverd og like og umistelige rettigheter for alle medlemmer av menneskeslekten er grunnlaget for frihet, rettferdighet og fred i verden, da tilsidesettelse av og forakt for menneskerettighetene har ført til barbariske handlinger som har rystet menneskehetens samvittighet, og da framveksten av en verden hvor menneskene har tale- og trosfrihet og frihet fra frykt og nød, er blitt kunngjort som folkenes høyeste mål, da det er nødvendig at menneskerettighetene blir beskyttet av loven for at menneskene ikke skal tvinges til som siste utvei å gjøre opprør mot tyranni og undertrykkelse, da det er viktig å fremme utviklingen av vennskapelige forhold mellom nasjonene, da De Forente Nasjoners folk i Pakten på ny har bekreftet sin tro på grunnleggende menneskerettigheter, på menneskeverd og på like rett for menn og kvinner og har besluttet å arbeide for sosialt framskritt og bedre levevilkår under større Frihet, da medlemsstatene har forpliktet seg til i samarbeid med De Forente Nasjoner å sikre at menneskerettighetene og de grunnleggende friheter blir alminnelig respektert og overholdt, da en allmenn forståelse av disse rettigheter og friheter er av den største betydning for å virkeliggjøre denne forpliktelse, kunngjør GENERALFORSAMLINGEN nå denne VERDENSERKLÆRING OM MENNESKERETTIGHETENE som et felles mål for alle folk og alle nasjoner, for at hvert individ og hver samfunnsmyndighet, med denne erklæring stadig i tankene, skal søke gjennom undervisning og oppdragelse å fremme respekt for disse rettigheter og friheter, og ved nasjonale og internasjonale tiltak å sikre at de blir allment og effektivt anerkjent og overholdt både blant folkene i medlemsstatene selv og blant folkene i de områder som står under deres overhøyhet. Artikkel 1. Alle mennesker er født frie og med samme menneskeverd og menneskerettigheter. De er utstyrt med fornuft og samvittighet og bør handle mot hverandre i brorskapets ånd. Artikkel 2. Enhver har krav på alle de rettigheter og friheter som er nevnt i denne erklæring, uten forskjell av noen art, f. eks. på grunn av rase, farge, kjønn, språk, religion, politisk eller annen oppfatning, nasjonal eller sosial opprinnelse eiendom, fødsel eller annet forhold. Det skal heller ikke gjøres noen forskjell på grunn av den politiske, rettslige eller internasjonale stilling som innehas av det land eller det område en person hører til, enten landet er uavhengig, står under tilsyn, er ikke-selvstyrende, eller på annen måte har begrenset suverenitet. Artikkel 3. Enhver har rett til liv, frihet og personlig sikkerhet. Artikkel 4. Ingen må holdes i slaveri eller trelldom. 
Slaveri og slavehandel i alle former er forbudt. Artikkel 5. Ingen må utsettes for tortur eller grusom, umenneskelig eller nedverdigende behandling eller straff. Artikkel 6. Ethvert menneske har krav på overalt å bli anerkjent som rettssubjekt. Artikkel 7. Alle er like for loven og har uten diskriminering rett til samme beskyttelse av loven. Alle har krav på samme beskyttelse mot diskriminering i strid med denne erklæring og mot enhver oppfordring til slik diskriminering. Artikkel 8. Enhver har rett til effektiv hjelp av de kompetente nasjonale domstoler mot handlinger som krenker de grunnleggende rettigheter han er gitt i forfatning eller lov. Artikkel 9. Ingen må utsettes for vilkårlig arrest, fengsling eller landsforvisning. Artikkel 10. Enhver har krav på under full likestilling å få sin sak rettferdig og offentlig behandlet av en uavhengig og upartisk domstol når hans rettigheter og plikter skal fastsettes,og når en straffeanklage mot ham skal avgjøres. Artikkel 11. 1. Enhver som er anklaget for en straffbar handling har rett til å bli ansett som uskyldig til det er bevist ved offentlig domstolsbehandling, hvor han har hatt alle de garantier som er nødvendig for hans forsvar, at han er skyldig etter loven. 2. Ingen må dømmes for en handling eller unnlatelse som i henhold til nasjonal lov eller folkeretten ikke var straffbar på den tid da den ble begått. Heller ikke skal det kunne idømmes strengere straff enn den som det var hjemmel for på den tid da den straffbare handling ble begått. Artikkel 12. Ingen må utsettes for vilkårlig innblanding i privatliv, familie, hjem og korrespondanse, eller for angrep på ære og anseelse. Enhver har rett til lovens beskyttelse mot slik innblanding eller slike angrep. Artikkel 13. 1. Enhver har rett til å bevege seg fritt og til fritt å velge oppholdssted innenfor en stats grenser. 2. Enhver har rett til å forlate et hvilket som helst land innbefattet sitt eget og til å vende tilbake til sitt land. Artikkel 14. 1. Enhver har rett til i andre land å søke og ta imot asyl mot forfølgelse. 2. Denne rett kan ikke påberopes ved rettsforfølgelse som har reelt grunnlag i upolitiske forbrytelser eller handlinger som strider mot De Forente Nasjoners formål og prinsipper. Artikkel 15. 1. Enhver har rett til et statsborgerskap. Ingen skal vilkårlig berøves sitt statsborgerskap eller nektes retten til å forandre det. Artikkel 16. 1. Voksne menn og kvinner har rett til å gifte seg og stifte familie uten noen begrensning som skyldes rase, nasjonalitet eller religion. De har krav på like rettigheter ved inngåelse av ekteskapet, under ekteskapet og ved dets oppløsning. 2. Ekteskap må bare inngås etter fritt og fullt samtykke av de vordende ektefeller. 3. Familien er den naturlige og grunnleggende enhet i samfunnet og har krav på samfunnets og statens beskyttelse. Artikkel 17. 1. Enhver har rett til å eie eiendom alene eller sammen med andre. 2. Ingen må vilkårlig fratas sin eiendom. Artikkel 18. Enhver har rett til tanke-, samvittighets- og religionsfrihet. Denne rett omfatter frihet til å skifte religion eller tro, og frihet til enten alene eller sammen med andre, og offentlig eller privat, å gi uttrykk for sin religion eller tro gjennom undervisning, utøvelse, tilbedelse og ritualer. Artikkel 19. Enhver har rett til menings- og ytringsfrihet. Denne rett omfatter frihet til å hevde meninger uten innblanding og til å søke, motta og meddele opplysninger og ideer gjennom ethvert meddelelsesmiddel og uten hensyn til landegrenser. Artikkel 20. 1. 
Enhver har rett til fritt å delta i fredelige møter og organisasjoner. 2. Ingen må tvinges til å tilhøre en organisasjon. Artikkel 21. 1. Enhver har rett til å ta del i sitt lands styre, direkte eller gjennom fritt valgte representanter. 2. Enhver har rett til lik adgang til offentlig tjeneste i sitt land. 3. Folkets vilje skal være grunnlaget for offentlig myndighet. Denne vilje skal komme til uttrykk gjennom periodiske og reelle valg med allmenn og lik stemmerett og med hemmelig avstemning eller likeverdig fri stemmemåte. Artikkel 22. Enhver har som medlem av samfunnet rett til sosial trygghet og har krav på at de økonomiske, sosiale og kulturelle goder som er uunnværlige for hans verdighet og den frie utvikling av hans personlighet, blir skaffet til veie gjennom nasjonale tiltak og internasjonalt samarbeid i samsvar med hver enkelt stats organisasjon og ressurser. Artikkel 23. 1. Enhver har rett til arbeid, til fritt valg av yrke, til rettferdige og gode arbeidsforhold og til beskyttelse mot arbeidsløshet. 2. Enhver har uten diskriminering rett til lik betaling for likt arbeid. 3. Enhver som arbeider har rett til en rettferdig og god betaling som sikrer hans familie og ham selv en menneskeverdig tilværelse, og som om nødvendig blir utfylt ved annen sosial beskyttelse. 4. Enhver har rett til å danne og gå inn i fagforeninger for å beskytte sine interesser. Artikkel 24. Enhver har rett til hvile og fritid, herunder rimelig begrensning av arbeidstiden og regelmessige ferier med lønn. Artikkel 25. 1. Enhver har rett til en levestandard som er tilstrekkelig for hans og hans families helse og velvære, og som omfatter mat, klær, bolig og helseomsorg og nødvendige sosiale ytelser, og rett til trygghet i tilfelle av arbeidsløshet, sykdom, arbeidsuførhet, enkestand, alderdom eller annen mangel på eksistensmuligheter som skyldes forhold han ikke er herre over. 2. Mødre og barn har rett til spesiell omsorg og hjelp. Alle barn skal ha samme sosiale beskyttelse enten de er født i eller utenfor ekteskap. Artikkel 26. 1. Enhver har rett til undervisning. Undervisningen skal være gratis, i det minste på de elementære og grunnleggende trinn. Elementærundervisning skal være obligatorisk. Alle skal ha adgang til yrkesopplæring, og det skal være lik adgang for alle til høyere undervisning på grunnlag av kvalifikasjoner. 2. Undervisningen skal ta sikte på å utvikle den menneskelige personlighet og styrke respekten for menneskerettighetene og de grunnleggende friheter. Den skal fremme forståelse, toleranse og vennskap mellom alle nasjoner og rasegrupper eller religiøse grupper og skal støtte De Forente Nasjoners arbeid for å opprettholde fred. 3. Foreldre har fortrinnsrett til å bestemme hva slags undervisning deres barn skal få. Artikkel 27. 1. Enhver har rett til fritt å delta i samfunnets kulturelle liv, til å nyte kunst og til å få del i den vitenskapelige framgang og dens goder. 2. Enhver har rett til beskyttelse av de åndelige og materielle interesser som er et resultat av ethvert vitenskapelig, litterært eller kunstnerisk verk som han har skapt. Artikkel 28. Enhver har krav på en sosial og internasjonal orden som fullt ut kan virkeliggjøre de rettigheter og friheter som er nevnt i denne erklæring. Artikkel 29. 1. Enhver har plikter overfor samfunnet som alene gjør den frie og fulle utvikling av hans personlighet mulig. 2. 
Under utøvelsen av sine rettigheter og friheter skal enhver bare være undergitt slike begrensninger som er fastsatt i lov utelukkende med det formål å sikre den nødvendige anerkjennelse av og respekt for andres rettigheter og friheter, og de krav som moralen, den offentlige orden og den alminnelige velferd i et demokratisk samfunn med rette stiller. 3. Disse rettigheter og friheter må ikke i noe tilfelle utøves i strid med De Forente Nasjoners formål og prinsipper. Artikkel 30. Intet i denne erklæring skal tolkes slik at det gir noen stat, gruppe eller person rett til å ta del i noen virksomhet eller foreta noen handling som tar sikte på å ødelegge noen av de rettigheter og friheter som er nevnt i Erklæringen." - print "The text : \n" + ("=" * 40) - print text - print ("=" * 40) + "\nHas the following statistics\n" + ("=" * 40) + print(("The text : \n" + ("=" * 40))) + print(text) + print((("=" * 40) + "\nHas the following statistics\n" + ("=" * 40))) nb = NaiveBayes() ta = textanalyzer(nb.classifyText(text)) ta.analyzeText(text) diff --git a/nltk_contrib/readability/urlextracter.py b/nltk_contrib/readability/urlextracter.py index 9a43d98..cfd6e7a 100644 --- a/nltk_contrib/readability/urlextracter.py +++ b/nltk_contrib/readability/urlextracter.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Sets the encoding to utf-8 to avoid problems with æøå -import urllib -import htmlentitydefs +import urllib.request, urllib.parse, urllib.error +import html.entities from sgmllib import * import re @@ -16,12 +16,12 @@ class URLextracter(SGMLParser): def __init__(self, url='http://python.org'): self.reset() try: - self.sock = urllib.urlopen(url) + self.sock = urllib.request.urlopen(url) self.feed(self.sock.read()) self.sock.close() self.close() except IOError: - print "Could not connect, or the markup has (too) bad structure" + print("Could not connect, or the markup has (too) bad structure") raise SGMLParseError def start_style(self,attr): @@ -98,24 +98,24 @@ def output(self): def _setEncoding(self,text): try: - text = unicode(text, "utf8").encode("utf8") + text = str(text, "utf8").encode("utf8") except UnicodeError: try: - text = unicode(text, "iso8859_1").encode("utf8") + text = str(text, "iso8859_1").encode("utf8") except UnicodeError: - text = unicode(text, "ascii", "replace").encode("utf8") + text = str(text, "ascii", "replace").encode("utf8") return text def demo(self): - print 'This class takes an URL, and extracts the text it contains.' - print 'It also removes special characters and numbers,' - print 'and sentences must consist of at least' - print '3 words to not be ignored.' 
+ print('This class takes an URL, and extracts the text it contains.') + print('It also removes special characters and numbers,') + print('and sentences must consist of at least') + print('3 words to not be ignored.') - print '\nFetching text from www.python.org' + print('\nFetching text from www.python.org') u = URLextracter() - print "=" * 40 - print u.output() + print(("=" * 40)) + print((u.output())) demo = classmethod(demo) def demo(): diff --git a/nltk_contrib/referring.py b/nltk_contrib/referring.py index da7bc98..a2926d9 100644 --- a/nltk_contrib/referring.py +++ b/nltk_contrib/referring.py @@ -221,23 +221,23 @@ def demo(): Object2 = {"type":"chihuahua", "size":"large", "colour":"white"} Object3 = {"type":"siamese-cat", "size":"small", "colour":"black"} - print "Given an entity defined as: " + print("Given an entity defined as: ") r = Object1 - print r + print(r) preferred_attrs = ["type", "colour", "size"] - print "In a set defined as: " + print("In a set defined as: ") contrast_set = [Object2, Object3] - print contrast_set + print(contrast_set) RE = IncrementalAlgorithm(KB, r, contrast_set, preferred_attrs).RE - print "The referring expression created to uniquely identify", - print "the referent is: " - print RE + print("The referring expression created to uniquely identify", end=' ') + print("the referent is: ") + print(RE) RE_string = "" for attr, val in RE: RE_string = val + " " + RE_string RE_string = "The " + RE_string - print "This can be surface-realized as:" - print RE_string + print("This can be surface-realized as:") + print(RE_string) if __name__ == "__main__": demo() diff --git a/nltk_contrib/refexpr/constraint.py b/nltk_contrib/refexpr/constraint.py index 92fb245..e833a40 100644 --- a/nltk_contrib/refexpr/constraint.py +++ b/nltk_contrib/refexpr/constraint.py @@ -126,17 +126,17 @@ def addVariable(self, variable, domain): @type domain: list, tuple, or instance of C{Domain} """ if variable in self._variables: - raise ValueError, "Tried to insert duplicated variable %s" % \ - repr(variable) + raise ValueError("Tried to insert duplicated variable %s" % \ + repr(variable)) if type(domain) in (list, tuple): domain = Domain(domain) elif isinstance(domain, Domain): domain = copy.copy(domain) else: - raise TypeError, "Domains must be instances of subclasses of "\ - "the Domain class" + raise TypeError("Domains must be instances of subclasses of "\ + "the Domain class") if not domain: - raise ValueError, "Domain is empty" + raise ValueError("Domain is empty") self._variables[variable] = domain def addVariables(self, variables, domain): @@ -187,8 +187,8 @@ def addConstraint(self, constraint, variables=None): if callable(constraint): constraint = FunctionConstraint(constraint) else: - raise ValueError, "Constraints must be instances of "\ - "subclasses of the Constraint class" + raise ValueError("Constraints must be instances of "\ + "subclasses of the Constraint class") self._constraints.append((constraint, variables)) def getSolution(self): @@ -259,7 +259,7 @@ def getSolutionIter(self): def _getArgs(self): domains = self._variables.copy() - allvariables = domains.keys() + allvariables = list(domains.keys()) constraints = [] for constraint, variables in self._constraints: if not variables: @@ -274,7 +274,7 @@ def _getArgs(self): for constraint, variables in constraints[:]: constraint.preProcess(variables, domains, constraints, vconstraints) - for domain in domains.values(): + for domain in list(domains.values()): domain.resetState() if not domain: return None, None, None @@ 
-368,8 +368,7 @@ def getSolution(self, domains, constraints, vconstraints): constraints affecting the given variables. @type vconstraints: dict """ - raise NotImplementedError, \ - "%s is an abstract class" % self.__class__.__name__ + raise NotImplementedError("%s is an abstract class" % self.__class__.__name__) def getSolutions(self, domains, constraints, vconstraints): """ @@ -383,8 +382,7 @@ def getSolutions(self, domains, constraints, vconstraints): constraints affecting the given variables. @type vconstraints: dict """ - raise NotImplementedError, \ - "%s provides only a single solution" % self.__class__.__name__ + raise NotImplementedError("%s provides only a single solution" % self.__class__.__name__) def getSolutionIter(self, domains, constraints, vconstraints): """ @@ -398,8 +396,7 @@ def getSolutionIter(self, domains, constraints, vconstraints): constraints affecting the given variables. @type vconstraints: dict """ - raise NotImplementedError, \ - "%s doesn't provide iteration" % self.__class__.__name__ + raise NotImplementedError("%s doesn't provide iteration" % self.__class__.__name__) class BacktrackingSolver(Solver): """ @@ -514,12 +511,12 @@ def getSolutionIter(self, domains, constraints, vconstraints): # Push state before looking for next variable. queue.append((variable, values, pushdomains)) - raise RuntimeError, "Can't happen" + raise RuntimeError("Can't happen") def getSolution(self, domains, constraints, vconstraints): iter = self.getSolutionIter(domains, constraints, vconstraints) try: - return iter.next() + return next(iter) except StopIteration: return None @@ -665,9 +662,9 @@ def getSolution(self, domains, constraints, vconstraints): # Initial assignment for variable in domains: assignments[variable] = random.choice(domains[variable]) - for _ in xrange(self._steps): + for _ in range(self._steps): conflicted = False - lst = domains.keys() + lst = list(domains.keys()) random.shuffle(lst) for variable in lst: # Check if variable is not in conflict @@ -986,7 +983,7 @@ class AllEqualConstraint(Constraint): def __call__(self, variables, domains, assignments, forwardcheck=False, _unassigned=Unassigned): singlevalue = _unassigned - for value in assignments.values(): + for value in list(assignments.values()): if singlevalue is _unassigned: singlevalue = value elif value != singlevalue: @@ -1242,7 +1239,7 @@ def __init__(self, set): def __call__(self, variables, domains, assignments, forwardcheck=False): # preProcess() will remove it. - raise RuntimeError, "Can't happen" + raise RuntimeError("Can't happen") def preProcess(self, variables, domains, constraints, vconstraints): set = self._set @@ -1277,7 +1274,7 @@ def __init__(self, set): def __call__(self, variables, domains, assignments, forwardcheck=False): # preProcess() will remove it. 
- raise RuntimeError, "Can't happen" + raise RuntimeError("Can't happen") def preProcess(self, variables, domains, constraints, vconstraints): set = self._set diff --git a/nltk_contrib/refexpr/drawers.py b/nltk_contrib/refexpr/drawers.py index 7f72f31..9fd7ca0 100644 --- a/nltk_contrib/refexpr/drawers.py +++ b/nltk_contrib/refexpr/drawers.py @@ -1,8 +1,8 @@ from random import shuffle -from full_brevity import * -from relational import * -from incremental import * -from util import generate_phrase, generate_phrase_rel +from .full_brevity import * +from .relational import * +from .incremental import * +from .util import generate_phrase, generate_phrase_rel if __name__ == '__main__': # This data is based on the drawer pictures from Vienthen and Dale (2006) @@ -268,7 +268,7 @@ shuffle(facts, lambda: 0.0) - fb = FullBrevity(filter(lambda f: f[0] != Rel, facts)) + fb = FullBrevity([f for f in facts if f[0] != Rel]) rel = Relational(facts) #The ordered priority for using attributes, important for incremental algorithm ranked_attrs = ["color", "row", "col", "corner"] @@ -279,19 +279,19 @@ #defines how to turn these rules into English phrases handlers = { - "col": lambda(desc): "column %s" % desc, - "row": lambda(desc): "row %s" % desc, - "corner": lambda(desc): "corner", - "above": lambda(lr): "above" if lr else "below", - "below": lambda(lr): "below" if lr else "above", - "right": lambda(lr): "to the right of" if lr else "to the left of", - "left": lambda(lr): "to the left of" if lr else "to the right of" + "col": lambda desc: "column %s" % desc, + "row": lambda desc: "row %s" % desc, + "corner": lambda desc: "corner", + "above": lambda lr: "above" if lr else "below", + "below": lambda lr: "below" if lr else "above", + "right": lambda lr: "to the right of" if lr else "to the left of", + "left": lambda lr: "to the left of" if lr else "to the right of" } #Generate phrases with each algorithm and print to screen for i in range(1, 17): obj_id = "d%s" % i - print "%#02d,\"Full Brevity\",\"%s\"" % (i, generate_phrase(fb.describe(obj_id), ranked_attrs, handlers)) - print "%#02d,\"Relational\",\"%s\"" % (i, generate_phrase_rel(rel.describe(obj_id), ranked_attrs, obj_id, handlers)) - print "%#02d,\"Incremental\",\"%s\"" % (i, generate_phrase(incr.describe(obj_id), ranked_attrs, handlers)) + print(("%#02d,\"Full Brevity\",\"%s\"" % (i, generate_phrase(fb.describe(obj_id), ranked_attrs, handlers)))) + print(("%#02d,\"Relational\",\"%s\"" % (i, generate_phrase_rel(rel.describe(obj_id), ranked_attrs, obj_id, handlers)))) + print(("%#02d,\"Incremental\",\"%s\"" % (i, generate_phrase(incr.describe(obj_id), ranked_attrs, handlers)))) diff --git a/nltk_contrib/refexpr/full_brevity.py b/nltk_contrib/refexpr/full_brevity.py index 185d97a..59f39cb 100644 --- a/nltk_contrib/refexpr/full_brevity.py +++ b/nltk_contrib/refexpr/full_brevity.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from util import validate_facts, Type, Rel, generate_phrase +from .util import validate_facts, Type, Rel, generate_phrase class FullBrevity: """ @@ -30,7 +30,7 @@ def __init__(self, facts): """ self.facts = facts self.object_ids = validate_facts(self.facts) - assert not any(map(lambda f: f == Rel, self.facts)), "Full Brevity does not support relationships" + assert not any([f == Rel for f in self.facts]), "Full Brevity does not support relationships" def describe(self, target_id): """ @@ -55,7 +55,7 @@ def describe(self, target_id): best_prop = None # Find the property that best constrains the distractors set - for prop_key in properties.keys(): + for prop_key in list(properties.keys()): prop_val = properties[prop_key] dist_set = [dist for dist in distractors if dist[prop_key][1] == prop_val[1]] if (best_set is None) or (len(dist_set) < len(best_set)): @@ -81,5 +81,5 @@ def example(): # Print English description for each object for obj_id in ["obj1", "obj2", "obj3"]: obj_type = [f for f in facts if f[0] == Type and f[2] == obj_id] # Include type for clarity - print "%s: %s" % (obj_id, generate_phrase(fb.describe(obj_id) + obj_type, ["color", "size"])) + print(("%s: %s" % (obj_id, generate_phrase(fb.describe(obj_id) + obj_type, ["color", "size"])))) diff --git a/nltk_contrib/refexpr/gre3d_facts.py b/nltk_contrib/refexpr/gre3d_facts.py index 3b2a38e..b3c79da 100644 --- a/nltk_contrib/refexpr/gre3d_facts.py +++ b/nltk_contrib/refexpr/gre3d_facts.py @@ -1,8 +1,8 @@ -from full_brevity import * -from incremental import * -from relational import * +from .full_brevity import * +from .incremental import * +from .relational import * -import util +from . import util def getFacts(): """ @@ -285,9 +285,9 @@ def getFacts(): taxonomy = Taxonomy({}) handlers = { - "in_front_of": lambda(lr): "in front of", - "left_of": lambda(lr): "to the left of", - "right_of": lambda(lr): "to the right of" + "in_front_of": lambda lr: "in front of", + "left_of": lambda lr: "to the left of", + "right_of": lambda lr: "to the right of" } #Print out the referring expressions generated by each algorithm for each scene @@ -301,7 +301,7 @@ def getFacts(): rel = Relational(facts[i]) desc_rel = rel.describe("r1") - print "%#02d,\"Full Brevity\",\"%s\"" % (i, util.generate_phrase(desc_fb, ranked_attrs)) - print "%#02d,\"Incremental\",\"%s\"" % (i, util.generate_phrase(desc_incr, ranked_attrs)) - print "%#02d,\"Relational\",\"%s\"" % (i, util.generate_phrase_rel(desc_rel, ranked_attrs, "r1", handlers)) + print(("%#02d,\"Full Brevity\",\"%s\"" % (i, util.generate_phrase(desc_fb, ranked_attrs)))) + print(("%#02d,\"Incremental\",\"%s\"" % (i, util.generate_phrase(desc_incr, ranked_attrs)))) + print(("%#02d,\"Relational\",\"%s\"" % (i, util.generate_phrase_rel(desc_rel, ranked_attrs, "r1", handlers)))) diff --git a/nltk_contrib/refexpr/incremental.py b/nltk_contrib/refexpr/incremental.py index 5eab89a..35c0e7f 100644 --- a/nltk_contrib/refexpr/incremental.py +++ b/nltk_contrib/refexpr/incremental.py @@ -1,7 +1,7 @@ import string from copy import copy, deepcopy -from util import validate_facts, Type, Rel, generate_phrase +from .util import validate_facts, Type, Rel, generate_phrase class Incremental: """ @@ -181,7 +181,7 @@ def example(): # Print English description for each object for obj_id in ["obj1", "obj2", "obj3"]: obj_type = [f for f in facts if f[0] == Type and f[2] == obj_id] # Include type for clarity - print "%s: %s" % (obj_id, generate_phrase(incr.describe(obj_id) + obj_type, ["color", "size"])) + print(("%s: %s" % (obj_id, 
generate_phrase(incr.describe(obj_id) + obj_type, ["color", "size"])))) class Taxonomy: diff --git a/nltk_contrib/refexpr/relational.py b/nltk_contrib/refexpr/relational.py index d192d69..179f939 100644 --- a/nltk_contrib/refexpr/relational.py +++ b/nltk_contrib/refexpr/relational.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import constraint +from . import constraint from copy import copy, deepcopy -from util import validate_facts, Type, Rel, generate_phrase_rel +from .util import validate_facts, Type, Rel, generate_phrase_rel class _RelationalVar: """Internal class used to represent relational variables""" @@ -70,7 +70,7 @@ def __get_facts_for(self, obj_id): def __fact_replace(self, fact, to_replace, replace_with): """Replaces all occurrences of to_replace in fact with replace_with""" - return fact[:2] + map(lambda fact_id: replace_with if (not isinstance(fact_id, _RelationalVar) and fact_id == to_replace) else fact_id, fact[2:]) + return fact[:2] + [replace_with if (not isinstance(fact_id, _RelationalVar) and fact_id == to_replace) else fact_id for fact_id in fact[2:]] def __get_context_set(self, constraints, obj_var): """Returns a set of objects that fit the given constraints for obj_var""" @@ -183,11 +183,11 @@ def example(): rel = Relational(facts) obj_types = [f for f in facts if f[0] == Type] # Include types in the description for clarity handlers = { - "on" : lambda(lr): "on" if lr else "on which lies", - "in" : lambda(lr): "in" if lr else "in which lies" + "on" : lambda lr: "on" if lr else "on which lies", + "in" : lambda lr: "in" if lr else "in which lies" } # Generate an English description for each object for obj_id in ["c1", "c2", "c3", "b1", "b2", "t1", "t2", "f1"]: - print "%s: %s" % (obj_id, generate_phrase_rel(rel.describe(obj_id) + obj_types, ["color"], obj_id, handlers)) + print(("%s: %s" % (obj_id, generate_phrase_rel(rel.describe(obj_id) + obj_types, ["color"], obj_id, handlers)))) diff --git a/nltk_contrib/refexpr/util.py b/nltk_contrib/refexpr/util.py index 09d15ea..cd63d75 100644 --- a/nltk_contrib/refexpr/util.py +++ b/nltk_contrib/refexpr/util.py @@ -97,7 +97,7 @@ def generate_phrase(description, attr_prefs, handlers = None): # Put the highest priority attributes next to the noun for attr in attr_prefs: if (attrs.count(attr) > 0): - if (handlers != None) and (handlers.has_key(attr)): + if (handlers != None) and (attr in handlers): attr_queue.insert(0, handlers[attr](desc_dict[attr])) else: attr_queue.insert(0, desc_dict[attr]) @@ -138,13 +138,13 @@ def generate_phrase_rel(description, attr_prefs, target_id, handlers = None, top # There is a difference between generating the phrases: # "the box on the table" and "the table on which the box sits" if cur_rel[2] == target_id: - if (handlers != None) and (handlers.has_key(rel_desc)): + if (handlers != None) and (rel_desc in handlers): rel_desc = handlers[rel_desc](True) other_desc = generate_phrase_rel(other_attrs, attr_prefs, cur_rel[3], handlers, False) clauses.append("%s %s %s" % (target_desc, rel_desc, other_desc)) else: - if (handlers != None) and (handlers.has_key(rel_desc)): + if (handlers != None) and (rel_desc in handlers): rel_desc = handlers[rel_desc](False) other_desc = generate_phrase_rel(other_attrs, attr_prefs, cur_rel[2], handlers, False) diff --git a/nltk_contrib/rte/logicentail.py b/nltk_contrib/rte/logicentail.py index ed73507..a48c4a3 100644 --- a/nltk_contrib/rte/logicentail.py +++ b/nltk_contrib/rte/logicentail.py @@ 
-40,13 +40,13 @@ def tag_sentences(self, text, hyp, verbose=False): if text_drs_list: text_ex = text_drs_list[0].simplify().toFol() else: - if verbose: print 'ERROR: No readings were generated for the Text' + if verbose: print('ERROR: No readings were generated for the Text') hyp_drs_list = glueclass.parse_to_meaning(hyp) if hyp_drs_list: hyp_ex = hyp_drs_list[0].simplify().toFol() else: - if verbose: print 'ERROR: No readings were generated for the Hypothesis' + if verbose: print('ERROR: No readings were generated for the Hypothesis') #1. proof T -> H #2. proof (BK & T) -> H @@ -56,27 +56,27 @@ #6. satisfy BK & T & H result = inference.Prover9().prove(hyp_ex, [text_ex]) - if verbose: print 'prove: T -> H: %s' % result + if verbose: print(('prove: T -> H: %s' % result)) if not result: bk = self._generate_BK(text, hyp, verbose) bk_exs = [bk_pair[0] for bk_pair in bk] if verbose: - print 'Generated Background Knowledge:' + print('Generated Background Knowledge:') for bk_ex in bk_exs: - print bk_ex + print(bk_ex) result = inference.Prover9().prove(hyp_ex, [text_ex]+bk_exs) - if verbose: print 'prove: (T & BK) -> H: %s' % result + if verbose: print(('prove: (T & BK) -> H: %s' % result)) if not result: consistent = self.check_consistency(bk_exs+[text_ex]) - if verbose: print 'consistency check: (BK & T): %s' % consistent + if verbose: print(('consistency check: (BK & T): %s' % consistent)) if consistent: consistent = self.check_consistency(bk_exs+[text_ex, hyp_ex]) - if verbose: print 'consistency check: (BK & T & H): %s' % consistent + if verbose: print(('consistency check: (BK & T & H): %s' % consistent)) return result @@ -98,8 +98,8 @@ def _generate_BK(self, text, hyp, verbose=False): hypbow = set(word.lower() for word in hyp) if verbose: - print 'textbow: %s' % textbow - print 'hypbow: %s' % hypbow + print(('textbow: %s' % textbow)) + print(('hypbow: %s' % hypbow)) if self.stop: textbow = textbow - self.stopwords @@ -225,9 +225,9 @@ def demo_inference_tagger(verbose=False): tagger = RTEInferenceTagger() text = 'John see a car' - print 'Text: ', text + print('Text: ', text) hyp = 'John watch an auto' - print 'Hyp: ', hyp + print('Hyp: ', hyp) # text_ex = LogicParser().parse('exists e x y.(david(x) & own(e) & subj(e,x) & obj(e,y) & car(y))') # hyp_ex = LogicParser().parse('exists e x y.(david(x) & have(e) & subj(e,x) & obj(e,y) & auto(y))') @@ -237,17 +237,17 @@ if text_drs_list: text_ex = text_drs_list[0].simplify().toFol() else: - print 'ERROR: No readings were be generated for the Text' + print('ERROR: No readings were generated for the Text') hyp_drs_list = glueclass.parse_to_meaning(hyp) if hyp_drs_list: hyp_ex = hyp_drs_list[0].simplify().toFol() else: - print 'ERROR: No readings were be generated for the Hypothesis' + print('ERROR: No readings were generated for the Hypothesis') - print 'Text: ', text_ex - print 'Hyp: ', hyp_ex - print '' + print('Text: ', text_ex) + print('Hyp: ', hyp_ex) + print('') #1. proof T -> H #2. proof (BK & T) -> H @@ -257,67 +257,67 @@ #6.
satisfy BK & T & H result = inference.Prover9().prove(hyp_ex, [text_ex]) - print 'prove: T -> H: %s' % result + print(('prove: T -> H: %s' % result)) if result: - print 'Logical entailment\n' + print('Logical entailment\n') else: - print 'No logical entailment\n' + print('No logical entailment\n') bk = tagger._generate_BK(text, hyp, verbose) bk_exs = [bk_pair[0] for bk_pair in bk] - print 'Generated Background Knowledge:' + print('Generated Background Knowledge:') for bk_ex in bk_exs: - print bk_ex - print '' + print(bk_ex) + print('') result = inference.Prover9().prove(hyp_ex, [text_ex]+bk_exs) - print 'prove: (T & BK) -> H: %s' % result + print(('prove: (T & BK) -> H: %s' % result)) if result: - print 'Logical entailment\n' + print('Logical entailment\n') else: - print 'No logical entailment\n' + print('No logical entailment\n') # Check if the background knowledge axioms are inconsistent result = inference.Prover9().prove(assumptions=bk_exs+[text_ex]).prove() - print 'prove: (BK & T): %s' % result + print(('prove: (BK & T): %s' % result)) if result: - print 'Inconsistency -> Entailment unknown\n' + print('Inconsistency -> Entailment unknown\n') else: - print 'No inconsistency\n' + print('No inconsistency\n') result = inference.Prover9().prove(assumptions=bk_exs+[text_ex, hyp_ex]) - print 'prove: (BK & T & H): %s' % result + print(('prove: (BK & T & H): %s' % result)) if result: - print 'Inconsistency -> Entailment unknown\n' + print('Inconsistency -> Entailment unknown\n') else: - print 'No inconsistency\n' + print('No inconsistency\n') result = inference.Mace().build_model(assumptions=bk_exs+[text_ex]) - print 'satisfy: (BK & T): %s' % result + print(('satisfy: (BK & T): %s' % result)) if result: - print 'No inconsistency\n' + print('No inconsistency\n') else: - print 'Inconsistency -> Entailment unknown\n' + print('Inconsistency -> Entailment unknown\n') result = inference.Mace().build_model(assumptions=bk_exs+[text_ex, hyp_ex]).build_model() - print 'satisfy: (BK & T & H): %s' % result + print(('satisfy: (BK & T & H): %s' % result)) if result: - print 'No inconsistency\n' + print('No inconsistency\n') else: - print 'Inconsistency -> Entailment unknown\n' + print('Inconsistency -> Entailment unknown\n') def test_check_consistency(): a = LogicParser().parse('man(j)') b = LogicParser().parse('-man(j)') - print '%s, %s: %s' % (a, b, RTEInferenceTagger().check_consistency([a,b], True)) - print '%s, %s: %s' % (a, a, RTEInferenceTagger().check_consistency([a,a], True)) + print(('%s, %s: %s' % (a, b, RTEInferenceTagger().check_consistency([a,b], True)))) + print(('%s, %s: %s' % (a, a, RTEInferenceTagger().check_consistency([a,a], True)))) def tag(text, hyp): - print 'Text: ', text - print 'Hyp: ', hyp - print 'Entailment =', RTEInferenceTagger().tag_sentences(text, hyp, True) - print '' + print('Text: ', text) + print('Hyp: ', hyp) + print('Entailment =', RTEInferenceTagger().tag_sentences(text, hyp, True)) + print('') if __name__ == '__main__': # test_check_consistency() diff --git a/nltk_contrib/scripttranscriber/MinEditDist/mEdit.py b/nltk_contrib/scripttranscriber/MinEditDist/mEdit.py index 8c86a7c..469d91a 100755 --- a/nltk_contrib/scripttranscriber/MinEditDist/mEdit.py +++ b/nltk_contrib/scripttranscriber/MinEditDist/mEdit.py @@ -52,7 +52,7 @@ ## List of all features -featList = FCDic.keys() +featList = list(FCDic.keys()) LClist = ['L', 'C', 'D'] PClist = ['C', 'D'] diff --git a/nltk_contrib/scripttranscriber/alignpairsFST.py b/nltk_contrib/scripttranscriber/alignpairsFST.py index
74ab324..04fa2e9 100755 --- a/nltk_contrib/scripttranscriber/alignpairsFST.py +++ b/nltk_contrib/scripttranscriber/alignpairsFST.py @@ -68,12 +68,12 @@ def ReadCostMatrix(matfile, symbols): row_label, costs = line.split(None,1) if genSymbols: symbols.append(row_label) if row_label not in symbols: - print "Error: label (%s) not in defined symbols list" % row_label + print("Error: label (%s) not in defined symbols list" % row_label) sys.exit(1) rows.append(row_label) costs = costs.split() if len(costs) != len(cols): - print 'Error: wrong number of costs on line %s' % line + print('Error: wrong number of costs on line %s' % line) sys.exit(1) for c in range(len(costs)): if costs[c] in ('inf', 'Inf', 'INF'): costs[c] = INF_ @@ -247,7 +247,7 @@ def main(matrixfile, symfile=None, infile=None): aln1, aln2, cost = AlignFSTs(binph1, binph2, binmatrix, syms) #aln1 = aln1.replace(EPSILON_, SHORT_EPS_) #aln2 = aln2.replace(EPSILON_, SHORT_EPS_) - print '%s\t%s\t%.6f' % (aln1, aln2, cost) + print('%s\t%s\t%.6f' % (aln1, aln2, cost)) ret = os.system('rm -f %s' % (binmatrix)) if ret != 0: sys.stderr.write('Error in rm\'ing matrix\n') @@ -255,8 +255,8 @@ def main(matrixfile, symfile=None, infile=None): if infile is not None: infp.close() def usage(called): - print '%s -m [-s ]' % (called), - print '[-i ]' + print('%s -m [-s ]' % (called), end=' ') + print('[-i ]') if __name__ == '__main__': try: @@ -282,6 +282,6 @@ def usage(called): infile = a if matfile is None: usage(sys.argv[0]) - print "Error: must provide a cost-matrix file." + print("Error: must provide a cost-matrix file.") sys.exit(2) main(matfile, symfile, infile) diff --git a/nltk_contrib/scripttranscriber/auxiliary_comp.py b/nltk_contrib/scripttranscriber/auxiliary_comp.py index bf53f27..97f5ecc 100644 --- a/nltk_contrib/scripttranscriber/auxiliary_comp.py +++ b/nltk_contrib/scripttranscriber/auxiliary_comp.py @@ -72,7 +72,7 @@ def Catenations(pys, result, string=''): def LookupString(chars, convert=False): pys = [] - for u in unicode(chars, 'utf8'): + for u in str(chars, 'utf8'): try: py = PINYIN_TABLE_[u.encode('utf8')] npy = [] diff --git a/nltk_contrib/scripttranscriber/chinese_extractor.py b/nltk_contrib/scripttranscriber/chinese_extractor.py index 15a558e..c39df98 100644 --- a/nltk_contrib/scripttranscriber/chinese_extractor.py +++ b/nltk_contrib/scripttranscriber/chinese_extractor.py @@ -1305,7 +1305,7 @@ class ChineseExtractor(EastAsianExtractor): """ def LineSegment(self, line): - try: utext = unicode(line.strip(), 'utf-8') + try: utext = str(line.strip(), 'utf-8') except TypeError: utext = line.strip() word = [] for u in utext: @@ -1331,7 +1331,7 @@ class ChinesePersonalNameExtractor(EastAsianExtractor): """ def LineSegment(self, line): - try: utext = unicode(line.strip(), 'utf-8') + try: utext = str(line.strip(), 'utf-8') except TypeError: utext = line.strip() for i in range(len(utext)): for k in [4, 3, 2]: diff --git a/nltk_contrib/scripttranscriber/documents.py b/nltk_contrib/scripttranscriber/documents.py index 338a97f..12a9b05 100644 --- a/nltk_contrib/scripttranscriber/documents.py +++ b/nltk_contrib/scripttranscriber/documents.py @@ -124,7 +124,7 @@ def XmlEncode(self, utf8=False): def XmlDump(self, file=None, utf8=False): if file is None: - print '%s\n' % (self.XmlEncode(utf8)) + print(('%s\n' % (self.XmlEncode(utf8)))) return p = open(file, 'w') p.write('%s\n' % self.XmlEncode(utf8)) diff --git a/nltk_contrib/scripttranscriber/extractor.py b/nltk_contrib/scripttranscriber/extractor.py index a6bd256..b4e9c3f 100644 --- 
a/nltk_contrib/scripttranscriber/extractor.py +++ b/nltk_contrib/scripttranscriber/extractor.py @@ -66,7 +66,7 @@ def LineSegment(self, line): ## Go 'word' by word to make this more robust to unicode decode ## errors. for w in line.split(): - try: ulinelist.append(unicode(w, 'utf-8')) + try: ulinelist.append(str(w, 'utf-8')) except UnicodeDecodeError: pass uline = ' '.join(ulinelist) clist = [] diff --git a/nltk_contrib/scripttranscriber/extractor_unittest.py b/nltk_contrib/scripttranscriber/extractor_unittest.py index 98b13dc..c2db454 100755 --- a/nltk_contrib/scripttranscriber/extractor_unittest.py +++ b/nltk_contrib/scripttranscriber/extractor_unittest.py @@ -64,7 +64,7 @@ def main(output = False): 'Token %d differs: %s != %s' % (i, all_tokens[i].String(), GOLDEN_[i]) - print '%s successful' % sys.argv[0] + print(('%s successful' % sys.argv[0])) if __name__ == '__main__': diff --git a/nltk_contrib/scripttranscriber/japanese_extractor.py b/nltk_contrib/scripttranscriber/japanese_extractor.py index 5915b14..0762d1d 100644 --- a/nltk_contrib/scripttranscriber/japanese_extractor.py +++ b/nltk_contrib/scripttranscriber/japanese_extractor.py @@ -31,7 +31,7 @@ class KatakanaExtractor(chinese_extractor.EastAsianExtractor): """ def LineSegment(self, line): - try: utext = unicode(line.strip(), 'utf-8') + try: utext = str(line.strip(), 'utf-8') except TypeError: utext = line.strip() word = [] for u in utext: diff --git a/nltk_contrib/scripttranscriber/makeindex.py b/nltk_contrib/scripttranscriber/makeindex.py index 30a335e..f389d28 100755 --- a/nltk_contrib/scripttranscriber/makeindex.py +++ b/nltk_contrib/scripttranscriber/makeindex.py @@ -13,13 +13,13 @@ import sys -print '' -print '' -print 'Pydoc for ScriptTranscriber' -print '' -print '' +print('') +print('') +print('Pydoc for ScriptTranscriber') +print('') +print('') for line in sys.stdin.readlines(): html = line.strip() - print '%s
' % (html, html) -print '' -print '' + print(('%s
' % (html, html))) +print('') +print('') diff --git a/nltk_contrib/scripttranscriber/miner.py b/nltk_contrib/scripttranscriber/miner.py index 8e36ad1..a68c6c5 100755 --- a/nltk_contrib/scripttranscriber/miner.py +++ b/nltk_contrib/scripttranscriber/miner.py @@ -170,7 +170,7 @@ def Comparator(doclist, result = comparator_.ComparisonResult() matches[(hash1, hash2)] = result did += 1 - values = matches.values() + values = list(matches.values()) values.sort(lambda x, y: comp(y.Cost(), x.Cost())) if pdump: sys.stderr.write('Dumping comparisons to %s...\n' % pdump) diff --git a/nltk_contrib/scripttranscriber/morph.py b/nltk_contrib/scripttranscriber/morph.py index 1ac061c..ba95cfb 100644 --- a/nltk_contrib/scripttranscriber/morph.py +++ b/nltk_contrib/scripttranscriber/morph.py @@ -86,7 +86,7 @@ def SetSubstringLength(self, length=DEFAULT_SUBSTRING_LENGTH_): def Morphs(self, string): try: return self.morphs_[string] - except AttributeError, KeyError: return '' + except AttributeError as KeyError: return '' def LabelDoclist(self): assert self.initialized_ == True, 'Must Initialize() the analyzer!' diff --git a/nltk_contrib/scripttranscriber/perceptron.py b/nltk_contrib/scripttranscriber/perceptron.py index 32c9fbf..08520c7 100755 --- a/nltk_contrib/scripttranscriber/perceptron.py +++ b/nltk_contrib/scripttranscriber/perceptron.py @@ -170,7 +170,7 @@ def DumpToFile(self, feature_map_file): """Dump the entire feature map to a file whose name is given as the parameter. """ fm_fp = open(feature_map_file, 'w') - for k, v in self.feature_dic_.iteritems(): + for k, v in list(self.feature_dic_.items()): fm_fp.write(k + '\t' + str(v) + '\n') fm_fp.close() return True diff --git a/nltk_contrib/scripttranscriber/perceptron_trainer.py b/nltk_contrib/scripttranscriber/perceptron_trainer.py index b977458..ec0721f 100755 --- a/nltk_contrib/scripttranscriber/perceptron_trainer.py +++ b/nltk_contrib/scripttranscriber/perceptron_trainer.py @@ -92,7 +92,7 @@ def Train(self, pos_examples_list): """ # if the peceptron is already trained, warn and abort if self.snow_p_.IsTrained(): - if DEBUG_: print 'Perceptron already trained (use Retrain?)' + if DEBUG_: print('Perceptron already trained (use Retrain?)') return False for example in pos_examples_list: @@ -144,7 +144,7 @@ def Retrain(self, new_positives): """ # if the perceptron has not been trained, warn and abort if not self.snow_p_.IsTrained(): - if DEBUG_: print 'Perceptron is not trained (use Train?)' + if DEBUG_: print('Perceptron is not trained (use Train?)') return False for example in new_positives: @@ -193,7 +193,7 @@ def Evaluate(self, s_token, t_token): Return: a tuple of activated target and activation, in the order as mentioned. 
""" if not self.snow_p_.IsTrained(): - if DEBUG_: print 'Perceptron not trained' + if DEBUG_: print('Perceptron not trained') return False test_ex = Example(s_token, t_token) @@ -257,8 +257,7 @@ def NearestNeighbors(self, positives): candidates = set(candidates) candidates = list(candidates) - distances = map(lambda x: - (x, Distance(x.split(), token2.split())), candidates) + distances = [(x, Distance(x.split(), token2.split())) for x in candidates] distances = sorted(distances, lambda x,y: x[1] - y[1]) for new_str in distances[1:5]: @@ -288,7 +287,7 @@ class WordShuffler: """ def __init__(self, l): self.l_ = l - self.left_els_ = map(lambda x: x[0], self.l_) + self.left_els_ = [x[0] for x in self.l_] def CreateShuffledList(self): shuffled_list = [] diff --git a/nltk_contrib/scripttranscriber/perceptron_trainer_unittest.py b/nltk_contrib/scripttranscriber/perceptron_trainer_unittest.py index 029440b..afc984b 100755 --- a/nltk_contrib/scripttranscriber/perceptron_trainer_unittest.py +++ b/nltk_contrib/scripttranscriber/perceptron_trainer_unittest.py @@ -47,28 +47,28 @@ def main(): # train the perceptron pt.Train(dict[0:1000]) first_run = EvaluateExamples(pt) - print first_run + print(first_run) # results here should be the same second_run = EvaluateExamples(pt) - print second_run + print(second_run) # learn from new examples # produce new results pt.Retrain(dict[1001:3000]) third_run = EvaluateExamples(pt) - print third_run + print(third_run) # this result should be the same as the third run fourth_run = EvaluateExamples(pt) - print fourth_run + print(fourth_run) # test if first_run == second_run and first_run != third_run \ and third_run == fourth_run: - print 'unittest successful' + print('unittest successful') else: - print 'unsuccessful' + print('unsuccessful') # clean up pt.CleanUp() diff --git a/nltk_contrib/scripttranscriber/pronouncer_unittest.py b/nltk_contrib/scripttranscriber/pronouncer_unittest.py index 9add929..1c7e51c 100755 --- a/nltk_contrib/scripttranscriber/pronouncer_unittest.py +++ b/nltk_contrib/scripttranscriber/pronouncer_unittest.py @@ -57,9 +57,9 @@ def LoadGolden(): for line in p: line = line.strip() word, pron = line.split('\t') - try: word = unicode(word, 'utf-8') + try: word = str(word, 'utf-8') except TypeError: pass - try: pron = unicode(pron, 'utf-8') + try: pron = str(pron, 'utf-8') except TypeError: pass try: GOLDEN_[word].AddPronunciation(pron) @@ -73,7 +73,7 @@ def main(output = False): if output: file = open(GOLDEN_FILE_, 'w') else: LoadGolden() for w in WORDS_: - try: w = unicode(w.strip(), 'utf-8') + try: w = str(w.strip(), 'utf-8') except TypeError: pass token_ = tokens.Token(w) pronouncer_ = pronouncer.UnitranPronouncer(token_) @@ -89,7 +89,7 @@ def main(output = False): file.write('%s\t%s\n' % (pronouncer_.Token().String(), p)) else: try: - string = unicode(pronouncer_.Token().String(), 'utf-8') + string = str(pronouncer_.Token().String(), 'utf-8') except TypeError: string = pronouncer_.Token().String() assert string in GOLDEN_, \ @@ -107,10 +107,10 @@ def main(output = False): nprons[i], gprons[i]) if output: - print 'generated %s' % GOLDEN_FILE_ + print(('generated %s' % GOLDEN_FILE_)) file.close() else: - print '%s successful' % sys.argv[0] + print(('%s successful' % sys.argv[0])) if __name__ == '__main__': diff --git a/nltk_contrib/scripttranscriber/sample.py b/nltk_contrib/scripttranscriber/sample.py index 9e83990..d0d96d2 100755 --- a/nltk_contrib/scripttranscriber/sample.py +++ b/nltk_contrib/scripttranscriber/sample.py @@ -105,7 +105,7 @@ def 
ComputePhoneMatches(doclist): comparator.ComputeDistance() result = comparator.ComparisonResult() matches[(hash1, hash2)] = result - values = matches.values() + values = list(matches.values()) values.sort(lambda x, y: cmp(x.Cost(), y.Cost())) p = open(MATCH_FILE_, 'w') ## zero out the file p.close() @@ -130,7 +130,7 @@ def ComputeTimeCorrelation(doclist): comparator.ComputeDistance() result = comparator.ComparisonResult() correlates[(hash1, hash2)] = result - values = correlates.values() + values = list(correlates.values()) values.sort(lambda x, y: cmp(x.Cost(), y.Cost())) p = open(CORR_FILE_, 'w') ## zero out the file p.close() diff --git a/nltk_contrib/scripttranscriber/thai_extractor.py b/nltk_contrib/scripttranscriber/thai_extractor.py index e1e2c32..bf51620 100644 --- a/nltk_contrib/scripttranscriber/thai_extractor.py +++ b/nltk_contrib/scripttranscriber/thai_extractor.py @@ -63,7 +63,7 @@ def Decode(self, val): return None def Dump(self, file): - keys = self.table_.keys() + keys = list(self.table_.keys()) keys.sort(lambda x, y: cmp(self.table_[x], self.table_[y])) p = open(file, 'w') for k in keys: @@ -90,7 +90,7 @@ def Listify(text): extraction. """ list = [] - for u in unicode(text, 'utf8'): + for u in str(text, 'utf8'): list.append(u.encode('utf8')) return list @@ -179,7 +179,7 @@ def LineSegment(self, line): } self.snow_session_ = snow.SnowSession(snow.MODE_SERVER, snow_test_args) - try: utext = unicode(line.strip(), 'utf-8') + try: utext = str(line.strip(), 'utf-8') except TypeError: utext = line.strip() segments = utext.split() for segment in segments: @@ -189,9 +189,8 @@ def LineSegment(self, line): seglist = Listify(segment.encode('utf8')) features = [] for i in range(len(seglist)): - feats = ', '.join(map(lambda x: str(x), - FeatureExtract(i, seglist, - self.feature_map_))) + ':\n' + feats = ', '.join([str(x) for x in FeatureExtract(i, seglist, + self.feature_map_)]) + ':\n' result = self.snow_session_.evaluateExample(feats) target, a, b, activation = result.split('\n')[1].split() target = int(target[:-1]) ## remove ':' diff --git a/nltk_contrib/scripttranscriber/thai_unittest.py b/nltk_contrib/scripttranscriber/thai_unittest.py index 8c47257..abe8643 100755 --- a/nltk_contrib/scripttranscriber/thai_unittest.py +++ b/nltk_contrib/scripttranscriber/thai_unittest.py @@ -85,7 +85,7 @@ def ComputePhoneMatches(doclist, match_file): comparator.ComputeDistance() result = comparator.ComparisonResult() matches[(hash1, hash2)] = result - values = matches.values() + values = list(matches.values()) values.sort(lambda x, y: cmp(x.Cost(), y.Cost())) p = open(match_file, 'w') ## zero out the file p.close() diff --git a/nltk_contrib/scripttranscriber/token_comp_unittest.py b/nltk_contrib/scripttranscriber/token_comp_unittest.py index 337dbbb..3bf251f 100755 --- a/nltk_contrib/scripttranscriber/token_comp_unittest.py +++ b/nltk_contrib/scripttranscriber/token_comp_unittest.py @@ -32,11 +32,11 @@ import auxiliary_comp from __init__ import BASE_ -PAIRS_ = [(u'高島屋', u'Takashimaya'), - (u'共產黨', u'공산당'), - (u'Kuomintang', u'國民黨'), - (u'ᏣᎳᎩ', u'Cherokee'), - (u'niqitsiavaliriniq', u'ᓂᕿᑦᓯᐊᕙᓕᕆᓂᖅ') +PAIRS_ = [('高島屋', 'Takashimaya'), + ('共產黨', '공산당'), + ('Kuomintang', '國民黨'), + ('ᏣᎳᎩ', 'Cherokee'), + ('niqitsiavaliriniq', 'ᓂᕿᑦᓯᐊᕙᓕᕆᓂᖅ') ] GOLDEN_FILE_ = '%s/testdata/token_comp_test.txt' % BASE_ @@ -67,12 +67,12 @@ def CreateDoclist(): doc.AddLang(lang) lang = tokens.Lang() lang.SetId('zho') - token_ = tokens.Token(u'克林頓') + token_ = tokens.Token('克林頓') token_.SetCount(3) token_.AddPronunciation('kh & l 
i n t u n') - token_.SetMorphs([u'克林頓']) + token_.SetMorphs(['克林頓']) lang.AddToken(token_) - token_ = tokens.Token(u'高島屋') + token_ = tokens.Token('高島屋') token_.SetCount(1) token_.AddPronunciation('k a u t a u u') token_.AddPronunciation('t A k A s i m A j a') @@ -95,10 +95,10 @@ def CreateDoclist(): doc.AddLang(lang) lang = tokens.Lang() lang.SetId('ara') - token_ = tokens.Token(u'كلينتون') + token_ = tokens.Token('كلينتون') token_.SetCount(3) token_.AddPronunciation('k l j n t w n') - token_.SetMorphs([u'كلينتون']) + token_.SetMorphs(['كلينتون']) lang.AddToken(token_) doc.AddLang(lang) doclist.AddDoc(doc) @@ -193,7 +193,7 @@ def TestAuxiliaryComparators(unitname): comparator.ComputeDistance() assert comparator.ComparisonResult().Cost() == auxiliary_comp.NO_MATCH_, \ '%s should not match %s' % (t2.String(), t2py.String()) - print '%s (auxiliary tests) successful' % unitname + print(('%s (auxiliary tests) successful' % unitname)) def main(output = False): diff --git a/nltk_contrib/scripttranscriber/tokens.py b/nltk_contrib/scripttranscriber/tokens.py index 1e65af2..3f2096d 100644 --- a/nltk_contrib/scripttranscriber/tokens.py +++ b/nltk_contrib/scripttranscriber/tokens.py @@ -200,7 +200,7 @@ def GetTokenStats(self, tok): except KeyError: return self.InitTokenStats(tok) def TokenStats(self): - return self.tokstats_.values() + return list(self.tokstats_.values()) def SetN(self, n): self.n_ = n @@ -288,7 +288,7 @@ def CompactTokens(self): try: map[hash_string].append(token_) except KeyError: map[hash_string] = [token_] ntokens = [] - keys = map.keys() + keys = list(map.keys()) keys.sort() for k in keys: token_ = map[k][0] diff --git a/nltk_contrib/scripttranscriber/unittest.py b/nltk_contrib/scripttranscriber/unittest.py index 40a4517..fe6149a 100644 --- a/nltk_contrib/scripttranscriber/unittest.py +++ b/nltk_contrib/scripttranscriber/unittest.py @@ -41,4 +41,4 @@ def CompareOutputFiles(gold_file, test_file): def TestUnitOutputs(unitname, gold_file, test_file): CompareOutputFiles(gold_file, test_file) - print '%s successful' % unitname + print(('%s successful' % unitname)) diff --git a/nltk_contrib/seqclass.py b/nltk_contrib/seqclass.py index 4f89524..d61ad33 100644 --- a/nltk_contrib/seqclass.py +++ b/nltk_contrib/seqclass.py @@ -19,7 +19,7 @@ def size(self): def classify(self, featuresets): if self.size() == 0: - raise ValueError, 'Tagger is not trained' + raise ValueError('Tagger is not trained') for i, featureset in enumerate(featuresets): @@ -91,7 +91,7 @@ def save_features(self, training_data, filename): stream = open(filename,'w') yaml.dump_all(training_data, stream) - print "Saving features to %s" % os.path.abspath(filename) + print(("Saving features to %s" % os.path.abspath(filename))) stream.close() @@ -100,7 +100,7 @@ def corpus2training_data(self, training_corpus, model_name='default', save=False dict_corpus = tabular2dict(training_corpus, KEYS) contexts = self.contexts(dict_corpus) - print "Detecting features" + print("Detecting features") training_data = [(self.detect_features(c), c[1]['label']) for c in contexts] if save: @@ -118,11 +118,11 @@ def train(self, training_corpus, classifier=iis): Train a classifier. 
""" if self.size() != 0: - raise ValueError, 'Classifier is already trained' + raise ValueError('Classifier is already trained') training_data = self.corpus2training_data(training_corpus) - print "Training classifier" + print("Training classifier") self._model = iis(training_data) diff --git a/nltk_contrib/stringcomp.py b/nltk_contrib/stringcomp.py index 5c2b2ee..2cf5511 100644 --- a/nltk_contrib/stringcomp.py +++ b/nltk_contrib/stringcomp.py @@ -89,20 +89,20 @@ def stringcomp (fx, fy): def demo (): - print "Comparison between 'python' and 'python': %.2f" % stringcomp("python", "python") - print "Comparison between 'python' and 'Python': %.2f" % stringcomp("python", "Python") - print "Comparison between 'NLTK' and 'NTLK': %.2f" % stringcomp("NLTK", "NTLK") - print "Comparison between 'abc' and 'def': %.2f" % stringcomp("abc", "def") + print(("Comparison between 'python' and 'python': %.2f" % stringcomp("python", "python"))) + print(("Comparison between 'python' and 'Python': %.2f" % stringcomp("python", "Python"))) + print(("Comparison between 'NLTK' and 'NTLK': %.2f" % stringcomp("NLTK", "NTLK"))) + print(("Comparison between 'abc' and 'def': %.2f" % stringcomp("abc", "def"))) - print "Word most similar to 'australia' in list ['canada', 'brazil', 'egypt', 'thailand', 'austria']:" + print("Word most similar to 'australia' in list ['canada', 'brazil', 'egypt', 'thailand', 'austria']:") max_score = 0.0 ; best_match = None for country in ["canada", "brazil", "egypt", "thailand", "austria"]: score = stringcomp("australia", country) if score > max_score: best_match = country max_score = score - print "(comparison between 'australia' and '%s': %.2f)" % (country, score) - print "Word most similar to 'australia' is '%s' (score: %.2f)" % (best_match, max_score) + print(("(comparison between 'australia' and '%s': %.2f)" % (country, score))) + print(("Word most similar to 'australia' is '%s' (score: %.2f)" % (best_match, max_score))) if __name__ == "__main__": demo() diff --git a/nltk_contrib/textgrid.py b/nltk_contrib/textgrid.py index 9cbd77c..e1be559 100644 --- a/nltk_contrib/textgrid.py +++ b/nltk_contrib/textgrid.py @@ -150,7 +150,7 @@ def __iter__(self): for tier in self.tiers: yield tier - def next(self): + def __next__(self): if self.idx == (self.size - 1): raise StopIteration self.idx += 1 @@ -450,11 +450,11 @@ def demo_TextGrid(demo_data): print("** Demo of the TextGrid class. **") fid = TextGrid(demo_data) - print("Tiers: %s" % (fid.size)) + print(("Tiers: %s" % (fid.size))) for i, tier in enumerate(fid): print("\n***") - print("Tier: %s" % (i + 1)) + print(("Tier: %s" % (i + 1))) print(tier) def demo(): diff --git a/nltk_contrib/timex.py b/nltk_contrib/timex.py index 3b1b5ae..4e0e0a7 100755 --- a/nltk_contrib/timex.py +++ b/nltk_contrib/timex.py @@ -11,9 +11,9 @@ try: from mx.DateTime import * except ImportError: - print """ + print(""" Requires eGenix.com mx Base Distribution -http://www.egenix.com/products/python/mxBase/""" +http://www.egenix.com/products/python/mxBase/""") # Predefined strings. 
numbers = "(^a(?=\s)|one|two|three|four|five|six|seven|eight|nine|ten| \ @@ -173,8 +173,7 @@ def ground(tagged_text, base_date): # Find all identified timex and put them into a list timex_regex = re.compile(r'.*?', re.DOTALL) timex_found = timex_regex.findall(tagged_text) - timex_found = map(lambda timex:re.sub(r'', '', timex), \ - timex_found) + timex_found = [re.sub(r'', '', timex) for timex in timex_found] # Calculate the new date accordingly for timex in timex_found: @@ -189,9 +188,9 @@ def ground(tagged_text, base_date): timex, re.IGNORECASE) value = split_timex[0] unit = split_timex[1] - num_list = map(lambda s:hashnum(s),re.findall(numbers + '+', \ - value, re.IGNORECASE)) - timex = `sum(num_list)` + ' ' + unit + num_list = [hashnum(s) for s in re.findall(numbers + '+', \ + value, re.IGNORECASE)] + timex = repr(sum(num_list)) + ' ' + unit # If timex matches ISO format, remove 'time' and reorder 'date' if re.match(r'\d+[/-]\d+[/-]\d+ \d+:\d+:\d+\.\d+', timex): @@ -351,7 +350,7 @@ def ground(tagged_text, base_date): def demo(): import nltk text = nltk.corpus.abc.raw('rural.txt')[:10000] - print tag(text) + print((tag(text))) if __name__ == '__main__': demo() diff --git a/nltk_contrib/wals.py b/nltk_contrib/wals.py index 68a371f..3d9f9ea 100644 --- a/nltk_contrib/wals.py +++ b/nltk_contrib/wals.py @@ -79,13 +79,13 @@ def load(self, data_dir, encoding): def open_csv(filename, remove_header=True): filename = os.path.join(data_dir, filename + '.' + file_ext) wals_file = csv.reader(open(filename, 'r'), dialect=self.dialect) - if remove_header: wals_file.next() + if remove_header: next(wals_file) for row in wals_file: - yield [unicode(cell, encoding) for cell in row] + yield [str(cell, encoding) for cell in row] def map_fields(vectors, fields): for vector in vectors: - yield dict(zip(fields, vector)) + yield dict(list(zip(fields, vector))) # Features self.features = dict((f['id'], f) for f in @@ -100,14 +100,14 @@ def map_fields(vectors, fields): map_fields(open_csv('languages'), language_fields)) # convert longitude and latitude to float from string - for l in self.languages.values(): + for l in list(self.languages.values()): l['latitude'] = float(l['latitude']) l['longitude'] = float(l['longitude']) # The datapoints file is more complicated. There is a column for # every feature, and a row for every language. Each cell is either # empty or contains a value dependent on the feature. rows = open_csv('datapoints', remove_header=False) - header = rows.next() + header = next(rows) self.data = defaultdict(dict) self.feat_lg_map = defaultdict(list) for row in rows: @@ -124,7 +124,7 @@ def _build_indices(self): self.iso_index = defaultdict(list) self.language_name_index = defaultdict(list) - for lg in self.languages.values(): + for lg in list(self.languages.values()): for iso in lg['iso_codes'].split(): self.iso_index[iso] += [lg] name = lg['name'].lower() @@ -141,7 +141,7 @@ def _build_language_hierarchy(self): # family -> genus # family -> subfamily -> genus lg_hier = {} - for lg in self.languages.values(): + for lg in list(self.languages.values()): family = lg_hier.setdefault(lg['family'], LHNode(lg['family'])) family.languages[lg['wals_code']] = lg @@ -165,12 +165,12 @@ def show_language(self, wals_code): @param wals_code: The WALS code for a language. 
""" - print self.languages[wals_code]['name'], '(%s):' % wals_code + print(self.languages[wals_code]['name'], '(%s):' % wals_code) data = self.data[wals_code] for feat in sorted(data.keys()): - print ' ', self.features[feat]['name'], '(%s):' % feat,\ + print(' ', self.features[feat]['name'], '(%s):' % feat,\ self.values[feat][data[feat]]['description'],\ - '(%s)' % self.values[feat][data[feat]]['value_id'] + '(%s)' % self.values[feat][data[feat]]['value_id']) def get_wals_codes_from_iso(self, iso_code): """ @@ -217,36 +217,36 @@ def get_languages_with_feature(self, feature, value=None, superclass=None): def demo(wals_directory=None, dialect='excel-tab', encoding='utf-8'): if not wals_directory: import sys - print >>sys.stderr, 'Error: No WALS data directory provided.' - print >>sys.stderr, ' You may obtain the database from ' +\ - 'http://wals.info/export' + print('Error: No WALS data directory provided.', file=sys.stderr) + print(' You may obtain the database from ' +\ + 'http://wals.info/export', file=sys.stderr) return w = WALS(wals_directory, dialect, encoding) # Basic statistics - print 'In database:\n %d\tlanguages\n %d\tfeatures ' %\ - (len(w.languages), len(w.features)) + print('In database:\n %d\tlanguages\n %d\tfeatures ' %\ + (len(w.languages), len(w.features))) # values are a nested dictionary (w.values[feature_id][value_id]) - num_vals = sum(map(len, w.values.values())) - print ' %d\ttotal values (%f avg. number per feature)' %\ - (num_vals, float(num_vals)/len(w.features)) + num_vals = sum(map(len, list(w.values.values()))) + print(' %d\ttotal values (%f avg. number per feature)' %\ + (num_vals, float(num_vals)/len(w.features))) # More statistics - print " %d languages specify feature 81A (order of S, O, and V)" %\ - (len(w.get_languages_with_feature('81A'))) - print " %d langauges have VOS order" %\ - (len(w.get_languages_with_feature('81A', value='4'))) + print(" %d languages specify feature 81A (order of S, O, and V)" %\ + (len(w.get_languages_with_feature('81A')))) + print(" %d langauges have VOS order" %\ + (len(w.get_languages_with_feature('81A', value='4')))) # Getting language data - print "\nGetting data for languages named 'Irish'" + print("\nGetting data for languages named 'Irish'") for wals_code in w.get_wals_codes_from_name('Irish'): l = w.languages[wals_code] - print ' %s (ISO-639 code: %s WALS code: %s)' %\ - (l['name'], l['iso_codes'], wals_code) - print "\nGetting data for languages with ISO 'isl'" + print(' %s (ISO-639 code: %s WALS code: %s)' %\ + (l['name'], l['iso_codes'], wals_code)) + print("\nGetting data for languages with ISO 'isl'") for wals_code in w.get_wals_codes_from_iso('isl'): w.show_language(wals_code) - print "\nLocations of dialects for the Min Nan language (ISO 'nan'):" + print("\nLocations of dialects for the Min Nan language (ISO 'nan'):") for wals_code in w.get_wals_codes_from_iso('nan'): l = w.languages[wals_code] - print " %s\tlat:%f\tlong:%f" %\ - (l['name'], l['latitude'], l['longitude']) + print(" %s\tlat:%f\tlong:%f" %\ + (l['name'], l['latitude'], l['longitude']))