This repository has been archived by the owner on Jun 2, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbcnlp_query.py
144 lines (130 loc) · 5.38 KB
/
bcnlp_query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/python
# coding=UTF-8
#
# BitCurator NLP Tools
# Copyright (C) 2016 -2017
# All rights reserved.
#
# This code is distributed under the terms of the GNU General Public
# License, Version 3. See the text file "COPYING" for further details
# about the terms of this license.
#
# This file contains BitCurator NLP Tools database support.
#
import sqlalchemy
from sqlalchemy import *
import psycopg2
from sqlalchemy_utils import database_exists, create_database
import sys
import bcnlp_db
import logging
# Set up logging location
logging.basicConfig(filename='bcnlp_ent.log', level=logging.DEBUG)
try:
from argparse import ArgumentParser
except ImportError:
raise ImportError("This script requires ArgumentParser which is in Python 2.7 or Python 3.0")
#def bnGetEntitiesForDoc(doc_index, con, meta):
def bnGetInfoForDoc(doc_index, category, con, meta):
    """Fetch one per-document table for a document listed in bcnlp_main.

    The bcnlp_main table is scanned for a row whose first column equals
    doc_index.  When such a row exists, the table selected by category
    is read through bnPrintTable():

        'Entity' -> bcnlp_entity_doc<doc_index>
        'NP'     -> bcnlp_noun_doc<doc_index>
        'VP'     -> bcnlp_verb_doc<doc_index>
        'PP'     -> bcnlp_prepo_doc<doc_index>
        'sim'    -> doc<doc_index>_sm_table

    Args:
        doc_index: Value compared against the first column of every
            bcnlp_main row.
        category: One of 'Entity', 'NP', 'VP', 'PP' or 'sim'.
        con: Object whose execute() runs a select and yields rows.
        meta: Object whose ``tables`` attribute maps table names to
            table objects.

    Returns:
        For 'sim': the list of rows of the similarity table (nothing is
        written to disk).
        For 'Entity', 'NP', 'VP' and 'PP': the name of the text file
        "<table_name>.txt" that was written (relative to the current
        working directory) with one row per line.
        None when bcnlp_main is not present, the document is not listed
        in it, or the category is not recognised.
    """
    logging.debug('In function bnGetInfoForDoc: doc_index:%s category:%s ',
                  doc_index, category)

    # Direct lookup instead of walking every table name.
    main_table = meta.tables.get("bcnlp_main")
    if main_table is None:
        return None
    logging.debug("Table is bcnlp_main")

    # any() stops at the first matching row rather than reading the
    # whole main table.
    doc_is_listed = any(record[0] == doc_index
                        for record in con.execute(main_table.select()))
    if not doc_is_listed:
        return None

    if category == 'sim':
        # The similarity table is handed back to the caller directly;
        # unlike the other categories, no file is produced.
        logging.debug("Category is sim")
        sim_table = 'doc' + str(doc_index) + '_sm_table'
        logging.debug('SIM table: %s ', sim_table)
        records = bnPrintTable(sim_table, con, meta)
        logging.debug("records: %s", records)
        return records

    table_prefixes = {
        'Entity': "bcnlp_entity_doc",
        'NP': "bcnlp_noun_doc",
        'VP': "bcnlp_verb_doc",
        'PP': "bcnlp_prepo_doc",
    }
    prefix = table_prefixes.get(category)
    if prefix is None:
        return None

    table_name = prefix + str(doc_index)
    records = bnPrintTable(table_name, con, meta)
    outfile = table_name + ".txt"
    with open(outfile, "w") as of:
        # Wrap each row in a 1-tuple so that a row which is itself a
        # tuple is formatted as a whole instead of being unpacked as
        # the argument list of the % operator.
        of.writelines("%s\n" % (item,) for item in records)
    return outfile
'''
def bnGetEntitiesForDoc(doc_index, con, meta):
table_name = "bcnlp_entity_doc" + str(doc_index)
for table in meta.tables:
if table == table_name:
table_obj = meta.tables[table_name]
for record in con.execute(table_obj.select()):
'''
def bnGetDocIndexForDoc(con, meta, doc_name):
    """Return the first column of the bcnlp_main row for doc_name.

    Args:
        con: Object whose execute() runs a select and yields rows.
        meta: Object whose ``tables`` attribute maps table names to
            table objects.
        doc_name: Value matched against the doc_name column of
            bcnlp_main.

    Returns:
        Column 0 of the first matching row, or None when bcnlp_main is
        not present or no row has that doc_name.
    """
    # Direct lookup instead of walking every table name.
    main_table = meta.tables.get("bcnlp_main")
    if main_table is None:
        return None

    logging.debug('bnGetDocIndexForDoc: found bcnlp_main table')
    query = main_table.select().where(main_table.c.doc_name == doc_name)
    for row in con.execute(query):
        # Only the first match is of interest.
        return row[0]
    return None
def bnPrintMainTable(con, meta):
    """Return every row of the bcnlp_main table as a list.

    Despite its name, this function prints nothing; it only collects
    the rows.

    Args:
        con: Object whose execute() runs a select and yields rows.
        meta: Object whose ``tables`` attribute maps table names to
            table objects.

    Returns:
        A list with the rows of bcnlp_main, or an empty list when that
        table is not present.
    """
    # Direct lookup instead of walking every table name.
    main_table = meta.tables.get("bcnlp_main")
    if main_table is None:
        return []
    return list(con.execute(main_table.select()))
def bnPrintTable(table_name, con, meta):
    """Return every row of the named table as a list.

    Despite its name, this function prints nothing; it only collects
    the rows.

    Args:
        table_name: Key of the wanted table in ``meta.tables``.
        con: Object whose execute() runs a select and yields rows.
        meta: Object whose ``tables`` attribute maps table names to
            table objects.

    Returns:
        A list with the rows of the table, or an empty list when no
        table of that name is present.
    """
    # Direct lookup instead of walking every table name.
    table_obj = meta.tables.get(table_name)
    if table_obj is None:
        return []
    return list(con.execute(table_obj.select()))
'''
def bnGetNumRecordsInTable(table_name, con, meta):
psql_cmd = "select count(*) "+ table_name
print "psql cmd for getting number of rows ", psql_cmd
table_obj = meta.tables[table_name]
select_phrase = table_obj.select().count(*) table_name
return(con.execute(select_phrase))
'''
def _main():
    """Run the sample queries executed when this file is used as a script.

    The command line is parsed (the --i and --outdir values are accepted
    but not used below), the database is initialised through
    bcnlp_db.dbinit(), the main table is read, the index of one fixed
    document name is looked up and printed, and finally two entity
    tables are compared through bcnlp_db.dbu_execute_dbcmd().
    """
    arg_parser = ArgumentParser(prog='bcnlp_query.py',
                                description='Query the DB')
    for option in ('--i', '--outdir'):
        arg_parser.add_argument(option, action='store', help="... ")
    arg_parser.parse_args()

    con, meta = bcnlp_db.dbinit()
    bnPrintMainTable(con, meta)

    doc_index = bnGetDocIndexForDoc(
        con, meta, "13030.Smalltalk.Hugh+Brinkman.txt")
    print("DOC Index : ", doc_index)

    # Compare tables
    bcnlp_db.dbu_execute_dbcmd(
        'compare_two_tables',
        table1='bcnlp_entity_doc0',
        table2='bcnlp_entity_doc1')


if __name__ == "__main__":
    _main()