#
# Global properties file for the KBP project
#
kbp.runid = mimlre_small_newAlgo
serializedRelationExtractorPath = kbp_relation_model_mimlre_small
work.dir = small_dataset/corpora
trainer.model = atleastonce
#
# BaselineNLProcessor settings
#
# annotators is used for test queries in KBPTester and SnippetsToCache. Do NOT mess with this property!
annotators = tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref
ssplit.htmlBoundariesToDiscard = p,text,post,postdate,poster,turn,speaker,quote
parser.maxlen = 50
regexner.mapping = resources/kbp/regexner/combined_map
regexner.ignorecase = true
oldCorefFormat = true
#
# KBPReader properties
#
# new indices over NFS, using our own customized annotation caching
index.kbp = small_dataset/index_dir
index.maxsentencelength = 50
# could be NONE, NER, TRIGGER, or BUCKETS (see IndexExtractor.java for more details)
index.train.sortmode = NONE
index.train.useknownslots = true
index.test.sortmode = BUCKETS
index.train.sentences.per.entity = 50
index.test.sentences.per.entity = 50
reader.useweb = false
index.train.usecache = true
index.test.usecache = true
index.cache.dir = small_dataset/index_cache
# number of sentences before and after the current to return as part of a match
index.context.previous = 0
index.context.next = 0
# How many extra results to consider when picking sentences.per.entity
index.extraresults.factor = 1.0
# same thing, but applied at the IndexAndWebCacheSentenceExtractor,
# which uses different sorting methods to pick the best ones
index.indexandweb.extraresults.factor = 1.0
# sort modes after combining all sentences... either sort index & web
# together or sort them separately
# see IndexAndWebCacheSentenceExtractor
index.combinemode = NO_SORTING
# we might use these trigger words during feature generation (disabled by default)
relation.triggers = resources/kbp/web_queries/keywords_no_ml
reader.multimatch = true
reader.enforcene = true
reader.domain.adapt = false
# possible values: all, three, two
# - all: makes one domain for each index
# - three: corpus, web, wiki
# - two: web, non-web
reader.domain.adapt.style = all
#
# block below relevant only for PipelineIndexExtractor, which should no longer be used (everything should be cached offline)
#
index.pipelinemethod = SPLIT
index.fullannotators = tokenize, ssplit, pos, lemma, ner, regexner, parse, dcoref
index.step1annotators = tokenize, ssplit
index.step2annotators = pos, lemma, ner, regexner, parse
overlapping.relations = resources/kbp/overlaps.tab
#
# KBPDomReader settings
#
kbp.mapping = small_dataset/resources/mapping
kbp.ner.types = small_dataset/resources/NER_types
kbp.manual.lists = small_dataset/resources/specific_relations
kbp.countries = small_dataset/resources/countries
kbp.states = small_dataset/resources/statesandprovinces
kbp.inputkb = small_dataset/input_kb/data
kbp.debugkb = small_dataset/
#
# Gazetteer info
#
nationalities = small_dataset/resources/CountryLexicalResource.db
states = small_dataset/resources/state-abbreviations.txt
#
# SemgrexExtractor
#
rule.dir = /NOT_RELEASED/u/nlp/data/TAC-KBP2010/patterns
priority.file = /NOT_RELEASED/u/nlp/data/TAC-KBP2010/relation.priorities
use.statistical.model = true
use.rulebased.model = false
#
# ErrorAnalysis
#
## don't need NLP analysis for the non-cached indices
## do NOT set this in regular train/test runs!
#index.minimal.analysis = true
# if true, run the analysis over devQueries; otherwise, run it over the training KB set in analysis.kb
analysis.test.mode = false
# small.xml contains ~100 entities; big.xml contains ~1000 entities
analysis.kb = /NOT_RELEASED/u/nlp/data/TAC-KBP2010/TAC_2009_KBP_Evaluation_Reference_Knowledge_Base/stanford_splits/analysis/big.xml
# use this property if you want a serialized map from relation name to coremap
#analysis.dumpSentences = /u/horatio/kbpsentences2.ser
#
# Model properties
#
# do not tune the acceptance threshold for a slot; accept everything
slot.threshold = 0
# keep only 5% of the negative examples; this is the best for KBP
negatives.sampleratio = 0.05
# use all negative labels; this is better when we aggressively subsample negatives
use.allnegs = true
# remove features seen less than 5 times; this makes everything much faster
featureCountThreshold = 5
# JointBayes settings (tuned on dev)
folds = 3
epochs = 8
features = 0
filter = all
inference.type = stable
# AtLeastOnce settings (tuned on dev)
perceptron.epochs = 2
inference.epochs = 1
algo.type = 1
#
# KBPTrainer/Tester settings
#
#nlpsub = true
relationFeatures = arg_words,arg_type,arg_order,full_tree_path,surface_distance_binary,surface_distance_bins,adjacent_words,entities_between_args,entity_counts_binary,entity_counts_bins,span_words_unigrams,dependency_path_lowlevel,dependency_path_words
# valid values: best, all
kbp.list.output = all
logLevel = SEVERE
readerLogLevel = SEVERE
#trainPath = resources/kbp/TAC_2009_KBP_Evaluation_Reference_Knowledge_Base/data
#testPath = /NOT_RELEASED/u/nlp/data/TAC-KBP2010/TAC_2009_KBP_Evaluation_Reference_Knowledge_Base/stanford_splits/devel
# test queries from the combined set of 2010 and 2011 queries
devQueries = resources/kbp/test_combined/TAC_KBP_Regular-Slot_Queries_DEVELOPMENT.xml
testQueries = resources/kbp/test_combined/TAC_KBP_Regular-Slot_Queries_TESTING.xml
kbp.goldresponses = resources/kbp/test_combined/TAC_KBP_Regular-Slot_Assessments
kbScoreFile = kb_score.txt
queryScoreFile = query_score
inference.during.tuning = false
doc.finding.during.tuning = true
# model combination properties
model.combination.enabled = false