1
+ /*
2
+ * The contents of this file are subject to the Mozilla Public License
3
+ * Version 1.1 (the "License"); you may not use this file except in
4
+ * compliance with the License. You may obtain a copy of the License at
5
+ * http://www.mozilla.org/MPL/
6
+ *
7
+ * Software distributed under the License is distributed on an "AS IS"
8
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9
+ * License for the specific language governing rights and limitations
10
+ * under the License.
11
+ *
12
+ * The Original Code is "SharpSimpleNLG".
13
+ *
14
+ * The Initial Developer of the Original Code is Ehud Reiter, Albert Gatt and Dave Westwater.
15
+ * Portions created by Ehud Reiter, Albert Gatt and Dave Westwater are Copyright (C) 2010-11 The University of Aberdeen. All Rights Reserved.
16
+ *
17
+ * Contributor(s): Ehud Reiter, Albert Gatt, Dave Westwater, Roman Kutlak, Margaret Mitchell, Saad Mahamood, Nick Hodge
18
+ */
19
+
20
+ /* Additional Notes:
21
+ * - Original Java source is SimpleNLG from 12-Jun-2016 https://github.com/simplenlg/simplenlg
22
+ * - This is a port of the Java version to C# with no additional features
23
+ * - I have left the "Initial Developer" section to reflect this fact
24
+ * - Any questions, comments, feedback on this port can be sent to Nick Hodge <[email protected] >
25
+ */
26
+
27
+ using System . Text ;
28
+ using SimpleNLG . Extensions ;
29
+
30
+ namespace SharpNLG . Extensions
31
+ {
32
+ /**
33
+ * This class is used to parse numbers that are passed as figures, to determine
34
+ * whether they should take "a" or "an" as determiner.
35
+ *
36
+ * @author bertugatt
37
+ *
38
+ */
39
+
40
public class DeterminerAgrHelper
{
    /*
     * Prefixes that do not take "an". isAnException is only consulted for
     * inputs that already matched AN_AGREEMENT (e.g. "a one percent change").
     */
    private static readonly string[] AN_EXCEPTIONS = { "one", "180", "110" };

    /*
     * Pattern for strings that start with a vowel letter, which by default
     * take "an". Inputs are lower-cased before being tested against it.
     */
    private static readonly string AN_AGREEMENT = @"\A(a|e|i|o|u).*";

    /*
     * Historical note: the numeric case was once handled by a single regular
     * expression (courtesy of Chris Howell, Agfa healthcare corporation):
     *
     *   "^(((8((\\d+)|(\\d+(\\.|,)\\d+))?).*)|((11|18)(\\d{3,}|\\D)).*)$"
     *
     * It is now handled by getNumericPrefix and checkNum below.
     */

    /**
     * Decide whether a word or figure must be preceded by "an" rather than "a".
     *
     * The input is lower-cased, then:
     * - if it starts with a vowel letter and is not listed in AN_EXCEPTIONS,
     *   the answer is <code>true</code>;
     * - otherwise, if it starts with a number whose digits begin with 8, 11
     *   or 18, the answer is whatever checkNum says about that number
     *   (e.g. "an 18% increase", but "a 180 degree turn").
     *
     * NOTE(review): toLowerCase and matches are project extension methods from
     * SimpleNLG.Extensions; they are assumed to behave like their Java
     * namesakes -- confirm against that file.
     *
     * @param stringa
     *            the text that follows the determiner; may be <code>null</code>
     * @return <code>true</code> if "an" is required, <code>false</code>
     *         otherwise (including for <code>null</code> input and for
     *         numeric prefixes too large to fit in a <code>long</code>)
     */
    public static bool requiresAn(string stringa)
    {
        // Nothing follows the determiner, so nothing can call for "an".
        if (stringa == null)
        {
            return false;
        }

        var lowercaseInput = stringa.toLowerCase();

        if (lowercaseInput.matches(AN_AGREEMENT) && !isAnException(lowercaseInput))
        {
            return true;
        }

        var numPref = getNumericPrefix(lowercaseInput);

        if (numPref == null || !numPref.matches(@"^(8|11|18).*$"))
        {
            return false;
        }

        // TryParse on a long instead of int.Parse: a prefix such as
        // "8,000,000,000" used to throw OverflowException here.
        long num;
        return long.TryParse(numPref, out num) && checkNum(num);
    }

    /*
     * Check whether a string beginning with a vowel is an exception and doesn't
     * take "an" (e.g. "a one percent change").
     *
     * @param stringa
     *            the lower-cased input
     * @return <code>true</code> if the input matches "^" + exception + ".*"
     *         for any entry of AN_EXCEPTIONS
     */
    private static bool isAnException(string stringa)
    {
        foreach (var exception in AN_EXCEPTIONS)
        {
            if (stringa.matches("^" + exception + ".*"))
            {
                return true;
            }
        }

        return false;
    }

    /*
     * Returns <code>true</code> if the leading thousands-group of the number
     * is 8, 11, 18, 80-89 or 800-899.
     *
     * Whole groups of three digits are discarded first, so 18,000 is judged
     * as 18 ("an") while 180,000 is judged as 180 ("a").
     *
     * @param num
     *            the non-negative number parsed from the numeric prefix
     */
    private static bool checkNum(long num)
    {
        // Drop trailing thousands-groups; integer division truncates, which
        // is what we want because only the leading group matters.
        while (num >= 1000)
        {
            num /= 1000;
        }

        // eight, eleven, eighteen, eighty-something and eight hundred and
        // something. The 800-899 range was missing, giving "a 800".
        return num == 8
               || num == 11
               || num == 18
               || (num >= 80 && num < 90)
               || (num >= 800 && num < 900);
    }

    /*
     * Retrieve the numeral prefix of a string.
     *
     * The input is trimmed; if it then starts with a digit, all digits up to
     * the first character that is neither a digit nor a comma are collected.
     * Commas are skipped, so "18,000 men" yields "18000".
     *
     * @param stringa
     *            the text to inspect; may be <code>null</code>
     * @return the collected digits, or <code>null</code> if the input is
     *         <code>null</code>, blank, or does not start with a digit
     */
    private static string getNumericPrefix(string stringa)
    {
        if (stringa == null)
        {
            return null;
        }

        var trimmed = stringa.Trim();

        if (trimmed.Length == 0 || !trimmed[0].isDigit())
        {
            return null;
        }

        var numeric = new StringBuilder();

        foreach (var current in trimmed)
        {
            if (current.isDigit())
            {
                numeric.Append(current);
            }
            else if (current != ',')
            {
                // first character that is neither digit nor thousands separator
                break;
            }
        }

        return numeric.ToString();
    }

    /**
     * Check to see if a string ends with the indefinite article "a" and it agrees with {@code np}.
     *
     * The text is split on single spaces; only the last token is examined.
     * A capitalised "A" is replaced by "An" so that the original casing of
     * the article is kept.
     *
     * @param text
     *            the text that may end with the article; may be <code>null</code>
     * @param np
     *            the text that will follow {@code text}
     * @return an altered version of {@code text} to use "an" if it agrees with {@code np}, the original string otherwise.
     */
    public static string checkEndsWithIndefiniteArticle(string text, string np)
    {
        if (text == null)
        {
            return text;
        }

        var tokens = text.Split(' ');
        var lastIndex = tokens.Length - 1;
        var lastToken = tokens[lastIndex];

        if (lastToken.equalsIgnoreCase("a") && requiresAn(np))
        {
            // Keep the capital when the article was written "A".
            tokens[lastIndex] = lastToken == "A" ? "An" : "an";
            return stringArrayToString(tokens);
        }

        return text;
    }

    // Turns ["a","b","c"] into "a b c"
    private static string stringArrayToString(string[] sArray)
    {
        return string.Join(" ", sArray);
    }
}
244
+ }
0 commit comments