@@ -23,120 +23,89 @@ import scala.collection.mutable.{ArrayBuffer, ListBuffer}
 
 import org.apache.spark.SparkException
 
-private[mllib] object NumericTokenizer {
-  val NUMBER = -1
-  val END = -2
-}
-
-import NumericTokenizer._
-
 /**
- * Simple tokenizer for a numeric structure consisting of three types:
+ * Simple parser for a numeric structure consisting of three types:
  *
  *  - number: a double in Java's floating number format
  *  - array: an array of numbers stored as `[v0,v1,...,vn]`
  *  - tuple: a list of numbers, arrays, or tuples stored as `(...)`
- *
- * @param s input string
  */
-private[mllib] class NumericTokenizer(s: String) {
-
-  private var allowComma = false
-  private var _value = Double.NaN
-  private val stringTokenizer = new StringTokenizer(s, "()[],", true)
+private[mllib] object NumericParser {
 
-  /**
-   * Returns the most recent parsed number.
-   */
-  def value: Double = _value
+  /** Parses a string into a Double, an Array[Double], or a Seq[Any]. */
+  def parse(s: String): Any = {
+    val tokenizer = new StringTokenizer(s, "()[],", true)
+    if (tokenizer.hasMoreTokens()) {
+      val token = tokenizer.nextToken()
+      if (token == "(") {
+        parseTuple(tokenizer)
+      } else if (token == "[") {
+        parseArray(tokenizer)
+      } else {
+        // expecting a number
+        java.lang.Double.parseDouble(token)
+      }
+    } else {
+      throw new SparkException(s"Cannot find any token from the input string.")
+    }
+  }
 
-  /**
-   * Returns the next token, which could be any of the following:
-   *  - '[', ']', '(', or ')'.
-   *  - [[org.apache.spark.mllib.util.NumericTokenizer#NUMBER]], call value() to get its value.
-   *  - [[org.apache.spark.mllib.util.NumericTokenizer#END]].
-   */
-  def next(): Int = {
-    if (stringTokenizer.hasMoreTokens()) {
-      val token = stringTokenizer.nextToken()
-      if (token == "(" || token == "[") {
-        allowComma = false
-        token.charAt(0)
-      } else if (token == ")" || token == "]") {
-        allowComma = true
-        token.charAt(0)
+  private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
+    val values = ArrayBuffer.empty[Double]
+    var parsing = true
+    var allowComma = false
+    var token: String = null
+    while (parsing && tokenizer.hasMoreTokens()) {
+      token = tokenizer.nextToken()
+      if (token == "]") {
+        parsing = false
       } else if (token == ",") {
         if (allowComma) {
           allowComma = false
-          next()
         } else {
           throw new SparkException("Found a ',' at a wrong position.")
         }
       } else {
         // expecting a number
-        _value = java.lang.Double.parseDouble(token)
+        values.append(java.lang.Double.parseDouble(token))
         allowComma = true
-        NUMBER
       }
-    } else {
-      END
     }
-  }
-}
-
-/**
- * Simple parser for tokens from [[org.apache.spark.mllib.util.NumericTokenizer]].
- */
-private[mllib] object NumericParser {
-
-  /** Parses a string into a Double, an Array[Double], or a Seq[Any]. */
-  def parse(s: String): Any = parse(new NumericTokenizer(s))
-
-  private def parse(tokenizer: NumericTokenizer): Any = {
-    val token = tokenizer.next()
-    if (token == NUMBER) {
-      tokenizer.value
-    } else if (token == '(') {
-      parseTuple(tokenizer)
-    } else if (token == '[') {
-      parseArray(tokenizer)
-    } else if (token == END) {
-      null
-    } else {
-      throw new SparkException(s"Cannot recognize token type: $token.")
-    }
-  }
-
-  private def parseArray(tokenizer: NumericTokenizer): Array[Double] = {
-    val values = ArrayBuffer.empty[Double]
-    var token = tokenizer.next()
-    while (token == NUMBER) {
-      values.append(tokenizer.value)
-      token = tokenizer.next()
-    }
-    if (token != ']') {
-      throw new SparkException(s"An array must end with ] but got $token.")
+    if (parsing) {
+      throw new SparkException(s"An array must end with ']'.")
     }
     values.toArray
   }
 
-  private def parseTuple(tokenizer: NumericTokenizer): Seq[_] = {
+  private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
     val items = ListBuffer.empty[Any]
-    var token = tokenizer.next()
-    while (token != ')' && token != END) {
-      if (token == NUMBER) {
-        items.append(tokenizer.value)
-      } else if (token == '(') {
+    var parsing = true
+    var allowComma = false
+    var token: String = null
+    while (parsing && tokenizer.hasMoreTokens()) {
+      token = tokenizer.nextToken()
+      if (token == "(") {
         items.append(parseTuple(tokenizer))
-      } else if (token == '[') {
+        allowComma = true
+      } else if (token == "[") {
         items.append(parseArray(tokenizer))
+        allowComma = true
+      } else if (token == ",") {
+        if (allowComma) {
+          allowComma = false
+        } else {
+          throw new SparkException("Found a ',' at a wrong position.")
+        }
+      } else if (token == ")") {
+        parsing = false
       } else {
-        throw new SparkException(s"Cannot recognize token type: $token.")
+        // expecting a number
+        items.append(java.lang.Double.parseDouble(token))
+        allowComma = true
       }
-      token = tokenizer.next()
     }
-    if (token != ')') {
-      throw new SparkException(s"A tuple must end with ) but got $token.")
+    if (parsing) {
+      throw new SparkException(s"A tuple must end with ')'.")
     }
     items.toSeq
   }
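
For reference, here is a minimal usage sketch of the refactored parser; it is not part of the commit. The demo object name is hypothetical, and because `NumericParser` is `private[mllib]`, the snippet assumes it is compiled inside the `org.apache.spark.mllib` package tree. It exercises the three input shapes from the doc comment plus the misplaced-comma check.

```scala
package org.apache.spark.mllib.util

import org.apache.spark.SparkException

// Hypothetical demo, not part of the commit.
object NumericParserDemo {
  def main(args: Array[String]): Unit = {
    // A bare token parses to a Double.
    println(NumericParser.parse("1.5").asInstanceOf[Double])  // 1.5

    // `[...]` parses to an Array[Double].
    val arr = NumericParser.parse("[1.0,2.0,3.0]").asInstanceOf[Array[Double]]
    println(arr.mkString(","))  // 1.0,2.0,3.0

    // `(...)` parses to a Seq[Any] whose elements can be numbers,
    // arrays, or nested tuples.
    val tup = NumericParser.parse("(0.0,[1.0,2.0],(3.0))").asInstanceOf[Seq[Any]]
    println(tup.size)  // 3

    // A misplaced comma trips the allowComma check.
    try {
      NumericParser.parse("[1.0,,2.0]")
    } catch {
      case e: SparkException => println(e.getMessage)  // Found a ',' at a wrong position.
    }
  }
}
```

Dispatching directly on string tokens removes the `NUMBER`/`END` sentinel protocol, so each grammar rule (`parseArray`, `parseTuple`) keeps its own `allowComma` state instead of sharing mutable state with a separate tokenizer class.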