@@ -187,7 +187,7 @@ impl<'a> StringReader<'a> {
187187 /// Advance peek_tok and peek_span to refer to the next token, and
188188 /// possibly update the interner.
189189 fn advance_token ( & mut self ) {
190- match self . consume_whitespace_and_comments ( ) {
190+ match self . scan_whitespace_or_comment ( ) {
191191 Some ( comment) => {
192192 self . peek_span = comment. sp ;
193193 self . peek_tok = comment. tok ;
@@ -339,8 +339,7 @@ impl<'a> StringReader<'a> {
339339
340340 /// PRECONDITION: self.curr is not whitespace
341341 /// Eats any kind of comment.
342- /// Returns a Some(sugared-doc-attr) if one exists, None otherwise
343- fn consume_any_line_comment ( & mut self ) -> Option < TokenAndSpan > {
342+ fn scan_comment ( & mut self ) -> Option < TokenAndSpan > {
344343 match self . curr {
345344 Some ( c) => {
346345 if c. is_whitespace ( ) {
@@ -375,28 +374,32 @@ impl<'a> StringReader<'a> {
375374 }
376375 self . bump ( ) ;
377376 }
378- let ret = self . with_str_from ( start_bpos, |string| {
377+ return self . with_str_from ( start_bpos, |string| {
379378 // but comments with only more "/"s are not
380- if !is_line_non_doc_comment ( string) {
381- Some ( TokenAndSpan {
382- tok : token:: DOC_COMMENT ( str_to_ident ( string) ) ,
383- sp : codemap:: mk_sp ( start_bpos, self . last_pos )
384- } )
379+ let tok = if is_doc_comment ( string) {
380+ token:: DOC_COMMENT ( str_to_ident ( string) )
385381 } else {
386- None
387- }
388- } ) ;
382+ token:: COMMENT
383+ } ;
389384
390- if ret. is_some ( ) {
391- return ret;
392- }
385+ return Some ( TokenAndSpan {
386+ tok : tok,
387+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
388+ } ) ;
389+ } ) ;
393390 } else {
391+ let start_bpos = self . last_pos - BytePos ( 2 ) ;
394392 while !self . curr_is ( '\n' ) && !self . is_eof ( ) { self . bump ( ) ; }
393+ return Some ( TokenAndSpan {
394+ tok : token:: COMMENT ,
395+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
396+ } ) ;
395397 }
396- // Restart whitespace munch.
397- self . consume_whitespace_and_comments ( )
398398 }
399- Some ( '*' ) => { self . bump ( ) ; self . bump ( ) ; self . consume_block_comment ( ) }
399+ Some ( '*' ) => {
400+ self . bump ( ) ; self . bump ( ) ;
401+ self . scan_block_comment ( )
402+ }
400403 _ => None
401404 }
402405 } else if self . curr_is ( '#' ) {
@@ -412,9 +415,15 @@ impl<'a> StringReader<'a> {
412415 let cmap = CodeMap :: new ( ) ;
413416 cmap. files . borrow_mut ( ) . push ( self . filemap . clone ( ) ) ;
414417 let loc = cmap. lookup_char_pos_adj ( self . last_pos ) ;
418+ debug ! ( "Skipping a shebang" ) ;
415419 if loc. line == 1 u && loc. col == CharPos ( 0 u) {
420+ // Return the shebang line as its own SHEBANG token.
421+ let start = self . last_pos ;
416422 while !self . curr_is ( '\n' ) && !self . is_eof ( ) { self . bump ( ) ; }
417- return self . consume_whitespace_and_comments ( ) ;
423+ return Some ( TokenAndSpan {
424+ tok : token:: SHEBANG ( self . ident_from ( start) ) ,
425+ sp : codemap:: mk_sp ( start, self . last_pos )
426+ } ) ;
418427 }
419428 }
420429 None
@@ -423,15 +432,33 @@ impl<'a> StringReader<'a> {
423432 }
424433 }
425434
426- /// EFFECT: eats whitespace and comments.
427- /// Returns a Some(sugared-doc-attr) if one exists, None otherwise.
428- fn consume_whitespace_and_comments ( & mut self ) -> Option < TokenAndSpan > {
429- while is_whitespace ( self . curr ) { self . bump ( ) ; }
430- return self . consume_any_line_comment ( ) ;
435+ /// If there is whitespace, shebang, or a comment, scan it. Otherwise,
436+ /// return None.
437+ fn scan_whitespace_or_comment ( & mut self ) -> Option < TokenAndSpan > {
438+ match self . curr . unwrap_or ( '\0' ) {
439+ // # to handle shebang at start of file -- this is the entry point
440+ // for skipping over all "junk"
441+ '/' | '#' => {
442+ let c = self . scan_comment ( ) ;
443+ debug ! ( "scanning a comment {}" , c) ;
444+ c
445+ } ,
446+ c if is_whitespace ( Some ( c) ) => {
447+ let start_bpos = self . last_pos ;
448+ while is_whitespace ( self . curr ) { self . bump ( ) ; }
449+ let c = Some ( TokenAndSpan {
450+ tok : token:: WS ,
451+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
452+ } ) ;
453+ debug ! ( "scanning whitespace: {}" , c) ;
454+ c
455+ } ,
456+ _ => None
457+ }
431458 }
432459
433460 /// Might return a sugared-doc-attr
434- fn consume_block_comment ( & mut self ) -> Option < TokenAndSpan > {
461+ fn scan_block_comment ( & mut self ) -> Option < TokenAndSpan > {
435462 // block comments starting with "/**" or "/*!" are doc-comments
436463 let is_doc_comment = self . curr_is ( '*' ) || self . curr_is ( '!' ) ;
437464 let start_bpos = self . last_pos - BytePos ( 2 ) ;
@@ -466,28 +493,23 @@ impl<'a> StringReader<'a> {
466493 self . bump ( ) ;
467494 }
468495
469- let res = if is_doc_comment {
470- self . with_str_from ( start_bpos, |string| {
471- // but comments with only "*"s between two "/"s are not
472- if !is_block_non_doc_comment ( string) {
473- let string = if has_cr {
474- self . translate_crlf ( start_bpos, string,
475- "bare CR not allowed in block doc-comment" )
476- } else { string. into_maybe_owned ( ) } ;
477- Some ( TokenAndSpan {
478- tok : token:: DOC_COMMENT ( str_to_ident ( string. as_slice ( ) ) ) ,
479- sp : codemap:: mk_sp ( start_bpos, self . last_pos )
480- } )
481- } else {
482- None
483- }
484- } )
485- } else {
486- None
487- } ;
496+ self . with_str_from ( start_bpos, |string| {
497+ // but comments with only "*"s between two "/"s are not
498+ let tok = if is_block_doc_comment ( string) {
499+ let string = if has_cr {
500+ self . translate_crlf ( start_bpos, string,
501+ "bare CR not allowed in block doc-comment" )
502+ } else { string. into_maybe_owned ( ) } ;
503+ token:: DOC_COMMENT ( str_to_ident ( string. as_slice ( ) ) )
504+ } else {
505+ token:: COMMENT
506+ } ;
488507
489- // restart whitespace munch.
490- if res. is_some ( ) { res } else { self . consume_whitespace_and_comments ( ) }
508+ Some ( TokenAndSpan {
509+ tok : tok,
510+ sp : codemap:: mk_sp ( start_bpos, self . last_pos )
511+ } )
512+ } )
491513 }
492514
493515 /// Scan through any digits (base `radix`) or underscores, and return how
@@ -1242,12 +1264,18 @@ fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
12421264
12431265fn is_dec_digit ( c : Option < char > ) -> bool { return in_range ( c, '0' , '9' ) ; }
12441266
1245- pub fn is_line_non_doc_comment ( s : & str ) -> bool {
1246- s. starts_with ( "////" )
1267+ pub fn is_doc_comment ( s : & str ) -> bool {
1268+ let res = ( s. starts_with ( "///" ) && * s. as_bytes ( ) . get ( 3 ) . unwrap_or ( & b' ' ) != b'/' )
1269+ || s. starts_with ( "//!" ) ;
1270+ debug ! ( "is `{}` a doc comment? {}" , s, res) ;
1271+ res
12471272}
12481273
1249- pub fn is_block_non_doc_comment ( s : & str ) -> bool {
1250- s. starts_with ( "/***" )
1274+ pub fn is_block_doc_comment ( s : & str ) -> bool {
1275+ let res = ( s. starts_with ( "/**" ) && * s. as_bytes ( ) . get ( 3 ) . unwrap_or ( & b' ' ) != b'*' )
1276+ || s. starts_with ( "/*!" ) ;
1277+ debug ! ( "is `{}` a doc comment? {}" , s, res) ;
1278+ res
12511279}
12521280
12531281fn ident_start ( c : Option < char > ) -> bool {
@@ -1383,9 +1411,9 @@ mod test {
13831411 }
13841412
/// Checks the classification of line comments by `is_doc_comment`.
#[test] fn line_doc_comments() {
    // Exactly three slashes: outer doc-comment.
    assert!(is_doc_comment("///"));
    assert!(is_doc_comment("/// blah"));
    // Four slashes: ordinary comment.
    assert!(!is_doc_comment("////"));
    // `//!`: inner doc-comment.
    assert!(is_doc_comment("//!"));
    assert!(is_doc_comment("//! blah"));
    // Two slashes: ordinary comment.
    assert!(!is_doc_comment("//"));
    assert!(!is_doc_comment("// blah"));
}
13901418
13911419 #[ test] fn nested_block_comments ( ) {
0 commit comments