@@ -8,7 +8,7 @@ use std::{
88
99use chrono:: { Datelike as _, Month , TimeZone as _, Utc } ;
1010use glob:: glob;
11- use regex:: Regex ;
11+ use regex:: { Regex , RegexSet } ;
1212
1313#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
1414struct Date {
@@ -36,41 +36,56 @@ impl fmt::Display for Date {
3636 }
3737}
3838
39- fn make_date_regex ( ) -> Regex {
40- Regex :: new ( r"[aA]s\s+of\s+(\w+)\s+(\d{4})" ) . unwrap ( )
39+ fn make_date_regex ( ) -> Vec < Regex > {
40+ let patterns = [
41+ r"<!--\s+date-check:\s+(\w+)\s+(\d+{4})\s+-->" ,
42+ r"<!--\s+date-check\s+-->\s+(\w+)\s+(\d+{4})" ,
43+ ] ;
44+ let set = RegexSet :: new ( & patterns) . unwrap ( ) ;
45+ set. patterns ( )
46+ . iter ( )
47+ . map ( |pattern| Regex :: new ( pattern) . unwrap ( ) )
48+ . collect ( )
4149}
4250
43- fn collect_dates_from_file ( date_regex : & Regex , text : & str ) -> Vec < ( usize , Date ) > {
44- let mut line = 1 ;
45- let mut end_of_last_cap = 0 ;
46- date_regex
47- . captures_iter ( text)
48- . map ( |cap| {
49- (
50- cap. get ( 0 ) . unwrap ( ) . range ( ) ,
51- Date {
52- year : cap[ 2 ] . parse ( ) . unwrap ( ) ,
53- month : Month :: from_str ( & cap[ 1 ] ) . unwrap ( ) . number_from_month ( ) ,
54- } ,
55- )
56- } )
57- . map ( |( byte_range, date) | {
58- line += text[ end_of_last_cap..byte_range. end ]
59- . chars ( )
60- . filter ( |c| * c == '\n' )
61- . count ( ) ;
62- end_of_last_cap = byte_range. end ;
63- ( line, date)
64- } )
65- . collect ( )
51+ fn collect_dates_from_file ( date_regexes : & [ Regex ] , text : & str ) -> Vec < ( usize , Date ) > {
52+ let mut output = Vec :: new ( ) ;
53+ for date_regex in date_regexes {
54+ let mut line = 1 ;
55+ let mut end_of_last_cap = 0 ;
56+ let results: Vec < _ > = date_regex
57+ . captures_iter ( text)
58+ . filter_map ( |cap| {
59+ if let ( Some ( year) , Some ( month) ) = ( cap. get ( 2 ) , cap. get ( 1 ) ) {
60+ let year = year. as_str ( ) . parse ( ) . expect ( "year" ) ;
61+ let month = Month :: from_str ( month. as_str ( ) )
62+ . expect ( "month" )
63+ . number_from_month ( ) ;
64+ Some ( ( cap. get ( 0 ) . expect ( "all" ) . range ( ) , Date { year, month } ) )
65+ } else {
66+ None
67+ }
68+ } )
69+ . map ( |( byte_range, date) | {
70+ line += text[ end_of_last_cap..byte_range. end ]
71+ . chars ( )
72+ . filter ( |c| * c == '\n' )
73+ . count ( ) ;
74+ end_of_last_cap = byte_range. end ;
75+ ( line, date)
76+ } )
77+ . collect ( ) ;
78+ output. extend ( results) ;
79+ }
80+ output
6681}
6782
6883fn collect_dates ( paths : impl Iterator < Item = PathBuf > ) -> BTreeMap < PathBuf , Vec < ( usize , Date ) > > {
69- let date_regex = make_date_regex ( ) ;
84+ let date_regexes = make_date_regex ( ) ;
7085 let mut data = BTreeMap :: new ( ) ;
7186 for path in paths {
7287 let text = fs:: read_to_string ( & path) . unwrap ( ) ;
73- let dates = collect_dates_from_file ( & date_regex , & text) ;
88+ let dates = collect_dates_from_file ( & date_regexes , & text) ;
7489 if !dates. is_empty ( ) {
7590 data. insert ( path, dates) ;
7691 }
@@ -174,59 +189,129 @@ mod tests {
174189
175190 #[ test]
176191 fn test_date_regex ( ) {
177- let regex = make_date_regex ( ) ;
178- assert ! ( regex. is_match( "As of July 2022" ) ) ;
179- assert ! ( regex. is_match( "As of Jul 2022" ) ) ;
180- assert ! ( regex. is_match( "As of july 2022" ) ) ;
181- assert ! ( regex. is_match( "As of jul 2022" ) ) ;
182- assert ! ( regex. is_match( "as of jul 2022" ) ) ;
192+ let regexes = & make_date_regex ( ) ;
193+ assert ! ( regexes[ 0 ] . is_match( "<!-- date-check: jan 2021 -->" ) ) ;
194+ assert ! ( regexes[ 0 ] . is_match( "<!-- date-check: january 2021 -->" ) ) ;
195+ assert ! ( regexes[ 0 ] . is_match( "<!-- date-check: Jan 2021 -->" ) ) ;
196+ assert ! ( regexes[ 0 ] . is_match( "<!-- date-check: January 2021 -->" ) ) ;
197+ assert ! ( regexes[ 1 ] . is_match( "<!-- date-check --> jan 2021" ) ) ;
198+ assert ! ( regexes[ 1 ] . is_match( "<!-- date-check --> january 2021" ) ) ;
199+ assert ! ( regexes[ 1 ] . is_match( "<!-- date-check --> Jan 2021" ) ) ;
200+ assert ! ( regexes[ 1 ] . is_match( "<!-- date-check --> January 2021" ) ) ;
183201 }
184202
185203 #[ test]
186204 fn test_collect_dates_from_file ( ) {
187- let text = "Test1\n As of Jan 2021\n Test2\n As of Feb 2021 \
188- \n Test3\n Test4\n As of march 2021Bar\n as of apr 2021 \
189- \n Test5\n Test6\n Test7\n \n \n as of\n \n may 2021\n Test8
205+ let text = r"
206+ Test1
207+ <!-- date-check: jan 2021 -->
208+ Test2
209+ Foo<!-- date-check: february 2021
210+ -->
211+ Test3
212+ Test4
213+ Foo<!-- date-check: Mar 2021 -->Bar
214+ <!-- date-check: April 2021
215+ -->
216+ Test5
217+ Test6
218+ Test7
219+ <!-- date-check:
220+
221+ may 2021 -->
222+ Test8
223+ Test1
224+ <!-- date-check --> jan 2021
225+ Test2
226+ Foo<!-- date-check
227+ --> february 2021
228+ Test3
229+ Test4
230+ Foo<!-- date-check --> mar 2021 Bar
231+ <!-- date-check
232+ --> apr 2021
233+ Test5
234+ Test6
235+ Test7
236+ <!-- date-check
237+
238+ --> may 2021
239+ Test8 \
190240 " ;
191241 assert_eq ! (
192242 collect_dates_from_file( & make_date_regex( ) , text) ,
193243 vec![
194244 (
195- 2 ,
245+ 3 ,
246+ Date {
247+ year: 2021 ,
248+ month: 1 ,
249+ }
250+ ) ,
251+ (
252+ 6 ,
253+ Date {
254+ year: 2021 ,
255+ month: 2 ,
256+ }
257+ ) ,
258+ (
259+ 9 ,
260+ Date {
261+ year: 2021 ,
262+ month: 3 ,
263+ }
264+ ) ,
265+ (
266+ 11 ,
267+ Date {
268+ year: 2021 ,
269+ month: 4 ,
270+ }
271+ ) ,
272+ (
273+ 17 ,
274+ Date {
275+ year: 2021 ,
276+ month: 5 ,
277+ }
278+ ) ,
279+ (
280+ 20 ,
196281 Date {
197282 year: 2021 ,
198283 month: 1 ,
199284 }
200285 ) ,
201286 (
202- 4 ,
287+ 23 ,
203288 Date {
204289 year: 2021 ,
205290 month: 2 ,
206291 }
207292 ) ,
208293 (
209- 7 ,
294+ 26 ,
210295 Date {
211296 year: 2021 ,
212297 month: 3 ,
213298 }
214299 ) ,
215300 (
216- 8 ,
301+ 28 ,
217302 Date {
218303 year: 2021 ,
219304 month: 4 ,
220305 }
221306 ) ,
222307 (
223- 16 ,
308+ 34 ,
224309 Date {
225310 year: 2021 ,
226311 month: 5 ,
227312 }
228313 ) ,
229- ]
314+ ] ,
230315 ) ;
231316 }
232317}
0 commit comments