@@ -88,7 +88,7 @@ mod prepare;
88
88
pub use crate :: char_data:: { BidiClass , UNICODE_VERSION } ;
89
89
pub use crate :: data_source:: BidiDataSource ;
90
90
pub use crate :: level:: { Level , LTR_LEVEL , RTL_LEVEL } ;
91
- pub use crate :: prepare:: LevelRun ;
91
+ pub use crate :: prepare:: { LevelRun , LevelRunVec } ;
92
92
93
93
#[ cfg( feature = "hardcoded-data" ) ]
94
94
pub use crate :: char_data:: { bidi_class, HardcodedBidiData } ;
@@ -248,8 +248,14 @@ struct InitialInfoExt<'text> {
248
248
249
249
/// Parallel to base.paragraphs, records whether each paragraph is "pure LTR" that
250
250
/// requires no further bidi processing (i.e. there are no RTL characters or bidi
251
- /// control codes present).
252
- pure_ltr : Vec < bool > ,
251
+ /// control codes present), and whether any bidi isolation controls are present.
252
+ flags : Vec < ParagraphInfoFlags > ,
253
+ }
254
+
255
+ #[ derive( PartialEq , Debug ) ]
256
+ struct ParagraphInfoFlags {
257
+ is_pure_ltr : bool ,
258
+ has_isolate_controls : bool ,
253
259
}
254
260
255
261
impl < ' text > InitialInfoExt < ' text > {
@@ -269,12 +275,12 @@ impl<'text> InitialInfoExt<'text> {
269
275
default_para_level : Option < Level > ,
270
276
) -> InitialInfoExt < ' a > {
271
277
let mut paragraphs = Vec :: < ParagraphInfo > :: new ( ) ;
272
- let mut pure_ltr = Vec :: < bool > :: new ( ) ;
273
- let ( original_classes, _, _) = compute_initial_info (
278
+ let mut flags = Vec :: < ParagraphInfoFlags > :: new ( ) ;
279
+ let ( original_classes, _, _, _ ) = compute_initial_info (
274
280
data_source,
275
281
text,
276
282
default_para_level,
277
- Some ( ( & mut paragraphs, & mut pure_ltr ) ) ,
283
+ Some ( ( & mut paragraphs, & mut flags ) ) ,
278
284
) ;
279
285
280
286
InitialInfoExt {
@@ -283,7 +289,7 @@ impl<'text> InitialInfoExt<'text> {
283
289
original_classes,
284
290
paragraphs,
285
291
} ,
286
- pure_ltr ,
292
+ flags ,
287
293
}
288
294
}
289
295
}
@@ -299,8 +305,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
299
305
data_source : & D ,
300
306
text : & ' a T ,
301
307
default_para_level : Option < Level > ,
302
- mut split_paragraphs : Option < ( & mut Vec < ParagraphInfo > , & mut Vec < bool > ) > ,
303
- ) -> ( Vec < BidiClass > , Level , bool ) {
308
+ mut split_paragraphs : Option < ( & mut Vec < ParagraphInfo > , & mut Vec < ParagraphInfoFlags > ) > ,
309
+ ) -> ( Vec < BidiClass > , Level , bool , bool ) {
304
310
let mut original_classes = Vec :: with_capacity ( text. len ( ) ) ;
305
311
306
312
// The stack contains the starting code unit index for each nested isolate we're inside.
@@ -310,8 +316,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
310
316
let mut isolate_stack = Vec :: new ( ) ;
311
317
312
318
debug_assert ! (
313
- if let Some ( ( ref paragraphs, ref pure_ltr ) ) = split_paragraphs {
314
- paragraphs. is_empty( ) && pure_ltr . is_empty( )
319
+ if let Some ( ( ref paragraphs, ref flags ) ) = split_paragraphs {
320
+ paragraphs. is_empty( ) && flags . is_empty( )
315
321
} else {
316
322
true
317
323
}
@@ -323,6 +329,8 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
323
329
// Per-paragraph flag: can subsequent processing be skipped? Set to false if any
324
330
// RTL characters or bidi control characters are encountered in the paragraph.
325
331
let mut is_pure_ltr = true ;
332
+ // Set to true if any bidi isolation controls are present in the paragraph.
333
+ let mut has_isolate_controls = false ;
326
334
327
335
#[ cfg( feature = "flame_it" ) ]
328
336
flame:: start ( "compute_initial_info(): iter text.char_indices()" ) ;
@@ -341,7 +349,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
341
349
342
350
match class {
343
351
B => {
344
- if let Some ( ( ref mut paragraphs, ref mut pure_ltr ) ) = split_paragraphs {
352
+ if let Some ( ( ref mut paragraphs, ref mut flags ) ) = split_paragraphs {
345
353
// P1. Split the text into separate paragraphs. The paragraph separator is kept
346
354
// with the previous paragraph.
347
355
let para_end = i + len;
@@ -350,14 +358,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
350
358
// P3. If no character is found in p2, set the paragraph level to zero.
351
359
level : para_level. unwrap_or ( LTR_LEVEL ) ,
352
360
} ) ;
353
- pure_ltr. push ( is_pure_ltr) ;
361
+ flags. push ( ParagraphInfoFlags {
362
+ is_pure_ltr,
363
+ has_isolate_controls,
364
+ } ) ;
354
365
// Reset state for the start of the next paragraph.
355
366
para_start = para_end;
356
367
// TODO: Support defaulting to direction of previous paragraph
357
368
//
358
369
// <http://www.unicode.org/reports/tr9/#HL1>
359
370
para_level = default_para_level;
360
371
is_pure_ltr = true ;
372
+ has_isolate_controls = false ;
361
373
isolate_stack. clear ( ) ;
362
374
}
363
375
}
@@ -394,6 +406,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
394
406
395
407
RLI | LRI | FSI => {
396
408
is_pure_ltr = false ;
409
+ has_isolate_controls = true ;
397
410
isolate_stack. push ( i) ;
398
411
}
399
412
@@ -405,15 +418,18 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
405
418
}
406
419
}
407
420
408
- if let Some ( ( paragraphs, pure_ltr ) ) = split_paragraphs {
421
+ if let Some ( ( paragraphs, flags ) ) = split_paragraphs {
409
422
if para_start < text. len ( ) {
410
423
paragraphs. push ( ParagraphInfo {
411
424
range : para_start..text. len ( ) ,
412
425
level : para_level. unwrap_or ( LTR_LEVEL ) ,
413
426
} ) ;
414
- pure_ltr. push ( is_pure_ltr) ;
427
+ flags. push ( ParagraphInfoFlags {
428
+ is_pure_ltr,
429
+ has_isolate_controls,
430
+ } ) ;
415
431
}
416
- debug_assert_eq ! ( paragraphs. len( ) , pure_ltr . len( ) ) ;
432
+ debug_assert_eq ! ( paragraphs. len( ) , flags . len( ) ) ;
417
433
}
418
434
debug_assert_eq ! ( original_classes. len( ) , text. len( ) ) ;
419
435
@@ -424,6 +440,7 @@ fn compute_initial_info<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>(
424
440
original_classes,
425
441
para_level. unwrap_or ( LTR_LEVEL ) ,
426
442
is_pure_ltr,
443
+ has_isolate_controls,
427
444
)
428
445
}
429
446
@@ -482,20 +499,21 @@ impl<'text> BidiInfo<'text> {
482
499
text : & ' a str ,
483
500
default_para_level : Option < Level > ,
484
501
) -> BidiInfo < ' a > {
485
- let InitialInfoExt { base, pure_ltr , .. } =
502
+ let InitialInfoExt { base, flags , .. } =
486
503
InitialInfoExt :: new_with_data_source ( data_source, text, default_para_level) ;
487
504
488
505
let mut levels = Vec :: < Level > :: with_capacity ( text. len ( ) ) ;
489
506
let mut processing_classes = base. original_classes . clone ( ) ;
490
507
491
- for ( para, is_pure_ltr ) in base. paragraphs . iter ( ) . zip ( pure_ltr . iter ( ) ) {
508
+ for ( para, flags ) in base. paragraphs . iter ( ) . zip ( flags . iter ( ) ) {
492
509
let text = & text[ para. range . clone ( ) ] ;
493
510
let original_classes = & base. original_classes [ para. range . clone ( ) ] ;
494
511
495
512
compute_bidi_info_for_para (
496
513
data_source,
497
514
para,
498
- * is_pure_ltr,
515
+ flags. is_pure_ltr ,
516
+ flags. has_isolate_controls ,
499
517
text,
500
518
original_classes,
501
519
& mut processing_classes,
@@ -720,7 +738,7 @@ impl<'text> ParagraphBidiInfo<'text> {
720
738
) -> ParagraphBidiInfo < ' a > {
721
739
// Here we could create a ParagraphInitialInfo struct to parallel the one
722
740
// used by BidiInfo, but there doesn't seem to be any compelling reason for it.
723
- let ( original_classes, paragraph_level, is_pure_ltr) =
741
+ let ( original_classes, paragraph_level, is_pure_ltr, has_isolate_controls ) =
724
742
compute_initial_info ( data_source, text, default_para_level, None ) ;
725
743
726
744
let mut levels = Vec :: < Level > :: with_capacity ( text. len ( ) ) ;
@@ -738,6 +756,7 @@ impl<'text> ParagraphBidiInfo<'text> {
738
756
data_source,
739
757
& para_info,
740
758
is_pure_ltr,
759
+ has_isolate_controls,
741
760
text,
742
761
& original_classes,
743
762
& mut processing_classes,
@@ -1066,6 +1085,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
1066
1085
data_source : & D ,
1067
1086
para : & ParagraphInfo ,
1068
1087
is_pure_ltr : bool ,
1088
+ has_isolate_controls : bool ,
1069
1089
text : & ' a T ,
1070
1090
original_classes : & [ BidiClass ] ,
1071
1091
processing_classes : & mut [ BidiClass ] ,
@@ -1079,16 +1099,26 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
1079
1099
1080
1100
let processing_classes = & mut processing_classes[ para. range . clone ( ) ] ;
1081
1101
let levels = & mut levels[ para. range . clone ( ) ] ;
1102
+ let mut level_runs = LevelRunVec :: new ( ) ;
1082
1103
1083
1104
explicit:: compute (
1084
1105
text,
1085
1106
para. level ,
1086
1107
original_classes,
1087
1108
levels,
1088
1109
processing_classes,
1110
+ & mut level_runs,
1089
1111
) ;
1090
1112
1091
- let sequences = prepare:: isolating_run_sequences ( para. level , original_classes, levels) ;
1113
+ let mut sequences = prepare:: IsolatingRunSequenceVec :: new ( ) ;
1114
+ prepare:: isolating_run_sequences (
1115
+ para. level ,
1116
+ original_classes,
1117
+ levels,
1118
+ level_runs,
1119
+ has_isolate_controls,
1120
+ & mut sequences,
1121
+ ) ;
1092
1122
for sequence in & sequences {
1093
1123
implicit:: resolve_weak ( text, sequence, processing_classes) ;
1094
1124
implicit:: resolve_neutral (
@@ -1100,6 +1130,7 @@ fn compute_bidi_info_for_para<'a, D: BidiDataSource, T: TextSource<'a> + ?Sized>
1100
1130
processing_classes,
1101
1131
) ;
1102
1132
}
1133
+
1103
1134
implicit:: resolve_levels ( processing_classes, levels) ;
1104
1135
1105
1136
assign_levels_to_removed_chars ( para. level , original_classes, levels) ;
@@ -1549,6 +1580,24 @@ mod tests {
1549
1580
#[ cfg( feature = "hardcoded-data" ) ]
1550
1581
fn test_process_text ( ) {
1551
1582
let tests = vec ! [
1583
+ (
1584
+ // text
1585
+ "" ,
1586
+ // base level
1587
+ Some ( RTL_LEVEL ) ,
1588
+ // levels
1589
+ Level :: vec( & [ ] ) ,
1590
+ // original_classes
1591
+ vec![ ] ,
1592
+ // paragraphs
1593
+ vec![ ] ,
1594
+ // levels_u16
1595
+ Level :: vec( & [ ] ) ,
1596
+ // original_classes_u16
1597
+ vec![ ] ,
1598
+ // paragraphs_u16
1599
+ vec![ ] ,
1600
+ ) ,
1552
1601
(
1553
1602
// text
1554
1603
"abc123" ,
@@ -1710,6 +1759,19 @@ mod tests {
1710
1759
paragraphs: t. 4 . clone( ) ,
1711
1760
}
1712
1761
) ;
1762
+ // If it was empty, also test that ParagraphBidiInfo handles it safely.
1763
+ if t. 4 . len ( ) == 0 {
1764
+ assert_eq ! (
1765
+ ParagraphBidiInfo :: new( t. 0 , t. 1 ) ,
1766
+ ParagraphBidiInfo {
1767
+ text: t. 0 ,
1768
+ original_classes: t. 3 . clone( ) ,
1769
+ levels: t. 2 . clone( ) ,
1770
+ paragraph_level: RTL_LEVEL ,
1771
+ is_pure_ltr: true ,
1772
+ }
1773
+ )
1774
+ }
1713
1775
// If it was a single paragraph, also test ParagraphBidiInfo.
1714
1776
if t. 4 . len ( ) == 1 {
1715
1777
assert_eq ! (
0 commit comments