@@ -587,6 +587,135 @@ static always_inline ssize_t handle_unicode(char** sp, char **dp) {
587
587
return 0 ;
588
588
}
589
589
590
/*
 * get_string_block: classify the bytes currently loaded in `v`.
 *
 * Expands in a scope that provides `opts`, `pg`, `v`, `bsresult`,
 * `quoteresult` and `escresult`, and assigns the three predicate outputs:
 *   bsresult    - lanes holding a backslash
 *   quoteresult - lanes holding a double quote
 *   escresult   - lanes holding a byte <= 0x1F (a control character that
 *                 JSON does not allow unescaped inside a string); only
 *                 computed when F_VALIDATE_STRING is set in `opts`,
 *                 otherwise an all-false predicate.
 *
 * The control-character test must be an unsigned "less than or equal"
 * compare: an equality compare against 0x1F would only flag that single
 * byte and let 0x00..0x1E through validation.
 */
#define get_string_block { \
    bsresult = svcmpeq_n_u8(pg, v, '\\'); \
    quoteresult = svcmpeq_n_u8(pg, v, '"'); \
    if (unlikely((opts & F_VALIDATE_STRING) != 0)) { \
        escresult = svcmple_n_u8(pg, v, '\x1f'); \
    } else { \
        escresult = svpfalse_b(); \
    }}
600
+
601
/*
 * has_first_quote: set `temp` to the quote lanes that come before any
 * backslash or control-character lane of the current block.
 *
 * Reads `bsresult`, `escresult` and `quoteresult` from the expanding scope
 * and writes `temp`. When the block has no backslash/control lanes at all,
 * `temp` is simply `quoteresult`. A non-empty `temp` means a quote is
 * reached before anything that needs special handling.
 */
#define has_first_quote do { \
    svbool_t hfq_all_ = svptrue_b8(); \
    svbool_t hfq_stop_ = svorr_b_z(hfq_all_, bsresult, escresult); \
    temp = quoteresult; \
    if (svptest_any(hfq_all_, hfq_stop_)) { \
        /* keep only the quotes located before the first stop lane */ \
        temp = svand_b_z(hfq_all_, svbrkb_b_z(hfq_all_, hfq_stop_), quoteresult); \
    } \
} while (0)
610
+
611
/*
 * has_first_backslash: set `temp` to the backslash lanes that come before
 * the first quote lane of the current block.
 *
 * Reads `bsresult` and `quoteresult` from the expanding scope and writes
 * `temp`. When the block contains no quote, every backslash lane is kept.
 */
#define has_first_backslash do { \
    svbool_t hfb_all_ = svptrue_b8(); \
    temp = bsresult; \
    if (svptest_any(hfb_all_, quoteresult)) { \
        /* mask off everything from the first quote onwards */ \
        temp = svand_b_z(hfb_all_, svbrkb_b_z(hfb_all_, quoteresult), bsresult); \
    } \
} while (0)
619
+
620
/*
 * has_first_unescaped: set `temp` to the control-character lanes that come
 * before the first quote lane of the current block.
 *
 * Reads `escresult` and `quoteresult` from the expanding scope and writes
 * `temp`. When the block contains no quote, every control-character lane
 * is kept.
 */
#define has_first_unescaped do { \
    svbool_t hfu_all_ = svptrue_b8(); \
    temp = escresult; \
    if (svptest_any(hfu_all_, quoteresult)) { \
        /* mask off everything from the first quote onwards */ \
        temp = svand_b_z(hfu_all_, svbrkb_b_z(hfu_all_, quoteresult), escresult); \
    } \
} while (0)
628
+
629
+ static always_inline long parse_string_inplace_sve (uint8_t * * cur , bool * has_esc , uint64_t opts ) {
630
+ uint8_t * start = * cur ;
631
+ svbool_t bsresult ;
632
+ svbool_t quoteresult ;
633
+ svbool_t escresult ;
634
+ svbool_t temp ;
635
+ svbool_t pg = svptrue_b8 ();
636
+ svuint8_t v ;
637
+ while (true) {
638
+ v = svld1_u8 (pg , (uint8_t * )(* cur ));
639
+ get_string_block ;
640
+ has_first_quote ;
641
+ if (svptest_any (svptrue_b8 (), temp )) {
642
+ * cur += svcntp_b8 (svptrue_b8 (), svbrkb_b_z (svptrue_b8 (), quoteresult )) + 1 ; // skip the quote char
643
+ * has_esc = false;
644
+ return * cur - start - 1 ;
645
+ }
646
+ has_first_backslash ;
647
+ if (unlikely (svptest_any (svptrue_b8 (), temp ))) {
648
+ break ;
649
+ }
650
+ has_first_unescaped ;
651
+ if (unlikely ((opts & F_VALIDATE_STRING ) != 0 && svptest_any (svptrue_b8 (), temp ))) {
652
+ * cur += svcntp_b8 (svptrue_b8 (), svbrkb_b_z (svptrue_b8 (), escresult ));
653
+ return - SONIC_CONTROL_CHAR ;
654
+ }
655
+ * cur += 32 ;
656
+ }
657
+ // deal with the escaped string
658
+ * has_esc = true;
659
+ has_first_backslash ;
660
+ * cur += svcntp_b8 (svptrue_b8 (), svbrkb_b_z (svptrue_b8 (), bsresult ));
661
+ uint8_t * dst = * cur ;
662
+ uint8_t esc ;
663
+ escape :
664
+ esc = * (* cur + 1 );
665
+ if (likely (esc ) != 'u' ) {
666
+ if (unlikely (ESCAPED_TAB [esc ]) == 0 ) {
667
+ return - SONIC_INVALID_ESCAPED ;
668
+ }
669
+ * cur += 2 ;
670
+ * dst ++ = ESCAPED_TAB [esc ];
671
+ } else if (handle_unicode ((char * * )cur , (char * * )& dst ) != 0 ) {
672
+ return - SONIC_INVALID_ESCAPED_UTF ;
673
+ }
674
+ // check continuous escaped char
675
+ if (* * cur == '\\' ) {
676
+ goto escape ;
677
+ }
678
+ find_and_move :
679
+ v = svld1_u8 (pg , (uint8_t * )(* cur ));
680
+ get_string_block ;
681
+ has_first_quote ;
682
+ if (svptest_any (svptrue_b8 (), temp )) {
683
+ while (true) {
684
+ repeat_8 ( {
685
+ if (* * cur != '"' ) {
686
+ * dst ++ = * * cur ;
687
+ * cur += 1 ;
688
+ } else {
689
+ * cur += 1 ;
690
+ return dst - start ;
691
+ }
692
+ });
693
+ }
694
+ }
695
+ has_first_unescaped ;
696
+ if (unlikely ((opts & F_VALIDATE_STRING ) != 0 && svptest_any (svptrue_b8 (), temp ))) {
697
+ * cur += svcntp_b8 (svptrue_b8 (), svbrkb_b_z (svptrue_b8 (), escresult ));
698
+ return - SONIC_CONTROL_CHAR ;
699
+ }
700
+ has_first_backslash ;
701
+ if (svptest_any (svptrue_b8 (), temp )) {
702
+ while (true) {
703
+ repeat_8 ( {
704
+ if (* * cur != '\\' ) {
705
+ * dst ++ = * * cur ;
706
+ * cur += 1 ;
707
+ } else {
708
+ goto escape ;
709
+ }
710
+ });
711
+ }
712
+ }
713
+ svst1_u8 (svptrue_b8 (), dst , v );
714
+ * cur += 32 ;
715
+ dst += 32 ;
716
+ goto find_and_move ;
717
+ }
718
+
590
719
// positive is length
591
720
// negative is - error_code
592
721
static always_inline long parse_string_inplace (uint8_t * * cur , bool * has_esc , uint64_t opts ) {
@@ -1112,7 +1241,7 @@ static always_inline error_code parse(GoParser* slf, reader* rdr, visitor* vis)
1112
1241
neg = false;
1113
1242
break ;
1114
1243
case '"' :
1115
- slen = parse_string_inplace (cur , & has_esc , slf -> opt );
1244
+ slen = parse_string_inplace_sve (cur , & has_esc , slf -> opt );
1116
1245
if (slen < 0 ) {
1117
1246
err = (error_code )(- slen );
1118
1247
}
@@ -1149,7 +1278,7 @@ static always_inline error_code parse(GoParser* slf, reader* rdr, visitor* vis)
1149
1278
// parse key
1150
1279
pos = offset_from (* cur );
1151
1280
1152
- slen = parse_string_inplace (cur , & has_esc , slf -> opt );
1281
+ slen = parse_string_inplace_sve (cur , & has_esc , slf -> opt );
1153
1282
if (slen < 0 ) {
1154
1283
err = (error_code )(- slen );
1155
1284
return err ;
@@ -1205,7 +1334,7 @@ static always_inline error_code parse(GoParser* slf, reader* rdr, visitor* vis)
1205
1334
neg = false;
1206
1335
break ;
1207
1336
case '"' :
1208
- slen = parse_string_inplace (cur , & has_esc , slf -> opt );
1337
+ slen = parse_string_inplace_sve (cur , & has_esc , slf -> opt );
1209
1338
if (slen < 0 ) {
1210
1339
err = (error_code )(- slen );
1211
1340
}
@@ -1293,7 +1422,7 @@ static always_inline error_code parse(GoParser* slf, reader* rdr, visitor* vis)
1293
1422
neg = false;
1294
1423
break ;
1295
1424
case '"' :
1296
- slen = parse_string_inplace (cur , & has_esc , slf -> opt );
1425
+ slen = parse_string_inplace_sve (cur , & has_esc , slf -> opt );
1297
1426
if (slen < 0 ) {
1298
1427
err = (error_code )(- slen );
1299
1428
}
@@ -1379,4 +1508,4 @@ long parse_with_padding(void* p) {
1379
1508
}
1380
1509
1381
1510
return err ;
1382
- }
1511
+ }
0 commit comments