@@ -852,39 +852,38 @@ static void traverse_for_entities(
852
852
853
853
unsigned code = 0 , code2 = 0 ;
854
854
const char * entity_end_ptr = NULL ;
855
- bool valid_entity = true;
856
855
857
856
if (current_ptr [1 ] == '#' ) {
858
857
/* Processing numeric entity */
859
858
const char * num_start = current_ptr + 2 ;
860
859
entity_end_ptr = num_start ;
861
860
if (process_numeric_entity (& entity_end_ptr , & code ) == FAILURE ) {
862
- valid_entity = false ;
861
+ goto invalid_incomplete_entity ;
863
862
}
864
- if (valid_entity && !all && (code > 63U || stage3_table_be_apos_00000 [code ].data .ent .entity == NULL )) {
863
+ if (!all && (code > 63U || stage3_table_be_apos_00000 [code ].data .ent .entity == NULL )) {
865
864
/* If we're in htmlspecialchars_decode, we're only decoding entities
866
865
* that represent &, <, >, " and '. Is this one of them? */
867
- valid_entity = false ;
868
- } else if (valid_entity && ( !unicode_cp_is_allowed (code , doctype ) ||
869
- (doctype == ENT_HTML_DOC_HTML5 && code == 0x0D ) )) {
866
+ goto invalid_incomplete_entity ;
867
+ } else if (!unicode_cp_is_allowed (code , doctype ) ||
868
+ (doctype == ENT_HTML_DOC_HTML5 && code == 0x0D )) {
870
869
/* are we allowed to decode this entity in this document type?
871
870
* HTML 5 is the only one that has a character that cannot be used in
872
871
* a numeric entity but is allowed literally (U+000D). The
873
872
* unoptimized version would be ... || !numeric_entity_is_allowed(code) */
874
- valid_entity = false ;
873
+ goto invalid_incomplete_entity ;
875
874
}
876
875
} else {
877
- /* Processing named entity */
876
+ /* Processing named entity */
878
877
const char * name_start = current_ptr + 1 ;
879
878
/* Search for ';' */
880
879
const size_t max_search_len = MIN (LONGEST_ENTITY_LENGTH + 1 , input_end - name_start );
881
880
const char * semi_colon_ptr = memchr (name_start , ';' , max_search_len );
882
881
if (!semi_colon_ptr ) {
883
- valid_entity = false ;
882
+ goto invalid_incomplete_entity ;
884
883
} else {
885
884
const size_t name_len = semi_colon_ptr - name_start ;
886
885
if (name_len == 0 ) {
887
- valid_entity = false ;
886
+ goto invalid_incomplete_entity ;
888
887
} else {
889
888
if (resolve_named_entity_html (name_start , name_len , inv_map , & code , & code2 ) == FAILURE ) {
890
889
if (doctype == ENT_HTML_DOC_XHTML && name_len == 4 &&
@@ -895,7 +894,7 @@ static void traverse_for_entities(
895
894
* hack to support it */
896
895
code = (unsigned )'\'' ;
897
896
} else {
898
- valid_entity = false ;
897
+ goto invalid_incomplete_entity ;
899
898
}
900
899
}
901
900
entity_end_ptr = semi_colon_ptr ;
@@ -904,45 +903,51 @@ static void traverse_for_entities(
904
903
}
905
904
906
905
/* If entity_end_ptr is not found or does not point to ';', consider the entity invalid */
907
- if (!valid_entity || entity_end_ptr == NULL || * entity_end_ptr != ';' ) {
908
- * output_ptr ++ = * current_ptr ++ ;
909
- continue ;
906
+ if (entity_end_ptr == NULL ) {
907
+ goto invalid_incomplete_entity ;
910
908
}
911
909
912
910
/* Check if quotes are allowed for entities representing ' or " */
913
911
if ((code == '\'' && !(flags & ENT_HTML_QUOTE_SINGLE )) ||
914
912
(code == '"' && !(flags & ENT_HTML_QUOTE_DOUBLE )))
915
913
{
916
- valid_entity = false ;
914
+ goto invalid_complete_entity ;
917
915
}
918
916
919
917
/* UTF-8 doesn't need mapping (ISO-8859-1 doesn't either, but
920
918
* the call is needed to ensure the codepoint <= U+00FF) */
921
- if (valid_entity && charset != cs_utf_8 ) {
919
+ if (charset != cs_utf_8 ) {
922
920
/* replace unicode code point */
923
- if (map_from_unicode (code , charset , & code ) == FAILURE || code2 != 0 )
924
- valid_entity = false;
921
+ if (map_from_unicode (code , charset , & code ) == FAILURE || code2 != 0 ) {
922
+ goto invalid_complete_entity ;
923
+ }
925
924
}
926
925
927
- if (valid_entity ) {
928
- /* Write the parsed entity into the output buffer */
929
- output_ptr += write_octet_sequence ((unsigned char * )output_ptr , charset , code );
930
- if (code2 ) {
931
- output_ptr += write_octet_sequence ((unsigned char * )output_ptr , charset , code2 );
932
- }
933
- /* Move current_ptr past the semicolon */
934
- current_ptr = entity_end_ptr + 1 ;
926
+ /* Write the parsed entity into the output buffer */
927
+ output_ptr += write_octet_sequence ((unsigned char * )output_ptr , charset , code );
928
+ if (code2 ) {
929
+ output_ptr += write_octet_sequence ((unsigned char * )output_ptr , charset , code2 );
930
+ }
931
+ /* Move current_ptr past the semicolon */
932
+ current_ptr = entity_end_ptr + 1 ;
933
+ continue ;
934
+
935
+ invalid_incomplete_entity :
936
+ /* If the entity is invalid at parse stage or entity_end_ptr was never found, copy '&' as normal */
937
+ * output_ptr ++ = * current_ptr ++ ;
938
+ continue ;
939
+
940
+ invalid_complete_entity :
941
+ /* If the entity became invalid after we found entity_end_ptr */
942
+ if (entity_end_ptr ) {
943
+ const size_t len = entity_end_ptr - current_ptr ;
944
+ memcpy (output_ptr , current_ptr , len );
945
+ output_ptr += len ;
946
+ current_ptr = entity_end_ptr ;
935
947
} else {
936
- /* If the entity is invalid, copy characters from current_ptr up to entity_end_ptr */
937
- if (entity_end_ptr ) {
938
- const size_t len = entity_end_ptr - current_ptr ;
939
- memcpy (output_ptr , current_ptr , len );
940
- output_ptr += len ;
941
- current_ptr = entity_end_ptr ;
942
- } else {
943
- * output_ptr ++ = * current_ptr ++ ;
944
- }
948
+ * output_ptr ++ = * current_ptr ++ ;
945
949
}
950
+ continue ;
946
951
}
947
952
948
953
* output_ptr = '\0' ;
0 commit comments