From 0327f923ff75d32bb6180a06cafd8b02a6a1ff16 Mon Sep 17 00:00:00 2001 From: Reini Urban Date: Thu, 22 Jun 2017 16:51:50 +0200 Subject: [PATCH] Abort on Malformed UTF-8 character errors utf8n_to_uvchr_error() only warns on some Malformed UTF-8 characters, but scan_const needs to error here. Do it with yyerror() which accumulates all parser errors until it "has too many errors". Fixes 2 errors in #293, esp. id:000162,sig:06,src:026278+031045,op:splice,rep:32 and id:000001,sig:06,src:024259,op:arith8,pos:5,val:+27 which segfaulted in the error handler for "panic: constant overflowed allocated space" --- .git-rr-cache | 2 +- pod/perldiag.pod | 6 ++++++ toke.c | 6 +++--- utf8.c | 12 ++++++------ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/.git-rr-cache b/.git-rr-cache index 063dbec90ec..cdfe37572a4 160000 --- a/.git-rr-cache +++ b/.git-rr-cache @@ -1 +1 @@ -Subproject commit 063dbec90ec42e7c78bfc68b75df5ad1f5090fd4 +Subproject commit cdfe37572a445534ad74c967b0cdaa4adeb5fc58 diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 0b3e3614f14..b80c73941ad 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -3539,6 +3539,12 @@ Perhaps the function's author was trying to write a subroutine signature but didn't enable that feature first (C), so the signature was instead interpreted as a bad prototype. +=item Malformed UTF-8 character + +(F) Perl detected one or more fatal UTF-8 errors while parsing a +constant UTF-8 string, which are detailed in the first warnings utf8 +message. 
See L</Malformed UTF-8 character%s> + =item Malformed UTF-8 character%s (S utf8)(F) Perl detected a string that should be UTF-8, but didn't diff --git a/toke.c b/toke.c index e2b7dd142e3..57e50db54a2 100644 --- a/toke.c +++ b/toke.c @@ -4124,9 +4124,9 @@ S_scan_const(pTHX_ char *start) else if (this_utf8 && has_utf8) { /* Both UTF-8, can just copy */ const STRLEN len = UTF8SKIP(s); - /* We expect the source to have already been checked for - * malformedness */ - assert(isUTF8_CHAR((U8 *) s, (U8 *) send)); + /* utf8n_to_uvchr_error might have only warned: promote to error */ + if (!isUTF8_CHAR((U8 *) s, (U8 *) send)) + yyerror("Malformed UTF-8 character"); Copy(s, d, len, U8); d += len; diff --git a/utf8.c b/utf8.c index 464d6cd6a4f..3bb7baade75 100644 --- a/utf8.c +++ b/utf8.c @@ -810,14 +810,14 @@ Perl__byte_dump_string(pTHX_ const U8 * s, const STRLEN len, const bool format) PERL_STATIC_INLINE char * S_unexpected_non_continuation_text(pTHX_ const U8 * const s, - /* How many bytes to print */ - STRLEN print_len, + /* How many bytes to print */ + STRLEN print_len, - /* Which one is the non-continuation */ - const STRLEN non_cont_byte_pos, + /* Which one is the non-continuation */ + const STRLEN non_cont_byte_pos, - /* How many bytes should there be? */ - const STRLEN expect_len) + /* How many bytes should there be? */ + const STRLEN expect_len) { /* Return the malformation warning text for an unexpected continuation * byte. */