@@ -255,7 +255,7 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
255255 size_t minIndent = 1000000 ;
256256 size_t curIndent = 0 ;
257257 std::vector<int > nrIndentedLines (es.size (), 0 );
258- bool perfectPreallocate = true ;
258+ std::vector< bool > perfectPreallocate (es. size (), true ) ;
259259 size_t finalBlankLine = 0 ;
260260 for (const auto & [n, pair] : enumerate(es)) {
261261 auto & [i_pos, i] = pair;
@@ -276,15 +276,30 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
276276 if (str->p [j] == ' ' )
277277 curIndent++;
278278 else if (str->p [j] == ' \n ' ) {
279- /* if curIndent < minIndent, we can't calculate at this
280- * point how much indention we will remove; we will just
281- * have to over-allocate later. Fortunately in practice this
282- * shouldn't come up very much.
279+ /* if curIndent is less than the current value of minIndent,
280+ * we can't calculate at this point how much indention we
281+ * will remove; we will just have to over-allocate later.
282+ * Fortunately in practice this shouldn't come up very much.
283+ * It would have to look like (with spaces replaced with
284+ * underscore for clarity)
285+ *
286+ * ''
287+ * ________only long indentations initially
288+ * ________more long indentations
289+ * ____
290+ * ^^ empty line with a shorter indentation
291+ * ____after that there can be shorter indentations
292+ * ''
293+ *
294+ * most empty lines will have no indentation, and those that
295+ * do will usually be of at least the minium indentation,
296+ * and come after a line with said minimum indentation, in
297+ * which case we can perfectly pre-allocate the string.
283298 */
284- if (curIndent >= minIndent)
285- nrIndentedLines [n]++ ;
299+ if (curIndent < minIndent)
300+ perfectPreallocate [n] = false ;
286301 else
287- perfectPreallocate = false ;
302+ nrIndentedLines[n]++ ;
288303 /* Empty line, doesn't influence minimum
289304 indentation. */
290305 curIndent = 0 ;
@@ -321,7 +336,7 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
321336 * over-allocating. See comment above.
322337 */
323338 size_t size = 1 + t.l - nrIndentedLines[n] * minIndent - finalLineTrim;
324- if (size == 1 ) // ignore (most) empty strings before we allocate
339+ if (size == 1 ) // ignore empty strings before we allocate
325340 return ;
326341 char * s2 = (char *) alloc.allocate (size);
327342 size_t end = t.l - finalLineTrim;
@@ -345,16 +360,17 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
345360 atStartOfLine = true ;
346361 }
347362 }
348- if (perfectPreallocate)
363+ if (perfectPreallocate[n] )
349364 assert (c == size - 1 );
350365 else
351366 assert (c < size);
367+ // We should have caught empty strings before allocation. The only case
368+ // in which we have to over-allocate is a case with an empty line, which
369+ // is therefore not empty.
370+ assert (c > 0 );
352371 s2[c] = ' \0 ' ;
353372
354- // Ignore empty strings for a minor optimisation and AST simplification
355- if (*s2 != ' \0 ' ) {
356- es2->emplace_back (i->first , new ExprString (s2));
357- }
373+ es2->emplace_back (i->first , new ExprString (s2));
358374 };
359375 for (; i != es.end (); ++i, ++n) {
360376 std::visit (overloaded{trimExpr, trimString}, i->second );
0 commit comments