@@ -254,17 +254,11 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
254254 bool atStartOfLine = true ; /* = seen only whitespace in the current line */
255255 size_t minIndent = 1000000 ;
256256 size_t curIndent = 0 ;
257- std::vector<int > nrIndentedLines (es.size (), 0 );
258- std::vector<bool > perfectPreallocate (es.size (), true );
259- size_t finalBlankLine = 0 ;
260- for (const auto & [n, pair] : enumerate(es)) {
261- auto & [i_pos, i] = pair;
257+ for (auto & [i_pos, i] : es) {
262258 auto * str = std::get_if<StringToken>(&i);
263259 if (!str || !str->hasIndentation ) {
264260 /* Anti-quotations and escaped characters end the current start-of-line whitespace. */
265261 if (atStartOfLine) {
266- if (n > 0 )
267- nrIndentedLines[n - 1 ]++;
268262 atStartOfLine = false ;
269263 if (curIndent < minIndent)
270264 minIndent = curIndent;
@@ -276,35 +270,10 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
276270 if (str->p [j] == ' ' )
277271 curIndent++;
278272 else if (str->p [j] == ' \n ' ) {
279- /* if curIndent is less than the current value of minIndent,
280- * we can't calculate at this point how much indention we
281- * will remove; we will just have to over-allocate later.
282- * Fortunately in practice this shouldn't come up very much.
283- * It would have to look like (with spaces replaced with
284- * underscore for clarity)
285- *
286- * ''
287- * ________only long indentations initially
288- * ________more long indentations
289- * ____
290- * ^^ empty line with a shorter indentation
291- * ____after that there can be shorter indentations
292- * ''
293- *
294- * most empty lines will have no indentation, and those that
295- * do will usually be of at least the minium indentation,
296- * and come after a line with said minimum indentation, in
297- * which case we can perfectly pre-allocate the string.
298- */
299- if (curIndent < minIndent)
300- perfectPreallocate[n] = false ;
301- else
302- nrIndentedLines[n]++;
303273 /* Empty line, doesn't influence minimum
304274 indentation. */
305275 curIndent = 0 ;
306276 } else {
307- nrIndentedLines[n]++;
308277 atStartOfLine = false ;
309278 if (curIndent < minIndent)
310279 minIndent = curIndent;
@@ -315,64 +284,54 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
315284 }
316285 }
317286 }
318- if (atStartOfLine)
319- finalBlankLine = curIndent;
320287
321288 /* Strip spaces from each line. */
322289 auto * es2 = new std::vector<std::pair<PosIdx, Expr *>>;
323290 atStartOfLine = true ;
324291 size_t curDropped = 0 ;
325- size_t n = 0 ;
292+ size_t n = es. size () ;
326293 auto i = es.begin ();
327294 const auto trimExpr = [&](Expr * e) {
328295 atStartOfLine = false ;
329296 curDropped = 0 ;
330297 es2->emplace_back (i->first , e);
331298 };
332299 const auto trimString = [&](const StringToken & t) {
333- auto finalLineTrim = n == es.size () - 1 ? finalBlankLine : 0 ;
334- /* try to pre-calculate exactly how big of a string we need. In weird
335- * rare cases we can't efficiently pre-calculate it and will end up
336- * over-allocating. See comment above.
337- */
338- size_t size = 1 + t.l - nrIndentedLines[n] * minIndent - finalLineTrim;
339- if (size == 1 ) // ignore empty strings before we allocate
340- return ;
341- char * s2 = (char *) alloc.allocate (size);
342- size_t end = t.l - finalLineTrim;
343- size_t c = 0 ;
344- for (size_t j = 0 ; j < end; ++j) {
300+ std::string s2;
301+ for (size_t j = 0 ; j < t.l ; ++j) {
345302 if (atStartOfLine) {
346303 if (t.p [j] == ' ' ) {
347304 if (curDropped++ >= minIndent)
348- s2[c++] = t.p [j];
305+ s2 + = t.p [j];
349306 } else if (t.p [j] == ' \n ' ) {
350307 curDropped = 0 ;
351- s2[c++] = t.p [j];
308+ s2 + = t.p [j];
352309 } else {
353310 atStartOfLine = false ;
354311 curDropped = 0 ;
355- s2[c++] = t.p [j];
312+ s2 + = t.p [j];
356313 }
357314 } else {
358- s2[c++] = t.p [j];
315+ s2 + = t.p [j];
359316 if (t.p [j] == ' \n ' )
360317 atStartOfLine = true ;
361318 }
362319 }
363- if (perfectPreallocate[n])
364- assert (c == size - 1 );
365- else
366- assert (c < size);
367- // We should have caught empty strings before allocation. The only case
368- // in which we have to over-allocate is a case with an empty line, which
369- // is therefore not empty.
370- assert (c > 0 );
371- s2[c] = ' \0 ' ;
372-
373- es2->emplace_back (i->first , new ExprString (s2));
320+
321+ /* Remove the last line if it is empty and consists only of
322+ spaces. */
323+ if (n == 1 ) {
324+ std::string::size_type p = s2.find_last_of (' \n ' );
325+ if (p != std::string::npos && s2.find_first_not_of (' ' , p + 1 ) == std::string::npos)
326+ s2 = std::string (s2, 0 , p + 1 );
327+ }
328+
329+ // Ignore empty strings for a minor optimisation and AST simplification
330+ if (s2 != " " ) {
331+ es2->emplace_back (i->first , new ExprString (alloc, s2));
332+ }
374333 };
375- for (; i != es.end (); ++i, ++ n) {
334+ for (; i != es.end (); ++i, -- n) {
376335 std::visit (overloaded{trimExpr, trimString}, i->second );
377336 }
378337
0 commit comments