Skip to content

Commit eab467e

Browse files
committed
libexpr: introduce arena to hold ExprString strings
1. Saves 24-32 bytes per string (size of std::string) 2. Saves additional bytes by not over-allocating strings (in total we save ~1% memory) 3. Sets us up to perform a similar transformation on the other Expr subclasses 4. Makes ExprString trivially moveable (before the string data might move, causing the Value's pointer to become invalid). This is important so we can put ExprStrings in an std::vector and refer to them by index We have introduced a string copy in ParserState::stripIndentation(). This could be removed by pre-allocating the right sized string in the arena, but this adds complexity and doesn't seem to improve performance, so for now we've left the copy in.
1 parent c43ea09 commit eab467e

File tree

6 files changed

+79
-39
lines changed

6 files changed

+79
-39
lines changed

src/libexpr/eval.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3217,7 +3217,8 @@ Expr * EvalState::parse(
32173217
docComments = &it->second;
32183218
}
32193219

3220-
auto result = parseExprFromBuf(text, length, origin, basePath, symbols, settings, positions, *docComments, rootFS);
3220+
auto result = parseExprFromBuf(
3221+
text, length, origin, basePath, mem.exprs.alloc, symbols, settings, positions, *docComments, rootFS);
32213222

32223223
result->bindVars(*this, staticEnv);
32233224

src/libexpr/include/nix/expr/eval.hh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,11 @@ public:
355355
return stats;
356356
}
357357

358+
/**
359+
* Storage for the AST nodes
360+
*/
361+
Exprs exprs;
362+
358363
private:
359364
Statistics stats;
360365
};

src/libexpr/include/nix/expr/nixexpr.hh

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <map>
55
#include <vector>
6+
#include <memory_resource>
67

78
#include "nix/expr/gc-small-vector.hh"
89
#include "nix/expr/value.hh"
@@ -84,6 +85,13 @@ std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath)
8485

8586
using UpdateQueue = SmallTemporaryValueVector<conservativeStackReservation>;
8687

88+
class Exprs
89+
{
90+
std::pmr::monotonic_buffer_resource buffer;
91+
public:
92+
std::pmr::polymorphic_allocator<char> alloc{&buffer};
93+
};
94+
8795
/* Abstract syntax of Nix expressions. */
8896

8997
struct Expr
@@ -173,13 +181,28 @@ struct ExprFloat : Expr
173181

174182
struct ExprString : Expr
175183
{
176-
std::string s;
177184
Value v;
178185

179-
ExprString(std::string && s)
180-
: s(std::move(s))
186+
/**
187+
* This is only for strings already allocated in our polymorphic allocator,
188+
* or that live at least that long (e.g. c++ string literals)
189+
*/
190+
ExprString(const char * s)
181191
{
182-
v.mkStringNoCopy(this->s.data());
192+
v.mkStringNoCopy(s);
193+
};
194+
195+
ExprString(std::pmr::polymorphic_allocator<char> & alloc, std::string_view sv)
196+
{
197+
auto len = sv.length();
198+
if (len == 0) {
199+
v.mkStringNoCopy("");
200+
return;
201+
}
202+
char * s = alloc.allocate(len + 1);
203+
sv.copy(s, len);
204+
s[len] = '\0';
205+
v.mkStringNoCopy(s);
183206
};
184207

185208
Value * maybeThunk(EvalState & state, Env & env) override;

src/libexpr/include/nix/expr/parser-state.hh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ struct LexerState
8282
struct ParserState
8383
{
8484
const LexerState & lexerState;
85+
std::pmr::polymorphic_allocator<char> & alloc;
8586
SymbolTable & symbols;
8687
PosTable & positions;
8788
Expr * result;
@@ -327,7 +328,7 @@ ParserState::stripIndentation(const PosIdx pos, std::vector<std::pair<PosIdx, st
327328

328329
// Ignore empty strings for a minor optimisation and AST simplification
329330
if (s2 != "") {
330-
es2->emplace_back(i->first, new ExprString(std::move(s2)));
331+
es2->emplace_back(i->first, new ExprString(alloc, s2));
331332
}
332333
};
333334
for (; i != es.end(); ++i, --n) {

src/libexpr/nixexpr.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ void ExprFloat::show(const SymbolTable & symbols, std::ostream & str) const
4040

4141
void ExprString::show(const SymbolTable & symbols, std::ostream & str) const
4242
{
43-
printLiteralString(str, s);
43+
printLiteralString(str, v.string_view());
4444
}
4545

4646
void ExprPath::show(const SymbolTable & symbols, std::ostream & str) const

src/libexpr/parser.y

Lines changed: 42 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ Expr * parseExprFromBuf(
6464
size_t length,
6565
Pos::Origin origin,
6666
const SourcePath & basePath,
67+
std::pmr::polymorphic_allocator<char> & alloc,
6768
SymbolTable & symbols,
6869
const EvalSettings & settings,
6970
PosTable & positions,
@@ -134,6 +135,7 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) {
134135
std::vector<nix::AttrName> * attrNames;
135136
std::vector<std::pair<nix::AttrName, nix::PosIdx>> * inheritAttrs;
136137
std::vector<std::pair<nix::PosIdx, nix::Expr *>> * string_parts;
138+
std::variant<nix::Expr *, std::string_view> * to_be_string;
137139
std::vector<std::pair<nix::PosIdx, std::variant<nix::Expr *, nix::StringToken>>> * ind_string_parts;
138140
}
139141

@@ -148,7 +150,8 @@ static Expr * makeCall(PosIdx pos, Expr * fn, Expr * arg) {
148150
%type <inheritAttrs> attrs
149151
%type <string_parts> string_parts_interpolated
150152
%type <ind_string_parts> ind_string_parts
151-
%type <e> path_start string_parts string_attr
153+
%type <e> path_start
154+
%type <to_be_string> string_parts string_attr
152155
%type <id> attr
153156
%token <id> ID
154157
%token <str> STR IND_STR
@@ -303,7 +306,13 @@ expr_simple
303306
}
304307
| INT_LIT { $$ = new ExprInt($1); }
305308
| FLOAT_LIT { $$ = new ExprFloat($1); }
306-
| '"' string_parts '"' { $$ = $2; }
309+
| '"' string_parts '"' {
310+
std::visit(overloaded{
311+
[&](std::string_view str) { $$ = new ExprString(state->alloc, str); },
312+
[&](Expr * expr) { $$ = expr; }},
313+
*$2);
314+
delete $2;
315+
}
307316
| IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE {
308317
$$ = state->stripIndentation(CUR_POS, std::move(*$2));
309318
delete $2;
@@ -314,11 +323,11 @@ expr_simple
314323
$$ = new ExprConcatStrings(CUR_POS, false, $2);
315324
}
316325
| SPATH {
317-
std::string path($1.p + 1, $1.l - 2);
326+
std::string_view path($1.p + 1, $1.l - 2);
318327
$$ = new ExprCall(CUR_POS,
319328
new ExprVar(state->s.findFile),
320329
{new ExprVar(state->s.nixPath),
321-
new ExprString(std::move(path))});
330+
new ExprString(state->alloc, path)});
322331
}
323332
| URI {
324333
static bool noURLLiterals = experimentalFeatureSettings.isEnabled(Xp::NoUrlLiterals);
@@ -327,7 +336,7 @@ expr_simple
327336
.msg = HintFmt("URL literals are disabled"),
328337
.pos = state->positions[CUR_POS]
329338
});
330-
$$ = new ExprString(std::string($1));
339+
$$ = new ExprString(state->alloc, $1);
331340
}
332341
| '(' expr ')' { $$ = $2; }
333342
/* Let expressions `let {..., body = ...}' are just desugared
@@ -344,19 +353,19 @@ expr_simple
344353
;
345354

346355
string_parts
347-
: STR { $$ = new ExprString(std::string($1)); }
348-
| string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); }
349-
| { $$ = new ExprString(""); }
356+
: STR { $$ = new std::variant<Expr *, std::string_view>($1); }
357+
| string_parts_interpolated { $$ = new std::variant<Expr *, std::string_view>(new ExprConcatStrings(CUR_POS, true, $1)); }
358+
| { $$ = new std::variant<Expr *, std::string_view>(std::string_view()); }
350359
;
351360

352361
string_parts_interpolated
353362
: string_parts_interpolated STR
354-
{ $$ = $1; $1->emplace_back(state->at(@2), new ExprString(std::string($2))); }
363+
{ $$ = $1; $1->emplace_back(state->at(@2), new ExprString(state->alloc, $2)); }
355364
| string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->emplace_back(state->at(@2), $3); }
356365
| DOLLAR_CURLY expr '}' { $$ = new std::vector<std::pair<PosIdx, Expr *>>; $$->emplace_back(state->at(@1), $2); }
357366
| STR DOLLAR_CURLY expr '}' {
358367
$$ = new std::vector<std::pair<PosIdx, Expr *>>;
359-
$$->emplace_back(state->at(@1), new ExprString(std::string($1)));
368+
$$->emplace_back(state->at(@1), new ExprString(state->alloc, $1));
360369
$$->emplace_back(state->at(@2), $3);
361370
}
362371
;
@@ -454,15 +463,16 @@ attrs
454463
: attrs attr { $$ = $1; $1->emplace_back(AttrName(state->symbols.create($2)), state->at(@2)); }
455464
| attrs string_attr
456465
{ $$ = $1;
457-
ExprString * str = dynamic_cast<ExprString *>($2);
458-
if (str) {
459-
$$->emplace_back(AttrName(state->symbols.create(str->s)), state->at(@2));
460-
delete str;
461-
} else
462-
throw ParseError({
463-
.msg = HintFmt("dynamic attributes not allowed in inherit"),
464-
.pos = state->positions[state->at(@2)]
465-
});
466+
std::visit(overloaded {
467+
[&](std::string_view str) { $$->emplace_back(AttrName(state->symbols.create(str)), state->at(@2)); },
468+
[&](Expr * expr) {
469+
throw ParseError({
470+
.msg = HintFmt("dynamic attributes not allowed in inherit"),
471+
.pos = state->positions[state->at(@2)]
472+
});
473+
}
474+
}, *$2);
475+
delete $2;
466476
}
467477
| { $$ = new std::vector<std::pair<AttrName, PosIdx>>; }
468478
;
@@ -471,22 +481,20 @@ attrpath
471481
: attrpath '.' attr { $$ = $1; $1->push_back(AttrName(state->symbols.create($3))); }
472482
| attrpath '.' string_attr
473483
{ $$ = $1;
474-
ExprString * str = dynamic_cast<ExprString *>($3);
475-
if (str) {
476-
$$->push_back(AttrName(state->symbols.create(str->s)));
477-
delete str;
478-
} else
479-
$$->push_back(AttrName($3));
484+
std::visit(overloaded {
485+
[&](std::string_view str) { $$->push_back(AttrName(state->symbols.create(str))); },
486+
[&](Expr * expr) { $$->push_back(AttrName(expr)); }
487+
}, *$3);
488+
delete $3;
480489
}
481490
| attr { $$ = new std::vector<AttrName>; $$->push_back(AttrName(state->symbols.create($1))); }
482491
| string_attr
483492
{ $$ = new std::vector<AttrName>;
484-
ExprString *str = dynamic_cast<ExprString *>($1);
485-
if (str) {
486-
$$->push_back(AttrName(state->symbols.create(str->s)));
487-
delete str;
488-
} else
489-
$$->push_back(AttrName($1));
493+
std::visit(overloaded {
494+
[&](std::string_view str) { $$->push_back(AttrName(state->symbols.create(str))); },
495+
[&](Expr * expr) { $$->push_back(AttrName(expr)); }
496+
}, *$1);
497+
delete $1;
490498
}
491499
;
492500

@@ -497,7 +505,7 @@ attr
497505

498506
string_attr
499507
: '"' string_parts '"' { $$ = $2; }
500-
| DOLLAR_CURLY expr '}' { $$ = $2; }
508+
| DOLLAR_CURLY expr '}' { $$ = new std::variant<Expr *, std::string_view>($2); }
501509
;
502510

503511
expr_list
@@ -537,6 +545,7 @@ Expr * parseExprFromBuf(
537545
size_t length,
538546
Pos::Origin origin,
539547
const SourcePath & basePath,
548+
std::pmr::polymorphic_allocator<char> & alloc,
540549
SymbolTable & symbols,
541550
const EvalSettings & settings,
542551
PosTable & positions,
@@ -551,6 +560,7 @@ Expr * parseExprFromBuf(
551560
};
552561
ParserState state {
553562
.lexerState = lexerState,
563+
.alloc = alloc,
554564
.symbols = symbols,
555565
.positions = positions,
556566
.basePath = basePath,

0 commit comments

Comments
 (0)