Skip to content
1 change: 1 addition & 0 deletions RAW_RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ final version.
* Added `gateway-error` retry-on policy.
* Added support for building envoy with exported symbols
This change allows scripts loaded with the lua filter to load shared object libraries such as those installed via luarocks.
* Added support for custom request/response headers with mixed static and dynamic values.
22 changes: 14 additions & 8 deletions source/common/router/header_formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
#include "common/json/json_loader.h"
#include "common/request_info/utility.h"

#include "absl/strings/str_cat.h"
#include "fmt/format.h"

namespace Envoy {
namespace Router {

namespace {

std::string formatUpstreamMetadataParseException(const std::string& params,
std::string formatUpstreamMetadataParseException(absl::string_view params,
const EnvoyException* cause = nullptr) {
std::string reason;
if (cause != nullptr) {
Expand All @@ -28,23 +29,27 @@ std::string formatUpstreamMetadataParseException(const std::string& params,
return fmt::format("Incorrect header configuration. Expected format "
"UPSTREAM_METADATA([\"namespace\", \"k\", ...]), actual format "
"UPSTREAM_METADATA{}{}",
params, reason);
absl::StrCat(params), reason);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are just trying to convert a string_view to a std::string? Use std::string(params)

}

// Parses the parameters for UPSTREAM_METADATA and returns a function suitable for accessing the
// specified metadata from an RequestInfo::RequestInfo. Expects a string formatted as:
// (["a", "b", "c"])
// There must be at least 2 array elements (a metadata namespace and at least 1 key).
std::function<std::string(const Envoy::RequestInfo::RequestInfo&)>
parseUpstreamMetadataField(const std::string& params_str) {
parseUpstreamMetadataField(absl::string_view params_str) {
params_str = StringUtil::trim(params_str);
if (params_str.empty() || params_str.front() != '(' || params_str.back() != ')') {
throw EnvoyException(formatUpstreamMetadataParseException(params_str));
}

absl::string_view json = params_str;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could also write this
absl::string_view json = params_str.substr(1, params_str.size() - 2); // trim parens.

json.remove_prefix(1);
json.remove_suffix(1);

std::vector<std::string> params;
try {
Json::ObjectSharedPtr parsed_params =
Json::Factory::loadFromString(StringUtil::subspan(params_str, 1, params_str.size() - 1));
Json::ObjectSharedPtr parsed_params = Json::Factory::loadFromString(absl::StrCat(json));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::string(json)


for (const auto& param : parsed_params->asObjectArray()) {
params.emplace_back(param->asString());
Expand Down Expand Up @@ -113,7 +118,7 @@ parseUpstreamMetadataField(const std::string& params_str) {

} // namespace

RequestInfoHeaderFormatter::RequestInfoHeaderFormatter(const std::string& field_name, bool append)
RequestInfoHeaderFormatter::RequestInfoHeaderFormatter(absl::string_view field_name, bool append)
: append_(append) {
if (field_name == "PROTOCOL") {
field_extractor_ = [](const Envoy::RequestInfo::RequestInfo& request_info) {
Expand All @@ -125,11 +130,12 @@ RequestInfoHeaderFormatter::RequestInfoHeaderFormatter(const std::string& field_
return RequestInfo::Utility::formatDownstreamAddressNoPort(
*request_info.downstreamRemoteAddress());
};
} else if (StringUtil::startsWith(field_name.c_str(), "UPSTREAM_METADATA")) {
} else if (field_name.find_first_of("UPSTREAM_METADATA") == 0) {
field_extractor_ =
parseUpstreamMetadataField(field_name.substr(sizeof("UPSTREAM_METADATA") - 1));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have a STATIC_STRLEN macro in common/common/macros.h or similar?

} else {
throw EnvoyException(fmt::format("field '{}' not supported as custom header", field_name));
throw EnvoyException(
fmt::format("field '{}' not supported as custom header", absl::StrCat(field_name)));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::string(field_name). Actually it'd be nice if fmt::format could be taught somehow about string_view as this will be a common pattern.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed #2457.

}
}

Expand Down
27 changes: 26 additions & 1 deletion source/common/router/header_formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "envoy/access_log/access_log.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Router {

Expand All @@ -32,7 +34,7 @@ typedef std::unique_ptr<HeaderFormatter> HeaderFormatterPtr;
*/
class RequestInfoHeaderFormatter : public HeaderFormatter {
public:
RequestInfoHeaderFormatter(const std::string& field_name, bool append);
RequestInfoHeaderFormatter(absl::string_view field_name, bool append);

// HeaderFormatter::format
const std::string format(const Envoy::RequestInfo::RequestInfo& request_info) const override;
Expand Down Expand Up @@ -62,5 +64,28 @@ class PlainHeaderFormatter : public HeaderFormatter {
const bool append_;
};

/**
* A formatter that produces a value by concatenating the results of multiple HeaderFormatters.
*/
class CompoundHeaderFormatter : public HeaderFormatter {
public:
CompoundHeaderFormatter(std::vector<HeaderFormatterPtr>&& formatters, bool append)
: formatters_(std::move(formatters)), append_(append){};

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super nit: remove trailing ';'


// HeaderFormatter::format
const std::string format(const Envoy::RequestInfo::RequestInfo& request_info) const override {
std::string buf;
for (const auto& formatter : formatters_) {
buf += formatter->format(request_info);
}
return buf;
};
bool append() const override { return append_; }

private:
const std::vector<HeaderFormatterPtr> formatters_;
const bool append_;
};

} // namespace Router
} // namespace Envoy
200 changes: 189 additions & 11 deletions source/common/router/header_parser.cc
Original file line number Diff line number Diff line change
@@ -1,28 +1,206 @@
#include "common/router/header_parser.h"

#include <ctype.h>

#include <string>

#include "common/common/assert.h"
#include "common/protobuf/utility.h"

#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"

namespace Envoy {
namespace Router {

namespace {

enum class ParserState {
Literal, // processing literal data
VariableName, // consuming a %VAR% name
ExpectArray, // expect starting [ in %VAR([...])%
ExpectString, // expect starting " in array of strings
String, // consuming an array element string
ExpectArrayDelimiterOrEnd, // expect array delimiter (,) or end of array (])
ExpectArgsEnd, // expect closing ) in %VAR(...)%
ExpectVariableEnd // expect closing % in %VAR(...)%
};

std::string unescape(absl::string_view sv) { return absl::StrJoin(absl::StrSplit(sv, "%%"), "%"); }

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's worth a comment at least why you didn't use absl::StrReplaceAll().

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll switch to that function. It's not on their docs website (that I saw), so I didn't realize it existed.


// Implements a state machine to parse custom headers. Each character of the custom header format

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very nice and readable code in general. I like this better than the regex. Also I think it can potentially be made more general, as described below.

However I think this parser does deserve some rigorous unit tests :)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added enough tests to exercise each case in the switch statement and the branches within them. Is there a particular part you'd like more test cases for?

// is either literal text (with % escaped as %%) or part of a %VAR% or %VAR(["args"])% expression.
// The statement machine does minimal validation of the arguments (if any) and does not know the
// names of valid variables. Interpretation of the variable name and arguments is delegated to
// RequestInfoHeaderFormatter.
HeaderFormatterPtr parseInternal(const envoy::api::v2::HeaderValueOption& header_value_option) {
const std::string& format = header_value_option.header().value();
const bool append = PROTOBUF_GET_WRAPPED_OR_DEFAULT(header_value_option, append, true);

if (format.find("%") == 0) {
const size_t last_occ_pos = format.rfind("%");
if (last_occ_pos == std::string::npos || last_occ_pos <= 1) {
throw EnvoyException(fmt::format("Incorrect header configuration. Expected variable format "
"%<variable_name>%, actual format {}",
format));
absl::string_view format(header_value_option.header().value());
if (format.empty()) {
return HeaderFormatterPtr{new PlainHeaderFormatter("", append)};

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: std::make_unique

}

std::vector<HeaderFormatterPtr> formatters;

size_t pos = 0, start = 0;
ParserState state = ParserState::Literal;
do {
const char ch = format[pos];
const bool has_next_ch = (pos + 1) < format.size();

switch (state) {
case ParserState::Literal:
// Searching for start of %VARIABLE% expression.
if (ch != '%') {
break;
}

if (!has_next_ch) {
throw EnvoyException(
fmt::format("Invalid header configuration. Un-escaped % at position {}", pos));
}

if (format[pos + 1] == '%') {
// Escaped %, skip next character.
pos++;
break;
}

// Un-escaped %: start of variable name. Create a formatter for preceding characters, if
// any.
state = ParserState::VariableName;
if (pos > start) {
absl::string_view literal = format.substr(start, pos - start);
formatters.emplace_back(new PlainHeaderFormatter(unescape(literal), append));
}
start = pos + 1;
break;

case ParserState::VariableName:
// Consume "VAR" from "%VAR%" or "%VAR(...)%"
if (ch == '%') {
// Found complete variable name, add formatter.
formatters.emplace_back(
new RequestInfoHeaderFormatter(format.substr(start, pos - start), append));
start = pos + 1;
state = ParserState::Literal;
break;
}

if (ch == '(') {
// Variable with arguments, search for start of arg array.
state = ParserState::ExpectArray;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was expecting you to bump a depth-count here. Scanning forward a bit, it looks like the algorithm is very sensitive to a particular json structure. I think it might actually be just as easy (and maybe very few if any additional lines of code) to have your scanner track the paren depth for () [] {} and the quoting state for " and '. In a way it might be easier to review as the reviewer would just need to take into account basic lexical nesting and not the schema you are tuned to.

Perhaps this is speculative generality but I also wonder if the json structure you expect might change over time requiring the next person to then update this routine.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The history of using JSON here is that when custom headers only supported the entire header value being custom (vs. what this PR is doing) we thought it would be easier to just use the JSON parser to handle arguments rather than writing an argument parser. In retrospect, I wish I had just handled arguments. FWIW, I'd be willing to toss the JSON business and rewrite this parser to just take quoted arguments (e.g. %UPSTREAM_METADATA("a", "b", "c")%. But if we keep JSON, the upshot is that I don't expect anyone to want anything other than an array. Something more complicated would (IMO) warrant modifying the protobuf definition of HeaderValueOption.

The JSON spec doesn't seem to allow single-quoted strings, so I don't think we need to handle them in any event.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment spawned a bunch of thought from me, but not that much typing. As I'm relatively new to Envoy I still don't have a great sense of what needs to be fast (e.g. runs per request) and what can't be changed (because it's already released and documented).

If this runs per request, I suspect it'd be worth considering dumping JSON parsing and just scan using string_view-related routines. If this only runs at startup, I'm a little less concerned, however I'm also looking at istio/istio#2554 (comment) and #2373 which are all around startup speed. That doesn't mean this would be a problem but I think it's worth understanding it.

The second part of this: is the syntax you need to parse set in stone? If so, and JSON generality is not needed, it'd be nice to drop it in favor of something that could be more easily and quickly parsed.

For the moment I'll continue the review assuming the answers are "the speed does not matter" and "the format cannot be changed".

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parsing these strings only happens only at configuration time.

The syntax needn't be set in stone, but there's also some user friction related to changing this stuff. It looks to me that all this was added after 1.5 and 1.6 isn't out yet, so if we were going to change it, now is the time. @mattklein123 what do you think?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a strong opinion. This seems fine to me. What's the proposed alternative?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now the parser does just enough parsing to find the start/end of % expressions, but needs to know something about JSON to do it. We could just make the parser smart enough to parse the params and ditch the JSON parser. While we're there we could drop the square brackets. If you want to do this I'll put in another PR.

}
break;

case ParserState::ExpectArray:
// Skip over whitespace searching for the start of JSON array args.
if (ch == '[') {
// Search for first argument string
state = ParserState::ExpectString;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting JSON array of arguments after '{}', but "
"found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::ExpectArrayDelimiterOrEnd:
// Skip over whitespace searching for a comma or close bracket.
if (ch == ',') {
state = ParserState::ExpectString;
} else if (ch == ']') {
state = ParserState::ExpectArgsEnd;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting ',', ']', or whitespace after '{}', but "
"found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::ExpectString:
// Skip over whitespace looking for the starting quote of a JSON string.
if (ch == '"') {
state = ParserState::String;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting '\"' or whitespace after '{}', but found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::String:
// Consume a JSON string (ignoring backslash-escaped chars).
if (ch == '\\') {
if (!has_next_ch) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Un-terminated backslash in JSON string after '{}'",
absl::StrCat(format.substr(start, pos - start))));
}

// Skip escaped char.
pos++;
} else if (ch == '"') {
state = ParserState::ExpectArrayDelimiterOrEnd;
}
break;

case ParserState::ExpectArgsEnd:
// Search for the closing paren of a %VAR(...)% expression.
if (ch == ')') {
state = ParserState::ExpectVariableEnd;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting ')' or whitespace after '{}', but found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::ExpectVariableEnd:
// Search for closing % of a %VAR(...)% expression
if (ch == '%') {
formatters.emplace_back(
new RequestInfoHeaderFormatter(format.substr(start, pos - start), append));
start = pos + 1;
state = ParserState::Literal;
break;
}

if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting '%' or whitespace after '{}', but found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

default:
NOT_REACHED;
}
return HeaderFormatterPtr{
new RequestInfoHeaderFormatter(format.substr(1, last_occ_pos - 1), append)};
} else {
return HeaderFormatterPtr{new PlainHeaderFormatter(format, append)};
} while (++pos < format.size());

if (state != ParserState::Literal) {
// Parsing terminated mid-variable.
throw EnvoyException(
fmt::format("Invalid header configuration. Un-terminated variable expression '{}'",
absl::StrCat(format.substr(start, pos - start))));
}

if (pos > start) {
// Trailing constant data.
absl::string_view literal = format.substr(start, pos - start);
formatters.emplace_back(new PlainHeaderFormatter(unescape(literal), append));
}

ASSERT(formatters.size() > 0);

if (formatters.size() == 1) {
return std::move(formatters[0]);
}

return HeaderFormatterPtr{new CompoundHeaderFormatter(std::move(formatters), append)};

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: std::make_unique

}

} // namespace
Expand Down
3 changes: 1 addition & 2 deletions test/common/router/config_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3280,8 +3280,7 @@ TEST(CustomRequestHeadersTest, CustomHeaderWrongFormat) {
NiceMock<Envoy::RequestInfo::MockRequestInfo> request_info;
EXPECT_THROW_WITH_MESSAGE(
ConfigImpl config(parseRouteConfigurationFromJson(json), runtime, cm, true), EnvoyException,
"Incorrect header configuration. Expected variable format %<variable_name>%, actual format "
"%CLIENT_IP");
"Invalid header configuration. Un-terminated variable expression 'CLIENT_IP'");
}

TEST(MetadataMatchCriteriaImpl, Create) {
Expand Down
Loading