Skip to content
1 change: 1 addition & 0 deletions RAW_RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ final version.
* Added support for building envoy with exported symbols
This change allows scripts loaded with the lua filter to load shared object libraries such as those installed via luarocks.
* Added support for more granular weighted cluster routing by allowing the total weight to be specified in configuration.
* Added support for custom request/response headers with mixed static and dynamic values.
5 changes: 5 additions & 0 deletions source/common/common/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ namespace Envoy {
*/
#define ARRAY_SIZE(X) (sizeof(X) / sizeof(X[0]))

/**
* @return the length of a static string literal, e.g. STATIC_STRLEN("foo") == 3.
*/
#define STATIC_STRLEN(X) (sizeof(X) - 1)

/**
* Helper macros from enum to string macros.
*/
Expand Down
23 changes: 13 additions & 10 deletions source/common/router/header_formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,35 @@ namespace Router {

namespace {

std::string formatUpstreamMetadataParseException(const std::string& params,
std::string formatUpstreamMetadataParseException(absl::string_view params,
const EnvoyException* cause = nullptr) {
std::string reason;
if (cause != nullptr) {
reason = fmt::format(", because {}", cause->what());
}

return fmt::format("Incorrect header configuration. Expected format "
return fmt::format("Invalid header configuration. Expected format "
"UPSTREAM_METADATA([\"namespace\", \"k\", ...]), actual format "
"UPSTREAM_METADATA{}{}",
params, reason);
std::string(params), reason);
}

// Parses the parameters for UPSTREAM_METADATA and returns a function suitable for accessing the
// specified metadata from an RequestInfo::RequestInfo. Expects a string formatted as:
// (["a", "b", "c"])
// There must be at least 2 array elements (a metadata namespace and at least 1 key).
std::function<std::string(const Envoy::RequestInfo::RequestInfo&)>
parseUpstreamMetadataField(const std::string& params_str) {
parseUpstreamMetadataField(absl::string_view params_str) {
params_str = StringUtil::trim(params_str);
if (params_str.empty() || params_str.front() != '(' || params_str.back() != ')') {
throw EnvoyException(formatUpstreamMetadataParseException(params_str));
}

absl::string_view json = params_str.substr(1, params_str.size() - 2); // trim parens

std::vector<std::string> params;
try {
Json::ObjectSharedPtr parsed_params =
Json::Factory::loadFromString(StringUtil::subspan(params_str, 1, params_str.size() - 1));
Json::ObjectSharedPtr parsed_params = Json::Factory::loadFromString(std::string(json));

for (const auto& param : parsed_params->asObjectArray()) {
params.emplace_back(param->asString());
Expand Down Expand Up @@ -113,7 +115,7 @@ parseUpstreamMetadataField(const std::string& params_str) {

} // namespace

RequestInfoHeaderFormatter::RequestInfoHeaderFormatter(const std::string& field_name, bool append)
RequestInfoHeaderFormatter::RequestInfoHeaderFormatter(absl::string_view field_name, bool append)
: append_(append) {
if (field_name == "PROTOCOL") {
field_extractor_ = [](const Envoy::RequestInfo::RequestInfo& request_info) {
Expand All @@ -125,11 +127,12 @@ RequestInfoHeaderFormatter::RequestInfoHeaderFormatter(const std::string& field_
return RequestInfo::Utility::formatDownstreamAddressNoPort(
*request_info.downstreamRemoteAddress());
};
} else if (StringUtil::startsWith(field_name.c_str(), "UPSTREAM_METADATA")) {
} else if (field_name.find_first_of("UPSTREAM_METADATA") == 0) {
field_extractor_ =
parseUpstreamMetadataField(field_name.substr(sizeof("UPSTREAM_METADATA") - 1));
parseUpstreamMetadataField(field_name.substr(STATIC_STRLEN("UPSTREAM_METADATA")));
} else {
throw EnvoyException(fmt::format("field '{}' not supported as custom header", field_name));
throw EnvoyException(
fmt::format("field '{}' not supported as custom header", std::string(field_name)));
}
}

Expand Down
29 changes: 27 additions & 2 deletions source/common/router/header_formatter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#include "envoy/access_log/access_log.h"

#include "absl/strings/string_view.h"

namespace Envoy {
namespace Router {

Expand All @@ -32,7 +34,7 @@ typedef std::unique_ptr<HeaderFormatter> HeaderFormatterPtr;
*/
class RequestInfoHeaderFormatter : public HeaderFormatter {
public:
RequestInfoHeaderFormatter(const std::string& field_name, bool append);
RequestInfoHeaderFormatter(absl::string_view field_name, bool append);

// HeaderFormatter::format
const std::string format(const Envoy::RequestInfo::RequestInfo& request_info) const override;
Expand All @@ -49,7 +51,7 @@ class RequestInfoHeaderFormatter : public HeaderFormatter {
class PlainHeaderFormatter : public HeaderFormatter {
public:
PlainHeaderFormatter(const std::string& static_header_value, bool append)
: static_value_(static_header_value), append_(append){};
: static_value_(static_header_value), append_(append) {}

// HeaderFormatter::format
const std::string format(const Envoy::RequestInfo::RequestInfo&) const override {
Expand All @@ -62,5 +64,28 @@ class PlainHeaderFormatter : public HeaderFormatter {
const bool append_;
};

/**
* A formatter that produces a value by concatenating the results of multiple HeaderFormatters.
*/
class CompoundHeaderFormatter : public HeaderFormatter {
public:
CompoundHeaderFormatter(std::vector<HeaderFormatterPtr>&& formatters, bool append)
: formatters_(std::move(formatters)), append_(append) {}

// HeaderFormatter::format
const std::string format(const Envoy::RequestInfo::RequestInfo& request_info) const override {
std::string buf;
for (const auto& formatter : formatters_) {
buf += formatter->format(request_info);
}
return buf;
};
bool append() const override { return append_; }

private:
const std::vector<HeaderFormatterPtr> formatters_;
const bool append_;
};

} // namespace Router
} // namespace Envoy
201 changes: 190 additions & 11 deletions source/common/router/header_parser.cc
Original file line number Diff line number Diff line change
@@ -1,28 +1,207 @@
#include "common/router/header_parser.h"

#include <ctype.h>

#include <memory>
#include <string>

#include "common/common/assert.h"
#include "common/protobuf/utility.h"

#include "absl/strings/str_cat.h"
#include "absl/strings/str_replace.h"

namespace Envoy {
namespace Router {

namespace {

enum class ParserState {
Literal, // processing literal data
VariableName, // consuming a %VAR% name
ExpectArray, // expect starting [ in %VAR([...])%
ExpectString, // expect starting " in array of strings
String, // consuming an array element string
ExpectArrayDelimiterOrEnd, // expect array delimiter (,) or end of array (])
ExpectArgsEnd, // expect closing ) in %VAR(...)%
ExpectVariableEnd // expect closing % in %VAR(...)%
};

std::string unescape(absl::string_view sv) { return absl::StrReplaceAll(sv, {{"%%", "%"}}); }

// Implements a state machine to parse custom headers. Each character of the custom header format
// is either literal text (with % escaped as %%) or part of a %VAR% or %VAR(["args"])% expression.
// The statement machine does minimal validation of the arguments (if any) and does not know the
// names of valid variables. Interpretation of the variable name and arguments is delegated to
// RequestInfoHeaderFormatter.
HeaderFormatterPtr parseInternal(const envoy::api::v2::HeaderValueOption& header_value_option) {
const std::string& format = header_value_option.header().value();
const bool append = PROTOBUF_GET_WRAPPED_OR_DEFAULT(header_value_option, append, true);

if (format.find("%") == 0) {
const size_t last_occ_pos = format.rfind("%");
if (last_occ_pos == std::string::npos || last_occ_pos <= 1) {
throw EnvoyException(fmt::format("Incorrect header configuration. Expected variable format "
"%<variable_name>%, actual format {}",
format));
absl::string_view format(header_value_option.header().value());
if (format.empty()) {
return std::make_unique<PlainHeaderFormatter>("", append);
}

std::vector<HeaderFormatterPtr> formatters;

size_t pos = 0, start = 0;
ParserState state = ParserState::Literal;
do {
const char ch = format[pos];
const bool has_next_ch = (pos + 1) < format.size();

switch (state) {
case ParserState::Literal:
// Searching for start of %VARIABLE% expression.
if (ch != '%') {
break;
}

if (!has_next_ch) {
throw EnvoyException(
fmt::format("Invalid header configuration. Un-escaped % at position {}", pos));
}

if (format[pos + 1] == '%') {
// Escaped %, skip next character.
pos++;
break;
}

// Un-escaped %: start of variable name. Create a formatter for preceding characters, if
// any.
state = ParserState::VariableName;
if (pos > start) {
absl::string_view literal = format.substr(start, pos - start);
formatters.emplace_back(new PlainHeaderFormatter(unescape(literal), append));
}
start = pos + 1;
break;

case ParserState::VariableName:
// Consume "VAR" from "%VAR%" or "%VAR(...)%"
if (ch == '%') {
// Found complete variable name, add formatter.
formatters.emplace_back(
new RequestInfoHeaderFormatter(format.substr(start, pos - start), append));
start = pos + 1;
state = ParserState::Literal;
break;
}

if (ch == '(') {
// Variable with arguments, search for start of arg array.
state = ParserState::ExpectArray;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was expecting you to bump a depth-count here. Scanning forward a bit, it looks like the algorithm is very sensitive to a particular json structure. I think it might actually be just as easy (and maybe very few if any additional lines of code) to have your scanner track the paren depth for () [] {} and the quoting state for " and '. In a way it might be easier to review as the reviewer would just need to take into account basic lexical nesting and not the schema you are tuned to.

Perhaps this is speculative generality but I also wonder if the json structure you expect might change over time requiring the next person to then update this routine.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The history of using JSON here is that when custom headers only supported the entire header value being custom (vs. what this PR is doing) we thought it would be easier to just use the JSON parser to handle arguments rather than writing an argument parser. In retrospect, I wish I had just handled arguments. FWIW, I'd be willing to toss the JSON business and rewrite this parser to just take quoted arguments (e.g. %UPSTREAM_METADATA("a", "b", "c")%. But if we keep JSON, the upshot is that I don't expect anyone to want anything other than an array. Something more complicated would (IMO) warrant modifying the protobuf definition of HeaderValueOption.

The JSON spec doesn't seem to allow single-quoted strings, so I don't think we need to handle them in any event.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment spawned a bunch of thought from me, but not that much typing. As I'm relatively new to Envoy I still don't have a great sense of what needs to be fast (e.g. runs per request) and what can't be changed (because it's already released and documented).

If this runs per request, I suspect it'd be worth considering dumping JSON parsing and just scan using string_view-related routines. If this only runs at startup, I'm a little less concerned, however I'm also looking at istio/istio#2554 (comment) and #2373 which are all around startup speed. That doesn't mean this would be a problem but I think it's worth understanding it.

The second part of this: is the syntax you need to parse set in stone? If so, and JSON generality is not needed, it'd be nice to drop it in favor of something that could be more easily and quickly parsed.

For the moment I'll continue the review assuming the answers are "the speed does not matter" and "the format cannot be changed".

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parsing these strings only happens only at configuration time.

The syntax needn't be set in stone, but there's also some user friction related to changing this stuff. It looks to me that all this was added after 1.5 and 1.6 isn't out yet, so if we were going to change it, now is the time. @mattklein123 what do you think?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have a strong opinion. This seems fine to me. What's the proposed alternative?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now the parser does just enough parsing to find the start/end of % expressions, but needs to know something about JSON to do it. We could just make the parser smart enough to parse the params and ditch the JSON parser. While we're there we could drop the square brackets. If you want to do this I'll put in another PR.

}
break;

case ParserState::ExpectArray:
// Skip over whitespace searching for the start of JSON array args.
if (ch == '[') {
// Search for first argument string
state = ParserState::ExpectString;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting JSON array of arguments after '{}', but "
"found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::ExpectArrayDelimiterOrEnd:
// Skip over whitespace searching for a comma or close bracket.
if (ch == ',') {
state = ParserState::ExpectString;
} else if (ch == ']') {
state = ParserState::ExpectArgsEnd;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting ',', ']', or whitespace after '{}', but "
"found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::ExpectString:
// Skip over whitespace looking for the starting quote of a JSON string.
if (ch == '"') {
state = ParserState::String;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting '\"' or whitespace after '{}', but found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::String:
// Consume a JSON string (ignoring backslash-escaped chars).
if (ch == '\\') {
if (!has_next_ch) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Un-terminated backslash in JSON string after '{}'",
absl::StrCat(format.substr(start, pos - start))));
}

// Skip escaped char.
pos++;
} else if (ch == '"') {
state = ParserState::ExpectArrayDelimiterOrEnd;
}
break;

case ParserState::ExpectArgsEnd:
// Search for the closing paren of a %VAR(...)% expression.
if (ch == ')') {
state = ParserState::ExpectVariableEnd;
} else if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting ')' or whitespace after '{}', but found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

case ParserState::ExpectVariableEnd:
// Search for closing % of a %VAR(...)% expression
if (ch == '%') {
formatters.emplace_back(
new RequestInfoHeaderFormatter(format.substr(start, pos - start), append));
start = pos + 1;
state = ParserState::Literal;
break;
}

if (!isspace(ch)) {
throw EnvoyException(fmt::format(
"Invalid header configuration. Expecting '%' or whitespace after '{}', but found '{}'",
absl::StrCat(format.substr(start, pos - start)), ch));
}
break;

default:
NOT_REACHED;
}
return HeaderFormatterPtr{
new RequestInfoHeaderFormatter(format.substr(1, last_occ_pos - 1), append)};
} else {
return HeaderFormatterPtr{new PlainHeaderFormatter(format, append)};
} while (++pos < format.size());

if (state != ParserState::Literal) {
// Parsing terminated mid-variable.
throw EnvoyException(
fmt::format("Invalid header configuration. Un-terminated variable expression '{}'",
absl::StrCat(format.substr(start, pos - start))));
}

if (pos > start) {
// Trailing constant data.
absl::string_view literal = format.substr(start, pos - start);
formatters.emplace_back(new PlainHeaderFormatter(unescape(literal), append));
}

ASSERT(formatters.size() > 0);

if (formatters.size() == 1) {
return std::move(formatters[0]);
}

return std::make_unique<CompoundHeaderFormatter>(std::move(formatters), append);
}

} // namespace
Expand Down
3 changes: 1 addition & 2 deletions test/common/router/config_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3397,8 +3397,7 @@ TEST(CustomRequestHeadersTest, CustomHeaderWrongFormat) {
NiceMock<Envoy::RequestInfo::MockRequestInfo> request_info;
EXPECT_THROW_WITH_MESSAGE(
ConfigImpl config(parseRouteConfigurationFromJson(json), runtime, cm, true), EnvoyException,
"Incorrect header configuration. Expected variable format %<variable_name>%, actual format "
"%CLIENT_IP");
"Invalid header configuration. Un-terminated variable expression 'CLIENT_IP'");
}

TEST(MetadataMatchCriteriaImpl, Create) {
Expand Down
Loading