Skip to content

Commit

Permalink
Filter user-tracking parameters from query string (fixes brave/brave-…
Browse files Browse the repository at this point in the history
  • Loading branch information
fmarier committed Aug 22, 2019
1 parent ede1c99 commit 8045692
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 0 deletions.
84 changes: 84 additions & 0 deletions browser/net/brave_site_hacks_network_delegate_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <memory>
#include <string>
#include <vector>

#include "base/sequenced_task_runner.h"
#include "base/strings/string_util.h"
Expand All @@ -29,6 +30,10 @@ namespace brave {

namespace {

const std::vector<std::string> query_string_trackers = {
"fbclid", "gclid", "msclkid", "mc_eid"
};

bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
GURL target_origin = ctx->request_url.GetOrigin();
Expand All @@ -50,12 +55,91 @@ bool ApplyPotentialReferrerBlock(std::shared_ptr<BraveRequestInfo> ctx) {
return false;
}

bool ApplyPotentialQueryStringFilter(std::shared_ptr<BraveRequestInfo> ctx) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (!ctx->request_url.has_query()) {
return false;
}

std::string new_query = ctx->request_url.query();

bool modified = false;
for (auto tracker : query_string_trackers) {
size_t key_size, key_start;

// Look for the tracker anywhere in the query string
// (e.g. ?...fbclid=1234...).
{
const std::string key = tracker + "=";
key_size = key.size();
key_start = new_query.find(key, 0);
}
if (key_start == std::string::npos) {
continue;
}

// Check to see if the tracker is preceded by a '&'
// (e.g. ?...&fbclid=1234...).
if (key_start != 0) {
if (new_query[--key_start] != '&') {
// Tracker is substring of another key (e.g. ?...&abc-fbclid=1234...).
continue;
}
key_size++;
}

{
// Find the size of the tracking parameter value.
const size_t value_start = key_start + key_size;
size_t value_end = new_query.find("&", key_start + 1);
if (value_end == std::string::npos) {
value_end = new_query.size(); // Tracker is the last query parameter.
}
if (value_end <= value_start) {
DCHECK_EQ(value_end, value_start);
continue; // Empty value, no need to remove tracker.
}

// Remove tracking parameter and its value from the string.
modified = true;
new_query.erase(key_start, value_end - key_start);
}

// Remove the trailing '&' if we removed the first parameter
// (e.g. ?fbclid=1234&...).
if (!new_query.empty() && key_start == 0) {
DCHECK_EQ(new_query[0], '&');
new_query.erase(key_start, 1);
}

if (new_query.empty()) {
break; // Nothing left to filter.
}
}

if (modified) {
url::Replacements<char> replacements;
if (new_query.empty()) {
replacements.ClearQuery();
} else {
replacements.SetQuery(new_query.c_str(),
url::Component(0, new_query.size()));
}
ctx->new_url_spec =
ctx->request_url.ReplaceComponents(replacements).spec();
return true;
}

return false;
}

} // namespace

int OnBeforeURLRequest_SiteHacksWork(
const ResponseCallback& next_callback,
std::shared_ptr<BraveRequestInfo> ctx) {
ApplyPotentialReferrerBlock(ctx);
ApplyPotentialQueryStringFilter(ctx);
return net::OK;
}

Expand Down
83 changes: 83 additions & 0 deletions browser/net/brave_site_hacks_network_delegate_helper_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "brave/browser/net/url_context.h"
Expand Down Expand Up @@ -267,4 +268,86 @@ TEST_F(BraveSiteHacksNetworkDelegateHelperTest,
});
}

TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringUntouched) {
std::vector<GURL> urls({
GURL("https://example.com/"),
GURL("https://example.com/?"),
GURL("https://example.com/?+%20"),
GURL("https://user:[email protected]/path/file.html?foo=1#fragment"),
GURL("http://user:[email protected]/path/file.html?foo=1&bar=2#fragment"),
GURL("https://example.com/?file=https%3A%2F%2Fexample.com%2Ftest.pdf"),
GURL("https://example.com/?title=1+2&caption=1%202"),
GURL("https://example.com/?foo=1&&bar=2#fragment"),
GURL("https://example.com/?foo&bar=&#fragment"),
GURL("https://example.com/?foo=1&fbcid=no&gcid=no&mc_cid=no&bar=&#frag"),
GURL("https://example.com/?fbclid=&gclid&=mc_eid&msclkid="),
GURL("https://example.com/?value=fbclid=1&not-gclid=2&foo+mc_eid=3"),
GURL("https://example.com/?+fbclid=1"),
GURL("https://example.com/?%20fbclid=1"),
GURL("https://example.com/#fbclid=1"),
});
std::for_each(urls.begin(), urls.end(), [this](GURL url){
net::TestDelegate test_delegate;
std::unique_ptr<net::URLRequest> request =
context()->CreateRequest(url, net::IDLE, &test_delegate,
TRAFFIC_ANNOTATION_FOR_TESTS);

std::shared_ptr<brave::BraveRequestInfo>
brave_request_info(new brave::BraveRequestInfo());
brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
brave_request_info);
brave::ResponseCallback callback;
int ret = brave::OnBeforeURLRequest_SiteHacksWork(callback,
brave_request_info);
EXPECT_EQ(ret, net::OK);
// new_url should not be set
EXPECT_TRUE(brave_request_info->new_url_spec.empty());
EXPECT_EQ(request->url(), url);
});
}

TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) {
std::vector<std::pair<GURL, GURL> > urls({
// { original url, expected url after filtering }
{ GURL("https://example.com/?fbclid=1234"), GURL("https://example.com/") },
{ GURL("https://example.com/?fbclid=1234&"), GURL("https://example.com/") },
{ GURL("https://example.com/?&fbclid=1234"), GURL("https://example.com/") },
{ GURL("https://example.com/?gclid=1234"), GURL("https://example.com/") },
{ GURL("https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1"),
GURL("https://example.com/") },
{ GURL("https://example.com/?fbclid=&foo=1&bar=2&gclid=abc"),
GURL("https://example.com/?fbclid=&foo=1&bar=2") },
{ GURL("https://example.com/?fbclid=&foo=1&gclid=1234&bar=2"),
GURL("https://example.com/?fbclid=&foo=1&bar=2") },
{ GURL("http://u:[email protected]/path/file.html?foo=1&fbclid=abcd#fragment"),
GURL("http://u:[email protected]/path/file.html?foo=1#fragment") },
// Obscure edge cases that break most parsers:
{ GURL("https://example.com/?fbclid&foo&&gclid=2&bar=&%20"),
GURL("https://example.com/?fbclid&foo&&bar=&%20") },
{ GURL("https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&"),
GURL("https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&") },
{ GURL("https://example.com/?fbclid=1&=2&?foo=yes&bar=2+"),
GURL("https://example.com/?=2&?foo=yes&bar=2+") },
{ GURL("https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4"),
GURL("https://example.com/?a+b+c=some%20thing&1%202=3+4") },
});
std::for_each(urls.begin(), urls.end(), [this](std::pair<GURL, GURL> url){
net::TestDelegate test_delegate;
std::unique_ptr<net::URLRequest> request =
context()->CreateRequest(url.first, net::IDLE, &test_delegate,
TRAFFIC_ANNOTATION_FOR_TESTS);

std::shared_ptr<brave::BraveRequestInfo>
brave_request_info(new brave::BraveRequestInfo());
brave::BraveRequestInfo::FillCTXFromRequest(request.get(),
brave_request_info);
brave::ResponseCallback callback;
int ret = brave::OnBeforeURLRequest_SiteHacksWork(callback,
brave_request_info);
EXPECT_EQ(ret, net::OK);
EXPECT_STREQ(brave_request_info->new_url_spec.c_str(),
url.second.spec().c_str());
});
}

} // namespace

0 comments on commit 8045692

Please sign in to comment.