From df10da12940bb7816a1e15593983f86877bedc43 Mon Sep 17 00:00:00 2001 From: nctl144 Date: Fri, 13 Jul 2018 12:01:47 -0500 Subject: [PATCH] rm test files --- .gitignore | 2 + gurl_unittest.cc | 878 --------- ipc/url_param_traits_unittest.cc | 72 - mojom/url_gurl_mojom_traits_unittest.cc | 93 - origin_unittest.cc | 379 ---- scheme_host_port_unittest.cc | 285 --- url_canon_icu_unittest.cc | 162 -- url_canon_unittest.cc | 2402 ----------------------- url_parse_unittest.cc | 690 ------- url_util_unittest.cc | 502 ----- 10 files changed, 2 insertions(+), 5463 deletions(-) create mode 100644 .gitignore delete mode 100644 gurl_unittest.cc delete mode 100644 ipc/url_param_traits_unittest.cc delete mode 100644 mojom/url_gurl_mojom_traits_unittest.cc delete mode 100644 origin_unittest.cc delete mode 100644 scheme_host_port_unittest.cc delete mode 100644 url_canon_icu_unittest.cc delete mode 100644 url_canon_unittest.cc delete mode 100644 url_parse_unittest.cc delete mode 100644 url_util_unittest.cc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c50f4a9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +**/*_unittest.cc +*_unittest.cc diff --git a/gurl_unittest.cc b/gurl_unittest.cc deleted file mode 100644 index eefa736..0000000 --- a/gurl_unittest.cc +++ /dev/null @@ -1,878 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "base/macros.h" -#include "base/strings/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" -#include "url/url_canon.h" -#include "url/url_test_utils.h" - -namespace url { - -namespace { - -template -void SetupReplacement( - void (Replacements::*func)(const CHAR*, const Component&), - Replacements* replacements, - const CHAR* str) { - if (str) { - Component comp; - if (str[0]) - comp.len = static_cast(strlen(str)); - (replacements->*func)(str, comp); - } -} - -// Returns the canonicalized string for the given URL string for the -// GURLTest.Types test. -std::string TypesTestCase(const char* src) { - GURL gurl(src); - return gurl.possibly_invalid_spec(); -} - -} // namespace - -// Different types of URLs should be handled differently, and handed off to -// different canonicalizers. -TEST(GURLTest, Types) { - // URLs with unknown schemes should be treated as path URLs, even when they - // have things like "://". - EXPECT_EQ("something:///HOSTNAME.com/", - TypesTestCase("something:///HOSTNAME.com/")); - - // Conversely, URLs with known schemes should always trigger standard URL - // handling. - EXPECT_EQ("http://hostname.com/", TypesTestCase("http:HOSTNAME.com")); - EXPECT_EQ("http://hostname.com/", TypesTestCase("http:/HOSTNAME.com")); - EXPECT_EQ("http://hostname.com/", TypesTestCase("http://HOSTNAME.com")); - EXPECT_EQ("http://hostname.com/", TypesTestCase("http:///HOSTNAME.com")); - -#ifdef WIN32 - // URLs that look like Windows absolute path specs. - EXPECT_EQ("file:///C:/foo.txt", TypesTestCase("c:\\foo.txt")); - EXPECT_EQ("file:///Z:/foo.txt", TypesTestCase("Z|foo.txt")); - EXPECT_EQ("file://server/foo.txt", TypesTestCase("\\\\server\\foo.txt")); - EXPECT_EQ("file://server/foo.txt", TypesTestCase("//server/foo.txt")); -#endif -} - -// Test the basic creation and querying of components in a GURL. We assume that -// the parser is already tested and works, so we are mostly interested if the -// object does the right thing with the results. -TEST(GURLTest, Components) { - GURL empty_url(base::UTF8ToUTF16("")); - EXPECT_TRUE(empty_url.is_empty()); - EXPECT_FALSE(empty_url.is_valid()); - - GURL url(base::UTF8ToUTF16("http://user:pass@google.com:99/foo;bar?q=a#ref")); - EXPECT_FALSE(url.is_empty()); - EXPECT_TRUE(url.is_valid()); - EXPECT_TRUE(url.SchemeIs("http")); - EXPECT_FALSE(url.SchemeIsFile()); - - // This is the narrow version of the URL, which should match the wide input. - EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url.spec()); - - EXPECT_EQ("http", url.scheme()); - EXPECT_EQ("user", url.username()); - EXPECT_EQ("pass", url.password()); - EXPECT_EQ("google.com", url.host()); - EXPECT_EQ("99", url.port()); - EXPECT_EQ(99, url.IntPort()); - EXPECT_EQ("/foo;bar", url.path()); - EXPECT_EQ("q=a", url.query()); - EXPECT_EQ("ref", url.ref()); - - // Test parsing userinfo with special characters. - GURL url_special_pass("http://user:%40!$&'()*+,;=:@google.com:12345"); - EXPECT_TRUE(url_special_pass.is_valid()); - // GURL canonicalizes some delimiters. - EXPECT_EQ("%40!$&%27()*+,%3B%3D%3A", url_special_pass.password()); - EXPECT_EQ("google.com", url_special_pass.host()); - EXPECT_EQ("12345", url_special_pass.port()); -} - -TEST(GURLTest, Empty) { - GURL url; - EXPECT_FALSE(url.is_valid()); - EXPECT_EQ("", url.spec()); - - EXPECT_EQ("", url.scheme()); - EXPECT_EQ("", url.username()); - EXPECT_EQ("", url.password()); - EXPECT_EQ("", url.host()); - EXPECT_EQ("", url.port()); - EXPECT_EQ(PORT_UNSPECIFIED, url.IntPort()); - EXPECT_EQ("", url.path()); - EXPECT_EQ("", url.query()); - EXPECT_EQ("", url.ref()); -} - -TEST(GURLTest, Copy) { - GURL url(base::UTF8ToUTF16( - "http://user:pass@google.com:99/foo;bar?q=a#ref")); - - GURL url2(url); - EXPECT_TRUE(url2.is_valid()); - - EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec()); - EXPECT_EQ("http", url2.scheme()); - EXPECT_EQ("user", url2.username()); - EXPECT_EQ("pass", url2.password()); - EXPECT_EQ("google.com", url2.host()); - EXPECT_EQ("99", url2.port()); - EXPECT_EQ(99, url2.IntPort()); - EXPECT_EQ("/foo;bar", url2.path()); - EXPECT_EQ("q=a", url2.query()); - EXPECT_EQ("ref", url2.ref()); - - // Copying of invalid URL should be invalid - GURL invalid; - GURL invalid2(invalid); - EXPECT_FALSE(invalid2.is_valid()); - EXPECT_EQ("", invalid2.spec()); - EXPECT_EQ("", invalid2.scheme()); - EXPECT_EQ("", invalid2.username()); - EXPECT_EQ("", invalid2.password()); - EXPECT_EQ("", invalid2.host()); - EXPECT_EQ("", invalid2.port()); - EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort()); - EXPECT_EQ("", invalid2.path()); - EXPECT_EQ("", invalid2.query()); - EXPECT_EQ("", invalid2.ref()); -} - -TEST(GURLTest, Assign) { - GURL url(base::UTF8ToUTF16( - "http://user:pass@google.com:99/foo;bar?q=a#ref")); - - GURL url2; - url2 = url; - EXPECT_TRUE(url2.is_valid()); - - EXPECT_EQ("http://user:pass@google.com:99/foo;bar?q=a#ref", url2.spec()); - EXPECT_EQ("http", url2.scheme()); - EXPECT_EQ("user", url2.username()); - EXPECT_EQ("pass", url2.password()); - EXPECT_EQ("google.com", url2.host()); - EXPECT_EQ("99", url2.port()); - EXPECT_EQ(99, url2.IntPort()); - EXPECT_EQ("/foo;bar", url2.path()); - EXPECT_EQ("q=a", url2.query()); - EXPECT_EQ("ref", url2.ref()); - - // Assignment of invalid URL should be invalid - GURL invalid; - GURL invalid2; - invalid2 = invalid; - EXPECT_FALSE(invalid2.is_valid()); - EXPECT_EQ("", invalid2.spec()); - EXPECT_EQ("", invalid2.scheme()); - EXPECT_EQ("", invalid2.username()); - EXPECT_EQ("", invalid2.password()); - EXPECT_EQ("", invalid2.host()); - EXPECT_EQ("", invalid2.port()); - EXPECT_EQ(PORT_UNSPECIFIED, invalid2.IntPort()); - EXPECT_EQ("", invalid2.path()); - EXPECT_EQ("", invalid2.query()); - EXPECT_EQ("", invalid2.ref()); -} - -// This is a regression test for http://crbug.com/309975. -TEST(GURLTest, SelfAssign) { - GURL a("filesystem:http://example.com/temporary/"); - // This should not crash. - a = *&a; // The *& defeats Clang's -Wself-assign warning. -} - -TEST(GURLTest, CopyFileSystem) { - GURL url(base::UTF8ToUTF16( - "filesystem:https://user:pass@google.com:99/t/foo;bar?q=a#ref")); - - GURL url2(url); - EXPECT_TRUE(url2.is_valid()); - - EXPECT_EQ("filesystem:https://google.com:99/t/foo;bar?q=a#ref", url2.spec()); - EXPECT_EQ("filesystem", url2.scheme()); - EXPECT_EQ("", url2.username()); - EXPECT_EQ("", url2.password()); - EXPECT_EQ("", url2.host()); - EXPECT_EQ("", url2.port()); - EXPECT_EQ(PORT_UNSPECIFIED, url2.IntPort()); - EXPECT_EQ("/foo;bar", url2.path()); - EXPECT_EQ("q=a", url2.query()); - EXPECT_EQ("ref", url2.ref()); - - const GURL* inner = url2.inner_url(); - ASSERT_TRUE(inner); - EXPECT_EQ("https", inner->scheme()); - EXPECT_EQ("", inner->username()); - EXPECT_EQ("", inner->password()); - EXPECT_EQ("google.com", inner->host()); - EXPECT_EQ("99", inner->port()); - EXPECT_EQ(99, inner->IntPort()); - EXPECT_EQ("/t", inner->path()); - EXPECT_EQ("", inner->query()); - EXPECT_EQ("", inner->ref()); -} - -TEST(GURLTest, IsValid) { - const char* valid_cases[] = { - "http://google.com", - "unknown://google.com", - "http://user:pass@google.com", - "http://google.com:12345", - "http://google.com/path", - "http://google.com//path", - "http://google.com?k=v#fragment", - "http://user:pass@google.com:12345/path?k=v#fragment", - "http:/path", - "http:path", - }; - for (size_t i = 0; i < arraysize(valid_cases); i++) { - EXPECT_TRUE(GURL(valid_cases[i]).is_valid()) - << "Case: " << valid_cases[i]; - } - - const char* invalid_cases[] = { - "http://?k=v", - "http:://google.com", - "http//google.com", - "http://google.com:12three45", - "://google.com", - "path", - }; - for (size_t i = 0; i < arraysize(invalid_cases); i++) { - EXPECT_FALSE(GURL(invalid_cases[i]).is_valid()) - << "Case: " << invalid_cases[i]; - } -} - -TEST(GURLTest, ExtraSlashesBeforeAuthority) { - // According to RFC3986, the hierarchical part for URI with an authority - // must use only two slashes; GURL intentionally just ignores extra slashes - // if there are more than 2, and parses the following part as an authority. - GURL url("http:///host"); - EXPECT_EQ("host", url.host()); - EXPECT_EQ("/", url.path()); -} - -// Given an invalid URL, we should still get most of the components. -TEST(GURLTest, ComponentGettersWorkEvenForInvalidURL) { - GURL url("http:google.com:foo"); - EXPECT_FALSE(url.is_valid()); - EXPECT_EQ("http://google.com:foo/", url.possibly_invalid_spec()); - - EXPECT_EQ("http", url.scheme()); - EXPECT_EQ("", url.username()); - EXPECT_EQ("", url.password()); - EXPECT_EQ("google.com", url.host()); - EXPECT_EQ("foo", url.port()); - EXPECT_EQ(PORT_INVALID, url.IntPort()); - EXPECT_EQ("/", url.path()); - EXPECT_EQ("", url.query()); - EXPECT_EQ("", url.ref()); -} - -TEST(GURLTest, Resolve) { - // The tricky cases for relative URL resolving are tested in the - // canonicalizer unit test. Here, we just test that the GURL integration - // works properly. - struct ResolveCase { - const char* base; - const char* relative; - bool expected_valid; - const char* expected; - } resolve_cases[] = { - {"http://www.google.com/", "foo.html", true, "http://www.google.com/foo.html"}, - {"http://www.google.com/foo/", "bar", true, "http://www.google.com/foo/bar"}, - {"http://www.google.com/foo/", "/bar", true, "http://www.google.com/bar"}, - {"http://www.google.com/foo", "bar", true, "http://www.google.com/bar"}, - {"http://www.google.com/", "http://images.google.com/foo.html", true, "http://images.google.com/foo.html"}, - {"http://www.google.com/", "http://images.\tgoogle.\ncom/\rfoo.html", true, "http://images.google.com/foo.html"}, - {"http://www.google.com/blah/bloo?c#d", "../../../hello/./world.html?a#b", true, "http://www.google.com/hello/world.html?a#b"}, - {"http://www.google.com/foo#bar", "#com", true, "http://www.google.com/foo#com"}, - {"http://www.google.com/", "Https:images.google.com", true, "https://images.google.com/"}, - // A non-standard base can be replaced with a standard absolute URL. - {"data:blahblah", "http://google.com/", true, "http://google.com/"}, - {"data:blahblah", "http:google.com", true, "http://google.com/"}, - // Filesystem URLs have different paths to test. - {"filesystem:http://www.google.com/type/", "foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, - {"filesystem:http://www.google.com/type/", "../foo.html", true, "filesystem:http://www.google.com/type/foo.html"}, - }; - - for (size_t i = 0; i < arraysize(resolve_cases); i++) { - // 8-bit code path. - GURL input(resolve_cases[i].base); - GURL output = input.Resolve(resolve_cases[i].relative); - EXPECT_EQ(resolve_cases[i].expected_valid, output.is_valid()) << i; - EXPECT_EQ(resolve_cases[i].expected, output.spec()) << i; - EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL); - - // Wide code path. - GURL inputw(base::UTF8ToUTF16(resolve_cases[i].base)); - GURL outputw = - input.Resolve(base::UTF8ToUTF16(resolve_cases[i].relative)); - EXPECT_EQ(resolve_cases[i].expected_valid, outputw.is_valid()) << i; - EXPECT_EQ(resolve_cases[i].expected, outputw.spec()) << i; - EXPECT_EQ(outputw.SchemeIsFileSystem(), outputw.inner_url() != NULL); - } -} - -TEST(GURLTest, GetOrigin) { - struct TestCase { - const char* input; - const char* expected; - } cases[] = { - {"http://www.google.com", "http://www.google.com/"}, - {"javascript:window.alert(\"hello,world\");", ""}, - {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/"}, - {"http://user@www.google.com", "http://www.google.com/"}, - {"http://:pass@www.google.com", "http://www.google.com/"}, - {"http://:@www.google.com", "http://www.google.com/"}, - {"filesystem:http://www.google.com/temp/foo?q#b", "http://www.google.com/"}, - {"filesystem:http://user:pass@google.com:21/blah#baz", "http://google.com:21/"}, - }; - for (size_t i = 0; i < arraysize(cases); i++) { - GURL url(cases[i].input); - GURL origin = url.GetOrigin(); - EXPECT_EQ(cases[i].expected, origin.spec()); - } -} - -TEST(GURLTest, GetAsReferrer) { - struct TestCase { - const char* input; - const char* expected; - } cases[] = { - {"http://www.google.com", "http://www.google.com/"}, - {"http://user:pass@www.google.com:21/blah#baz", "http://www.google.com:21/blah"}, - {"http://user@www.google.com", "http://www.google.com/"}, - {"http://:pass@www.google.com", "http://www.google.com/"}, - {"http://:@www.google.com", "http://www.google.com/"}, - {"http://www.google.com/temp/foo?q#b", "http://www.google.com/temp/foo?q"}, - {"not a url", ""}, - {"unknown-scheme://foo.html", ""}, - {"file:///tmp/test.html", ""}, - {"https://www.google.com", "https://www.google.com/"}, - }; - for (size_t i = 0; i < arraysize(cases); i++) { - GURL url(cases[i].input); - GURL origin = url.GetAsReferrer(); - EXPECT_EQ(cases[i].expected, origin.spec()); - } -} - -TEST(GURLTest, GetWithEmptyPath) { - struct TestCase { - const char* input; - const char* expected; - } cases[] = { - {"http://www.google.com", "http://www.google.com/"}, - {"javascript:window.alert(\"hello, world\");", ""}, - {"http://www.google.com/foo/bar.html?baz=22", "http://www.google.com/"}, - {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"}, - {"filesystem:file:///temporary/bar.html?baz=22", "filesystem:file:///temporary/"}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - GURL url(cases[i].input); - GURL empty_path = url.GetWithEmptyPath(); - EXPECT_EQ(cases[i].expected, empty_path.spec()); - } -} - -TEST(GURLTest, GetWithoutFilename) { - struct TestCase { - const char* input; - const char* expected; - } cases[] = { - // Common Standard URLs. - {"https://www.google.com", "https://www.google.com/"}, - {"https://www.google.com/", "https://www.google.com/"}, - {"https://www.google.com/maps.htm", "https://www.google.com/"}, - {"https://www.google.com/maps/", "https://www.google.com/maps/"}, - {"https://www.google.com/index.html", "https://www.google.com/"}, - {"https://www.google.com/index.html?q=maps", "https://www.google.com/"}, - {"https://www.google.com/index.html#maps/", "https://www.google.com/"}, - {"https://foo:bar@www.google.com/maps.htm", "https://foo:bar@www.google.com/"}, - {"https://www.google.com/maps/au/index.html", "https://www.google.com/maps/au/"}, - {"https://www.google.com/maps/au/north", "https://www.google.com/maps/au/"}, - {"https://www.google.com/maps/au/north/", "https://www.google.com/maps/au/north/"}, - {"https://www.google.com/maps/au/index.html?q=maps#fragment/", "https://www.google.com/maps/au/"}, - {"http://www.google.com:8000/maps/au/index.html?q=maps#fragment/", "http://www.google.com:8000/maps/au/"}, - {"https://www.google.com/maps/au/north/?q=maps#fragment", "https://www.google.com/maps/au/north/"}, - {"https://www.google.com/maps/au/north?q=maps#fragment", "https://www.google.com/maps/au/"}, - // Less common standard URLs. - {"filesystem:http://www.google.com/temporary/bar.html?baz=22", "filesystem:http://www.google.com/temporary/"}, - {"file:///temporary/bar.html?baz=22","file:///temporary/"}, - {"ftp://foo/test/index.html", "ftp://foo/test/"}, - {"gopher://foo/test/index.html", "gopher://foo/test/"}, - {"ws://foo/test/index.html", "ws://foo/test/"}, - // Non-standard, hierarchical URLs. - {"chrome://foo/bar.html", "chrome://foo/"}, - {"httpa://foo/test/index.html", "httpa://foo/test/"}, - // Non-standard, non-hierarchical URLs. - {"blob:https://foo.bar/test/index.html", ""}, - {"about:blank", ""}, - {"data:foobar", ""}, - {"scheme:opaque_data", ""}, - // Invalid URLs. - {"foobar", ""}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - GURL url(cases[i].input); - GURL without_filename = url.GetWithoutFilename(); - EXPECT_EQ(cases[i].expected, without_filename.spec()) << i; - } -} - -TEST(GURLTest, Replacements) { - // The URL canonicalizer replacement test will handle most of these case. - // The most important thing to do here is to check that the proper - // canonicalizer gets called based on the scheme of the input. - struct ReplaceCase { - const char* base; - const char* scheme; - const char* username; - const char* password; - const char* host; - const char* port; - const char* path; - const char* query; - const char* ref; - const char* expected; - } replace_cases[] = { - {"http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, NULL, NULL, - NULL, "/", "", "", "http://www.google.com/"}, - {"http://www.google.com/foo/bar.html?foo#bar", "javascript", "", "", "", - "", "window.open('foo');", "", "", "javascript:window.open('foo');"}, - {"file:///C:/foo/bar.txt", "http", NULL, NULL, "www.google.com", "99", - "/foo", "search", "ref", "http://www.google.com:99/foo?search#ref"}, -#ifdef WIN32 - {"http://www.google.com/foo/bar.html?foo#bar", "file", "", "", "", "", - "c:\\", "", "", "file:///C:/"}, -#endif - {"filesystem:http://www.google.com/foo/bar.html?foo#bar", NULL, NULL, - NULL, NULL, NULL, "/", "", "", "filesystem:http://www.google.com/foo/"}, - // Lengthen the URL instead of shortening it, to test creation of - // inner_url. - {"filesystem:http://www.google.com/foo/", NULL, NULL, NULL, NULL, NULL, - "bar.html", "foo", "bar", - "filesystem:http://www.google.com/foo/bar.html?foo#bar"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - GURL url(cur.base); - GURL::Replacements repl; - SetupReplacement(&GURL::Replacements::SetScheme, &repl, cur.scheme); - SetupReplacement(&GURL::Replacements::SetUsername, &repl, cur.username); - SetupReplacement(&GURL::Replacements::SetPassword, &repl, cur.password); - SetupReplacement(&GURL::Replacements::SetHost, &repl, cur.host); - SetupReplacement(&GURL::Replacements::SetPort, &repl, cur.port); - SetupReplacement(&GURL::Replacements::SetPath, &repl, cur.path); - SetupReplacement(&GURL::Replacements::SetQuery, &repl, cur.query); - SetupReplacement(&GURL::Replacements::SetRef, &repl, cur.ref); - GURL output = url.ReplaceComponents(repl); - - EXPECT_EQ(replace_cases[i].expected, output.spec()); - - EXPECT_EQ(output.SchemeIsFileSystem(), output.inner_url() != NULL); - if (output.SchemeIsFileSystem()) { - // TODO(mmenke): inner_url()->spec() is currently the same as the spec() - // for the GURL itself. This should be fixed. - // See https://crbug.com/619596 - EXPECT_EQ(replace_cases[i].expected, output.inner_url()->spec()); - } - } -} - -TEST(GURLTest, ClearFragmentOnDataUrl) { - // http://crbug.com/291747 - a data URL may legitimately have trailing - // whitespace in the spec after the ref is cleared. Test this does not trigger - // the Parsed importing validation DCHECK in GURL. - GURL url(" data: one ? two # three "); - - // By default the trailing whitespace will have been stripped. - EXPECT_EQ("data: one ? two # three", url.spec()); - GURL::Replacements repl; - repl.ClearRef(); - GURL url_no_ref = url.ReplaceComponents(repl); - - EXPECT_EQ("data: one ? two ", url_no_ref.spec()); - - // Importing a parsed URL via this constructor overload will retain trailing - // whitespace. - GURL import_url(url_no_ref.spec(), - url_no_ref.parsed_for_possibly_invalid_spec(), - url_no_ref.is_valid()); - EXPECT_EQ(url_no_ref, import_url); - EXPECT_EQ(import_url.query(), " two "); -} - -TEST(GURLTest, PathForRequest) { - struct TestCase { - const char* input; - const char* expected; - const char* inner_expected; - } cases[] = { - {"http://www.google.com", "/", NULL}, - {"http://www.google.com/", "/", NULL}, - {"http://www.google.com/foo/bar.html?baz=22", "/foo/bar.html?baz=22", NULL}, - {"http://www.google.com/foo/bar.html#ref", "/foo/bar.html", NULL}, - {"http://www.google.com/foo/bar.html?query#ref", "/foo/bar.html?query", NULL}, - {"filesystem:http://www.google.com/temporary/foo/bar.html?query#ref", "/foo/bar.html?query", "/temporary"}, - {"filesystem:http://www.google.com/temporary/foo/bar.html?query", "/foo/bar.html?query", "/temporary"}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - GURL url(cases[i].input); - std::string path_request = url.PathForRequest(); - EXPECT_EQ(cases[i].expected, path_request); - EXPECT_EQ(cases[i].inner_expected == NULL, url.inner_url() == NULL); - if (url.inner_url() && cases[i].inner_expected) - EXPECT_EQ(cases[i].inner_expected, url.inner_url()->PathForRequest()); - } -} - -TEST(GURLTest, EffectiveIntPort) { - struct PortTest { - const char* spec; - int expected_int_port; - } port_tests[] = { - // http - {"http://www.google.com/", 80}, - {"http://www.google.com:80/", 80}, - {"http://www.google.com:443/", 443}, - - // https - {"https://www.google.com/", 443}, - {"https://www.google.com:443/", 443}, - {"https://www.google.com:80/", 80}, - - // ftp - {"ftp://www.google.com/", 21}, - {"ftp://www.google.com:21/", 21}, - {"ftp://www.google.com:80/", 80}, - - // gopher - {"gopher://www.google.com/", 70}, - {"gopher://www.google.com:70/", 70}, - {"gopher://www.google.com:80/", 80}, - - // file - no port - {"file://www.google.com/", PORT_UNSPECIFIED}, - {"file://www.google.com:443/", PORT_UNSPECIFIED}, - - // data - no port - {"data:www.google.com:90", PORT_UNSPECIFIED}, - {"data:www.google.com", PORT_UNSPECIFIED}, - - // filesystem - no port - {"filesystem:http://www.google.com:90/t/foo", PORT_UNSPECIFIED}, - {"filesystem:file:///t/foo", PORT_UNSPECIFIED}, - }; - - for (size_t i = 0; i < arraysize(port_tests); i++) { - GURL url(port_tests[i].spec); - EXPECT_EQ(port_tests[i].expected_int_port, url.EffectiveIntPort()); - } -} - -TEST(GURLTest, IPAddress) { - struct IPTest { - const char* spec; - bool expected_ip; - } ip_tests[] = { - {"http://www.google.com/", false}, - {"http://192.168.9.1/", true}, - {"http://192.168.9.1.2/", false}, - {"http://192.168.m.1/", false}, - {"http://2001:db8::1/", false}, - {"http://[2001:db8::1]/", true}, - {"", false}, - {"some random input!", false}, - }; - - for (size_t i = 0; i < arraysize(ip_tests); i++) { - GURL url(ip_tests[i].spec); - EXPECT_EQ(ip_tests[i].expected_ip, url.HostIsIPAddress()); - } -} - -TEST(GURLTest, HostNoBrackets) { - struct TestCase { - const char* input; - const char* expected_host; - const char* expected_plainhost; - } cases[] = { - {"http://www.google.com", "www.google.com", "www.google.com"}, - {"http://[2001:db8::1]/", "[2001:db8::1]", "2001:db8::1"}, - {"http://[::]/", "[::]", "::"}, - - // Don't require a valid URL, but don't crash either. - {"http://[]/", "[]", ""}, - {"http://[x]/", "[x]", "x"}, - {"http://[x/", "[x", "[x"}, - {"http://x]/", "x]", "x]"}, - {"http://[/", "[", "["}, - {"http://]/", "]", "]"}, - {"", "", ""}, - }; - for (size_t i = 0; i < arraysize(cases); i++) { - GURL url(cases[i].input); - EXPECT_EQ(cases[i].expected_host, url.host()); - EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBrackets()); - EXPECT_EQ(cases[i].expected_plainhost, url.HostNoBracketsPiece()); - } -} - -TEST(GURLTest, DomainIs) { - GURL url_1("http://google.com/foo"); - EXPECT_TRUE(url_1.DomainIs("google.com")); - - // Subdomain and port are ignored. - GURL url_2("http://www.google.com:99/foo"); - EXPECT_TRUE(url_2.DomainIs("google.com")); - - // Different top-level domain. - GURL url_3("http://www.google.com.cn/foo"); - EXPECT_FALSE(url_3.DomainIs("google.com")); - - // Different host name. - GURL url_4("http://www.iamnotgoogle.com/foo"); - EXPECT_FALSE(url_4.DomainIs("google.com")); - - // The input must be lower-cased otherwise DomainIs returns false. - GURL url_5("http://www.google.com/foo"); - EXPECT_FALSE(url_5.DomainIs("Google.com")); - - // If the URL is invalid, DomainIs returns false. - GURL invalid_url("google.com"); - EXPECT_FALSE(invalid_url.is_valid()); - EXPECT_FALSE(invalid_url.DomainIs("google.com")); - - GURL url_with_escape_chars("https://www.,.test"); - EXPECT_TRUE(url_with_escape_chars.is_valid()); - EXPECT_EQ(url_with_escape_chars.host(), "www.%2C.test"); - EXPECT_TRUE(url_with_escape_chars.DomainIs("%2C.test")); -} - -TEST(GURLTest, DomainIsTerminatingDotBehavior) { - // If the host part ends with a dot, it matches input domains - // with or without a dot. - GURL url_with_dot("http://www.google.com./foo"); - EXPECT_TRUE(url_with_dot.DomainIs("google.com")); - EXPECT_TRUE(url_with_dot.DomainIs("google.com.")); - EXPECT_TRUE(url_with_dot.DomainIs(".com")); - EXPECT_TRUE(url_with_dot.DomainIs(".com.")); - - // But, if the host name doesn't end with a dot and the input - // domain does, then it's considered to not match. - GURL url_without_dot("http://google.com/foo"); - EXPECT_FALSE(url_without_dot.DomainIs("google.com.")); - - // If the URL ends with two dots, it doesn't match. - GURL url_with_two_dots("http://www.google.com../foo"); - EXPECT_FALSE(url_with_two_dots.DomainIs("google.com")); -} - -TEST(GURLTest, DomainIsWithFilesystemScheme) { - GURL url_1("filesystem:http://www.google.com:99/foo/"); - EXPECT_TRUE(url_1.DomainIs("google.com")); - - GURL url_2("filesystem:http://www.iamnotgoogle.com/foo/"); - EXPECT_FALSE(url_2.DomainIs("google.com")); -} - -// Newlines should be stripped from inputs. -TEST(GURLTest, Newlines) { - // Constructor. - GURL url_1(" \t ht\ntp://\twww.goo\rgle.com/as\ndf \n "); - EXPECT_EQ("http://www.google.com/asdf", url_1.spec()); - EXPECT_FALSE( - url_1.parsed_for_possibly_invalid_spec().potentially_dangling_markup); - - // Relative path resolver. - GURL url_2 = url_1.Resolve(" \n /fo\to\r "); - EXPECT_EQ("http://www.google.com/foo", url_2.spec()); - EXPECT_FALSE( - url_2.parsed_for_possibly_invalid_spec().potentially_dangling_markup); - - // Constructor. - GURL url_3(" \t ht\ntp://\twww.goo\rgle.com/as\ndf< \n "); - EXPECT_EQ("http://www.google.com/asdf%3C", url_3.spec()); - EXPECT_TRUE( - url_3.parsed_for_possibly_invalid_spec().potentially_dangling_markup); - - // Relative path resolver. - GURL url_4 = url_1.Resolve(" \n /fo\to<\r "); - EXPECT_EQ("http://www.google.com/foo%3C", url_4.spec()); - EXPECT_TRUE( - url_4.parsed_for_possibly_invalid_spec().potentially_dangling_markup); - - // Note that newlines are NOT stripped from ReplaceComponents. -} - -TEST(GURLTest, IsStandard) { - GURL a("http:foo/bar"); - EXPECT_TRUE(a.IsStandard()); - - GURL b("foo:bar/baz"); - EXPECT_FALSE(b.IsStandard()); - - GURL c("foo://bar/baz"); - EXPECT_FALSE(c.IsStandard()); - - GURL d("cid:bar@baz"); - EXPECT_FALSE(d.IsStandard()); -} - -TEST(GURLTest, SchemeIsHTTPOrHTTPS) { - EXPECT_TRUE(GURL("http://bar/").SchemeIsHTTPOrHTTPS()); - EXPECT_TRUE(GURL("HTTPS://BAR").SchemeIsHTTPOrHTTPS()); - EXPECT_FALSE(GURL("ftp://bar/").SchemeIsHTTPOrHTTPS()); -} - -TEST(GURLTest, SchemeIsWSOrWSS) { - EXPECT_TRUE(GURL("WS://BAR/").SchemeIsWSOrWSS()); - EXPECT_TRUE(GURL("wss://bar/").SchemeIsWSOrWSS()); - EXPECT_FALSE(GURL("http://bar/").SchemeIsWSOrWSS()); -} - -TEST(GURLTest, SchemeIsCryptographic) { - EXPECT_TRUE(GURL("https://foo.bar.com/").SchemeIsCryptographic()); - EXPECT_TRUE(GURL("HTTPS://foo.bar.com/").SchemeIsCryptographic()); - EXPECT_TRUE(GURL("HtTpS://foo.bar.com/").SchemeIsCryptographic()); - - EXPECT_TRUE(GURL("wss://foo.bar.com/").SchemeIsCryptographic()); - EXPECT_TRUE(GURL("WSS://foo.bar.com/").SchemeIsCryptographic()); - EXPECT_TRUE(GURL("WsS://foo.bar.com/").SchemeIsCryptographic()); - - EXPECT_FALSE(GURL("http://foo.bar.com/").SchemeIsCryptographic()); - EXPECT_FALSE(GURL("ws://foo.bar.com/").SchemeIsCryptographic()); -} - -TEST(GURLTest, SchemeIsBlob) { - EXPECT_TRUE(GURL("BLOB://BAR/").SchemeIsBlob()); - EXPECT_TRUE(GURL("blob://bar/").SchemeIsBlob()); - EXPECT_FALSE(GURL("http://bar/").SchemeIsBlob()); -} - -TEST(GURLTest, ContentAndPathForNonStandardURLs) { - struct TestCase { - const char* url; - const char* expected; - } cases[] = { - {"null", ""}, - {"not-a-standard-scheme:this is arbitrary content", - "this is arbitrary content"}, - {"view-source:http://example.com/path", "http://example.com/path"}, - {"blob:http://example.com/GUID", "http://example.com/GUID"}, - {"blob://http://example.com/GUID", "//http://example.com/GUID"}, - {"blob:http://user:password@example.com/GUID", - "http://user:password@example.com/GUID"}, - - // TODO(mkwst): This seems like a bug. https://crbug.com/513600 - {"filesystem:http://example.com/path", "/"}, - }; - - for (const auto& test : cases) { - GURL url(test.url); - EXPECT_EQ(test.expected, url.path()) << test.url; - EXPECT_EQ(test.expected, url.GetContent()) << test.url; - } -} - -TEST(GURLTest, IsAboutBlank) { - const std::string kAboutBlankUrls[] = {"about:blank", "about:blank?foo", - "about:blank/#foo", - "about:blank?foo#foo"}; - for (const auto& url : kAboutBlankUrls) - EXPECT_TRUE(GURL(url).IsAboutBlank()) << url; - - const std::string kNotAboutBlankUrls[] = { - "http:blank", "about:blan", "about://blank", - "about:blank/foo", "about://:8000/blank", "about://foo:foo@/blank", - "foo@about:blank", "foo:bar@about:blank", "about:blank:8000"}; - for (const auto& url : kNotAboutBlankUrls) - EXPECT_FALSE(GURL(url).IsAboutBlank()) << url; -} - -TEST(GURLTest, EqualsIgnoringRef) { - const struct { - const char* url_a; - const char* url_b; - bool are_equals; - } kTestCases[] = { - // No ref. - {"http://a.com", "http://a.com", true}, - {"http://a.com", "http://b.com", false}, - - // Same Ref. - {"http://a.com#foo", "http://a.com#foo", true}, - {"http://a.com#foo", "http://b.com#foo", false}, - - // Different Refs. - {"http://a.com#foo", "http://a.com#bar", true}, - {"http://a.com#foo", "http://b.com#bar", false}, - - // One has a ref, the other doesn't. - {"http://a.com#foo", "http://a.com", true}, - {"http://a.com#foo", "http://b.com", false}, - - // Empty refs. - {"http://a.com#", "http://a.com#", true}, - {"http://a.com#", "http://a.com", true}, - - // URLs that differ only by their last character. - {"http://aaa", "http://aab", false}, - {"http://aaa#foo", "http://aab#foo", false}, - - // Different size of the part before the ref. - {"http://123#a", "http://123456#a", false}, - - // Blob URLs - {"blob:http://a.com#foo", "blob:http://a.com#foo", true}, - {"blob:http://a.com#foo", "blob:http://a.com#bar", true}, - {"blob:http://a.com#foo", "blob:http://b.com#bar", false}, - - // Filesystem URLs - {"filesystem:http://a.com#foo", "filesystem:http://a.com#foo", true}, - {"filesystem:http://a.com#foo", "filesystem:http://a.com#bar", true}, - {"filesystem:http://a.com#foo", "filesystem:http://b.com#bar", false}, - - // Data URLs - {"data:text/html,a#foo", "data:text/html,a#bar", true}, - {"data:text/html,a#foo", "data:text/html,a#foo", true}, - {"data:text/html,a#foo", "data:text/html,b#foo", false}, - }; - - for (const auto& test_case : kTestCases) { - SCOPED_TRACE(testing::Message() - << std::endl - << "url_a = " << test_case.url_a << std::endl - << "url_b = " << test_case.url_b << std::endl); - // A versus B. - EXPECT_EQ(test_case.are_equals, - GURL(test_case.url_a).EqualsIgnoringRef(GURL(test_case.url_b))); - // B versus A. - EXPECT_EQ(test_case.are_equals, - GURL(test_case.url_b).EqualsIgnoringRef(GURL(test_case.url_a))); - } -} - -TEST(GURLTest, DebugAlias) { - GURL url("https://foo.com/bar"); - DEBUG_ALIAS_FOR_GURL(url_debug_alias, url); - EXPECT_STREQ("https://foo.com/bar", url_debug_alias); -} - -} // namespace url diff --git a/ipc/url_param_traits_unittest.cc b/ipc/url_param_traits_unittest.cc deleted file mode 100644 index 16eeab0..0000000 --- a/ipc/url_param_traits_unittest.cc +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "ipc/ipc_message.h" -#include "ipc/ipc_message_utils.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" -#include "url/ipc/url_param_traits.h" - -// Tests that serialize/deserialize correctly understand each other. -TEST(IPCMessageTest, Serialize) { - const char* serialize_cases[] = { - "http://www.google.com/", - "http://user:pass@host.com:888/foo;bar?baz#nop", - }; - - for (size_t i = 0; i < arraysize(serialize_cases); i++) { - GURL input(serialize_cases[i]); - IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL); - IPC::ParamTraits::Write(&msg, input); - - GURL output; - base::PickleIterator iter(msg); - EXPECT_TRUE(IPC::ParamTraits::Read(&msg, &iter, &output)); - - // We want to test each component individually to make sure its range was - // correctly serialized and deserialized, not just the spec. - EXPECT_EQ(input.possibly_invalid_spec(), output.possibly_invalid_spec()); - EXPECT_EQ(input.is_valid(), output.is_valid()); - EXPECT_EQ(input.scheme(), output.scheme()); - EXPECT_EQ(input.username(), output.username()); - EXPECT_EQ(input.password(), output.password()); - EXPECT_EQ(input.host(), output.host()); - EXPECT_EQ(input.port(), output.port()); - EXPECT_EQ(input.path(), output.path()); - EXPECT_EQ(input.query(), output.query()); - EXPECT_EQ(input.ref(), output.ref()); - } - - // Test an excessively long GURL. - { - const std::string url = std::string("http://example.org/").append( - url::kMaxURLChars + 1, 'a'); - GURL input(url.c_str()); - IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL); - IPC::ParamTraits::Write(&msg, input); - - GURL output; - base::PickleIterator iter(msg); - EXPECT_TRUE(IPC::ParamTraits::Read(&msg, &iter, &output)); - EXPECT_TRUE(output.is_empty()); - } - - // Test an invalid GURL. - { - IPC::Message msg; - msg.WriteString("#inva://idurl/"); - GURL output; - base::PickleIterator iter(msg); - EXPECT_FALSE(IPC::ParamTraits::Read(&msg, &iter, &output)); - } - - // Also test the corrupt case. - IPC::Message msg(1, 2, IPC::Message::PRIORITY_NORMAL); - msg.WriteInt(99); - GURL output; - base::PickleIterator iter(msg); - EXPECT_FALSE(IPC::ParamTraits::Read(&msg, &iter, &output)); -} diff --git a/mojom/url_gurl_mojom_traits_unittest.cc b/mojom/url_gurl_mojom_traits_unittest.cc deleted file mode 100644 index e11d063..0000000 --- a/mojom/url_gurl_mojom_traits_unittest.cc +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2016 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "base/logging.h" -#include "base/macros.h" -#include "base/message_loop/message_loop.h" -#include "mojo/public/cpp/bindings/binding.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/mojom/url_test.mojom.h" - -namespace url { - -class UrlTestImpl : public mojom::UrlTest { - public: - explicit UrlTestImpl(mojo::InterfaceRequest request) - : binding_(this, std::move(request)) {} - - // UrlTest: - void BounceUrl(const GURL& in, BounceUrlCallback callback) override { - std::move(callback).Run(in); - } - - void BounceOrigin(const Origin& in, BounceOriginCallback callback) override { - std::move(callback).Run(in); - } - - private: - mojo::Binding binding_; -}; - -// Mojo version of chrome IPC test in url/ipc/url_param_traits_unittest.cc. -TEST(MojoGURLStructTraitsTest, Basic) { - base::MessageLoop message_loop; - - mojom::UrlTestPtr proxy; - UrlTestImpl impl(MakeRequest(&proxy)); - - const char* serialize_cases[] = { - "http://www.google.com/", "http://user:pass@host.com:888/foo;bar?baz#nop", - }; - - for (size_t i = 0; i < arraysize(serialize_cases); i++) { - GURL input(serialize_cases[i]); - GURL output; - EXPECT_TRUE(proxy->BounceUrl(input, &output)); - - // We want to test each component individually to make sure its range was - // correctly serialized and deserialized, not just the spec. - EXPECT_EQ(input.possibly_invalid_spec(), output.possibly_invalid_spec()); - EXPECT_EQ(input.is_valid(), output.is_valid()); - EXPECT_EQ(input.scheme(), output.scheme()); - EXPECT_EQ(input.username(), output.username()); - EXPECT_EQ(input.password(), output.password()); - EXPECT_EQ(input.host(), output.host()); - EXPECT_EQ(input.port(), output.port()); - EXPECT_EQ(input.path(), output.path()); - EXPECT_EQ(input.query(), output.query()); - EXPECT_EQ(input.ref(), output.ref()); - } - - // Test an excessively long GURL. - { - const std::string url = - std::string("http://example.org/").append(kMaxURLChars + 1, 'a'); - GURL input(url.c_str()); - GURL output; - EXPECT_TRUE(proxy->BounceUrl(input, &output)); - EXPECT_TRUE(output.is_empty()); - } - - // Test basic Origin serialization. - Origin non_unique = Origin::UnsafelyCreateOriginWithoutNormalization( - "http", "www.google.com", 80); - Origin output; - EXPECT_TRUE(proxy->BounceOrigin(non_unique, &output)); - EXPECT_EQ(non_unique, output); - EXPECT_FALSE(output.unique()); - - Origin unique; - EXPECT_TRUE(proxy->BounceOrigin(unique, &output)); - EXPECT_TRUE(output.unique()); - - Origin normalized = - Origin::CreateFromNormalizedTuple("http", "www.google.com", 80); - EXPECT_TRUE(proxy->BounceOrigin(normalized, &output)); - EXPECT_EQ(normalized, output); - EXPECT_FALSE(output.unique()); -} - -} // namespace url diff --git a/origin_unittest.cc b/origin_unittest.cc deleted file mode 100644 index 08f08e6..0000000 --- a/origin_unittest.cc +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include -#include - -#include "base/logging.h" -#include "base/macros.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" -#include "url/origin.h" - -namespace { - -void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) { - EXPECT_EQ(a, b); - const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec(); - const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec(); - EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin); - EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len); - EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin); - EXPECT_EQ(a_parsed.username.len, b_parsed.username.len); - EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin); - EXPECT_EQ(a_parsed.password.len, b_parsed.password.len); - EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin); - EXPECT_EQ(a_parsed.host.len, b_parsed.host.len); - EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin); - EXPECT_EQ(a_parsed.port.len, b_parsed.port.len); - EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin); - EXPECT_EQ(a_parsed.path.len, b_parsed.path.len); - EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin); - EXPECT_EQ(a_parsed.query.len, b_parsed.query.len); - EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin); - EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len); -} - -TEST(OriginTest, UniqueOriginComparison) { - url::Origin unique_origin; - EXPECT_EQ("", unique_origin.scheme()); - EXPECT_EQ("", unique_origin.host()); - EXPECT_EQ(0, unique_origin.port()); - EXPECT_TRUE(unique_origin.unique()); - EXPECT_FALSE(unique_origin.IsSameOriginWith(unique_origin)); - - const char* const urls[] = {"data:text/html,Hello!", - "javascript:alert(1)", - "file://example.com:443/etc/passwd", - "yay", - "http::///invalid.example.com/"}; - - for (auto* test_url : urls) { - SCOPED_TRACE(test_url); - GURL url(test_url); - url::Origin origin = url::Origin::Create(url); - EXPECT_EQ("", origin.scheme()); - EXPECT_EQ("", origin.host()); - EXPECT_EQ(0, origin.port()); - EXPECT_TRUE(origin.unique()); - EXPECT_FALSE(origin.IsSameOriginWith(origin)); - EXPECT_FALSE(unique_origin.IsSameOriginWith(origin)); - EXPECT_FALSE(origin.IsSameOriginWith(unique_origin)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - } -} - -TEST(OriginTest, ConstructFromTuple) { - struct TestCases { - const char* const scheme; - const char* const host; - const uint16_t port; - } cases[] = { - {"http", "example.com", 80}, - {"http", "example.com", 123}, - {"https", "example.com", 443}, - }; - - for (const auto& test_case : cases) { - testing::Message scope_message; - scope_message << test_case.scheme << "://" << test_case.host << ":" - << test_case.port; - SCOPED_TRACE(scope_message); - url::Origin origin = url::Origin::CreateFromNormalizedTuple( - test_case.scheme, test_case.host, test_case.port); - - EXPECT_EQ(test_case.scheme, origin.scheme()); - EXPECT_EQ(test_case.host, origin.host()); - EXPECT_EQ(test_case.port, origin.port()); - } -} - -TEST(OriginTest, ConstructFromGURL) { - url::Origin different_origin = - url::Origin::Create(GURL("https://not-in-the-list.test/")); - - struct TestCases { - const char* const url; - const char* const expected_scheme; - const char* const expected_host; - const uint16_t expected_port; - } cases[] = { - // IP Addresses - {"http://192.168.9.1/", "http", "192.168.9.1", 80}, - {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80}, - - // Punycode - {"http://☃.net/", "http", "xn--n3h.net", 80}, - {"blob:http://☃.net/", "http", "xn--n3h.net", 80}, - - // Generic URLs - {"http://example.com/", "http", "example.com", 80}, - {"http://example.com:123/", "http", "example.com", 123}, - {"https://example.com/", "https", "example.com", 443}, - {"https://example.com:123/", "https", "example.com", 123}, - {"http://user:pass@example.com/", "http", "example.com", 80}, - {"http://example.com:123/?query", "http", "example.com", 123}, - {"https://example.com/#1234", "https", "example.com", 443}, - {"https://u:p@example.com:123/?query#1234", "https", "example.com", 123}, - - // Registered URLs - {"ftp://example.com/", "ftp", "example.com", 21}, - {"gopher://example.com/", "gopher", "example.com", 70}, - {"ws://example.com/", "ws", "example.com", 80}, - {"wss://example.com/", "wss", "example.com", 443}, - - // file: URLs - {"file:///etc/passwd", "file", "", 0}, - {"file://example.com/etc/passwd", "file", "example.com", 0}, - - // Filesystem: - {"filesystem:http://example.com/type/", "http", "example.com", 80}, - {"filesystem:http://example.com:123/type/", "http", "example.com", 123}, - {"filesystem:https://example.com/type/", "https", "example.com", 443}, - {"filesystem:https://example.com:123/type/", "https", "example.com", 123}, - - // Blob: - {"blob:http://example.com/guid-goes-here", "http", "example.com", 80}, - {"blob:http://example.com:123/guid-goes-here", "http", "example.com", 123}, - {"blob:https://example.com/guid-goes-here", "https", "example.com", 443}, - {"blob:http://u:p@example.com/guid-goes-here", "http", "example.com", 80}, - }; - - for (const auto& test_case : cases) { - SCOPED_TRACE(test_case.url); - GURL url(test_case.url); - EXPECT_TRUE(url.is_valid()); - url::Origin origin = url::Origin::Create(url); - EXPECT_EQ(test_case.expected_scheme, origin.scheme()); - EXPECT_EQ(test_case.expected_host, origin.host()); - EXPECT_EQ(test_case.expected_port, origin.port()); - EXPECT_FALSE(origin.unique()); - EXPECT_TRUE(origin.IsSameOriginWith(origin)); - EXPECT_FALSE(different_origin.IsSameOriginWith(origin)); - EXPECT_FALSE(origin.IsSameOriginWith(different_origin)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - } -} - -TEST(OriginTest, Serialization) { - struct TestCases { - const char* const url; - const char* const expected; - } cases[] = { - {"http://192.168.9.1/", "http://192.168.9.1"}, - {"http://[2001:db8::1]/", "http://[2001:db8::1]"}, - {"http://☃.net/", "http://xn--n3h.net"}, - {"http://example.com/", "http://example.com"}, - {"http://example.com:123/", "http://example.com:123"}, - {"https://example.com/", "https://example.com"}, - {"https://example.com:123/", "https://example.com:123"}, - {"file:///etc/passwd", "file://"}, - {"file://example.com/etc/passwd", "file://"}, - }; - - for (const auto& test_case : cases) { - SCOPED_TRACE(test_case.url); - GURL url(test_case.url); - EXPECT_TRUE(url.is_valid()); - url::Origin origin = url::Origin::Create(url); - std::string serialized = origin.Serialize(); - ExpectParsedUrlsEqual(GURL(serialized), origin.GetURL()); - - EXPECT_EQ(test_case.expected, serialized); - - // The '<<' operator should produce the same serialization as Serialize(). - std::stringstream out; - out << origin; - EXPECT_EQ(test_case.expected, out.str()); - } -} - -TEST(OriginTest, Comparison) { - // These URLs are arranged in increasing order: - const char* const urls[] = { - "data:uniqueness", - "http://a:80", - "http://b:80", - "https://a:80", - "https://b:80", - "http://a:81", - "http://b:81", - "https://a:81", - "https://b:81", - }; - - for (size_t i = 0; i < arraysize(urls); i++) { - GURL current_url(urls[i]); - url::Origin current = url::Origin::Create(current_url); - for (size_t j = i; j < arraysize(urls); j++) { - GURL compare_url(urls[j]); - url::Origin to_compare = url::Origin::Create(compare_url); - EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; - EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; - } - } -} - -TEST(OriginTest, UnsafelyCreate) { - struct TestCase { - const char* scheme; - const char* host; - uint16_t port; - } cases[] = { - {"http", "example.com", 80}, - {"http", "example.com", 123}, - {"https", "example.com", 443}, - {"https", "example.com", 123}, - {"file", "", 0}, - {"file", "example.com", 0}, - }; - - for (const auto& test : cases) { - SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" - << test.port); - url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization( - test.scheme, test.host, test.port); - EXPECT_EQ(test.scheme, origin.scheme()); - EXPECT_EQ(test.host, origin.host()); - EXPECT_EQ(test.port, origin.port()); - EXPECT_FALSE(origin.unique()); - EXPECT_TRUE(origin.IsSameOriginWith(origin)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - } -} - -TEST(OriginTest, UnsafelyCreateUniqueOnInvalidInput) { - struct TestCases { - const char* scheme; - const char* host; - uint16_t port = 80; - } cases[] = {{"", "", 0}, - {"data", "", 0}, - {"blob", "", 0}, - {"filesystem", "", 0}, - {"data", "example.com"}, - {"http", "☃.net"}, - {"http\nmore", "example.com"}, - {"http\rmore", "example.com"}, - {"http\n", "example.com"}, - {"http\r", "example.com"}, - {"http", "example.com\nnot-example.com"}, - {"http", "example.com\rnot-example.com"}, - {"http", "example.com\n"}, - {"http", "example.com\r"}, - {"http", "example.com", 0}, - {"file", ""}}; - - for (const auto& test : cases) { - SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" - << test.port); - url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization( - test.scheme, test.host, test.port); - EXPECT_EQ("", origin.scheme()); - EXPECT_EQ("", origin.host()); - EXPECT_EQ(0, origin.port()); - EXPECT_TRUE(origin.unique()); - EXPECT_FALSE(origin.IsSameOriginWith(origin)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - } -} - -TEST(OriginTest, UnsafelyCreateUniqueViaEmbeddedNulls) { - struct TestCases { - const char* scheme; - size_t scheme_length; - const char* host; - size_t host_length; - uint16_t port = 80; - } cases[] = {{"http\0more", 9, "example.com", 11}, - {"http\0", 5, "example.com", 11}, - {"\0http", 5, "example.com", 11}, - {"http", 4, "example.com\0not-example.com", 27}, - {"http", 4, "example.com\0", 12}, - {"http", 4, "\0example.com", 12}}; - - for (const auto& test : cases) { - SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" - << test.port); - url::Origin origin = url::Origin::UnsafelyCreateOriginWithoutNormalization( - std::string(test.scheme, test.scheme_length), - std::string(test.host, test.host_length), test.port); - EXPECT_EQ("", origin.scheme()); - EXPECT_EQ("", origin.host()); - EXPECT_EQ(0, origin.port()); - EXPECT_TRUE(origin.unique()); - EXPECT_FALSE(origin.IsSameOriginWith(origin)); - - ExpectParsedUrlsEqual(GURL(origin.Serialize()), origin.GetURL()); - } -} - -TEST(OriginTest, DomainIs) { - const struct { - const char* url; - const char* lower_ascii_domain; - bool expected_domain_is; - } kTestCases[] = { - {"http://google.com/foo", "google.com", true}, - {"http://www.google.com:99/foo", "google.com", true}, - {"http://www.google.com.cn/foo", "google.com", false}, - {"http://www.google.comm", "google.com", false}, - {"http://www.iamnotgoogle.com/foo", "google.com", false}, - {"http://www.google.com/foo", "Google.com", false}, - - // If the host ends with a dot, it matches domains with or without a dot. - {"http://www.google.com./foo", "google.com", true}, - {"http://www.google.com./foo", "google.com.", true}, - {"http://www.google.com./foo", ".com", true}, - {"http://www.google.com./foo", ".com.", true}, - - // But, if the host doesn't end with a dot and the input domain does, then - // it's considered to not match. - {"http://google.com/foo", "google.com.", false}, - - // If the host ends with two dots, it doesn't match. - {"http://www.google.com../foo", "google.com", false}, - - // Filesystem scheme. - {"filesystem:http://www.google.com:99/foo/", "google.com", true}, - {"filesystem:http://www.iamnotgoogle.com/foo/", "google.com", false}, - - // File scheme. - {"file:///home/user/text.txt", "", false}, - {"file:///home/user/text.txt", "txt", false}, - }; - - for (const auto& test_case : kTestCases) { - SCOPED_TRACE(testing::Message() << "(url, domain): (" << test_case.url - << ", " << test_case.lower_ascii_domain - << ")"); - GURL url(test_case.url); - ASSERT_TRUE(url.is_valid()); - url::Origin origin = url::Origin::Create(url); - - EXPECT_EQ(test_case.expected_domain_is, - origin.DomainIs(test_case.lower_ascii_domain)); - } - - // If the URL is invalid, DomainIs returns false. - GURL invalid_url("google.com"); - ASSERT_FALSE(invalid_url.is_valid()); - EXPECT_FALSE(url::Origin::Create(invalid_url).DomainIs("google.com")); - - // Unique origins. - EXPECT_FALSE(url::Origin().DomainIs("")); - EXPECT_FALSE(url::Origin().DomainIs("com")); -} - -TEST(OriginTest, DebugAlias) { - url::Origin origin1 = url::Origin::Create(GURL("https://foo.com/bar")); - DEBUG_ALIAS_FOR_ORIGIN(origin1_debug_alias, origin1); - EXPECT_STREQ("https://foo.com", origin1_debug_alias); -} - -} // namespace diff --git a/scheme_host_port_unittest.cc b/scheme_host_port_unittest.cc deleted file mode 100644 index 9a18d2f..0000000 --- a/scheme_host_port_unittest.cc +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include -#include - -#include "base/macros.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" -#include "url/scheme_host_port.h" -#include "url/url_util.h" - -namespace { - -class SchemeHostPortTest : public testing::Test { - public: - SchemeHostPortTest() = default; - ~SchemeHostPortTest() override { - // Reset any added schemes. - url::Shutdown(); - } - - private: - DISALLOW_COPY_AND_ASSIGN(SchemeHostPortTest); -}; - -void ExpectParsedUrlsEqual(const GURL& a, const GURL& b) { - EXPECT_EQ(a, b); - const url::Parsed& a_parsed = a.parsed_for_possibly_invalid_spec(); - const url::Parsed& b_parsed = b.parsed_for_possibly_invalid_spec(); - EXPECT_EQ(a_parsed.scheme.begin, b_parsed.scheme.begin); - EXPECT_EQ(a_parsed.scheme.len, b_parsed.scheme.len); - EXPECT_EQ(a_parsed.username.begin, b_parsed.username.begin); - EXPECT_EQ(a_parsed.username.len, b_parsed.username.len); - EXPECT_EQ(a_parsed.password.begin, b_parsed.password.begin); - EXPECT_EQ(a_parsed.password.len, b_parsed.password.len); - EXPECT_EQ(a_parsed.host.begin, b_parsed.host.begin); - EXPECT_EQ(a_parsed.host.len, b_parsed.host.len); - EXPECT_EQ(a_parsed.port.begin, b_parsed.port.begin); - EXPECT_EQ(a_parsed.port.len, b_parsed.port.len); - EXPECT_EQ(a_parsed.path.begin, b_parsed.path.begin); - EXPECT_EQ(a_parsed.path.len, b_parsed.path.len); - EXPECT_EQ(a_parsed.query.begin, b_parsed.query.begin); - EXPECT_EQ(a_parsed.query.len, b_parsed.query.len); - EXPECT_EQ(a_parsed.ref.begin, b_parsed.ref.begin); - EXPECT_EQ(a_parsed.ref.len, b_parsed.ref.len); -} - -TEST_F(SchemeHostPortTest, Invalid) { - url::SchemeHostPort invalid; - EXPECT_EQ("", invalid.scheme()); - EXPECT_EQ("", invalid.host()); - EXPECT_EQ(0, invalid.port()); - EXPECT_TRUE(invalid.IsInvalid()); - EXPECT_TRUE(invalid.Equals(invalid)); - - const char* urls[] = { - "data:text/html,Hello!", "javascript:alert(1)", - "file://example.com:443/etc/passwd", - - // These schemes do not follow the generic URL syntax, so make sure we - // treat them as invalid (scheme, host, port) tuples (even though such - // URLs' _Origin_ might have a (scheme, host, port) tuple, they themselves - // do not). This is only *implicitly* checked in the code, by means of - // blob schemes not being standard, and filesystem schemes having type - // SCHEME_WITHOUT_AUTHORITY. If conditions change such that the implicit - // checks no longer hold, this policy should be made explicit. - "blob:https://example.com/uuid-goes-here", - "filesystem:https://example.com/temporary/yay.png"}; - - for (auto* test : urls) { - SCOPED_TRACE(test); - GURL url(test); - url::SchemeHostPort tuple(url); - EXPECT_EQ("", tuple.scheme()); - EXPECT_EQ("", tuple.host()); - EXPECT_EQ(0, tuple.port()); - EXPECT_TRUE(tuple.IsInvalid()); - EXPECT_TRUE(tuple.Equals(tuple)); - EXPECT_TRUE(tuple.Equals(invalid)); - EXPECT_TRUE(invalid.Equals(tuple)); - ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); - } -} - -TEST_F(SchemeHostPortTest, ExplicitConstruction) { - struct TestCases { - const char* scheme; - const char* host; - uint16_t port; - } cases[] = { - {"http", "example.com", 80}, - {"http", "example.com", 123}, - {"https", "example.com", 443}, - {"https", "example.com", 123}, - {"file", "", 0}, - {"file", "example.com", 0}, - }; - - for (const auto& test : cases) { - SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" - << test.port); - url::SchemeHostPort tuple(test.scheme, test.host, test.port); - EXPECT_EQ(test.scheme, tuple.scheme()); - EXPECT_EQ(test.host, tuple.host()); - EXPECT_EQ(test.port, tuple.port()); - EXPECT_FALSE(tuple.IsInvalid()); - EXPECT_TRUE(tuple.Equals(tuple)); - ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); - } -} - -TEST_F(SchemeHostPortTest, InvalidConstruction) { - struct TestCases { - const char* scheme; - const char* host; - uint16_t port; - } cases[] = {{"", "", 0}, - {"data", "", 0}, - {"blob", "", 0}, - {"filesystem", "", 0}, - {"http", "", 80}, - {"data", "example.com", 80}, - {"http", "☃.net", 80}, - {"http\nmore", "example.com", 80}, - {"http\rmore", "example.com", 80}, - {"http\n", "example.com", 80}, - {"http\r", "example.com", 80}, - {"http", "example.com\nnot-example.com", 80}, - {"http", "example.com\rnot-example.com", 80}, - {"http", "example.com\n", 80}, - {"http", "example.com\r", 80}, - {"http", "example.com", 0}, - {"file", "", 80}}; - - for (const auto& test : cases) { - SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" - << test.port); - url::SchemeHostPort tuple(test.scheme, test.host, test.port); - EXPECT_EQ("", tuple.scheme()); - EXPECT_EQ("", tuple.host()); - EXPECT_EQ(0, tuple.port()); - EXPECT_TRUE(tuple.IsInvalid()); - EXPECT_TRUE(tuple.Equals(tuple)); - ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); - } -} - -TEST_F(SchemeHostPortTest, InvalidConstructionWithEmbeddedNulls) { - struct TestCases { - const char* scheme; - size_t scheme_length; - const char* host; - size_t host_length; - uint16_t port; - } cases[] = {{"http\0more", 9, "example.com", 11, 80}, - {"http\0", 5, "example.com", 11, 80}, - {"\0http", 5, "example.com", 11, 80}, - {"http", 4, "example.com\0not-example.com", 27, 80}, - {"http", 4, "example.com\0", 12, 80}, - {"http", 4, "\0example.com", 12, 80}}; - - for (const auto& test : cases) { - SCOPED_TRACE(testing::Message() << test.scheme << "://" << test.host << ":" - << test.port); - url::SchemeHostPort tuple(std::string(test.scheme, test.scheme_length), - std::string(test.host, test.host_length), - test.port); - EXPECT_EQ("", tuple.scheme()); - EXPECT_EQ("", tuple.host()); - EXPECT_EQ(0, tuple.port()); - EXPECT_TRUE(tuple.IsInvalid()); - ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); - } -} - -TEST_F(SchemeHostPortTest, GURLConstruction) { - struct TestCases { - const char* url; - const char* scheme; - const char* host; - uint16_t port; - } cases[] = { - {"http://192.168.9.1/", "http", "192.168.9.1", 80}, - {"http://[2001:db8::1]/", "http", "[2001:db8::1]", 80}, - {"http://☃.net/", "http", "xn--n3h.net", 80}, - {"http://example.com/", "http", "example.com", 80}, - {"http://example.com:123/", "http", "example.com", 123}, - {"https://example.com/", "https", "example.com", 443}, - {"https://example.com:123/", "https", "example.com", 123}, - {"file:///etc/passwd", "file", "", 0}, - {"file://example.com/etc/passwd", "file", "example.com", 0}, - {"http://u:p@example.com/", "http", "example.com", 80}, - {"http://u:p@example.com/path", "http", "example.com", 80}, - {"http://u:p@example.com/path?123", "http", "example.com", 80}, - {"http://u:p@example.com/path?123#hash", "http", "example.com", 80}, - }; - - for (const auto& test : cases) { - SCOPED_TRACE(test.url); - GURL url(test.url); - EXPECT_TRUE(url.is_valid()); - url::SchemeHostPort tuple(url); - EXPECT_EQ(test.scheme, tuple.scheme()); - EXPECT_EQ(test.host, tuple.host()); - EXPECT_EQ(test.port, tuple.port()); - EXPECT_FALSE(tuple.IsInvalid()); - EXPECT_TRUE(tuple.Equals(tuple)); - ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); - } -} - -TEST_F(SchemeHostPortTest, Serialization) { - struct TestCases { - const char* url; - const char* expected; - } cases[] = { - {"http://192.168.9.1/", "http://192.168.9.1"}, - {"http://[2001:db8::1]/", "http://[2001:db8::1]"}, - {"http://☃.net/", "http://xn--n3h.net"}, - {"http://example.com/", "http://example.com"}, - {"http://example.com:123/", "http://example.com:123"}, - {"https://example.com/", "https://example.com"}, - {"https://example.com:123/", "https://example.com:123"}, - {"file:///etc/passwd", "file://"}, - {"file://example.com/etc/passwd", "file://example.com"}, - }; - - for (const auto& test : cases) { - SCOPED_TRACE(test.url); - GURL url(test.url); - url::SchemeHostPort tuple(url); - EXPECT_EQ(test.expected, tuple.Serialize()); - ExpectParsedUrlsEqual(GURL(tuple.Serialize()), tuple.GetURL()); - } -} - -TEST_F(SchemeHostPortTest, Comparison) { - // These tuples are arranged in increasing order: - struct SchemeHostPorts { - const char* scheme; - const char* host; - uint16_t port; - } tuples[] = { - {"http", "a", 80}, - {"http", "b", 80}, - {"https", "a", 80}, - {"https", "b", 80}, - {"http", "a", 81}, - {"http", "b", 81}, - {"https", "a", 81}, - {"https", "b", 81}, - }; - - for (size_t i = 0; i < arraysize(tuples); i++) { - url::SchemeHostPort current(tuples[i].scheme, tuples[i].host, - tuples[i].port); - for (size_t j = i; j < arraysize(tuples); j++) { - url::SchemeHostPort to_compare(tuples[j].scheme, tuples[j].host, - tuples[j].port); - EXPECT_EQ(i < j, current < to_compare) << i << " < " << j; - EXPECT_EQ(j < i, to_compare < current) << j << " < " << i; - } - } -} - -// Some schemes have optional authority. Make sure that GURL conversion from -// SchemeHostPort is not opinionated in that regard. For more info, See -// crbug.com/820194, where we considered all SchemeHostPorts with -// SCHEME_WITH_HOST (i.e., without ports) as valid with empty hosts, even though -// most are not (e.g. chrome URLs). -TEST_F(SchemeHostPortTest, EmptyHostGurlConversion) { - url::AddStandardScheme("chrome", url::SCHEME_WITH_HOST); - - GURL chrome_url("chrome:"); - EXPECT_FALSE(chrome_url.is_valid()); - - url::SchemeHostPort chrome_tuple("chrome", "", 0); - EXPECT_FALSE(chrome_tuple.GetURL().is_valid()); - ExpectParsedUrlsEqual(GURL(chrome_tuple.Serialize()), chrome_tuple.GetURL()); - ExpectParsedUrlsEqual(chrome_url, chrome_tuple.GetURL()); -} - -} // namespace url diff --git a/url_canon_icu_unittest.cc b/url_canon_icu_unittest.cc deleted file mode 100644 index af320f9..0000000 --- a/url_canon_icu_unittest.cc +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "base/macros.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "third_party/icu/source/common/unicode/ucnv.h" -#include "url/url_canon.h" -#include "url/url_canon_icu.h" -#include "url/url_canon_stdstring.h" -#include "url/url_test_utils.h" - -namespace url { - -namespace { - -// Wrapper around a UConverter object that managers creation and destruction. -class UConvScoper { - public: - explicit UConvScoper(const char* charset_name) { - UErrorCode err = U_ZERO_ERROR; - converter_ = ucnv_open(charset_name, &err); - } - - ~UConvScoper() { - if (converter_) - ucnv_close(converter_); - } - - // Returns the converter object, may be NULL. - UConverter* converter() const { return converter_; } - - private: - UConverter* converter_; -}; - -TEST(URLCanonIcuTest, ICUCharsetConverter) { - struct ICUCase { - const wchar_t* input; - const char* encoding; - const char* expected; - } icu_cases[] = { - // UTF-8. - {L"Hello, world", "utf-8", "Hello, world"}, - {L"\x4f60\x597d", "utf-8", "\xe4\xbd\xa0\xe5\xa5\xbd"}, - // Non-BMP UTF-8. - {L"!\xd800\xdf00!", "utf-8", "!\xf0\x90\x8c\x80!"}, - // Big5 - {L"\x4f60\x597d", "big5", "\xa7\x41\xa6\x6e"}, - // Unrepresentable character in the destination set. - {L"hello\x4f60\x06de\x597dworld", "big5", - "hello\xa7\x41%26%231758%3B\xa6\x6eworld"}, - }; - - for (size_t i = 0; i < arraysize(icu_cases); i++) { - UConvScoper conv(icu_cases[i].encoding); - ASSERT_TRUE(conv.converter() != NULL); - ICUCharsetConverter converter(conv.converter()); - - std::string str; - StdStringCanonOutput output(&str); - - base::string16 input_str( - test_utils::TruncateWStringToUTF16(icu_cases[i].input)); - int input_len = static_cast(input_str.length()); - converter.ConvertFromUTF16(input_str.c_str(), input_len, &output); - output.Complete(); - - EXPECT_STREQ(icu_cases[i].expected, str.c_str()); - } - - // Test string sizes around the resize boundary for the output to make sure - // the converter resizes as needed. - const int static_size = 16; - UConvScoper conv("utf-8"); - ASSERT_TRUE(conv.converter()); - ICUCharsetConverter converter(conv.converter()); - for (int i = static_size - 2; i <= static_size + 2; i++) { - // Make a string with the appropriate length. - base::string16 input; - for (int ch = 0; ch < i; ch++) - input.push_back('a'); - - RawCanonOutput output; - converter.ConvertFromUTF16(input.c_str(), static_cast(input.length()), - &output); - EXPECT_EQ(input.length(), static_cast(output.length())); - } -} - -TEST(URLCanonIcuTest, QueryWithConverter) { - struct QueryCase { - const char* input8; - const wchar_t* input16; - const char* encoding; - const char* expected; - } query_cases[] = { - // Regular ASCII case in some different encodings. - {"foo=bar", L"foo=bar", "utf-8", "?foo=bar"}, - {"foo=bar", L"foo=bar", "shift_jis", "?foo=bar"}, - {"foo=bar", L"foo=bar", "gb2312", "?foo=bar"}, - // Chinese input/output - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "gb2312", - "?q=%C4%E3%BA%C3"}, - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "big5", "?q=%A7A%A6n"}, - // Unencodable character in the destination character set should be - // escaped. The escape sequence unescapes to be the entity name: - // "?q=你" - {"q=Chinese\xef\xbc\xa7", L"q=Chinese\xff27", "iso-8859-1", - "?q=Chinese%26%2365319%3B"}, - }; - - for (size_t i = 0; i < arraysize(query_cases); i++) { - Component out_comp; - - UConvScoper conv(query_cases[i].encoding); - ASSERT_TRUE(!query_cases[i].encoding || conv.converter()); - ICUCharsetConverter converter(conv.converter()); - - if (query_cases[i].input8) { - int len = static_cast(strlen(query_cases[i].input8)); - Component in_comp(0, len); - std::string out_str; - - StdStringCanonOutput output(&out_str); - CanonicalizeQuery(query_cases[i].input8, in_comp, &converter, &output, - &out_comp); - output.Complete(); - - EXPECT_EQ(query_cases[i].expected, out_str); - } - - if (query_cases[i].input16) { - base::string16 input16( - test_utils::TruncateWStringToUTF16(query_cases[i].input16)); - int len = static_cast(input16.length()); - Component in_comp(0, len); - std::string out_str; - - StdStringCanonOutput output(&out_str); - CanonicalizeQuery(input16.c_str(), in_comp, &converter, &output, - &out_comp); - output.Complete(); - - EXPECT_EQ(query_cases[i].expected, out_str); - } - } - - // Extra test for input with embedded NULL; - std::string out_str; - StdStringCanonOutput output(&out_str); - Component out_comp; - CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp); - output.Complete(); - EXPECT_EQ("?a%20%00z%01", out_str); -} - -} // namespace - -} // namespace url diff --git a/url_canon_unittest.cc b/url_canon_unittest.cc deleted file mode 100644 index a29db7f..0000000 --- a/url_canon_unittest.cc +++ /dev/null @@ -1,2402 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include -#include - -#include "base/macros.h" -#include "base/strings/utf_string_conversions.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/third_party/mozilla/url_parse.h" -#include "url/url_canon.h" -#include "url/url_canon_internal.h" -#include "url/url_canon_stdstring.h" -#include "url/url_test_utils.h" - -namespace url { - -namespace { - -struct ComponentCase { - const char* input; - const char* expected; - Component expected_component; - bool expected_success; -}; - -// ComponentCase but with dual 8-bit/16-bit input. Generally, the unit tests -// treat each input as optional, and will only try processing if non-NULL. -// The output is always 8-bit. -struct DualComponentCase { - const char* input8; - const wchar_t* input16; - const char* expected; - Component expected_component; - bool expected_success; -}; - -// Test cases for CanonicalizeIPAddress(). The inputs are identical to -// DualComponentCase, but the output has extra CanonHostInfo fields. -struct IPAddressCase { - const char* input8; - const wchar_t* input16; - const char* expected; - Component expected_component; - - // CanonHostInfo fields, for verbose output. - CanonHostInfo::Family expected_family; - int expected_num_ipv4_components; - const char* expected_address_hex; // Two hex chars per IP address byte. -}; - -std::string BytesToHexString(unsigned char bytes[16], int length) { - EXPECT_TRUE(length == 0 || length == 4 || length == 16) - << "Bad IP address length: " << length; - std::string result; - for (int i = 0; i < length; ++i) { - result.push_back(kHexCharLookup[(bytes[i] >> 4) & 0xf]); - result.push_back(kHexCharLookup[bytes[i] & 0xf]); - } - return result; -} - -struct ReplaceCase { - const char* base; - const char* scheme; - const char* username; - const char* password; - const char* host; - const char* port; - const char* path; - const char* query; - const char* ref; - const char* expected; -}; - -// Magic string used in the replacements code that tells SetupReplComp to -// call the clear function. -const char kDeleteComp[] = "|"; - -// Sets up a replacement for a single component. This is given pointers to -// the set and clear function for the component being replaced, and will -// either set the component (if it exists) or clear it (if the replacement -// string matches kDeleteComp). -// -// This template is currently used only for the 8-bit case, and the strlen -// causes it to fail in other cases. It is left a template in case we have -// tests for wide replacements. -template -void SetupReplComp( - void (Replacements::*set)(const CHAR*, const Component&), - void (Replacements::*clear)(), - Replacements* rep, - const CHAR* str) { - if (str && str[0] == kDeleteComp[0]) { - (rep->*clear)(); - } else if (str) { - (rep->*set)(str, Component(0, static_cast(strlen(str)))); - } -} - -} // namespace - -TEST(URLCanonTest, DoAppendUTF8) { - struct UTF8Case { - unsigned input; - const char* output; - } utf_cases[] = { - // Valid code points. - {0x24, "\x24"}, - {0xA2, "\xC2\xA2"}, - {0x20AC, "\xE2\x82\xAC"}, - {0x24B62, "\xF0\xA4\xAD\xA2"}, - {0x10FFFF, "\xF4\x8F\xBF\xBF"}, - }; - std::string out_str; - for (size_t i = 0; i < arraysize(utf_cases); i++) { - out_str.clear(); - StdStringCanonOutput output(&out_str); - AppendUTF8Value(utf_cases[i].input, &output); - output.Complete(); - EXPECT_EQ(utf_cases[i].output, out_str); - } -} - -#if defined(GTEST_HAS_DEATH_TEST) -// TODO(mattm): Can't run this in debug mode for now, since the DCHECK will -// cause the Chromium stack trace dialog to appear and hang the test. -// See http://crbug.com/49580. -#if defined(NDEBUG) && !defined(DCHECK_ALWAYS_ON) -#define MAYBE_DoAppendUTF8Invalid DoAppendUTF8Invalid -#else -#define MAYBE_DoAppendUTF8Invalid DISABLED_DoAppendUTF8Invalid -#endif -TEST(URLCanonTest, MAYBE_DoAppendUTF8Invalid) { - std::string out_str; - StdStringCanonOutput output(&out_str); - // Invalid code point (too large). - ASSERT_DEBUG_DEATH({ - AppendUTF8Value(0x110000, &output); - output.Complete(); - EXPECT_EQ("", out_str); - }, ""); -} -#endif // defined(GTEST_HAS_DEATH_TEST) - -TEST(URLCanonTest, UTF) { - // Low-level test that we handle reading, canonicalization, and writing - // UTF-8/UTF-16 strings properly. - struct UTFCase { - const char* input8; - const wchar_t* input16; - bool expected_success; - const char* output; - } utf_cases[] = { - // Valid canonical input should get passed through & escaped. - {"\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d", true, "%E4%BD%A0%E5%A5%BD"}, - // Test a character that takes > 16 bits (U+10300 = old italic letter A) - {"\xF0\x90\x8C\x80", L"\xd800\xdf00", true, "%F0%90%8C%80"}, - // Non-shortest-form UTF-8 characters are invalid. The bad bytes should - // each be replaced with the invalid character (EF BF DB in UTF-8). - {"\xf0\x84\xbd\xa0\xe5\xa5\xbd", NULL, false, - "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%E5%A5%BD"}, - // Invalid UTF-8 sequences should be marked as invalid (the first - // sequence is truncated). - {"\xe4\xa0\xe5\xa5\xbd", L"\xd800\x597d", false, "%EF%BF%BD%E5%A5%BD"}, - // Character going off the end. - {"\xe4\xbd\xa0\xe5\xa5", L"\x4f60\xd800", false, "%E4%BD%A0%EF%BF%BD"}, - // ...same with low surrogates with no high surrogate. - {nullptr, L"\xdc00", false, "%EF%BF%BD"}, - // Test a UTF-8 encoded surrogate value is marked as invalid. - // ED A0 80 = U+D800 - {"\xed\xa0\x80", NULL, false, "%EF%BF%BD%EF%BF%BD%EF%BF%BD"}, - // ...even when paired. - {"\xed\xa0\x80\xed\xb0\x80", nullptr, false, - "%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD%EF%BF%BD"}, - }; - - std::string out_str; - for (size_t i = 0; i < arraysize(utf_cases); i++) { - if (utf_cases[i].input8) { - out_str.clear(); - StdStringCanonOutput output(&out_str); - - int input_len = static_cast(strlen(utf_cases[i].input8)); - bool success = true; - for (int ch = 0; ch < input_len; ch++) { - success &= AppendUTF8EscapedChar(utf_cases[i].input8, &ch, input_len, - &output); - } - output.Complete(); - EXPECT_EQ(utf_cases[i].expected_success, success); - EXPECT_EQ(std::string(utf_cases[i].output), out_str); - } - if (utf_cases[i].input16) { - out_str.clear(); - StdStringCanonOutput output(&out_str); - - base::string16 input_str( - test_utils::TruncateWStringToUTF16(utf_cases[i].input16)); - int input_len = static_cast(input_str.length()); - bool success = true; - for (int ch = 0; ch < input_len; ch++) { - success &= AppendUTF8EscapedChar(input_str.c_str(), &ch, input_len, - &output); - } - output.Complete(); - EXPECT_EQ(utf_cases[i].expected_success, success); - EXPECT_EQ(std::string(utf_cases[i].output), out_str); - } - - if (utf_cases[i].input8 && utf_cases[i].input16 && - utf_cases[i].expected_success) { - // Check that the UTF-8 and UTF-16 inputs are equivalent. - - // UTF-16 -> UTF-8 - std::string input8_str(utf_cases[i].input8); - base::string16 input16_str( - test_utils::TruncateWStringToUTF16(utf_cases[i].input16)); - EXPECT_EQ(input8_str, base::UTF16ToUTF8(input16_str)); - - // UTF-8 -> UTF-16 - EXPECT_EQ(input16_str, base::UTF8ToUTF16(input8_str)); - } - } -} - -TEST(URLCanonTest, Scheme) { - // Here, we're mostly testing that unusual characters are handled properly. - // The canonicalizer doesn't do any parsing or whitespace detection. It will - // also do its best on error, and will escape funny sequences (these won't be - // valid schemes and it will return error). - // - // Note that the canonicalizer will append a colon to the output to separate - // out the rest of the URL, which is not present in the input. We check, - // however, that the output range includes everything but the colon. - ComponentCase scheme_cases[] = { - {"http", "http:", Component(0, 4), true}, - {"HTTP", "http:", Component(0, 4), true}, - {" HTTP ", "%20http%20:", Component(0, 10), false}, - {"htt: ", "htt%3A%20:", Component(0, 9), false}, - {"\xe4\xbd\xa0\xe5\xa5\xbdhttp", "%E4%BD%A0%E5%A5%BDhttp:", Component(0, 22), false}, - // Don't re-escape something already escaped. Note that it will - // "canonicalize" the 'A' to 'a', but that's OK. - {"ht%3Atp", "ht%3atp:", Component(0, 7), false}, - {"", ":", Component(0, 0), false}, - }; - - std::string out_str; - - for (size_t i = 0; i < arraysize(scheme_cases); i++) { - int url_len = static_cast(strlen(scheme_cases[i].input)); - Component in_comp(0, url_len); - Component out_comp; - - out_str.clear(); - StdStringCanonOutput output1(&out_str); - bool success = CanonicalizeScheme(scheme_cases[i].input, in_comp, &output1, - &out_comp); - output1.Complete(); - - EXPECT_EQ(scheme_cases[i].expected_success, success); - EXPECT_EQ(std::string(scheme_cases[i].expected), out_str); - EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len); - - // Now try the wide version. - out_str.clear(); - StdStringCanonOutput output2(&out_str); - - base::string16 wide_input(base::UTF8ToUTF16(scheme_cases[i].input)); - in_comp.len = static_cast(wide_input.length()); - success = CanonicalizeScheme(wide_input.c_str(), in_comp, &output2, - &out_comp); - output2.Complete(); - - EXPECT_EQ(scheme_cases[i].expected_success, success); - EXPECT_EQ(std::string(scheme_cases[i].expected), out_str); - EXPECT_EQ(scheme_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(scheme_cases[i].expected_component.len, out_comp.len); - } - - // Test the case where the scheme is declared nonexistent, it should be - // converted into an empty scheme. - Component out_comp; - out_str.clear(); - StdStringCanonOutput output(&out_str); - - EXPECT_FALSE(CanonicalizeScheme("", Component(0, -1), &output, &out_comp)); - output.Complete(); - - EXPECT_EQ(std::string(":"), out_str); - EXPECT_EQ(0, out_comp.begin); - EXPECT_EQ(0, out_comp.len); -} - -TEST(URLCanonTest, Host) { - IPAddressCase host_cases[] = { - // Basic canonicalization, uppercase should be converted to lowercase. - {"GoOgLe.CoM", L"GoOgLe.CoM", "google.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, - // Spaces and some other characters should be escaped. - {"Goo%20 goo%7C|.com", L"Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com", Component(0, 22), CanonHostInfo::NEUTRAL, -1, ""}, - // Exciting different types of spaces! - {NULL, L"GOO\x00a0\x3000goo.com", "goo%20%20goo.com", Component(0, 16), CanonHostInfo::NEUTRAL, -1, ""}, - // Other types of space (no-break, zero-width, zero-width-no-break) are - // name-prepped away to nothing. - {NULL, L"GOO\x200b\x2060\xfeffgoo.com", "googoo.com", Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, - // Ideographic full stop (full-width period for Chinese, etc.) should be - // treated as a dot. - {NULL, L"www.foo\x3002" L"bar.com", "www.foo.bar.com", Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid unicode characters should fail... - // ...In wide input, ICU will barf and we'll end up with the input as - // escaped UTF-8 (the invalid character should be replaced with the - // replacement character). - {"\xef\xb7\x90zyx.com", L"\xfdd0zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, - // ...This is the same as previous but with with escaped. - {"%ef%b7%90zyx.com", L"%ef%b7%90zyx.com", "%EF%BF%BDzyx.com", Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, - // Test name prepping, fullwidth input should be converted to ASCII and NOT - // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16. - {"\xef\xbc\xa7\xef\xbd\x8f.com", L"\xff27\xff4f.com", "go.com", Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""}, - // Test that fullwidth escaped values are properly name-prepped, - // then converted or rejected. - // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input) - {"\xef\xbc\x85\xef\xbc\x94\xef\xbc\x91.com", L"\xff05\xff14\xff11.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, - {"%ef%bc%85%ef%bc%94%ef%bc%91.com", L"%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, - // ...%00 in fullwidth should fail (also as escaped UTF-8 input) - {"\xef\xbc\x85\xef\xbc\x90\xef\xbc\x90.com", L"\xff05\xff10\xff10.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, - {"%ef%bc%85%ef%bc%90%ef%bc%90.com", L"%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, - // ICU will convert weird percents into ASCII percents, but not unescape - // further. A weird percent is U+FE6A (EF B9 AA in UTF-8) which is a - // "small percent". At this point we should be within our rights to mark - // anything as invalid since the URL is corrupt or malicious. The code - // happens to allow ASCII characters (%41 = "A" -> 'a') to be unescaped - // and kept as valid, so we validate that behavior here, but this level - // of fixing the input shouldn't be seen as required. "%81" is invalid. - {"\xef\xb9\xaa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, - {"%ef%b9%aa" "41.com", L"\xfe6a" L"41.com", "a.com", Component(0, 5), CanonHostInfo::NEUTRAL, -1, ""}, - {"\xef\xb9\xaa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, - {"%ef%b9%aa" "81.com", L"\xfe6a" L"81.com", "%81.com", Component(0, 7), CanonHostInfo::BROKEN, -1, ""}, - // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN - {"\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"\x4f60\x597d\x4f60\x597d", "xn--6qqa088eba", Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, - // See http://unicode.org/cldr/utility/idna.jsp for other - // examples/experiments and http://goo.gl/7yG11o - // for the full list of characters handled differently by - // IDNA 2003, UTS 46 (http://unicode.org/reports/tr46/ ) and IDNA 2008. - - // 4 Deviation characters are mapped/ignored in UTS 46 transitional - // mechansm. UTS 46, table 4 row (g). - // Sharp-s is mapped to 'ss' in UTS 46 and IDNA 2003. - // Otherwise, it'd be "xn--fuball-cta.de". - {"fu\xc3\x9f" "ball.de", L"fu\x00df" L"ball.de", "fussball.de", - Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""}, - // Final-sigma (U+03C3) is mapped to regular sigma (U+03C2). - // Otherwise, it'd be "xn--wxaijb9b". - {"\xcf\x83\xcf\x8c\xce\xbb\xce\xbf\xcf\x82", L"\x3c3\x3cc\x3bb\x3bf\x3c2", - "xn--wxaikc6b", Component(0, 12), - CanonHostInfo::NEUTRAL, -1, ""}, - // ZWNJ (U+200C) and ZWJ (U+200D) are mapped away in UTS 46 transitional - // handling as well as in IDNA 2003. - {"a\xe2\x80\x8c" "b\xe2\x80\x8d" "c", L"a\x200c" L"b\x200d" L"c", "abc", - Component(0, 3), CanonHostInfo::NEUTRAL, -1, ""}, - // ZWJ between Devanagari characters is still mapped away in UTS 46 - // transitional handling. IDNA 2008 would give xn--11bo0mv54g. - {"\xe0\xa4\x95\xe0\xa5\x8d\xe2\x80\x8d\xe0\xa4\x9c", - L"\x915\x94d\x200d\x91c", "xn--11bo0m", - Component(0, 10), CanonHostInfo::NEUTRAL, -1, ""}, - // Fullwidth exclamation mark is disallowed. UTS 46, table 4, row (b) - // However, we do allow this at the moment because we don't use - // STD3 rules and canonicalize full-width ASCII to ASCII. - {"wow\xef\xbc\x81", L"wow\xff01", "wow%21", - Component(0, 6), CanonHostInfo::NEUTRAL, -1, ""}, - // U+2132 (turned capital F) is disallowed. UTS 46, table 4, row (c) - // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 - {"\xe2\x84\xb2oo", L"\x2132oo", "%E2%84%B2oo", - Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, - // U+2F868 (CJK Comp) is disallowed. UTS 46, table 4, row (d) - // Allowed in IDNA 2003, but the mapping changed after Unicode 3.2 - {"\xf0\xaf\xa1\xa8\xe5\xa7\xbb.cn", L"\xd87e\xdc68\x59fb.cn", - "%F0%AF%A1%A8%E5%A7%BB.cn", - Component(0, 24), CanonHostInfo::BROKEN, -1, ""}, - // Maps uppercase letters to lower case letters. UTS 46 table 4 row (e) - {"M\xc3\x9cNCHEN", L"M\xdcNCHEN", "xn--mnchen-3ya", - Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, - // An already-IDNA host is not modified. - {"xn--mnchen-3ya", L"xn--mnchen-3ya", "xn--mnchen-3ya", - Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, - // Symbol/punctuations are allowed in IDNA 2003/UTS46. - // Not allowed in IDNA 2008. UTS 46 table 4 row (f). - {"\xe2\x99\xa5ny.us", L"\x2665ny.us", "xn--ny-s0x.us", - Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""}, - // U+11013 is new in Unicode 6.0 and is allowed. UTS 46 table 4, row (h) - // We used to allow it because we passed through unassigned code points. - {"\xf0\x91\x80\x93.com", L"\xd804\xdc13.com", "xn--n00d.com", - Component(0, 12), CanonHostInfo::NEUTRAL, -1, ""}, - // U+0602 is disallowed in UTS46/IDNA 2008. UTS 46 table 4, row(i) - // Used to be allowed in INDA 2003. - {"\xd8\x82.eg", L"\x602.eg", "%D8%82.eg", - Component(0, 9), CanonHostInfo::BROKEN, -1, ""}, - // U+20B7 is new in Unicode 5.2 (not a part of IDNA 2003 based - // on Unicode 3.2). We did allow it in the past because we let unassigned - // code point pass. We continue to allow it even though it's a - // "punctuation and symbol" blocked in IDNA 2008. - // UTS 46 table 4, row (j) - {"\xe2\x82\xb7.com", L"\x20b7.com", "xn--wzg.com", - Component(0, 11), CanonHostInfo::NEUTRAL, -1, ""}, - // Maps uppercase letters to lower case letters. - // In IDNA 2003, it's allowed without case-folding - // ( xn--bc-7cb.com ) because it's not defined in Unicode 3.2 - // (added in Unicode 4.1). UTS 46 table 4 row (k) - {"bc\xc8\xba.com", L"bc\x23a.com", "xn--bc-is1a.com", - Component(0, 15), CanonHostInfo::NEUTRAL, -1, ""}, - // Maps U+FF43 (Full Width Small Letter C) to 'c'. - {"ab\xef\xbd\x83.xyz", L"ab\xff43.xyz", "abc.xyz", - Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""}, - // Maps U+1D68C (Math Monospace Small C) to 'c'. - // U+1D68C = \xD835\xDE8C in UTF-16 - {"ab\xf0\x9d\x9a\x8c.xyz", L"ab\xd835\xde8c.xyz", "abc.xyz", - Component(0, 7), CanonHostInfo::NEUTRAL, -1, ""}, - // BiDi check test - // "Divehi" in Divehi (Thaana script) ends with BidiClass=NSM. - // Disallowed in IDNA 2003 but now allowed in UTS 46/IDNA 2008. - {"\xde\x8b\xde\xa8\xde\x88\xde\xac\xde\x80\xde\xa8", - L"\x78b\x7a8\x788\x7ac\x780\x7a8", "xn--hqbpi0jcw", - Component(0, 13), CanonHostInfo::NEUTRAL, -1, ""}, - // Disallowed in both IDNA 2003 and 2008 with BiDi check. - // Labels starting with a RTL character cannot end with a LTR character. - {"\xd8\xac\xd8\xa7\xd8\xb1xyz", L"\x62c\x627\x631xyz", - "%D8%AC%D8%A7%D8%B1xyz", Component(0, 21), - CanonHostInfo::BROKEN, -1, ""}, - // Labels starting with a RTL character can end with BC=EN (European - // number). Disallowed in IDNA 2003 but now allowed. - {"\xd8\xac\xd8\xa7\xd8\xb1" "2", L"\x62c\x627\x631" L"2", - "xn--2-ymcov", Component(0, 11), - CanonHostInfo::NEUTRAL, -1, ""}, - // Labels starting with a RTL character cannot have "L" characters - // even if it ends with an BC=EN. Disallowed in both IDNA 2003/2008. - {"\xd8\xac\xd8\xa7\xd8\xb1xy2", L"\x62c\x627\x631xy2", - "%D8%AC%D8%A7%D8%B1xy2", Component(0, 21), - CanonHostInfo::BROKEN, -1, ""}, - // Labels starting with a RTL character can end with BC=AN (Arabic number) - // Disallowed in IDNA 2003, but now allowed. - {"\xd8\xac\xd8\xa7\xd8\xb1\xd9\xa2", L"\x62c\x627\x631\x662", - "xn--mgbjq0r", Component(0, 11), - CanonHostInfo::NEUTRAL, -1, ""}, - // Labels starting with a RTL character cannot have "L" characters - // even if it ends with an BC=AN (Arabic number). - // Disallowed in both IDNA 2003/2008. - {"\xd8\xac\xd8\xa7\xd8\xb1xy\xd9\xa2", L"\x62c\x627\x631xy\x662", - "%D8%AC%D8%A7%D8%B1xy%D9%A2", Component(0, 26), - CanonHostInfo::BROKEN, -1, ""}, - // Labels starting with a RTL character cannot mix BC=EN and BC=AN - {"\xd8\xac\xd8\xa7\xd8\xb1xy2\xd9\xa2", L"\x62c\x627\x631xy2\x662", - "%D8%AC%D8%A7%D8%B1xy2%D9%A2", Component(0, 27), - CanonHostInfo::BROKEN, -1, ""}, - // As of Unicode 6.2, U+20CF is not assigned. We do not allow it. - {"\xe2\x83\x8f.com", L"\x20cf.com", "%E2%83%8F.com", - Component(0, 13), CanonHostInfo::BROKEN, -1, ""}, - // U+0080 is not allowed. - {"\xc2\x80.com", L"\x80.com", "%C2%80.com", - Component(0, 10), CanonHostInfo::BROKEN, -1, ""}, - // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped - // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped - // UTF-8 (wide case). The output should be equivalent to the true wide - // character input above). - {"%E4%BD%A0%E5%A5%BD\xe4\xbd\xa0\xe5\xa5\xbd", - L"%E4%BD%A0%E5%A5%BD\x4f60\x597d", "xn--6qqa088eba", - Component(0, 14), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid escaped characters should fail and the percents should be - // escaped. - {"%zz%66%a", L"%zz%66%a", "%25zzf%25a", Component(0, 10), - CanonHostInfo::BROKEN, -1, ""}, - // If we get an invalid character that has been escaped. - {"%25", L"%25", "%25", Component(0, 3), - CanonHostInfo::BROKEN, -1, ""}, - {"hello%00", L"hello%00", "hello%00", Component(0, 8), - CanonHostInfo::BROKEN, -1, ""}, - // Escaped numbers should be treated like IP addresses if they are. - {"%30%78%63%30%2e%30%32%35%30.01", L"%30%78%63%30%2e%30%32%35%30.01", - "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, - "C0A80001"}, - {"%30%78%63%30%2e%30%32%35%30.01%2e", L"%30%78%63%30%2e%30%32%35%30.01%2e", - "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, - "C0A80001"}, - // Invalid escaping should trigger the regular host error handling. - {"%3g%78%63%30%2e%30%32%35%30%2E.01", L"%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01", Component(0, 17), CanonHostInfo::BROKEN, -1, ""}, - // Something that isn't exactly an IP should get treated as a host and - // spaces escaped. - {"192.168.0.1 hello", L"192.168.0.1 hello", "192.168.0.1%20hello", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, - // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP. - // These are "0Xc0.0250.01" in fullwidth. - {"\xef\xbc\x90%Ef%bc\xb8%ef%Bd%83\xef\xbc\x90%EF%BC%8E\xef\xbc\x90\xef\xbc\x92\xef\xbc\x95\xef\xbc\x90\xef\xbc%8E\xef\xbc\x90\xef\xbc\x91", L"\xff10\xff38\xff43\xff10\xff0e\xff10\xff12\xff15\xff10\xff0e\xff10\xff11", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - // Broken IP addresses get marked as such. - {"192.168.0.257", L"192.168.0.257", "192.168.0.257", Component(0, 13), CanonHostInfo::BROKEN, -1, ""}, - {"[google.com]", L"[google.com]", "[google.com]", Component(0, 12), CanonHostInfo::BROKEN, -1, ""}, - // Cyrillic letter followed by '(' should return punycode for '(' escaped - // before punycode string was created. I.e. - // if '(' is escaped after punycode is created we would get xn--%28-8tb - // (incorrect). - {"\xd1\x82(", L"\x0442(", "xn--%28-7ed", Component(0, 11), - CanonHostInfo::NEUTRAL, -1, ""}, - // Address with all hexidecimal characters with leading number of 1<<32 - // or greater and should return NEUTRAL rather than BROKEN if not all - // components are numbers. - {"12345678912345.de", L"12345678912345.de", "12345678912345.de", Component(0, 17), CanonHostInfo::NEUTRAL, -1, ""}, - {"1.12345678912345.de", L"1.12345678912345.de", "1.12345678912345.de", Component(0, 19), CanonHostInfo::NEUTRAL, -1, ""}, - {"12345678912345.12345678912345.de", L"12345678912345.12345678912345.de", "12345678912345.12345678912345.de", Component(0, 32), CanonHostInfo::NEUTRAL, -1, ""}, - {"1.2.0xB3A73CE5B59.de", L"1.2.0xB3A73CE5B59.de", "1.2.0xb3a73ce5b59.de", Component(0, 20), CanonHostInfo::NEUTRAL, -1, ""}, - {"12345678912345.0xde", L"12345678912345.0xde", "12345678912345.0xde", Component(0, 19), CanonHostInfo::BROKEN, -1, ""}, - // A label that starts with "xn--" but contains non-ASCII characters should - // be an error. Escape the invalid characters. - {"xn--m\xc3\xbcnchen", L"xn--m\xfcnchen", "xn--m%C3%BCnchen", Component(0, 16), CanonHostInfo::BROKEN, -1, ""}, - }; - - // CanonicalizeHost() non-verbose. - std::string out_str; - for (size_t i = 0; i < arraysize(host_cases); i++) { - // Narrow version. - if (host_cases[i].input8) { - int host_len = static_cast(strlen(host_cases[i].input8)); - Component in_comp(0, host_len); - Component out_comp; - - out_str.clear(); - StdStringCanonOutput output(&out_str); - - bool success = CanonicalizeHost(host_cases[i].input8, in_comp, &output, - &out_comp); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, - success) << "for input: " << host_cases[i].input8; - EXPECT_EQ(std::string(host_cases[i].expected), out_str) << - "for input: " << host_cases[i].input8; - EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin) << - "for input: " << host_cases[i].input8; - EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len) << - "for input: " << host_cases[i].input8; - } - - // Wide version. - if (host_cases[i].input16) { - base::string16 input16( - test_utils::TruncateWStringToUTF16(host_cases[i].input16)); - int host_len = static_cast(input16.length()); - Component in_comp(0, host_len); - Component out_comp; - - out_str.clear(); - StdStringCanonOutput output(&out_str); - - bool success = CanonicalizeHost(input16.c_str(), in_comp, &output, - &out_comp); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family != CanonHostInfo::BROKEN, - success); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(host_cases[i].expected_component.len, out_comp.len); - } - } - - // CanonicalizeHostVerbose() - for (size_t i = 0; i < arraysize(host_cases); i++) { - // Narrow version. - if (host_cases[i].input8) { - int host_len = static_cast(strlen(host_cases[i].input8)); - Component in_comp(0, host_len); - - out_str.clear(); - StdStringCanonOutput output(&out_str); - CanonHostInfo host_info; - - CanonicalizeHostVerbose(host_cases[i].input8, in_comp, &output, - &host_info); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, - host_info.out_host.begin); - EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(std::string(host_cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_cases[i].expected_family == CanonHostInfo::IPV4) { - EXPECT_EQ(host_cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - } - - // Wide version. - if (host_cases[i].input16) { - base::string16 input16( - test_utils::TruncateWStringToUTF16(host_cases[i].input16)); - int host_len = static_cast(input16.length()); - Component in_comp(0, host_len); - - out_str.clear(); - StdStringCanonOutput output(&out_str); - CanonHostInfo host_info; - - CanonicalizeHostVerbose(input16.c_str(), in_comp, &output, &host_info); - output.Complete(); - - EXPECT_EQ(host_cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(host_cases[i].expected), out_str); - EXPECT_EQ(host_cases[i].expected_component.begin, - host_info.out_host.begin); - EXPECT_EQ(host_cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(std::string(host_cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_cases[i].expected_family == CanonHostInfo::IPV4) { - EXPECT_EQ(host_cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - } - } -} - -TEST(URLCanonTest, IPv4) { - IPAddressCase cases[] = { - // Empty is not an IP address. - {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {".", L".", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Regular IP addresses in different bases. - {"192.168.0.1", L"192.168.0.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - {"0300.0250.00.01", L"0300.0250.00.01", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - {"0xC0.0Xa8.0x0.0x1", L"0xC0.0Xa8.0x0.0x1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - // Non-IP addresses due to invalid characters. - {"192.168.9.com", L"192.168.9.com", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Invalid characters for the base should be rejected. - {"19a.168.0.1", L"19a.168.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0308.0250.00.01", L"0308.0250.00.01", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0xCG.0xA8.0x0.0x1", L"0xCG.0xA8.0x0.0x1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // If there are not enough components, the last one should fill them out. - {"192", L"192", "0.0.0.192", Component(0, 9), CanonHostInfo::IPV4, 1, "000000C0"}, - {"0xC0a80001", L"0xC0a80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, - {"030052000001", L"030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, - {"000030052000001", L"000030052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 1, "C0A80001"}, - {"192.168", L"192.168", "192.0.0.168", Component(0, 11), CanonHostInfo::IPV4, 2, "C00000A8"}, - {"192.0x00A80001", L"192.0x000A80001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"}, - {"0xc0.052000001", L"0xc0.052000001", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 2, "C0A80001"}, - {"192.168.1", L"192.168.1", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0A80001"}, - // Too many components means not an IP address. - {"192.168.0.0.1", L"192.168.0.0.1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // We allow a single trailing dot. - {"192.168.0.1.", L"192.168.0.1.", "192.168.0.1", Component(0, 11), CanonHostInfo::IPV4, 4, "C0A80001"}, - {"192.168.0.1. hello", L"192.168.0.1. hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"192.168.0.1..", L"192.168.0.1..", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Two dots in a row means not an IP address. - {"192.168..1", L"192.168..1", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Any numerical overflow should be marked as BROKEN. - {"0x100.0", L"0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0x100.0.0", L"0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0x100.0.0.0", L"0x100.0.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0x100.0.0", L"0.0x100.0.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0.0x100.0", L"0.0.0x100.0", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0.0.0x100", L"0.0.0.0x100", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0.0x10000", L"0.0.0x10000", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0.0x1000000", L"0.0x1000000", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0x100000000", L"0x100000000", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Repeat the previous tests, minus 1, to verify boundaries. - {"0xFF.0", L"0xFF.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 2, "FF000000"}, - {"0xFF.0.0", L"0xFF.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 3, "FF000000"}, - {"0xFF.0.0.0", L"0xFF.0.0.0", "255.0.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "FF000000"}, - {"0.0xFF.0.0", L"0.0xFF.0.0", "0.255.0.0", Component(0, 9), CanonHostInfo::IPV4, 4, "00FF0000"}, - {"0.0.0xFF.0", L"0.0.0xFF.0", "0.0.255.0", Component(0, 9), CanonHostInfo::IPV4, 4, "0000FF00"}, - {"0.0.0.0xFF", L"0.0.0.0xFF", "0.0.0.255", Component(0, 9), CanonHostInfo::IPV4, 4, "000000FF"}, - {"0.0.0xFFFF", L"0.0.0xFFFF", "0.0.255.255", Component(0, 11), CanonHostInfo::IPV4, 3, "0000FFFF"}, - {"0.0xFFFFFF", L"0.0xFFFFFF", "0.255.255.255", Component(0, 13), CanonHostInfo::IPV4, 2, "00FFFFFF"}, - {"0xFFFFFFFF", L"0xFFFFFFFF", "255.255.255.255", Component(0, 15), CanonHostInfo::IPV4, 1, "FFFFFFFF"}, - // Old trunctations tests. They're all "BROKEN" now. - {"276.256.0xf1a2.077777", L"276.256.0xf1a2.077777", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"192.168.0.257", L"192.168.0.257", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"192.168.0xa20001", L"192.168.0xa20001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"192.015052000001", L"192.015052000001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"0X12C0a80001", L"0X12C0a80001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"276.1.2", L"276.1.2", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Spaces should be rejected. - {"192.168.0.1 hello", L"192.168.0.1 hello", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Very large numbers. - {"0000000000000300.0x00000000000000fF.00000000000000001", L"0000000000000300.0x00000000000000fF.00000000000000001", "192.255.0.1", Component(0, 11), CanonHostInfo::IPV4, 3, "C0FF0001"}, - {"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", L"0000000000000300.0xffffffffFFFFFFFF.3022415481470977", "", Component(0, 11), CanonHostInfo::BROKEN, -1, ""}, - // A number has no length limit, but long numbers can still overflow. - {"00000000000000000001", L"00000000000000000001", "0.0.0.1", Component(0, 7), CanonHostInfo::IPV4, 1, "00000001"}, - {"0000000000000000100000000000000001", L"0000000000000000100000000000000001", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // If a long component is non-numeric, it's a hostname, *not* a broken IP. - {"0.0.0.000000000000000000z", L"0.0.0.000000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - {"0.0.0.100000000000000000z", L"0.0.0.100000000000000000z", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Truncation of all zeros should still result in 0. - {"0.00.0x.0x0", L"0.00.0x.0x0", "0.0.0.0", Component(0, 7), CanonHostInfo::IPV4, 4, "00000000"}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - // 8-bit version. - Component component(0, static_cast(strlen(cases[i].input8))); - - std::string out_str1; - StdStringCanonOutput output1(&out_str1); - CanonHostInfo host_info; - CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info); - output1.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_info.family == CanonHostInfo::IPV4) { - EXPECT_STREQ(cases[i].expected, out_str1.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - - // 16-bit version. - base::string16 input16( - test_utils::TruncateWStringToUTF16(cases[i].input16)); - component = Component(0, static_cast(input16.length())); - - std::string out_str2; - StdStringCanonOutput output2(&out_str2); - CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info); - output2.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_info.family == CanonHostInfo::IPV4) { - EXPECT_STREQ(cases[i].expected, out_str2.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - EXPECT_EQ(cases[i].expected_num_ipv4_components, - host_info.num_ipv4_components); - } - } -} - -TEST(URLCanonTest, IPv6) { - IPAddressCase cases[] = { - // Empty is not an IP address. - {"", L"", "", Component(), CanonHostInfo::NEUTRAL, -1, ""}, - // Non-IPs with [:] characters are marked BROKEN. - {":", L":", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[", L"[", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:", L"[:", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"]", L"]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {":]", L":]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[]", L"[]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:]", L"[:]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Regular IP address is invalid without bounding '[' and ']'. - {"2001:db8::1", L"2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[2001:db8::1", L"[2001:db8::1", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"2001:db8::1]", L"2001:db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Regular IP addresses. - {"[::]", L"[::]", "[::]", Component(0,4), CanonHostInfo::IPV6, -1, "00000000000000000000000000000000"}, - {"[::1]", L"[::1]", "[::1]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000001"}, - {"[1::]", L"[1::]", "[1::]", Component(0,5), CanonHostInfo::IPV6, -1, "00010000000000000000000000000000"}, - - // Leading zeros should be stripped. - {"[000:01:02:003:004:5:6:007]", L"[000:01:02:003:004:5:6:007]", "[0:1:2:3:4:5:6:7]", Component(0,17), CanonHostInfo::IPV6, -1, "00000001000200030004000500060007"}, - - // Upper case letters should be lowercased. - {"[A:b:c:DE:fF:0:1:aC]", L"[A:b:c:DE:fF:0:1:aC]", "[a:b:c:de:ff:0:1:ac]", Component(0,20), CanonHostInfo::IPV6, -1, "000A000B000C00DE00FF0000000100AC"}, - - // The same address can be written with different contractions, but should - // get canonicalized to the same thing. - {"[1:0:0:2::3:0]", L"[1:0:0:2::3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"}, - {"[1::2:0:0:3:0]", L"[1::2:0:0:3:0]", "[1::2:0:0:3:0]", Component(0,14), CanonHostInfo::IPV6, -1, "00010000000000020000000000030000"}, - - // Addresses with embedded IPv4. - {"[::192.168.0.1]", L"[::192.168.0.1]", "[::c0a8:1]", Component(0,10), CanonHostInfo::IPV6, -1, "000000000000000000000000C0A80001"}, - {"[::ffff:192.168.0.1]", L"[::ffff:192.168.0.1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"}, - {"[::eeee:192.168.0.1]", L"[::eeee:192.168.0.1]", "[::eeee:c0a8:1]", Component(0, 15), CanonHostInfo::IPV6, -1, "00000000000000000000EEEEC0A80001"}, - {"[2001::192.168.0.1]", L"[2001::192.168.0.1]", "[2001::c0a8:1]", Component(0, 14), CanonHostInfo::IPV6, -1, "200100000000000000000000C0A80001"}, - {"[1:2:192.168.0.1:5:6]", L"[1:2:192.168.0.1:5:6]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // IPv4 with last component missing. - {"[::ffff:192.1.2]", L"[::ffff:192.1.2]", "[::ffff:c001:2]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0010002"}, - - // IPv4 using hex. - // TODO(eroman): Should this format be disallowed? - {"[::ffff:0xC0.0Xa8.0x0.0x1]", L"[::ffff:0xC0.0Xa8.0x0.0x1]", "[::ffff:c0a8:1]", Component(0,15), CanonHostInfo::IPV6, -1, "00000000000000000000FFFFC0A80001"}, - - // There may be zeros surrounding the "::" contraction. - {"[0:0::0:0:8]", L"[0:0::0:0:8]", "[::8]", Component(0,5), CanonHostInfo::IPV6, -1, "00000000000000000000000000000008"}, - - {"[2001:db8::1]", L"[2001:db8::1]", "[2001:db8::1]", Component(0,13), CanonHostInfo::IPV6, -1, "20010DB8000000000000000000000001"}, - - // Can only have one "::" contraction in an IPv6 string literal. - {"[2001::db8::1]", L"[2001::db8::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // No more than 2 consecutive ':'s. - {"[2001:db8:::1]", L"[2001:db8:::1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:::]", L"[:::]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Non-IP addresses due to invalid characters. - {"[2001::.com]", L"[2001::.com]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // If there are not enough components, the last one should fill them out. - // ... omitted at this time ... - // Too many components means not an IP address. Similarly, with too few - // if using IPv4 compat or mapped addresses. - {"[::192.168.0.0.1]", L"[::192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[::ffff:192.168.0.0.1]", L"[::ffff:192.168.0.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1:2:3:4:5:6:7:8:9]", L"[1:2:3:4:5:6:7:8:9]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Too many bits (even though 8 comonents, the last one holds 32 bits). - {"[0:0:0:0:0:0:0:192.168.0.1]", L"[0:0:0:0:0:0:0:192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Too many bits specified -- the contraction would have to be zero-length - // to not exceed 128 bits. - {"[1:2:3:4:5:6::192.168.0.1]", L"[1:2:3:4:5:6::192.168.0.1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // The contraction is for 16 bits of zero. - {"[1:2:3:4:5:6::8]", L"[1:2:3:4:5:6::8]", "[1:2:3:4:5:6:0:8]", Component(0,17), CanonHostInfo::IPV6, -1, "00010002000300040005000600000008"}, - - // Cannot have a trailing colon. - {"[1:2:3:4:5:6:7:8:]", L"[1:2:3:4:5:6:7:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1:2:3:4:5:6:192.168.0.1:]", L"[1:2:3:4:5:6:192.168.0.1:]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Cannot have negative numbers. - {"[-1:2:3:4:5:6:7:8]", L"[-1:2:3:4:5:6:7:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Scope ID -- the URL may contain an optional ["%" ] section. - // The scope_id should be included in the canonicalized URL, and is an - // unsigned decimal number. - - // Invalid because no ID was given after the percent. - - // Don't allow scope-id - {"[1::%1]", L"[1::%1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1::%eth0]", L"[1::%eth0]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[1::%]", L"[1::%]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[%]", L"[%]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[::%:]", L"[::%:]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // Don't allow leading or trailing colons. - {"[:0:0::0:0:8]", L"[:0:0::0:0:8]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[0:0::0:0:8:]", L"[0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - {"[:0:0::0:0:8:]", L"[:0:0::0:0:8:]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - - // We allow a single trailing dot. - // ... omitted at this time ... - // Two dots in a row means not an IP address. - {"[::192.168..1]", L"[::192.168..1]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - // Any non-first components get truncated to one byte. - // ... omitted at this time ... - // Spaces should be rejected. - {"[::1 hello]", L"[::1 hello]", "", Component(), CanonHostInfo::BROKEN, -1, ""}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - // 8-bit version. - Component component(0, static_cast(strlen(cases[i].input8))); - - std::string out_str1; - StdStringCanonOutput output1(&out_str1); - CanonHostInfo host_info; - CanonicalizeIPAddress(cases[i].input8, component, &output1, &host_info); - output1.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())) << "iter " << i << " host " << cases[i].input8; - if (host_info.family == CanonHostInfo::IPV6) { - EXPECT_STREQ(cases[i].expected, out_str1.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, - host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - } - - // 16-bit version. - base::string16 input16( - test_utils::TruncateWStringToUTF16(cases[i].input16)); - component = Component(0, static_cast(input16.length())); - - std::string out_str2; - StdStringCanonOutput output2(&out_str2); - CanonicalizeIPAddress(input16.c_str(), component, &output2, &host_info); - output2.Complete(); - - EXPECT_EQ(cases[i].expected_family, host_info.family); - EXPECT_EQ(std::string(cases[i].expected_address_hex), - BytesToHexString(host_info.address, host_info.AddressLength())); - if (host_info.family == CanonHostInfo::IPV6) { - EXPECT_STREQ(cases[i].expected, out_str2.c_str()); - EXPECT_EQ(cases[i].expected_component.begin, host_info.out_host.begin); - EXPECT_EQ(cases[i].expected_component.len, host_info.out_host.len); - } - } -} - -TEST(URLCanonTest, IPEmpty) { - std::string out_str1; - StdStringCanonOutput output1(&out_str1); - CanonHostInfo host_info; - - // This tests tests. - const char spec[] = "192.168.0.1"; - CanonicalizeIPAddress(spec, Component(), &output1, &host_info); - EXPECT_FALSE(host_info.IsIPAddress()); - - CanonicalizeIPAddress(spec, Component(0, 0), &output1, &host_info); - EXPECT_FALSE(host_info.IsIPAddress()); -} - -// Verifies that CanonicalizeHostSubstring produces the expected output and -// does not "fix" IP addresses. Because this code is a subset of -// CanonicalizeHost, the shared functionality is not tested. -TEST(URLCanonTest, CanonicalizeHostSubstring) { - // Basic sanity check. - { - std::string out_str; - StdStringCanonOutput output(&out_str); - EXPECT_TRUE(CanonicalizeHostSubstring("M\xc3\x9cNCHEN.com", - Component(0, 12), &output)); - output.Complete(); - EXPECT_EQ("xn--mnchen-3ya.com", out_str); - } - - // Failure case. - { - std::string out_str; - StdStringCanonOutput output(&out_str); - EXPECT_FALSE(CanonicalizeHostSubstring( - test_utils::TruncateWStringToUTF16(L"\xfdd0zyx.com").c_str(), - Component(0, 8), &output)); - output.Complete(); - EXPECT_EQ("%EF%BF%BDzyx.com", out_str); - } - - // Should return true for empty input strings. - { - std::string out_str; - StdStringCanonOutput output(&out_str); - EXPECT_TRUE(CanonicalizeHostSubstring("", Component(0, 0), &output)); - output.Complete(); - EXPECT_EQ(std::string(), out_str); - } - - // Numbers that look like IP addresses should not be changed. - { - std::string out_str; - StdStringCanonOutput output(&out_str); - EXPECT_TRUE( - CanonicalizeHostSubstring("01.02.03.04", Component(0, 11), &output)); - output.Complete(); - EXPECT_EQ("01.02.03.04", out_str); - } -} - -TEST(URLCanonTest, UserInfo) { - // Note that the canonicalizer should escape and treat empty components as - // not being there. - - // We actually parse a full input URL so we can get the initial components. - struct UserComponentCase { - const char* input; - const char* expected; - Component expected_username; - Component expected_password; - bool expected_success; - } user_info_cases[] = { - {"http://user:pass@host.com/", "user:pass@", Component(0, 4), Component(5, 4), true}, - {"http://@host.com/", "", Component(0, -1), Component(0, -1), true}, - {"http://:@host.com/", "", Component(0, -1), Component(0, -1), true}, - {"http://foo:@host.com/", "foo@", Component(0, 3), Component(0, -1), true}, - {"http://:foo@host.com/", ":foo@", Component(0, 0), Component(1, 3), true}, - {"http://^ :$\t@host.com/", "%5E%20:$%09@", Component(0, 6), Component(7, 4), true}, - {"http://user:pass@/", "user:pass@", Component(0, 4), Component(5, 4), true}, - {"http://%2540:bar@domain.com/", "%2540:bar@", Component(0, 5), Component(6, 3), true }, - - // IE7 compatibility: old versions allowed backslashes in usernames, but - // IE7 does not. We disallow it as well. - {"ftp://me\\mydomain:pass@foo.com/", "", Component(0, -1), Component(0, -1), true}, - }; - - for (size_t i = 0; i < arraysize(user_info_cases); i++) { - int url_len = static_cast(strlen(user_info_cases[i].input)); - Parsed parsed; - ParseStandardURL(user_info_cases[i].input, url_len, &parsed); - Component out_user, out_pass; - std::string out_str; - StdStringCanonOutput output1(&out_str); - - bool success = CanonicalizeUserInfo(user_info_cases[i].input, - parsed.username, - user_info_cases[i].input, - parsed.password, - &output1, - &out_user, - &out_pass); - output1.Complete(); - - EXPECT_EQ(user_info_cases[i].expected_success, success); - EXPECT_EQ(std::string(user_info_cases[i].expected), out_str); - EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin); - EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len); - EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin); - EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len); - - // Now try the wide version - out_str.clear(); - StdStringCanonOutput output2(&out_str); - base::string16 wide_input(base::UTF8ToUTF16(user_info_cases[i].input)); - success = CanonicalizeUserInfo(wide_input.c_str(), - parsed.username, - wide_input.c_str(), - parsed.password, - &output2, - &out_user, - &out_pass); - output2.Complete(); - - EXPECT_EQ(user_info_cases[i].expected_success, success); - EXPECT_EQ(std::string(user_info_cases[i].expected), out_str); - EXPECT_EQ(user_info_cases[i].expected_username.begin, out_user.begin); - EXPECT_EQ(user_info_cases[i].expected_username.len, out_user.len); - EXPECT_EQ(user_info_cases[i].expected_password.begin, out_pass.begin); - EXPECT_EQ(user_info_cases[i].expected_password.len, out_pass.len); - } -} - -TEST(URLCanonTest, Port) { - // We only need to test that the number gets properly put into the output - // buffer. The parser unit tests will test scanning the number correctly. - // - // Note that the CanonicalizePort will always prepend a colon to the output - // to separate it from the colon that it assumes precedes it. - struct PortCase { - const char* input; - int default_port; - const char* expected; - Component expected_component; - bool expected_success; - } port_cases[] = { - // Invalid input should be copied w/ failure. - {"as df", 80, ":as%20df", Component(1, 7), false}, - {"-2", 80, ":-2", Component(1, 2), false}, - // Default port should be omitted. - {"80", 80, "", Component(0, -1), true}, - {"8080", 80, ":8080", Component(1, 4), true}, - // PORT_UNSPECIFIED should mean always keep the port. - {"80", PORT_UNSPECIFIED, ":80", Component(1, 2), true}, - }; - - for (size_t i = 0; i < arraysize(port_cases); i++) { - int url_len = static_cast(strlen(port_cases[i].input)); - Component in_comp(0, url_len); - Component out_comp; - std::string out_str; - StdStringCanonOutput output1(&out_str); - bool success = CanonicalizePort(port_cases[i].input, - in_comp, - port_cases[i].default_port, - &output1, - &out_comp); - output1.Complete(); - - EXPECT_EQ(port_cases[i].expected_success, success); - EXPECT_EQ(std::string(port_cases[i].expected), out_str); - EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len); - - // Now try the wide version - out_str.clear(); - StdStringCanonOutput output2(&out_str); - base::string16 wide_input(base::UTF8ToUTF16(port_cases[i].input)); - success = CanonicalizePort(wide_input.c_str(), - in_comp, - port_cases[i].default_port, - &output2, - &out_comp); - output2.Complete(); - - EXPECT_EQ(port_cases[i].expected_success, success); - EXPECT_EQ(std::string(port_cases[i].expected), out_str); - EXPECT_EQ(port_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(port_cases[i].expected_component.len, out_comp.len); - } -} - -TEST(URLCanonTest, Path) { - DualComponentCase path_cases[] = { - // ----- path collapsing tests ----- - {"/././foo", L"/././foo", "/foo", Component(0, 4), true}, - {"/./.foo", L"/./.foo", "/.foo", Component(0, 5), true}, - {"/foo/.", L"/foo/.", "/foo/", Component(0, 5), true}, - {"/foo/./", L"/foo/./", "/foo/", Component(0, 5), true}, - // double dots followed by a slash or the end of the string count - {"/foo/bar/..", L"/foo/bar/..", "/foo/", Component(0, 5), true}, - {"/foo/bar/../", L"/foo/bar/../", "/foo/", Component(0, 5), true}, - // don't count double dots when they aren't followed by a slash - {"/foo/..bar", L"/foo/..bar", "/foo/..bar", Component(0, 10), true}, - // some in the middle - {"/foo/bar/../ton", L"/foo/bar/../ton", "/foo/ton", Component(0, 8), true}, - {"/foo/bar/../ton/../../a", L"/foo/bar/../ton/../../a", "/a", Component(0, 2), true}, - // we should not be able to go above the root - {"/foo/../../..", L"/foo/../../..", "/", Component(0, 1), true}, - {"/foo/../../../ton", L"/foo/../../../ton", "/ton", Component(0, 4), true}, - // escaped dots should be unescaped and treated the same as dots - {"/foo/%2e", L"/foo/%2e", "/foo/", Component(0, 5), true}, - {"/foo/%2e%2", L"/foo/%2e%2", "/foo/.%2", Component(0, 8), true}, - {"/foo/%2e./%2e%2e/.%2e/%2e.bar", L"/foo/%2e./%2e%2e/.%2e/%2e.bar", "/..bar", Component(0, 6), true}, - // Multiple slashes in a row should be preserved and treated like empty - // directory names. - {"////../..", L"////../..", "//", Component(0, 2), true}, - - // ----- escaping tests ----- - {"/foo", L"/foo", "/foo", Component(0, 4), true}, - // Valid escape sequence - {"/%20foo", L"/%20foo", "/%20foo", Component(0, 7), true}, - // Invalid escape sequence we should pass through unchanged. - {"/foo%", L"/foo%", "/foo%", Component(0, 5), true}, - {"/foo%2", L"/foo%2", "/foo%2", Component(0, 6), true}, - // Invalid escape sequence: bad characters should be treated the same as - // the sourrounding text, not as escaped (in this case, UTF-8). - {"/foo%2zbar", L"/foo%2zbar", "/foo%2zbar", Component(0, 10), true}, - {"/foo%2\xc2\xa9zbar", NULL, "/foo%2%C2%A9zbar", Component(0, 16), true}, - {NULL, L"/foo%2\xc2\xa9zbar", "/foo%2%C3%82%C2%A9zbar", Component(0, 22), true}, - // Regular characters that are escaped should be unescaped - {"/foo%41%7a", L"/foo%41%7a", "/fooAz", Component(0, 6), true}, - // Funny characters that are unescaped should be escaped - {"/foo\x09\x91%91", NULL, "/foo%09%91%91", Component(0, 13), true}, - {NULL, L"/foo\x09\x91%91", "/foo%09%C2%91%91", Component(0, 16), true}, - // Invalid characters that are escaped should cause a failure. - {"/foo%00%51", L"/foo%00%51", "/foo%00Q", Component(0, 8), false}, - // Some characters should be passed through unchanged regardless of esc. - {"/(%28:%3A%29)", L"/(%28:%3A%29)", "/(%28:%3A%29)", Component(0, 13), true}, - // Characters that are properly escaped should not have the case changed - // of hex letters. - {"/%3A%3a%3C%3c", L"/%3A%3a%3C%3c", "/%3A%3a%3C%3c", Component(0, 13), true}, - // Funny characters that are unescaped should be escaped - {"/foo\tbar", L"/foo\tbar", "/foo%09bar", Component(0, 10), true}, - // Backslashes should get converted to forward slashes - {"\\foo\\bar", L"\\foo\\bar", "/foo/bar", Component(0, 8), true}, - // Hashes found in paths (possibly only when the caller explicitly sets - // the path on an already-parsed URL) should be escaped. - {"/foo#bar", L"/foo#bar", "/foo%23bar", Component(0, 10), true}, - // %7f should be allowed and %3D should not be unescaped (these were wrong - // in a previous version). - {"/%7Ffp3%3Eju%3Dduvgw%3Dd", L"/%7Ffp3%3Eju%3Dduvgw%3Dd", "/%7Ffp3%3Eju%3Dduvgw%3Dd", Component(0, 24), true}, - // @ should be passed through unchanged (escaped or unescaped). - {"/@asdf%40", L"/@asdf%40", "/@asdf%40", Component(0, 9), true}, - // Nested escape sequences should result in escaping the leading '%' if - // unescaping would result in a new escape sequence. - {"/%A%42", L"/%A%42", "/%25AB", Component(0, 6), true}, - {"/%%41B", L"/%%41B", "/%25AB", Component(0, 6), true}, - {"/%%41%42", L"/%%41%42", "/%25AB", Component(0, 6), true}, - // Make sure truncated "nested" escapes don't result in reading off the - // string end. - {"/%%41", L"/%%41", "/%A", Component(0, 3), true}, - // Don't unescape the leading '%' if unescaping doesn't result in a valid - // new escape sequence. - {"/%%470", L"/%%470", "/%G0", Component(0, 4), true}, - {"/%%2D%41", L"/%%2D%41", "/%-A", Component(0, 4), true}, - // Don't erroneously downcast a UTF-16 charater in a way that makes it - // look like part of an escape sequence. - {NULL, L"/%%41\x0130", "/%A%C4%B0", Component(0, 9), true}, - - // ----- encoding tests ----- - // Basic conversions - {"/\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xbd\xa0\xe5\xa5\xbd", L"/\x4f60\x597d\x4f60\x597d", "/%E4%BD%A0%E5%A5%BD%E4%BD%A0%E5%A5%BD", Component(0, 37), true}, - // Invalid unicode characters should fail. We only do validation on - // UTF-16 input, so this doesn't happen on 8-bit. - {"/\xef\xb7\x90zyx", NULL, "/%EF%B7%90zyx", Component(0, 13), true}, - {NULL, L"/\xfdd0zyx", "/%EF%BF%BDzyx", Component(0, 13), false}, - }; - - for (size_t i = 0; i < arraysize(path_cases); i++) { - if (path_cases[i].input8) { - int len = static_cast(strlen(path_cases[i].input8)); - Component in_comp(0, len); - Component out_comp; - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = - CanonicalizePath(path_cases[i].input8, in_comp, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(path_cases[i].expected_success, success); - EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(path_cases[i].expected, out_str); - } - - if (path_cases[i].input16) { - base::string16 input16( - test_utils::TruncateWStringToUTF16(path_cases[i].input16)); - int len = static_cast(input16.length()); - Component in_comp(0, len); - Component out_comp; - std::string out_str; - StdStringCanonOutput output(&out_str); - - bool success = - CanonicalizePath(input16.c_str(), in_comp, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(path_cases[i].expected_success, success); - EXPECT_EQ(path_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(path_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(path_cases[i].expected, out_str); - } - } - - // Manual test: embedded NULLs should be escaped and the URL should be marked - // as invalid. - const char path_with_null[] = "/ab\0c"; - Component in_comp(0, 5); - Component out_comp; - - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = CanonicalizePath(path_with_null, in_comp, &output, &out_comp); - output.Complete(); - EXPECT_FALSE(success); - EXPECT_EQ("/ab%00c", out_str); -} - -TEST(URLCanonTest, Query) { - struct QueryCase { - const char* input8; - const wchar_t* input16; - const char* expected; - } query_cases[] = { - // Regular ASCII case. - {"foo=bar", L"foo=bar", "?foo=bar"}, - // Allow question marks in the query without escaping - {"as?df", L"as?df", "?as?df"}, - // Always escape '#' since it would mark the ref. - {"as#df", L"as#df", "?as%23df"}, - // Escape some questionable 8-bit characters, but never unescape. - {"\x02hello\x7f bye", L"\x02hello\x7f bye", "?%02hello%7F%20bye"}, - {"%40%41123", L"%40%41123", "?%40%41123"}, - // Chinese input/output - {"q=\xe4\xbd\xa0\xe5\xa5\xbd", L"q=\x4f60\x597d", "?q=%E4%BD%A0%E5%A5%BD"}, - // Invalid UTF-8/16 input should be replaced with invalid characters. - {"q=\xed\xed", L"q=\xd800\xd800", "?q=%EF%BF%BD%EF%BF%BD"}, - // Don't allow < or > because sometimes they are used for XSS if the - // URL is echoed in content. Firefox does this, IE doesn't. - {"q=", L"q=", "?q=%3Casdf%3E"}, - // Escape double quotemarks in the query. - {"q=\"asdf\"", L"q=\"asdf\"", "?q=%22asdf%22"}, - }; - - for (size_t i = 0; i < arraysize(query_cases); i++) { - Component out_comp; - - if (query_cases[i].input8) { - int len = static_cast(strlen(query_cases[i].input8)); - Component in_comp(0, len); - std::string out_str; - - StdStringCanonOutput output(&out_str); - CanonicalizeQuery(query_cases[i].input8, in_comp, NULL, &output, - &out_comp); - output.Complete(); - - EXPECT_EQ(query_cases[i].expected, out_str); - } - - if (query_cases[i].input16) { - base::string16 input16( - test_utils::TruncateWStringToUTF16(query_cases[i].input16)); - int len = static_cast(input16.length()); - Component in_comp(0, len); - std::string out_str; - - StdStringCanonOutput output(&out_str); - CanonicalizeQuery(input16.c_str(), in_comp, NULL, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(query_cases[i].expected, out_str); - } - } - - // Extra test for input with embedded NULL; - std::string out_str; - StdStringCanonOutput output(&out_str); - Component out_comp; - CanonicalizeQuery("a \x00z\x01", Component(0, 5), NULL, &output, &out_comp); - output.Complete(); - EXPECT_EQ("?a%20%00z%01", out_str); -} - -TEST(URLCanonTest, Ref) { - // Refs are trivial, it just checks the encoding. - DualComponentCase ref_cases[] = { - {"hello!", L"hello!", "#hello!", Component(1, 6), true}, - // We should escape spaces, double-quotes, angled braces, and backtics. - {"hello, world", L"hello, world", "#hello,%20world", Component(1, 14), - true}, - {"hello,\"world", L"hello,\"world", "#hello,%22world", Component(1, 14), - true}, - {"hello,world", L"hello,>world", "#hello,%3Eworld", Component(1, 14), - true}, - {"hello,`world", L"hello,`world", "#hello,%60world", Component(1, 14), - true}, - // UTF-8/wide input should be preserved - {"\xc2\xa9", L"\xa9", "#%C2%A9", Component(1, 6), true}, - // Test a characer that takes > 16 bits (U+10300 = old italic letter A) - {"\xF0\x90\x8C\x80ss", L"\xd800\xdf00ss", "#%F0%90%8C%80ss", - Component(1, 14), true}, - // Escaping should be preserved unchanged, even invalid ones - {"%41%a", L"%41%a", "#%41%a", Component(1, 5), true}, - // Invalid UTF-8/16 input should be flagged and the input made valid - {"\xc2", NULL, "#%EF%BF%BD", Component(1, 9), true}, - {NULL, L"\xd800\x597d", "#%EF%BF%BD%E5%A5%BD", Component(1, 18), true}, - // Test a Unicode invalid character. - {"a\xef\xb7\x90", L"a\xfdd0", "#a%EF%BF%BD", Component(1, 10), true}, - // Refs can have # signs and we should preserve them. - {"asdf#qwer", L"asdf#qwer", "#asdf#qwer", Component(1, 9), true}, - {"#asdf", L"#asdf", "##asdf", Component(1, 5), true}, - }; - - for (size_t i = 0; i < arraysize(ref_cases); i++) { - // 8-bit input - if (ref_cases[i].input8) { - int len = static_cast(strlen(ref_cases[i].input8)); - Component in_comp(0, len); - Component out_comp; - - std::string out_str; - StdStringCanonOutput output(&out_str); - CanonicalizeRef(ref_cases[i].input8, in_comp, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(ref_cases[i].expected, out_str); - } - - // 16-bit input - if (ref_cases[i].input16) { - base::string16 input16( - test_utils::TruncateWStringToUTF16(ref_cases[i].input16)); - int len = static_cast(input16.length()); - Component in_comp(0, len); - Component out_comp; - - std::string out_str; - StdStringCanonOutput output(&out_str); - CanonicalizeRef(input16.c_str(), in_comp, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(ref_cases[i].expected_component.begin, out_comp.begin); - EXPECT_EQ(ref_cases[i].expected_component.len, out_comp.len); - EXPECT_EQ(ref_cases[i].expected, out_str); - } - } - - // Try one with an embedded NULL. It should be stripped. - const char null_input[5] = "ab\x00z"; - Component null_input_component(0, 4); - Component out_comp; - - std::string out_str; - StdStringCanonOutput output(&out_str); - CanonicalizeRef(null_input, null_input_component, &output, &out_comp); - output.Complete(); - - EXPECT_EQ(1, out_comp.begin); - EXPECT_EQ(3, out_comp.len); - EXPECT_EQ("#abz", out_str); -} - -TEST(URLCanonTest, CanonicalizeStandardURL) { - // The individual component canonicalize tests should have caught the cases - // for each of those components. Here, we just need to test that the various - // parts are included or excluded properly, and have the correct separators. - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - } cases[] = { - {"http://www.google.com/foo?bar=baz#", - "http://www.google.com/foo?bar=baz#", true}, - {"http://[www.google.com]/", "http://[www.google.com]/", false}, - {"ht\ttp:@www.google.com:80/;p?#", "ht%09tp://www.google.com:80/;p?#", - false}, - {"http:////////user:@google.com:99?foo", "http://user@google.com:99/?foo", - true}, - {"www.google.com", ":www.google.com/", false}, - {"http://192.0x00A80001", "http://192.168.0.1/", true}, - {"http://www/foo%2Ehtml", "http://www/foo.html", true}, - {"http://user:pass@/", "http://user:pass@/", false}, - {"http://%25DOMAIN:foobar@foodomain.com/", - "http://%25DOMAIN:foobar@foodomain.com/", true}, - - // Backslashes should get converted to forward slashes. - {"http:\\\\www.google.com\\foo", "http://www.google.com/foo", true}, - - // Busted refs shouldn't make the whole thing fail. - {"http://www.google.com/asdf#\xc2", - "http://www.google.com/asdf#%EF%BF%BD", true}, - - // Basic port tests. - {"http://foo:80/", "http://foo/", true}, - {"http://foo:81/", "http://foo:81/", true}, - {"httpa://foo:80/", "httpa://foo:80/", true}, - {"http://foo:-80/", "http://foo:-80/", false}, - - {"https://foo:443/", "https://foo/", true}, - {"https://foo:80/", "https://foo:80/", true}, - {"ftp://foo:21/", "ftp://foo/", true}, - {"ftp://foo:80/", "ftp://foo:80/", true}, - {"gopher://foo:70/", "gopher://foo/", true}, - {"gopher://foo:443/", "gopher://foo:443/", true}, - {"ws://foo:80/", "ws://foo/", true}, - {"ws://foo:81/", "ws://foo:81/", true}, - {"ws://foo:443/", "ws://foo:443/", true}, - {"ws://foo:815/", "ws://foo:815/", true}, - {"wss://foo:80/", "wss://foo:80/", true}, - {"wss://foo:81/", "wss://foo:81/", true}, - {"wss://foo:443/", "wss://foo/", true}, - {"wss://foo:815/", "wss://foo:815/", true}, - - // This particular code path ends up "backing up" to replace an invalid - // host ICU generated with an escaped version. Test that in the context - // of a full URL to make sure the backing up doesn't mess up the non-host - // parts of the URL. "EF B9 AA" is U+FE6A which is a type of percent that - // ICU will convert to an ASCII one, generating "%81". - {"ws:)W\x1eW\xef\xb9\xaa" - "81:80/", - "ws://%29w%1ew%81/", false}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - Parsed parsed; - ParseStandardURL(cases[i].input, url_len, &parsed); - - Parsed out_parsed; - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = CanonicalizeStandardURL( - cases[i].input, url_len, parsed, - SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - } -} - -// The codepath here is the same as for regular canonicalization, so we just -// need to test that things are replaced or not correctly. -TEST(URLCanonTest, ReplaceStandardURL) { - ReplaceCase replace_cases[] = { - // Common case of truncating the path. - {"http://www.google.com/foo?bar=baz#ref", NULL, NULL, NULL, NULL, NULL, "/", kDeleteComp, kDeleteComp, "http://www.google.com/"}, - // Replace everything - {"http://a:b@google.com:22/foo;bar?baz@cat", "https", "me", "pw", "host.com", "99", "/path", "query", "ref", "https://me:pw@host.com:99/path?query#ref"}, - // Replace nothing - {"http://a:b@google.com:22/foo?baz@cat", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "http://a:b@google.com:22/foo?baz@cat"}, - // Replace scheme with filesystem. The result is garbage, but you asked - // for it. - {"http://a:b@google.com:22/foo?baz@cat", "filesystem", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "filesystem://a:b@google.com:22/foo?baz@cat"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - Parsed parsed; - ParseStandardURL(cur.base, base_len, &parsed); - - Replacements r; - typedef Replacements R; // Clean up syntax. - - // Note that for the scheme we pass in a different clear function since - // there is no function to clear the scheme. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - StdStringCanonOutput output(&out_str); - Parsed out_parsed; - ReplaceStandardURL(replace_cases[i].base, parsed, r, - SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, - &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } - - // The path pointer should be ignored if the address is invalid. - { - const char src[] = "http://www.google.com/here_is_the_path"; - int src_len = static_cast(strlen(src)); - - Parsed parsed; - ParseStandardURL(src, src_len, &parsed); - - // Replace the path to 0 length string. By using 1 as the string address, - // the test should get an access violation if it tries to dereference it. - Replacements r; - r.SetPath(reinterpret_cast(0x00000001), Component(0, 0)); - std::string out_str1; - StdStringCanonOutput output1(&out_str1); - Parsed new_parsed; - ReplaceStandardURL(src, parsed, r, - SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, - &output1, &new_parsed); - output1.Complete(); - EXPECT_STREQ("http://www.google.com/", out_str1.c_str()); - - // Same with an "invalid" path. - r.SetPath(reinterpret_cast(0x00000001), Component()); - std::string out_str2; - StdStringCanonOutput output2(&out_str2); - ReplaceStandardURL(src, parsed, r, - SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, NULL, - &output2, &new_parsed); - output2.Complete(); - EXPECT_STREQ("http://www.google.com/", out_str2.c_str()); - } -} - -TEST(URLCanonTest, ReplaceFileURL) { - ReplaceCase replace_cases[] = { - // Replace everything - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, - // Replace nothing - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, - // Clear non-path components (common) - {"file:///C:/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///C:/gaba"}, - // Replace path with something that doesn't begin with a slash and make - // sure it gets added properly. - {"file:///C:/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, "filer", NULL, "/foo", "b", "c", "file://filer/foo?b#c"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///home/gaba?query#ref"}, - {"file:///home/gaba?query#ref", NULL, NULL, NULL, NULL, NULL, NULL, kDeleteComp, kDeleteComp, "file:///home/gaba"}, - {"file:///home/gaba", NULL, NULL, NULL, NULL, NULL, "interesting/", NULL, NULL, "file:///interesting/"}, - // Replace scheme -- shouldn't do anything. - {"file:///C:/gaba?query#ref", "http", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "file:///C:/gaba?query#ref"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - Parsed parsed; - ParseFileURL(cur.base, base_len, &parsed); - - Replacements r; - typedef Replacements R; // Clean up syntax. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - StdStringCanonOutput output(&out_str); - Parsed out_parsed; - ReplaceFileURL(cur.base, parsed, r, NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, ReplaceFileSystemURL) { - ReplaceCase replace_cases[] = { - // Replace everything in the outer URL. - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, "/foo", "b", "c", "filesystem:file:///temporary/foo?b#c"}, - // Replace nothing - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, "filesystem:file:///temporary/gaba?query#ref"}, - // Clear non-path components (common) - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, NULL, kDeleteComp, kDeleteComp, - "filesystem:file:///temporary/gaba"}, - // Replace path with something that doesn't begin with a slash and make - // sure it gets added properly. - {"filesystem:file:///temporary/gaba?query#ref", NULL, NULL, NULL, NULL, - NULL, "interesting/", NULL, NULL, - "filesystem:file:///temporary/interesting/?query#ref"}, - // Replace scheme -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", "http", NULL, NULL, - NULL, NULL, NULL, NULL, NULL, - "filesystem:http://bar.com/t/gaba?query#ref"}, - // Replace username -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, "u2", NULL, NULL, - NULL, NULL, NULL, NULL, "filesystem:http://bar.com/t/gaba?query#ref"}, - // Replace password -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com/t/gaba?query#ref", NULL, NULL, "pw2", - NULL, NULL, NULL, NULL, NULL, - "filesystem:http://bar.com/t/gaba?query#ref"}, - // Replace host -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com:80/t/gaba?query#ref", NULL, NULL, NULL, - "foo.com", NULL, NULL, NULL, NULL, - "filesystem:http://bar.com/t/gaba?query#ref"}, - // Replace port -- shouldn't do anything except canonicalize. - {"filesystem:http://u:p@bar.com:40/t/gaba?query#ref", NULL, NULL, NULL, - NULL, "41", NULL, NULL, NULL, - "filesystem:http://bar.com:40/t/gaba?query#ref"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - Parsed parsed; - ParseFileSystemURL(cur.base, base_len, &parsed); - - Replacements r; - typedef Replacements R; // Clean up syntax. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - StdStringCanonOutput output(&out_str); - Parsed out_parsed; - ReplaceFileSystemURL(cur.base, parsed, r, NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, ReplacePathURL) { - ReplaceCase replace_cases[] = { - // Replace everything - {"data:foo", "javascript", NULL, NULL, NULL, NULL, "alert('foo?');", NULL, NULL, "javascript:alert('foo?');"}, - // Replace nothing - {"data:foo", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "data:foo"}, - // Replace one or the other - {"data:foo", "javascript", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "javascript:foo"}, - {"data:foo", NULL, NULL, NULL, NULL, NULL, "bar", NULL, NULL, "data:bar"}, - {"data:foo", NULL, NULL, NULL, NULL, NULL, kDeleteComp, NULL, NULL, "data:"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - Parsed parsed; - ParsePathURL(cur.base, base_len, false, &parsed); - - Replacements r; - typedef Replacements R; // Clean up syntax. - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - StdStringCanonOutput output(&out_str); - Parsed out_parsed; - ReplacePathURL(cur.base, parsed, r, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, ReplaceMailtoURL) { - ReplaceCase replace_cases[] = { - // Replace everything - {"mailto:jon@foo.com?body=sup", "mailto", NULL, NULL, NULL, NULL, "addr1", "to=tony", NULL, "mailto:addr1?to=tony"}, - // Replace nothing - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "mailto:jon@foo.com?body=sup"}, - // Replace the path - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", NULL, NULL, "mailto:jason?body=sup"}, - // Replace the query - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "custom=1", NULL, "mailto:jon@foo.com?custom=1"}, - // Replace the path and query - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "jason", "custom=1", NULL, "mailto:jason?custom=1"}, - // Set the query to empty (should leave trailing question mark) - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "", NULL, "mailto:jon@foo.com?"}, - // Clear the query - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, NULL, "|", NULL, "mailto:jon@foo.com"}, - // Clear the path - {"mailto:jon@foo.com?body=sup", NULL, NULL, NULL, NULL, NULL, "|", NULL, NULL, "mailto:?body=sup"}, - // Clear the path + query - {"mailto:", NULL, NULL, NULL, NULL, NULL, "|", "|", NULL, "mailto:"}, - // Setting the ref should have no effect - {"mailto:addr1", NULL, NULL, NULL, NULL, NULL, NULL, NULL, "BLAH", "mailto:addr1"}, - }; - - for (size_t i = 0; i < arraysize(replace_cases); i++) { - const ReplaceCase& cur = replace_cases[i]; - int base_len = static_cast(strlen(cur.base)); - Parsed parsed; - ParseMailtoURL(cur.base, base_len, &parsed); - - Replacements r; - typedef Replacements R; - SetupReplComp(&R::SetScheme, &R::ClearRef, &r, cur.scheme); - SetupReplComp(&R::SetUsername, &R::ClearUsername, &r, cur.username); - SetupReplComp(&R::SetPassword, &R::ClearPassword, &r, cur.password); - SetupReplComp(&R::SetHost, &R::ClearHost, &r, cur.host); - SetupReplComp(&R::SetPort, &R::ClearPort, &r, cur.port); - SetupReplComp(&R::SetPath, &R::ClearPath, &r, cur.path); - SetupReplComp(&R::SetQuery, &R::ClearQuery, &r, cur.query); - SetupReplComp(&R::SetRef, &R::ClearRef, &r, cur.ref); - - std::string out_str; - StdStringCanonOutput output(&out_str); - Parsed out_parsed; - ReplaceMailtoURL(cur.base, parsed, r, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(replace_cases[i].expected, out_str); - } -} - -TEST(URLCanonTest, CanonicalizeFileURL) { - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - Component expected_host; - Component expected_path; - } cases[] = { -#ifdef _WIN32 - // Windows-style paths - {"file:c:\\foo\\bar.html", "file:///C:/foo/bar.html", true, Component(), - Component(7, 16)}, - {" File:c|////foo\\bar.html", "file:///C:////foo/bar.html", true, - Component(), Component(7, 19)}, - {"file:", "file:///", true, Component(), Component(7, 1)}, - {"file:UNChost/path", "file://unchost/path", true, Component(7, 7), - Component(14, 5)}, - // CanonicalizeFileURL supports absolute Windows style paths for IE - // compatibility. Note that the caller must decide that this is a file - // URL itself so it can call the file canonicalizer. This is usually - // done automatically as part of relative URL resolving. - {"c:\\foo\\bar", "file:///C:/foo/bar", true, Component(), - Component(7, 11)}, - {"C|/foo/bar", "file:///C:/foo/bar", true, Component(), Component(7, 11)}, - {"/C|\\foo\\bar", "file:///C:/foo/bar", true, Component(), - Component(7, 11)}, - {"//C|/foo/bar", "file:///C:/foo/bar", true, Component(), - Component(7, 11)}, - {"//server/file", "file://server/file", true, Component(7, 6), - Component(13, 5)}, - {"\\\\server\\file", "file://server/file", true, Component(7, 6), - Component(13, 5)}, - {"/\\server/file", "file://server/file", true, Component(7, 6), - Component(13, 5)}, - // We should preserve the number of slashes after the colon for IE - // compatibility, except when there is none, in which case we should - // add one. - {"file:c:foo/bar.html", "file:///C:/foo/bar.html", true, Component(), - Component(7, 16)}, - {"file:/\\/\\C:\\\\//foo\\bar.html", "file:///C:////foo/bar.html", true, - Component(), Component(7, 19)}, - // Three slashes should be non-UNC, even if there is no drive spec (IE - // does this, which makes the resulting request invalid). - {"file:///foo/bar.txt", "file:///foo/bar.txt", true, Component(), - Component(7, 12)}, - // TODO(brettw) we should probably fail for invalid host names, which - // would change the expected result on this test. We also currently allow - // colon even though it's probably invalid, because its currently the - // "natural" result of the way the canonicalizer is written. There doesn't - // seem to be a strong argument for why allowing it here would be bad, so - // we just tolerate it and the load will fail later. - {"FILE:/\\/\\7:\\\\//foo\\bar.html", "file://7:////foo/bar.html", false, - Component(7, 2), Component(9, 16)}, - {"file:filer/home\\me", "file://filer/home/me", true, Component(7, 5), - Component(12, 8)}, - // Make sure relative paths can't go above the "C:" - {"file:///C:/foo/../../../bar.html", "file:///C:/bar.html", true, - Component(), Component(7, 12)}, - // Busted refs shouldn't make the whole thing fail. - {"file:///C:/asdf#\xc2", "file:///C:/asdf#%EF%BF%BD", true, Component(), - Component(7, 8)}, -#else - // Unix-style paths - {"file:///home/me", "file:///home/me", true, Component(), Component(7, 8)}, - // Windowsy ones should get still treated as Unix-style. - {"file:c:\\foo\\bar.html", "file:///c:/foo/bar.html", true, Component(), Component(7, 16)}, - {"file:c|//foo\\bar.html", "file:///c%7C//foo/bar.html", true, Component(), Component(7, 19)}, - // file: tests from WebKit (LayoutTests/fast/loader/url-parse-1.html) - {"//", "file:///", true, Component(), Component(7, 1)}, - {"///", "file:///", true, Component(), Component(7, 1)}, - {"///test", "file:///test", true, Component(), Component(7, 5)}, - {"file://test", "file://test/", true, Component(7, 4), Component(11, 1)}, - {"file://localhost", "file://localhost/", true, Component(7, 9), Component(16, 1)}, - {"file://localhost/", "file://localhost/", true, Component(7, 9), Component(16, 1)}, - {"file://localhost/test", "file://localhost/test", true, Component(7, 9), Component(16, 5)}, -#endif // _WIN32 - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - Parsed parsed; - ParseFileURL(cases[i].input, url_len, &parsed); - - Parsed out_parsed; - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = CanonicalizeFileURL(cases[i].input, url_len, parsed, NULL, - &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - - // Make sure the spec was properly identified, the file canonicalizer has - // different code for writing the spec. - EXPECT_EQ(0, out_parsed.scheme.begin); - EXPECT_EQ(4, out_parsed.scheme.len); - - EXPECT_EQ(cases[i].expected_host.begin, out_parsed.host.begin); - EXPECT_EQ(cases[i].expected_host.len, out_parsed.host.len); - - EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin); - EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len); - } -} - -TEST(URLCanonTest, CanonicalizeFileSystemURL) { - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - } cases[] = { - {"Filesystem:htTp://www.Foo.com:80/tempoRary", "filesystem:http://www.foo.com/tempoRary/", true}, - {"filesystem:httpS://www.foo.com/temporary/", "filesystem:https://www.foo.com/temporary/", true}, - {"filesystem:http://www.foo.com//", "filesystem:http://www.foo.com//", false}, - {"filesystem:http://www.foo.com/persistent/bob?query#ref", "filesystem:http://www.foo.com/persistent/bob?query#ref", true}, - {"filesystem:fIle://\\temporary/", "filesystem:file:///temporary/", true}, - {"filesystem:fiLe:///temporary", "filesystem:file:///temporary/", true}, - {"filesystem:File:///temporary/Bob?qUery#reF", "filesystem:file:///temporary/Bob?qUery#reF", true}, - }; - - for (size_t i = 0; i < arraysize(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - Parsed parsed; - ParseFileSystemURL(cases[i].input, url_len, &parsed); - - Parsed out_parsed; - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = CanonicalizeFileSystemURL(cases[i].input, url_len, parsed, - NULL, &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - - // Make sure the spec was properly identified, the filesystem canonicalizer - // has different code for writing the spec. - EXPECT_EQ(0, out_parsed.scheme.begin); - EXPECT_EQ(10, out_parsed.scheme.len); - if (success) - EXPECT_GT(out_parsed.path.len, 0); - } -} - -TEST(URLCanonTest, CanonicalizePathURL) { - // Path URLs should get canonicalized schemes but nothing else. - struct PathCase { - const char* input; - const char* expected; - } path_cases[] = { - {"javascript:", "javascript:"}, - {"JavaScript:Foo", "javascript:Foo"}, - {"Foo:\":This /is interesting;?#", "foo:\":This /is interesting;?#"}, - }; - - for (size_t i = 0; i < arraysize(path_cases); i++) { - int url_len = static_cast(strlen(path_cases[i].input)); - Parsed parsed; - ParsePathURL(path_cases[i].input, url_len, true, &parsed); - - Parsed out_parsed; - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = CanonicalizePathURL(path_cases[i].input, url_len, parsed, - &output, &out_parsed); - output.Complete(); - - EXPECT_TRUE(success); - EXPECT_EQ(path_cases[i].expected, out_str); - - EXPECT_EQ(0, out_parsed.host.begin); - EXPECT_EQ(-1, out_parsed.host.len); - - // When we end with a colon at the end, there should be no path. - if (path_cases[i].input[url_len - 1] == ':') { - EXPECT_EQ(0, out_parsed.GetContent().begin); - EXPECT_EQ(-1, out_parsed.GetContent().len); - } - } -} - -TEST(URLCanonTest, CanonicalizeMailtoURL) { - struct URLCase { - const char* input; - const char* expected; - bool expected_success; - Component expected_path; - Component expected_query; - } cases[] = { - // Null character should be escaped to %00. - // Keep this test first in the list as it is handled specially below. - {"mailto:addr1\0addr2?foo", - "mailto:addr1%00addr2?foo", - true, Component(7, 13), Component(21, 3)}, - {"mailto:addr1", - "mailto:addr1", - true, Component(7, 5), Component()}, - {"mailto:addr1@foo.com", - "mailto:addr1@foo.com", - true, Component(7, 13), Component()}, - // Trailing whitespace is stripped. - {"MaIlTo:addr1 \t ", - "mailto:addr1", - true, Component(7, 5), Component()}, - {"MaIlTo:addr1?to=jon", - "mailto:addr1?to=jon", - true, Component(7, 5), Component(13,6)}, - {"mailto:addr1,addr2", - "mailto:addr1,addr2", - true, Component(7, 11), Component()}, - // Embedded spaces must be encoded. - {"mailto:addr1, addr2", - "mailto:addr1,%20addr2", - true, Component(7, 14), Component()}, - {"mailto:addr1, addr2?subject=one two ", - "mailto:addr1,%20addr2?subject=one%20two", - true, Component(7, 14), Component(22, 17)}, - {"mailto:addr1%2caddr2", - "mailto:addr1%2caddr2", - true, Component(7, 13), Component()}, - {"mailto:\xF0\x90\x8C\x80", - "mailto:%F0%90%8C%80", - true, Component(7, 12), Component()}, - // Invalid -- UTF-8 encoded surrogate value. - {"mailto:\xed\xa0\x80", - "mailto:%EF%BF%BD%EF%BF%BD%EF%BF%BD", - false, Component(7, 27), Component()}, - {"mailto:addr1?", - "mailto:addr1?", - true, Component(7, 5), Component(13, 0)}, - // Certain characters have special meanings and must be encoded. - {"mailto:! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~\x7f?Query! \x22$&()+,-./09:;<=>@AZ[\\]&_`az{|}~", - "mailto:!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_%60az%7B%7C%7D~%7F?Query!%20%22$&()+,-./09:;%3C=%3E@AZ[\\]&_`az{|}~", - true, Component(7, 53), Component(61, 47)}, - }; - - // Define outside of loop to catch bugs where components aren't reset - Parsed parsed; - Parsed out_parsed; - - for (size_t i = 0; i < arraysize(cases); i++) { - int url_len = static_cast(strlen(cases[i].input)); - if (i == 0) { - // The first test case purposely has a '\0' in it -- don't count it - // as the string terminator. - url_len = 22; - } - ParseMailtoURL(cases[i].input, url_len, &parsed); - - std::string out_str; - StdStringCanonOutput output(&out_str); - bool success = CanonicalizeMailtoURL(cases[i].input, url_len, parsed, - &output, &out_parsed); - output.Complete(); - - EXPECT_EQ(cases[i].expected_success, success); - EXPECT_EQ(cases[i].expected, out_str); - - // Make sure the spec was properly identified - EXPECT_EQ(0, out_parsed.scheme.begin); - EXPECT_EQ(6, out_parsed.scheme.len); - - EXPECT_EQ(cases[i].expected_path.begin, out_parsed.path.begin); - EXPECT_EQ(cases[i].expected_path.len, out_parsed.path.len); - - EXPECT_EQ(cases[i].expected_query.begin, out_parsed.query.begin); - EXPECT_EQ(cases[i].expected_query.len, out_parsed.query.len); - } -} - -#ifndef WIN32 - -TEST(URLCanonTest, _itoa_s) { - // We fill the buffer with 0xff to ensure that it's getting properly - // null-terminated. We also allocate one byte more than what we tell - // _itoa_s about, and ensure that the extra byte is untouched. - char buf[6]; - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, _itoa_s(12, buf, sizeof(buf) - 1, 10)); - EXPECT_STREQ("12", buf); - EXPECT_EQ('\xFF', buf[3]); - - // Test the edge cases - exactly the buffer size and one over - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, _itoa_s(1234, buf, sizeof(buf) - 1, 10)); - EXPECT_STREQ("1234", buf); - EXPECT_EQ('\xFF', buf[5]); - - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(EINVAL, _itoa_s(12345, buf, sizeof(buf) - 1, 10)); - EXPECT_EQ('\xFF', buf[5]); // should never write to this location - - // Test the template overload (note that this will see the full buffer) - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, _itoa_s(12, buf, 10)); - EXPECT_STREQ("12", buf); - EXPECT_EQ('\xFF', buf[3]); - - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, _itoa_s(12345, buf, 10)); - EXPECT_STREQ("12345", buf); - - EXPECT_EQ(EINVAL, _itoa_s(123456, buf, 10)); - - // Test that radix 16 is supported. - memset(buf, 0xff, sizeof(buf)); - EXPECT_EQ(0, _itoa_s(1234, buf, sizeof(buf) - 1, 16)); - EXPECT_STREQ("4d2", buf); - EXPECT_EQ('\xFF', buf[5]); -} - -TEST(URLCanonTest, _itow_s) { - // We fill the buffer with 0xff to ensure that it's getting properly - // null-terminated. We also allocate one byte more than what we tell - // _itoa_s about, and ensure that the extra byte is untouched. - base::char16 buf[6]; - const char fill_mem = 0xff; - const base::char16 fill_char = 0xffff; - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(0, _itow_s(12, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(base::UTF8ToUTF16("12"), base::string16(buf)); - EXPECT_EQ(fill_char, buf[3]); - - // Test the edge cases - exactly the buffer size and one over - EXPECT_EQ(0, _itow_s(1234, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(base::UTF8ToUTF16("1234"), base::string16(buf)); - EXPECT_EQ(fill_char, buf[5]); - - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(EINVAL, _itow_s(12345, buf, sizeof(buf) / 2 - 1, 10)); - EXPECT_EQ(fill_char, buf[5]); // should never write to this location - - // Test the template overload (note that this will see the full buffer) - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(0, _itow_s(12, buf, 10)); - EXPECT_EQ(base::UTF8ToUTF16("12"), - base::string16(buf)); - EXPECT_EQ(fill_char, buf[3]); - - memset(buf, fill_mem, sizeof(buf)); - EXPECT_EQ(0, _itow_s(12345, buf, 10)); - EXPECT_EQ(base::UTF8ToUTF16("12345"), base::string16(buf)); - - EXPECT_EQ(EINVAL, _itow_s(123456, buf, 10)); -} - -#endif // !WIN32 - -// Returns true if the given two structures are the same. -static bool ParsedIsEqual(const Parsed& a, const Parsed& b) { - return a.scheme.begin == b.scheme.begin && a.scheme.len == b.scheme.len && - a.username.begin == b.username.begin && a.username.len == b.username.len && - a.password.begin == b.password.begin && a.password.len == b.password.len && - a.host.begin == b.host.begin && a.host.len == b.host.len && - a.port.begin == b.port.begin && a.port.len == b.port.len && - a.path.begin == b.path.begin && a.path.len == b.path.len && - a.query.begin == b.query.begin && a.query.len == b.query.len && - a.ref.begin == b.ref.begin && a.ref.len == b.ref.len; -} - -TEST(URLCanonTest, ResolveRelativeURL) { - struct RelativeCase { - const char* base; // Input base URL: MUST BE CANONICAL - bool is_base_hier; // Is the base URL hierarchical - bool is_base_file; // Tells us if the base is a file URL. - const char* test; // Input URL to test against. - bool succeed_relative; // Whether we expect IsRelativeURL to succeed - bool is_rel; // Whether we expect |test| to be relative or not. - bool succeed_resolve; // Whether we expect ResolveRelativeURL to succeed. - const char* resolved; // What we expect in the result when resolving. - } rel_cases[] = { - // Basic absolute input. - {"http://host/a", true, false, "http://another/", true, false, false, NULL}, - {"http://host/a", true, false, "http:////another/", true, false, false, NULL}, - // Empty relative URLs should only remove the ref part of the URL, - // leaving the rest unchanged. - {"http://foo/bar", true, false, "", true, true, true, "http://foo/bar"}, - {"http://foo/bar#ref", true, false, "", true, true, true, "http://foo/bar"}, - {"http://foo/bar#", true, false, "", true, true, true, "http://foo/bar"}, - // Spaces at the ends of the relative path should be ignored. - {"http://foo/bar", true, false, " another ", true, true, true, "http://foo/another"}, - {"http://foo/bar", true, false, " . ", true, true, true, "http://foo/"}, - {"http://foo/bar", true, false, " \t ", true, true, true, "http://foo/bar"}, - // Matching schemes without two slashes are treated as relative. - {"http://host/a", true, false, "http:path", true, true, true, "http://host/path"}, - {"http://host/a/", true, false, "http:path", true, true, true, "http://host/a/path"}, - {"http://host/a", true, false, "http:/path", true, true, true, "http://host/path"}, - {"http://host/a", true, false, "HTTP:/path", true, true, true, "http://host/path"}, - // Nonmatching schemes are absolute. - {"http://host/a", true, false, "https:host2", true, false, false, NULL}, - {"http://host/a", true, false, "htto:/host2", true, false, false, NULL}, - // Absolute path input - {"http://host/a", true, false, "/b/c/d", true, true, true, "http://host/b/c/d"}, - {"http://host/a", true, false, "\\b\\c\\d", true, true, true, "http://host/b/c/d"}, - {"http://host/a", true, false, "/b/../c", true, true, true, "http://host/c"}, - {"http://host/a?b#c", true, false, "/b/../c", true, true, true, "http://host/c"}, - {"http://host/a", true, false, "\\b/../c?x#y", true, true, true, "http://host/c?x#y"}, - {"http://host/a?b#c", true, false, "/b/../c?x#y", true, true, true, "http://host/c?x#y"}, - // Relative path input - {"http://host/a", true, false, "b", true, true, true, "http://host/b"}, - {"http://host/a", true, false, "bc/de", true, true, true, "http://host/bc/de"}, - {"http://host/a/", true, false, "bc/de?query#ref", true, true, true, "http://host/a/bc/de?query#ref"}, - {"http://host/a/", true, false, ".", true, true, true, "http://host/a/"}, - {"http://host/a/", true, false, "..", true, true, true, "http://host/"}, - {"http://host/a/", true, false, "./..", true, true, true, "http://host/"}, - {"http://host/a/", true, false, "../.", true, true, true, "http://host/"}, - {"http://host/a/", true, false, "././.", true, true, true, "http://host/a/"}, - {"http://host/a?query#ref", true, false, "../../../foo", true, true, true, "http://host/foo"}, - // Query input - {"http://host/a", true, false, "?foo=bar", true, true, true, "http://host/a?foo=bar"}, - {"http://host/a?x=y#z", true, false, "?", true, true, true, "http://host/a?"}, - {"http://host/a?x=y#z", true, false, "?foo=bar#com", true, true, true, "http://host/a?foo=bar#com"}, - // Ref input - {"http://host/a", true, false, "#ref", true, true, true, "http://host/a#ref"}, - {"http://host/a#b", true, false, "#", true, true, true, "http://host/a#"}, - {"http://host/a?foo=bar#hello", true, false, "#bye", true, true, true, "http://host/a?foo=bar#bye"}, - // Non-hierarchical base: no relative handling. Relative input should - // error, and if a scheme is present, it should be treated as absolute. - {"data:foobar", false, false, "baz.html", false, false, false, NULL}, - {"data:foobar", false, false, "data:baz", true, false, false, NULL}, - {"data:foobar", false, false, "data:/base", true, false, false, NULL}, - // Non-hierarchical base: absolute input should succeed. - {"data:foobar", false, false, "http://host/", true, false, false, NULL}, - {"data:foobar", false, false, "http:host", true, false, false, NULL}, - // Non-hierarchical base: empty URL should give error. - {"data:foobar", false, false, "", false, false, false, NULL}, - // Invalid schemes should be treated as relative. - {"http://foo/bar", true, false, "./asd:fgh", true, true, true, "http://foo/asd:fgh"}, - {"http://foo/bar", true, false, ":foo", true, true, true, "http://foo/:foo"}, - {"http://foo/bar", true, false, " hello world", true, true, true, "http://foo/hello%20world"}, - {"data:asdf", false, false, ":foo", false, false, false, NULL}, - {"data:asdf", false, false, "bad(':foo')", false, false, false, NULL}, - // We should treat semicolons like any other character in URL resolving - {"http://host/a", true, false, ";foo", true, true, true, "http://host/;foo"}, - {"http://host/a;", true, false, ";foo", true, true, true, "http://host/;foo"}, - {"http://host/a", true, false, ";/../bar", true, true, true, "http://host/bar"}, - // Relative URLs can also be written as "//foo/bar" which is relative to - // the scheme. In this case, it would take the old scheme, so for http - // the example would resolve to "http://foo/bar". - {"http://host/a", true, false, "//another", true, true, true, "http://another/"}, - {"http://host/a", true, false, "//another/path?query#ref", true, true, true, "http://another/path?query#ref"}, - {"http://host/a", true, false, "///another/path", true, true, true, "http://another/path"}, - {"http://host/a", true, false, "//Another\\path", true, true, true, "http://another/path"}, - {"http://host/a", true, false, "//", true, true, false, "http:"}, - // IE will also allow one or the other to be a backslash to get the same - // behavior. - {"http://host/a", true, false, "\\/another/path", true, true, true, "http://another/path"}, - {"http://host/a", true, false, "/\\Another\\path", true, true, true, "http://another/path"}, -#ifdef WIN32 - // Resolving against Windows file base URLs. - {"file:///C:/foo", true, true, "http://host/", true, false, false, NULL}, - {"file:///C:/foo", true, true, "bar", true, true, true, "file:///C:/bar"}, - {"file:///C:/foo", true, true, "../../../bar.html", true, true, true, "file:///C:/bar.html"}, - {"file:///C:/foo", true, true, "/../bar.html", true, true, true, "file:///C:/bar.html"}, - // But two backslashes on Windows should be UNC so should be treated - // as absolute. - {"http://host/a", true, false, "\\\\another\\path", true, false, false, NULL}, - // IE doesn't support drive specs starting with two slashes. It fails - // immediately and doesn't even try to load. We fix it up to either - // an absolute path or UNC depending on what it looks like. - {"file:///C:/something", true, true, "//c:/foo", true, true, true, "file:///C:/foo"}, - {"file:///C:/something", true, true, "//localhost/c:/foo", true, true, true, "file:///C:/foo"}, - // Windows drive specs should be allowed and treated as absolute. - {"file:///C:/foo", true, true, "c:", true, false, false, NULL}, - {"file:///C:/foo", true, true, "c:/foo", true, false, false, NULL}, - {"http://host/a", true, false, "c:\\foo", true, false, false, NULL}, - // Relative paths with drive letters should be allowed when the base is - // also a file. - {"file:///C:/foo", true, true, "/z:/bar", true, true, true, "file:///Z:/bar"}, - // Treat absolute paths as being off of the drive. - {"file:///C:/foo", true, true, "/bar", true, true, true, "file:///C:/bar"}, - {"file://localhost/C:/foo", true, true, "/bar", true, true, true, "file://localhost/C:/bar"}, - {"file:///C:/foo/com/", true, true, "/bar", true, true, true, "file:///C:/bar"}, - // On Windows, two slashes without a drive letter when the base is a file - // means that the path is UNC. - {"file:///C:/something", true, true, "//somehost/path", true, true, true, "file://somehost/path"}, - {"file:///C:/something", true, true, "/\\//somehost/path", true, true, true, "file://somehost/path"}, -#else - // On Unix we fall back to relative behavior since there's nothing else - // reasonable to do. - {"http://host/a", true, false, "\\\\Another\\path", true, true, true, "http://another/path"}, -#endif - // Even on Windows, we don't allow relative drive specs when the base - // is not file. - {"http://host/a", true, false, "/c:\\foo", true, true, true, "http://host/c:/foo"}, - {"http://host/a", true, false, "//c:\\foo", true, true, true, "http://c/foo"}, - // Ensure that ports aren't allowed for hosts relative to a file url. - // Although the result string shows a host:port portion, the call to - // resolve the relative URL returns false, indicating parse failure, - // which is what is required. - {"file:///foo.txt", true, true, "//host:80/bar.txt", true, true, false, "file://host:80/bar.txt"}, - // Filesystem URL tests; filesystem URLs are only valid and relative if - // they have no scheme, e.g. "./index.html". There's no valid equivalent - // to http:index.html. - {"filesystem:http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL}, - {"filesystem:http://host/t/path", true, false, "filesystem:https://host/t/path2", true, false, false, NULL}, - {"filesystem:http://host/t/path", true, false, "http://host/t/path2", true, false, false, NULL}, - {"http://host/t/path", true, false, "filesystem:http://host/t/path2", true, false, false, NULL}, - {"filesystem:http://host/t/path", true, false, "./path2", true, true, true, "filesystem:http://host/t/path2"}, - {"filesystem:http://host/t/path/", true, false, "path2", true, true, true, "filesystem:http://host/t/path/path2"}, - {"filesystem:http://host/t/path", true, false, "filesystem:http:path2", true, false, false, NULL}, - // Absolute URLs are still not relative to a non-standard base URL. - {"about:blank", false, false, "http://X/A", true, false, true, ""}, - {"about:blank", false, false, "content://content.Provider/", true, false, true, ""}, - }; - - for (size_t i = 0; i < arraysize(rel_cases); i++) { - const RelativeCase& cur_case = rel_cases[i]; - - Parsed parsed; - int base_len = static_cast(strlen(cur_case.base)); - if (cur_case.is_base_file) - ParseFileURL(cur_case.base, base_len, &parsed); - else if (cur_case.is_base_hier) - ParseStandardURL(cur_case.base, base_len, &parsed); - else - ParsePathURL(cur_case.base, base_len, false, &parsed); - - // First see if it is relative. - int test_len = static_cast(strlen(cur_case.test)); - bool is_relative; - Component relative_component; - bool succeed_is_rel = IsRelativeURL( - cur_case.base, parsed, cur_case.test, test_len, cur_case.is_base_hier, - &is_relative, &relative_component); - - EXPECT_EQ(cur_case.succeed_relative, succeed_is_rel) << - "succeed is rel failure on " << cur_case.test; - EXPECT_EQ(cur_case.is_rel, is_relative) << - "is rel failure on " << cur_case.test; - // Now resolve it. - if (succeed_is_rel && is_relative && cur_case.is_rel) { - std::string resolved; - StdStringCanonOutput output(&resolved); - Parsed resolved_parsed; - - bool succeed_resolve = ResolveRelativeURL( - cur_case.base, parsed, cur_case.is_base_file, cur_case.test, - relative_component, NULL, &output, &resolved_parsed); - output.Complete(); - - EXPECT_EQ(cur_case.succeed_resolve, succeed_resolve); - EXPECT_EQ(cur_case.resolved, resolved) << " on " << cur_case.test; - - // Verify that the output parsed structure is the same as parsing a - // the URL freshly. - Parsed ref_parsed; - int resolved_len = static_cast(resolved.size()); - if (cur_case.is_base_file) { - ParseFileURL(resolved.c_str(), resolved_len, &ref_parsed); - } else if (cur_case.is_base_hier) { - ParseStandardURL(resolved.c_str(), resolved_len, &ref_parsed); - } else { - ParsePathURL(resolved.c_str(), resolved_len, false, &ref_parsed); - } - EXPECT_TRUE(ParsedIsEqual(ref_parsed, resolved_parsed)); - } - } -} - -// It used to be the case that when we did a replacement with a long buffer of -// UTF-16 characters, we would get invalid data in the URL. This is because the -// buffer that it used to hold the UTF-8 data was resized, while some pointers -// were still kept to the old buffer that was removed. -TEST(URLCanonTest, ReplacementOverflow) { - const char src[] = "file:///C:/foo/bar"; - int src_len = static_cast(strlen(src)); - Parsed parsed; - ParseFileURL(src, src_len, &parsed); - - // Override two components, the path with something short, and the query with - // something long enough to trigger the bug. - Replacements repl; - base::string16 new_query; - for (int i = 0; i < 4800; i++) - new_query.push_back('a'); - - base::string16 new_path(test_utils::TruncateWStringToUTF16(L"/foo")); - repl.SetPath(new_path.c_str(), Component(0, 4)); - repl.SetQuery(new_query.c_str(), - Component(0, static_cast(new_query.length()))); - - // Call ReplaceComponents on the string. It doesn't matter if we call it for - // standard URLs, file URLs, etc, since they will go to the same replacement - // function that was buggy. - Parsed repl_parsed; - std::string repl_str; - StdStringCanonOutput repl_output(&repl_str); - ReplaceFileURL(src, parsed, repl, NULL, &repl_output, &repl_parsed); - repl_output.Complete(); - - // Generate the expected string and check. - std::string expected("file:///foo?"); - for (size_t i = 0; i < new_query.length(); i++) - expected.push_back('a'); - EXPECT_TRUE(expected == repl_str); -} - -TEST(URLCanonTest, DefaultPortForScheme) { - struct TestCases { - const char* scheme; - const int expected_port; - } cases[]{ - {"http", 80}, - {"https", 443}, - {"ftp", 21}, - {"ws", 80}, - {"wss", 443}, - {"gopher", 70}, - {"fake-scheme", PORT_UNSPECIFIED}, - {"HTTP", PORT_UNSPECIFIED}, - {"HTTPS", PORT_UNSPECIFIED}, - {"FTP", PORT_UNSPECIFIED}, - {"WS", PORT_UNSPECIFIED}, - {"WSS", PORT_UNSPECIFIED}, - {"GOPHER", PORT_UNSPECIFIED}, - }; - - for (auto& test_case : cases) { - SCOPED_TRACE(test_case.scheme); - EXPECT_EQ(test_case.expected_port, - DefaultPortForScheme(test_case.scheme, strlen(test_case.scheme))); - } -} - -TEST(URLCanonTest, IDNToASCII) { - RawCanonOutputW<1024> output; - - // Basic ASCII test. - base::string16 str = base::UTF8ToUTF16("hello"); - EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("hello"), base::string16(output.data())); - output.set_length(0); - - // Mixed ASCII/non-ASCII. - str = base::UTF8ToUTF16("hellö"); - EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data())); - output.set_length(0); - - // All non-ASCII. - str = base::UTF8ToUTF16("你好"); - EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--6qq79v"), base::string16(output.data())); - output.set_length(0); - - // Characters that need mapping (the resulting Punycode is the encoding for - // "1⁄4"). - str = base::UTF8ToUTF16("¼"); - EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--14-c6t"), base::string16(output.data())); - output.set_length(0); - - // String to encode already starts with "xn--", and all ASCII. Should not - // modify the string. - str = base::UTF8ToUTF16("xn--hell-8qa"); - EXPECT_TRUE(IDNToASCII(str.data(), str.length(), &output)); - EXPECT_EQ(base::UTF8ToUTF16("xn--hell-8qa"), base::string16(output.data())); - output.set_length(0); - - // String to encode already starts with "xn--", and mixed ASCII/non-ASCII. - // Should fail, due to a special case: if the label starts with "xn--", it - // should be parsed as Punycode, which must be all ASCII. - str = base::UTF8ToUTF16("xn--hellö"); - EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output)); - output.set_length(0); - - // String to encode already starts with "xn--", and mixed ASCII/non-ASCII. - // This tests that there is still an error for the character '⁄' (U+2044), - // which would be a valid ASCII character, U+0044, if the high byte were - // ignored. - str = base::UTF8ToUTF16("xn--1⁄4"); - EXPECT_FALSE(IDNToASCII(str.data(), str.length(), &output)); - output.set_length(0); -} - -} // namespace url diff --git a/url_parse_unittest.cc b/url_parse_unittest.cc deleted file mode 100644 index b80e865..0000000 --- a/url_parse_unittest.cc +++ /dev/null @@ -1,690 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "url/third_party/mozilla/url_parse.h" - -#include - -#include "base/macros.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/third_party/mozilla/url_parse.h" - -// Interesting IE file:isms... -// -// file:/foo/bar file:///foo/bar -// The result here seems totally invalid!?!? This isn't UNC. -// -// file:/ -// file:// or any other number of slashes -// IE6 doesn't do anything at all if you click on this link. No error: -// nothing. IE6's history system seems to always color this link, so I'm -// guessing that it maps internally to the empty URL. -// -// C:\ file:///C:/ -// / file:///C:/ -// /foo file:///C:/foo -// Interestingly, IE treats "/" as an alias for "c:\", which makes sense, -// but is weird to think about on Windows. -// -// file:foo/ file:foo/ (invalid?!?!?) -// file:/foo/ file:///foo/ (invalid?!?!?) -// file://foo/ file://foo/ (UNC to server "foo") -// file:///foo/ file:///foo/ (invalid) -// file:////foo/ file://foo/ (UNC to server "foo") -// Any more than four slashes is also treated as UNC. -// -// file:C:/ file://C:/ -// file:/C:/ file://C:/ -// The number of slashes after "file:" don't matter if the thing following -// it looks like an absolute drive path. Also, slashes and backslashes are -// equally valid here. - -namespace url { -namespace { - -// Used for regular URL parse cases. -struct URLParseCase { - const char* input; - - const char* scheme; - const char* username; - const char* password; - const char* host; - int port; - const char* path; - const char* query; - const char* ref; -}; - -// Simpler version of URLParseCase for testing path URLs. -struct PathURLParseCase { - const char* input; - - const char* scheme; - const char* path; -}; - -// Simpler version of URLParseCase for testing mailto URLs. -struct MailtoURLParseCase { - const char* input; - - const char* scheme; - const char* path; - const char* query; -}; - -// More complicated version of URLParseCase for testing filesystem URLs. -struct FileSystemURLParseCase { - const char* input; - - const char* inner_scheme; - const char* inner_username; - const char* inner_password; - const char* inner_host; - int inner_port; - const char* inner_path; - const char* path; - const char* query; - const char* ref; -}; - -bool ComponentMatches(const char* input, - const char* reference, - const Component& component) { - // If the component is nonexistent (length == -1), it should begin at 0. - EXPECT_TRUE(component.len >= 0 || component.len == -1); - - // Begin should be valid. - EXPECT_LE(0, component.begin); - - // A NULL reference means the component should be nonexistent. - if (!reference) - return component.len == -1; - if (component.len < 0) - return false; // Reference is not NULL but we don't have anything - - if (strlen(reference) != static_cast(component.len)) - return false; // Lengths don't match - - // Now check the actual characters. - return strncmp(reference, &input[component.begin], component.len) == 0; -} - -void ExpectInvalidComponent(const Component& component) { - EXPECT_EQ(0, component.begin); - EXPECT_EQ(-1, component.len); -} - -// Parsed ---------------------------------------------------------------------- - -TEST(URLParser, Length) { - const char* length_cases[] = { - // One with everything in it. - "http://user:pass@host:99/foo?bar#baz", - // One with nothing in it. - "", - // Working backwards, let's start taking off stuff from the full one. - "http://user:pass@host:99/foo?bar#", - "http://user:pass@host:99/foo?bar", - "http://user:pass@host:99/foo?", - "http://user:pass@host:99/foo", - "http://user:pass@host:99/", - "http://user:pass@host:99", - "http://user:pass@host:", - "http://user:pass@host", - "http://host", - "http://user@", - "http:", - }; - for (size_t i = 0; i < arraysize(length_cases); i++) { - int true_length = static_cast(strlen(length_cases[i])); - - Parsed parsed; - ParseStandardURL(length_cases[i], true_length, &parsed); - - EXPECT_EQ(true_length, parsed.Length()); - } -} - -TEST(URLParser, CountCharactersBefore) { - struct CountCase { - const char* url; - Parsed::ComponentType component; - bool include_delimiter; - int expected_count; - } count_cases[] = { - // Test each possibility in the case where all components are present. - // 0 1 2 - // 0123456789012345678901 - {"http://u:p@h:8/p?q#r", Parsed::SCHEME, true, 0}, - {"http://u:p@h:8/p?q#r", Parsed::SCHEME, false, 0}, - {"http://u:p@h:8/p?q#r", Parsed::USERNAME, true, 7}, - {"http://u:p@h:8/p?q#r", Parsed::USERNAME, false, 7}, - {"http://u:p@h:8/p?q#r", Parsed::PASSWORD, true, 9}, - {"http://u:p@h:8/p?q#r", Parsed::PASSWORD, false, 9}, - {"http://u:p@h:8/p?q#r", Parsed::HOST, true, 11}, - {"http://u:p@h:8/p?q#r", Parsed::HOST, false, 11}, - {"http://u:p@h:8/p?q#r", Parsed::PORT, true, 12}, - {"http://u:p@h:8/p?q#r", Parsed::PORT, false, 13}, - {"http://u:p@h:8/p?q#r", Parsed::PATH, false, 14}, - {"http://u:p@h:8/p?q#r", Parsed::PATH, true, 14}, - {"http://u:p@h:8/p?q#r", Parsed::QUERY, true, 16}, - {"http://u:p@h:8/p?q#r", Parsed::QUERY, false, 17}, - {"http://u:p@h:8/p?q#r", Parsed::REF, true, 18}, - {"http://u:p@h:8/p?q#r", Parsed::REF, false, 19}, - // Now test when the requested component is missing. - {"http://u:p@h:8/p?", Parsed::REF, true, 17}, - {"http://u:p@h:8/p?q", Parsed::REF, true, 18}, - {"http://u:p@h:8/p#r", Parsed::QUERY, true, 16}, - {"http://u:p@h:8#r", Parsed::PATH, true, 14}, - {"http://u:p@h/", Parsed::PORT, true, 12}, - {"http://u:p@/", Parsed::HOST, true, 11}, - // This case is a little weird. It will report that the password would - // start where the host begins. This is arguably correct, although you - // could also argue that it should start at the '@' sign. Doing it - // starting with the '@' sign is actually harder, so we don't bother. - {"http://u@h/", Parsed::PASSWORD, true, 9}, - {"http://h/", Parsed::USERNAME, true, 7}, - {"http:", Parsed::USERNAME, true, 5}, - {"", Parsed::SCHEME, true, 0}, - // Make sure a random component still works when there's nothing there. - {"", Parsed::REF, true, 0}, - // File URLs are special with no host, so we test those. - {"file:///c:/foo", Parsed::USERNAME, true, 7}, - {"file:///c:/foo", Parsed::PASSWORD, true, 7}, - {"file:///c:/foo", Parsed::HOST, true, 7}, - {"file:///c:/foo", Parsed::PATH, true, 7}, - }; - for (size_t i = 0; i < arraysize(count_cases); i++) { - int length = static_cast(strlen(count_cases[i].url)); - - // Simple test to distinguish file and standard URLs. - Parsed parsed; - if (length > 0 && count_cases[i].url[0] == 'f') - ParseFileURL(count_cases[i].url, length, &parsed); - else - ParseStandardURL(count_cases[i].url, length, &parsed); - - int chars_before = parsed.CountCharactersBefore( - count_cases[i].component, count_cases[i].include_delimiter); - EXPECT_EQ(count_cases[i].expected_count, chars_before); - } -} - -// Standard -------------------------------------------------------------------- - -// Input Scheme Usrname Passwd Host Port Path Query Ref -// ------------------------------------ ------- ------- ---------- ------------ --- ---------- ------------ ----- -static URLParseCase cases[] = { - // Regular URL with all the parts -{"http://user:pass@foo:21/bar;par?b#c", "http", "user", "pass", "foo", 21, "/bar;par","b", "c"}, - - // Known schemes should lean towards authority identification -{"http:foo.com", "http", NULL, NULL, "foo.com", -1, NULL, NULL, NULL}, - - // Spaces! -{"\t :foo.com \n", "", NULL, NULL, "foo.com", -1, NULL, NULL, NULL}, -{" foo.com ", NULL, NULL, NULL, "foo.com", -1, NULL, NULL, NULL}, -{"a:\t foo.com", "a", NULL, NULL, "\t foo.com", -1, NULL, NULL, NULL}, -{"http://f:21/ b ? d # e ", "http", NULL, NULL, "f", 21, "/ b ", " d ", " e"}, - - // Invalid port numbers should be identified and turned into -2, empty port - // numbers should be -1. Spaces aren't allowed in port numbers -{"http://f:/c", "http", NULL, NULL, "f", -1, "/c", NULL, NULL}, -{"http://f:0/c", "http", NULL, NULL, "f", 0, "/c", NULL, NULL}, -{"http://f:00000000000000/c", "http", NULL, NULL, "f", 0, "/c", NULL, NULL}, -{"http://f:00000000000000000000080/c", "http", NULL, NULL, "f", 80, "/c", NULL, NULL}, -{"http://f:b/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f: /c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f:\n/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f:fifty-two/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f:999999/c", "http", NULL, NULL, "f", -2, "/c", NULL, NULL}, -{"http://f: 21 / b ? d # e ", "http", NULL, NULL, "f", -2, "/ b ", " d ", " e"}, - - // Creative URLs missing key elements -{"", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{" \t", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":foo.com/", "", NULL, NULL, "foo.com", -1, "/", NULL, NULL}, -{":foo.com\\", "", NULL, NULL, "foo.com", -1, "\\", NULL, NULL}, -{":", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":a", "", NULL, NULL, "a", -1, NULL, NULL, NULL}, -{":/", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":\\", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":#", "", NULL, NULL, NULL, -1, NULL, NULL, ""}, -{"#", NULL, NULL, NULL, NULL, -1, NULL, NULL, ""}, -{"#/", NULL, NULL, NULL, NULL, -1, NULL, NULL, "/"}, -{"#\\", NULL, NULL, NULL, NULL, -1, NULL, NULL, "\\"}, -{"#;?", NULL, NULL, NULL, NULL, -1, NULL, NULL, ";?"}, -{"?", NULL, NULL, NULL, NULL, -1, NULL, "", NULL}, -{"/", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{":23", "", NULL, NULL, "23", -1, NULL, NULL, NULL}, -{"/:23", "/", NULL, NULL, "23", -1, NULL, NULL, NULL}, -{"//", NULL, NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"::", "", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"::23", "", NULL, NULL, NULL, 23, NULL, NULL, NULL}, -{"foo://", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL}, - - // Username/passwords and things that look like them -{"http://a:b@c:29/d", "http", "a", "b", "c", 29, "/d", NULL, NULL}, -{"http::@c:29", "http", "", "", "c", 29, NULL, NULL, NULL}, - // ... "]" in the password field isn't allowed, but we tolerate it here... -{"http://&a:foo(b]c@d:2/", "http", "&a", "foo(b]c", "d", 2, "/", NULL, NULL}, -{"http://::@c@d:2", "http", "", ":@c", "d", 2, NULL, NULL, NULL}, -{"http://foo.com:b@d/", "http", "foo.com", "b", "d", -1, "/", NULL, NULL}, - -{"http://foo.com/\\@", "http", NULL, NULL, "foo.com", -1, "/\\@", NULL, NULL}, -{"http:\\\\foo.com\\", "http", NULL, NULL, "foo.com", -1, "\\", NULL, NULL}, -{"http:\\\\a\\b:c\\d@foo.com\\", "http", NULL, NULL, "a", -1, "\\b:c\\d@foo.com\\", NULL, NULL}, - - // Tolerate different numbers of slashes. -{"foo:/", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"foo:/bar.com/", "foo", NULL, NULL, "bar.com", -1, "/", NULL, NULL}, -{"foo://///////", "foo", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"foo://///////bar.com/", "foo", NULL, NULL, "bar.com", -1, "/", NULL, NULL}, -{"foo:////://///", "foo", NULL, NULL, NULL, -1, "/////", NULL, NULL}, - - // Raw file paths on Windows aren't handled by the parser. -{"c:/foo", "c", NULL, NULL, "foo", -1, NULL, NULL, NULL}, -{"//foo/bar", NULL, NULL, NULL, "foo", -1, "/bar", NULL, NULL}, - - // Use the first question mark for the query and the ref. -{"http://foo/path;a??e#f#g", "http", NULL, NULL, "foo", -1, "/path;a", "?e", "f#g"}, -{"http://foo/abcd?efgh?ijkl", "http", NULL, NULL, "foo", -1, "/abcd", "efgh?ijkl", NULL}, -{"http://foo/abcd#foo?bar", "http", NULL, NULL, "foo", -1, "/abcd", NULL, "foo?bar"}, - - // IPv6, check also interesting uses of colons. -{"[61:24:74]:98", "[61", NULL, NULL, "24:74]", 98, NULL, NULL, NULL}, -{"http://[61:27]:98", "http", NULL, NULL, "[61:27]", 98, NULL, NULL, NULL}, -{"http:[61:27]/:foo", "http", NULL, NULL, "[61:27]", -1, "/:foo", NULL, NULL}, -{"http://[1::2]:3:4", "http", NULL, NULL, "[1::2]:3", 4, NULL, NULL, NULL}, - - // Partially-complete IPv6 literals, and related cases. -{"http://2001::1", "http", NULL, NULL, "2001:", 1, NULL, NULL, NULL}, -{"http://[2001::1", "http", NULL, NULL, "[2001::1", -1, NULL, NULL, NULL}, -{"http://2001::1]", "http", NULL, NULL, "2001::1]", -1, NULL, NULL, NULL}, -{"http://2001::1]:80", "http", NULL, NULL, "2001::1]", 80, NULL, NULL, NULL}, -{"http://[2001::1]", "http", NULL, NULL, "[2001::1]", -1, NULL, NULL, NULL}, -{"http://[2001::1]:80", "http", NULL, NULL, "[2001::1]", 80, NULL, NULL, NULL}, -{"http://[[::]]", "http", NULL, NULL, "[[::]]", -1, NULL, NULL, NULL}, - -}; - -TEST(URLParser, Standard) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the constructor. - Parsed parsed; - for (size_t i = 0; i < arraysize(cases); i++) { - const char* url = cases[i].input; - ParseStandardURL(url, static_cast(strlen(url)), &parsed); - int port = ParsePort(url, parsed.port); - - EXPECT_TRUE(ComponentMatches(url, cases[i].scheme, parsed.scheme)); - EXPECT_TRUE(ComponentMatches(url, cases[i].username, parsed.username)); - EXPECT_TRUE(ComponentMatches(url, cases[i].password, parsed.password)); - EXPECT_TRUE(ComponentMatches(url, cases[i].host, parsed.host)); - EXPECT_EQ(cases[i].port, port); - EXPECT_TRUE(ComponentMatches(url, cases[i].path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, cases[i].query, parsed.query)); - EXPECT_TRUE(ComponentMatches(url, cases[i].ref, parsed.ref)); - } -} - -// PathURL -------------------------------------------------------------------- - -// Various incarnations of path URLs. -static PathURLParseCase path_cases[] = { -{"", NULL, NULL}, -{":", "", NULL}, -{":/", "", "/"}, -{"/", NULL, "/"}, -{" This is \\interesting// \t", NULL, "This is \\interesting// \t"}, -{"about:", "about", NULL}, -{"about:blank", "about", "blank"}, -{" about: blank ", "about", " blank "}, -{"javascript :alert(\"He:/l\\l#o?foo\"); ", "javascript ", "alert(\"He:/l\\l#o?foo\"); "}, -}; - -TEST(URLParser, PathURL) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the constructor. - Parsed parsed; - for (size_t i = 0; i < arraysize(path_cases); i++) { - const char* url = path_cases[i].input; - ParsePathURL(url, static_cast(strlen(url)), false, &parsed); - - EXPECT_TRUE(ComponentMatches(url, path_cases[i].scheme, parsed.scheme)) - << i; - EXPECT_TRUE(ComponentMatches(url, path_cases[i].path, parsed.GetContent())) - << i; - - // The remaining components are never used for path URLs. - ExpectInvalidComponent(parsed.username); - ExpectInvalidComponent(parsed.password); - ExpectInvalidComponent(parsed.host); - ExpectInvalidComponent(parsed.port); - } -} - -// Various incarnations of file URLs. -static URLParseCase file_cases[] = { -#ifdef WIN32 -{"file:server", "file", NULL, NULL, "server", -1, NULL, NULL, NULL}, -{" file: server \t", "file", NULL, NULL, " server",-1, NULL, NULL, NULL}, -{"FiLe:c|", "FiLe", NULL, NULL, NULL, -1, "c|", NULL, NULL}, -{"FILE:/\\\\/server/file", "FILE", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL}, -{"file://localhost/c:/", "file", NULL, NULL, NULL, -1, "/c:/", NULL, NULL}, -{"file://127.0.0.1/c|\\", "file", NULL, NULL, NULL, -1, "/c|\\", NULL, NULL}, -{"file:/", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL}, -{"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL}, - // If there is a Windows drive letter, treat any number of slashes as the - // path part. -{"file:c:\\fo\\b", "file", NULL, NULL, NULL, -1, "c:\\fo\\b", NULL, NULL}, -{"file:/c:\\foo/bar", "file", NULL, NULL, NULL, -1, "/c:\\foo/bar",NULL, NULL}, -{"file://c:/f\\b", "file", NULL, NULL, NULL, -1, "/c:/f\\b", NULL, NULL}, -{"file:///C:/foo", "file", NULL, NULL, NULL, -1, "/C:/foo", NULL, NULL}, -{"file://///\\/\\/c:\\f\\b", "file", NULL, NULL, NULL, -1, "/c:\\f\\b", NULL, NULL}, - // If there is not a drive letter, we should treat is as UNC EXCEPT for - // three slashes, which we treat as a Unix style path. -{"file:server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file:/server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file://server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, -{"file:///server/file", "file", NULL, NULL, NULL, -1, "/server/file",NULL, NULL}, -{"file://\\server/file", "file", NULL, NULL, NULL, -1, "\\server/file",NULL, NULL}, -{"file:////server/file", "file", NULL, NULL, "server", -1, "/file", NULL, NULL}, - // Queries and refs are valid for file URLs as well. -{"file:///C:/foo.html?#", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "", ""}, -{"file:///C:/foo.html?query=yes#ref", "file", NULL, NULL, NULL, -1, "/C:/foo.html", "query=yes", "ref"}, -#else // WIN32 - // No slashes. - {"file:", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL}, - {"file:path", "file", NULL, NULL, NULL, -1, "path", NULL, NULL}, - {"file:path/", "file", NULL, NULL, NULL, -1, "path/", NULL, NULL}, - {"file:path/f.txt", "file", NULL, NULL, NULL, -1, "path/f.txt", NULL, NULL}, - // One slash. - {"file:/", "file", NULL, NULL, NULL, -1, "/", NULL, NULL}, - {"file:/path", "file", NULL, NULL, NULL, -1, "/path", NULL, NULL}, - {"file:/path/", "file", NULL, NULL, NULL, -1, "/path/", NULL, NULL}, - {"file:/path/f.txt", "file", NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL}, - // Two slashes. - {"file://", "file", NULL, NULL, NULL, -1, NULL, NULL, NULL}, - {"file://server", "file", NULL, NULL, "server", -1, NULL, NULL, NULL}, - {"file://server/", "file", NULL, NULL, "server", -1, "/", NULL, NULL}, - {"file://server/f.txt", "file", NULL, NULL, "server", -1, "/f.txt", NULL, NULL}, - // Three slashes. - {"file:///", "file", NULL, NULL, NULL, -1, "/", NULL, NULL}, - {"file:///path", "file", NULL, NULL, NULL, -1, "/path", NULL, NULL}, - {"file:///path/", "file", NULL, NULL, NULL, -1, "/path/", NULL, NULL}, - {"file:///path/f.txt", "file", NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL}, - // More than three slashes. - {"file:////", "file", NULL, NULL, NULL, -1, "/", NULL, NULL}, - {"file:////path", "file", NULL, NULL, NULL, -1, "/path", NULL, NULL}, - {"file:////path/", "file", NULL, NULL, NULL, -1, "/path/", NULL, NULL}, - {"file:////path/f.txt", "file", NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL}, - // Schemeless URLs - {"path/f.txt", NULL, NULL, NULL, NULL, -1, "path/f.txt", NULL, NULL}, - {"path:80/f.txt", "path", NULL, NULL, NULL, -1, "80/f.txt", NULL, NULL}, - {"path/f.txt:80", "path/f.txt",NULL, NULL, NULL, -1, "80", NULL, NULL}, // Wrong. - {"/path/f.txt", NULL, NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL}, - {"/path:80/f.txt", NULL, NULL, NULL, NULL, -1, "/path:80/f.txt",NULL, NULL}, - {"/path/f.txt:80", NULL, NULL, NULL, NULL, -1, "/path/f.txt:80",NULL, NULL}, - {"//server/f.txt", NULL, NULL, NULL, "server", -1, "/f.txt", NULL, NULL}, - {"//server:80/f.txt", NULL, NULL, NULL, "server:80",-1, "/f.txt", NULL, NULL}, - {"//server/f.txt:80", NULL, NULL, NULL, "server", -1, "/f.txt:80", NULL, NULL}, - {"///path/f.txt", NULL, NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL}, - {"///path:80/f.txt", NULL, NULL, NULL, NULL, -1, "/path:80/f.txt",NULL, NULL}, - {"///path/f.txt:80", NULL, NULL, NULL, NULL, -1, "/path/f.txt:80",NULL, NULL}, - {"////path/f.txt", NULL, NULL, NULL, NULL, -1, "/path/f.txt", NULL, NULL}, - {"////path:80/f.txt", NULL, NULL, NULL, NULL, -1, "/path:80/f.txt",NULL, NULL}, - {"////path/f.txt:80", NULL, NULL, NULL, NULL, -1, "/path/f.txt:80",NULL, NULL}, - // Queries and refs are valid for file URLs as well. - {"file:///foo.html?#", "file", NULL, NULL, NULL, -1, "/foo.html", "", ""}, - {"file:///foo.html?q=y#ref", "file", NULL, NULL, NULL, -1, "/foo.html", "q=y", "ref"}, -#endif // WIN32 -}; - -TEST(URLParser, ParseFileURL) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the construtor. - Parsed parsed; - for (size_t i = 0; i < arraysize(file_cases); i++) { - const char* url = file_cases[i].input; - ParseFileURL(url, static_cast(strlen(url)), &parsed); - int port = ParsePort(url, parsed.port); - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].scheme, parsed.scheme)) - << " for case #" << i << " [" << url << "] " - << parsed.scheme.begin << ", " << parsed.scheme.len; - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].username, parsed.username)) - << " for case #" << i << " [" << url << "] " - << parsed.username.begin << ", " << parsed.username.len; - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].password, parsed.password)) - << " for case #" << i << " [" << url << "] " - << parsed.password.begin << ", " << parsed.password.len; - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].host, parsed.host)) - << " for case #" << i << " [" << url << "] " - << parsed.host.begin << ", " << parsed.host.len; - - EXPECT_EQ(file_cases[i].port, port) - << " for case #" << i << " [ " << url << "] " << port; - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].path, parsed.path)) - << " for case #" << i << " [" << url << "] " - << parsed.path.begin << ", " << parsed.path.len; - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].query, parsed.query)) - << " for case #" << i << " [" << url << "] " - << parsed.query.begin << ", " << parsed.query.len; - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].ref, parsed.ref)) - << " for case #" << i << " [ "<< url << "] " - << parsed.query.begin << ", " << parsed.scheme.len; - } -} - - -TEST(URLParser, ExtractFileName) { - struct FileCase { - const char* input; - const char* expected; - } file_cases[] = { - {"http://www.google.com", NULL}, - {"http://www.google.com/", ""}, - {"http://www.google.com/search", "search"}, - {"http://www.google.com/search/", ""}, - {"http://www.google.com/foo/bar.html?baz=22", "bar.html"}, - {"http://www.google.com/foo/bar.html#ref", "bar.html"}, - {"http://www.google.com/search/;param", ""}, - {"http://www.google.com/foo/bar.html;param#ref", "bar.html"}, - {"http://www.google.com/foo/bar.html;foo;param#ref", "bar.html"}, - {"http://www.google.com/foo/bar.html?query#ref", "bar.html"}, - {"http://www.google.com/foo;/bar.html", "bar.html"}, - {"http://www.google.com/foo;/", ""}, - {"http://www.google.com/foo;", "foo"}, - {"http://www.google.com/;", ""}, - {"http://www.google.com/foo;bar;html", "foo"}, - }; - - for (size_t i = 0; i < arraysize(file_cases); i++) { - const char* url = file_cases[i].input; - int len = static_cast(strlen(url)); - - Parsed parsed; - ParseStandardURL(url, len, &parsed); - - Component file_name; - ExtractFileName(url, parsed.path, &file_name); - - EXPECT_TRUE(ComponentMatches(url, file_cases[i].expected, file_name)); - } -} - -// Returns true if the parameter with index |parameter| in the given URL's -// query string. The expected key can be NULL to indicate no such key index -// should exist. The parameter number is 1-based. -static bool NthParameterIs(const char* url, - int parameter, - const char* expected_key, - const char* expected_value) { - Parsed parsed; - ParseStandardURL(url, static_cast(strlen(url)), &parsed); - - Component query = parsed.query; - - for (int i = 1; i <= parameter; i++) { - Component key, value; - if (!ExtractQueryKeyValue(url, &query, &key, &value)) { - if (parameter >= i && !expected_key) - return true; // Expected nonexistent key, got one. - return false; // Not enough keys. - } - - if (i == parameter) { - if (!expected_key) - return false; - - if (strncmp(&url[key.begin], expected_key, key.len) != 0) - return false; - if (strncmp(&url[value.begin], expected_value, value.len) != 0) - return false; - return true; - } - } - return expected_key == NULL; // We didn't find that many parameters. -} - -TEST(URLParser, ExtractQueryKeyValue) { - EXPECT_TRUE(NthParameterIs("http://www.google.com", 1, NULL, NULL)); - - // Basic case. - char a[] = "http://www.google.com?arg1=1&arg2=2&bar"; - EXPECT_TRUE(NthParameterIs(a, 1, "arg1", "1")); - EXPECT_TRUE(NthParameterIs(a, 2, "arg2", "2")); - EXPECT_TRUE(NthParameterIs(a, 3, "bar", "")); - EXPECT_TRUE(NthParameterIs(a, 4, NULL, NULL)); - - // Empty param at the end. - char b[] = "http://www.google.com?foo=bar&"; - EXPECT_TRUE(NthParameterIs(b, 1, "foo", "bar")); - EXPECT_TRUE(NthParameterIs(b, 2, NULL, NULL)); - - // Empty param at the beginning. - char c[] = "http://www.google.com?&foo=bar"; - EXPECT_TRUE(NthParameterIs(c, 1, "", "")); - EXPECT_TRUE(NthParameterIs(c, 2, "foo", "bar")); - EXPECT_TRUE(NthParameterIs(c, 3, NULL, NULL)); - - // Empty key with value. - char d[] = "http://www.google.com?=foo"; - EXPECT_TRUE(NthParameterIs(d, 1, "", "foo")); - EXPECT_TRUE(NthParameterIs(d, 2, NULL, NULL)); - - // Empty value with key. - char e[] = "http://www.google.com?foo="; - EXPECT_TRUE(NthParameterIs(e, 1, "foo", "")); - EXPECT_TRUE(NthParameterIs(e, 2, NULL, NULL)); - - // Empty key and values. - char f[] = "http://www.google.com?&&==&="; - EXPECT_TRUE(NthParameterIs(f, 1, "", "")); - EXPECT_TRUE(NthParameterIs(f, 2, "", "")); - EXPECT_TRUE(NthParameterIs(f, 3, "", "=")); - EXPECT_TRUE(NthParameterIs(f, 4, "", "")); - EXPECT_TRUE(NthParameterIs(f, 5, NULL, NULL)); -} - -// MailtoURL -------------------------------------------------------------------- - -static MailtoURLParseCase mailto_cases[] = { -//|input |scheme |path |query -{"mailto:foo@gmail.com", "mailto", "foo@gmail.com", NULL}, -{" mailto: to \t", "mailto", " to", NULL}, -{"mailto:addr1%2C%20addr2 ", "mailto", "addr1%2C%20addr2", NULL}, -{"Mailto:addr1, addr2 ", "Mailto", "addr1, addr2", NULL}, -{"mailto:addr1:addr2 ", "mailto", "addr1:addr2", NULL}, -{"mailto:?to=addr1,addr2", "mailto", NULL, "to=addr1,addr2"}, -{"mailto:?to=addr1%2C%20addr2", "mailto", NULL, "to=addr1%2C%20addr2"}, -{"mailto:addr1?to=addr2", "mailto", "addr1", "to=addr2"}, -{"mailto:?body=#foobar#", "mailto", NULL, "body=#foobar#",}, -{"mailto:#?body=#foobar#", "mailto", "#", "body=#foobar#"}, -}; - -TEST(URLParser, MailtoUrl) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the constructor. - Parsed parsed; - for (size_t i = 0; i < arraysize(mailto_cases); ++i) { - const char* url = mailto_cases[i].input; - ParseMailtoURL(url, static_cast(strlen(url)), &parsed); - int port = ParsePort(url, parsed.port); - - EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].scheme, parsed.scheme)); - EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, mailto_cases[i].query, parsed.query)); - EXPECT_EQ(PORT_UNSPECIFIED, port); - - // The remaining components are never used for mailto URLs. - ExpectInvalidComponent(parsed.username); - ExpectInvalidComponent(parsed.password); - ExpectInvalidComponent(parsed.port); - ExpectInvalidComponent(parsed.ref); - } -} - -// Various incarnations of filesystem URLs. -static FileSystemURLParseCase filesystem_cases[] = { - // Regular URL with all the parts -{"filesystem:http://user:pass@foo:21/temporary/bar;par?b#c", "http", "user", "pass", "foo", 21, "/temporary", "/bar;par", "b", "c"}, -{"filesystem:https://foo/persistent/bar;par/", "https", NULL, NULL, "foo", -1, "/persistent", "/bar;par/", NULL, NULL}, -{"filesystem:file:///persistent/bar;par/", "file", NULL, NULL, NULL, -1, "/persistent", "/bar;par/", NULL, NULL}, -{"filesystem:file:///persistent/bar;par/?query#ref", "file", NULL, NULL, NULL, -1, "/persistent", "/bar;par/", "query", "ref"}, -{"filesystem:file:///persistent", "file", NULL, NULL, NULL, -1, "/persistent", "", NULL, NULL}, -}; - -TEST(URLParser, FileSystemURL) { - // Declared outside for loop to try to catch cases in init() where we forget - // to reset something that is reset by the constructor. - Parsed parsed; - for (size_t i = 0; i < arraysize(filesystem_cases); i++) { - const FileSystemURLParseCase* parsecase = &filesystem_cases[i]; - const char* url = parsecase->input; - ParseFileSystemURL(url, static_cast(strlen(url)), &parsed); - - EXPECT_TRUE(ComponentMatches(url, "filesystem", parsed.scheme)); - EXPECT_EQ(!parsecase->inner_scheme, !parsed.inner_parsed()); - // Only check the inner_parsed if there is one. - if (parsed.inner_parsed()) { - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_scheme, - parsed.inner_parsed()->scheme)); - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_username, - parsed.inner_parsed()->username)); - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_password, - parsed.inner_parsed()->password)); - EXPECT_TRUE(ComponentMatches(url, parsecase->inner_host, - parsed.inner_parsed()->host)); - int port = ParsePort(url, parsed.inner_parsed()->port); - EXPECT_EQ(parsecase->inner_port, port); - - // The remaining components are never used for filesystem URLs. - ExpectInvalidComponent(parsed.inner_parsed()->query); - ExpectInvalidComponent(parsed.inner_parsed()->ref); - } - - EXPECT_TRUE(ComponentMatches(url, parsecase->path, parsed.path)); - EXPECT_TRUE(ComponentMatches(url, parsecase->query, parsed.query)); - EXPECT_TRUE(ComponentMatches(url, parsecase->ref, parsed.ref)); - - // The remaining components are never used for filesystem URLs. - ExpectInvalidComponent(parsed.username); - ExpectInvalidComponent(parsed.password); - ExpectInvalidComponent(parsed.host); - ExpectInvalidComponent(parsed.port); - } -} - -} // namespace -} // namespace url diff --git a/url_util_unittest.cc b/url_util_unittest.cc deleted file mode 100644 index 526d63f..0000000 --- a/url_util_unittest.cc +++ /dev/null @@ -1,502 +0,0 @@ -// Copyright 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include - -#include "base/macros.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/third_party/mozilla/url_parse.h" -#include "url/url_canon.h" -#include "url/url_canon_stdstring.h" -#include "url/url_test_utils.h" -#include "url/url_util.h" - -namespace url { - -class URLUtilTest : public testing::Test { - public: - URLUtilTest() = default; - ~URLUtilTest() override { - // Reset any added schemes. - Shutdown(); - } - - private: - DISALLOW_COPY_AND_ASSIGN(URLUtilTest); -}; - -TEST_F(URLUtilTest, FindAndCompareScheme) { - Component found_scheme; - - // Simple case where the scheme is found and matches. - const char kStr1[] = "http://www.com/"; - EXPECT_TRUE(FindAndCompareScheme( - kStr1, static_cast(strlen(kStr1)), "http", NULL)); - EXPECT_TRUE(FindAndCompareScheme( - kStr1, static_cast(strlen(kStr1)), "http", &found_scheme)); - EXPECT_TRUE(found_scheme == Component(0, 4)); - - // A case where the scheme is found and doesn't match. - EXPECT_FALSE(FindAndCompareScheme( - kStr1, static_cast(strlen(kStr1)), "https", &found_scheme)); - EXPECT_TRUE(found_scheme == Component(0, 4)); - - // A case where there is no scheme. - const char kStr2[] = "httpfoobar"; - EXPECT_FALSE(FindAndCompareScheme( - kStr2, static_cast(strlen(kStr2)), "http", &found_scheme)); - EXPECT_TRUE(found_scheme == Component()); - - // When there is an empty scheme, it should match the empty scheme. - const char kStr3[] = ":foo.com/"; - EXPECT_TRUE(FindAndCompareScheme( - kStr3, static_cast(strlen(kStr3)), "", &found_scheme)); - EXPECT_TRUE(found_scheme == Component(0, 0)); - - // But when there is no scheme, it should fail. - EXPECT_FALSE(FindAndCompareScheme("", 0, "", &found_scheme)); - EXPECT_TRUE(found_scheme == Component()); - - // When there is a whitespace char in scheme, it should canonicalize the URL - // before comparison. - const char whtspc_str[] = " \r\n\tjav\ra\nscri\tpt:alert(1)"; - EXPECT_TRUE(FindAndCompareScheme(whtspc_str, - static_cast(strlen(whtspc_str)), - "javascript", &found_scheme)); - EXPECT_TRUE(found_scheme == Component(1, 10)); - - // Control characters should be stripped out on the ends, and kept in the - // middle. - const char ctrl_str[] = "\02jav\02scr\03ipt:alert(1)"; - EXPECT_FALSE(FindAndCompareScheme(ctrl_str, - static_cast(strlen(ctrl_str)), - "javascript", &found_scheme)); - EXPECT_TRUE(found_scheme == Component(1, 11)); -} - -TEST_F(URLUtilTest, IsStandard) { - const char kHTTPScheme[] = "http"; - EXPECT_TRUE(IsStandard(kHTTPScheme, Component(0, strlen(kHTTPScheme)))); - - const char kFooScheme[] = "foo"; - EXPECT_FALSE(IsStandard(kFooScheme, Component(0, strlen(kFooScheme)))); -} - -TEST_F(URLUtilTest, IsReferrerScheme) { - const char kHTTPScheme[] = "http"; - EXPECT_TRUE(IsReferrerScheme(kHTTPScheme, Component(0, strlen(kHTTPScheme)))); - - const char kFooScheme[] = "foo"; - EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); -} - -TEST_F(URLUtilTest, AddReferrerScheme) { - const char kFooScheme[] = "foo"; - EXPECT_FALSE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); - AddReferrerScheme(kFooScheme, url::SCHEME_WITH_HOST); - EXPECT_TRUE(IsReferrerScheme(kFooScheme, Component(0, strlen(kFooScheme)))); -} - -TEST_F(URLUtilTest, GetStandardSchemeType) { - url::SchemeType scheme_type; - - const char kHTTPScheme[] = "http"; - scheme_type = url::SCHEME_WITHOUT_AUTHORITY; - EXPECT_TRUE(GetStandardSchemeType(kHTTPScheme, - Component(0, strlen(kHTTPScheme)), - &scheme_type)); - EXPECT_EQ(url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION, scheme_type); - - const char kFilesystemScheme[] = "filesystem"; - scheme_type = url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; - EXPECT_TRUE(GetStandardSchemeType(kFilesystemScheme, - Component(0, strlen(kFilesystemScheme)), - &scheme_type)); - EXPECT_EQ(url::SCHEME_WITHOUT_AUTHORITY, scheme_type); - - const char kFooScheme[] = "foo"; - scheme_type = url::SCHEME_WITH_HOST_PORT_AND_USER_INFORMATION; - EXPECT_FALSE(GetStandardSchemeType(kFooScheme, - Component(0, strlen(kFooScheme)), - &scheme_type)); -} - -TEST_F(URLUtilTest, ReplaceComponents) { - Parsed parsed; - RawCanonOutputT output; - Parsed new_parsed; - - // Check that the following calls do not cause crash - Replacements replacements; - replacements.SetRef("test", Component(0, 4)); - ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed); - ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed); - replacements.ClearRef(); - replacements.SetHost("test", Component(0, 4)); - ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed); - ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed); - - replacements.ClearHost(); - ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed); - ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed); - ReplaceComponents(NULL, 0, parsed, replacements, NULL, &output, &new_parsed); - ReplaceComponents("", 0, parsed, replacements, NULL, &output, &new_parsed); -} - -static std::string CheckReplaceScheme(const char* base_url, - const char* scheme) { - // Make sure the input is canonicalized. - RawCanonOutput<32> original; - Parsed original_parsed; - Canonicalize(base_url, strlen(base_url), true, NULL, &original, - &original_parsed); - - Replacements replacements; - replacements.SetScheme(scheme, Component(0, strlen(scheme))); - - std::string output_string; - StdStringCanonOutput output(&output_string); - Parsed output_parsed; - ReplaceComponents(original.data(), original.length(), original_parsed, - replacements, NULL, &output, &output_parsed); - - output.Complete(); - return output_string; -} - -TEST_F(URLUtilTest, ReplaceScheme) { - EXPECT_EQ("https://google.com/", - CheckReplaceScheme("http://google.com/", "https")); - EXPECT_EQ("file://google.com/", - CheckReplaceScheme("http://google.com/", "file")); - EXPECT_EQ("http://home/Build", - CheckReplaceScheme("file:///Home/Build", "http")); - EXPECT_EQ("javascript:foo", - CheckReplaceScheme("about:foo", "javascript")); - EXPECT_EQ("://google.com/", - CheckReplaceScheme("http://google.com/", "")); - EXPECT_EQ("http://google.com/", - CheckReplaceScheme("about:google.com", "http")); - EXPECT_EQ("http:", CheckReplaceScheme("", "http")); - -#ifdef WIN32 - // Magic Windows drive letter behavior when converting to a file URL. - EXPECT_EQ("file:///E:/foo/", - CheckReplaceScheme("http://localhost/e:foo/", "file")); -#endif - - // This will probably change to "about://google.com/" when we fix - // http://crbug.com/160 which should also be an acceptable result. - EXPECT_EQ("about://google.com/", - CheckReplaceScheme("http://google.com/", "about")); - - EXPECT_EQ("http://example.com/%20hello%20#%20world", - CheckReplaceScheme("myscheme:example.com/ hello # world ", "http")); -} - -TEST_F(URLUtilTest, DecodeURLEscapeSequences) { - struct DecodeCase { - const char* input; - const char* output; - DecodeURLResult result; - } decode_cases[] = { - {"hello, world", "hello, world", DecodeURLResult::kAsciiOnly}, - {"%01%02%03%04%05%06%07%08%09%0a%0B%0C%0D%0e%0f/", - "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0B\x0C\x0D\x0e\x0f/", - DecodeURLResult::kAsciiOnly}, - {"%10%11%12%13%14%15%16%17%18%19%1a%1B%1C%1D%1e%1f/", - "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1B\x1C\x1D\x1e\x1f/", - DecodeURLResult::kAsciiOnly}, - {"%20%21%22%23%24%25%26%27%28%29%2a%2B%2C%2D%2e%2f/", - " !\"#$%&'()*+,-.//", DecodeURLResult::kAsciiOnly}, - {"%30%31%32%33%34%35%36%37%38%39%3a%3B%3C%3D%3e%3f/", "0123456789:;<=>?/", - DecodeURLResult::kAsciiOnly}, - {"%40%41%42%43%44%45%46%47%48%49%4a%4B%4C%4D%4e%4f/", "@ABCDEFGHIJKLMNO/", - DecodeURLResult::kAsciiOnly}, - {"%50%51%52%53%54%55%56%57%58%59%5a%5B%5C%5D%5e%5f/", - "PQRSTUVWXYZ[\\]^_/", DecodeURLResult::kAsciiOnly}, - {"%60%61%62%63%64%65%66%67%68%69%6a%6B%6C%6D%6e%6f/", "`abcdefghijklmno/", - DecodeURLResult::kAsciiOnly}, - {"%70%71%72%73%74%75%76%77%78%79%7a%7B%7C%7D%7e%7f/", - "pqrstuvwxyz{|}~\x7f/", DecodeURLResult::kAsciiOnly}, - // Test un-UTF-8-ization. - {"%e4%bd%a0%e5%a5%bd", "\xe4\xbd\xa0\xe5\xa5\xbd", - DecodeURLResult::kUTF8}, - }; - - for (size_t i = 0; i < arraysize(decode_cases); i++) { - const char* input = decode_cases[i].input; - RawCanonOutputT output; - EXPECT_EQ(decode_cases[i].result, - DecodeURLEscapeSequences(input, strlen(input), &output)); - EXPECT_EQ(decode_cases[i].output, - base::UTF16ToUTF8(base::string16(output.data(), - output.length()))); - } - - // Our decode should decode %00 - const char zero_input[] = "%00"; - RawCanonOutputT zero_output; - DecodeURLEscapeSequences(zero_input, strlen(zero_input), &zero_output); - EXPECT_NE("%00", base::UTF16ToUTF8( - base::string16(zero_output.data(), zero_output.length()))); - - // Test the error behavior for invalid UTF-8. - { - const char invalid_input[] = "%e4%a0%e5%a5%bd"; - const base::char16 invalid_expected[4] = {0x00e4, 0x00a0, 0x597d, 0}; - RawCanonOutputT invalid_output; - EXPECT_EQ(DecodeURLResult::kMixed, - DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), - &invalid_output)); - EXPECT_EQ(base::string16(invalid_expected), - base::string16(invalid_output.data(), invalid_output.length())); - } - { - const char invalid_input[] = "%e4%a0%e5%bd"; - const base::char16 invalid_expected[5] = {0x00e4, 0x00a0, 0x00e5, 0x00bd, - 0}; - RawCanonOutputT invalid_output; - EXPECT_EQ(DecodeURLResult::kIsomorphic, - DecodeURLEscapeSequences(invalid_input, strlen(invalid_input), - &invalid_output)); - EXPECT_EQ(base::string16(invalid_expected), - base::string16(invalid_output.data(), invalid_output.length())); - } -} - -TEST_F(URLUtilTest, TestEncodeURIComponent) { - struct EncodeCase { - const char* input; - const char* output; - } encode_cases[] = { - {"hello, world", "hello%2C%20world"}, - {"\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F", - "%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F"}, - {"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F", - "%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F"}, - {" !\"#$%&'()*+,-./", - "%20!%22%23%24%25%26%27()*%2B%2C-.%2F"}, - {"0123456789:;<=>?", - "0123456789%3A%3B%3C%3D%3E%3F"}, - {"@ABCDEFGHIJKLMNO", - "%40ABCDEFGHIJKLMNO"}, - {"PQRSTUVWXYZ[\\]^_", - "PQRSTUVWXYZ%5B%5C%5D%5E_"}, - {"`abcdefghijklmno", - "%60abcdefghijklmno"}, - {"pqrstuvwxyz{|}~\x7f", - "pqrstuvwxyz%7B%7C%7D~%7F"}, - }; - - for (size_t i = 0; i < arraysize(encode_cases); i++) { - const char* input = encode_cases[i].input; - RawCanonOutputT buffer; - EncodeURIComponent(input, strlen(input), &buffer); - std::string output(buffer.data(), buffer.length()); - EXPECT_EQ(encode_cases[i].output, output); - } -} - -TEST_F(URLUtilTest, TestResolveRelativeWithNonStandardBase) { - // This tests non-standard (in the sense that IsStandard() == false) - // hierarchical schemes. - struct ResolveRelativeCase { - const char* base; - const char* rel; - bool is_valid; - const char* out; - } resolve_non_standard_cases[] = { - // Resolving a relative path against a non-hierarchical URL should fail. - {"scheme:opaque_data", "/path", false, ""}, - // Resolving a relative path against a non-standard authority-based base - // URL doesn't alter the authority section. - {"scheme://Authority/", "../path", true, "scheme://Authority/path"}, - // A non-standard hierarchical base is resolved with path URL - // canonicalization rules. - {"data:/Blah:Blah/", "file.html", true, "data:/Blah:Blah/file.html"}, - {"data:/Path/../part/part2", "file.html", true, - "data:/Path/../part/file.html"}, - {"data://text/html,payload", "//user:pass@host:33////payload22", true, - "data://user:pass@host:33////payload22"}, - // Path URL canonicalization rules also apply to non-standard authority- - // based URLs. - {"custom://Authority/", "file.html", true, - "custom://Authority/file.html"}, - {"custom://Authority/", "other://Auth/", true, "other://Auth/"}, - {"custom://Authority/", "../../file.html", true, - "custom://Authority/file.html"}, - {"custom://Authority/path/", "file.html", true, - "custom://Authority/path/file.html"}, - {"custom://Authority:NoCanon/path/", "file.html", true, - "custom://Authority:NoCanon/path/file.html"}, - // It's still possible to get an invalid path URL. - {"custom://Invalid:!#Auth/", "file.html", false, ""}, - // A path with an authority section gets canonicalized under standard URL - // rules, even though the base was non-standard. - {"content://content.Provider/", "//other.Provider", true, - "content://other.provider/"}, - - // Resolving an absolute URL doesn't cause canonicalization of the - // result. - {"about:blank", "custom://Authority", true, "custom://Authority"}, - // Fragment URLs can be resolved against a non-standard base. - {"scheme://Authority/path", "#fragment", true, - "scheme://Authority/path#fragment"}, - {"scheme://Authority/", "#fragment", true, - "scheme://Authority/#fragment"}, - // Resolving should fail if the base URL is authority-based but is - // missing a path component (the '/' at the end). - {"scheme://Authority", "path", false, ""}, - // Test resolving a fragment (only) against any kind of base-URL. - {"about:blank", "#id42", true, "about:blank#id42"}, - {"about:blank", " #id42", true, "about:blank#id42"}, - {"about:blank#oldfrag", "#newfrag", true, "about:blank#newfrag"}, - // A surprising side effect of allowing fragments to resolve against - // any URL scheme is we might break javascript: URLs by doing so... - {"javascript:alert('foo#bar')", "#badfrag", true, - "javascript:alert('foo#badfrag"}, - // In this case, the backslashes will not be canonicalized because it's a - // non-standard URL, but they will be treated as a path separators, - // giving the base URL here a path of "\". - // - // The result here is somewhat arbitrary. One could argue it should be - // either "aaa://a\" or "aaa://a/" since the path is being replaced with - // the "current directory". But in the context of resolving on data URLs, - // adding the requested dot doesn't seem wrong either. - {"aaa://a\\", "aaa:.", true, "aaa://a\\."}}; - - for (size_t i = 0; i < arraysize(resolve_non_standard_cases); i++) { - const ResolveRelativeCase& test_data = resolve_non_standard_cases[i]; - Parsed base_parsed; - ParsePathURL(test_data.base, strlen(test_data.base), false, &base_parsed); - - std::string resolved; - StdStringCanonOutput output(&resolved); - Parsed resolved_parsed; - bool valid = ResolveRelative(test_data.base, strlen(test_data.base), - base_parsed, test_data.rel, - strlen(test_data.rel), NULL, &output, - &resolved_parsed); - output.Complete(); - - EXPECT_EQ(test_data.is_valid, valid) << i; - if (test_data.is_valid && valid) - EXPECT_EQ(test_data.out, resolved) << i; - } -} - -TEST_F(URLUtilTest, TestNoRefComponent) { - // The hash-mark must be ignored when mailto: scheme is parsed, - // even if the URL has a base and relative part. - const char* base = "mailto://to/"; - const char* rel = "any#body"; - - Parsed base_parsed; - ParsePathURL(base, strlen(base), false, &base_parsed); - - std::string resolved; - StdStringCanonOutput output(&resolved); - Parsed resolved_parsed; - - bool valid = ResolveRelative(base, strlen(base), - base_parsed, rel, - strlen(rel), NULL, &output, - &resolved_parsed); - EXPECT_TRUE(valid); - EXPECT_FALSE(resolved_parsed.ref.is_valid()); -} - -TEST_F(URLUtilTest, PotentiallyDanglingMarkup) { - struct ResolveRelativeCase { - const char* base; - const char* rel; - bool potentially_dangling_markup; - const char* out; - } cases[] = { - {"https://example.com/", "/path<", false, "https://example.com/path%3C"}, - {"https://example.com/", "\n/path<", true, "https://example.com/path%3C"}, - {"https://example.com/", "\r/path<", true, "https://example.com/path%3C"}, - {"https://example.com/", "\t/path<", true, "https://example.com/path%3C"}, - {"https://example.com/", "/pa\nth<", true, "https://example.com/path%3C"}, - {"https://example.com/", "/pa\rth<", true, "https://example.com/path%3C"}, - {"https://example.com/", "/pa\tth<", true, "https://example.com/path%3C"}, - {"https://example.com/", "/path\n<", true, "https://example.com/path%3C"}, - {"https://example.com/", "/path\r<", true, "https://example.com/path%3C"}, - {"https://example.com/", "/path\r<", true, "https://example.com/path%3C"}, - {"https://example.com/", "\n/