Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disallow dot-dot segments in the path argument of url_from_file_path #36

Merged
merged 2 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 78 additions & 25 deletions include/upa/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -1035,13 +1035,15 @@ inline bool pathname_has_windows_drive(string_view pathname) noexcept {
is_windows_drive(pathname[1], pathname[2]);
}

// Check string is absolute Windows drive path (for example: "C:\\path" or "C:/path")
/// Check whether a string is an absolute Windows drive path (for example: "C:\\path" or "C:/path")
/// @return pointer to the path after the first (back)slash, or `nullptr` if the path is not
/// an absolute Windows drive path
template <typename CharT>
constexpr bool is_windows_drive_absolute_path(const CharT* pointer, const CharT* last) noexcept {
return
last - pointer > 2 &&
constexpr const CharT* is_windows_drive_absolute_path(const CharT* pointer, const CharT* last) noexcept {
return (last - pointer > 2 &&
detail::is_windows_drive(pointer[0], pointer[1]) &&
detail::is_windows_slash(pointer[2]);
detail::is_windows_slash(pointer[2]))
? pointer + 3 : nullptr;
}

} // namespace detail
Expand Down Expand Up @@ -2946,24 +2948,29 @@ inline void url_setter::insert_part(url::PartType new_pt, const char* str, std::
}
#endif

// Check UNC path
//
// Input - path string with the first two backslashes skipped
//
/// @brief Check UNC path
///
/// Input - path string with the first two backslashes skipped
///
/// @param[in] first start of path string
/// @param[in] last end of path string
/// @return pointer to the end of the UNC share name, or `nullptr`
/// if input is not valid UNC
template <typename CharT>
inline bool is_unc_path(const CharT* first, const CharT* last)
inline const CharT* is_unc_path(const CharT* first, const CharT* last)
{
// https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-dfsc/149a3039-98ce-491a-9268-2f5ddef08192
std::size_t path_components_count = 0;
const CharT* end_of_share_name = nullptr;
const auto* start = first;
while (start != last) {
const auto* pcend = std::find_if(start, last, detail::is_windows_slash<CharT>);
// path components MUST be at least one character in length
if (start == pcend)
return false;
return nullptr;
// path components MUST NOT contain a backslash (\) or a null
if (std::find(start, pcend, '\0') != pcend)
return false;
return nullptr;

++path_components_count;

Expand All @@ -2975,12 +2982,12 @@ inline bool is_unc_path(const CharT* first, const CharT* last)
// Do not allow "?" and "." hostnames, because "\\?\" means Win32 file
// namespace and "\\.\" means Win32 device namespace
if (start[0] == '?' || start[0] == '.')
return false;
return nullptr;
break;
case 2:
// Do not allow Windows drive letter, because it is not a valid hostname
if (detail::is_windows_drive(start[0], start[1]))
return false;
return nullptr;
break;
}
break;
Expand All @@ -2991,21 +2998,47 @@ inline bool is_unc_path(const CharT* first, const CharT* last)
switch (pcend - start) {
case 1:
if (start[0] == '.')
return false;
return nullptr;
break;
case 2:
if (start[0] == '.' && start[1] == '.')
return false;
return nullptr;
break;
}
// A valid UNC path MUST contain two or more path components
end_of_share_name = pcend;
break;
default:;
}
if (pcend == last) break;
start = pcend + 1; // skip '\'
}
// A valid UNC path MUST contain two or more path components
return path_components_count >= 2;
return end_of_share_name;
}

/// @brief Detect a ".." (dot-dot) segment in a path
///
/// A dot-dot segment is two consecutive dots which either begin the path or
/// follow a slash, and which either end the path or are followed by a slash.
///
/// @param[in] first start of path string
/// @param[in] last end of path string
/// @param[in] is_slash predicate returning true if a char is a slash (or backslash)
/// @return `true` if the path contains a ".." segment, `false` otherwise
template <typename CharT, typename IsSlash>
inline bool has_dot_dot_segment(const CharT* first, const CharT* last, IsSlash is_slash) {
    using traits = std::char_traits<CharT>;

    // fewer than two characters cannot hold ".."
    if (last - first < 2)
        return false;

    // the first dot of a segment must have one more character after it,
    // so the last character is never a search candidate
    const CharT* const scan_end = last - 1;

    const CharT* dot = traits::find(first, scan_end - first, '.');
    while (dot != nullptr) {
        if (dot[1] == '.') {
            const bool at_segment_start = dot == first || is_slash(*(dot - 1));
            if (at_segment_start) {
                // dot[2] is only read when it lies inside the string
                const bool at_segment_end = last - dot == 2 || is_slash(dot[2]);
                if (at_segment_end)
                    return true;
            }
        }
        // the character right after a dot is itself preceded by a dot, so it
        // cannot begin a ".." segment: resume the search two characters ahead
        const CharT* const resume = dot + 2;
        if (resume >= scan_end)
            return false;
        dot = traits::find(resume, scan_end - resume, '.');
    }
    return false;
}

} // namespace detail
Expand Down Expand Up @@ -3044,15 +3077,32 @@ enum class file_path_format {

/// @brief Make URL from OS file path
///
/// The file path must be absolute and must not contain any dot-dot (..)
/// segments.
///
/// There is a difference in how paths with dot-dot segments are normalized in the OS and in the
/// WHATWG URL standard. For example, in POSIX the path `/a//../b` is normalized to `/b`, while
/// the URL parser normalizes this path to `/a/b`. This library does not implement OS specific path
/// normalization, which is the main reason why it does not accept paths with dot-dot segments.
/// Therefore, if there are such segments in the path, it should be normalized by OS tools before
/// being submitted to this function. Normalization can be done using the POSIX `realpath`
/// function, the Windows `GetFullPathName` function, or, if you are using C++17, the
/// `std::filesystem::canonical` function.
///
/// Throws url_error exception on error.
///
/// @param[in] str absolute file path string
/// @param[in] format file path format, one of upa::file_path_format::detect,
/// upa::file_path_format::posix, upa::file_path_format::windows,
/// upa::file_path_format::native
/// @return file URL
/// @see [Pathname (POSIX)](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_271),
/// [realpath](https://pubs.opengroup.org/onlinepubs/9699919799/functions/realpath.html),
/// [GetFullPathName](https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew),
/// [std::filesystem::canonical](https://en.cppreference.com/w/cpp/filesystem/canonical)
template <class StrT, enable_if_str_arg_t<StrT> = 0>
inline url url_from_file_path(StrT&& str, file_path_format format = file_path_format::detect) {
using CharT = str_arg_char_t<StrT>;
const auto inp = make_str_arg(std::forward<StrT>(str));
const auto* first = inp.begin();
const auto* last = inp.end();
Expand All @@ -3069,11 +3119,14 @@ inline url url_from_file_path(StrT&& str, file_path_format format = file_path_fo
}

const auto* pointer = first;
const auto* start_of_check = first;
const code_point_set* no_encode_set = nullptr;

std::string str_url("file://");

if (format == file_path_format::posix) {
if (detail::has_dot_dot_segment(start_of_check, last, [](CharT c) { return c == '/'; }))
throw url_error(validation_errc::file_unsupported_path, "Unsupported file path");
// Absolute POSIX path
no_encode_set = &posix_path_no_encode_set;
} else {
Expand Down Expand Up @@ -3107,18 +3160,18 @@ inline url url_from_file_path(StrT&& str, file_path_format format = file_path_fo
is_unc = true;
}
}
if (is_unc
start_of_check = is_unc
? detail::is_unc_path(pointer, last)
: detail::is_windows_drive_absolute_path(pointer, last)) {
no_encode_set = &raw_path_no_encode_set;
if (!is_unc) str_url.push_back('/'); // start path
} else {
: detail::is_windows_drive_absolute_path(pointer, last);
if (start_of_check == nullptr ||
detail::has_dot_dot_segment(start_of_check, last, detail::is_windows_slash<CharT>))
throw url_error(validation_errc::file_unsupported_path, "Unsupported file path");
}
no_encode_set = &raw_path_no_encode_set;
if (!is_unc) str_url.push_back('/'); // start path
}

// Check for null characters
if (util::contains_null(pointer, last))
if (util::contains_null(start_of_check, last))
throw url_error(validation_errc::null_character, "Path contains null character");

// make URL
Expand Down
30 changes: 30 additions & 0 deletions test/test-url.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,25 @@ TEST_CASE("Invalid UTF-32 in hostname") {

// URL utilities

TEST_CASE("detail::has_dot_dot_segment") {
    // Run the detector over a whole string, treating only '/' as a separator
    const auto is_forward_slash = [](char c) { return c == '/'; };
    const auto contains_dot_dot = [&](upa::string_view path) {
        const char* const begin = path.data();
        return upa::detail::has_dot_dot_segment(begin, begin + path.length(), is_forward_slash);
    };

    // ".." standing alone as a segment must be detected
    CHECK(contains_dot_dot(".."));
    CHECK(contains_dot_dot("../"));
    CHECK(contains_dot_dot("/.."));
    CHECK(contains_dot_dot("/../"));
    CHECK(contains_dot_dot("/./.."));

    // single-dot segments are not dot-dot segments
    CHECK_FALSE(contains_dot_dot("."));
    CHECK_FALSE(contains_dot_dot("/./"));
    CHECK_FALSE(contains_dot_dot("./."));

    // three dots, or two dots that are only part of a longer segment
    CHECK_FALSE(contains_dot_dot("..."));
    CHECK_FALSE(contains_dot_dot("/.../"));
    CHECK_FALSE(contains_dot_dot("/a../..z/"));
}

TEST_CASE("url_from_file_path") {
SUBCASE("POSIX path") {
CHECK(upa::url_from_file_path("/").href() == "file:///");
Expand All @@ -600,13 +619,17 @@ TEST_CASE("url_from_file_path") {
CHECK(upa::url_from_file_path("/c:/last").href() == "file:///c%3A/last");
CHECK(upa::url_from_file_path("/c|/last").href() == "file:///c%7C/last");
CHECK(upa::url_from_file_path("/\\", upa::file_path_format::posix).href() == "file:///%5C");
CHECK(upa::url_from_file_path("/..\\", upa::file_path_format::posix).href() == "file:///..%5C");
// empty path
CHECK_THROWS_AS(upa::url_from_file_path(""), upa::url_error);
// non absolute path
CHECK_THROWS_AS(upa::url_from_file_path("path", upa::file_path_format::posix), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:\\path", upa::file_path_format::posix), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:/path", upa::file_path_format::posix), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\h\\p", upa::file_path_format::posix), upa::url_error);
// ".." segments
CHECK_THROWS_AS(upa::url_from_file_path("/..", upa::file_path_format::posix), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("/../", upa::file_path_format::posix), upa::url_error);
// null character
CHECK_THROWS_AS(upa::url_from_file_path(std::string{ "/p\0", 3 }, upa::file_path_format::posix), upa::url_error);
}
Expand Down Expand Up @@ -667,6 +690,13 @@ TEST_CASE("url_from_file_path") {
// unsupported paths
CHECK_THROWS_AS(upa::url_from_file_path("\\\\?\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\\\.\\Volume{b75e2c83-0000-0000-0000-602f00000000}\\Test\\Foo.txt"), upa::url_error);
// ".." segments
CHECK_THROWS_AS(upa::url_from_file_path("C:\\..", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:\\..\\", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:/..", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("C:/../", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\h\\sn\\..", upa::file_path_format::windows), upa::url_error);
CHECK_THROWS_AS(upa::url_from_file_path("\\h\\sn\\../", upa::file_path_format::windows), upa::url_error);
// null character
CHECK_THROWS_AS(upa::url_from_file_path(std::string{ "C:\\p\0", 5 }, upa::file_path_format::windows), upa::url_error);
}
Expand Down
Loading