Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve sanitize_filename #321

Merged
merged 1 commit into from
Jan 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 67 additions & 116 deletions include/crow/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
#include <string>
#include <unordered_map>

#include <boost/algorithm/string.hpp>

#include "crow/settings.h"

namespace crow
Expand Down Expand Up @@ -627,125 +625,78 @@ namespace crow

// Sanitizes `data` in place so it can be used as a file name, writing `replacement` over unsafe content.
// NOTE(review): this span is a rendered diff -- lines of the removed and of the added implementation appear
// interleaved without +/- markers (plus two lines of review-UI text), so it does not compile as shown.
// The added implementation (the lambdas and the `for (unsigned i ...)` loop) does the following:
//   * limits the string to 255 characters,
//   * replaces characters below 0x20, bytes 0x80-0x9F and ? < > : * | " with `replacement`,
//   * collapses ".." and the device names CON/PRN/AUX/NUL/COM[1-9]/LPT[1-9] into a single `replacement`
//     when they begin the string or directly follow '/' or a backslash, and are followed by the end of the
//     string, '.', ':', '/' or a backslash.
// The boost::ireplace_all calls and the long `case` list appear to belong to the removed implementation.
inline static void sanitize_filename(std::string& data, char replacement = '_')
{
unsigned char i = 0, length_limit;

length_limit = data.length() < 255 ? data.length() : 255;
data = data.substr(0, length_limit);
// Clamp the name to at most 255 characters.
if (data.length() > 255)
data.resize(255);

for (; i < length_limit; i++)
// ASCII-only upper-casing; every other character is returned unchanged.
static const auto toUpper = [](char c) {
return ((c >= 'a') && (c <= 'z')) ? (c - ('a' - 'A')) : c;
};
// Check for special device names. The Windows behavior is really odd here, it will consider both AUX and AUX.txt
// a special device. Thus we search for the string (case-insensitive), and then check if the string ends or if
// it has a dangerous follow-up character (one of . : / or a backslash).
auto sanitizeSpecialFile = [](std::string& source, unsigned ofs, const char* pattern, bool includeNumber, char replacement) {
unsigned i = ofs, len = source.length();
The-EDev marked this conversation as resolved.
Show resolved Hide resolved
const char* p = pattern;
// Case-insensitive match of `pattern` starting at `ofs`; bail out on the first mismatch.
while (*p)
{
if (i >= len) return;
if (toUpper(source[i]) != *p) return;
++i;
++p;
}
// When requested, the pattern must be followed by exactly one digit in the range 1-9.
if (includeNumber)
{
if ((i >= len) || (source[i] < '1') || (source[i] > '9')) return;
++i;
}
// The match only counts if the name ends here or is followed by '.', ':', '/' or a backslash.
if ((i >= len) || (source[i] == '.') || (source[i] == ':') || (source[i] == '/') || (source[i] == '\\'))
{
// Shrink the matched name to one character and overwrite that character with the replacement.
source.erase(ofs + 1, (i - ofs) - 1);
source[ofs] = replacement;
}
};
// True at the start of the string and right after a path separator (see the end of the loop body).
bool checkForSpecialEntries = true;
for (unsigned i = 0; i < data.length(); ++i)
{
switch ((unsigned char)data[i])
// Recognize directory traversals and the special devices CON/PRN/AUX/NUL/COM[1-9]/LPT[1-9]
if (checkForSpecialEntries)
{
checkForSpecialEntries = false;
switch (toUpper(data[i]))
{
case 'A':
sanitizeSpecialFile(data, i, "AUX", false, replacement);
break;
case 'C':
sanitizeSpecialFile(data, i, "CON", false, replacement);
sanitizeSpecialFile(data, i, "COM", true, replacement);
break;
case 'L':
sanitizeSpecialFile(data, i, "LPT", true, replacement);
break;
case 'N':
sanitizeSpecialFile(data, i, "NUL", false, replacement);
break;
case 'P':
sanitizeSpecialFile(data, i, "PRN", false, replacement);
break;
case '.':
sanitizeSpecialFile(data, i, "..", false, replacement);
break;
}
}

// Sanitize individual characters
unsigned char c = data[i];
if ((c < ' ') || ((c >= 0x80) && (c <= 0x9F)) || (c == '?') || (c == '<') || (c == '>') || (c == ':') || (c == '*') || (c == '|') || (c == '\"'))
{
data[i] = replacement;
}
else if ((c == '/') || (c == '\\'))
{
// WARNING While I can't see how using '\' or '/' would cause a problem, it still warrants an investigation
//case '/':
case '?':
case '<':
case '>':
//case '\\':
case ':':
case '*':
case '|':
case '\"':

case 0x00:
case 0x01:
case 0x02:
case 0x03:
case 0x04:
case 0x05:
case 0x06:
case 0x07:
case 0x08:
case 0x09:
case 0x0a:
case 0x0b:
case 0x0c:
case 0x0d:
case 0x0e:
case 0x0f:
case 0x10:
case 0x11:
case 0x12:
case 0x13:
case 0x14:
case 0x15:
case 0x16:
case 0x17:
case 0x18:
case 0x19:
case 0x1a:
case 0x1b:
case 0x1c:
case 0x1d:
case 0x1e:
case 0x1f:

case 0x80:
case 0x81:
case 0x82:
case 0x83:
case 0x84:
case 0x85:
case 0x86:
case 0x87:
case 0x88:
case 0x89:
case 0x8a:
case 0x8b:
case 0x8c:
case 0x8d:
case 0x8e:
case 0x8f:
case 0x90:
case 0x91:
case 0x92:
case 0x93:
case 0x94:
case 0x95:
case 0x96:
case 0x97:
case 0x98:
case 0x99:
case 0x9a:
case 0x9b:
case 0x9c:
case 0x9d:
case 0x9e:
case 0x9f:

data[i] = replacement;
break;

default:
break;
// A separator starts a new path component, so re-arm the special-name check for the next character.
checkForSpecialEntries = true;
}
}
std::string str_replacement(1, replacement);

boost::ireplace_all(data, "..", str_replacement);

boost::ireplace_all(data, "CON", str_replacement);
boost::ireplace_all(data, "PRN", str_replacement);
boost::ireplace_all(data, "AUX", str_replacement);
boost::ireplace_all(data, "NUL", str_replacement);
boost::ireplace_all(data, "COM1", str_replacement);
boost::ireplace_all(data, "COM2", str_replacement);
boost::ireplace_all(data, "COM3", str_replacement);
boost::ireplace_all(data, "COM4", str_replacement);
boost::ireplace_all(data, "COM5", str_replacement);
boost::ireplace_all(data, "COM6", str_replacement);
boost::ireplace_all(data, "COM7", str_replacement);
boost::ireplace_all(data, "COM8", str_replacement);
boost::ireplace_all(data, "COM9", str_replacement);
boost::ireplace_all(data, "LPT1", str_replacement);
boost::ireplace_all(data, "LPT2", str_replacement);
boost::ireplace_all(data, "LPT3", str_replacement);
boost::ireplace_all(data, "LPT4", str_replacement);
boost::ireplace_all(data, "LPT5", str_replacement);
boost::ireplace_all(data, "LPT6", str_replacement);
boost::ireplace_all(data, "LPT7", str_replacement);
boost::ireplace_all(data, "LPT8", str_replacement);
boost::ireplace_all(data, "LPT9", str_replacement);
}

} // namespace utility
Expand Down
28 changes: 28 additions & 0 deletions tests/unittest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2447,6 +2447,34 @@ TEST_CASE("base64")
CHECK(crow::utility::base64decode(sample_bin2_enc_np, 6) == std::string(reinterpret_cast<char const*>(sample_bin2)).substr(0, 4));
} // base64

TEST_CASE("sanitize_filename")
{
    // Table of (raw input, expected result) pairs. Every input is sanitized with the
    // default replacement character and compared against the expected string.
    struct Expectation
    {
        const char* raw;
        const char* cleaned;
    };

    static const Expectation expectations[] = {
      // Plain paths and directory traversal sequences
      {"abc/def", "abc/def"},
      {"abc/../def", "abc/_/def"},
      {"abc/..\\..\\..//.../def", "abc/_\\_\\_//_./def"},
      {"abc/..../def", "abc/_../def"},
      {"abc/x../def", "abc/x../def"},
      {"../etc/passwd", "_/etc/passwd"},
      // Device names, with and without follow-up characters
      {"abc/AUX", "abc/_"},
      {"abc/AUX/foo", "abc/_/foo"},
      {"abc/AUX:", "abc/__"},
      {"abc/AUXxy", "abc/AUXxy"},
      {"abc/AUX.xy", "abc/_.xy"},
      {"abc/NUL", "abc/_"},
      {"abc/NU", "abc/NU"},
      {"abc/NuL", "abc/_"},
      {"abc/LPT1\\", "abc/_\\"},
      {"abc/COM1", "abc/_"},
      // Individually forbidden characters
      {"ab?<>:*|\"cd", "ab_______cd"},
      // Numbered devices require a digit
      {"abc/COM9", "abc/_"},
      {"abc/COM", "abc/COM"},
      {"abc/CON", "abc/_"},
    };

    for (const Expectation& expectation : expectations)
    {
        string subject(expectation.raw);
        crow::utility::sanitize_filename(subject);
        CHECK(subject == expectation.cleaned);
    }
}

TEST_CASE("get_port")
{
SimpleApp app;
Expand Down