CrowCpp · The-EDev · Jan 24, 2022 · Jan 22, 2022
diff --git a/include/crow/utility.h b/include/crow/utility.h
@@ -9,8 +9,6 @@
 #include <string>
 #include <unordered_map>
 
-#include <boost/algorithm/string.hpp>
-
 #include "crow/settings.h"
 
 namespace crow
@@ -627,125 +625,78 @@ namespace crow
 
         inline static void sanitize_filename(std::string& data, char replacement = '_')
         {
-            unsigned char i = 0, length_limit;
-
-            length_limit = data.length() < 255 ? data.length() : 255;
-            data = data.substr(0, length_limit);
+            if (data.length() > 255)
+                data.resize(255);
 
-            for (; i < length_limit; i++)
+            static const auto toUpper = [](char c) {
+                return ((c >= 'a') && (c <= 'z')) ? (c - ('a' - 'A')) : c;
+            };
+            // Check for special device names. The Windows behavior is really odd here, it will consider both AUX and AUX.txt
+            // a special device. Thus we search for the string (case-insensitive), and then check if the string ends or if
+            // is has a dangerous follow up character (.:\/)
+            auto sanitizeSpecialFile = [](std::string& source, unsigned ofs, const char* pattern, bool includeNumber, char replacement) {
+                unsigned i = ofs, len = source.length();
+                const char* p = pattern;
+                while (*p)
+                {
+                    if (i >= len) return;
+                    if (toUpper(source[i]) != *p) return;
+                    ++i;
+                    ++p;
+                }
+                if (includeNumber)
+                {
+                    if ((i >= len) || (source[i] < '1') || (source[i] > '9')) return;
+                    ++i;
+                }
+                if ((i >= len) || (source[i] == '.') || (source[i] == ':') || (source[i] == '/') || (source[i] == '\\'))
+                {
+                    source.erase(ofs + 1, (i - ofs) - 1);
+                    source[ofs] = replacement;
+                }
+            };
+            bool checkForSpecialEntries = true;
+            for (unsigned i = 0; i < data.length(); ++i)
             {
-                switch ((unsigned char)data[i])
+                // Recognize directory traversals and the special devices CON/PRN/AUX/NULL/COM[1-]/LPT[1-9]
+                if (checkForSpecialEntries)
+                {
+                    checkForSpecialEntries = false;
+                    switch (toUpper(data[i]))
+                    {
+                        case 'A':
+                            sanitizeSpecialFile(data, i, "AUX", false, replacement);
+                            break;
+                        case 'C':
+                            sanitizeSpecialFile(data, i, "CON", false, replacement);
+                            sanitizeSpecialFile(data, i, "COM", true, replacement);
+                            break;
+                        case 'L':
+                            sanitizeSpecialFile(data, i, "LPT", true, replacement);
+                            break;
+                        case 'N':
+                            sanitizeSpecialFile(data, i, "NUL", false, replacement);
+                            break;
+                        case 'P':
+                            sanitizeSpecialFile(data, i, "PRN", false, replacement);
+                            break;
+                        case '.':
+                            sanitizeSpecialFile(data, i, "..", false, replacement);
+                            break;
+                    }
+                }
+
+                // Sanitize individual characters
+                unsigned char c = data[i];
+                if ((c < ' ') || ((c >= 0x80) && (c <= 0x9F)) || (c == '?') || (c == '<') || (c == '>') || (c == ':') || (c == '*') || (c == '|') || (c == '\"'))
+                {
+                    data[i] = replacement;
+                }
+                else if ((c == '/') || (c == '\\'))
                 {
-                    // WARNING While I can't see how using '\' or '/' would cause a problem, it still warrants an investigation
-                    //case '/':
-                    case '?':
-                    case '<':
-                    case '>':
-                    //case '\\':
-                    case ':':
-                    case '*':
-                    case '|':
-                    case '\"':
-
-                    case 0x00:
-                    case 0x01:
-                    case 0x02:
-                    case 0x03:
-                    case 0x04:
-                    case 0x05:
-                    case 0x06:
-                    case 0x07:
-                    case 0x08:
-                    case 0x09:
-                    case 0x0a:
-                    case 0x0b:
-                    case 0x0c:
-                    case 0x0d:
-                    case 0x0e:
-                    case 0x0f:
-                    case 0x10:
-                    case 0x11:
-                    case 0x12:
-                    case 0x13:
-                    case 0x14:
-                    case 0x15:
-                    case 0x16:
-                    case 0x17:
-                    case 0x18:
-                    case 0x19:
-                    case 0x1a:
-                    case 0x1b:
-                    case 0x1c:
-                    case 0x1d:
-                    case 0x1e:
-                    case 0x1f:
-
-                    case 0x80:
-                    case 0x81:
-                    case 0x82:
-                    case 0x83:
-                    case 0x84:
-                    case 0x85:
-                    case 0x86:
-                    case 0x87:
-                    case 0x88:
-                    case 0x89:
-                    case 0x8a:
-                    case 0x8b:
-                    case 0x8c:
-                    case 0x8d:
-                    case 0x8e:
-                    case 0x8f:
-                    case 0x90:
-                    case 0x91:
-                    case 0x92:
-                    case 0x93:
-                    case 0x94:
-                    case 0x95:
-                    case 0x96:
-                    case 0x97:
-                    case 0x98:
-                    case 0x99:
-                    case 0x9a:
-                    case 0x9b:
-                    case 0x9c:
-                    case 0x9d:
-                    case 0x9e:
-                    case 0x9f:
-
-                        data[i] = replacement;
-                        break;
-
-                    default:
-                        break;
+                    checkForSpecialEntries = true;
                 }
             }
-            std::string str_replacement(1, replacement);
-
-            boost::ireplace_all(data, "..", str_replacement);
-
-            boost::ireplace_all(data, "CON", str_replacement);
-            boost::ireplace_all(data, "PRN", str_replacement);
-            boost::ireplace_all(data, "AUX", str_replacement);
-            boost::ireplace_all(data, "NUL", str_replacement);
-            boost::ireplace_all(data, "COM1", str_replacement);
-            boost::ireplace_all(data, "COM2", str_replacement);
-            boost::ireplace_all(data, "COM3", str_replacement);
-            boost::ireplace_all(data, "COM4", str_replacement);
-            boost::ireplace_all(data, "COM5", str_replacement);
-            boost::ireplace_all(data, "COM6", str_replacement);
-            boost::ireplace_all(data, "COM7", str_replacement);
-            boost::ireplace_all(data, "COM8", str_replacement);
-            boost::ireplace_all(data, "COM9", str_replacement);
-            boost::ireplace_all(data, "LPT1", str_replacement);
-            boost::ireplace_all(data, "LPT2", str_replacement);
-            boost::ireplace_all(data, "LPT3", str_replacement);
-            boost::ireplace_all(data, "LPT4", str_replacement);
-            boost::ireplace_all(data, "LPT5", str_replacement);
-            boost::ireplace_all(data, "LPT6", str_replacement);
-            boost::ireplace_all(data, "LPT7", str_replacement);
-            boost::ireplace_all(data, "LPT8", str_replacement);
-            boost::ireplace_all(data, "LPT9", str_replacement);
         }
 
     } // namespace utility

diff --git a/tests/unittest.cpp b/tests/unittest.cpp
@@ -2447,6 +2447,34 @@ TEST_CASE("base64")
     CHECK(crow::utility::base64decode(sample_bin2_enc_np, 6) == std::string(reinterpret_cast<char const*>(sample_bin2)).substr(0, 4));
 } // base64
 
+TEST_CASE("sanitize_filename")
+{
+    auto sanitize_filename = [](string s) {
+        crow::utility::sanitize_filename(s);
+        return s;
+    };
+    CHECK(sanitize_filename("abc/def") == "abc/def");
+    CHECK(sanitize_filename("abc/../def") == "abc/_/def");
+    CHECK(sanitize_filename("abc/..\\..\\..//.../def") == "abc/_\\_\\_//_./def");
+    CHECK(sanitize_filename("abc/..../def") == "abc/_../def");
+    CHECK(sanitize_filename("abc/x../def") == "abc/x../def");
+    CHECK(sanitize_filename("../etc/passwd") == "_/etc/passwd");
+    CHECK(sanitize_filename("abc/AUX") == "abc/_");
+    CHECK(sanitize_filename("abc/AUX/foo") == "abc/_/foo");
+    CHECK(sanitize_filename("abc/AUX:") == "abc/__");
+    CHECK(sanitize_filename("abc/AUXxy") == "abc/AUXxy");
+    CHECK(sanitize_filename("abc/AUX.xy") == "abc/_.xy");
+    CHECK(sanitize_filename("abc/NUL") == "abc/_");
+    CHECK(sanitize_filename("abc/NU") == "abc/NU");
+    CHECK(sanitize_filename("abc/NuL") == "abc/_");
+    CHECK(sanitize_filename("abc/LPT1\\") == "abc/_\\");
+    CHECK(sanitize_filename("abc/COM1") == "abc/_");
+    CHECK(sanitize_filename("ab?<>:*|\"cd") == "ab_______cd");
+    CHECK(sanitize_filename("abc/COM9") == "abc/_");
+    CHECK(sanitize_filename("abc/COM") == "abc/COM");
+    CHECK(sanitize_filename("abc/CON") == "abc/_");
+}
+
 TEST_CASE("get_port")
 {
     SimpleApp app;