Merge pull request github#17611 from microsoft/brodes/wcharcharconver…

…sion_false_positives_upstream5 Brodes/wcharcharconversion false positives upstream5
michaelnebel · Oct 1, 2024 · 2427227 · 2427227
2 parents 204e4c5 + c496503
commit 2427227
Show file tree

Hide file tree

Showing 4 changed files with 140 additions and 6 deletions.
diff --git a/cpp/ql/src/Security/CWE/CWE-704/WcharCharConversion.ql b/cpp/ql/src/Security/CWE/CWE-704/WcharCharConversion.ql
@@ -13,23 +13,85 @@
  */
 
 import cpp
+import semmle.code.cpp.controlflow.Guards
 
 class WideCharPointerType extends PointerType {
   WideCharPointerType() { this.getBaseType() instanceof WideCharType }
 }
 
+/**
+ * Given type `t`, recurses through and returns all
+ * intermediate base types, including `t`.
+ */
+Type getABaseType(Type t) {
+  result = t
+  or
+  result = getABaseType(t.(DerivedType).getBaseType())
+  or
+  result = getABaseType(t.(TypedefType).getBaseType())
+}
+
 /**
  * A type that may also be `CharPointerType`, but that are likely used as arbitrary buffers.
  */
 class UnlikelyToBeAStringType extends Type {
   UnlikelyToBeAStringType() {
-    this.(PointerType).getBaseType().(CharType).isUnsigned() or
-    this.(PointerType).getBaseType().getName().toLowerCase().matches("%byte") or
-    this.getName().toLowerCase().matches("%byte") or
-    this.(PointerType).getBaseType().hasName("uint8_t")
+    exists(Type targ | getABaseType(this) = targ |
+      // NOTE: not using CharType isUnsigned, but rather look for any explicitly declared unsigned
+      // char types. Assuming these are used for buffers, not strings.
+      targ.(CharType).getName().toLowerCase().matches("unsigned%") or
+      targ.getName().toLowerCase().matches(["uint8_t", "%byte%"])
+    )
   }
 }
 
+// Types that can be wide depending on the UNICODE macro
+// see https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types
+class UnicodeMacroDependentWidthType extends Type {
+  UnicodeMacroDependentWidthType() {
+    exists(Type targ | getABaseType(this) = targ |
+      targ.getName() in [
+          "LPCTSTR",
+          "LPTSTR",
+          "PCTSTR",
+          "PTSTR",
+          "TBYTE",
+          "TCHAR"
+        ]
+    )
+  }
+}
+
+class UnicodeMacro extends Macro {
+  UnicodeMacro() { this.getName().toLowerCase().matches("%unicode%") }
+}
+
+class UnicodeMacroInvocation extends MacroInvocation {
+  UnicodeMacroInvocation() { this.getMacro() instanceof UnicodeMacro }
+}
+
+/**
+ * Holds when a expression whose type is UnicodeMacroDependentWidthType and
+ * is observed to be guarded by a check involving a bitwise-and operation
+ * with a UnicodeMacroInvocation.
+ * Such expressions are assumed to be checked dynamically, i.e.,
+ * the flag would indicate if UNICODE typing is set correctly to allow
+ * or disallow a widening cast.
+ */
+predicate isLikelyDynamicallyChecked(Expr e) {
+  e.getType() instanceof UnicodeMacroDependentWidthType and
+  exists(GuardCondition gc, BitwiseAndExpr bai, UnicodeMacroInvocation umi |
+    bai.getAnOperand() = umi.getExpr()
+  |
+    // bai == 0 is false when reaching `e.getBasicBlock()`.
+    // That is, bai != 0 when reaching `e.getBasicBlock()`.
+    gc.ensuresEq(bai, 0, e.getBasicBlock(), false)
+    or
+    // bai == k and k != 0 is true when reaching `e.getBasicBlock()`.
+    gc.ensuresEq(bai, any(int k | k != 0), e.getBasicBlock(), true)
+  )
+}
+
 from Expr e1, Cast e2
 where
   e2 = e1.getConversion() and
@@ -42,7 +104,11 @@ where
   not e1.getType() instanceof UnlikelyToBeAStringType and
   // Avoid castings from 'new' expressions as typically these will be safe
   // Example: `__Type* ret = reinterpret_cast<__Type*>(New(m_pmo) char[num * sizeof(__Type)]);`
-  not exists(NewOrNewArrayExpr newExpr | newExpr.getAChild*() = e1)
+  not exists(NewOrNewArrayExpr newExpr | newExpr.getAChild*() = e1) and
+  // Avoid cases where the cast is guarded by a check to determine if
+  // unicode encoding is enabled in such a way to disallow the dangerous cast
+  // at runtime.
+  not isLikelyDynamicallyChecked(e1)
 select e1,
   "Conversion from " + e1.getType().toString() + " to " + e2.getType().toString() +
     ". Use of invalid string can lead to undefined behavior."
diff --git a/cpp/ql/src/change-notes/2024-09-26-wcharcharconversion-false-positives.md b/cpp/ql/src/change-notes/2024-09-26-wcharcharconversion-false-positives.md
@@ -0,0 +1,5 @@
+---
+category: minorAnalysis
+---
+* The `cpp/incorrect-string-type-conversion` query now produces fewer false positives caused by failure to detect byte arrays.
+* The `cpp/incorrect-string-type-conversion` query now produces fewer false positives caused by failure to recognize dynamic checks prior to possible dangerous widening.
diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-704/WcharCharConversion.cpp b/cpp/ql/test/query-tests/Security/CWE/CWE-704/WcharCharConversion.cpp
@@ -53,4 +53,59 @@ void NonStringFalsePositiveTest2(unsigned char* buffer)
 {
 	wchar_t *lpWchar = NULL;
 	lpWchar = (LPWSTR)buffer; // Possible False Positive
-}
+}
+
+typedef unsigned char BYTE;
+using FOO = BYTE*;
+
+void NonStringFalsePositiveTest3(FOO buffer)
+{
+	wchar_t *lpWchar = NULL;
+	lpWchar = (LPWSTR)buffer; // GOOD
+}
+
+#define UNICODE 0x8
+
+// assume EMPTY_MACRO is tied to if UNICODE is enabled
+#ifdef EMPTY_MACRO
+typedef WCHAR* LPTSTR;
+#else
+typedef char* LPTSTR;
+#endif
+
+void CheckedConversionFalsePositiveTest3(unsigned short flags, LPTSTR buffer)
+{
+	wchar_t *lpWchar = NULL;
+	if(flags & UNICODE)
+		lpWchar = (LPWSTR)buffer; // GOOD
+	else
+		lpWchar = (LPWSTR)buffer; // BUG
+
+	if((flags & UNICODE) == 0x8)
+		lpWchar = (LPWSTR)buffer; // GOOD
+	else
+		lpWchar = (LPWSTR)buffer; // BUG
+
+	if((flags & UNICODE) != 0x8)
+		lpWchar = (LPWSTR)buffer; // BUG
+	else
+		lpWchar = (LPWSTR)buffer; // GOOD
+
+	// Bad operator precedence
+	if(flags & UNICODE == 0x8)
+		lpWchar = (LPWSTR)buffer; // BUG
+	else
+		lpWchar = (LPWSTR)buffer; // BUG
+
+	if((flags & UNICODE) != 0)
+		lpWchar = (LPWSTR)buffer; // GOOD
+	else
+		lpWchar = (LPWSTR)buffer; // BUG
+
+	if((flags & UNICODE) == 0)
+		lpWchar = (LPWSTR)buffer; // BUG
+	else
+		lpWchar = (LPWSTR)buffer; // GOOD
+
+	lpWchar = (LPWSTR)buffer; // BUG
+}
diff --git a/cpp/ql/test/query-tests/Security/CWE/CWE-704/WcharCharConversion.expected b/cpp/ql/test/query-tests/Security/CWE/CWE-704/WcharCharConversion.expected
@@ -3,3 +3,11 @@
 | WcharCharConversion.cpp:24:22:24:27 | lpChar | Conversion from char * to wchar_t *. Use of invalid string can lead to undefined behavior. |
 | WcharCharConversion.cpp:26:23:26:28 | lpChar | Conversion from char * to LPCWSTR. Use of invalid string can lead to undefined behavior. |
 | WcharCharConversion.cpp:27:17:27:22 | lpChar | Conversion from char * to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:82:21:82:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:87:21:87:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:90:21:90:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:96:21:96:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:98:21:98:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:103:21:103:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:106:21:106:26 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |
+| WcharCharConversion.cpp:110:20:110:25 | buffer | Conversion from LPTSTR to LPWSTR. Use of invalid string can lead to undefined behavior. |