diff --git a/Dockerfile b/Dockerfile index b7eecedc..2c794008 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,11 +43,45 @@ RUN apt-get update && apt-get install -y \ xz-utils \ tar \ cpio \ + pkg-config \ + libgtk-3-dev libglib2.0-dev libpango1.0-dev libharfbuzz-dev \ + libfreetype6-dev libfontconfig1-dev libgdk-pixbuf-2.0-dev \ + libicu-dev libpng-dev libjpeg-turbo8-dev libtiff-dev \ + autoconf2.13 nasm yasm zip \ + python3-venv \ + libx11-dev libx11-xcb-dev libxcb1-dev libxcb-shm0-dev \ + libxext-dev libxrandr-dev libxcomposite-dev libxcursor-dev \ + libxdamage-dev libxfixes-dev libxi-dev libxtst-dev \ + mesa-common-dev libegl1-mesa-dev libopengl-dev \ + libasound2-dev libpulse-dev \ + libdbus-1-dev libdbus-glib-1-dev \ + zlib1g-dev libffi-dev \ + && rm -rf /var/lib/apt/lists/* # Install Oh My Zsh for prettier shell RUN sh -c "$(curl -fsSL https://raw.github.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" "" --unattended +# Install Nodejs 20 +RUN set -eux; \ + mkdir -p /etc/apt/keyrings; \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ + | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg; \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" \ + > /etc/apt/sources.list.d/nodesource.list; \ + apt-get update; \ + apt-get install -y --no-install-recommends nodejs; \ + node -v && npm -v; \ + rm -rf /var/lib/apt/lists/* + +# Rust set up +RUN set -eux; \ + curl -fsSL https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.82.0; \ + rustc --version && cargo --version; \ + apt-get -y purge cbindgen || true; \ + cargo install cbindgen --version 0.26.0 --force; \ + cbindgen --version + # Set zsh as default shell RUN chsh -s $(which zsh) diff --git a/commits/commits-firefox-debug.txt b/commits/commits-firefox-debug.txt new file mode 100644 index 00000000..540d2fe0 --- /dev/null +++ b/commits/commits-firefox-debug.txt @@ -0,0 +1,2 @@ +4cf24787d4df11a84fa3ba7eadbb77e9fb915e53,Use-After-Free 
+76b5668417e02fc6aa260245ad35e8dd1186a063,Call-On-Null-Pointer \ No newline at end of file diff --git a/commits/commits-firefox.txt b/commits/commits-firefox.txt new file mode 100644 index 00000000..47e20266 --- /dev/null +++ b/commits/commits-firefox.txt @@ -0,0 +1,10 @@ +1b003f8ad9abb9a809098ef5c1f7c215d237c9f4,Null-Pointer-Dereference +24021be559865d82a56fa16c5efbca5065d01b5a,Null-Pointer-Dereference +d2a896eed4209bf34265a5f8921151bc1de87990,Null-Pointer-Dereference +f19fb265799386caafd9dc02f5efa161149b8de8,Null-Pointer-Dereference +4db507facff9908f99cf7ece1108a5f528a8e4c8,Null-Pointer-Dereference +0bf62ba30a2f93ae7380e528a9200fab7c5e4735,Use-After-Free +568f530b13981fa3f72442723623f5d7d34d9f58,Use-After-Free +056813bb47d5c35237ad82dce5efa7705cea98eb,Use-After-Free +c935848bc0d62704ecd407c16e1a0231b67b57b8,Use-After-Free +d025b9ed06bc36beb8719d1e983c4d217ab06ce2,Use-After-Free \ No newline at end of file diff --git a/prompt_template/firefox/examples/double-free/checker.cpp b/prompt_template/firefox/examples/double-free/checker.cpp new file mode 100644 index 00000000..4611f5f3 --- /dev/null +++ b/prompt_template/firefox/examples/double-free/checker.cpp @@ -0,0 +1,216 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/Support/raw_ostream.h" 
+#include "clang/StaticAnalyzer/Checkers/utility.h" + + +using namespace clang; +using namespace ento; +using namespace taint; + +// Define a unique taint tag for devm_ allocations. +static TaintTagType TaintTag = 101; + +namespace { + +class SAGenTestChecker + : public Checker // For checking post-call conditions +{ + mutable std::unique_ptr BT; + +public: + // Constructor to initialize the BugType describing our double-free bug. + SAGenTestChecker() + : BT(new BugType(this, "Double Free of devm Allocated Memory", + "Memory Management")) {} + + // This callback can be used to model the behavior of functions, including + // allocating memory or mutating states in a custom way. + bool evalCall(const CallEvent &Call, CheckerContext &C) const; + + // Post-call check: track when devm_* allocation functions return memory, + // marking the returned pointer as "tainted" (i.e., devm-allocated). + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + + // Pre-call check: if the function is a known free function (kfree, kvfree, or + // pinctrl_utils_free_map), verify if the passed pointer was previously + // devm-allocated. If so, report a double-free issue. + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + +private: + void reportDoubleFree(const CallEvent &Call, CheckerContext &C, + const MemRegion *Region) const; +}; + +} // end anonymous namespace + +/// evalCall - Used to model certain function calls manually. Here, we intercept +/// certain devm_* allocations to simulate a symbolic region allocation. +bool SAGenTestChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const IdentifierInfo *Callee = Call.getCalleeIdentifier(); + if (!Callee) + return false; + + // If the function name matches any of the devm_* memory allocation functions, + // create a symbolic region to represent the newly allocated memory. 
+ if (Callee->getName() == "devm_kcalloc" || + Callee->getName() == "devm_kmalloc" || + Callee->getName() == "devm_kzalloc" || + Callee->getName() == "devm_kmalloc_array") { + + // Retrieve the original call expression. + const Expr *expr = Call.getOriginExpr(); + if (!expr) + return false; + + const CallExpr *CE = dyn_cast(expr); + if (!CE) + return false; + + // Create a conjured symbol representing the allocated memory. This + // effectively simulates an allocation site for the static analyzer. + unsigned Count = C.blockCount(); + SValBuilder &svalBuilder = C.getSValBuilder(); + const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); + DefinedSVal RetVal = + svalBuilder.getConjuredHeapSymbolVal(CE, LCtx, Count).castAs(); + + // Initialize the symbolic memory with an undefined value. This is optional + // but often done in the analyzer to track data flows. + State = State->bindDefaultInitial(RetVal, UndefinedVal(), LCtx); + + // Bind the symbolic allocation to the call expression's return value. + State = State->BindExpr(CE, C.getLocationContext(), RetVal); + + // If the return value is not a location, do not continue. + if (!RetVal.getAs()) + return false; + + // Finally, add the new state transition to the analyzer. + if (State) + C.addTransition(State); + } + + // This indicates whether the call produced a new or different state. + bool isDifferent = C.isDifferent(); + return isDifferent; +} + +/// checkPostCall - After the call is evaluated, we mark the returned pointer +/// as tainted if it comes from a devm_* allocation function. +void SAGenTestChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const IdentifierInfo *Callee = Call.getCalleeIdentifier(); + if (!Callee) + return; + + // If it's one of our target devm_* allocation functions, taint the result. 
+ if (Callee->getName() == "devm_kcalloc" || + Callee->getName() == "devm_kmalloc" || + Callee->getName() == "devm_kzalloc" || + Callee->getName() == "devm_kmalloc_array") { + + // Ensure we have a valid call expression. + const CallExpr *CE = dyn_cast(Call.getOriginExpr()); + if (!CE) + return; + + // Retrieve the return value. + SVal RetVal = Call.getReturnValue(); + SymbolRef retSymbol = RetVal.getAsSymbol(); + if (retSymbol) { + // Mark the symbol as "tainted" with our custom TaintTag, + // indicating devm allocation. + State = addTaint(State, retSymbol, TaintTag); + } + // Save the new state. + C.addTransition(State); + } +} + +/// checkPreCall - Before kfree, kvfree, or pinctrl_utils_free_map is called, +/// check if the pointer to be freed is tagged as devm-allocated. If so, +/// issue a double-free warning. +void SAGenTestChecker::checkPreCall(const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const IdentifierInfo *Callee = Call.getCalleeIdentifier(); + if (!Callee) + return; + + // Handle pinctrl_utils_free_map. Note that the pointer is passed as + // the second argument (index 1). + if (Callee->getName() == "pinctrl_utils_free_map") { + SVal arg1 = Call.getArgSVal(1); + SymbolRef argSymbol = arg1.getAsSymbol(); + + if (argSymbol) { + // If this symbol was tainted as devm-allocated, report a double-free. + if (isTainted(State, argSymbol, TaintTag)) { + reportDoubleFree(Call, C, arg1.getAsRegion()); + } + } + } + + // Handle kfree/kvfree. The pointer is the first argument (index 0). + if (Callee->getName() == "kfree" || Callee->getName() == "kvfree") { + SVal arg0 = Call.getArgSVal(0); + SymbolRef argSymbol = arg0.getAsSymbol(); + + if (argSymbol) { + // If this symbol was tainted as devm-allocated, report a double-free. 
+ if (isTainted(State, argSymbol, TaintTag)) { + reportDoubleFree(Call, C, arg0.getAsRegion()); + } + } + } +} + +/// reportDoubleFree - Emit a warning if devm-allocated memory is freed using +/// a standard free function, indicating a possible double-free. +void SAGenTestChecker::reportDoubleFree(const CallEvent &Call, + CheckerContext &C, + const MemRegion *Region) const { + if (!BT) + return; + + // Generate a node in the exploded graph for this error. + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + return; + + // Create and populate a bug report object. + auto report = std::make_unique( + *BT, "Double free of devm_* allocated memory", N); + report->addRange(Call.getSourceRange()); + C.emitReport(std::move(report)); +} + +//===----------------------------------------------------------------------===// +// Checker Registration +//===----------------------------------------------------------------------===// + +extern "C" void clang_registerCheckers(CheckerRegistry ®istry) { + registry.addChecker( + "custom.SAGenTestChecker", + "Detects double free of memory allocated by devm_* functions", + ""); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/prompt_template/firefox/examples/double-free/patch.md b/prompt_template/firefox/examples/double-free/patch.md new file mode 100644 index 00000000..fb5075f9 --- /dev/null +++ b/prompt_template/firefox/examples/double-free/patch.md @@ -0,0 +1,197 @@ +## Patch Description + +pinctrl: sophgo: fix double free in cv1800_pctrl_dt_node_to_map() + +'map' is allocated using devm_* which takes care of freeing the allocated +data, but in error paths there is a call to pinctrl_utils_free_map() +which also does kfree(map) which leads to a double free. + +Use kcalloc() instead of devm_kcalloc() as freeing is manually handled. 
+ +Fixes: a29d8e93e710 ("pinctrl: sophgo: add support for CV1800B SoC") +Signed-off-by: Harshit Mogalapalli +Link: https://lore.kernel.org/20241010111830.3474719-1-harshit.m.mogalapalli@oracle.com +Signed-off-by: Linus Walleij + +## Buggy Code + +```c +// drivers/pinctrl/sophgo/pinctrl-cv18xx.c +static int cv1800_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev, + struct device_node *np, + struct pinctrl_map **maps, + unsigned int *num_maps) +{ + struct cv1800_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); + struct device *dev = pctrl->dev; + struct device_node *child; + struct pinctrl_map *map; + const char **grpnames; + const char *grpname; + int ngroups = 0; + int nmaps = 0; + int ret; + + for_each_available_child_of_node(np, child) + ngroups += 1; + + grpnames = devm_kcalloc(dev, ngroups, sizeof(*grpnames), GFP_KERNEL); + if (!grpnames) + return -ENOMEM; + + map = devm_kcalloc(dev, ngroups * 2, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + ngroups = 0; + mutex_lock(&pctrl->mutex); + for_each_available_child_of_node(np, child) { + int npins = of_property_count_u32_elems(child, "pinmux"); + unsigned int *pins; + struct cv1800_pin_mux_config *pinmuxs; + u32 config, power; + int i; + + if (npins < 1) { + dev_err(dev, "invalid pinctrl group %pOFn.%pOFn\n", + np, child); + ret = -EINVAL; + goto dt_failed; + } + + grpname = devm_kasprintf(dev, GFP_KERNEL, "%pOFn.%pOFn", + np, child); + if (!grpname) { + ret = -ENOMEM; + goto dt_failed; + } + + grpnames[ngroups++] = grpname; + + pins = devm_kcalloc(dev, npins, sizeof(*pins), GFP_KERNEL); + if (!pins) { + ret = -ENOMEM; + goto dt_failed; + } + + pinmuxs = devm_kcalloc(dev, npins, sizeof(*pinmuxs), GFP_KERNEL); + if (!pinmuxs) { + ret = -ENOMEM; + goto dt_failed; + } + + for (i = 0; i < npins; i++) { + ret = of_property_read_u32_index(child, "pinmux", + i, &config); + if (ret) + goto dt_failed; + + pins[i] = cv1800_dt_get_pin(config); + pinmuxs[i].config = config; + pinmuxs[i].pin = 
cv1800_get_pin(pctrl, pins[i]); + + if (!pinmuxs[i].pin) { + dev_err(dev, "failed to get pin %d\n", pins[i]); + ret = -ENODEV; + goto dt_failed; + } + + ret = cv1800_verify_pinmux_config(&pinmuxs[i]); + if (ret) { + dev_err(dev, "group %s pin %d is invalid\n", + grpname, i); + goto dt_failed; + } + } + + ret = cv1800_verify_pin_group(pinmuxs, npins); + if (ret) { + dev_err(dev, "group %s is invalid\n", grpname); + goto dt_failed; + } + + ret = of_property_read_u32(child, "power-source", &power); + if (ret) + goto dt_failed; + + if (!(power == PIN_POWER_STATE_3V3 || power == PIN_POWER_STATE_1V8)) { + dev_err(dev, "group %s have unsupported power: %u\n", + grpname, power); + ret = -ENOTSUPP; + goto dt_failed; + } + + ret = cv1800_set_power_cfg(pctrl, pinmuxs[0].pin->power_domain, + power); + if (ret) + goto dt_failed; + + map[nmaps].type = PIN_MAP_TYPE_MUX_GROUP; + map[nmaps].data.mux.function = np->name; + map[nmaps].data.mux.group = grpname; + nmaps += 1; + + ret = pinconf_generic_parse_dt_config(child, pctldev, + &map[nmaps].data.configs.configs, + &map[nmaps].data.configs.num_configs); + if (ret) { + dev_err(dev, "failed to parse pin config of group %s: %d\n", + grpname, ret); + goto dt_failed; + } + + ret = pinctrl_generic_add_group(pctldev, grpname, + pins, npins, pinmuxs); + if (ret < 0) { + dev_err(dev, "failed to add group %s: %d\n", grpname, ret); + goto dt_failed; + } + + /* don't create a map if there are no pinconf settings */ + if (map[nmaps].data.configs.num_configs == 0) + continue; + + map[nmaps].type = PIN_MAP_TYPE_CONFIGS_GROUP; + map[nmaps].data.configs.group_or_pin = grpname; + nmaps += 1; + } + + ret = pinmux_generic_add_function(pctldev, np->name, + grpnames, ngroups, NULL); + if (ret < 0) { + dev_err(dev, "error adding function %s: %d\n", np->name, ret); + goto function_failed; + } + + *maps = map; + *num_maps = nmaps; + mutex_unlock(&pctrl->mutex); + + return 0; + +dt_failed: + of_node_put(child); +function_failed: + 
pinctrl_utils_free_map(pctldev, map, nmaps); + mutex_unlock(&pctrl->mutex); + return ret; +} +``` + +## Bug Fix Patch + +```diff +diff --git a/drivers/pinctrl/sophgo/pinctrl-cv18xx.c b/drivers/pinctrl/sophgo/pinctrl-cv18xx.c +index d18fc5aa84f7..57f2674e75d6 100644 +--- a/drivers/pinctrl/sophgo/pinctrl-cv18xx.c ++++ b/drivers/pinctrl/sophgo/pinctrl-cv18xx.c +@@ -221,7 +221,7 @@ static int cv1800_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev, + if (!grpnames) + return -ENOMEM; + +- map = devm_kcalloc(dev, ngroups * 2, sizeof(*map), GFP_KERNEL); ++ map = kcalloc(ngroups * 2, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + +``` diff --git a/prompt_template/firefox/examples/double-free/pattern.md b/prompt_template/firefox/examples/double-free/pattern.md new file mode 100644 index 00000000..d92f9653 --- /dev/null +++ b/prompt_template/firefox/examples/double-free/pattern.md @@ -0,0 +1,3 @@ +### Bug Pattern + +The bug pattern in the provided patch is the use of `devm_kcalloc()` for allocating memory, which results in automatic memory management by the device-managed allocation API. This can lead to a double free issue when manual deallocation is also performed with functions like `pinctrl_utils_free_map()`. The root cause is combining automatic device-managed memory allocation with manual memory deallocation, which can result in freeing memory twice and cause undefined behavior diff --git a/prompt_template/firefox/examples/double-free/plan.md b/prompt_template/firefox/examples/double-free/plan.md new file mode 100644 index 00000000..950e3b98 --- /dev/null +++ b/prompt_template/firefox/examples/double-free/plan.md @@ -0,0 +1,21 @@ +### Plan + +1. **Declare a Taint Tag:** + - Use a unique identifier (e.g., `static TaintTagType TaintTag = 101;`) to mark allocations from `devm_*` functions. + +2. **Model the Memory Allocation (evalCall):** + - In the `evalCall` method, intercept calls to `devm_kcalloc`, `devm_kmalloc`, etc. 
+ - Create a symbolic region to represent the newly allocated memory using `getConjuredHeapSymbolVal`. + - Bind this symbolic region to the return expression of the call. + +3. **Taint the Return Value (checkPostCall):** + - In the `checkPostCall` callback, if the callee is `devm_*`, retrieve the return value’s symbol and mark it as tainted (using `addTaint(State, retSymbol, TaintTag)`). + +4. **Check Before Freeing (checkPreCall):** + - Intercept calls to `kfree`, `kvfree`, and `pinctrl_utils_free_map`. + - Extract the pointer argument’s symbol. + - If the symbol is tainted, it indicates that this pointer originates from a `devm_*` allocation. Hence, report a potential double-free. + +5. **Report Bugs (reportDoubleFree):** + - Generate an error node using `generateNonFatalErrorNode`. + - Create a `PathSensitiveBugReport` for the user, describing the “Double free of devm_* allocated memory.” diff --git a/prompt_template/firefox/examples/null-ptr-derefence/checker.cpp b/prompt_template/firefox/examples/null-ptr-derefence/checker.cpp new file mode 100644 index 00000000..f71d0246 --- /dev/null +++ b/prompt_template/firefox/examples/null-ptr-derefence/checker.cpp @@ -0,0 +1,274 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/StaticAnalyzer/Checkers/utility.h" + +#include 
"clang/AST/ASTContext.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Stmt.h" +#include "llvm/Support/Casting.h" + +using namespace clang; +using namespace ento; + +// A program-state map from MemRegions to a boolean that tells whether +// that region has been "checked" for null (true) or is still unchecked (false). +REGISTER_MAP_WITH_PROGRAMSTATE(PossibleNullPtrMap, const MemRegion*, bool) +// Program state map to track pointer aliasing +REGISTER_MAP_WITH_PROGRAMSTATE(PtrAliasMap, const MemRegion*, const MemRegion*) + +//---------------------------------------------------------------------- +// Helper: Is this devm_kasprintf? +//---------------------------------------------------------------------- +static bool isDevmKasprintf(const CallEvent &Call) { + if (const IdentifierInfo *ID = Call.getCalleeIdentifier()) { + return ID->getName() == "devm_kasprintf"; + } + return false; +} + +ProgramStateRef setChecked(ProgramStateRef State, const MemRegion *MR) { + const bool *Checked = State->get(MR); + if (Checked && *Checked == false) { + State = State->set(MR, true); + } + + auto AliasReg = State->get(MR); + if (AliasReg) { // Fix 1: Adjust type to pointer to const + const bool *AliasChecked = State->get(*AliasReg); + if (AliasChecked && *AliasChecked == false) { + State = State->set(*AliasReg, true); + } + } + return State; +} + +//---------------------------------------------------------------------- +// Main Checker Class +//---------------------------------------------------------------------- +namespace { +class SAGenTestChecker + : public Checker< + check::PostCall, + check::PreCall, + check::BranchCondition, + check::Location, + check::Bind + > { + + BugType BT; // We'll initialize in constructor + +public: + SAGenTestChecker() + : BT(this, "Dereference before NULL check") {} + + // Checker callbacks + void checkPostCall(const CallEvent &Call, CheckerContext &C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void 
checkBranchCondition(const Stmt *Condition, CheckerContext &C) const; + void checkLocation(SVal Loc, bool isLoad, const Stmt *S, CheckerContext &C) const; + void checkBind(SVal Loc, SVal Val, const Stmt *StoreE, CheckerContext &C) const; +}; +} // end anonymous namespace + +//---------------------------------------------------------------------- +// checkPostCall: Called after a function call is evaluated +//---------------------------------------------------------------------- +void SAGenTestChecker::checkPostCall(const CallEvent &Call, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // If the call is devm_kasprintf, mark its return region as "unchecked" = false + if (isDevmKasprintf(Call)) { + const MemRegion *MR = Call.getReturnValue().getAsRegion(); // Fix 2: semicolon added + if (!MR) + return; + + State = State->set(MR, false); + } + + C.addTransition(State); +} + +//---------------------------------------------------------------------- +// checkPreCall: Called right before a function call is evaluated +//---------------------------------------------------------------------- +void SAGenTestChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + llvm::SmallVector DerefParams; + if (!functionKnownToDeref(Call, DerefParams)) + return; // not one of our "known to deref" functions + + ProgramStateRef State = C.getState(); + + // For each parameter index known to be dereferenced + for (unsigned Idx : DerefParams) { + if (Idx >= Call.getNumArgs()) + continue; + + SVal ArgVal = Call.getArgSVal(Idx); + if (const MemRegion *MR = ArgVal.getAsRegion()) { + const MemRegion *BaseReg = MR->getBaseRegion(); + + const bool *Checked = State->get(BaseReg); + if (Checked && *Checked == false) { + // We found a pointer from devm_kasprintf that is about to be dereferenced + // by a known-deref function, but it was never checked for NULL. 
+ ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + + auto report = std::make_unique( + BT, "Passing devm_kasprintf pointer to a function that dereferences " + "it without checking for NULL", N); + // Optionally, you can add a range or more information + C.emitReport(std::move(report)); + } + } + } +} + +//---------------------------------------------------------------------- +// checkBranchCondition: Called when evaluating an 'if (...)' or 'while(...)' condition +//---------------------------------------------------------------------- +void SAGenTestChecker::checkBranchCondition(const Stmt *Condition, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const Expr *CondE = dyn_cast(Condition); + if (!CondE) { + // Not an expression-based condition, just add a transition + C.addTransition(State); + return; + } + + // Remove casts/parens + CondE = CondE->IgnoreParenCasts(); + + // 1) Look for "if (!ptr)" + if (const auto *UO = dyn_cast(CondE)) { + if (UO->getOpcode() == UO_LNot) { + const Expr *SubE = UO->getSubExpr()->IgnoreParenCasts(); + SVal SubVal = State->getSVal(SubE, C.getLocationContext()); + if (const MemRegion *MR = SubVal.getAsRegion()) { + if (const MemRegion *BaseReg = MR->getBaseRegion()) { + // Mark pointer as "checked." 
+ State = setChecked(State, BaseReg); + } + } + } + } + // 2) Look for "if (ptr == NULL)" or "if (ptr != NULL)" + else if (const auto *BO = dyn_cast(CondE)) { + BinaryOperator::Opcode Op = BO->getOpcode(); + if (Op == BO_EQ || Op == BO_NE) { + const Expr *LHS = BO->getLHS()->IgnoreParenCasts(); + const Expr *RHS = BO->getRHS()->IgnoreParenCasts(); + + bool LHSIsNull = LHS->isNullPointerConstant(C.getASTContext(), + Expr::NPC_ValueDependentIsNull); + bool RHSIsNull = RHS->isNullPointerConstant(C.getASTContext(), + Expr::NPC_ValueDependentIsNull); + // Identify which side is the pointer + const Expr *PtrExpr = nullptr; + if (LHSIsNull && !RHSIsNull) { + PtrExpr = RHS; + } else if (RHSIsNull && !LHSIsNull) { + PtrExpr = LHS; + } + + if (PtrExpr) { + SVal PtrVal = State->getSVal(PtrExpr, C.getLocationContext()); + if (const MemRegion *MR = PtrVal.getAsRegion()) { + if (const MemRegion *BaseReg = MR->getBaseRegion()) { + // Mark as checked + State = setChecked(State, BaseReg); + } + } + } + } + } + // 3) Look for "if (ptr)" + else { + SVal CondVal = State->getSVal(CondE, C.getLocationContext()); + if (const MemRegion *MR = CondVal.getAsRegion()) { + if (const MemRegion *BaseReg = MR->getBaseRegion()) { + // Mark pointer as "checked." + State = setChecked(State, BaseReg); + } + } + } + + C.addTransition(State); +} + +//---------------------------------------------------------------------- +// checkLocation: Called on explicit memory load/store (e.g. 
*ptr) +//---------------------------------------------------------------------- +void SAGenTestChecker::checkLocation(SVal loc, bool isLoad, const Stmt *S, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + if (const MemRegion *MR = loc.getAsRegion()) { + if (const MemRegion *BaseReg = MR->getBaseRegion()) { + const bool *Checked = State->get(BaseReg); + if (Checked && *Checked == false) { + // We found a direct dereference of an unchecked pointer from devm_kasprintf + ExplodedNode *N = C.generateErrorNode(); + if (!N) + return; + + auto report = std::make_unique( + BT, "Dereference before checking for null (devm_kasprintf)", N); + report->addRange(S->getSourceRange()); // highlight the statement + C.emitReport(std::move(report)); + } + } + } +} + +//---------------------------------------------------------------------- +// checkBind: Called when a value is bound to a memory region +//---------------------------------------------------------------------- +void SAGenTestChecker::checkBind(SVal Loc, SVal Val, const Stmt *StoreE, + CheckerContext &C) const { + ProgramStateRef State = C.getState(); + + // Get the left-hand side region. + if (const MemRegion *LHSReg = Loc.getAsRegion()) { + // Get the right-hand side region (if any). 
+ LHSReg = LHSReg->getBaseRegion(); + if (!LHSReg) + return; + + if (const MemRegion *MR = Val.getAsRegion()) { + MR = MR->getBaseRegion(); + if (!MR) + return; + + State = State->set(LHSReg, MR); + State = State->set(MR, LHSReg); + } + } + C.addTransition(State); +} + +//---------------------------------------------------------------------- +// Registration +//---------------------------------------------------------------------- +extern "C" void clang_registerCheckers(CheckerRegistry ®istry) { + registry.addChecker( + "custom.SAGenTestChecker", + "Detects dereferences of pointers returned by devm_kasprintf without NULL checking", + "" + ); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/prompt_template/firefox/examples/null-ptr-derefence/patch.md b/prompt_template/firefox/examples/null-ptr-derefence/patch.md new file mode 100644 index 00000000..d6494826 --- /dev/null +++ b/prompt_template/firefox/examples/null-ptr-derefence/patch.md @@ -0,0 +1,118 @@ +### Patch Description + +ice: Fix some null pointer dereference issues in ice_ptp.c + +devm_kasprintf() returns a pointer to dynamically allocated memory +which can be NULL upon failure. 
+ +### Buggy Code + +```c +// drivers/net/ethernet/intel/ice/ice_ptp.c +static int ice_ptp_register_auxbus_driver(struct ice_pf *pf) +{ + struct auxiliary_driver *aux_driver; + struct ice_ptp *ptp; + struct device *dev; + char *name; + int err; + + ptp = &pf->ptp; + dev = ice_pf_to_dev(pf); + aux_driver = &ptp->ports_owner.aux_driver; + INIT_LIST_HEAD(&ptp->ports_owner.ports); + mutex_init(&ptp->ports_owner.lock); + name = devm_kasprintf(dev, GFP_KERNEL, "ptp_aux_dev_%u_%u_clk%u", + pf->pdev->bus->number, PCI_SLOT(pf->pdev->devfn), + ice_get_ptp_src_clock_index(&pf->hw)); + + aux_driver->name = name; + aux_driver->shutdown = ice_ptp_auxbus_shutdown; + aux_driver->suspend = ice_ptp_auxbus_suspend; + aux_driver->remove = ice_ptp_auxbus_remove; + aux_driver->resume = ice_ptp_auxbus_resume; + aux_driver->probe = ice_ptp_auxbus_probe; + aux_driver->id_table = ice_ptp_auxbus_create_id_table(pf, name); + if (!aux_driver->id_table) + return -ENOMEM; + + err = auxiliary_driver_register(aux_driver); + if (err) { + devm_kfree(dev, aux_driver->id_table); + dev_err(dev, "Failed registering aux_driver, name <%s>\n", + name); + } + + return err; +} +``` +```c +// drivers/net/ethernet/intel/ice/ice_ptp.c +static int ice_ptp_create_auxbus_device(struct ice_pf *pf) +{ + struct auxiliary_device *aux_dev; + struct ice_ptp *ptp; + struct device *dev; + char *name; + int err; + u32 id; + + ptp = &pf->ptp; + id = ptp->port.port_num; + dev = ice_pf_to_dev(pf); + + aux_dev = &ptp->port.aux_dev; + + name = devm_kasprintf(dev, GFP_KERNEL, "ptp_aux_dev_%u_%u_clk%u", + pf->pdev->bus->number, PCI_SLOT(pf->pdev->devfn), + ice_get_ptp_src_clock_index(&pf->hw)); + + aux_dev->name = name; + aux_dev->id = id; + aux_dev->dev.release = ice_ptp_release_auxbus_device; + aux_dev->dev.parent = dev; + + err = auxiliary_device_init(aux_dev); + if (err) + goto aux_err; + + err = auxiliary_device_add(aux_dev); + if (err) { + auxiliary_device_uninit(aux_dev); + goto aux_err; + } + + return 0; +aux_err: + 
dev_err(dev, "Failed to create PTP auxiliary bus device <%s>\n", name); + devm_kfree(dev, name); + return err; +} +``` + +### Bug Fix Patch + +```diff +diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c +index c4fe28017b8d..3b6605c8585e 100644 +--- a/drivers/net/ethernet/intel/ice/ice_ptp.c ++++ b/drivers/net/ethernet/intel/ice/ice_ptp.c +@@ -2863,6 +2863,8 @@ static int ice_ptp_register_auxbus_driver(struct ice_pf *pf) + name = devm_kasprintf(dev, GFP_KERNEL, "ptp_aux_dev_%u_%u_clk%u", + pf->pdev->bus->number, PCI_SLOT(pf->pdev->devfn), + ice_get_ptp_src_clock_index(&pf->hw)); ++ if (!name) ++ return -ENOMEM; + + aux_driver->name = name; + aux_driver->shutdown = ice_ptp_auxbus_shutdown; +@@ -3109,6 +3111,8 @@ static int ice_ptp_create_auxbus_device(struct ice_pf *pf) + name = devm_kasprintf(dev, GFP_KERNEL, "ptp_aux_dev_%u_%u_clk%u", + pf->pdev->bus->number, PCI_SLOT(pf->pdev->devfn), + ice_get_ptp_src_clock_index(&pf->hw)); ++ if (!name) ++ return -ENOMEM; + + aux_dev->name = name; + aux_dev->id = id; +``` diff --git a/prompt_template/firefox/examples/null-ptr-derefence/pattern.md b/prompt_template/firefox/examples/null-ptr-derefence/pattern.md new file mode 100644 index 00000000..dc7f1925 --- /dev/null +++ b/prompt_template/firefox/examples/null-ptr-derefence/pattern.md @@ -0,0 +1,3 @@ +### Bug Pattern + +The bug pattern is that the function `devm_kasprintf()` can return NULL if it fails to allocate memory. When the return value is not checked and is subsequently dereferenced, it can lead to a NULL pointer dereference. This pattern can cause the program to crash if it tries to use the pointer returned by `devm_kasprintf()` without ensuring it is non-NULL. 
diff --git a/prompt_template/firefox/examples/null-ptr-derefence/plan.md b/prompt_template/firefox/examples/null-ptr-derefence/plan.md new file mode 100644 index 00000000..90880764 --- /dev/null +++ b/prompt_template/firefox/examples/null-ptr-derefence/plan.md @@ -0,0 +1,28 @@ +### Plan + +1. **Create and Manage Program State Maps:** + - Define two maps using `REGISTER_MAP_WITH_PROGRAMSTATE`: + - A `PossibleNullPtrMap` that associates `MemRegion`s with a boolean indicating whether they have been NULL-checked (`true` if checked, `false` if unchecked). + - A `PtrAliasMap` to track alias relationships. This is needed so that if one pointer is checked, its aliases are also marked as checked. + +2. **Identify the Relevant Function (`devm_kasprintf`):** + - Implement an internal helper function `isDevmKasprintf(const CallEvent &Call)`. + - In `checkPostCall`, if the function is `devm_kasprintf`, mark the return region in `PossibleNullPtrMap` as unchecked (`false`), since it hasn't undergone a NULL check yet. + +3. **Marking Pointers as Checked:** + - Implement a helper function `setChecked(State, Region)` which marks a pointer (and its aliases) as checked in the `PossibleNullPtrMap`. + - This function is used whenever the checker determines a pointer has been NULL-checked. + +4. **Observing Conditions (BranchCondition):** + - In `checkBranchCondition`, examine the condition: + - If it looks like `if (!ptr)`, `if (ptr == NULL)`, `if (ptr != NULL)`, or just `if (ptr)`, determine the region being tested. + - Once identified, call `setChecked(...)` on that region. + +5. **Detecting Dereferences (Location):** + - In `checkLocation`, catch any read/write operation (`*ptr`). + - If the pointer has a mapping in `PossibleNullPtrMap` and it is still set to `false`, issue a warning (using `C.emitReport(...)`) because the pointer might be `NULL`-not-checked. + +6. 
**Tracking Aliases (Bind):** + - In `checkBind`, when a pointer is stored into another pointer (e.g., `p2 = p1;`), record this alias in `PtrAliasMap`. + - When one pointer becomes checked, `setChecked(...)` will update the aliases as well. + - Do not update the `PossibleNullPtrMap` in the `checkBind` function. diff --git a/prompt_template/firefox/examples/uninit/checker.cpp b/prompt_template/firefox/examples/uninit/checker.cpp new file mode 100644 index 00000000..fa29ffba --- /dev/null +++ b/prompt_template/firefox/examples/uninit/checker.cpp @@ -0,0 +1,137 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/StaticAnalyzer/Checkers/utility.h" + +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +// Program state map to track uninitialized memory regions +REGISTER_MAP_WITH_PROGRAMSTATE(UninitMemoryMap, const MemRegion *, bool) +// Program state map to track pointer aliasing +REGISTER_MAP_WITH_PROGRAMSTATE(PtrAliasMap, const MemRegion*, const MemRegion*) + +namespace { + +class SAGenTestChecker : public Checker<check::PostCall, check::PreCall, check::Location> { + mutable std::unique_ptr<BugType> BT; + + public: + SAGenTestChecker() : BT(new BugType(this, "Kernel Information Leak")) {} + + void checkPostCall(const CallEvent &Call, CheckerContext 
&C) const; + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + void checkLocation(SVal Loc, bool isLoad, const Stmt *S, CheckerContext &C) const; + + private: + void reportInfoLeak(const MemRegion *MR, CheckerContext &C) const; +}; + +void SAGenTestChecker::checkPostCall(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const IdentifierInfo *Callee = Call.getCalleeIdentifier(); + if (!Callee) + return; + + if (Callee->getName() == "kmalloc") { + // Track kmalloc allocations, mark memory as uninitialized + const Expr *expr = Call.getOriginExpr(); + if (!expr) + return; + + const CallExpr *CE = dyn_cast<CallExpr>(expr); + if (!CE) + return; + + const MemRegion *MR = getMemRegionFromExpr(CE, C); + if (!MR) + return; + + MR = MR->getBaseRegion(); + if (!MR) + return; + State = State->set<UninitMemoryMap>(MR, true); + C.addTransition(State); + } else if (Callee->getName() == "kzalloc") { + // Track kzalloc allocations, which zero-initialize memory + const Expr *expr = Call.getOriginExpr(); + if (!expr) + return; + + const CallExpr *CE = dyn_cast<CallExpr>(expr); + if (!CE) + return; + + const MemRegion *MR = getMemRegionFromExpr(CE, C); + if (!MR) + return; + + MR = MR->getBaseRegion(); + if (!MR) + return; + State = State->set<UninitMemoryMap>(MR, false); + C.addTransition(State); + } +} + +void SAGenTestChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + ProgramStateRef State = C.getState(); + const IdentifierInfo *Callee = Call.getCalleeIdentifier(); + if (!Callee) + return; + + // Check for copy_to_user(user_dst, kernel_src, size) + if (Callee->getName() == "copy_to_user") { + SVal Arg1 = Call.getArgSVal(1); + const MemRegion *MR = Arg1.getAsRegion(); + if (!MR) + return; + + MR = MR->getBaseRegion(); + if (!MR) + return; + const bool *Uninit = State->get<UninitMemoryMap>(MR); + if (Uninit && *Uninit) { + reportInfoLeak(MR, C); + } + } +} + +void SAGenTestChecker::checkLocation(SVal Loc, bool isLoad, const Stmt *S, CheckerContext &C) const { + +} + +void 
SAGenTestChecker::reportInfoLeak(const MemRegion *MR, CheckerContext &C) const { + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + return; + + auto report = std::make_unique<PathSensitiveBugReport>( + *BT, "Potential kernel information leak due to uninitialized kmalloc memory being copied to user space", N); + C.emitReport(std::move(report)); +} + +} // end anonymous namespace + +extern "C" void clang_registerCheckers(CheckerRegistry &registry) { + registry.addChecker<SAGenTestChecker>( + "custom.SAGenTestChecker", + "Detects kernel information leaks by uninitialized kmalloc memory being copied to user space", + ""); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/prompt_template/firefox/examples/uninit/patch.md b/prompt_template/firefox/examples/uninit/patch.md new file mode 100644 index 00000000..81842907 --- /dev/null +++ b/prompt_template/firefox/examples/uninit/patch.md @@ -0,0 +1,107 @@ +### Patch Description + +do_sys_name_to_handle(): use kzalloc() to fix kernel-infoleak + +syzbot identified a kernel information leak vulnerability in +do_sys_name_to_handle() and issued the following report [1]. + +Bytes 18-19 of 20 are uninitialized +Memory access of size 20 starts at ffff888128a46380 +Data copied to user address 0000000020000240" + +Per Chuck Lever's suggestion, use kzalloc() instead of kmalloc() to +solve the problem. 
+ +Fixes: 990d6c2d7aee ("vfs: Add name to file handle conversion support") +Suggested-by: Chuck Lever III +Reported-and-tested-by: +Signed-off-by: Nikita Zhandarovich +Link: https://lore.kernel.org/r/20240119153906.4367-1-n.zhandarovich@fintech.ru +Reviewed-by: Jan Kara +Signed-off-by: Christian Brauner + +### Buggy Code + +```c +// fs/fhandle.c +static long do_sys_name_to_handle(const struct path *path, + struct file_handle __user *ufh, + int __user *mnt_id, int fh_flags) +{ + long retval; + struct file_handle f_handle; + int handle_dwords, handle_bytes; + struct file_handle *handle = NULL; + + /* + * We need to make sure whether the file system support decoding of + * the file handle if decodeable file handle was requested. + */ + if (!exportfs_can_encode_fh(path->dentry->d_sb->s_export_op, fh_flags)) + return -EOPNOTSUPP; + + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) + return -EFAULT; + + if (f_handle.handle_bytes > MAX_HANDLE_SZ) + return -EINVAL; + + handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + GFP_KERNEL); + if (!handle) + return -ENOMEM; + + /* convert handle size to multiple of sizeof(u32) */ + handle_dwords = f_handle.handle_bytes >> 2; + + /* we ask for a non connectable maybe decodeable file handle */ + retval = exportfs_encode_fh(path->dentry, + (struct fid *)handle->f_handle, + &handle_dwords, fh_flags); + handle->handle_type = retval; + /* convert handle size to bytes */ + handle_bytes = handle_dwords * sizeof(u32); + handle->handle_bytes = handle_bytes; + if ((handle->handle_bytes > f_handle.handle_bytes) || + (retval == FILEID_INVALID) || (retval < 0)) { + /* As per old exportfs_encode_fh documentation + * we could return ENOSPC to indicate overflow + * But file system returned 255 always. 
So handle + * both the values + */ + if (retval == FILEID_INVALID || retval == -ENOSPC) + retval = -EOVERFLOW; + /* + * set the handle size to zero so we copy only + * non variable part of the file_handle + */ + handle_bytes = 0; + } else + retval = 0; + /* copy the mount id */ + if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || + copy_to_user(ufh, handle, + sizeof(struct file_handle) + handle_bytes)) + retval = -EFAULT; + kfree(handle); + return retval; +} +``` + +### Bug Fix Patch + +```diff +diff --git a/fs/fhandle.c b/fs/fhandle.c +index 18b3ba8dc8ea..57a12614addf 100644 +--- a/fs/fhandle.c ++++ b/fs/fhandle.c +@@ -36,7 +36,7 @@ static long do_sys_name_to_handle(const struct path *path, + if (f_handle.handle_bytes > MAX_HANDLE_SZ) + return -EINVAL; + +- handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, ++ handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes, + GFP_KERNEL); + if (!handle) + return -ENOMEM; +``` diff --git a/prompt_template/firefox/examples/uninit/pattern.md b/prompt_template/firefox/examples/uninit/pattern.md new file mode 100644 index 00000000..f8ff450f --- /dev/null +++ b/prompt_template/firefox/examples/uninit/pattern.md @@ -0,0 +1,3 @@ +### Bug Pattern + +The bug pattern is using `kmalloc()` to allocate memory for a buffer that is later copied to user space without properly initializing the allocated memory. This can result in a kernel information leak if the allocated memory contains uninitialized or leftover data, which is then exposed to user space. The root cause is the lack of proper memory initialization after allocation, leading to potential exposure of sensitive kernel data. Using `kzalloc()` instead ensures that the allocated memory is zeroed out, preventing such information leaks. 
diff --git a/prompt_template/firefox/examples/uninit/plan.md b/prompt_template/firefox/examples/uninit/plan.md new file mode 100644 index 00000000..25fc6102 --- /dev/null +++ b/prompt_template/firefox/examples/uninit/plan.md @@ -0,0 +1,23 @@ +### Plan + +1. **Register Program State Map:** + - Define two maps using `REGISTER_MAP_WITH_PROGRAMSTATE`: + - Use `REGISTER_MAP_WITH_PROGRAMSTATE(UninitMemoryMap, const MemRegion *, bool)` to map memory regions to an initialization flag. + - A `PtrAliasMap` to track alias relationships. This is needed so that if one pointer is checked, its aliases are also marked as checked. + +2. **Track Memory Allocations (`checkPostCall`):** + - **For `kmalloc`:** + - Retrieve the call expression and its base `MemRegion`. + - Mark the region as uninitialized (`true`). + - **For `kzalloc`:** + - Retrieve the call expression and its base `MemRegion`. + - Mark the region as initialized (`false`). + +3. **Detect Information Leak (`checkPreCall`):** + - Identify calls to `copy_to_user`. + - Retrieve the kernel source argument’s base `MemRegion`. + - If the region is flagged as uninitialized in `UninitMemoryMap`, call `reportInfoLeak` to generate a warning. + +4. **Bug Reporting (`reportInfoLeak`):** + - Generate a non-fatal error node. + - Emit a bug report with a message indicating potential kernel information leakage. diff --git a/prompt_template/firefox/knowledge/suggestions.md b/prompt_template/firefox/knowledge/suggestions.md new file mode 100644 index 00000000..4d5849b5 --- /dev/null +++ b/prompt_template/firefox/knowledge/suggestions.md @@ -0,0 +1,28 @@ +# Suggestions + +- Always perform a NULL check after retrieving a pointer type. + +- When you are going to track the return value of a function, if the type of the return value is a pointer (e.g. `int*`), you should use `MemRegion*` to mark it. If the type is a basic type (e.g. `int`), you should use `SymbolRef`. 
+ +- Use `generateNonFatalErrorNode()` rather than `generateErrorNode()` to report all possible bugs in a file. + +- When you are going to infer the maximal value, invoke `inferSymbolMaxVal()` to help you. For example, when inferring the maximal value of `a*b`, invoke `inferSymbolMaxVal()` twice to infer the maximal values of `a` and `b`, and multiply the values to infer the final maximal value. + +- If you are not sure whether there is a bug or not because of missing information (e.g. undecidable array size), DO NOT report it as a potential bug. + +- **Always** invoke `getBaseRegion()` to get the base region of a memory region. For example, after the statement "const MemRegion *BaseReg = Loc.getAsRegion();", you should perform "BaseReg = BaseReg->getBaseRegion();". + +- Do not perform `IgnoreImplicit()` before invoking the function `getMemRegionFromExpr()`, and you must perform `getBaseRegion()` after this function. + +- For pointer analysis, please use a program state (like `REGISTER_MAP_WITH_PROGRAMSTATE(PtrAliasMap, const MemRegion*, const MemRegion*)`) and `checkBind` to track the aliasing information. + +- DO NOT use placeholder logic in the checker. Always implement the logic in the checker. + +- Use `std::make_unique<PathSensitiveBugReport>` or `std::make_unique<BasicBugReport>` to create a bug report. Note, the error message should be **short** and clear. + +- When verifying a function call's name, use the utility function ExprHasName() for accurate checking. + - Bad example: `const IdentifierInfo *Callee = Call.getCalleeIdentifier(); if (!Callee || Callee->getName() != "check_add_overflow") return;` + - Good example: `const Expr *OriginExpr = Call.getOriginExpr(); if (!OriginExpr || !ExprHasName(OriginExpr, "check_add_overflow", C)) return;` + - For other checking like type checking, use its corresponding Clang API, like `C.getASTContext().getTypeSize(Ty)` + +- Please follow the plan!! 
diff --git a/prompt_template/firefox/knowledge/template.md b/prompt_template/firefox/knowledge/template.md new file mode 100644 index 00000000..5ded1d1f --- /dev/null +++ b/prompt_template/firefox/knowledge/template.md @@ -0,0 +1,55 @@ +# Checker Template + +```cpp +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/StaticAnalyzer/Checkers/utility.h" +{{Add your includes here}} + +using namespace clang; +using namespace ento; +using namespace taint; + +{{Customize program states}} // If necessary + +namespace { +/* The checker callbacks are to be decided. 
*/ +class SAGenTestChecker : public Checker<{{Callback Functions}}> { + mutable std::unique_ptr<BugType> BT; + + public: + SAGenTestChecker() : BT(new BugType(this, "{{Bug Group}}", "{{Bug Type}}")) {} + + {{Declaration of Callback Functions}} + + private: + + {{Declaration of Self-Defined Functions}} +}; + +{{Your Functions: functions should be complete and runable}} + +} // end anonymous namespace + +extern "C" void clang_registerCheckers(CheckerRegistry &registry) { + registry.addChecker<SAGenTestChecker>( + "custom.SAGenTestChecker", + "{{Checker descriptions to be filled}}", + ""); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; +``` diff --git a/prompt_template/firefox/knowledge/utility.md b/prompt_template/firefox/knowledge/utility.md new file mode 100644 index 00000000..3be2f4b8 --- /dev/null +++ b/prompt_template/firefox/knowledge/utility.md @@ -0,0 +1,157 @@ +# Utility Functions + +```cpp +// Going upward in an AST tree, and find the Stmt of a specific type +template <typename T> +const T* findSpecificTypeInParents(const Stmt *S, CheckerContext &C); + +// Going downward in an AST tree, and find the Stmt of a specific type +// Only return one of the statements if there are many +template <typename T> +const T* findSpecificTypeInChildren(const Stmt *S); + +bool EvaluateExprToInt(llvm::APSInt &EvalRes, const Expr *expr, CheckerContext &C) { + Expr::EvalResult ExprRes; + if (expr->EvaluateAsInt(ExprRes, C.getASTContext())) { + EvalRes = ExprRes.Val.getInt(); + return true; + } + return false; +} + +const llvm::APSInt *inferSymbolMaxVal(SymbolRef Sym, CheckerContext &C) { + ProgramStateRef State = C.getState(); + const llvm::APSInt *maxVal = State->getConstraintManager().getSymMaxVal(State, Sym); + return maxVal; +} + +// The expression should be the DeclRefExpr of the array +bool getArraySizeFromExpr(llvm::APInt &ArraySize, const Expr *E) { + if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreImplicit())) { + if (const VarDecl *VD = dyn_cast<VarDecl>(DRE->getDecl())) { + 
QualType QT = VD->getType(); + if (const ConstantArrayType *ArrayType = dyn_cast<ConstantArrayType>(QT.getTypePtr())) { + ArraySize = ArrayType->getSize(); + return true; + } + } + } + return false; +} + +bool getStringSize(llvm::APInt &StringSize, const Expr *E) { + if (const auto *SL = dyn_cast<StringLiteral>(E->IgnoreImpCasts())) { + StringSize = llvm::APInt(32, SL->getLength()); + return true; + } + return false; +} + +const MemRegion* getMemRegionFromExpr(const Expr* E, CheckerContext &C) { + ProgramStateRef State = C.getState(); + return State->getSVal(E, C.getLocationContext()).getAsRegion(); +} + +struct KnownDerefFunction { + const char *Name; ///< The function name. + llvm::SmallVector<unsigned, 4> Params; ///< The parameter indices that get dereferenced. +}; + +/// \brief Determines if the given call is to a function known to dereference +/// certain pointer parameters. +/// +/// This function looks up the call's callee name in a known table of functions +/// that definitely dereference one or more of their pointer parameters. If the +/// function is found, it appends the 0-based parameter indices that are dereferenced +/// into \p DerefParams and returns \c true. Otherwise, it returns \c false. +/// +/// \param[in] Call The function call to examine. +/// \param[out] DerefParams +/// A list of parameter indices that the function is known to dereference. +/// +/// \return \c true if the function is found in the known-dereference table, +/// \c false otherwise. +bool functionKnownToDeref(const CallEvent &Call, + llvm::SmallVectorImpl<unsigned> &DerefParams) { + if (const IdentifierInfo *ID = Call.getCalleeIdentifier()) { + StringRef FnName = ID->getName(); + + for (const auto &Entry : DerefTable) { + if (FnName.equals(Entry.Name)) { + // We found the function in our table, copy its param indices + DerefParams.append(Entry.Params.begin(), Entry.Params.end()); + return true; + } + } + } + return false; +} + +/// \brief Determines if the source text of an expression contains a specified name. 
+bool ExprHasName(const Expr *E, StringRef Name, CheckerContext &C) { + if (!E) + return false; + + // Use const reference since getSourceManager() returns a const SourceManager. + const SourceManager &SM = C.getSourceManager(); + const LangOptions &LangOpts = C.getLangOpts(); + // Retrieve the source text corresponding to the expression. + CharSourceRange Range = CharSourceRange::getTokenRange(E->getSourceRange()); + StringRef ExprText = Lexer::getSourceText(Range, SM, LangOpts); + + // Check if the extracted text contains the specified name. + return ExprText.contains(Name); +} +``` + +# Clang Check Functions + +```cpp +void checkPreStmt (const ReturnStmt *DS, CheckerContext &C) const + // Pre-visit the Statement. + +void checkPostStmt (const DeclStmt *DS, CheckerContext &C) const + // Post-visit the Statement. + +void checkPreCall (const CallEvent &Call, CheckerContext &C) const + // Pre-visit an abstract "call" event. + +void checkPostCall (const CallEvent &Call, CheckerContext &C) const + // Post-visit an abstract "call" event. + +void checkBranchCondition (const Stmt *Condition, CheckerContext &Ctx) const + // Pre-visit of the condition statement of a branch (such as IfStmt). + + +void checkLocation (SVal Loc, bool IsLoad, const Stmt *S, CheckerContext &) const + // Called on a load from and a store to a location. + +void checkBind (SVal Loc, SVal Val, const Stmt *S, CheckerContext &) const + // Called on binding of a value to a location. + + +void checkBeginFunction (CheckerContext &Ctx) const + // Called when the analyzer core starts analyzing a function, regardless of whether it is analyzed at the top level or is inlined. + +void checkEndFunction (const ReturnStmt *RS, CheckerContext &Ctx) const + // Called when the analyzer core reaches the end of a function being analyzed regardless of whether it is analyzed at the top level or is inlined. 
+ +void checkEndAnalysis (ExplodedGraph &G, BugReporter &BR, ExprEngine &Eng) const + // Called after all the paths in the ExplodedGraph reach end of path. + + +bool evalCall (const CallEvent &Call, CheckerContext &C) const + // Evaluates function call. + +ProgramStateRef evalAssume (ProgramStateRef State, SVal Cond, bool Assumption) const + // Handles assumptions on symbolic values. + +ProgramStateRef checkRegionChanges (ProgramStateRef State, const InvalidatedSymbols *Invalidated, ArrayRef< const MemRegion * > ExplicitRegions, ArrayRef< const MemRegion * > Regions, const LocationContext *LCtx, const CallEvent *Call) const + // Called when the contents of one or more regions change. + +void checkASTDecl (const FunctionDecl *D, AnalysisManager &Mgr, BugReporter &BR) const + // Check every declaration in the AST. + +void checkASTCodeBody (const Decl *D, AnalysisManager &Mgr, BugReporter &BR) const + // Check every declaration that has a statement body in the AST. +``` diff --git a/prompt_template/firefox/patch2pattern-general.md b/prompt_template/firefox/patch2pattern-general.md new file mode 100644 index 00000000..fc2b4f6a --- /dev/null +++ b/prompt_template/firefox/patch2pattern-general.md @@ -0,0 +1,27 @@ +# Instruction + +You will be provided with a patch in Firefox (mozilla-central) codebase. +Please analyze the patch and find out the **bug pattern** in this patch. +A **bug pattern** is the root cause of this bug, meaning that programs with this pattern will have a great possibility of having the same bug. +Note that the bug pattern should be **general and abstract** enough to identify similar buggy code patterns in other parts of the codebase. + +# Examples + +{{examples}} + +# Target Patch + +{{input_patch}} + +# Formatting + +Please tell me the **bug pattern** of the provided patch. +Please try not to wrap your response in functions if several lines of code are enough to express this pattern. 
+ +Your response should be like: + +``` +## Bug Pattern + +{{describe the bug pattern here}} +``` diff --git a/prompt_template/firefox/patch2pattern.md b/prompt_template/firefox/patch2pattern.md new file mode 100644 index 00000000..1ed13b0a --- /dev/null +++ b/prompt_template/firefox/patch2pattern.md @@ -0,0 +1,45 @@ +# Instruction + +You will be provided with a patch in Firefox (mozilla-central) codebase. +Please analyze the patch and find out the **bug pattern** in this patch. +A **bug pattern** is the root cause of this bug, meaning that programs with this pattern will have a great possibility of having the same bug. +Note that the bug pattern should be specific and accurate, which can be used to identify the buggy code provided in the patch. + +When inferring the pattern, consider common Firefox/C++ bug types, for example (as relevant to the patch): + +Ownership & Lifetime (C++): RefPtr / nsCOMPtr / already_AddRefed, UniquePtr, move semantics, RAII (constructors/destructors), temporary/dangling references, Span, Maybe, Result. + +XPCOM / Refcounting: AddRef/Release balance, NS_IF_ADDREF, cycle-collection annotations/participation. + +Threading / Main-thread affinity: NS_IsMainThread(), ThreadSafe structures, atomic races, background vs. main-thread misuse. + +Error handling & fallible flows: nsresult propagation (NS_FAILED/NS_SUCCEEDED), NS_WARN_IF, fallible allocations, early-returns. + +MOZ annotations & macros: MOZ_ASSERT, MOZ_RELEASE_ASSERT, MOZ_CRASH, MOZ_CAN_RUN_SCRIPT, nullability contracts. + +Container/string misuse: nsTArray/nsCString/nsString capacity/length mismatches, out-of-bounds, iterator invalidation. + +IPC/DOM/Graphics specifics: lifetime across processes/actors, COMPtr leaks, off-main-thread use of main-thread-only objects. + +Build specifics: unified build off-by-default for analysis—avoid Linux-kernel-specific assumptions. 
+ +# Examples + +{{examples}} + +# Target Patch + +{{input_patch}} + +# Formatting + +Please tell me the **bug pattern** of the provided patch. +Please try not to wrap your response in functions if several lines of code are enough to express this pattern. + +Your response should be like: + +``` +## Bug Pattern + +{{describe the bug pattern here}} +``` diff --git a/prompt_template/firefox/plan2checker.md b/prompt_template/firefox/plan2checker.md new file mode 100644 index 00000000..b88af4da --- /dev/null +++ b/prompt_template/firefox/plan2checker.md @@ -0,0 +1,49 @@ +# Instruction + +You are proficient in writing Clang Static Analyzer checkers. + +Please help me write a CSA checker to detect a specific bug pattern. +You can refer to the `Target Bug Pattern` and `Target Patch` sections to help you understand the bug pattern. +Please make sure your checker can detect the bug shown in the `Buggy Code` section. +Please refer to the `Plan` section to implement the checker. +You can use the functions in `Utility Functions` section to help you write the checker. + +The version of the Clang environment is Clang-18. You should consider the API compatibility. +Target project: Firefox (C++). +The checker you write just needs to be able to detect the bug in C++ language, no need to consider C or Objective-C. + +Please complete the template in `Checker Template` section. You should complete the content wrapped in `{{...}}`. + +**Please read `Suggestions` section before writing the checker!** + +{{utility_functions}} + +{{suggestions}} + +# Examples + +{{examples}} + +# Target Bug Pattern + +{{input_pattern}} + +# Target Patch + +{{input_patch}} + +# Target Plan + +{{input_plan}} + +{{checker_template}} + +# Formatting + +Please show me the completed checker. 
+ +Your response should be like: + +```cpp +{{checker code here}} +``` diff --git a/src/agent.py b/src/agent.py index f4d68be7..f1ef08ee 100644 --- a/src/agent.py +++ b/src/agent.py @@ -8,13 +8,40 @@ from model import invoke_llm from tools import error_formatting, grab_error_message -prompt_template_dir = Path(__file__).parent.parent / "prompt_template" -example_dir = prompt_template_dir / "examples" +PROMPT_ROOT = Path(__file__).parent.parent / "prompt_template" +PROMPT_FIREFOX_ROOT = PROMPT_ROOT / "firefox" + +def _detect_target_name() -> str: + t = getattr(global_config, "target", None) + if t: + name = getattr(t, "name", None) + if isinstance(name, str) and name: + return name.lower() + return str(global_config.get("target_type", "linux")).lower() + +def _load_with_fallback(relpath: str) -> str: + """Read prompt file `relpath`, preferring the copy under PROMPT_FIREFOX_ROOT when the target is firefox and that copy exists; otherwise read it from PROMPT_ROOT.""" + if _detect_target_name() == "firefox": + p_fx = PROMPT_FIREFOX_ROOT / relpath + if p_fx.exists(): + return p_fx.read_text() + return (PROMPT_ROOT / relpath).read_text() + +def _load_examples() -> Path: + if _detect_target_name() == "firefox": + p_fx = PROMPT_FIREFOX_ROOT / "examples" + if p_fx.exists(): + return p_fx + return PROMPT_ROOT / "examples" + +prompt_template_dir = PROMPT_ROOT + +example_dir = _load_examples() default_checker_examples = [] -UTILITY_FUNCTION = (prompt_template_dir / "knowledge" / "utility.md").read_text() -SUGGESTIONS = (prompt_template_dir / "knowledge" / "suggestions.md").read_text() -TEMPLATE = (prompt_template_dir / "knowledge" / "template.md").read_text() +UTILITY_FUNCTION = _load_with_fallback("knowledge/utility.md") +SUGGESTIONS = _load_with_fallback("knowledge/suggestions.md") +TEMPLATE = _load_with_fallback("knowledge/template.md") class Example(BaseModel): @@ -65,13 +92,13 @@ def get_example_text( return example_text -patch2checker_template = (prompt_template_dir / "patch2checker.md").read_text() -patch2pattern_template = ( +patch2checker_template = _load_with_fallback("patch2checker.md") +patch2checker_template = ( 
patch2checker_template.replace("{{utility_functions}}", UTILITY_FUNCTION) .replace("{{suggestions}}", SUGGESTIONS) .replace("{{checker_template}}", TEMPLATE) ) -patch2checker_template = patch2pattern_template.replace( +patch2checker_template = patch2checker_template.replace( "{{examples}}", get_example_text( default_checker_examples, @@ -83,7 +110,7 @@ def get_example_text( ) """Patch to Pattern""" -patch2pattern_template = (prompt_template_dir / "patch2pattern.md").read_text() +patch2pattern_template = _load_with_fallback("patch2pattern.md") patch2pattern_template = patch2pattern_template.replace( "{{examples}}", get_example_text( @@ -95,9 +122,7 @@ def get_example_text( ), ) -patch2pattern_general_template = ( - prompt_template_dir / "patch2pattern-general.md" -).read_text() +patch2pattern_general_template = _load_with_fallback("patch2pattern-general.md") patch2pattern_general_template = patch2pattern_general_template.replace( "{{examples}}", get_example_text( @@ -110,32 +135,28 @@ def get_example_text( ) """Pattern to Plan""" -pattern2plan_template = (prompt_template_dir / "pattern2plan.md").read_text() +pattern2plan_template = _load_with_fallback("pattern2plan.md") pattern2plan_template = pattern2plan_template.replace( "{{utility_functions}}", UTILITY_FUNCTION ) """Pattern to Plan without utility functions""" -pattern2plan_template_no_utility = ( - prompt_template_dir / "pattern2plan-no-utility.md" -).read_text() +pattern2plan_template_no_utility = _load_with_fallback("pattern2plan-no-utility.md") """Plan to Checker""" -plan2checker_template = (prompt_template_dir / "plan2checker.md").read_text() +plan2checker_template = _load_with_fallback("plan2checker.md") plan2checker_template = ( plan2checker_template.replace("{{utility_functions}}", UTILITY_FUNCTION) .replace("{{suggestions}}", SUGGESTIONS) .replace("{{checker_template}}", TEMPLATE) ) -plan2checker_template_no_utility = ( - prompt_template_dir / "plan2checker-no-utility.md" -).read_text() 
+plan2checker_template_no_utility = _load_with_fallback("plan2checker-no-utility.md") plan2checker_template_no_utility = plan2checker_template_no_utility.replace( "{{suggestions}}", SUGGESTIONS ).replace("{{checker_template}}", TEMPLATE) -label_commit_template = (prompt_template_dir / "label_commit.md").read_text() +label_commit_template = _load_with_fallback("label_commit.md") def label_commit(id: str, iter: int, commit_id, patch: str): diff --git a/src/backends/csa.py b/src/backends/csa.py index c08148fc..af74bb84 100644 --- a/src/backends/csa.py +++ b/src/backends/csa.py @@ -3,6 +3,7 @@ import os import random import re +import time import shlex import shutil import subprocess as sp @@ -2457,75 +2458,346 @@ def _run_checker_firefox( return num_bugs def _analyze_firefox_files_with_scan_build( - self, target, source_files: list[str], checker_name: str = "SAGenTest" + self, target, source_files: list[str], checker_name: str = "SAGenTestChecker" ) -> list: """ - Analyze Firefox source files using scan-build with custom SAGEN checker. - Disables all built-in checkers and enables only the custom checker. - - Args: - target: Firefox target instance - source_files: List of source files to analyze - checker_name: Name of the custom checker to enable - - Returns: - list: List of detected bugs/issues + Firefox-safe analysis with per-commit & per-run results. + - Force local LLVM toolchain (disable bootstrap to avoid artifact downloads). + - Create per-commit objdir and per-run results dir under tmp/. + - Persist ALL logs under tmp/firefox_scan_results///_logs (never delete). + - Probe CSA plugin + checker before running scan-build. + - Gate incremental builds on a FULL BUILD completion check. + If full build is not complete, run a full `./mach build` first. + - Filter build targets against objdir ONLY after full build is complete. + - Pick & install a rustup toolchain based on the commit date, and set repo-local override. 
""" + import os + import shlex + import subprocess as sp + import json + import time + import datetime + import shutil + import types + from pathlib import Path + logger.info(f"Analyzing {len(source_files)} Firefox files with scan-build") + repo_dir = Path(target.repo.working_dir) llvm_build_dir = (self.backend_path / "build").absolute() - results_dir = Path("tmp/firefox_scan_results") + analyzer_clang = llvm_build_dir / "bin" / "clang" + analyzer_clangxx = llvm_build_dir / "bin" / "clang++" + scan_build = llvm_build_dir / "bin" / "scan-build" + plugin_so = llvm_build_dir / "lib" / "SAGenTestPlugin.so" + + # Resolve commit for naming + rev = sp.run(["git", "rev-parse", "--short", "HEAD"], cwd=repo_dir, capture_output=True, text=True) + commit_short = (rev.stdout or "unknown").strip() or "unknown" + objdir_name = f"obj-scan-{commit_short}" + topobjdir = repo_dir / objdir_name + + # Per-run results dir + run_id = datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S-%f") + results_root = repo_dir / "tmp" / "firefox_scan_results" + results_dir = results_root / commit_short / run_id results_dir.mkdir(parents=True, exist_ok=True) + log_dir = results_dir / "_logs" + log_dir.mkdir(parents=True, exist_ok=True) - bugs_found = [] + # Temporary logger sink + cmd_log = log_dir / "cmd-run.log" + sink_id = logger.add(str(cmd_log), level="DEBUG", retention="30 days") + logger.debug(f"[init] temp cmd logger -> {cmd_log}") + + def _log_env(e: dict) -> str: + keys = ["MOZCONFIG", "MOZ_AUTOMATION", "CI", "CC", "CXX", "NODEJS"] + return ", ".join(f"{k}={e.get(k, '')}" for k in keys) + + def _write_file(path: Path, content: str): + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(content or "") + + def run_cmd(cmd, *, name: str, cwd: Path, env: dict | None = None, + timeout: int | None = None, check: bool = False): + t0 = time.time() + cmd_str = " ".join(shlex.quote(str(x)) for x in cmd) + logger.debug(f"[CMD:{name}] cwd={cwd} env=({_log_env(env or {})})") + 
logger.debug(f"[CMD:{name}] $ {cmd_str}") + _write_file(log_dir / f"{name}.cmd.txt", cmd_str + "\n") + + res = sp.run(cmd, cwd=cwd, env=env, capture_output=True, text=True, timeout=timeout) + dt = time.time() - t0 + logger.debug(f"[CMD:{name}] rc={res.returncode} took={dt:.2f}s") + + _write_file(log_dir / f"{name}.stdout.log", res.stdout or "") + _write_file(log_dir / f"{name}.stderr.log", res.stderr or "") + + if res.stdout: + logger.debug(f"[CMD:{name}] STDOUT:\n{res.stdout}") + if res.stderr: + logger.debug(f"[CMD:{name}] STDERR:\n{res.stderr}") + + if check and res.returncode != 0: + raise RuntimeError(f"{name} failed (rc={res.returncode})") + return res + + def run_cmd_stream(cmd, *, name: str, cwd: Path, env: dict | None = None, timeout: int | None = None): + cmd_str = " ".join(shlex.quote(str(x)) for x in cmd) + logger.debug(f"[CMD:{name}] cwd={cwd} env=({_log_env(env or {})})") + logger.debug(f"[CMD:{name}] $ {cmd_str}") + _write_file(log_dir / f"{name}.cmd.txt", cmd_str + "\n") + + t0 = time.time() + proc = sp.Popen( + cmd, cwd=cwd, env=env, text=True, bufsize=1, + stdout=sp.PIPE, stderr=sp.STDOUT + ) + out_path = log_dir / f"{name}.stdout.log" + with open(out_path, "w", encoding="utf-8", buffering=1) as fout: + for line in proc.stdout: + fout.write(line) + logger.debug(f"[{name}] {line.rstrip()}") - try: - # Build scan-build command with disabled built-in checkers and custom checker - # Use -Xanalyzer to pass arguments to clang's static analyzer + try: + rc = proc.wait(timeout=timeout) + except sp.TimeoutExpired: + proc.kill() + rc = -9 + logger.warning(f"[CMD:{name}] timeout after {timeout}s; killed.") + dt = time.time() - t0 + logger.debug(f"[CMD:{name}] rc={rc} took={dt:.2f}s") + return types.SimpleNamespace(returncode=rc, stdout="", stderr=f"(streamed to {out_path})") + + def _detect_checker_fullname() -> tuple[bool, str]: cmd = [ - f"{llvm_build_dir}/bin/scan-build", - "--use-cc=clang", - "--use-c++=clang++", - "-load-plugin", 
f"{llvm_build_dir}/lib/SAGenTestPlugin.so", - "-enable-checker", f"custom.{checker_name}", - "-o", str(results_dir), - "./mach", "build" + str(analyzer_clang), "-fsyntax-only", "-x", "c", "-", + "-Xclang", "-load", "-Xclang", str(plugin_so), + "-Xclang", "-analyzer-checker-help", ] + r = run_cmd(cmd, name="probe_checker", cwd=repo_dir, env=os.environ.copy(), timeout=60) + expected = f"custom.{checker_name}" + if r.returncode != 0 or not r.stdout: + logger.warning("[probe] CSA plugin may not have loaded; running without plugin.") + return (False, "") + ok = any(expected in ln for ln in r.stdout.splitlines()) + if not ok: + candidates = [ln.strip() for ln in r.stdout.splitlines() if checker_name in ln] + if candidates: + logger.warning(f"[probe] '{expected}' not found; candidates: {candidates[:3]}") + else: + logger.warning(f"[probe] '{expected}' not found in analyzer-checker-help.") + return (ok, expected if ok else "") + + def _pick_rust_for_date(iso_date: str) -> str: + try: + if iso_date.endswith("Z"): + iso_date = iso_date.replace("Z", "+00:00") + d = datetime.datetime.fromisoformat(iso_date).date() + except Exception: + d = datetime.date.today() + if d >= datetime.date(2024, 10, 1): + return "1.88.0" + if d >= datetime.date(2024, 1, 1): + return "1.82.0" + if d >= datetime.date(2023, 1, 1): + return "1.66.1" + if d >= datetime.date(2022, 1, 1): + return "1.63.0" + return "1.56.1" + + def _ensure_rust_override(repo: Path, toolchain: str): + rustup = shutil.which("rustup") + if not rustup: + logger.warning("[rust] rustup not found; cannot set override automatically.") + return + run_cmd([rustup, "toolchain", "install", toolchain], + name="rustup_toolchain_install", cwd=repo, env=os.environ.copy(), timeout=1800) + run_cmd([rustup, "override", "set", toolchain], + name="rustup_override_set", cwd=repo, env=os.environ.copy(), timeout=300) + ver = run_cmd(["rustc", "--version"], name="rustc_version_after_override", + cwd=repo, env=os.environ.copy(), timeout=60) + 
+ logger.info(f"[rust] using: {(ver.stdout or ver.stderr or '').strip()} (override={toolchain})") + + # NEW: full-build completeness check (heuristic but robust) + def _is_full_build_complete(objdir: Path) -> bool: + """ + Heuristics for a 'full build complete' state: + - configure finished: <objdir>/config.status + - export/headers available: <objdir>/dist/include/js-config.h + - linked artifacts present: one of {libxul, firefox/js shell} in <objdir>/dist/bin + """ + if not (objdir / "config.status").exists(): + return False + if not (objdir / "dist" / "include" / "js-config.h").exists(): + return False + bin_dir = objdir / "dist" / "bin" + if not bin_dir.exists(): + return False + markers = ["libxul.so", "libxul.dylib", "xul.dll", "firefox", "firefox.exe", "js", "js.exe"] + return any((bin_dir / m).exists() for m in markers) - # Add specific object files to force rebuilding if provided and not too many - if len(source_files) < 5: - # Convert source files to object files to force mach to rebuild - object_files = [Firefox.get_object_name(f) for f in source_files] - # Remove existing object files to force recompilation - obj_dir = Path(target.repo.working_dir) / "obj-x86_64-pc-linux-gnu" - for obj_file in object_files: - full_obj_path = obj_dir / obj_file - if full_obj_path.exists(): - full_obj_path.unlink() - logger.info(f"Removed existing object file: {full_obj_path}") - cmd.extend(object_files) - - logger.info(f"Running scan-build command: {' '.join(cmd)}") + bugs_found = [] + try: + # --- commit date -> rustc toolchain (install & override if possible) + cdate = sp.run(["git", "show", "-s", "--format=%cI", "HEAD"], + cwd=repo_dir, capture_output=True, text=True) + commit_iso = (cdate.stdout or "").strip() + _write_file(log_dir / "commit_date.txt", commit_iso + "\n") + toolchain = _pick_rust_for_date(commit_iso or "") + _write_file(log_dir / "rust_toolchain.picked.txt", toolchain + "\n") + _ensure_rust_override(repo_dir, toolchain) + + # mozconfig + mozconfig_path = repo_dir / "mozconfig"
+ desired_mozconfig = "\n".join([ + f"mk_add_options MOZ_OBJDIR=@TOPSRCDIR@/{objdir_name}", + "ac_add_options --without-wasm-sandboxed-libraries", + "ac_add_options --disable-bootstrap", + ]) + "\n" + + need_reconfigure = False + if (not mozconfig_path.exists()) or (mozconfig_path.read_text() != desired_mozconfig): + _write_file(mozconfig_path, desired_mozconfig) + need_reconfigure = True + logger.info(f"{'Created' if not mozconfig_path.exists() else 'Updated'} mozconfig for {commit_short}") + _write_file(log_dir / "mozconfig.snapshot", desired_mozconfig) + + # environment + env = os.environ.copy() + env["MOZCONFIG"] = str(mozconfig_path) + env.pop("MOZ_AUTOMATION", None) + env.pop("CI", None) + env["CC"] = str(analyzer_clang) + env["CXX"] = str(analyzer_clangxx) + env.setdefault("PYTHONUNBUFFERED", "1") + + # venv + (optional) clobber on reconfigure + run_cmd(["./mach", "create-virtualenvs"], name="mach_create_venvs", cwd=repo_dir, env=env, timeout=900) + if need_reconfigure: + run_cmd(["./mach", "clobber"], name="mach_clobber", cwd=repo_dir, env=env, timeout=1800) + + cfg = run_cmd(["./mach", "-v", "configure"], name="mach_configure", cwd=repo_dir, env=env, timeout=2400) + if cfg.returncode != 0: + logger.warning("mach configure returned non-zero; see logs for details.") + + # Tool versions + versions = {} + env_json = run_cmd(["./mach", "environment", "--format", "json"], + name="mach_environment", cwd=repo_dir, env=env, timeout=300) + try: + if env_json.stdout: + data = json.loads(env_json.stdout) + substs = data.get("substs", {}) + versions["build_CC"] = substs.get("CC") + versions["build_CXX"] = substs.get("CXX") + versions["build_PYTHON3"] = substs.get("PYTHON3") or substs.get("PYTHON") + # Also record topobjdir for sanity + _write_file(log_dir / "topobjdir.txt", (data.get("topobjdir") or "") + "\n") + except Exception as e: + logger.debug(f"[versions] parse env failed: {e}") - result = sp.run( - cmd, - cwd=target.repo.working_dir, - capture_output=True, - 
text=True, - timeout=1800 # 30 minutes timeout - ) + pyv = run_cmd(["./mach", "python", "-c", "import sys;print(sys.version)"], + name="mach_python_version", cwd=repo_dir, env=env, timeout=120) + versions["build_python_version"] = (pyv.stdout or "").strip() - # Parse results from scan-build output - if result.returncode == 0 or "scan-build:" in result.stderr: - bugs_found = self._parse_scan_build_results(results_dir) - logger.info(f"Scan-build analysis completed, found {len(bugs_found)} issues") + cav = run_cmd([str(analyzer_clang), "--version"], + name="analyzer_clang_version", cwd=repo_dir, env=env, timeout=60) + versions["analyzer_clang_version"] = (cav.stdout or cav.stderr or "").strip() + _write_file(log_dir / "tool_versions.json", json.dumps(versions, indent=2)) + + # --- probe plugin/checker + plugin_ok, full_checker = _detect_checker_fullname() + + # --- FULL BUILD GATE: if full build is NOT complete, run a full build first + full_ready_before = _is_full_build_complete(topobjdir) + _write_file(log_dir / "full_build_ready.before.txt", str(full_ready_before) + "\n") + + # Compose base scan-build command (plugin optional) + base_cmd = [ + str(scan_build), + f"--use-cc={analyzer_clang}", + f"--use-c++={analyzer_clangxx}", + "-o", str(results_dir), + ] + if plugin_ok: + base_cmd += ["-load-plugin", str(plugin_so), "-enable-checker", full_checker] else: - logger.warning(f"Scan-build analysis had errors: {result.stderr}") - bugs_found = [] + logger.warning("[scan-build] running WITHOUT custom plugin.") + + if not full_ready_before: + logger.info("[build] Full build is NOT complete -> running full `./mach build` under scan-build") + cmd_full = base_cmd + ["./mach", "build", "-j8"] + result = run_cmd_stream(cmd_full, name="scan_build_full", cwd=repo_dir, env=env, timeout=7200) + _write_file(log_dir / "scan-build-full.stdout.log", result.stdout or "") + _write_file(log_dir / "scan-build-full.stderr.log", result.stderr or "") + + # Re-check after potential full build 
+ full_ready_after = _is_full_build_complete(topobjdir) + _write_file(log_dir / "full_build_ready.after.txt", str(full_ready_after) + "\n") + + # --- If full build is complete, allow incremental per-directory build; otherwise we already did full build. + build_args: list[str] = [] + if full_ready_after: + # Derive candidate per-dir targets from source files + def _rel_parent(p: str) -> str: + try: + rel = str(Path(p).parent) + relpath = str(Path(rel).resolve().relative_to(repo_dir.resolve())) + except Exception: + relpath = str(Path(p).parent) + return relpath + + raw_targets = sorted({_rel_parent(f) for f in source_files if f}) + raw_targets = [ + t for t in raw_targets + if t and (repo_dir / t).exists() and t.strip().lower() != "windows" + ] + + # Keep only those present in objdir (active for this configuration) + for t in raw_targets: + if (topobjdir / t).exists(): + build_args.append(t) + else: + logger.debug(f"[targets] drop '{topobjdir}/{t}' (inactive or platform-mismatch)") + + if full_ready_after and build_args: + logger.info(f"[build] Full build complete -> running INCREMENTAL build for targets: {build_args}") + cmd_inc = base_cmd + ["./mach", "build", "-j8", *build_args] + result = run_cmd_stream(cmd_inc, name="scan_build_incremental", cwd=repo_dir, env=env, timeout=3600) + _write_file(log_dir / "scan-build-incremental.stdout.log", result.stdout or "") + _write_file(log_dir / "scan-build-incremental.stderr.log", result.stderr or "") + else: + if full_ready_after: + logger.info("[build] Full build complete but no valid incremental targets; skipping incremental pass.") + # else: full build already executed above + + # Parse reports + bugs_found = self._parse_scan_build_results(results_dir) if results_dir.exists() else [] + logger.info(f"Scan-build done, issues={len(bugs_found)}") + + summary = { + "commit": commit_short, + "run_id": run_id, + "objdir": objdir_name, + "full_build_ready_before": full_ready_before, + "full_build_ready_after": full_ready_after, 
+ "incremental_targets": build_args or [], + "checker": full_checker if plugin_ok else "", + "issues": len(bugs_found), + "results_dir": str(results_dir), + "log_dir": str(log_dir), + } + _write_file(results_dir / "_run_summary.json", json.dumps(summary, indent=2)) except Exception as e: logger.error(f"Error during scan-build analysis: {e}") bugs_found = [] + finally: + try: + logger.remove(sink_id) + except Exception: + pass + logger.debug("[cleanup] removed temp cmd logger sink (files persist)") return bugs_found diff --git a/src/checker-initial.cpp b/src/checker-initial.cpp new file mode 100644 index 00000000..b8f62a71 --- /dev/null +++ b/src/checker-initial.cpp @@ -0,0 +1,108 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/StaticAnalyzer/Checkers/utility.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +// In this checker, we detect a bug pattern where an allocation (via new CSSImportantRule) +// is not checked for a null return before its pointer is used with NS_ADDREF. +// This can lead to a null pointer dereference in out-of-memory cases. 
+ +namespace { + +class SAGenTestChecker : public Checker< check::PreCall > { + mutable std::unique_ptr<BugType> BT; + +public: + SAGenTestChecker() : BT(new BugType(this, "Unchecked allocation", "Memory Error")) {} + + // Callback invoked before a function call is evaluated. + void checkPreCall(const CallEvent &Call, CheckerContext &C) const; + +private: + // Helper function to report a bug when an unchecked allocation is found. + void reportUncheckedAllocation(const CallEvent &Call, CheckerContext &C, const Expr *ArgExpr) const; +}; + +void SAGenTestChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const { + // First, get the origin expression and check if it contains "NS_ADDREF". + const Expr *OriginExpr = Call.getOriginExpr(); + if (!OriginExpr) + return; + if (!ExprHasName(OriginExpr, "NS_ADDREF", C)) + return; + + // Ensure that NS_ADDREF is invoked with at least one argument. + if (Call.getNumArgs() < 1) + return; + + // Get the SVal of the first argument (the pointer subject to NS_ADDREF). + SVal PtrVal = Call.getArgSVal(0); + + // Retrieve its corresponding expression. + const Expr *ArgExpr = Call.getArgExpr(0); + if (!ArgExpr) + return; + + // Get the memory region corresponding to this pointer. + // Note: We must not call IgnoreImplicit() before getMemRegionFromExpr. + const MemRegion *MR = getMemRegionFromExpr(ArgExpr, C); + if (!MR) + return; + MR = MR->getBaseRegion(); + + // Use the program state to check whether the pointer might be null. + const ProgramStateRef State = C.getState(); + ProgramStateRef stateNonNull, stateNull; + if (State->assume(PtrVal, true, stateNonNull, stateNull)) { + // If there exists a feasible state where the pointer is null, + // report an unchecked allocation bug. + if (stateNull) + reportUncheckedAllocation(Call, C, ArgExpr); + } +} + +void SAGenTestChecker::reportUncheckedAllocation(const CallEvent &Call, + CheckerContext &C, + const Expr *ArgExpr) const { + // Generate an error node in the exploded graph.
+ ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + return; + + // Prepare and emit a bug report. + auto report = std::make_unique<PathSensitiveBugReport>( + *BT, + "Unchecked allocation: new CSSImportantRule returned null before NS_ADDREF", + N); + report->addRange(ArgExpr->getSourceRange()); + C.emitReport(std::move(report)); +} + +} // end anonymous namespace + +extern "C" void clang_registerCheckers(CheckerRegistry &registry) { + registry.addChecker<SAGenTestChecker>( + "custom.SAGenTestChecker", + "Detects unchecked allocation: new CSSImportantRule returned null before NS_ADDREF", + ""); +} + +extern "C" const char clang_analyzerAPIVersionString[] = + CLANG_ANALYZER_API_VERSION_STRING; diff --git a/src/checker-repaired.cpp b/src/checker-repaired.cpp new file mode 100644 index 00000000..aefbc712 --- /dev/null +++ b/src/checker-repaired.cpp @@ -0,0 +1,84 @@ +#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" +#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" +#include "clang/StaticAnalyzer/Checkers/Taint.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/Environment.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Lex/Lexer.h" +#include "llvm/Support/raw_ostream.h" +#include "clang/StaticAnalyzer/Checkers/utility.h" + +using namespace clang; +using namespace ento; +using namespace taint; + +namespace { + +// The checker detects instances where an allocated resource (a gfxWindowsSurface) +// is validated with a simple null-pointer check instead of using a proper status check +// (e.g.
testing the result of the CairoStatus() call). This checker intercepts branch conditions, +// and if the source code for the condition references the variable "target" without mentioning "CairoStatus", +// it reports an error. +class SAGenTestChecker : public Checker< check::BranchCondition > { + mutable std::unique_ptr<BugType> BT; + +public: + SAGenTestChecker() + : BT(new BugType(this, "Resource Initialization Check", + "Incorrect resource initialization check")) {} + + void checkBranchCondition(const Stmt *Condition, CheckerContext &C) const; + +private: + // (Optional helper functions could be added here if more sophisticated AST inspection is needed) +}; + +void SAGenTestChecker::checkBranchCondition(const Stmt *Condition, CheckerContext &C) const { + if (!Condition) + return; + + // We are looking for conditions that reference the resource variable "target". + // Use the utility function to check if "target" is present in the expression. + const Expr *CondExpr = dyn_cast<Expr>(Condition); + if (!CondExpr) + return; + + if (ExprHasName(CondExpr, "target", C)) { + // Retrieve the source text of the condition. + const SourceManager &SM = C.getSourceManager(); + const LangOptions &LangOpts = C.getLangOpts(); + CharSourceRange Range = CharSourceRange::getTokenRange(Condition->getSourceRange()); + StringRef CondText = Lexer::getSourceText(Range, SM, LangOpts); + + // If the condition text does not contain "CairoStatus", + // then the error check is likely a null-pointer check (e.g. "if (!target)").
+ if (!CondText.contains("CairoStatus")) { + ExplodedNode *N = C.generateNonFatalErrorNode(); + if (!N) + return; + auto Report = std::make_unique<PathSensitiveBugReport>( + *BT, "Incorrect resource initialization check: null-pointer test used instead of checking CairoStatus", N); + Report->addRange(Condition->getSourceRange()); + C.emitReport(std::move(Report)); + } + } + C.addTransition(C.getState()); +} + +} // end anonymous namespace + +extern "C" void clang_registerCheckers(CheckerRegistry &registry) { + registry.addChecker<SAGenTestChecker>( + "custom.SAGenTestChecker", + "Detects missing resource status check (using null pointer check instead of CairoStatus) on gfxWindowsSurface", + ""); +} + +extern "C" const char clang_analyzerAPIVersionString[] = CLANG_ANALYZER_API_VERSION_STRING; diff --git a/src/global_config.py b/src/global_config.py index c521c248..80008624 100644 --- a/src/global_config.py +++ b/src/global_config.py @@ -10,6 +10,7 @@ from targets.factory import TargetFactory from targets.linux import Linux from targets.v8 import V8 +from targets.firefox import Firefox logger = loguru.logger @@ -49,11 +50,15 @@ def setup(self, config_path: str = "config.yaml"): if "v8_dir" in self._config: self._config["v8"] = V8(self.get("v8_dir")) + if "firefox_dir" in self._config: + self._config["firefox"] = Firefox(self.get("firefox_dir")) if "linux_dir" in self._config: self._config["linux"] = Linux(self.get("linux_dir")) if target_type == "v8": self._config["target"] = self._config["v8"] + elif target_type == "firefox": + self._config["target"] = self._config["firefox"] else: self._config["target"] = self._config["linux"] self._config["backend"] = ClangBackend(self.get("LLVM_dir"))