diff --git a/lib/inspect.py b/lib/inspect.py index 77c3d75..6b61b55 100644 --- a/lib/inspect.py +++ b/lib/inspect.py @@ -376,7 +376,7 @@ def convert_sarif(app_name, repo_context, sarif_files, findings_fname): tags.append( { "key": "owasp_category", - "value": owasp_category.split("-")[0].capitalize(), + "value": owasp_category, "shiftleft_managed": True, } ) @@ -402,6 +402,9 @@ def convert_sarif(app_name, repo_context, sarif_files, findings_fname): lineno = location.get("physicalLocation", {})["region"][ "startLine" ] + end_lineno = location.get("physicalLocation", {})[ + "contextRegion" + ]["endLine"] finding = { "app": app_name, "type": "extscan", @@ -412,6 +415,7 @@ def convert_sarif(app_name, repo_context, sarif_files, findings_fname): utils.calculate_line_hash( filename, lineno, + end_lineno, location.get("physicalLocation", {})["region"][ "snippet" ]["text"], diff --git a/lib/pyt/cfg/expr_visitor.py b/lib/pyt/cfg/expr_visitor.py index be3feaa..cdf233c 100644 --- a/lib/pyt/cfg/expr_visitor.py +++ b/lib/pyt/cfg/expr_visitor.py @@ -101,7 +101,7 @@ def init_function_cfg(self, node, module_definitions): first_node = module_statements.first_statement - if CALL_IDENTIFIER not in first_node.label: + if first_node is not None and CALL_IDENTIFIER not in first_node.label: entry_node.connect(first_node) last_nodes = module_statements.last_statements @@ -138,7 +138,7 @@ def visit_Attribute(self, node): def visit_Name(self, node): return self.visit_miscelleaneous_node(node) - def visit_NameConstant(self, node): + def visit_Constant(self, node): return self.visit_miscelleaneous_node(node) def visit_Str(self, node): @@ -250,6 +250,8 @@ def save_def_args_in_temp( # Create e.g. temp_N_def_arg1 = call_arg1_label_visitor.result for each argument for i, call_arg in enumerate(call_args): + if i > len(def_args) - 1: + break # If this results in an IndexError it is invalid Python def_arg_temp_name = ( "temp_" + str(saved_function_call_index) + "_" + def_args[i] @@ -333,7 +335,11 @@ def create_local_scope_from_def_args( preceding call to save_def_args_in_temp. """ # Create e.g. def_arg1 = temp_N_def_arg1 for each argument + if def_args is None: + return for i in range(len(call_args)): + if i > len(def_args) - 1: + return def_arg_local_name = def_args[i] def_arg_temp_name = ( "temp_" + str(saved_function_call_index) + "_" + def_args[i] @@ -370,6 +376,8 @@ def visit_and_get_function_nodes(self, definition, first_node): self.connect_if_allowed(previous_node, entry_node) function_body_connect_statements = self.stmt_star_handler(definition.node.body) + if isinstance(function_body_connect_statements, IgnoredNode): + return (IgnoredNode, first_node) entry_node.connect(function_body_connect_statements.first_statement) exit_node = self.append_node(EntryOrExitNode("Exit " + definition.name)) @@ -439,6 +447,8 @@ def return_handler( saved_function_call_index(int): Unique number for each call. first_node(EntryOrExitNode or RestoreNode): Used to connect previous statements to this function. """ + if not hasattr(function_nodes, "__iter__"): + return if any(isinstance(node, YieldNode) for node in function_nodes): # Presence of a `YieldNode` means that the function is a generator rhs_prefix = "yld_" diff --git a/lib/pyt/cfg/stmt_visitor.py b/lib/pyt/cfg/stmt_visitor.py index d280caa..5d4aaee 100644 --- a/lib/pyt/cfg/stmt_visitor.py +++ b/lib/pyt/cfg/stmt_visitor.py @@ -52,6 +52,12 @@ module.name for module in iter_modules() } # Don't warn about failing to import these +# Some builtin packages to break recursion +BUILTIN_PKGS = ["os", "logging", "json", "markdown"] + +# Cache to keep track of visited modules to break recursion +visited_module_paths = {} + class StmtVisitor(ast.NodeVisitor): def __init__(self, allow_local_directory_imports=True): @@ -190,6 +196,8 @@ def handle_or_else(self, orelse, test): else_connect_statements = self.stmt_star_handler( orelse, prev_node_to_avoid=self.nodes[-1] ) + if isinstance(else_connect_statements, IgnoredNode): + return IgnoredNode() test.connect(else_connect_statements.first_statement) return else_connect_statements.last_statements @@ -205,6 +213,8 @@ def visit_If(self, node): if node.orelse: orelse_last_nodes = self.handle_or_else(node.orelse, test) + if isinstance(orelse_last_nodes, IgnoredNode): + return IgnoredNode() body_connect_stmts.last_statements.extend(orelse_last_nodes) else: body_connect_stmts.last_statements.append( @@ -229,6 +239,8 @@ def visit_Return(self, node): if isinstance(node.value, ast.Call): return_value_of_call = self.visit(node.value) + if not hasattr(return_value_of_call, "left_hand_side"): + return None return_node = ReturnNode( LHS + " = " + return_value_of_call.left_hand_side, LHS, @@ -293,7 +305,8 @@ def visit_Try(self, node): orelse_last_nodes = self.handle_or_else( node.orelse, body.last_statements[-1] ) - body.last_statements.extend(orelse_last_nodes) + if not isinstance(orelse_last_nodes, IgnoredNode): + body.last_statements.extend(orelse_last_nodes) if node.finalbody: finalbody = self.stmt_star_handler(node.finalbody) @@ -482,6 +495,8 @@ def assignment_call_node(self, left_hand_label, ast_node): self.undecided = True # Used for handling functions in assignments call = self.visit(ast_node.value) + if not hasattr(call, "left_hand_side"): + return None call_label = call.left_hand_side call_assignment = AssignmentCallNode( @@ -807,12 +822,16 @@ def add_module( # noqa: C901 """ module_path = module[1] + if module_or_package_name in BUILTIN_PKGS: + uninspectable_modules.add(module_or_package_name) + return IgnoredNode() + if visited_module_paths.get(module[0]): + return IgnoredNode() + visited_module_paths[module[0]] = True parent_definitions = self.module_definitions_stack[-1] # Here, in `visit_Import` and in `visit_ImportFrom` are the only places the `import_alias_mapping` is updated parent_definitions.import_alias_mapping.update(import_alias_mapping) parent_definitions.import_names = local_names - if not module_or_package_name: - return new_module_definitions = ModuleDefinitions(local_names, module_or_package_name) new_module_definitions.is_init = is_init self.module_definitions_stack.append(new_module_definitions) @@ -824,7 +843,7 @@ def add_module( # noqa: C901 ) tree = generate_ast(module_path) if not tree: - return None + return IgnoredNode() # module[0] is None during e.g. "from . import foo", so we must str() self.nodes.append(EntryOrExitNode("Module Entry " + str(module[0]))) self.visit(tree) @@ -899,7 +918,6 @@ def add_module( # noqa: C901 ) parent_definition.node = def_.node parent_definitions.definitions.append(parent_definition) - return exit_node def from_directory_import( @@ -1001,6 +1019,9 @@ def handle_relative_import(self, node): # Is it a file? if name_with_dir.endswith(".py"): + if visited_module_paths.get(name_with_dir): + return IgnoredNode() + visited_module_paths[name_with_dir] = True return self.add_module( module=(node.module, name_with_dir), module_or_package_name=None, @@ -1083,8 +1104,14 @@ def visit_ImportFrom(self, node): ) for module in self.project_modules: name = module[0] + if node.level == 0: + break if node.module == name: if os.path.isdir(module[1]): + if visited_module_paths.get(module[1]): + return IgnoredNode() + # Break recursion + visited_module_paths[module[1]] = True return self.from_directory_import( module, not_as_alias_handler(node.names), @@ -1107,7 +1134,6 @@ def visit_ImportFrom(self, node): local_definitions.import_alias_mapping[ name.asname or name.name ] = "{}.{}".format(node.module, name.name) - if node.module not in uninspectable_modules: uninspectable_modules.add(node.module) return IgnoredNode() diff --git a/lib/pyt/helper_visitors/label_visitor.py b/lib/pyt/helper_visitors/label_visitor.py index b3eb1fd..774d91b 100644 --- a/lib/pyt/helper_visitors/label_visitor.py +++ b/lib/pyt/helper_visitors/label_visitor.py @@ -162,7 +162,7 @@ def visit_keyword(self, node): def insert_space(self): self.result += " " - def visit_NameConstant(self, node): + def visit_Constant(self, node): self.result += str(node.value) def visit_Subscript(self, node): diff --git a/lib/pyt/vulnerabilities/trigger_definitions_parser.py b/lib/pyt/vulnerabilities/trigger_definitions_parser.py index 7d9c7b8..a83d466 100644 --- a/lib/pyt/vulnerabilities/trigger_definitions_parser.py +++ b/lib/pyt/vulnerabilities/trigger_definitions_parser.py @@ -57,6 +57,9 @@ def kwarg_propagates(self, keyword): def get_kwarg_from_position(self, index): return self.arg_position_to_kwarg.get(index) + def __str__(self): + return f"Sink: Type: {self.sink_type}, Trigger: {self._trigger}" + @property def all_arguments_propagate_taint(self): if self.kwarg_list: diff --git a/lib/pyt/vulnerabilities/vulnerabilities.py b/lib/pyt/vulnerabilities/vulnerabilities.py index 956758c..2dadb35 100644 --- a/lib/pyt/vulnerabilities/vulnerabilities.py +++ b/lib/pyt/vulnerabilities/vulnerabilities.py @@ -104,7 +104,7 @@ def find_triggers(nodes, trigger_words): """ trigger_nodes = list() for node in nodes: - trigger_nodes.extend(iter(label_contains(node, trigger_words))) + trigger_nodes.extend(iter(label_starts_with(node, trigger_words))) return trigger_nodes @@ -124,6 +124,28 @@ def label_contains(node, triggers): yield TriggerNode(trigger, node) +def label_starts_with(node, triggers): + """Determine if node starts with the trigger_words provided. + + Args: + node(Node): CFG node to check. + trigger_words(list[Union[Sink, Source]]): list of trigger words to look for. + + Returns: + Iterable of TriggerNodes found. Can be multiple because multiple + trigger_words can be in one node. + """ + for trigger in triggers: + if trigger.trigger_word in node.label: + if ( + f"ret_{trigger.trigger_word}" in node.label + or f" {trigger.trigger_word}" in node.label + or f".{trigger.trigger_word}" in node.label + or node.label.startswith(trigger.trigger_word) + ): + yield TriggerNode(trigger, node) + + def build_sanitiser_node_dict(cfg, sinks_in_file): """Build a dict of string -> TriggerNode pairs, where the string is the sanitiser and the TriggerNode is a TriggerNode of the sanitiser. @@ -229,8 +251,11 @@ def get_vulnerability_chains(current_node, sink, def_use, chain=[]): yield chain else: vuln_chain = list(chain) - vuln_chain.append(use) - yield from get_vulnerability_chains(use, sink, def_use, vuln_chain) + if use not in vuln_chain: + vuln_chain.append(use) + yield from get_vulnerability_chains(use, sink, def_use, vuln_chain) + else: + yield chain def how_vulnerable( @@ -319,7 +344,6 @@ def get_vulnerability(source, sink, triggers, lattice, cfg, blackbox_mapping): sink_args = get_sink_args(sink.cfg_node) else: sink_args = get_sink_args_which_propagate(sink, sink.cfg_node.ast_node) - tainted_node_in_sink_arg = get_tainted_node_in_sink_args( sink_args, nodes_in_constraint, ) @@ -363,10 +387,33 @@ def get_vulnerability(source, sink, triggers, lattice, cfg, blackbox_mapping): vuln_deets["reassignment_nodes"] = chain return vuln_factory(vulnerability_type)(**vuln_deets) - return None +def filter_over_taint(vulnerability, source, sink, blackbox_mapping): + """Filter over tainted objects such as Sensitive Data Leaks + """ + source_cfg = source.cfg_node + sink_cfg = sink.cfg_node + sensitive_data_list = blackbox_mapping.get("sensitive_data_list") + sensitive_allowed_log_levels = blackbox_mapping.get("sensitive_allowed_log_levels") + source_type = source.source_type + sink_type = sink.sink_type + if sink_type == "Logging": + # Ignore logging for non-sensitive data + if source_cfg.label.lower() not in sensitive_data_list: + return None + # Ignore vulnerabilities with acceptable log levels + for log_level in sensitive_allowed_log_levels: + if log_level in sink.trigger_word.lower(): + return None + # render method based on Framework_Parameter is a known FP + if sink_type == "ReturnedToUser": + if sink.trigger_word == "render(" and source_type == "Framework_Parameter": + return None + return vulnerability + + def find_vulnerabilities_in_cfg( cfg, definitions, lattice, blackbox_mapping, vulnerabilities_list ): @@ -385,6 +432,11 @@ def find_vulnerabilities_in_cfg( vulnerability = get_vulnerability( source, sink, triggers, lattice, cfg, blackbox_mapping ) + # Filter over-tained vulnerability + if vulnerability: + vulnerability = filter_over_taint( + vulnerability, source, sink, blackbox_mapping + ) if vulnerability: vulnerabilities_list.append(vulnerability) diff --git a/lib/pyt/vulnerabilities/vulnerability_helper.py b/lib/pyt/vulnerabilities/vulnerability_helper.py index b711960..d48c196 100644 --- a/lib/pyt/vulnerabilities/vulnerability_helper.py +++ b/lib/pyt/vulnerabilities/vulnerability_helper.py @@ -106,6 +106,7 @@ def as_dict(self): source_path = self.source.path.split("/")[-1] sink_path = self.sink.path.split("/")[-1] rule_id = self.source_type + rule_name = f"Data flow from {self.source_type} to {self.sink_type}" description = f"User controlled data flow from the source `{source_path}:{self.source.line_number}` to the sink `{sink_path}:{self.sink.line_number}`" rule = source_sink_rules.get(f"{self.source_type}:{self.sink_type}") @@ -116,13 +117,19 @@ def as_dict(self): rule_name = rule.name severity = rule.severity message_format = rule.message_format - message_format = message_format.replace( - "{$sources}", f"{source_path}:{self.source.line_number}" - ) + owasp_category = rule.owasp_category + sources = f"{source_path}:{self.source.line_number}" + if self.source_type == "Framework_Parameter": + source_parameter = self.source.label + sources = ( + f"{source_parameter} in {source_path}:{self.source.line_number}" + ) + message_format = message_format.replace("{$sources}", sources) message_format = message_format.replace( "{$sinks}", f"{sink_path}:{self.sink.line_number}" ) description = message_format + return { "rule_id": rule_id, "rule_name": rule_name, diff --git a/lib/pyt/vulnerability_definitions/all_sources_sinks.pyt b/lib/pyt/vulnerability_definitions/all_sources_sinks.pyt index aecd0d8..765929c 100644 --- a/lib/pyt/vulnerability_definitions/all_sources_sinks.pyt +++ b/lib/pyt/vulnerability_definitions/all_sources_sinks.pyt @@ -139,6 +139,13 @@ "shutil.move(": {}, "shutil.chown(": {}, "shutil.make_archive(": {}, + "rmtree(": {}, + "copyfile(": {}, + "copymode(": {}, + "copystat(": {}, + "copy(": {}, + "copy2(": {}, + "copytree(": {}, "shutil.unpack_archive(": {}, "linecache.getline(": {}, "linecache.lazycache(": {}, @@ -159,7 +166,11 @@ "to_sql(": {}, "import_module(": {}, "convert(": {}, - "convertFile(": {} + "convertFile(": {}, + "f.write(": {}, + "fp.write(": {}, + "f.writelines(": {}, + "fp.writelines(": {} }, "Exfiltration": { "send_file(": {"sanitisers": ["'..'", "'..' in"]}, @@ -253,14 +264,7 @@ "os.unlink(": {}, "os.fwalk(": {}, "os.setxattr(": {}, - "shelve.open(": {}, - "rmtree(": {}, - "copyfile(": {}, - "copymode(": {}, - "copystat(": {}, - "copy(": {}, - "copy2(": {}, - "copytree(": {}, + "shelve.open(": {}, "chown(": {}, "chmod(": {}, "lchmod(": {}, @@ -295,12 +299,16 @@ "putrequest(": {}, "putheader(": {}, "create_connection(": {}, - "connect(": {}, - "connect_ex(": {}, - "send(": {}, - "ping(": {}, - "sendall(": {}, - "setopt(": {} + "socket.connect(": {}, + "socket.connect_ex(": {}, + "socket.send(": {}, + "socket.ping(": {}, + "socket.sendall(": {}, + "socket.setopt(": {}, + "ws.send(": {}, + "ws.ping(": {}, + "ws.sendall(": {}, + "ws.setopt(": {} }, "ReturnedToUser": { "flash(": {}, @@ -316,8 +324,7 @@ "FileResponse(": {}, "build_response(": {}, "prepare_data(": {}, - "encode_data(": {}, - "write(": {} + "encode_data(": {} }, "ResponseHeaderName": { "set_header(": {}, @@ -342,12 +349,21 @@ "scalar(": {} }, "XMLParser": { - "from_string(": {}, - "parse(": {}, - "iterparse(": {}, + "parser.from_string(": {}, + "xml.from_string(": {}, + "xml.parse(": {}, + "etree.parse(": {}, + "ET.parse(": {}, + "xml.iterparse(": {}, "XML(": {}, "XMLParser.feed(": {}, - "fromstring(": {} + "xml.fromstring(": {}, + "ET.fromstring(": {}, + "parser.fromstring(": {}, + "ET.fromstringlist(": {}, + "parser.fromstringlist(": {}, + "parser.feed(": {}, + "ET.XMLPullParser([": {} }, "XSS": { "do_mark_safe(": {}, @@ -368,7 +384,8 @@ }, "ServerSideTemplateInjection": { - "from_string(": {}, + "Jinja2.from_string(": {}, + "Environment(loader=BaseLoader).from_string(": {}, "compile_expression(": {} } } diff --git a/lib/pyt/vulnerability_definitions/blackbox_mapping.json b/lib/pyt/vulnerability_definitions/blackbox_mapping.json index 69ec774..499a5d2 100644 --- a/lib/pyt/vulnerability_definitions/blackbox_mapping.json +++ b/lib/pyt/vulnerability_definitions/blackbox_mapping.json @@ -20,7 +20,8 @@ "messages.info", "messages.success", "messages.debug", - "messages.error" + "messages.error", + "form.is_valid" ], "propagates": [ "os.path.join" @@ -29,6 +30,16 @@ "user_must_be_authorized", "user_passes_test", "login_required", - "permission_required" + "permission_required", + "filter" + ], + "sensitive_data_list": [ + "key", "token", "secret", "request", "response", "cookie", "req", "res", "password", "passwd", "private", "certificate", "cert", "hash", + "oauth2", "access_token", "refresh_token", "jwt", "cookie", "session", "cvv", "cvc", "card", "address", "mobile", "pin", + "postcode", "paypal", "uuid", "patient", "gender", "heart", "blood", "cholestrol", "hba1c" + ], + "sensitive_allowed_log_levels": [ + "trace", + "debug" ] } diff --git a/lib/pyt/web_frameworks/__init__.py b/lib/pyt/web_frameworks/__init__.py index 4e65fed..a3a5830 100644 --- a/lib/pyt/web_frameworks/__init__.py +++ b/lib/pyt/web_frameworks/__init__.py @@ -2,8 +2,8 @@ from lib.pyt.web_frameworks.framework_helper import ( is_django_view_function, is_flask_route_function, - is_taintable_function, is_function_without_leading_, + is_taintable_function, ) __all__ = [ diff --git a/lib/pyt/web_frameworks/framework_helper.py b/lib/pyt/web_frameworks/framework_helper.py index e943305..98e6ff5 100644 --- a/lib/pyt/web_frameworks/framework_helper.py +++ b/lib/pyt/web_frameworks/framework_helper.py @@ -39,6 +39,26 @@ def is_taintable_function(ast_node): if isinstance(decorator, ast.Call): if _get_last_of_iterable(get_call_names(decorator.func)) in safe_decorators: return False + # Flask route and Django tag + if _get_last_of_iterable(get_call_names(decorator.func)) in [ + "route", + "simple_tag", + "inclusion_tag", + "to_end_tag", + ]: + return True + # Ignore database functions + if len(ast_node.args.args): + first_arg_name = ast_node.args.args[0].arg + # Django view + if first_arg_name in ["request", "context"]: + return True + if first_arg_name in ["conn", "cursor", "sql"]: + return False + # Ignore known validation and sanitization functions + for n in ["valid", "sanitize", "sanitise", "is_", "set_"]: + if ast_node.name.startswith(n): + return False return True diff --git a/lib/utils.py b/lib/utils.py index fa3baf3..84a8b0b 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -390,17 +390,18 @@ def check_command(cmd): return False -def calculate_line_hash(filename, lineno, line): +def calculate_line_hash(filename, lineno, end_lineno, line): """ Method to calculate line hash :param lineno: Line number + :param end_lineno: End Line number :param filename: File name :param line: Line to hash :return: Hash based on blake2b algorithm """ - snippet = "{}:{}:{}".format( - lineno, filename, line.strip().replace("\t", "").replace("\n", "") + snippet = "{}:{}:{}:{}".format( + lineno, end_lineno, filename, line.strip().replace("\t", "").replace("\n", "") ) h = blake2b(digest_size=HASH_DIGEST_SIZE) h.update(snippet.encode()) diff --git a/scan b/scan index 5c0c5e7..f099334 100755 --- a/scan +++ b/scan @@ -288,7 +288,7 @@ def bandit_scan(src, reports_dir, convert, repo_context): "-n", "3", "-s", - "B101,B102,B105,B307,B308,B310,B322,B703", + "B101,B102,B105,B201,B307,B308,B310,B322,B404,B601,B602,B603,B604,B605,B701,B702,B703", "-iii", "-ll", *convert_args,