
Commit

add MISP tag titles & descriptions and echo these in reporting; update colour scheme & thresholds
leondz committed Dec 20, 2023
1 parent 4cbf489 commit 77c75f1
Showing 4 changed files with 83 additions and 10 deletions.
15 changes: 13 additions & 2 deletions garak/analyze/report_digest.py
@@ -5,6 +5,7 @@
 import importlib
 import json
 import markdown
+import os
 import re
 import sqlite3
 import sys
@@ -30,11 +31,19 @@
 end_module = templateEnv.get_template("garak/analyze/templates/end_module.jinja")
 
 
+misp_descriptions = {}
+if os.path.isfile("garak/resources/misp_descriptions.tsv"):
+    with open("garak/resources/misp_descriptions.tsv", "r", encoding="utf-8") as f:
+        for line in f:
+            key, title, descr = line.strip().split("\t")
+            misp_descriptions[key] = (title, descr)
+
+
 def map_score(score):
     """assign a defcon class to a %age score 0.0-100.0"""
     if score == 100.0:
         return 5
-    if score == 0.0:
+    if score < 5.0:
         return 1
     if score < 30.0:
         return 2
@@ -146,6 +155,8 @@ def compile_digest(report_path, taxonomy=_config.reporting.taxonomy):
             )
         elif probe_group != "other":
             probe_group_name = f"{taxonomy}:{probe_group}"
+            if probe_group_name in misp_descriptions:
+                probe_group_name, group_doc = misp_descriptions[probe_group_name]
         else:
             probe_group_name = "Uncategorized"
 
@@ -167,7 +178,7 @@ def compile_digest(report_path, taxonomy=_config.reporting.taxonomy):
            m = importlib.import_module(f"garak.probes.{probe_module}")
            digest_content += probe_template.render(
                {
-                    "plugin_name": probe_class,
+                    "plugin_name": f"{probe_module}.{probe_class}",
                     "plugin_score": f"{score:.1f}%",
                     "severity": map_score(score),
                     "plugin_descr": getattr(m, probe_class)().description,
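For reference, a minimal sketch of what the new report_digest.py logic does: load the tag-to-(title, description) table once from the TSV, then swap a raw taxonomy tag for its human-readable title when building the digest. Illustrative only, not part of the commit; the parsing mirrors the hunks above, and the sample tag is taken from the new TSV file added below.

    # sketch: resolve a MISP-style taxonomy tag to a title and description
    misp_descriptions = {}
    with open("garak/resources/misp_descriptions.tsv", encoding="utf-8") as f:
        for line in f:
            # three tab-separated columns, as in the commit's loader
            key, title, descr = line.strip().split("\t")
            misp_descriptions[key] = (title, descr)

    tag = "owasp:llm01"
    if tag in misp_descriptions:
        title, doc = misp_descriptions[tag]
        print(title)  # LLM01: Prompt Injection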
2 changes: 1 addition & 1 deletion garak/analyze/templates/digest_group.jinja
@@ -1,5 +1,5 @@
 
-<button class="defcon{{severity}} accordion">{{ module }} - {{ module_score }}</button>
+<button class="defcon{{severity}} accordion"><b>{{ module }}</b> - {{ module_score }}</button>
 <div class="panel">
 <p>{{module_doc}}</p>
 {%if module_score != "100.0%"%}
18 changes: 11 additions & 7 deletions garak/analyze/templates/digest_header.jinja
@@ -6,11 +6,11 @@
 <meta charset="UTF-8" />
 <style>
 body {font-family: sans-serif}
-.defcon1 {background-color: #DAB894; text-color: #000}
-.defcon2 {background-color: #FFCAAF; text-color: #fff}
-.defcon3 {background-color: #F1FFC4; text-color: #fff}
-.defcon4 {background-color: #C6E2E9; text-color: #fff}
-.defcon5 {background-color: #A7BED3; text-color: #fff}
+.defcon1 {background-color: #f94144; text-color: #000}
+.defcon2 {background-color: #f8961e; text-color: #000}
+.defcon3 {background-color: #ccc; text-color: #000}
+.defcon4 {background-color: #ddd; text-color: #000}
+.defcon5 {background-color: #f7f7f7; text-color: #000}
 .probe {padding-left: 40pt}
 .detector {padding-left: 65pt}
 h2 {padding-left: 20pt}
@@ -34,7 +34,7 @@ h2,h3,h4 {padding-top: 10px; padding-bottom: 10px}
 /* Add a background color to the button if it is clicked on (add the .active class with JS), and when you move the mouse over it (hover) */
 .active, .accordion:hover {
-  background-color: #ccc;
+  background-color: #ccf;
 }
 
 /* Style the accordion panel. Note: hidden by default */
@@ -63,4 +63,8 @@ h2,h3,h4 {padding-top: 10px; padding-bottom: 10px}
 <p>probe spec: {{probespec}}</p>
 </div>
 
-<h2>results</h2>
+{%if model_name %}
+<h2>results: {{model_type}} / {{model_name}}</h2>
+{%else%}
+<h2>results:</h2>
+{%endif%}
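A quick way to check the new heading logic (illustrative; the template body is copied from the hunk above, while the model_type and model_name values are hypothetical):

    # sketch: render the new conditional results heading
    from jinja2 import Template

    t = Template(
        "{%if model_name %}"
        "<h2>results: {{model_type}} / {{model_name}}</h2>"
        "{%else%}"
        "<h2>results:</h2>"
        "{%endif%}"
    )
    print(t.render(model_type="huggingface", model_name="gpt2"))
    # <h2>results: huggingface / gpt2</h2>
    print(t.render())
    # <h2>results:</h2>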
58 changes: 58 additions & 0 deletions garak/resources/misp_descriptions.tsv
@@ -0,0 +1,58 @@
owasp:llm01 LLM01: Prompt Injection Crafty inputs can manipulate a Large Language Model, causing unintended actions. Direct injections overwrite system prompts, while indirect ones manipulate inputs from external sources.
owasp:llm02 LLM02: Insecure Output Handling This vulnerability occurs when an LLM output is accepted without scrutiny, exposing backend systems. Misuse may lead to severe consequences like XSS, CSRF, SSRF, privilege escalation, or remote code execution.
owasp:llm03 LLM03: Training Data Poisoning This occurs when LLM training data is tampered, introducing vulnerabilities or biases that compromise security, effectiveness, or ethical behavior. Sources include Common Crawl, WebText, OpenWebText, & books.
owasp:llm04 LLM04: Model Denial of Service Attackers cause resource-heavy operations on Large Language Models leading to service degradation or high costs. The vulnerability is magnified due to the resource-intensive nature of LLMs and unpredictability of user inputs.
owasp:llm05 LLM05: Supply Chain Vulnerabilities LLM application lifecycle can be compromised by vulnerable components or services, leading to security attacks. Using third-party datasets, pre-trained models, and plugins can add vulnerabilities.
owasp:llm06 LLM06: Sensitive Information Disclosure LLMs may reveal confidential data in its responses, leading to unauthorized data access, privacy violations, and security breaches. It’s crucial to implement data sanitization and strict user policies to mitigate this.
owasp:llm07 LLM07: Insecure Plugin Design LLM plugins can have insecure inputs and insufficient access control. This lack of application control makes them easier to exploit and can result in consequences like remote code execution.
owasp:llm08 LLM08: Excessive Agency LLM-based systems may undertake actions leading to unintended consequences. The issue arises from excessive functionality, permissions, or autonomy granted to the LLM-based systems.
owasp:llm09 LLM09: Overreliance Systems or people overly depending on LLMs without oversight may face misinformation, miscommunication, legal issues, and security vulnerabilities due to incorrect or inappropriate content generated by LLMs.
owasp:llm10 LLM10: Model Theft This involves unauthorized access, copying, or exfiltration of proprietary LLM models. The impact includes economic losses, compromised competitive advantage, and potential access to sensitive information.
avid-effect:security:S0100 Software Vulnerability Vulnerability in system around model—a traditional vulnerability
avid-effect:security:S0200 Supply Chain Compromise Compromising development components of a ML model, e.g. data, model, hardware, and software stack.
avid-effect:security:S0201 Model Compromise Infected model file
avid-effect:security:S0202 Software compromise Upstream Dependency Compromise
avid-effect:security:S0300 Over-permissive API Unintended information leakage through API
avid-effect:security:S0301 Information Leak Cloud Model API leaks more information than it needs to
avid-effect:security:S0302 Excessive Queries Cloud Model API isn’t sufficiently rate limited
avid-effect:security:S0400 Model Bypass Intentionally try to make a model perform poorly
avid-effect:security:S0401 Bad Features The model uses features that are easily gamed by the attacker
avid-effect:security:S0402 Insufficient Training Data The bypass is not represented in the training data
avid-effect:security:S0403 Adversarial Example Input data points intentionally supplied to draw mispredictions. Potential Cause: Over permissive API
avid-effect:security:S0500 Exfiltration Directly or indirectly exfiltrate ML artifacts
avid-effect:security:S0501 Model inversion Reconstruct training data through strategic queries
avid-effect:security:S0502 Model theft Extract model functionality through strategic queries
avid-effect:security:S0600 Data poisoning Usage of poisoned data in the ML pipeline
avid-effect:security:S0601 Ingest Poisoning Attackers inject poisoned data into the ingest pipeline
avid-effect:ethics:E0100 Bias/Discrimination Concerns of algorithms propagating societal bias
avid-effect:ethics:E0101 Group fairness Fairness towards specific groups of people
avid-effect:ethics:E0102 Individual fairness Fairness in treating similar individuals
avid-effect:ethics:E0200 Explainability Ability to explain decisions made by AI
avid-effect:ethics:E0201 Global explanations Explain overall functionality
avid-effect:ethics:E0202 Local explanations Explain specific decisions
avid-effect:ethics:E0300 User actions Perpetuating/causing/being affected by negative user actions
avid-effect:ethics:E0301 Toxicity Users hostile towards other users
avid-effect:ethics:E0302 Polarization/Exclusion User behavior skewed in a significant direction
avid-effect:ethics:E0400 Misinformation Perpetuating/causing the spread of falsehoods
avid-effect:ethics:E0401 Deliberative Misinformation Generated by individuals, e.g. vaccine disinformation
avid-effect:ethics:E0402 Generative Misinformation Generated algorithmically, e.g. Deep Fakes
avid-effect:performance:P0100 Data issues Problems arising due to faults in the data pipeline
avid-effect:performance:P0101 Data drift Input feature distribution has drifted
avid-effect:performance:P0102 Concept drift Output feature/label distribution has drifted
avid-effect:performance:P0103 Data entanglement Cases of spurious correlation and proxy features
avid-effect:performance:P0104 Data quality issues Missing or low-quality features in data
avid-effect:performance:P0105 Feedback loops Unaccounted for effects of an AI affecting future data collection
avid-effect:performance:P0200 Model issues Ability for the AI to perform as intended
avid-effect:performance:P0201 Resilience/stability Ability for outputs to not be affected by small change in inputs
avid-effect:performance:P0202 OOD generalization Test performance doesn’t deteriorate on unseen data in training
avid-effect:performance:P0203 Scaling Training and inference can scale to high data volumes
avid-effect:performance:P0204 Accuracy Model performance accurately reflects realistic expectations
avid-effect:performance:P0300 Privacy Protect leakage of user information as required by rules and regulations
avid-effect:performance:P0301 Anonymization Protects through anonymizing user identity
avid-effect:performance:P0302 Randomization Protects by injecting noise in data, e.g. differential privacy
avid-effect:performance:P0303 Encryption Protects through encrypting data accessed
avid-effect:performance:P0400 Safety Minimizing maximum downstream harms
avid-effect:performance:P0401 Psychological Safety Safety from unwanted digital content, e.g. NSFW
avid-effect:performance:P0402 Physical safety Safety from physical actions driven by an AI system
avid-effect:performance:P0403 Socioeconomic safety Safety from socioeconomic harms, e.g. harms to job prospects or social status
avid-effect:performance:P0404 Environmental safety Safety from environmental harms driven by AI systems
