Align single threaded with multithreaded / Update specs (#32)

heyitsaamir · Sep 18, 2024 · 67227d5 · 67227d5
1 parent c4e4404
commit 67227d5
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 39 deletions.
diff --git a/src/specs.json b/src/specs.json
@@ -8,51 +8,44 @@
     },
     {
         "id": 2,
-        "spec": "All labels should be numbered with sequence indicators",
-        "instructions_to_solve": "All labels must have a sequence indicator at the beginning. These could be numbers like 1, 2, 3, 4. Numbers could also include letters like 1a, 1b, 1c, 2a, 2b, 2c, etc.",
-        "improvement_hints": "Indicate which labels do not have sequential indicators",
+        "spec": "All labels must be in sequential order.",
+        "instructions_to_solve": "At the start of each label, there should be a number, perhaps plus a letter (examples: 1, 2c, 5., etc). These labels must be in sequence.\nHere are some valid examples:\n1, 2, 3, 4 - All numbers are present\n1, 1a, 1b, 2a, 2b, 3 - Numbers and subsequences all exist and are in the correct order\nHere are some invalid examples:\n1, 2, 4 - here 3 is missing\n1, 2, 2b, 3 - Here the sequence 2 has subsequences (2b), but not all sequence indicators for 2 have subsequences (i.e. it's missing 2a)\n1, 2b, 3 - here 2a is missing\n1, 2, 3, 3, 4 - There are duplicate 3s\n1, 2a, 2c, 3 - 2b is missing.",
+        "improvement_hints": "Indicate which labels do not have sequential indicators, where the sequence is not in order or where a number is duplicated. When there is a duplication only indicate the number that is duplicated.",
         "hints_to_send": ["Node", "Data Flow"]
     },
     {
         "id": 3,
-        "spec": "All labels have sequences that make logical sense",
-        "instructions_to_solve": "The labels themselves may not be in sequential order, but the numbers in the sequence should indicate some sort of order. \nHere are some valid examples:\n1, 2, 3, 4 - All numbers are present\n1, 1a, 1b, 2a, 2b, 3 - Numbers and subsequences all exist and are in the correct order\nHere are some invalid examples:\n1, 2, 4 - here 3 is missing\n1, 2, 2b, 3 - Here the sequence 2 has subsequences (2b), but not all sequence indicators for 2 have subsequences (i.e. it's missing 2a)\n1, 2b, 3 - here 2a is missing\n1, 2, 3, 3, 4 - There are duplicate 3s\n1, 2a, 2c, 3 - 2b is missing.",
-        "improvement_hints": "Indicate which labels do not have sequential indicators",
-        "hints_to_send": ["Node", "Data Flow"]
-    },
-    {
-        "id": 4,
         "spec": "All nodes and labels should be tagged with [NEW] or [EXISTING]",
-        "instructions_to_solve": "All nodes and labels should be tagged with [NEW] or [EXISTING] to denote which part of the DFD is to be reviewed.",
+        "instructions_to_solve": "All nodes and labels should be tagged with [NEW] or [EXISTING] to denote which part of the DFD is to be reviewed. A node or label is defined as being tagged with [NEW] or [EXISTING] if it has [NEW] or [EXISTING] in the label. Do not use the term [NEW] or [EXISTING] if [NEW] or [EXISTING] is present in the node name or label name.",
         "improvement_hints": "Indicate which nodes or labels do not have [NEW] or [EXISTING]",
         "hints_to_send": ["Node", "Data Flow"]
     },
     {
-        "id": 5,
+        "id": 4,
         "spec": "Each node should have a request and response flow",
         "instructions_to_solve": "Validate a request and response for each node. If in the list of nodes with labels between them for two nodes both hasNode2ToNode1Curve and hasNode1ToNode2Curve are not True, say that there aren't curves in both directions between these nodes.\nDo not use strings like hasNode2ToNode1Curve in the response.",
         "improvement_hints": "Indicate which nodes do not have both a request and response flow",
         "hints_to_send": ["Node", "Data Flow"]
     },
     {
-        "id": 6,
+        "id": 5,
         "spec": "Storage nodes should have retention tags",
         "instructions_to_solve": "Each storage node can have a tag like 30D that represents its retention. If there are no storage nodes, then this criteria does not apply and the threat model passes this criteria. If storage nodes exist and do not have this tag issue a warning but this should not be a validation failure. If there is a tag that appears like it's a duration it should be in compact duration format. Only for [NEW] nodes",
         "improvement_hints": "Indicate which storage nodes do not have retention tags",
         "hints_to_send": ["Node", "Data Flow"]
     },
     {
-        "id": 7,
-        "spec": "Each label tag the type of data it passes",
+        "id": 6,
+        "spec": "Each label should tag the type of data it passes",
         "instructions_to_solve": "Each label should have a string representing the type of data it passes. Therefore it should include one of the following: AC, CC, EUII, OII, SM PND, EUPI, SD, FB, AD PPD MSD.",
         "improvement_hints": "Indicate which labels do not tag the type of data it passes",
         "hints_to_send": ["Node", "Data Flow"]
     },
     {
-        "id": 8,
+        "id": 7,
         "spec": "No JSON in labels",
         "instructions_to_solve": "There should not be any JSON in any of the labels. It can contain text, but not formatted as JSON.",
-        "improvement_hints": "Indicate which labels has JSON in the content.",
+        "improvement_hints": "Indicate which labels have JSON in the content, but do not add the JSON itself in the response.",
         "hints_to_send": ["Node", "Data Flow"]
     }
 ]
diff --git a/src/xml_threat_model_reviewer.py b/src/xml_threat_model_reviewer.py
@@ -5,7 +5,7 @@
 from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
 from autogen.agentchat.contrib.img_utils import pil_to_data_uri
 
-from Spec import load_specs_from_json
+from Spec import Spec, load_specs_from_json
 from svg_to_png.svg_to_png import load_threat_model
 from asyncio import ensure_future
 
@@ -108,29 +108,18 @@ async def _say_when_evaluating(self, img: Image.Image):
                     )
 
 
-folder = os.path.dirname(os.path.abspath(__file__))
-specs = load_specs_from_json(f"{folder}/specs.json")
-
-
 def setup_xml_threat_model_reviewer(
     llm_config,
     context: ChatContext,
     state: ConversationState,
-    threat_model_spec: str = """
-1. All nodes should be inside a boundary. Are there any nodes not in a boundary? To determine if a node is within a boundary in the node data for a node, has_boundary should be true. Do not tell the user of the has_boundary flag, however, just whether a node is not in a boundary.
-2. All labels should be numbered with sequential numbers. The labels themselves may not be in sequential order, but all numbers in the sequence must be there. For example, if you
-the labels are first "1. FlowA" and second "3. FlowB" and third, "2. FlowC", this is valid, because all numbers between 1 and 3 are there, but if it were "1. FlowA" and second 
-"4. FlowB" and third, "2. FlowC" then this would be invalid, because 3 is missing.
-3. All nodes and labels should be tagged with [NEW] or [EXISTING] to denote which part of the DFD is to be reviewed.
-4. Validate a request and response for each node and that there is a label. If in the list of nodes with labels between them for two nodes either hasNode2ToNode1Curve or hasNode1ToNode2Curve are not true, say that there aren't curves in both directions between these nodes. Do not use strings like hasNode2ToNode1Curve in the response.
-5. Each storage node can have a tag like 30D that represents its retention. If no storage nodes have this tag issue a warning but this should not be a validation failure. If there is a tag that appears like it's a duration it should be in compact duration format. Only for [NEW] nodes
-6. Each label should have a string representing the type of data it passes. Therefore it should include one of the following: AC, CC, EUII, OII, SM PND, EUPI, SD, FB, AD PPD MSD.
-7. There should not be any JSON in any of the labels. Only tags should be in the labels.
-    """,
 ):
-    # threat_model_spec = ''
-    # for spec in specs:
-    #     threat_model_spec += f"#{spec.id}. {spec.spec}\n{spec.instructions_to_solve}\n\n"
+    folder = os.path.dirname(os.path.abspath(__file__))
+    specs = load_specs_from_json(f"{folder}/specs.json")
+    threat_model_spec = [
+        f"{count}. **{spec.spec}**: {spec.instructions_to_solve} {spec.improvement_hints}"
+        for count, spec in enumerate(specs, start=1)
+    ]
+    threat_model_spec = "\n".join(threat_model_spec)
 
     assistant = AssistantAgent(
         name="Threat_Model_Evaluator",
@@ -144,11 +133,19 @@ def setup_xml_threat_model_reviewer(
             Please group the responses in three groups:
             1. **Needs to be addressed** for validation failures
             2. **Green** for items that are done correctly
-            3. **Warnings** for items that are not incorrect but are warnings
-                                                    
-            For any node that has newline characters like \n or \r please filter out these characters in your response. Also, filter out any JSON.""",
+            3. **Warnings** for items that are not incorrect but are warnings""",
         llm_config={"config_list": [llm_config], "timeout": 60, "temperature": 0},
     )
+    def modify_message(reply):
+        if isinstance(reply, str):
+            reply = reply.replace("\\n", "").replace("\\r", "")
+        return reply
+
+    def assistant_hook(sender, message, recipient, silent):
+        modified = modify_message(message)
+        return modified
+
+    assistant.register_hook("process_message_before_send", assistant_hook)
 
     capability = XMLThreatModelImageAddToMessageCapability(
         context, True, state=state, max_width=400