mal-lang · andrewbwm · Feb 26, 2025 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
diff --git a/README.md b/README.md
@@ -35,12 +35,6 @@ they are a setup for running a simulation. This is how the format looks like:
 lang_file: <path to .mar-archive>
 model_file: <path to json/yml model>
 
-attacker_agent_class: 'BreadthFirstAttacker' | 'DepthFirstAttacker' | 'KeyboardAgent'
-
-# For defender_agent_class, null and False are treated the same - no defender will be used in the simulation
-defender_agent_class: 'BreadthFirstAttacker' | 'DepthFirstAttacker' | 'KeyboardAgent' | null | False
-
-
 # Optionally add rewards for each attack step
 rewards:
   <full name of attack step>: <reward>
@@ -50,17 +44,19 @@ rewards:
   # Data A:read: 100
   ...
 
+# Define the agents taking part in the simulation. Attackers also list
+# their entry points as attack step full_names
+agents:
+  'Attacker1':
+    type: 'attacker'
+    agent_class: BreadthFirstAttacker | DepthFirstAttacker | KeyboardAgent | null
+    entry_points:
+    - 'Credentials:6:attemptCredentialsReuse'
 
-# Optionally add entry points to AttackGraph with attacker name and attack step full_names.
-# NOTE: If attacker entry points defined in both model and scenario,
-#       the scenario overrides the ones in the model.
-attacker_entry_points:
-  <attacker name>:
-    - <attack step full name>
+  'Defender1':
+    type: 'defender'
+    agent_class: BreadthFirstDefender | DepthFirstDefender | KeyboardAgent | null
 
-  # example:
-  # 'Attacker1':
-  #   - 'Credentials:6:attemptCredentialsReuse'
 
 # Optionally add observability rules that are applied to AttackGrapNodes
 # to make only certain steps observable

diff --git a/malsim/agents/__init__.py b/malsim/agents/__init__.py
@@ -0,0 +1,11 @@
+from .decision_agent import PassiveAgent, DecisionAgent
+from .keyboard_input import KeyboardAgent
+from .searchers import BreadthFirstAttacker, DepthFirstAttacker
+
+__all__ = [
+    'PassiveAgent',
+    'DecisionAgent',
+    'KeyboardAgent',
+    'BreadthFirstAttacker',
+    'DepthFirstAttacker'
+]
diff --git a/malsim/agents/decision_agent.py b/malsim/agents/decision_agent.py
@@ -0,0 +1,39 @@
+"""A decision agent is a heuristic agent"""
+
+from __future__ import annotations
+from typing import TYPE_CHECKING, Optional
+from abc import ABC, abstractmethod
+
+if TYPE_CHECKING:
+    from ..sims import MalSimAgentStateView
+    from maltoolbox.attackgraph import AttackGraphNode
+
+class DecisionAgent(ABC):
+
+    @abstractmethod
+    def get_next_action(
+        self,
+        agent: MalSimAgentStateView,
+        **kwargs
+    ) -> Optional[AttackGraphNode]:
+        """
+        Select next action the agent will work with.
+
+        Args:
+            agent: Current state of and other info about the agent from the simulator
+
+        Returns:
+            The selected action or None if there are no actions to select from.
+        """
+        ...
+
+class PassiveAgent(DecisionAgent):  # agent that never selects an action
+    def __init__(self, *args, **kwargs):
+        ...  # nothing to set up; any arguments are accepted and ignored
+
+    def get_next_action(
+        self,
+        agent: MalSimAgentStateView,
+        **kwargs
+    ) -> Optional[AttackGraphNode]:
+        ...  # body does nothing, so the method implicitly returns None
diff --git a/malsim/agents/keyboard_input.py b/malsim/agents/keyboard_input.py
@@ -1,20 +1,29 @@
-import numpy as np
+from __future__ import annotations
 import logging
+from typing import TYPE_CHECKING, Optional
 
-AGENT_ATTACKER = "attacker"
-AGENT_DEFENDER = "defender"
+from .decision_agent import DecisionAgent
+from ..sims import MalSimAgentStateView
+
+if TYPE_CHECKING:
+    from maltoolbox.attackgraph import AttackGraphNode
 
 logger = logging.getLogger(__name__)
 
-null_action = (0, None)
+class KeyboardAgent(DecisionAgent):
+    """An agent that makes decisions by asking user for keyboard input"""
 
+    def __init__(self, _, **kwargs):
+        super().__init__()  # object.__init__ rejects extra keyword arguments
+        logger.info("Creating KeyboardAgent")
 
-class KeyboardAgent:
-    def __init__(self, vocab):
-        logger.debug("Create Keyboard agent.")
-        self.vocab = vocab
+    def get_next_action(
+            self,
+            agent: MalSimAgentStateView,
+            **kwargs
+        ) -> Optional[AttackGraphNode]:
+        """Compute action from action_surface"""
 
-    def compute_action_from_dict(self, obs: dict, mask: tuple) -> tuple:
         def valid_action(user_input: str) -> bool:
             if user_input == "":
                 return True
@@ -24,40 +33,35 @@ def valid_action(user_input: str) -> bool:
             except ValueError:
                 return False
 
-            try:
-                a = associated_action[action_strings[node]]
-            except IndexError:
-                return False
-
-            if a == 0:
-                return True  # wait is always valid
-            return node < len(available_actions) and node >= 0
+            return 0 <= node < len(agent.action_surface)  # valid: 0..len-1
 
         def get_action_object(user_input: str) -> tuple:
             node = int(user_input) if user_input != "" else None
-            action = associated_action[action_strings[node]] if user_input != "" else 0
-            return node, action
-
-        available_actions = np.flatnonzero(mask[1])
+            return node
 
-        action_strings = [self.vocab[i] for i in available_actions]
-        associated_action = {i: 1 for i in action_strings}
-        action_strings += ["wait"]
-        associated_action["wait"] = 0
+        if not agent.action_surface:
+            print("No actions to pick from")
+            return None  # Optional[AttackGraphNode]: [] is not a valid result
 
+        index_to_node = dict(enumerate(agent.action_surface))
         user_input = "xxx"
         while not valid_action(user_input):
             print("Available actions:")
-            print("\n".join([f"{i}. {a}" for i, a in enumerate(action_strings)]))
+            print(
+                "\n".join(
+                    [f"{i}. {n.full_name}" for i, n in index_to_node.items()]
+                )
+            )
             print("Enter action or leave empty to wait:")
             user_input = input("> ")
 
             if not valid_action(user_input):
                 print("Invalid action.")
 
-        node, a = get_action_object(user_input)
+        index = get_action_object(user_input)
         print(
-            f"Selected action: {action_strings[node] if node is not None else 'wait'}"
+            "Selected action: "
+            + (index_to_node[index].full_name if index is not None else 'wait')
         )
 
-        return (a, available_actions[node] if a != 0 else -1)
+        return index_to_node[index] if index is not None else None
diff --git a/malsim/agents/searchers.py b/malsim/agents/searchers.py
@@ -1,137 +1,98 @@
+from __future__ import annotations
 import logging
+import re
 
 from collections import deque
-from typing import Any, Deque, Dict, List, Set, Union
+from typing import Optional, TYPE_CHECKING
 
 import numpy as np
 
-logger = logging.getLogger(__name__)
-
+from .decision_agent import DecisionAgent
+from ..sims import MalSimAgentStateView
 
-def get_new_targets(
-    observation: dict, discovered_targets: Set[int], mask: tuple
-) -> List[int]:
-    attack_surface = mask[1]
-    surface_indexes = list(np.flatnonzero(attack_surface))
-    new_targets = [idx for idx in surface_indexes if idx not in discovered_targets]
-    return new_targets, surface_indexes
+if TYPE_CHECKING:
+    from maltoolbox.attackgraph import AttackGraphNode
 
+logger = logging.getLogger(__name__)
 
-class PassiveAttacker:
-    def compute_action_from_dict(self, observation, mask):
-        return (0, None)
 
-class BreadthFirstAttacker:
-    def __init__(self, agent_config: dict) -> None:
-        self.targets: Deque[int] = deque([])
-        self.current_target: int = None
-        seed = (
-            agent_config["seed"]
-            if agent_config.get("seed", None)
-            else np.random.SeedSequence().entropy
-        )
-        self.rng = (
-            np.random.default_rng(seed)
-            if agent_config.get("randomize", False)
-            else None
-        )
+class BreadthFirstAttacker(DecisionAgent):
+    """A Breadth-First agent, with possible randomization at each level."""
 
-    def compute_action_from_dict(self, observation: Dict[str, Any], mask: tuple):
-        new_targets, surface_indexes = get_new_targets(observation, self.targets, mask)
+    _extend_method = "extendleft"
+    # Controls where newly discovered steps will be appended to the list of
+    # available actions. Currently used to differentiate between BFS and DFS
+    # agents.
 
-        # Add new targets to the back of the queue
-        # if desired, shuffle the new targets to make the attacker more unpredictable
-        if self.rng:
-            self.rng.shuffle(new_targets)
-        for c in new_targets:
-            self.targets.appendleft(c)
+    name = ' '.join(re.findall(r'[A-Z][^A-Z]*', __qualname__))
+    # A human-friendly name for the agent.
 
-        self.current_target, done = self.select_next_target(
-            self.current_target, self.targets, surface_indexes
-        )
+    default_settings = {
+        'randomize': False,
+        # Whether to randomize next target selection, still respecting the
+        # policy of the agent (e.g. BFS or DFS).
+        'seed': None,
+        # The random seed to initialize the randomness engine with.
+    }
 
-        self.current_target = None if done else self.current_target
-        action = 0 if done else 1
-        if action == 0:
-            logger.debug(
-                "Attacker Breadth First agent does not have "
-                "any valid targets it will terminate"
-            )
+    def __init__(self, agent_config: dict) -> None:
+        """Initialize a BFS agent.
 
-        return (action, self.current_target)
+        Args:
+            agent_config: Dict with settings to override defaults
+        """
+        self.targets: deque[AttackGraphNode] = deque()
+        self.current_target: Optional[AttackGraphNode] = None
 
-    @staticmethod
-    def select_next_target(
-        current_target: int,
-        targets: Union[List[int], Deque[int]],
-        attack_surface: Set[int],
-    ) -> int:
-        # If the current target was not compromised, put it
-        # back, but on the bottom of the stack.
-        if current_target in attack_surface:
-            targets.appendleft(current_target)
-            current_target = targets.pop()
+        self.settings = self.default_settings | agent_config
 
-        while current_target not in attack_surface:
-            if len(targets) == 0:
-                return None, True
+        self.rng = np.random.default_rng(
+            self.settings['seed']  # None draws fresh entropy; 0 is a real seed
+        )
 
-            current_target = targets.pop()
+    def get_next_action(
+        self, agent: MalSimAgentStateView, **kwargs
+    ) -> Optional[AttackGraphNode]:
+        self._update_targets(agent.action_surface)
+        self._select_next_target()
 
-        return current_target, False
+        return self.current_target
 
+    def _update_targets(self, action_surface: list[AttackGraphNode]):
 
-class DepthFirstAttacker:
-    def __init__(self, agent_config: dict) -> None:
-        self.current_target = -1
-        self.targets: List[int] = []
-        seed = (
-            agent_config["seed"]
-            if agent_config.get("seed", None)
-            else np.random.SeedSequence().entropy
-        )
-        self.rng = (
-            np.random.default_rng(seed)
-            if agent_config.get("randomize", False)
-            else None
-        )
+        # action surface does not have a guaranteed order, so sort a copy to
+        # keep the agent deterministic without reordering the caller's list
+        action_surface = sorted(action_surface, key=lambda n: n.id)
 
-    def compute_action_from_dict(self, observation: Dict[str, Any], mask: tuple):
-        new_targets, surface_indexes = get_new_targets(observation, self.targets, mask)
+        new_targets = [
+            step
+            for step in action_surface
+            if step not in self.targets and not step.is_compromised()
+        ]
 
-        # Add new targets to the top of the stack
-        if self.rng:
+        if self.settings['randomize']:
             self.rng.shuffle(new_targets)
-        for c in new_targets:
-            self.targets.append(c)
-
-        self.current_target, done = self.select_next_target(
-            self.current_target, self.targets, surface_indexes
-        )
-
-        self.current_target = None if done else self.current_target
-        action = 0 if done else 1
-        return (action, self.current_target)
 
-    @staticmethod
-    def select_next_target(
-        current_target: int,
-        targets: Union[List[int], Deque[int]],
-        attack_surface: Set[int],
-    ) -> int:
-        if current_target in attack_surface:
-            return current_target, False
+        if self.current_target in new_targets:
+            # If self.current_target is not yet compromised, e.g. due to TTCs,
+            # keep using that as the target.
+            new_targets.remove(self.current_target)
+            new_targets.append(self.current_target)
 
-        while current_target not in attack_surface:
-            if len(targets) == 0:
-                return None, True
+        # Enabled defenses may remove previously possible attack steps.
+        self.targets = deque(filter(lambda n: n.is_viable, self.targets))
 
-            current_target = targets.pop()
+        getattr(self.targets, self._extend_method)(new_targets)
 
-        return current_target, False
+    def _select_next_target(self) -> None:
+        """
+        Implement the actual next target selection logic.
+        """
+        try:
+            self.current_target = self.targets.pop()
+        except IndexError:
+            self.current_target = None
 
 
-AGENTS = {
-    BreadthFirstAttacker.__name__: BreadthFirstAttacker,
-    DepthFirstAttacker.__name__: DepthFirstAttacker,
-}
+class DepthFirstAttacker(BreadthFirstAttacker):
+    _extend_method, name = "extend", "Depth First Attacker"  # else BFS name