Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ae5d834
build(python): add bandit dev dependency and project config
robfrank May 5, 2026
f923fe8
feat(python): add internal _logging helper for swallowed exceptions
robfrank May 5, 2026
95f839a
fix(python): log swallowed exception in AsyncExecutor.is_processing
robfrank May 5, 2026
a62c452
fix(python): log swallowed exception in GraphBatch rollback path
robfrank May 5, 2026
94286ea
fix(python): narrow Database.__del__ finalizer exception handling
robfrank May 5, 2026
f133d57
fix(python): narrow shutdown_jvm exception handling to RuntimeError
robfrank May 5, 2026
b1cd6f0
fix(python): log swallowed exceptions in vector-index discovery
robfrank May 5, 2026
f270fdf
fix(python): default server host to localhost and harden finalizer
robfrank May 5, 2026
cde2ae2
fix(python): annotate vector lookup query as parameterized
robfrank May 5, 2026
1b2e8df
docs: add plan for Python bindings Codacy/Bandit cleanup
robfrank May 5, 2026
f0767a2
ci(python): add Bandit security scan job for bindings/python/src
robfrank May 5, 2026
de82682
test(python): centralise TEST_PASSWORD constant in conftest
robfrank May 5, 2026
ec5e8bf
test(python): annotate bench-loop SQL and random as benchmark-only
robfrank May 5, 2026
1952f42
test(python): parameterize vector INSERT/DELETE; annotate distance li…
robfrank May 5, 2026
20e9cd3
test(python): annotate seeded random data generation as nosec B311
robfrank May 5, 2026
171632a
test(python): annotate teardown try/except and subprocess usage
robfrank May 5, 2026
662f1ae
ci(python): require Bandit-clean tests in addition to src
robfrank May 5, 2026
15bacba
examples(python): mark SHA1 short-digest as usedforsecurity=False
robfrank May 5, 2026
82bb3a6
examples(python): enforce https-only URL fetches and annotate XML parse
robfrank May 5, 2026
74e4b7e
examples(python): parameterize timeseries SQL; annotate demo-data SQL
robfrank May 5, 2026
8d2039e
examples(python): annotate remaining bandit findings; tighten CI gate
robfrank May 5, 2026
e658ac5
fix(python): address PR #4084 review feedback
robfrank May 5, 2026
f365c8c
fix(python): revert vector INSERT parameterization (CI test failure)
robfrank May 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/test-python-bindings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,29 @@ permissions:
contents: read

jobs:
# Security gate for the Python bindings: installs a pinned Bandit release and
# scans the sources under bindings/python in two passes with different thresholds.
bandit:
name: Bandit security scan (bindings/python)
runs-on: ubuntu-latest
steps:
# Actions are referenced by full commit SHA; the trailing comment records the release tag.
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.12"

# Bandit is installed at an exact version (1.9.4) rather than a version range.
- name: Install Bandit
run: python -m pip install "bandit==1.9.4"

# Pass 1: src/ and tests/ are scanned recursively with the lowest severity and
# confidence thresholds, using the Bandit settings stored in pyproject.toml.
- name: Run Bandit on src and tests (must be clean)
working-directory: bindings/python
run: python -m bandit -c pyproject.toml -r src tests --severity-level low --confidence-level low

# Pass 2: examples/ is held to a looser bar; only findings of at least medium
# severity reported with high confidence are considered.
- name: Run Bandit on examples (must be clean at medium+/high-confidence)
working-directory: bindings/python
run: python -m bandit -c pyproject.toml -r examples --severity-level medium --confidence-level high

# First job: Download ArcadeDB JARs (platform-agnostic)
download-jars:
name: Download ArcadeDB JARs
Expand Down
20 changes: 15 additions & 5 deletions bindings/python/examples/11_vector_index_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,10 @@ def get_docker_version() -> str | None:


def fetch_json(url: str) -> dict:
if not url.startswith("https://"):
raise ValueError(f"Refusing to open non-HTTPS URL: {url!r}")
req = Request(url, headers={"User-Agent": "arcadedb-bench"})
with urlopen(req, timeout=30) as response:
with urlopen(req, timeout=30) as response: # nosec B310 - https-only
payload = json.load(response)
if not isinstance(payload, dict):
raise RuntimeError(f"Expected JSON object from {url}")
Expand Down Expand Up @@ -870,7 +872,9 @@ def get_qdrant_version(client) -> str | None:


def qdrant_project_name(db_path: Path) -> str:
    """Build a deterministic Qdrant project name from a database path.

    The name is ``arcadb-qdrant-`` followed by the first 10 hex characters of
    the SHA-1 digest of ``str(db_path)`` encoded as UTF-8, so the same path
    always maps to the same name.

    Args:
        db_path: Database path used as the fingerprint input.

    Returns:
        The project name string.
    """
    path_bytes = str(db_path).encode("utf-8")
    # SHA-1 is only a short, stable fingerprint here, not a security control,
    # hence usedforsecurity=False.
    digest = hashlib.sha1(path_bytes, usedforsecurity=False).hexdigest()[:10]
    return f"arcadb-qdrant-{digest}"


Expand Down Expand Up @@ -960,7 +964,9 @@ def wait_for_qdrant_ready(host: str, port: int, timeout_sec: int = 120) -> None:
while True:
for url in urls:
try:
with urlopen(url, timeout=3) as response:
with urlopen(
url, timeout=3
) as response: # nosec B310 - localhost health-check URL
if 200 <= int(response.status) < 500:
return
except Exception:
Expand Down Expand Up @@ -1010,7 +1016,9 @@ def ensure_milvus_compose_file(compose_file: Path, release_tag: str) -> None:
"https://github.com/milvus-io/milvus/releases/download/"
f"{release_tag}/milvus-standalone-docker-compose.yml"
)
urlretrieve(url, str(compose_file))
urlretrieve(
url, str(compose_file)
) # nosec B310 - url is a hardcoded https://github.com URL
raw = compose_file.read_text(encoding="utf-8")

sanitized = re.sub(r"(?m)^\s*container_name:\s*.*\n", "", raw)
Expand All @@ -1024,7 +1032,9 @@ def ensure_milvus_compose_file(compose_file: Path, release_tag: str) -> None:


def milvus_project_name(db_path: Path) -> str:
    """Build a deterministic Milvus project name from a database path.

    The name is ``arcadb-milvus-`` followed by the first 10 hex characters of
    the SHA-1 digest of ``str(db_path)`` encoded as UTF-8, so the same path
    always maps to the same name.

    Args:
        db_path: Database path used as the fingerprint input.

    Returns:
        The project name string.
    """
    path_bytes = str(db_path).encode("utf-8")
    # SHA-1 is only a short, stable fingerprint here, not a security control,
    # hence usedforsecurity=False.
    digest = hashlib.sha1(path_bytes, usedforsecurity=False).hexdigest()[:10]
    return f"arcadb-milvus-{digest}"


Expand Down
23 changes: 17 additions & 6 deletions bindings/python/examples/12_vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,10 @@ def get_docker_version() -> str | None:


def fetch_json(url: str) -> dict:
if not url.startswith("https://"):
raise ValueError(f"Refusing to open non-HTTPS URL: {url!r}")
req = Request(url, headers={"User-Agent": "arcadedb-bench"})
with urlopen(req, timeout=30) as response:
with urlopen(req, timeout=30) as response: # nosec B310 - https-only
payload = json.load(response)
if not isinstance(payload, dict):
raise RuntimeError(f"Expected JSON object from {url}")
Expand Down Expand Up @@ -914,7 +916,9 @@ def get_qdrant_version(client) -> str | None:


def qdrant_project_name(db_path: Path) -> str:
    """Derive a stable Qdrant project name for the given database path.

    The result is ``arcadb-qdrant-`` plus a 10-character hex prefix of the
    SHA-1 digest of the UTF-8 encoded ``str(db_path)``; identical paths yield
    identical names.

    Args:
        db_path: Database path that seeds the name.

    Returns:
        The project name string.
    """
    # The digest is a non-cryptographic identifier, so SHA-1 is flagged with
    # usedforsecurity=False.
    hasher = hashlib.sha1(str(db_path).encode("utf-8"), usedforsecurity=False)
    digest = hasher.hexdigest()[:10]
    return f"arcadb-qdrant-{digest}"


Expand Down Expand Up @@ -1004,7 +1008,9 @@ def wait_for_qdrant_ready(host: str, port: int, timeout_sec: int = 120) -> None:
while True:
for url in urls:
try:
with urlopen(url, timeout=3) as response:
with urlopen(
url, timeout=3
) as response: # nosec B310 - localhost health-check URL
if 200 <= int(response.status) < 500:
return
except Exception:
Expand Down Expand Up @@ -1120,7 +1126,8 @@ def run_repeated_search(
**run_stats,
"run": run_idx + 1,
"query_order_hash": hashlib.sha1(
",".join(str(v) for v in run_qids).encode("utf-8")
",".join(str(v) for v in run_qids).encode("utf-8"),
usedforsecurity=False,
).hexdigest(),
}
per_run_stats.append(run_stats)
Expand Down Expand Up @@ -1240,7 +1247,9 @@ def ensure_milvus_compose_file(compose_file: Path, release_tag: str) -> None:
"https://github.com/milvus-io/milvus/releases/download/"
f"{release_tag}/milvus-standalone-docker-compose.yml"
)
urlretrieve(url, str(compose_file))
urlretrieve(
url, str(compose_file)
) # nosec B310 - url is a hardcoded https://github.com URL
raw = compose_file.read_text(encoding="utf-8")

sanitized = re.sub(r"(?m)^\s*version\s*:\s*.*\n", "", raw)
Expand All @@ -1255,7 +1264,9 @@ def ensure_milvus_compose_file(compose_file: Path, release_tag: str) -> None:


def milvus_project_name(db_path: Path) -> str:
    """Derive a stable Milvus project name for the given database path.

    The result is ``arcadb-milvus-`` plus a 10-character hex prefix of the
    SHA-1 digest of the UTF-8 encoded ``str(db_path)``; identical paths yield
    identical names.

    Args:
        db_path: Database path that seeds the name.

    Returns:
        The project name string.
    """
    # The digest is a non-cryptographic identifier, so SHA-1 is flagged with
    # usedforsecurity=False.
    hasher = hashlib.sha1(str(db_path).encode("utf-8"), usedforsecurity=False)
    digest = hasher.hexdigest()[:10]
    return f"arcadb-milvus-{digest}"


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ def edge_endpoints(edge_id: int, vertex_count: int) -> Tuple[int, int]:


def build_rid_lookup_for_vertex_type(db, vertex_type: str) -> Dict[int, str]:
rows = db.query("sql", f"SELECT Id, @rid as rid FROM {vertex_type}").to_list()
rows = db.query(
"sql",
f"SELECT Id, @rid as rid FROM {vertex_type}", # nosec B608 - vertex_type is a script constant
).to_list()
rid_lookup: Dict[int, str] = {}
for row in rows:
row_id = row.get("Id")
Expand Down Expand Up @@ -165,7 +168,12 @@ def collect_vertex_sample(
db, vertex_type: str, vertex_id: int, props: List[ColumnDef]
) -> dict:
row = query_one_or_none(
db.query("sql", f"SELECT FROM {vertex_type} WHERE Id = {vertex_id}")
db.query(
"sql",
# vertex_type is a constant from this script; vertex_id is bound as parameter.
f"SELECT FROM {vertex_type} WHERE Id = ?", # nosec B608
vertex_id,
)
)
if row is None:
return {"Id": vertex_id, "missing": True}
Expand Down Expand Up @@ -246,7 +254,8 @@ def collect_graph_signature(
vertex_aggregate = query_one_or_none(
db.query(
"sql",
f"SELECT {', '.join(vertex_aggregate_fields)} FROM {vertex_type}",
# vertex_aggregate_fields and vertex_type are script-local constants.
f"SELECT {', '.join(vertex_aggregate_fields)} FROM {vertex_type}", # nosec B608
)
)
edge_aggregate = query_one_or_none(
Expand Down
9 changes: 6 additions & 3 deletions bindings/python/examples/17_timeseries_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,13 @@ def main() -> int:
for row in db.query(
"sql",
"SELECT FROM SensorReading "
f"WHERE ts BETWEEN {raw_window_start} AND {raw_window_end} "
f"AND sensor_id = '{focus_sensor.sensor_id}' "
f"AND building = '{focus_sensor.building}' "
"WHERE ts BETWEEN ? AND ? "
"AND sensor_id = ? AND building = ? "
"ORDER BY ts",
raw_window_start,
raw_window_end,
focus_sensor.sensor_id,
focus_sensor.building,
)
]
print_rows(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ def insert_seed_data(db) -> None:
for route in ROUTES:
db.command(
"sql",
f"CREATE EDGE {route['edge_type']} "
# route['edge_type'] is a constant from the demo schema.
f"CREATE EDGE {route['edge_type']} " # nosec B608
"FROM (SELECT FROM City WHERE code = ? LIMIT 1) "
"TO (SELECT FROM City WHERE code = ? LIMIT 1) "
"SET distance = ?, duration = ?, risk = ?, lane = ?",
Expand Down Expand Up @@ -755,7 +756,10 @@ def run_reopen_phase(db_path: Path) -> None:
.get("count")
)
route_count = sum(
reopened_db.query("sql", f"SELECT count(*) AS count FROM {edge_type}")
reopened_db.query(
"sql",
f"SELECT count(*) AS count FROM {edge_type}", # nosec B608 - edge_type is a script constant
)
.first()
.get("count")
for edge_type in ("Road", "Rail", "Ferry")
Expand Down
8 changes: 7 additions & 1 deletion bindings/python/examples/21_server_mode_http_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,14 @@ def http_json_request(
request_headers["Content-Type"] = "application/json"

request = Request(url, data=data, headers=request_headers, method=method)
if not request.full_url.startswith(
("http://localhost", "http://127.0.0.1", "https://")
):
raise ValueError(f"Refusing to call unexpected URL: {request.full_url!r}")
try:
with urlopen(request, timeout=timeout) as response:
with urlopen(
request, timeout=timeout
) as response: # nosec B310 - localhost or https
body = response.read().decode("utf-8")
except HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace")
Expand Down
7 changes: 4 additions & 3 deletions bindings/python/examples/22_graph_analytical_view_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,7 @@ def query_direct_neighbor_sample(


def query_two_hop_summary(db, origin_code: str) -> dict:
# origin_code is a script-local constant from the demo dataset.
result = db.query(
"sql",
f"""
Expand All @@ -550,7 +551,7 @@ def query_two_hop_summary(db, origin_code: str) -> dict:
{{type: City, as: dst}}
RETURN DISTINCT dst.code AS code
)
""",
""", # nosec B608 - demo-data constants only
)
row = result.first()
require(row is not None, "Expected a two-hop summary row")
Expand All @@ -567,7 +568,7 @@ def query_hub_inbound_count(db, hub_code: str) -> int:
{{type: City, as: hub, where: (code = '{hub_code}')}}
RETURN src.code AS code
)
""",
""", # nosec B608 - demo-data constants only
)
row = result.first()
require(row is not None, "Expected an inbound count row")
Expand All @@ -583,7 +584,7 @@ def query_region_sample(db, sample_limit: int) -> list[dict]:
GROUP BY region
ORDER BY region
LIMIT {sample_limit}
""",
""", # nosec B608 - sample_limit is a script integer constant
)
return rows_to_dicts(result, ["region", "city_count", "avg_demand"])

Expand Down
46 changes: 37 additions & 9 deletions bindings/python/examples/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@
tqdm = None


def _require_https(url: str) -> str:
    """Reject non-HTTPS URLs before opening them.

    Bandit B310 flags urlopen() because it permits file:// and custom schemes.
    Examples download from a fixed list of HTTPS dataset URLs, so we enforce
    that contract explicitly here.

    Args:
        url: URL that is about to be opened.

    Returns:
        ``url`` unchanged, so the check can wrap the argument at the call site.

    Raises:
        ValueError: If ``url`` does not start with the ``https://`` scheme.
    """
    # URL schemes are case-insensitive (RFC 3986, section 3.1), so compare the
    # prefix case-insensitively rather than rejecting e.g. "HTTPS://host/...".
    if not url.lower().startswith("https://"):
        raise ValueError(f"Refusing to open non-HTTPS URL: {url!r}")
    return url


def ensure_clean_dir(path: Path, label: str) -> None:
if path.exists():
print(f"[CLEAN] Removing existing {label} directory: {path}")
Expand Down Expand Up @@ -188,8 +200,10 @@
f"[DOWNLOAD] Resuming {destination.name} from {_format_bytes(resume_from)}"
)

request = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(request, timeout=60) as response:
request = urllib.request.Request(_require_https(url), headers=headers)
with urllib.request.urlopen(
request, timeout=60
) as response: # nosec B310 - https-only
status = getattr(response, "status", response.getcode())

if resume_from > 0 and status != 206:
Expand Down Expand Up @@ -649,7 +663,9 @@
end="",
)

urllib.request.urlretrieve(url, zip_path, reporthook=report_progress)
urllib.request.urlretrieve(
_require_https(url), zip_path, reporthook=report_progress
) # nosec B310 - https-only
print() # New line after progress
download_elapsed = time.time() - download_start
print(f"[OK] Downloaded to: {zip_path} " f"({download_elapsed:.2f}s)")
Expand Down Expand Up @@ -1085,9 +1101,11 @@


def _iter_stackoverflow_rows(xml_path: Path, fields: list[str]):
import xml.etree.ElementTree as ET
import xml.etree.ElementTree as ET # nosec B405 - parsing files we just downloaded over HTTPS and verified

context = ET.iterparse(xml_path, events=("start", "end"))
context = ET.iterparse(
xml_path, events=("start", "end")
) # nosec B314 - input is a downloaded, checksum-verified file
_, root = next(context)
for event, elem in context:
if event == "end" and elem.tag == "row":
Expand Down Expand Up @@ -1618,7 +1636,9 @@
end="",
)

urllib.request.urlretrieve(url, dbgen_zip, reporthook=report_progress)
urllib.request.urlretrieve(
_require_https(url), dbgen_zip, reporthook=report_progress
) # nosec B310 - https-only
print()

extract_dir = data_dir / "tpch-dbgen-extract"
Expand Down Expand Up @@ -1701,7 +1721,13 @@
"main/params-csv-merge-foreign.ini"
)
print("[DOWNLOAD] LDBC SNB params template")
template = urllib.request.urlopen(template_url).read().decode("utf-8")
template = (
urllib.request.urlopen( # nosec B310 - https-only
_require_https(template_url)
)
.read()
.decode("utf-8")
)
lines = []
inserted = False
for line in template.splitlines():
Expand Down Expand Up @@ -2139,7 +2165,7 @@
Returns:
dict: Verification results
"""
import xml.etree.ElementTree as ET
import xml.etree.ElementTree as ET # nosec B405 - parsing files we just downloaded over HTTPS and verified

verification_start = time.time()
results = {}
Expand All @@ -2163,7 +2189,9 @@
file_start = time.time()

# Parse XML iteratively for large files
context = ET.iterparse(xml_path, events=("start", "end"))
context = ET.iterparse(
xml_path, events=("start", "end")
) # nosec B314 - input is a downloaded, checksum-verified file
_, root = next(context) # Get root element

all_attrs = set()
Expand Down
Loading
Loading