Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion ledger/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

import logging
import re
from typing import Any

from surrealdb import AsyncSurreal, RecordID
Expand All @@ -19,6 +20,61 @@
logger = logging.getLogger(__name__)


# Windows-drive-letter detector at the start of an embedded URL path.
# Matches "C:\..." or "C:/...". Used to spot URLs that contain a
# Windows-style file path which needs slash-normalization before
# urllib.parse can read them.
_WINDOWS_DRIVE_AT_PATH_START = re.compile(r"^([A-Za-z]):[\\/]")


def normalize_surrealkv_url(url: str) -> str:
"""Normalize ``surrealkv://`` URLs containing Windows file paths.

Issue #68: ``urllib.parse.urlparse("surrealkv://C:\\Users\\...")``
treats everything after the scheme as a netloc and raises
``ValueError: Port could not be cast to integer value`` on
``parsed.port``. The SurrealDB Python SDK reads ``parsed.port``
in its ``Url`` wrapper, so passing an unmodified Windows backslash
path crashes every embedded test that builds its URL from a
``tmp_path`` fixture.

Fix: replace backslashes with forward slashes inside the path.

surrealkv://C:\\Users\\foo\\bar.db → surrealkv://C:/Users/foo/bar.db

The forward-slash form parses cleanly through ``urllib.parse``
(netloc=``C:``, path=``/Users/foo/bar.db``, port=None — the path
after the colon doesn't look like an int, but ``urlparse`` only
raises when the port-position content is non-empty AND non-numeric;
here the colon is immediately followed by ``/`` so the port-position
is empty and parsing succeeds). The SurrealKV Rust backend accepts
this form on Windows.

POSIX URLs, in-memory URLs (``memory://``), and remote URLs
(``ws://``, ``http://``) pass through unchanged because they
contain no backslashes.
"""
if not url.startswith(("surrealkv://", "surrealkv+versioned://", "file://")):
return url

# Find the path portion (everything after scheme://)
scheme_end = url.find("://") + len("://")
after_scheme = url[scheme_end:]

# Only rewrite if the URL contains a Windows-style backslash or a
# bare drive-letter prefix that would confuse urllib. Pure POSIX
# paths and already-normalized Windows paths pass through unchanged.
if "\\" not in after_scheme:
return url

if not _WINDOWS_DRIVE_AT_PATH_START.match(after_scheme):
# Has backslashes but no drive letter — likely a malformed URL,
# but we fix the slashes anyway to give urllib a fighting chance.
return url[:scheme_end] + after_scheme.replace("\\", "/")

return url[:scheme_end] + after_scheme.replace("\\", "/")


class LedgerError(RuntimeError):
"""Raised when SurrealDB rejects a statement at the application layer.

Expand Down Expand Up @@ -65,7 +121,10 @@ def __init__(
username: str = "root",
password: str = "root",
) -> None:
self.url = url
# Normalize embedded Windows paths so the SurrealDB SDK's internal
# urllib.parse.urlparse() doesn't choke on the drive-letter colon.
# See ``normalize_surrealkv_url`` and issue #68.
self.url = normalize_surrealkv_url(url)
self.ns = ns
self.db = db
self._username = username
Expand Down
117 changes: 117 additions & 0 deletions tests/test_surrealkv_url_normalization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""Regression tests for issue #68 — surrealkv:// URL normalization for Windows paths.

Issue #68: ``urllib.parse.urlparse("surrealkv://C:\\Users\\...")`` treats
the drive letter as a netloc with a port and raises:

ValueError: Port could not be cast to integer value as 'C'

The SurrealDB Python SDK calls ``urlparse`` internally on connect, so
passing an unmodified Windows path crashes every embedded test that
constructs its URL from a ``tmp_path`` fixture (e.g. all 5 tests in
``tests/test_schema_persistence.py``).

``LedgerClient.__init__`` now calls ``normalize_surrealkv_url`` to
replace backslashes with forward slashes inside the path, which urllib
parses cleanly AND which the SurrealKV Rust backend accepts:

surrealkv://C:\\Users\\foo\\bar.db → surrealkv://C:/Users/foo/bar.db
"""

from __future__ import annotations

from urllib.parse import urlparse

import pytest

from ledger.client import LedgerClient, normalize_surrealkv_url


class TestNormalizeSurrealKVURL:
"""Pure-function tests for ``normalize_surrealkv_url``."""

def test_windows_backslash_path_normalised(self) -> None:
out = normalize_surrealkv_url(r"surrealkv://C:\Users\krkna\AppData\Temp\ledger.db")
assert out == "surrealkv://C:/Users/krkna/AppData/Temp/ledger.db"

def test_windows_forward_slash_path_unchanged(self) -> None:
# Already forward-slashed — no backslashes to replace.
url = "surrealkv://D:/temp/ledger.db"
assert normalize_surrealkv_url(url) == url

def test_lowercase_drive_letter_preserved(self) -> None:
out = normalize_surrealkv_url(r"surrealkv://c:\foo\bar.db")
assert out == "surrealkv://c:/foo/bar.db"

def test_versioned_scheme_also_normalised(self) -> None:
out = normalize_surrealkv_url(r"surrealkv+versioned://C:\foo\bar.db")
assert out == "surrealkv+versioned://C:/foo/bar.db"

def test_file_scheme_also_normalised(self) -> None:
out = normalize_surrealkv_url(r"file://C:\foo\bar.db")
assert out == "file://C:/foo/bar.db"

def test_posix_surrealkv_url_unchanged(self) -> None:
url = "surrealkv:///home/user/.bicameral/ledger.db"
assert normalize_surrealkv_url(url) == url

def test_memory_url_unchanged(self) -> None:
assert normalize_surrealkv_url("memory://") == "memory://"

def test_ws_url_unchanged(self) -> None:
assert normalize_surrealkv_url("ws://localhost:8001") == "ws://localhost:8001"

def test_https_url_unchanged(self) -> None:
url = "https://api.surrealdb.com/db"
assert normalize_surrealkv_url(url) == url

def test_empty_string_unchanged(self) -> None:
assert normalize_surrealkv_url("") == ""

def test_normalised_url_parses_cleanly_with_urllib(self) -> None:
"""The output must not raise from ``urllib.parse.urlparse(...).port``."""
out = normalize_surrealkv_url(r"surrealkv://C:\Users\foo\bar.db")
parsed = urlparse(out)
assert parsed.scheme == "surrealkv"
# ``.port`` is the accessor that previously raised ValueError.
assert parsed.port is None


class TestLedgerClientUsesNormalizer:
"""Confirm the normalizer is wired into ``LedgerClient.__init__``."""

def test_constructor_normalises_windows_path(self) -> None:
c = LedgerClient(url=r"surrealkv://C:\temp\test.db")
assert c.url == "surrealkv://C:/temp/test.db"

def test_constructor_passes_memory_url_through(self) -> None:
c = LedgerClient(url="memory://")
assert c.url == "memory://"

def test_constructor_passes_ws_url_through(self) -> None:
c = LedgerClient(url="ws://localhost:8001")
assert c.url == "ws://localhost:8001"


class TestNormalizedURLConnectsCleanly:
"""End-to-end: a Windows-style URL constructed in a tmp_path fixture
must connect without raising. This is the original repro from #68."""

@pytest.mark.asyncio
async def test_windows_style_tmp_path_url_connects(self, tmp_path) -> None:
"""The exact pattern from ``test_schema_persistence.py`` fixtures."""
# On Windows this would previously fail in urllib.parse before
# ever touching the on-disk store. On POSIX tmp_path is already
# POSIX-style so this exercises the no-op path. Either way, the
# connection must succeed.
url = f"surrealkv://{tmp_path / 'ledger.db'}"
client = LedgerClient(url=url, ns="bicameral", db="ledger")
await client.connect()
try:
# Sanity: the client survived urlparse and reached SurrealDB.
rows = await client.query("INFO FOR DB")
# Either the query returns rows OR returns empty (v2 embedded
# quirk documented in CLAUDE.md). Both are fine — we only
# care that the connect path didn't raise on URL parsing.
assert isinstance(rows, list)
finally:
await client.close()
Loading