diff --git a/gitnexus/src/core/group/extractors/http-patterns/python.ts b/gitnexus/src/core/group/extractors/http-patterns/python.ts
index 27ddf6633c..0d33852470 100644
--- a/gitnexus/src/core/group/extractors/http-patterns/python.ts
+++ b/gitnexus/src/core/group/extractors/http-patterns/python.ts
@@ -1,3 +1,4 @@
+import type Parser from 'tree-sitter';
 import Python from 'tree-sitter-python';
 import {
   compilePatterns,
@@ -12,6 +13,7 @@ import type { HttpDetection, HttpLanguagePlugin } from './types.js';
  * - FastAPI `@app.get("/path")` provider decorators
  * - `requests.get/post/...("url")` consumer calls
  * - Generic `requests.request("METHOD", "url")` consumer calls
+ * - `httpx.AsyncClient` instances calling `.get/.post/...("url")`
  */
 
 const FASTAPI_VERBS: Record<string, string> = {
@@ -77,11 +79,196 @@ const REQUESTS_GENERIC_PATTERNS = compilePatterns({
   ],
 } satisfies LanguagePatterns<Record<string, never>>);
 
+// ─── Consumer: httpx.AsyncClient assignments ────────────────────────
+// NOTE: This targeted detector only tracks explicit `httpx.AsyncClient(...)`
+// construction. Direct imports (`from httpx import AsyncClient`), module
+// aliases (`import httpx as hx`), and annotated assignments (`client: httpx.AsyncClient = ...`)
+// are intentionally left for a follow-up. Module-scope clients are only matched
+// at module scope; calls inside functions require a function/class-local tracked
+// client to avoid false positives from same-name local variables.
+const HTTPX_ASYNC_CLIENT_ASSIGN_PATTERNS = compilePatterns({
+  name: 'python-httpx-async-client-assign',
+  language: Python,
+  patterns: [
+    {
+      meta: {},
+      query: `
+        (assignment
+          left: (_) @client
+          right: (call
+            function: (attribute
+              object: (identifier) @module (#eq? @module "httpx")
+              attribute: (identifier) @client_class (#eq? @client_class "AsyncClient"))))
+      `,
+    },
+  ],
+} satisfies LanguagePatterns<Record<string, never>>);
+
+// ─── Consumer: async with httpx.AsyncClient() as client ──────────────
+const HTTPX_ASYNC_CLIENT_WITH_ALIAS_PATTERNS = compilePatterns({
+  name: 'python-httpx-async-client-with-alias',
+  language: Python,
+  patterns: [
+    {
+      meta: {},
+      query: `
+        (as_pattern
+          (call
+            function: (attribute
+              object: (identifier) @module (#eq? @module "httpx")
+              attribute: (identifier) @client_class (#eq? @client_class "AsyncClient")))
+          (as_pattern_target (identifier) @client))
+      `,
+    },
+  ],
+} satisfies LanguagePatterns<Record<string, never>>);
+
+/**
+ * Build a key identifying the scope that encloses `node`.
+ *
+ * Walks `parent` links upward. With `preferClass` set, the nearest enclosing
+ * `class_definition` wins; otherwise, or when no class encloses the node, the
+ * nearest enclosing `function_definition` is used. Keys embed the scope node's
+ * start and end offsets so two scopes of the same kind get different keys.
+ *
+ * @param node - Node to start the walk from (inclusive).
+ * @param preferClass - Look for an enclosing class before any function.
+ * @returns `class:<start>:<end>`, `function:<start>:<end>`, or `'module'`.
+ */
+function getScopeKey(node: Parser.SyntaxNode | null, preferClass = false): string {
+  // Ordered by priority: an enclosing class, when requested, beats any function.
+  const scopeKinds = preferClass ? ['class', 'function'] : ['function'];
+
+  for (const kind of scopeKinds) {
+    for (let current = node; current; current = current.parent) {
+      if (current.type === `${kind}_definition`) {
+        return `${kind}:${current.startIndex}:${current.endIndex}`;
+      }
+    }
+  }
+
+  return 'module';
+}
+
+/**
+ * Scope key under which a tracked client binding is recorded.
+ *
+ * Dotted targets such as `self._client` are keyed by the enclosing class when
+ * there is one, so the same text used in another method of that class maps to
+ * the same key; plain identifiers are keyed by the enclosing function.
+ *
+ * @param clientNode - Captured assignment target or `as` alias.
+ * @returns Scope key as produced by `getScopeKey`.
+ */
+function trackedClientScopeKey(clientNode: Parser.SyntaxNode): string {
+  return getScopeKey(clientNode.parent, clientNode.text.includes('.'));
+}
+
+/**
+ * Scope keys in which a call receiver may resolve to a tracked client.
+ *
+ * Only the receiver's nearest scope is considered, using the same rule as
+ * `trackedClientScopeKey`; enclosing outer scopes are not searched.
+ *
+ * @param clientNode - Receiver expression of the call, e.g. `client`.
+ * @returns A single-element array holding the nearest scope key.
+ */
+function callScopeKeys(clientNode: Parser.SyntaxNode): string[] {
+  return [trackedClientScopeKey(clientNode)];
+}
+
+/**
+ * Collect every expression bound to an `httpx.AsyncClient(...)` call.
+ *
+ * Covers assignment targets and `... as <name>` aliases.
+ *
+ * @param tree - Parsed Python source tree.
+ * @returns Map from client expression text (e.g. `client`, `self._client`) to
+ *   the scope keys in which that text was bound to a client.
+ */
+function collectHttpxAsyncClients(tree: Parser.Tree): Map<string, Set<string>> {
+  const clients = new Map<string, Set<string>>();
+
+  const addClient = (clientNode: Parser.SyntaxNode | undefined) => {
+    if (!clientNode) return;
+    const scopeKey = trackedClientScopeKey(clientNode);
+    const clientText = clientNode.text;
+    const scopes = clients.get(clientText) ?? new Set<string>();
+    scopes.add(scopeKey);
+    clients.set(clientText, scopes);
+  };
+
+  for (const match of runCompiledPatterns(HTTPX_ASYNC_CLIENT_ASSIGN_PATTERNS, tree)) {
+    addClient(match.captures.client);
+  }
+
+  for (const match of runCompiledPatterns(HTTPX_ASYNC_CLIENT_WITH_ALIAS_PATTERNS, tree)) {
+    addClient(match.captures.client);
+  }
+
+  return clients;
+}
+
+/**
+ * Check whether a call receiver is a tracked `httpx.AsyncClient`.
+ *
+ * @param clients - Result of `collectHttpxAsyncClients`.
+ * @param clientNode - Receiver expression of the call.
+ * @returns `true` when the same expression text was bound to a client in the
+ *   receiver's own scope.
+ */
+function hasTrackedHttpxAsyncClient(
+  clients: Map<string, Set<string>>,
+  clientNode: Parser.SyntaxNode,
+): boolean {
+  const scopes = clients.get(clientNode.text);
+  if (!scopes) return false;
+
+  return callScopeKeys(clientNode).some((scopeKey) => scopes.has(scopeKey));
+}
+
+// ─── Consumer: httpx AsyncClient .get/.post/...("url") ──────────────
+const HTTPX_ASYNC_CLIENT_VERB_PATTERNS = compilePatterns({
+  name: 'python-httpx-async-client-verb',
+  language: Python,
+  patterns: [
+    {
+      meta: {},
+      query: `
+        (call
+          function: (attribute
+            object: (_) @client
+            attribute: (identifier) @method (#match? @method "^(get|post|put|delete|patch)$"))
+          arguments: (argument_list . (string) @path))
+      `,
+    },
+  ],
+} satisfies LanguagePatterns<Record<string, never>>);
+
+// ─── Consumer: httpx AsyncClient .request("METHOD", "url") ─────────
+const HTTPX_ASYNC_CLIENT_GENERIC_PATTERNS = compilePatterns({
+  name: 'python-httpx-async-client-generic',
+  language: Python,
+  patterns: [
+    {
+      meta: {},
+      query: `
+        (call
+          function: (attribute
+            object: (_) @client
+            attribute: (identifier) @method (#eq? @method "request"))
+          arguments: (argument_list . (string) @http_method (string) @path))
+      `,
+    },
+  ],
+} satisfies LanguagePatterns<Record<string, never>>);
+
 export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
   name: 'python-http',
   language: Python,
   scan(tree) {
     const out: HttpDetection[] = [];
+    const httpxAsyncClients = collectHttpxAsyncClients(tree);
 
     // Providers: FastAPI
     for (const match of runCompiledPatterns(FASTAPI_PATTERNS, tree)) {
@@ -137,6 +324,45 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
       });
     }
 
+    // Consumers: httpx.AsyncClient.<verb>("url")
+    for (const match of runCompiledPatterns(HTTPX_ASYNC_CLIENT_VERB_PATTERNS, tree)) {
+      const clientNode = match.captures.client;
+      const methodNode = match.captures.method;
+      const pathNode = match.captures.path;
+      if (!clientNode || !methodNode || !pathNode) continue;
+      if (!hasTrackedHttpxAsyncClient(httpxAsyncClients, clientNode)) continue;
+      const path = unquoteLiteral(pathNode.text);
+      if (path === null) continue;
+      out.push({
+        role: 'consumer',
+        framework: 'python-httpx',
+        method: methodNode.text.toUpperCase(),
+        path,
+        name: null,
+        confidence: 0.7,
+      });
+    }
+
+    // Consumers: httpx.AsyncClient.request("METHOD", "url")
+    for (const match of runCompiledPatterns(HTTPX_ASYNC_CLIENT_GENERIC_PATTERNS, tree)) {
+      const clientNode = match.captures.client;
+      const methodNode = match.captures.http_method;
+      const pathNode = match.captures.path;
+      if (!clientNode || !methodNode || !pathNode) continue;
+      if (!hasTrackedHttpxAsyncClient(httpxAsyncClients, clientNode)) continue;
+      const methodRaw = unquoteLiteral(methodNode.text);
+      const path = unquoteLiteral(pathNode.text);
+      if (methodRaw === null || path === null) continue;
+      out.push({
+        role: 'consumer',
+        framework: 'python-httpx',
+        method: methodRaw.toUpperCase(),
+        path,
+        name: null,
+        confidence: 0.7,
+      });
+    }
+
     return out;
   },
 };
diff --git a/gitnexus/test/unit/group/http-route-extractor.test.ts b/gitnexus/test/unit/group/http-route-extractor.test.ts
index 2e3b0d2120..aa648a71d5 100644
--- a/gitnexus/test/unit/group/http-route-extractor.test.ts
+++ b/gitnexus/test/unit/group/http-route-extractor.test.ts
@@ -14,12 +14,12 @@ import { HttpRouteExtractor } from '../../../src/core/group/extractors/http-rout
 import type { RepoHandle } from '../../../src/core/group/types.js';
 
 describe('HttpRouteExtractor', () => {
-  const tmpDir = path.join(os.tmpdir(), `gitnexus-http-extract-${Date.now()}`);
+  let tmpDir: string;
   let extractor: HttpRouteExtractor;
 
   beforeEach(() => {
     extractor = new HttpRouteExtractor();
-    fs.mkdirSync(tmpDir, { recursive: true });
+    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gitnexus-http-extract-'));
   });
 
   afterEach(() => {
@@ -436,6 +436,74 @@ def create_order():
       consumers.find((c) => c.contractId === 'http::POST::/api/orders/{param}'),
     ).toBeDefined();
   });
+  it('extracts Python httpx.AsyncClient calls assigned to attributes or aliases', async () => {
+    const dir = path.join(tmpDir, 'python-httpx-consumer');
+    fs.mkdirSync(path.join(dir, 'src'), { recursive: true });
+    fs.writeFileSync(
+      path.join(dir, 'src', 'client.py'),
+      `
+import httpx
+
+module_client = httpx.AsyncClient(base_url="https://svc.local")
+
+class TopicClient:
+    def __init__(self):
+        self._client = httpx.AsyncClient(base_url="https://svc.local")
+
+    async def list_topics(self):
+        return await self._client.get("/topic")
+
+    async def publish(self):
+        return await self._client.request("POST", "/questions/import")
+
+    async def delete_topic(self):
+        return await self._client.delete("/topic")
+
+async def check_duplicate():
+    async with httpx.AsyncClient() as client:
+        data = {}
+        data.get("/nope")
+        service.request("POST", "/nope")
+        return await client.post("https://svc.local/questions/duplicate-check")
+
+def unrelated_scope_collision():
+    client = acquire_cache_client()
+    return client.get("/ignored-same-name")
+
+def module_scope_shadow_collision():
+    client = acquire_cache_client()
+    return client.get("/ignored-module-same-name")
+
+module_client.get("/module-topic")
+`,
+    );
+
+    const contracts = await extractor.extract(null, dir, makeRepo(dir));
+    const consumers = contracts.filter((c) => c.role === 'consumer');
+
+    const expected = [
+      'http::GET::/topic',
+      'http::POST::/questions/import',
+      'http::DELETE::/topic',
+      'http::POST::/questions/duplicate-check',
+      'http::GET::/module-topic',
+    ];
+
+    for (const contractId of expected) {
+      const consumer = consumers.find((c) => c.contractId === contractId);
+      expect(consumer).toBeDefined();
+      expect(consumer?.meta.framework).toBe('python-httpx');
+    }
+
+    expect(consumers.find((c) => c.contractId === 'http::GET::/nope')).toBeUndefined();
+    expect(consumers.find((c) => c.contractId === 'http::POST::/nope')).toBeUndefined();
+    expect(
+      consumers.find((c) => c.contractId === 'http::GET::/ignored-same-name'),
+    ).toBeUndefined();
+    expect(
+      consumers.find((c) => c.contractId === 'http::GET::/ignored-module-same-name'),
+    ).toBeUndefined();
+  });
 
   it('extracts Java RestTemplate, WebClient and OkHttp calls', async () => {
     const dir = path.join(tmpDir, 'java-consumer');