Skip to content
191 changes: 191 additions & 0 deletions gitnexus/src/core/group/extractors/http-patterns/python.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import type Parser from 'tree-sitter';
import Python from 'tree-sitter-python';
import {
compilePatterns,
Expand All @@ -12,6 +13,7 @@ import type { HttpDetection, HttpLanguagePlugin } from './types.js';
* - FastAPI `@app.get("/path")` provider decorators
* - `requests.get/post/...("url")` consumer calls
* - Generic `requests.request("METHOD", "url")` consumer calls
* - `httpx.AsyncClient` instances calling `.get/.post/...("url")`
*/

const FASTAPI_VERBS: Record<string, string> = {
Expand Down Expand Up @@ -77,11 +79,161 @@ const REQUESTS_GENERIC_PATTERNS = compilePatterns({
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: httpx.AsyncClient assignments ────────────────────────
// NOTE: This targeted detector only tracks explicit `httpx.AsyncClient(...)`
// construction. Direct imports (`from httpx import AsyncClient`), module
// aliases (`import httpx as hx`), and annotated assignments
// (`client: httpx.AsyncClient = ...`) are intentionally left for a follow-up.
// Module-scope clients are only matched at module scope; calls inside functions
// require a function/class-local tracked client to avoid false positives from
// same-name local variables.
//
// The query matches an `assignment` whose right-hand side is a call on the
// attribute `httpx.AsyncClient` (both identifiers are compared literally) and
// captures the entire left-hand side, whatever its node type, as `@client`.
const HTTPX_ASYNC_CLIENT_ASSIGN_PATTERNS = compilePatterns({
name: 'python-httpx-async-client-assign',
language: Python,
patterns: [
{
meta: {},
query: `
(assignment
left: (_) @client
right: (call
function: (attribute
object: (identifier) @module (#eq? @module "httpx")
attribute: (identifier) @client_class (#eq? @client_class "AsyncClient"))))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: async with httpx.AsyncClient() as client ──────────────
// Matches an `as_pattern` that wraps a call on the attribute
// `httpx.AsyncClient` and captures the identifier bound by `as` as `@client`.
// Only plain identifier targets are captured. The query matches on the
// `as_pattern` node alone; it does not itself check for an enclosing
// `async with` statement.
const HTTPX_ASYNC_CLIENT_WITH_ALIAS_PATTERNS = compilePatterns({
name: 'python-httpx-async-client-with-alias',
language: Python,
patterns: [
{
meta: {},
query: `
(as_pattern
(call
function: (attribute
object: (identifier) @module (#eq? @module "httpx")
attribute: (identifier) @client_class (#eq? @client_class "AsyncClient")))
(as_pattern_target (identifier) @client))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

/**
 * Builds a string key naming the lexical scope that encloses `node`.
 *
 * The search starts at `node` itself and walks up through `parent` links.
 *
 * @param node - Starting node; `null` yields the module key.
 * @param preferClass - When true, the nearest enclosing `class_definition`
 *   wins over any enclosing function. If no class is found the lookup falls
 *   back to the function search.
 * @returns `class:<start>:<end>`, `function:<start>:<end>`, or `'module'`
 *   when neither kind of ancestor exists.
 */
function getScopeKey(node: Parser.SyntaxNode | null, preferClass = false): string {
  // Nearest node (inclusive of `node`) whose type equals `wanted`, or null.
  const nearestOfType = (wanted: string): Parser.SyntaxNode | null => {
    for (let cursor = node; cursor; cursor = cursor.parent) {
      if (cursor.type === wanted) return cursor;
    }
    return null;
  };

  if (preferClass) {
    const owningClass = nearestOfType('class_definition');
    if (owningClass) {
      return `class:${owningClass.startIndex}:${owningClass.endIndex}`;
    }
  }

  const owningFunction = nearestOfType('function_definition');
  return owningFunction
    ? `function:${owningFunction.startIndex}:${owningFunction.endIndex}`
    : 'module';
}

/**
 * Scope key under which a captured client expression is recorded.
 *
 * A client whose source text contains a `.` (for example `self._client`) asks
 * `getScopeKey` to prefer the enclosing class; any other client uses the
 * default function/module lookup. The lookup starts at the node's parent.
 */
function trackedClientScopeKey(clientNode: Parser.SyntaxNode): string {
  const isDotted = clientNode.text.includes('.');
  const startFrom = clientNode.parent;
  return getScopeKey(startFrom, isDotted);
}

/**
 * Scope keys in which a call's receiver may be looked up.
 *
 * Currently this is exactly one key: the nearest scope of the receiver's
 * parent, preferring the enclosing class when the receiver text contains a
 * `.`. The result stays an array so callers can iterate over it.
 */
function callScopeKeys(clientNode: Parser.SyntaxNode): string[] {
  const isDotted = clientNode.text.includes('.');
  return [getScopeKey(clientNode.parent, isDotted)];
}

/**
 * Indexes every client expression captured by the httpx.AsyncClient
 * assignment and `as` alias queries.
 *
 * @param tree - Parsed tree handed to `runCompiledPatterns`.
 * @returns Map from the client's source text to the set of scope keys
 *   (from `trackedClientScopeKey`) in which that text was bound.
 */
function collectHttpxAsyncClients(tree: Parser.Tree): Map<string, Set<string>> {
  const scopesByClient = new Map<string, Set<string>>();

  // Assignment matches are consumed before alias matches.
  const sources = [HTTPX_ASYNC_CLIENT_ASSIGN_PATTERNS, HTTPX_ASYNC_CLIENT_WITH_ALIAS_PATTERNS];

  for (const compiled of sources) {
    for (const match of runCompiledPatterns(compiled, tree)) {
      const captured: Parser.SyntaxNode | undefined = match.captures.client;
      if (!captured) continue;

      const scopeKey = trackedClientScopeKey(captured);
      let scopes = scopesByClient.get(captured.text);
      if (scopes === undefined) {
        scopes = new Set<string>();
        scopesByClient.set(captured.text, scopes);
      }
      scopes.add(scopeKey);
    }
  }

  return scopesByClient;
}

/**
 * Reports whether a call receiver refers to a tracked httpx.AsyncClient.
 *
 * @param clients - Map from client source text to the scope keys it was bound in.
 * @param clientNode - Receiver node of the call being examined.
 * @returns true when the receiver's text is tracked and at least one key from
 *   `callScopeKeys` is among the tracked scopes for that text.
 */
function hasTrackedHttpxAsyncClient(
  clients: Map<string, Set<string>>,
  clientNode: Parser.SyntaxNode,
): boolean {
  const knownScopes = clients.get(clientNode.text);
  if (knownScopes === undefined) return false;

  for (const candidate of callScopeKeys(clientNode)) {
    if (knownScopes.has(candidate)) return true;
  }
  return false;
}

// ─── Consumer: httpx AsyncClient .get/.post/...("url") ──────────────
// Matches any call of the form `<receiver>.<verb>(<string>, ...)` where the
// verb is one of get/post/put/delete/patch and the first argument is a string
// literal. Captures: `@client` (the receiver, any node type), `@method` (the
// verb identifier) and `@path` (the first string argument).
// The query places no constraint on the receiver, so a match on its own does
// not establish that the receiver is an httpx client.
const HTTPX_ASYNC_CLIENT_VERB_PATTERNS = compilePatterns({
name: 'python-httpx-async-client-verb',
language: Python,
patterns: [
{
meta: {},
query: `
(call
function: (attribute
object: (_) @client
attribute: (identifier) @method (#match? @method "^(get|post|put|delete|patch)$"))
arguments: (argument_list . (string) @path))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: httpx AsyncClient .request("METHOD", "url") ─────────
// Matches any call of the form `<receiver>.request(<string>, <string>, ...)`.
// Captures: `@client` (the receiver, any node type), `@method` (the literal
// `request` identifier), `@http_method` (the first argument, which must be a
// string literal) and `@path` (a string literal argument after it).
// The query places no constraint on the receiver, so a match on its own does
// not establish that the receiver is an httpx client.
const HTTPX_ASYNC_CLIENT_GENERIC_PATTERNS = compilePatterns({
name: 'python-httpx-async-client-generic',
language: Python,
patterns: [
{
meta: {},
query: `
(call
function: (attribute
object: (_) @client
attribute: (identifier) @method (#eq? @method "request"))
arguments: (argument_list . (string) @http_method (string) @path))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
name: 'python-http',
language: Python,
scan(tree) {
const out: HttpDetection[] = [];
const httpxAsyncClients = collectHttpxAsyncClients(tree);

// Providers: FastAPI
for (const match of runCompiledPatterns(FASTAPI_PATTERNS, tree)) {
Expand Down Expand Up @@ -137,6 +289,45 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
});
}

// Consumers: httpx.AsyncClient.<verb>("url")
for (const match of runCompiledPatterns(HTTPX_ASYNC_CLIENT_VERB_PATTERNS, tree)) {
const clientNode = match.captures.client;
const methodNode = match.captures.method;
const pathNode = match.captures.path;
if (!clientNode || !methodNode || !pathNode) continue;
if (!hasTrackedHttpxAsyncClient(httpxAsyncClients, clientNode)) continue;
const path = unquoteLiteral(pathNode.text);
if (path === null) continue;
out.push({
role: 'consumer',
framework: 'python-httpx',
method: methodNode.text.toUpperCase(),
path,
name: null,
confidence: 0.7,
});
}

// Consumers: httpx.AsyncClient.request("METHOD", "url")
for (const match of runCompiledPatterns(HTTPX_ASYNC_CLIENT_GENERIC_PATTERNS, tree)) {
const clientNode = match.captures.client;
const methodNode = match.captures.http_method;
const pathNode = match.captures.path;
if (!clientNode || !methodNode || !pathNode) continue;
if (!hasTrackedHttpxAsyncClient(httpxAsyncClients, clientNode)) continue;
const methodRaw = unquoteLiteral(methodNode.text);
const path = unquoteLiteral(pathNode.text);
if (methodRaw === null || path === null) continue;
out.push({
role: 'consumer',
framework: 'python-httpx',
method: methodRaw.toUpperCase(),
path,
name: null,
confidence: 0.7,
});
}

return out;
},
};
72 changes: 70 additions & 2 deletions gitnexus/test/unit/group/http-route-extractor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ import { HttpRouteExtractor } from '../../../src/core/group/extractors/http-rout
import type { RepoHandle } from '../../../src/core/group/types.js';

describe('HttpRouteExtractor', () => {
const tmpDir = path.join(os.tmpdir(), `gitnexus-http-extract-${Date.now()}`);
let tmpDir: string;
let extractor: HttpRouteExtractor;

beforeEach(() => {
extractor = new HttpRouteExtractor();
fs.mkdirSync(tmpDir, { recursive: true });
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gitnexus-http-extract-'));
});

afterEach(() => {
Expand Down Expand Up @@ -436,6 +436,74 @@ def create_order():
consumers.find((c) => c.contractId === 'http::POST::/api/orders/{param}'),
).toBeDefined();
});
it('extracts Python httpx.AsyncClient calls assigned to attributes or aliases', async () => {
const dir = path.join(tmpDir, 'python-httpx-consumer');
fs.mkdirSync(path.join(dir, 'src'), { recursive: true });
fs.writeFileSync(
path.join(dir, 'src', 'client.py'),
Comment thread
github-advanced-security[bot] marked this conversation as resolved.
Fixed
`
import httpx

module_client = httpx.AsyncClient(base_url="https://svc.local")

class TopicClient:
def __init__(self):
self._client = httpx.AsyncClient(base_url="https://svc.local")

async def list_topics(self):
return await self._client.get("/topic")

async def publish(self):
return await self._client.request("POST", "/questions/import")

async def delete_topic(self):
return await self._client.delete("/topic")

async def check_duplicate():
async with httpx.AsyncClient() as client:
data = {}
data.get("/nope")
service.request("POST", "/nope")
return await client.post("https://svc.local/questions/duplicate-check")

def unrelated_scope_collision():
client = acquire_cache_client()
return client.get("/ignored-same-name")

def module_scope_shadow_collision():
client = acquire_cache_client()
return client.get("/ignored-module-same-name")

module_client.get("/module-topic")
`,
);

const contracts = await extractor.extract(null, dir, makeRepo(dir));
const consumers = contracts.filter((c) => c.role === 'consumer');

const expected = [
'http::GET::/topic',
'http::POST::/questions/import',
'http::DELETE::/topic',
'http::POST::/questions/duplicate-check',
'http::GET::/module-topic',
];

for (const contractId of expected) {
const consumer = consumers.find((c) => c.contractId === contractId);
expect(consumer).toBeDefined();
expect(consumer?.meta.framework).toBe('python-httpx');
}

expect(consumers.find((c) => c.contractId === 'http::GET::/nope')).toBeUndefined();
expect(consumers.find((c) => c.contractId === 'http::POST::/nope')).toBeUndefined();
expect(
consumers.find((c) => c.contractId === 'http::GET::/ignored-same-name'),
).toBeUndefined();
expect(
consumers.find((c) => c.contractId === 'http::GET::/ignored-module-same-name'),
).toBeUndefined();
});

it('extracts Java RestTemplate, WebClient and OkHttp calls', async () => {
const dir = path.join(tmpDir, 'java-consumer');
Expand Down
Loading