Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
8469305
[+] Add django route discovery to create cross-link for multi-repo
May 26, 2026
c5b9a1c
[+] Update ingestion
May 26, 2026
3134326
Merge branch 'abhigyanpatwari:main' into main
HuyNguyenDinh May 26, 2026
8f11fbd
Merge branch 'main' into main
magyargergo May 26, 2026
e5e7047
Merge branch 'main' into main
magyargergo May 27, 2026
2ff90c9
Merge branch 'main' into main
magyargergo May 27, 2026
5092327
Merge branch 'main' into main
magyargergo May 27, 2026
1b9ca35
Merge branch 'abhigyanpatwari:main' into main
HuyNguyenDinh May 27, 2026
ca91739
Merge branch 'main' into main
magyargergo May 27, 2026
fbcd773
Merge branch 'main' into main
magyargergo May 28, 2026
e6183d7
Merge branch 'main' into main
magyargergo May 28, 2026
bb6de7b
[~] Fix bugs and abstraction violation
May 30, 2026
9e92ffd
Merge branch 'origin/main' into main - resolve conflicts in python HT…
May 30, 2026
e1319be
Merge branch 'main' into main
HuyNguyenDinh May 30, 2026
f0a0029
Merge branch 'main' into main
HuyNguyenDinh May 30, 2026
838c164
Merge branch 'main' into main
magyargergo May 30, 2026
104c0df
Merge branch 'main' into main
HuyNguyenDinh Jun 1, 2026
d9aa2a0
Merge branch 'main' into main
HuyNguyenDinh Jun 1, 2026
980d693
Merge branch 'main' into main
HuyNguyenDinh Jun 3, 2026
2e0cfbf
feat(python-http): add keyword url= and variable propagation for cons…
Jun 3, 2026
0260729
[+] add extract uri and url keyword pattern for request http
Jun 3, 2026
d2c10b2
feat(python-http): add variable propagation for uri=/url= consumer pa…
Jun 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
266 changes: 265 additions & 1 deletion gitnexus/src/core/group/extractors/http-patterns/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type { HttpDetection, HttpLanguagePlugin, RepoContext } from './types.js'
/**
* Python HTTP plugin. Handles:
* - FastAPI `@app.get("/path")` provider decorators
* - Django `path("route/", view)` provider calls
* - `requests.get/post/...("url")` consumer calls
* - Generic `requests.request("METHOD", "url")` consumer calls
* - `httpx.AsyncClient` instances calling `.get/.post/...("url")`, including
Expand Down Expand Up @@ -52,6 +53,22 @@ const FASTAPI_APP_PATTERNS = compilePatterns({
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Provider: Django path()/re_path() ───────────────────────────────
// Matches a call whose callee is the bare identifier `path` or `re_path`
// and whose first argument is a string literal (captured as @path).
// Only the callee name is checked, so any function with one of these names
// matches. The legacy `url()` form is covered by DJANGO_URL_PATTERNS.
const DJANGO_PATH_PATTERNS = compilePatterns({
name: 'python-django-path',
language: Python,
patterns: [
{
meta: {},
query: `
(call
function: (identifier) @func (#match? @func "^(path|re_path)$")
arguments: (argument_list . (string) @path))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

const FASTAPI_ROUTER_PATTERNS = compilePatterns({
name: 'python-fastapi-router',
language: Python,
Expand All @@ -70,6 +87,21 @@ const FASTAPI_ROUTER_PATTERNS = compilePatterns({
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Provider: Django legacy url() ───────────────────────────────────
// Matches `url("<pattern>", <view>, ...)`: a call to the bare identifier
// `url` whose first argument is a string literal (captured as @pattern)
// immediately followed by a second argument (captured as @view).
//
// The view argument is matched with a wildcard rather than `(identifier)`
// so the common Django forms are all detected:
//   url(r'^a/$', index)               bare identifier
//   url(r'^a/$', views.index)         dotted attribute
//   url(r'^a/$', IndexView.as_view()) class-based view
//   url(r'^a/', include('app.urls'))  nested urlconf
// Requiring a second argument keeps one-argument helpers that merely happen
// to be named `url(...)` from matching.
const DJANGO_URL_PATTERNS = compilePatterns({
  name: 'python-django-url',
  language: Python,
  patterns: [
    {
      meta: {},
      query: `
        (call
          function: (identifier) @func (#eq? @func "url")
          arguments: (argument_list . (string) @pattern . (_) @view))
      `,
    },
  ],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── include_router(<router_obj>, prefix='/x') across the repo ────────
// Two shapes are common:
// app.include_router(assistant.router, prefix='/ai')
Expand Down Expand Up @@ -184,7 +216,7 @@ const FROM_IMPORT_MODULE_PATTERNS = compilePatterns({
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: requests.get/post/... ──────────────────────────────────
// ─── Consumer: requests.get/post/...("literal") ──────────────────────
const REQUESTS_VERB_PATTERNS = compilePatterns({
name: 'python-requests-verb',
language: Python,
Expand All @@ -202,6 +234,27 @@ const REQUESTS_VERB_PATTERNS = compilePatterns({
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: requests.get/post/...(url=VALUE) keyword ──────────────
// Matches `requests.<verb>(..., url="literal", ...)` where the receiver is
// the identifier `requests`, <verb> is one of get/post/put/delete/patch
// (captured as @method), and the `url` keyword value is a string literal
// (captured as @path). The keyword argument is not anchored, so it may sit
// at any position in the argument list.
const REQUESTS_KEYWORD_URL_PATTERNS = compilePatterns({
name: 'python-requests-keyword-url',
language: Python,
patterns: [
{
meta: {},
query: `
(call
function: (attribute
object: (identifier) @obj (#eq? @obj "requests")
attribute: (identifier) @method (#match? @method "^(get|post|put|delete|patch)$"))
arguments: (argument_list
(keyword_argument
name: (identifier) @kw (#eq? @kw "url")
value: (string) @path)))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: requests.request("METHOD", "url") ─────────────────────
const REQUESTS_GENERIC_PATTERNS = compilePatterns({
name: 'python-requests-generic',
Expand All @@ -220,6 +273,101 @@ const REQUESTS_GENERIC_PATTERNS = compilePatterns({
],
} satisfies LanguagePatterns<Record<string, never>>);

// ─── Consumer: wrapper classes with uri= or url= keyword argument ──────
// Common pattern: wrapper classes like RequestFetch that accept URL via
// named argument instead of positional argument:
// obj.fetch(uri="api/v1/camera/info/")
// obj.get(url="api/v1/camera/info/")
// obj.post(uri="api/v1/config/update/")
//
// Captures: @client (receiver expression), @method (attribute name),
// @kw (`uri` or `url`) and @path (the string literal value).
// NOTE: neither the receiver nor the method name is constrained, so this
// query also matches `requests.get(url="...")`, which
// REQUESTS_KEYWORD_URL_PATTERNS matches as well.
const WRAPPER_URI_PATTERNS = compilePatterns({
name: 'python-http-wrapper-uri',
language: Python,
patterns: [
{
meta: {},
// Match any method call where keyword argument is `uri` or `url`
query: `
(call
function: (attribute
object: (_) @client
attribute: (identifier) @method)
arguments: (argument_list
(keyword_argument
name: (identifier) @kw (#match? @kw "^(uri|url)$")
value: (string) @path)))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

/**
 * Maps a lower-cased wrapper method name to the HTTP verb it is reported as.
 * `fetch` and `request` carry no verb in their name and are reported as GET.
 *
 * The table is built on a null-prototype object so that lookups with
 * arbitrary method names taken from scanned source (for example
 * `constructor` or `__proto__`) yield `undefined` instead of an inherited
 * `Object.prototype` member. Callers can therefore rely on
 * `WRAPPER_METHOD_TO_HTTP[name] ?? 'GET'` always producing a string.
 */
const WRAPPER_METHOD_TO_HTTP: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    get: 'GET',
    post: 'POST',
    put: 'PUT',
    delete: 'DELETE',
    patch: 'PATCH',
    fetch: 'GET',
    request: 'GET',
  },
);

// ─── Variable-to-string propagation patterns ─────────────────────────
// Many repos assign URL paths to local variables then pass them as
// keyword arguments: uri = "api/v1/endpoint/"; obj.fetch(uri=uri, body)
// These patterns + buildLocalStringMap resolve the variable → literal chain.

// Track local string constants: uri = "api/v1/endpoint/"
// Matches every assignment whose left side is a bare identifier (captured
// as @var_name) and whose right side is a string literal (captured as
// @var_value). The query carries no scope constraint, so assignments at any
// nesting level match; attribute or subscript targets do not.
const LOCAL_STRING_ASSIGNMENTS = compilePatterns({
name: 'python-local-string-assign',
language: Python,
patterns: [
{
meta: {},
query: `
(assignment
left: (identifier) @var_name
right: (string) @var_value)
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

// Match method calls where the uri=/url= value is a bare identifier.
// The query itself does not check that the identifier was assigned a string
// literal; the consumer of these matches resolves @path_var by name.
// Captures: @client (receiver expression), @method (attribute name),
// @kw (`uri` or `url`) and @path_var (the identifier passed as the value).
const WRAPPER_URI_VAR_PATTERNS = compilePatterns({
name: 'python-http-wrapper-uri-var',
language: Python,
patterns: [
{
meta: {},
query: `
(call
function: (attribute
object: (_) @client
attribute: (identifier) @method)
arguments: (argument_list
(keyword_argument
name: (identifier) @kw (#match? @kw "^(uri|url)$")
value: (identifier) @path_var)))
`,
},
],
} satisfies LanguagePatterns<Record<string, never>>);

/**
 * Pre-scan a parsed file for `name = "literal"` assignments.
 *
 * Every LOCAL_STRING_ASSIGNMENTS match whose value unquotes successfully is
 * recorded under the assigned identifier's text. The map is keyed by name
 * only, so when the same name is assigned more than once the last match
 * processed wins.
 *
 * @param tree Parsed syntax tree of the file being scanned.
 * @returns Map from variable name to its unquoted string value.
 */
function buildLocalStringMap(tree: Parser.Tree): Map<string, string> {
  const literalsByName = new Map<string, string>();
  for (const { captures } of runCompiledPatterns(LOCAL_STRING_ASSIGNMENTS, tree)) {
    const nameNode = captures.var_name;
    const literalNode = captures.var_value;
    if (nameNode && literalNode) {
      const text = unquoteLiteral(literalNode.text);
      // unquoteLiteral signals "not a usable literal" with null
      if (text !== null) {
        literalsByName.set(nameNode.text, text);
      }
    }
  }
  return literalsByName;
}

// ─── Consumer: httpx.AsyncClient assignments ────────────────────────
// Module-scope clients are only matched
// at module scope; calls inside functions require a function/class-local tracked
Expand Down Expand Up @@ -822,6 +970,36 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
});
}

// Providers: Django route tables — path()/re_path() first, then legacy
// url(). Both pattern sets yield the same detection shape and differ only
// in the capture that holds the route literal, so they share one loop.
// Django routes are not bound to a verb here, hence method '*'.
const djangoRouteSources = [
  { patterns: DJANGO_PATH_PATTERNS, literalCapture: 'path' },
  { patterns: DJANGO_URL_PATTERNS, literalCapture: 'pattern' },
] as const;
for (const { patterns, literalCapture } of djangoRouteSources) {
  for (const match of runCompiledPatterns(patterns, tree)) {
    const literalNode = match.captures[literalCapture];
    const route = literalNode ? unquoteLiteral(literalNode.text) : null;
    // Skip matches with no capture or a literal that cannot be unquoted
    if (route === null) continue;
    out.push({
      role: 'provider',
      framework: 'django',
      method: '*',
      path: route,
      name: null,
      confidence: 0.7,
    });
  }
}

// Providers: FastAPI @router.<verb>("/path") — must be joined
// with the prefix(es) declared at the include_router site. When
// no prefix is found we still emit the unprefixed path so this
Expand Down Expand Up @@ -880,6 +1058,23 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
});
}

// Consumers: requests.<verb>(url="literal") — URL supplied by keyword.
// The verb is taken from the called attribute name and upper-cased.
for (const { captures } of runCompiledPatterns(REQUESTS_KEYWORD_URL_PATTERNS, tree)) {
  const verbNode = captures.method;
  const urlNode = captures.path;
  if (verbNode && urlNode) {
    const url = unquoteLiteral(urlNode.text);
    if (url !== null) {
      out.push({
        role: 'consumer',
        framework: 'python-requests',
        method: verbNode.text.toUpperCase(),
        path: url,
        name: null,
        confidence: 0.7,
      });
    }
  }
}

// Consumers: requests.request("METHOD", "url")
for (const match of runCompiledPatterns(REQUESTS_GENERIC_PATTERNS, tree)) {
const methodNode = match.captures.http_method;
Expand Down Expand Up @@ -937,6 +1132,75 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = {
});
}

// Consumers: wrapper classes with uri= or url= keyword argument
//   obj.fetch(uri="api/v1/camera/info/")
//   obj.post(url="api/v1/config/update/")
// Each detection is keyed by the byte offset of its URL literal. The offset
// is unique per literal, so a repeated match for the same keyword argument
// is dropped while separate calls that share a source line are all kept.
const seenUriDetections = new Set<number>();
// Mirrors the verb list of REQUESTS_KEYWORD_URL_PATTERNS (case-sensitive).
const requestsKeywordVerb = /^(get|post|put|delete|patch)$/;
for (const match of runCompiledPatterns(WRAPPER_URI_PATTERNS, tree)) {
  const methodNode = match.captures.method;
  const pathNode = match.captures.path;
  if (!methodNode || !pathNode) continue;
  const path = unquoteLiteral(pathNode.text);
  if (path === null) continue;

  // WRAPPER_URI_PATTERNS accepts any receiver, so it also matches
  // requests.<verb>(url="..."). Those calls are already reported by the
  // dedicated python-requests keyword scan; skip them here to avoid emitting
  // the same call twice under two frameworks.
  const clientNode = match.captures.client;
  if (clientNode?.text === 'requests' && requestsKeywordVerb.test(methodNode.text)) continue;

  const dedupKey = pathNode.startIndex;
  if (seenUriDetections.has(dedupKey)) continue;
  seenUriDetections.add(dedupKey);

  // Map wrapper method name to HTTP verb; unknown names default to GET
  const methodName = methodNode.text.toLowerCase();
  const httpMethod = WRAPPER_METHOD_TO_HTTP[methodName] ?? 'GET';

  out.push({
    role: 'consumer',
    framework: 'python-http-wrapper',
    method: httpMethod,
    path,
    name: null,
    confidence: 0.65,
  });
}

// Variable propagation: uri = "api/v1/endpoint/"; obj.fetch(uri=uri)
// Many repos assign URL paths to local vars then pass as keyword args.
// The identifier passed as uri=/url= is looked up by name in the map of
// string assignments collected from the same file.
const localStrings = buildLocalStringMap(tree);
// Keyed by the byte offset of the identifier argument: unique per call
// site, so several calls on one source line are each reported.
const seenVarDetections = new Set<number>();
for (const match of runCompiledPatterns(WRAPPER_URI_VAR_PATTERNS, tree)) {
  const methodNode = match.captures.method;
  const pathVarNode = match.captures.path_var;
  if (!methodNode || !pathVarNode) continue;

  const dedupKey = pathVarNode.startIndex;
  if (seenVarDetections.has(dedupKey)) continue;
  seenVarDetections.add(dedupKey);

  // Unknown variables and empty strings carry no usable path
  const resolved = localStrings.get(pathVarNode.text);
  if (!resolved) continue;
  const normalized = normalizeConsumerPath(resolved);
  // A bare root path is not reported
  if (normalized === '/') continue;

  // Map wrapper method name to HTTP verb; unknown names default to GET
  const httpMethod = WRAPPER_METHOD_TO_HTTP[methodNode.text.toLowerCase()] ?? 'GET';
  out.push({
    role: 'consumer',
    framework: 'python-http-wrapper',
    method: httpMethod,
    path: normalized,
    name: null,
    confidence: 0.6,
  });
}

return out;
},
};

/**
 * Normalize a consumer URL or path into a canonical route shape.
 *
 * Steps, in order:
 *  1. `${...}` template expressions become `{param}`.
 *  2. For absolute http(s) URLs, scheme and host are removed.
 *  3. Query string and fragment are removed (absolute and relative alike).
 *  4. Empty segments are dropped and all-digit segments become `{param}`.
 *
 * The result always starts with `/` and never ends with one, except for the
 * root path `/` itself.
 *
 * @param url Raw URL or path as written in source.
 * @returns The normalized path.
 */
function normalizeConsumerPath(url: string): string {
  let s = url.replace(/\$\{[^}]+\}/g, '{param}').trim();
  if (/^https?:\/\//i.test(s)) {
    try {
      // URL percent-encodes `{` and `}` in the path; undo that so `{param}`
      // style placeholders survive host stripping unchanged.
      s = new URL(s).pathname.replace(/%7B/gi, '{').replace(/%7D/gi, '}');
    } catch {
      // Unparseable URL: strip scheme and host textually instead.
      s = s.replace(/^https?:\/\/[^/]+/i, '');
    }
  }
  // Relative paths and the textual fallback may still carry ?query or #hash.
  const cut = s.search(/[?#]/);
  if (cut !== -1) s = s.slice(0, cut);
  const segments = s
    .split('/')
    .filter(Boolean)
    .map((seg) => (/^\d+$/.test(seg) ? '{param}' : seg));
  // With no segments this yields '/', otherwise '/a/b' without trailing slash.
  return '/' + segments.join('/');
}
16 changes: 16 additions & 0 deletions gitnexus/src/core/ingestion/language-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ import type { ImportResolverFn } from './import-resolvers/types.js';
import type { NamedBindingExtractorFn } from './named-bindings/types.js';
import type { SyntaxNode } from './utils/ast-helpers.js';
import type { NodeLabel } from 'gitnexus-shared';
import type { ExtractedRoute } from './route-extractors/laravel.js';
import type Parser from 'tree-sitter';

// ── Shared type aliases ────────────────────────────────────────────────────
/** Tree-sitter query captures: capture name → AST node (or undefined if not captured). */
Expand Down Expand Up @@ -301,6 +303,20 @@ interface LanguageProviderConfig {
* When true, the worker extracts routes via the language's route extraction logic.
* Default: undefined (no route files). */
readonly isRouteFile?: (filePath: string) => boolean;
/** Discover the root route file (e.g. Django root urls.py).
* If not provided, we extract from all route files matching `isRouteFile`. */
readonly discoverRootRouteFile?: (
files: Array<{ path: string; content: string }>,
contentMap?: Map<string, string>,
) => string | null;
/** Extract routes from a framework route file.
* Default: undefined (no route extraction). */
readonly extractRoutes?: (
tree: Parser.Tree,
filePath: string,
reader: (relativePath: string) => string | null,
parser?: Parser | null,
) => ExtractedRoute[];

// ── Call-resolution DAG hooks ─────────────────────────────────────
/**
Expand Down
2 changes: 2 additions & 0 deletions gitnexus/src/core/ingestion/languages/php.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import { phpVariableConfig } from '../variable-extractors/configs/php.js';
import { createCallExtractor } from '../call-extractors/generic.js';
import { phpCallConfig } from '../call-extractors/configs/php.js';
import { createHeritageExtractor } from '../heritage-extractors/generic.js';
import { extractLaravelRoutes } from '../route-extractors/laravel.js';

const BUILT_INS: ReadonlySet<string> = new Set([
'echo',
Expand Down Expand Up @@ -298,6 +299,7 @@ export const phpProvider = defineLanguage({
heritageExtractor: createHeritageExtractor(SupportedLanguages.PHP),
descriptionExtractor: phpDescriptionExtractor,
isRouteFile: isPhpRouteFile,
extractRoutes: (tree, filePath) => extractLaravelRoutes(tree, filePath),
builtInNames: BUILT_INS,
// ── RFC #909 Ring 3: scope-based resolution hooks ──────────────────────
emitScopeCaptures: emitPhpScopeCaptures,
Expand Down
Loading