diff --git a/gitnexus/src/core/group/extractors/http-patterns/python.ts b/gitnexus/src/core/group/extractors/http-patterns/python.ts index d467cdb342..c950ec4c3b 100644 --- a/gitnexus/src/core/group/extractors/http-patterns/python.ts +++ b/gitnexus/src/core/group/extractors/http-patterns/python.ts @@ -11,6 +11,7 @@ import type { HttpDetection, HttpLanguagePlugin, RepoContext } from './types.js' /** * Python HTTP plugin. Handles: * - FastAPI `@app.get("/path")` provider decorators + * - Django `path("route/", view)` provider calls * - `requests.get/post/...("url")` consumer calls * - Generic `requests.request("METHOD", "url")` consumer calls * - `httpx.AsyncClient` instances calling `.get/.post/...("url")`, including @@ -52,6 +53,22 @@ const FASTAPI_APP_PATTERNS = compilePatterns({ ], } satisfies LanguagePatterns>); +// ─── Provider: Django path()/re_path()/url() ───────────────────────── +const DJANGO_PATH_PATTERNS = compilePatterns({ + name: 'python-django-path', + language: Python, + patterns: [ + { + meta: {}, + query: ` + (call + function: (identifier) @func (#match? @func "^(path|re_path)$") + arguments: (argument_list . (string) @path)) + `, + }, + ], +} satisfies LanguagePatterns>); + const FASTAPI_ROUTER_PATTERNS = compilePatterns({ name: 'python-fastapi-router', language: Python, @@ -70,6 +87,21 @@ const FASTAPI_ROUTER_PATTERNS = compilePatterns({ ], } satisfies LanguagePatterns>); +const DJANGO_URL_PATTERNS = compilePatterns({ + name: 'python-django-url', + language: Python, + patterns: [ + { + meta: {}, + query: ` + (call + function: (identifier) @func (#eq? @func "url") + arguments: (argument_list . (string) @pattern . 
(identifier) @view)) + `, + }, + ], +} satisfies LanguagePatterns>); + // ─── include_router(, prefix='/x') across the repo ──────── // Two shapes are common: // app.include_router(assistant.router, prefix='/ai') @@ -184,7 +216,7 @@ const FROM_IMPORT_MODULE_PATTERNS = compilePatterns({ ], } satisfies LanguagePatterns>); -// ─── Consumer: requests.get/post/... ────────────────────────────────── +// ─── Consumer: requests.get/post/...("literal") ────────────────────── const REQUESTS_VERB_PATTERNS = compilePatterns({ name: 'python-requests-verb', language: Python, @@ -202,6 +234,27 @@ const REQUESTS_VERB_PATTERNS = compilePatterns({ ], } satisfies LanguagePatterns>); +// ─── Consumer: requests.get/post/...(url=VALUE) keyword ────────────── +const REQUESTS_KEYWORD_URL_PATTERNS = compilePatterns({ + name: 'python-requests-keyword-url', + language: Python, + patterns: [ + { + meta: {}, + query: ` + (call + function: (attribute + object: (identifier) @obj (#eq? @obj "requests") + attribute: (identifier) @method (#match? @method "^(get|post|put|delete|patch)$")) + arguments: (argument_list + (keyword_argument + name: (identifier) @kw (#eq? 
@kw "url") + value: (string) @path))) + `, + }, + ], +} satisfies LanguagePatterns>); + // ─── Consumer: requests.request("METHOD", "url") ───────────────────── const REQUESTS_GENERIC_PATTERNS = compilePatterns({ name: 'python-requests-generic', @@ -220,6 +273,101 @@ const REQUESTS_GENERIC_PATTERNS = compilePatterns({ ], } satisfies LanguagePatterns>); +// ─── Consumer: wrapper classes with uri= or url= keyword argument ────── +// Common pattern: wrapper classes like RequestFetch that accept URL via +// named argument instead of positional argument: +// obj.fetch(uri="api/v1/camera/info/") +// obj.get(url="api/v1/camera/info/") +// obj.post(uri="api/v1/config/update/") +const WRAPPER_URI_PATTERNS = compilePatterns({ + name: 'python-http-wrapper-uri', + language: Python, + patterns: [ + { + meta: {}, + // Match any method call where keyword argument is `uri` or `url` + query: ` + (call + function: (attribute + object: (_) @client + attribute: (identifier) @method) + arguments: (argument_list + (keyword_argument + name: (identifier) @kw (#match? @kw "^(uri|url)$") + value: (string) @path))) + `, + }, + ], +} satisfies LanguagePatterns>); + +// Map wrapper method names to HTTP verbs +const WRAPPER_METHOD_TO_HTTP: Record = { + get: 'GET', + post: 'POST', + put: 'PUT', + delete: 'DELETE', + patch: 'PATCH', + fetch: 'GET', + request: 'GET', +}; + +// ─── Variable-to-string propagation patterns ───────────────────────── +// Many repos assign URL paths to local variables then pass them as +// keyword arguments: uri = "api/v1/endpoint/"; obj.fetch(uri=uri, body) +// These patterns + buildLocalStringMap resolve the variable → literal chain. 
+ +// Track local string constants: uri = "api/v1/endpoint/" +const LOCAL_STRING_ASSIGNMENTS = compilePatterns({ + name: 'python-local-string-assign', + language: Python, + patterns: [ + { + meta: {}, + query: ` + (assignment + left: (identifier) @var_name + right: (string) @var_value) + `, + }, + ], +} satisfies LanguagePatterns>); + +// Match method calls where uri=/url= value is a variable that was previously +// assigned a string literal +const WRAPPER_URI_VAR_PATTERNS = compilePatterns({ + name: 'python-http-wrapper-uri-var', + language: Python, + patterns: [ + { + meta: {}, + query: ` + (call + function: (attribute + object: (_) @client + attribute: (identifier) @method) + arguments: (argument_list + (keyword_argument + name: (identifier) @kw (#match? @kw "^(uri|url)$") + value: (identifier) @path_var))) + `, + }, + ], +} satisfies LanguagePatterns>); + +// Pre-scan: collect local string assignments (uri = "api/v1/endpoint/") +function buildLocalStringMap(tree: Parser.Tree): Map { + const map = new Map(); + for (const match of runCompiledPatterns(LOCAL_STRING_ASSIGNMENTS, tree)) { + const varNode = match.captures.var_name; + const valNode = match.captures.var_value; + if (!varNode || !valNode) continue; + const val = unquoteLiteral(valNode.text); + if (val === null) continue; + map.set(varNode.text, val); + } + return map; +} + // ─── Consumer: httpx.AsyncClient assignments ──────────────────────── // Module-scope clients are only matched // at module scope; calls inside functions require a function/class-local tracked @@ -822,6 +970,36 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = { }); } + // Providers: Django path()/re_path()/url() + for (const match of runCompiledPatterns(DJANGO_PATH_PATTERNS, tree)) { + const pathNode = match.captures.path; + if (!pathNode) continue; + const path = unquoteLiteral(pathNode.text); + if (path === null) continue; + out.push({ + role: 'provider', + framework: 'django', + method: '*', + path, + name: null, + 
confidence: 0.7, + }); + } + for (const match of runCompiledPatterns(DJANGO_URL_PATTERNS, tree)) { + const patternNode = match.captures.pattern; + if (!patternNode) continue; + const path = unquoteLiteral(patternNode.text); + if (path === null) continue; + out.push({ + role: 'provider', + framework: 'django', + method: '*', + path, + name: null, + confidence: 0.7, + }); + } + // Providers: FastAPI @router.("/path") — must be joined // with the prefix(es) declared at the include_router site. When // no prefix is found we still emit the unprefixed path so this @@ -880,6 +1058,23 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = { }); } + // Consumers: requests.(url="literal") keyword + for (const match of runCompiledPatterns(REQUESTS_KEYWORD_URL_PATTERNS, tree)) { + const methodNode = match.captures.method; + const pathNode = match.captures.path; + if (!methodNode || !pathNode) continue; + const path = unquoteLiteral(pathNode.text); + if (path === null) continue; + out.push({ + role: 'consumer', + framework: 'python-requests', + method: methodNode.text.toUpperCase(), + path, + name: null, + confidence: 0.7, + }); + } + // Consumers: requests.request("METHOD", "url") for (const match of runCompiledPatterns(REQUESTS_GENERIC_PATTERNS, tree)) { const methodNode = match.captures.http_method; @@ -937,6 +1132,75 @@ export const PYTHON_HTTP_PLUGIN: HttpLanguagePlugin = { }); } + // Consumers: wrapper classes with uri= or url= keyword argument + // obj.fetch(uri="api/v1/camera/info/") + // obj.post(url="api/v1/config/update/") + const seenUriDetections = new Set(); // Track line numbers to avoid duplicates + for (const match of runCompiledPatterns(WRAPPER_URI_PATTERNS, tree)) { + const methodNode = match.captures.method; + const pathNode = match.captures.path; + if (!methodNode || !pathNode) continue; + const path = unquoteLiteral(pathNode.text); + if (path === null) continue; + + // Deduplicate: the two pattern branches can match the same call + const lineNum = 
pathNode.startPosition.row; + const dedupKey = lineNum * 1000 + methodNode.startPosition.row; + if (seenUriDetections.has(dedupKey)) continue; + seenUriDetections.add(dedupKey); + + const methodName = methodNode.text.toLowerCase(); + // Map wrapper method name to HTTP verb (fetch, request → GET) + const httpMethod = WRAPPER_METHOD_TO_HTTP[methodName] ?? 'GET'; + + out.push({ + role: 'consumer', + framework: 'python-http-wrapper', + method: httpMethod, + path, + name: null, + confidence: 0.65, + }); + } + + // Variable propagation: uri = "api/v1/endpoint/"; obj.fetch(uri=uri) + // Many repos assign URL paths to local vars then pass as keyword args. + const localStrings = buildLocalStringMap(tree); + const seenVarDetections = new Set(); + for (const match of runCompiledPatterns(WRAPPER_URI_VAR_PATTERNS, tree)) { + const methodNode = match.captures.method; + const pathVarNode = match.captures.path_var; + if (!methodNode || !pathVarNode) continue; + const dedupKey = `${pathVarNode.startPosition.row}:${methodNode.startPosition.row}`; + if (seenVarDetections.has(dedupKey)) continue; + seenVarDetections.add(dedupKey); + const resolved = localStrings.get(pathVarNode.text); + if (!resolved) continue; + const normalized = normalizeConsumerPath(resolved); + if (normalized === '/') continue; + const httpMethod = WRAPPER_METHOD_TO_HTTP[methodNode.text.toLowerCase()] ?? 
'GET'; + out.push({ + role: 'consumer', + framework: 'python-http-wrapper', + method: httpMethod, + path: normalized, + name: null, + confidence: 0.6, + }); + } + return out; }, }; + +/** Normalize consumer path: strip host, template literals, numeric segments → {param} */ +function normalizeConsumerPath(url: string): string { + let s = url.replace(/\$\{[^}]+\}/g, '{param}').trim(); + if (/^https?:\/\//i.test(s)) { + try { s = new URL(s).pathname; } catch { s = s.replace(/^https?:\/\/[^/]+/i, ''); } + } + if (!s.startsWith('/')) s = '/' + s; + const segments = s.split('/').filter(Boolean).map(seg => /^\d+$/.test(seg) ? '{param}' : seg); + s = '/' + segments.join('/'); + return s.replace(/\/+$/, '') || '/'; +} diff --git a/gitnexus/src/core/ingestion/language-provider.ts b/gitnexus/src/core/ingestion/language-provider.ts index 058710b2b3..e5d6fc96d6 100644 --- a/gitnexus/src/core/ingestion/language-provider.ts +++ b/gitnexus/src/core/ingestion/language-provider.ts @@ -43,6 +43,8 @@ import type { ImportResolverFn } from './import-resolvers/types.js'; import type { NamedBindingExtractorFn } from './named-bindings/types.js'; import type { SyntaxNode } from './utils/ast-helpers.js'; import type { NodeLabel } from 'gitnexus-shared'; +import type { ExtractedRoute } from './route-extractors/laravel.js'; +import type Parser from 'tree-sitter'; // ── Shared type aliases ──────────────────────────────────────────────────── /** Tree-sitter query captures: capture name → AST node (or undefined if not captured). */ @@ -301,6 +303,20 @@ interface LanguageProviderConfig { * When true, the worker extracts routes via the language's route extraction logic. * Default: undefined (no route files). */ readonly isRouteFile?: (filePath: string) => boolean; + /** Discover the root route file (e.g. Django root urls.py). + * If not provided, we extract from all route files matching `isRouteFile`. 
*/ + readonly discoverRootRouteFile?: ( + files: Array<{ path: string; content: string }>, + contentMap?: Map, + ) => string | null; + /** Extract routes from a framework route file. + * Default: undefined (no route extraction). */ + readonly extractRoutes?: ( + tree: Parser.Tree, + filePath: string, + reader: (relativePath: string) => string | null, + parser?: Parser | null, + ) => ExtractedRoute[]; // ── Call-resolution DAG hooks ───────────────────────────────────── /** diff --git a/gitnexus/src/core/ingestion/languages/php.ts b/gitnexus/src/core/ingestion/languages/php.ts index caca85335c..f0dad7f1f5 100644 --- a/gitnexus/src/core/ingestion/languages/php.ts +++ b/gitnexus/src/core/ingestion/languages/php.ts @@ -38,6 +38,7 @@ import { phpVariableConfig } from '../variable-extractors/configs/php.js'; import { createCallExtractor } from '../call-extractors/generic.js'; import { phpCallConfig } from '../call-extractors/configs/php.js'; import { createHeritageExtractor } from '../heritage-extractors/generic.js'; +import { extractLaravelRoutes } from '../route-extractors/laravel.js'; const BUILT_INS: ReadonlySet = new Set([ 'echo', @@ -298,6 +299,7 @@ export const phpProvider = defineLanguage({ heritageExtractor: createHeritageExtractor(SupportedLanguages.PHP), descriptionExtractor: phpDescriptionExtractor, isRouteFile: isPhpRouteFile, + extractRoutes: (tree, filePath) => extractLaravelRoutes(tree, filePath), builtInNames: BUILT_INS, // ── RFC #909 Ring 3: scope-based resolution hooks ────────────────────── emitScopeCaptures: emitPhpScopeCaptures, diff --git a/gitnexus/src/core/ingestion/languages/python.ts b/gitnexus/src/core/ingestion/languages/python.ts index a0e8340309..347f09e8ca 100644 --- a/gitnexus/src/core/ingestion/languages/python.ts +++ b/gitnexus/src/core/ingestion/languages/python.ts @@ -45,6 +45,8 @@ import { pythonReceiverBinding, resolvePythonImportTarget, } from './python/index.js'; +import { extractDjangoRoutes, setDjangoParser } from 
'../route-extractors/django.js'; +import { discoverDjangoRootUrl } from '../route-extractors/django-root-discovery.js'; const BUILT_INS: ReadonlySet = new Set([ 'print', @@ -104,6 +106,16 @@ function normalizePythonStringLiteral(text: string): string | undefined { return raw.replace(/\s+/g, ' '); } +/** Detect Django URL config files by naming convention. */ +function isDjangoRouteFile(filePath: string): boolean { + return ( + filePath.endsWith('.py') && + (filePath.endsWith('/urls.py') || + filePath.endsWith('/urls/__init__.py') || + filePath === 'urls.py') + ); +} + export const pythonProvider = defineLanguage({ id: SupportedLanguages.Python, extensions: ['.py'], @@ -137,6 +149,14 @@ export const pythonProvider = defineLanguage({ heritageExtractor: createHeritageExtractor(SupportedLanguages.Python), descriptionExtractor: pythonDescriptionExtractor, builtInNames: BUILT_INS, + isRouteFile: isDjangoRouteFile, + discoverRootRouteFile: (files, contentMap) => discoverDjangoRootUrl(files, contentMap), + extractRoutes: (tree, filePath, reader, parser) => { + if (parser) { + setDjangoParser(parser); + } + return extractDjangoRoutes(tree, filePath, reader); + }, labelOverride: pythonFunctionDefinitionLabel, // ── RFC #909 Ring 3: scope-based resolution hooks (RFC §5) ────────── diff --git a/gitnexus/src/core/ingestion/parsing-processor.ts b/gitnexus/src/core/ingestion/parsing-processor.ts index 6c77e79585..9a431bae77 100644 --- a/gitnexus/src/core/ingestion/parsing-processor.ts +++ b/gitnexus/src/core/ingestion/parsing-processor.ts @@ -68,6 +68,8 @@ import { getTreeSitterContentByteLength, TREE_SITTER_MAX_BUFFER, } from './constants.js'; +import fs from 'node:fs'; +import path from 'node:path'; import { ARRAY_METHOD_HOC_BLOCKLIST_SET, DEFAULT_EXPORT_IDENTIFIER_BLOCKLIST_SET, @@ -397,12 +399,30 @@ const processParsingSequential = async ( astCache: ASTCache, scopeTreeCache: ASTCache | undefined, onFileProgress?: FileProgressCallback, + outRoutes?: ExtractedRoute[], ) => { 
const parser = await loadParser(); const total = files.length; const logSkipped = isVerboseIngestionEnabled(); const skippedByLang = logSkipped ? new Map() : null; + // Pre-compute file content map and discover root route files across all languages in this batch + const fileContentMap = new Map(); + for (const f of files) fileContentMap.set(f.path, f.content); + + const rootRouteFiles = new Map(); + const languagesInBatch = new Set(); + for (const f of files) { + const lang = getLanguageFromFilename(f.path); + if (lang) languagesInBatch.add(lang); + } + for (const lang of languagesInBatch) { + const provider = getProvider(lang); + if (provider.discoverRootRouteFile) { + rootRouteFiles.set(lang, provider.discoverRootRouteFile(files, fileContentMap)); + } + } + for (let i = 0; i < files.length; i++) { const file = files[i]; @@ -915,6 +935,27 @@ const processParsingSequential = async ( }); } }); + + // ── Route extraction (Django / Laravel / generic) ── + // Replicates the per-file route extraction from parse-worker.ts processFileGroup. + const isRouteFile = provider.isRouteFile?.(file.path) ?? false; + if (isRouteFile && provider.extractRoutes) { + const rootRouteFile = rootRouteFiles.get(language) ?? null; + const isRootRoute = rootRouteFile !== null ? file.path === rootRouteFile : isRouteFile; + if (isRootRoute) { + const reader = (relativePath: string) => { + const cached = fileContentMap.get(relativePath); + if (cached != null) return cached; + try { + return fs.readFileSync(path.join(process.cwd(), relativePath), 'utf-8'); + } catch { + return null; + } + }; + const extractedRoutes = provider.extractRoutes(tree, file.path, reader, parser); + for (const r of extractedRoutes) outRoutes?.push(r); + } + } } if (skippedByLang && skippedByLang.size > 0) { @@ -961,6 +1002,14 @@ export const processParsing = async ( * artifact to cache there). See `gitnexus/src/storage/parse-cache.ts`. 
*/ outRawResults?: ParseWorkerResult[], + /** + * Optional out-parameter for extracted routes from the sequential + * fallback path. The worker path returns routes inside the + * `WorkerExtractedData` return value; the sequential path writes + * them here so the caller can feed them into the deferred route + * processing pipeline (`processRoutesFromExtracted`). + */ + outRoutes?: ExtractedRoute[], ): Promise => { let lastProgress = 0; const reportProgress: FileProgressCallback | undefined = onFileProgress @@ -1059,6 +1108,7 @@ export const processParsing = async ( astCache, scopeTreeCache, reportProgress, + outRoutes, ); return null; }; diff --git a/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts b/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts index 0f9f12b131..6104fc7cf0 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts @@ -619,9 +619,8 @@ export async function runChunkedParseAndResolve( scopeTreeCache, progressForChunk, activeWorkerPool, - // Capture raw results only when we have a cache to write to — - // otherwise we'd retain extra arrays for nothing. parseCache && chunkHash && activeWorkerPool ? rawResults : undefined, + allExtractedRoutes, ); } catch (err) { if (!(err instanceof WorkerPoolInitializationError)) throw err; @@ -632,7 +631,22 @@ export async function runChunkedParseAndResolve( // parser, which masked this exact regression as a 2-hour "stuck" run // in #1741. The failed (zero-worker) pool is torn down by the outer // finally. `--workers 0` is the explicit opt-in to sequential. 
- rawResults.length = 0; + let workerPoolDisabled = false; + workerPoolDisabled = true; + const failedPool = workerPool; + workerPool = undefined; + await failedPool?.terminate().catch(() => undefined); + chunkWorkerData = await processParsing( + graph, + chunkFiles, + symbolTable, + astCache, + scopeTreeCache, + progressForChunk, + undefined, + undefined, + allExtractedRoutes, + ); handleWorkerStartupFailure(err); // always throws } // Persist the raw results for this chunk hash. Sequential path diff --git a/gitnexus/src/core/ingestion/route-extractors/django-root-discovery.ts b/gitnexus/src/core/ingestion/route-extractors/django-root-discovery.ts new file mode 100644 index 0000000000..db2f437fd1 --- /dev/null +++ b/gitnexus/src/core/ingestion/route-extractors/django-root-discovery.ts @@ -0,0 +1,180 @@ +/** + * Given a `manage.py` file content, extract the Django settings module. + * e.g. `os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'cmrMngt.settings')` + * returns `'cmrMngt.settings'` + */ +function extractDjangoSettingsModule(manageContent: string): string | null { + const m = manageContent.match(/DJANGO_SETTINGS_MODULE\s*['"]?[,= ]\s*['"]([^'"]+)['"]/); + return m ? m[1] : null; +} + +/** + * Given a dotted Python module path, produce possible file paths. + * e.g. `cmrMngt.settings` → `['cmrMngt/settings.py', 'cmrMngt/settings/__init__.py']` + */ +export function djangoModuleToFilePaths(modulePath: string): string[] { + const base = modulePath.replace(/\./g, '/'); + return [`${base}.py`, `${base}/__init__.py`]; +} + +/** + * Read a file, trying first from the given content map, then from disk. + */ +function tryReadFile(relativePath: string, contentMap: Map): string | null { + return contentMap.get(relativePath) ?? null; +} + +/** + * Extract a module-level string assignment value from Python source. + * e.g. 
`content` contains `ROOT_URLCONF = 'cmrMngt.urls'` + * returns `'cmrMngt.urls'` + */ +function extractPythonStringAssignment(content: string, varName: string): string | null { + const regex = new RegExp(`^${varName}\\s*=\\s*['"]([^'"]+)['"]`, 'm'); + const m = content.match(regex); + return m ? m[1] : null; +} + +/** + * Extract `from import *` statements from Python source. + * e.g. `from .settings_base import *` → `settings_base` + * `from cmrMngt.settings_base import *` → `cmrMngt.settings_base` + */ +function extractStarImports(content: string): string[] { + const modules: string[] = []; + const regex = /^from\s+(\.?[\w.]+)\s+import\s+\*/gm; + let m; + while ((m = regex.exec(content)) !== null) { + const moduleName = m[1]; + if (moduleName.startsWith('.')) { + // Relative import — caller needs to resolve based on current module + modules.push(moduleName); + } else { + modules.push(moduleName); + } + } + return modules; +} + +/** + * Resolve a relative Python import path. + * `from .settings_base import *` in `cmrMngt/settings.py` + * → `cmrMngt/settings_base.py` + */ +function resolveRelativeImport(currentModulePath: string, importPath: string): string | null { + if (!importPath.startsWith('.')) return null; + + const currentDir = currentModulePath.includes('/') + ? currentModulePath.substring(0, currentModulePath.lastIndexOf('/')) + : ''; + + let relPath = importPath; + let dir = currentDir; + while (relPath.startsWith('.')) { + if (relPath.startsWith('..')) { + dir = dir.includes('/') ? dir.substring(0, dir.lastIndexOf('/')) : ''; + relPath = relPath.substring(2); + } else { + relPath = relPath.substring(1); + break; + } + } + + return dir ? `${dir}/${relPath}` : relPath; +} + +/** + * Discover the Django root URL file by following: + * manage.py → DJANGO_SETTINGS_MODULE → settings → ROOT_URLCONF → urls.py + * + * @param files Array of file paths and their contents in the current batch. + * @param contentMap Optional pre-built map of file path → content. 
+ * @returns The relative path to the root URL file, or null. + */ +export function discoverDjangoRootUrl( + files: Array<{ path: string; content: string }>, + contentMap?: Map, +): string | null { + const map = contentMap ?? new Map(); + for (const f of files) map.set(f.path, f.content); + + const managePy = files.find((f) => f.path === 'manage.py' || f.path.endsWith('/manage.py')); + if (!managePy) return null; + + const settingsModule = extractDjangoSettingsModule(managePy.content); + if (!settingsModule) return null; + + // Find the settings file + const settingsPaths = djangoModuleToFilePaths(settingsModule); + let settingsContent: string | null = null; + let resolvedSettingsPath: string | null = null; + for (const sp of settingsPaths) { + const c = tryReadFile(sp, map); + if (c !== null) { + settingsContent = c; + resolvedSettingsPath = settingsModule.replace(/\./g, '/'); + break; + } + } + if (!settingsContent) return null; + + // Check ROOT_URLCONF in the main settings and any base settings (star imports) + let rootUrlConf = extractPythonStringAssignment(settingsContent, 'ROOT_URLCONF'); + if (!rootUrlConf) { + // Check star-imported base settings + const starImports = extractStarImports(settingsContent); + for (const imp of starImports) { + let baseModule: string | null = null; + if (imp.startsWith('.')) { + const resolved = resolveRelativeImport(resolvedSettingsPath!, imp); + if (resolved) baseModule = resolved; + } else { + baseModule = imp; + } + if (!baseModule) continue; + + const basePaths: string[] = []; + if (baseModule.startsWith('.')) { + const resolved = resolveRelativeImport(resolvedSettingsPath!, baseModule); + if (resolved) { + basePaths.push(`${resolved.replace(/\./g, '/')}.py`); + basePaths.push(`${resolved.replace(/\./g, '/')}/__init__.py`); + } + } else { + basePaths.push(`${baseModule.replace(/\./g, '/')}.py`); + basePaths.push(`${baseModule.replace(/\./g, '/')}/__init__.py`); + } + + for (const bp of basePaths) { + const bc = 
tryReadFile(bp, map); + if (bc) { + rootUrlConf = extractPythonStringAssignment(bc, 'ROOT_URLCONF'); + if (rootUrlConf) break; + } + } + if (rootUrlConf) break; + } + } + + if (!rootUrlConf) return null; + + // Convert ROOT_URLCONF module path to file path + const urlPaths = djangoModuleToFilePaths(rootUrlConf); + for (const up of urlPaths) { + if (tryReadFile(up, map) !== null) return up; + } + + // Also try relative to the settings module's directory + if (resolvedSettingsPath && resolvedSettingsPath.includes('/')) { + const settingsDir = resolvedSettingsPath.substring( + 0, + resolvedSettingsPath.lastIndexOf('/') + 1, + ); + for (const up of urlPaths) { + const tryPath = settingsDir + up; + if (tryReadFile(tryPath, map) !== null) return tryPath; + } + } + + return null; +} diff --git a/gitnexus/src/core/ingestion/route-extractors/django.ts b/gitnexus/src/core/ingestion/route-extractors/django.ts new file mode 100644 index 0000000000..c83e21478d --- /dev/null +++ b/gitnexus/src/core/ingestion/route-extractors/django.ts @@ -0,0 +1,380 @@ +import type Parser from 'tree-sitter'; +import { parseSourceSafe } from '../../tree-sitter/safe-parse.js'; +import { extractStringContent, type SyntaxNode } from '../utils/ast-helpers.js'; +import type { ExtractedRoute } from './laravel.js'; + +interface DjangoRouteContext { + prefix: string | null; +} + +interface WalkFrame { + node: SyntaxNode; + routeCtx: DjangoRouteContext; + currentFilePath: string; + depth: number; +} + +const DJANGO_ROUTE_FUNCTIONS = new Set(['path', 're_path', 'url']); +const DJANGO_INCLUDE_FUNCTION = 'include'; +const MAX_INCLUDE_DEPTH = 8; + +function modulePathToFilePath(modulePath: string): string { + return modulePath.replace(/\./g, '/'); +} + +export type DjangoFileReader = (relativePath: string) => string | null; + +function extractStringArg(argsNode: SyntaxNode | null): string | null { + if (!argsNode) return null; + for (const child of argsNode.children ?? 
[]) { + if (child.type === '(' || child.type === ')' || child.type === ',') continue; + if (child.type === 'string') { + return extractStringContent(child); + } + if (child.type === 'binary_operator') { + let concat = ''; + for (const part of child.children ?? []) { + if (part.type === 'string') { + const s = extractStringContent(part); + if (s !== null) concat += s; + } + } + if (concat) return concat; + } + } + return null; +} + +function extractViewTarget(argsNode: SyntaxNode | null): { + viewName: string | null; + viewCall: string | null; +} { + if (!argsNode) return { viewName: null, viewCall: null }; + const positionalArgs: SyntaxNode[] = []; + for (const child of argsNode.children ?? []) { + if (child.type === '(' || child.type === ')' || child.type === ',') continue; + positionalArgs.push(child); + } + const viewNode = positionalArgs[1]; + if (!viewNode) return { viewName: null, viewCall: null }; + if (viewNode.type === 'attribute') return { viewName: viewNode.text, viewCall: null }; + if (viewNode.type === 'call') return { viewName: null, viewCall: viewNode.text }; + if (viewNode.type === 'identifier') return { viewName: viewNode.text, viewCall: null }; + if (viewNode.type === 'string') + return { viewName: extractStringContent(viewNode), viewCall: null }; + return { viewName: null, viewCall: null }; +} + +function inferHttpMethod(viewName: string | null): string { + if (!viewName) return '*'; + const lower = viewName.toLowerCase(); + const m = lower.match(/\.(get|post|put|patch|delete|head|options)(_|$)/); + if (m) { + return m[1].toUpperCase(); + } + return '*'; +} + +function findUrlpatternsLists(rootNode: SyntaxNode): SyntaxNode[] { + const assignmentNodes: SyntaxNode[] = []; + _collectAssignments(rootNode, assignmentNodes); + const lists: SyntaxNode[] = []; + for (const node of assignmentNodes) { + const left = node.childForFieldName?.('left') ?? node.children?.[0] ?? 
null; + if (left?.type === 'identifier' && left.text === 'urlpatterns') { + const right = node.childForFieldName?.('right') ?? node.children?.[2] ?? null; + if (right?.type === 'list') { + lists.push(right); + } + } + } + return lists; +} + +function _collectAssignments(node: SyntaxNode, out: SyntaxNode[]): void { + if (node.type === 'assignment' || node.type === 'augmented_assignment') { + out.push(node); + } + for (const child of node.children ?? []) { + _collectAssignments(child, out); + } +} + +function emitDjangoRoute( + callNode: SyntaxNode, + filePath: string, + ctx: DjangoRouteContext, +): ExtractedRoute { + const argsNode = callNode.childForFieldName?.('arguments') ?? null; + const routePath = extractStringArg(argsNode); + + const { viewName, viewCall } = extractViewTarget(argsNode); + const httpMethod = inferHttpMethod(viewName); + + let routeName: string | null = null; + if (argsNode) { + for (let i = 0; i < argsNode.children.length; i++) { + const child = argsNode.children[i]; + if (child.type === 'keyword_argument' && child.childForFieldName?.('name')?.text === 'name') { + const valueNode = child.childForFieldName?.('value'); + if (valueNode?.type === 'string') { + routeName = extractStringContent(valueNode); + } + } + } + } + + return { + filePath, + httpMethod, + routePath, + routeName, + controllerName: viewName ?? viewCall, + methodName: null, + middleware: [], + prefix: ctx.prefix, + lineNumber: callNode.startPosition.row, + }; +} + +function getIncludeModulePath(callNode: SyntaxNode): string | null { + const funcName = + callNode.childForFieldName?.('function')?.text ?? + callNode.children?.find((c) => c.type === 'identifier')?.text; + if (funcName !== DJANGO_INCLUDE_FUNCTION) return null; + const argsNode = callNode.childForFieldName?.('arguments'); + if (!argsNode) return null; + + const modulePath = extractStringArg(argsNode); + if (modulePath) return modulePath; + + for (const child of argsNode.children ?? 
[]) { + if (child.type === '(' || child.type === ')' || child.type === ',') continue; + if (child.type === 'tuple' || child.type === 'parenthesized_expression') { + for (const inner of child.children ?? []) { + if (inner.type === '(' || inner.type === ')' || inner.type === ',') continue; + if (inner.type === 'string') return extractStringContent(inner); + } + } + } + return null; +} + +function makePrefix(parentPrefix: string | null, childPrefix: string | null): string | null { + if (!childPrefix) return parentPrefix; + if (!parentPrefix) return childPrefix; + return `${parentPrefix}/${childPrefix}`.replace(/\/+/g, '/'); +} + +function getCallFuncName(node: SyntaxNode): string | null { + return ( + node.childForFieldName?.('function')?.text ?? + node.children?.find((c) => c.type === 'identifier')?.text ?? + null + ); +} + +let _djangoParser: Parser | null = null; + +export function setDjangoParser(p: Parser): void { + _djangoParser = p; +} + +/** + * Given a Django dotted module path like `app.submodule.urls`, + * try multiple path resolution strategies to find the file on disk. + * + * Strategies tried in order: + * 1. Direct dot-to-slash: `module/path.py` and `module/path/__init__.py` + * 2. Relative to the current file's directory + * 3. 
Walk up the directory tree from the current file, trying each ancestor
+ *   4. Bare last segment of the module path (e.g. `urls.py` for `app.urls`)
+ *
+ * Strategy 2 and the first step of strategy 3 name the same paths, so
+ * candidates are de-duplicated (first occurrence wins) and `readFile` is
+ * called at most once per path. The first candidate for which `readFile`
+ * returns non-null is the result.
+ *
+ * @param modulePath - Dotted module path taken from `include(...)`.
+ * @param currentFilePath - Path of the file containing the `include(...)`.
+ * @param readFile - Returns a file's content, or `null` when it is unavailable.
+ * @returns The matching path and its content, or `null` when nothing matched.
+ */
+function resolveIncludedFile(
+  modulePath: string,
+  currentFilePath: string,
+  readFile: DjangoFileReader,
+): { filePath: string; content: string } | null {
+  const basePath = modulePathToFilePath(modulePath);
+
+  // A Set iterates in insertion order and ignores repeats, which preserves
+  // the strategy priority while dropping the duplicated paths.
+  const candidates = new Set<string>();
+  const addCandidate = (stem: string): void => {
+    candidates.add(stem + '.py');
+    candidates.add(stem + '/__init__.py');
+  };
+
+  // Strategy 1: direct path (app/urls.py, app/urls/__init__.py)
+  addCandidate(basePath);
+
+  const lastSep = currentFilePath.lastIndexOf('/');
+
+  // Strategy 2: relative to current file's directory
+  if (lastSep >= 0) {
+    addCandidate(currentFilePath.substring(0, lastSep + 1) + basePath);
+  }
+
+  // Strategy 3: walk up from current file, trying each ancestor
+  let parentDir = lastSep >= 0 ? currentFilePath.substring(0, lastSep) : '';
+  while (parentDir.length > 0) {
+    addCandidate(parentDir + '/' + basePath);
+    const nextSep = parentDir.lastIndexOf('/');
+    if (nextSep < 0) break;
+    parentDir = parentDir.substring(0, nextSep);
+  }
+
+  // Strategy 4: bare path with just the last segment (e.g. 'urls.py' from 'app.urls')
+  const segments = basePath.split('/');
+  if (segments.length > 1) {
+    addCandidate(segments[segments.length - 1]);
+  }
+
+  for (const candidate of candidates) {
+    const content = readFile(candidate);
+    if (content !== null) return { filePath: candidate, content };
+  }
+
+  return null;
+}
+
+/**
+ * Resolve an included module, mark it visited, parse it, and return its
+ * `urlpatterns` lists.
+ *
+ * @returns `null` when the module cannot be resolved, was already visited,
+ *   or `parseSourceSafe` throws; otherwise the resolved path and its lists.
+ */
+function loadIncludedLists(
+  modulePath: string,
+  currentFilePath: string,
+  readFile: DjangoFileReader,
+  parser: Parser,
+  visitedFiles: Set<string>,
+): { filePath: string; lists: SyntaxNode[] } | null {
+  const resolved = resolveIncludedFile(modulePath, currentFilePath, readFile);
+  if (!resolved || visitedFiles.has(resolved.filePath)) return null;
+  // Marked before parsing so a file that fails to parse is not retried.
+  visitedFiles.add(resolved.filePath);
+
+  let childTree: Parser.Tree;
+  try {
+    childTree = parseSourceSafe(parser, resolved.content);
+  } catch {
+    return null;
+  }
+  return { filePath: resolved.filePath, lists: findUrlpatternsLists(childTree.rootNode) };
+}
+
+/**
+ * Extract routes from every `urlpatterns` list in `tree`, following
+ * `include(...)` into other files when possible.
+ *
+ * Includes are followed only when `readFile` is provided, a parser was
+ * installed with `setDjangoParser`, and the include depth is below
+ * `MAX_INCLUDE_DEPTH`. Routes found in an included file carry that file's
+ * path and the prefix accumulated from the enclosing route calls.
+ *
+ * Behaviour worth knowing:
+ *   - Routes are returned in source order, including across several
+ *     `urlpatterns` lists (`urlpatterns = [...]` followed by `urlpatterns += [...]`).
+ *   - A route call that has an `include(...)` argument never emits a route of
+ *     its own, even when the include could not be followed.
+ *   - Each file is expanded at most once per visited set, so a module that is
+ *     included under two prefixes contributes routes only for the first one.
+ *
+ * @param tree - Parsed tree of the file at `filePath`.
+ * @param filePath - Path recorded on routes found directly in `tree`.
+ * @param readFile - Optional reader used to load included files.
+ * @param _visited - Optional set of already-visited file paths; it is mutated.
+ * @returns The extracted routes; empty when `filePath` was already visited
+ *   or no `urlpatterns` list exists.
+ */
+export function extractDjangoRoutes(
+  tree: Parser.Tree,
+  filePath: string,
+  readFile?: DjangoFileReader | null,
+  _visited?: Set<string>,
+): ExtractedRoute[] {
+  const visitedFiles = _visited ?? new Set<string>();
+  if (visitedFiles.has(filePath)) return [];
+  visitedFiles.add(filePath);
+
+  const listNodes = findUrlpatternsLists(tree.rootNode);
+  if (listNodes.length === 0) return [];
+
+  const routes: ExtractedRoute[] = [];
+  const walkStack: WalkFrame[] = [];
+
+  // The stack is LIFO, so siblings are pushed last-to-first; popping then
+  // visits them in source order.
+  const pushAll = (
+    nodes: readonly SyntaxNode[],
+    routeCtx: DjangoRouteContext,
+    currentFilePath: string,
+    depth: number,
+  ): void => {
+    for (let i = nodes.length - 1; i >= 0; i--) {
+      walkStack.push({ node: nodes[i], routeCtx, currentFilePath, depth });
+    }
+  };
+
+  // Descend only into `call` and `list` children of a node.
+  const pushCallsAndLists = (
+    parent: SyntaxNode,
+    routeCtx: DjangoRouteContext,
+    currentFilePath: string,
+    depth: number,
+  ): void => {
+    const nested = (parent.children ?? []).filter(
+      (c) => c.type === 'call' || c.type === 'list',
+    );
+    pushAll(nested, routeCtx, currentFilePath, depth);
+  };
+
+  pushAll(listNodes, { prefix: null }, filePath, 0);
+
+  for (let frame = walkStack.pop(); frame !== undefined; frame = walkStack.pop()) {
+    const { node, routeCtx, currentFilePath, depth } = frame;
+
+    if (node.type === 'list') {
+      const items = (node.children ?? []).filter(
+        (c) => c.type !== '[' && c.type !== ']' && c.type !== ',',
+      );
+      pushAll(items, routeCtx, currentFilePath, depth);
+      continue;
+    }
+
+    if (node.type !== 'call') {
+      pushCallsAndLists(node, routeCtx, currentFilePath, depth);
+      continue;
+    }
+
+    const funcName = getCallFuncName(node);
+
+    if (funcName && DJANGO_ROUTE_FUNCTIONS.has(funcName)) {
+      const argsNode = node.childForFieldName?.('arguments') ?? null;
+
+      let hasIncludeChild = false;
+      for (const child of argsNode?.children ?? []) {
+        if (child.type !== 'call' || getCallFuncName(child) !== DJANGO_INCLUDE_FUNCTION) continue;
+        hasIncludeChild = true;
+
+        const modulePath = getIncludeModulePath(child);
+        if (!modulePath || !readFile || !_djangoParser || depth >= MAX_INCLUDE_DEPTH) continue;
+
+        const included = loadIncludedLists(
+          modulePath,
+          currentFilePath,
+          readFile,
+          _djangoParser,
+          visitedFiles,
+        );
+        if (!included) continue;
+
+        // The route call's own path string becomes the prefix of everything included.
+        const childPrefix = makePrefix(routeCtx.prefix, extractStringArg(argsNode));
+        pushAll(included.lists, { prefix: childPrefix }, included.filePath, depth + 1);
+      }
+
+      if (!hasIncludeChild) {
+        routes.push(emitDjangoRoute(node, currentFilePath, routeCtx));
+      }
+      continue;
+    }
+
+    if (
+      funcName === DJANGO_INCLUDE_FUNCTION &&
+      readFile &&
+      _djangoParser &&
+      depth < MAX_INCLUDE_DEPTH
+    ) {
+      // Bare include(...) directly inside a list: the current prefix is kept.
+      const modulePath = getIncludeModulePath(node);
+      if (modulePath) {
+        const included = loadIncludedLists(
+          modulePath,
+          currentFilePath,
+          readFile,
+          _djangoParser,
+          visitedFiles,
+        );
+        if (included) {
+          pushAll(included.lists, routeCtx, included.filePath, depth + 1);
+        }
+      }
+      continue;
+    }
+
+    // Unnamed calls, unrelated calls, and include() calls that cannot be followed.
+    pushCallsAndLists(node, routeCtx, currentFilePath, depth);
+  }
+
+  return routes;
+}
diff --git a/gitnexus/src/core/ingestion/workers/parse-worker.ts b/gitnexus/src/core/ingestion/workers/parse-worker.ts index 8a7946d327..33ed51e956 100644 --- a/gitnexus/src/core/ingestion/workers/parse-worker.ts +++ b/gitnexus/src/core/ingestion/workers/parse-worker.ts @@ -1,4 +1,6 @@ import { parentPort, threadId } from 'node:worker_threads'; +import fs from 'node:fs'; +import path from 'node:path'; import Parser from 'tree-sitter'; import JavaScript from 'tree-sitter-javascript'; import TypeScript from 'tree-sitter-typescript'; @@ -98,7 +100,7 @@ import { extractTemplateArguments, templateArgumentsIdTag } from '../utils/templ import type { LanguageProvider } from '../language-provider.js'; import type { ParsedFile } from 'gitnexus-shared'; import { extractParsedFile } from '../scope-extractor-bridge.js'; -import { extractLaravelRoutes, type ExtractedRoute } from '../route-extractors/laravel.js'; +import type { ExtractedRoute } from '../route-extractors/laravel.js'; import { logger } from '../../logger.js'; export type { ExtractedRoute } from '../route-extractors/laravel.js'; @@ -1075,6 +1077,15 @@ const processFileGroup = ( result: ParseWorkerResult, onFileProcessed?: () => void, ): void => { + const fileContentMap = new Map(); + for (const f of files) fileContentMap.set(f.path, f.content); + + // Discover root route file if the language provider has discovery logic + const provider = getProvider(language); + const rootRouteFile = provider.discoverRootRouteFile + ?
provider.discoverRootRouteFile(files, fileContentMap) + : null; + let query: Parser.Query; try { const lang = parser.getLanguage(); @@ -2144,9 +2155,22 @@ const processFileGroup = ( } // Extract framework routes via provider detection (e.g., Laravel routes.php) - if (provider.isRouteFile?.(file.path)) { - const extractedRoutes = extractLaravelRoutes(tree, file.path); - for (const r of extractedRoutes) result.routes.push(r); + const isRouteFile = provider.isRouteFile?.(file.path) ?? false; + if (isRouteFile && provider.extractRoutes) { + const isRootRoute = rootRouteFile !== null ? file.path === rootRouteFile : isRouteFile; + if (isRootRoute) { + const reader = (relativePath: string) => { + const cached = fileContentMap.get(relativePath); + if (cached != null) return cached; + try { + return fs.readFileSync(path.join(process.cwd(), relativePath), 'utf-8'); + } catch { + return null; + } + }; + const extractedRoutes = provider.extractRoutes(tree, file.path, reader, parser); + for (const r of extractedRoutes) result.routes.push(r); + } } // Extract ORM queries (Prisma, Supabase)
diff --git a/gitnexus/test/unit/django-route-extraction.test.ts b/gitnexus/test/unit/django-route-extraction.test.ts
new file mode 100644
index 0000000000..eb510db586
--- /dev/null
+++ b/gitnexus/test/unit/django-route-extraction.test.ts
@@ -0,0 +1,302 @@
+import { describe, expect, it } from 'vitest';
+import Parser from 'tree-sitter';
+import Python from 'tree-sitter-python';
+import {
+  extractDjangoRoutes,
+  setDjangoParser,
+} from '../../src/core/ingestion/route-extractors/django.js';
+
+const parser = new Parser();
+parser.setLanguage(Python);
+
+// Must be called once before extractDjangoRoutes can resolve includes
+setDjangoParser(parser);
+// Helper: parse `source` as Python, run the extractor, and keep only the fields the assertions compare.
+const extract = (
+  source: string,
+  filePath = 'app/urls.py',
+  readFile?: (path: string) => string | null,
+) =>
+  extractDjangoRoutes(parser.parse(source), filePath, readFile).map((route) => ({
+    httpMethod: route.httpMethod,
+    routePath: route.routePath,
+    routeName: route.routeName,
+    controllerName: route.controllerName,
+    prefix: route.prefix,
+    filePath: route.filePath,
+  }));
+// Each case feeds an inline urls.py fixture through extract(); no file on disk is read.
+describe('Django route extraction', () => {
+  it('extracts path() routes from urlpatterns', () => {
+    const routes = extract(`
+from django.urls import path
+from . import views
+
+urlpatterns = [
+    path('orders/', views.order_list),
+    path('orders//', views.order_detail),
+    path('users/', views.user_list, name='user-list'),
+]
+`);
+    expect(routes).toHaveLength(3);
+    // NOTE(review): 'orders//' (and '(?P[0-9]{4})' below) look like they lost an angle-bracket segment in extraction; confirm against the original commit.
+    expect(routes[0]).toMatchObject({ httpMethod: '*', routePath: 'orders/' });
+    expect(routes[1]).toMatchObject({ httpMethod: '*', routePath: 'orders//' });
+    expect(routes[2]).toMatchObject({
+      httpMethod: '*',
+      routePath: 'users/',
+      routeName: 'user-list',
+    });
+  });
+  // re_path(): the regex text is expected back unchanged as routePath.
+  it('extracts re_path() routes', () => {
+    const routes = extract(`
+from django.urls import re_path
+from . import views
+
+urlpatterns = [
+    re_path(r'^articles/(?P[0-9]{4})/$', views.year_archive),
+]
+`);
+    expect(routes).toHaveLength(1);
+    expect(routes[0]).toMatchObject({
+      httpMethod: '*',
+      routePath: '^articles/(?P[0-9]{4})/$',
+    });
+  });
+  // url(): expected to yield one route with the regex text as routePath.
+  it('extracts legacy url() routes', () => {
+    const routes = extract(`
+from django.conf.urls import url
+from . import views
+
+urlpatterns = [
+    url(r'^legacy/$', views.legacy_view),
+]
+`);
+    expect(routes).toHaveLength(1);
+    expect(routes[0]).toMatchObject({ httpMethod: '*', routePath: '^legacy/$' });
+  });
+  // 'a' + 'b' as the path argument: only containment of both halves is asserted, not the exact joined value.
+  it('handles str concatenation in path strings', () => {
+    const routes = extract(`
+from django.urls import path
+from . import views
+
+urlpatterns = [
+    path('api/' + 'v1/users/', views.user_list),
+]
+`);
+    // Binary operator concatenation should produce the full path
+    expect(routes).toHaveLength(1);
+    if (routes.length > 0) {
+      expect(routes[0].routePath).toContain('api/');
+      expect(routes[0].routePath).toContain('v1/users/');
+    }
+  });
+  // urlpatterns += [...]: only the presence of 'extra/' is asserted; order and 'base/' are not checked.
+  it('extracts from augmented assignment (urlpatterns += ...)', () => {
+    const routes = extract(`
+from django.urls import path
+from . import views
+
+urlpatterns = [
+    path('base/', views.base),
+]
+urlpatterns += [
+    path('extra/', views.extra),
+]
+`);
+    // Should find at least 'extra/' from the augmented assignment
+    expect(routes.some((r) => r.routePath === 'extra/')).toBe(true);
+  });
+  // include(): readFile stands in for the filesystem; included routes carry the parent path as prefix and their own file as filePath.
+  it('resolves include() to child url files via readFile', () => {
+    const childContent = `
+from django.urls import path
+from . import views
+
+urlpatterns = [
+    path('list/', views.item_list),
+    path('/', views.item_detail),
+]
+`;
+    const readFile = (path: string) => {
+      if (path === 'items/urls.py' || path === 'app/items/urls.py') return childContent;
+      return null;
+    };
+
+    const routes = extract(
+      `
+from django.urls import path, include
+from . import views
+
+urlpatterns = [
+    path('api/', include('items.urls')),
+    path('health/', views.health),
+]
+`,
+      'app/urls.py',
+      readFile,
+    );
+
+    // Should have: health/ and two routes from items/urls.py with prefix 'api/'
+    const healthRoute = routes.find((r) => r.routePath === 'health/');
+    expect(healthRoute).toBeDefined();
+    expect(healthRoute?.filePath).toBe('app/urls.py');
+
+    const prefixedRoutes = routes.filter((r) => r.prefix === 'api/');
+    expect(prefixedRoutes).toHaveLength(2);
+    expect(prefixedRoutes.some((r) => r.routePath === 'list/')).toBe(true);
+    expect(prefixedRoutes.some((r) => r.routePath === '/')).toBe(true);
+    expect(prefixedRoutes.every((r) => r.filePath === 'items/urls.py')).toBe(true);
+  });
+  // Two levels of include(): prefixes are expected to accumulate ('api/' then 'v1/' gives 'api/v1/').
+  it('resolves nested includes with accumulated prefixes', () => {
+    const childContent = `
+from django.urls import path, include
+from . import views
+
+urlpatterns = [
+    path('v1/', include('v1.urls')),
+    path('v2/', include('v2.urls')),
+]
+`;
+    const grandchildContent = `
+from django.urls import path
+from . import views
+
+urlpatterns = [
+    path('users/', views.user_list),
+]
+`;
+    const readFile = (path: string) => {
+      if (path === 'app/api/urls.py') return childContent;
+      if (path === 'v1/urls.py' || path === 'app/v1/urls.py') return grandchildContent;
+      if (path === 'v2/urls.py' || path === 'app/v2/urls.py') return grandchildContent;
+      return null;
+    };
+
+    const routes = extract(
+      `
+from django.urls import path, include
+
+urlpatterns = [
+    path('api/', include('app.api.urls')),
+]
+`,
+      'root/urls.py',
+      readFile,
+    );
+
+    // Should have deeply prefixed routes: api/v1/users/ and api/v2/users/
+    const prefixedRoutes = routes.filter((r) => r.prefix != null);
+    const hasApiV1Users = prefixedRoutes.some(
+      (r) => r.prefix === 'api/v1/' && r.routePath === 'users/',
+    );
+    const hasApiV2Users = prefixedRoutes.some(
+      (r) => r.prefix === 'api/v2/' && r.routePath === 'users/',
+    );
+    expect(hasApiV1Users).toBe(true);
+    expect(hasApiV2Users).toBe(true);
+    // Included routes should report their actual source file
+    const v1Routes = routes.filter((r) => r.prefix === 'api/v1/');
+    expect(v1Routes.every((r) => r.filePath === 'v1/urls.py')).toBe(true);
+    const v2Routes = routes.filter((r) => r.prefix === 'api/v2/');
+    expect(v2Routes.every((r) => r.filePath === 'v2/urls.py')).toBe(true);
+  });
+  // controllerName is expected to be the view expression text, including a trailing .as_view() call.
+  it('resolves views with attribute-style references (views.function)', () => {
+    const routes = extract(`
+from django.urls import path
+from . import views
+
+urlpatterns = [
+    path('dashboard/', views.DashboardView.as_view()),
+    path('report/', views.report_view),
+]
+`);
+    expect(routes).toHaveLength(2);
+    expect(routes[0]).toMatchObject({
+      httpMethod: '*',
+      routePath: 'dashboard/',
+      controllerName: 'views.DashboardView.as_view()',
+    });
+    expect(routes[1]).toMatchObject({
+      httpMethod: '*',
+      routePath: 'report/',
+      controllerName: 'views.report_view',
+    });
+  });
+  // NOTE(review): the title says "suffix", but the fixture view names carry the verb as a prefix (get_user, post_user, ...); confirm the wording.
+  it('infers HTTP method from view name suffix', () => {
+    const routes = extract(`
+from django.urls import path
+
+urlpatterns = [
+    path('users/', views.get_user),
+    path('users/', views.post_user),
+    path('users/', views.put_user),
+    path('users/', views.patch_user),
+    path('users/', views.delete_user),
+]
+`);
+    const methods = routes.map((r) => r.httpMethod);
+    expect(methods).toEqual(['GET', 'POST', 'PUT', 'PATCH', 'DELETE']);
+  });
+  // include((module, app_name), namespace=...): the module string inside the tuple is expected to be followed.
+  it('handles include with tuple namespace', () => {
+    const childContent = `
+from django.urls import path
+
+urlpatterns = [
+    path('profile/', views.profile),
+]
+`;
+    const readFile = (path: string) => {
+      if (path === 'account/urls.py' || path === 'app/account/urls.py') return childContent;
+      return null;
+    };
+
+    const routes = extract(
+      `
+from django.urls import path, include
+
+urlpatterns = [
+    path('account/', include(('account.urls', 'app_name'), namespace='account')),
+]
+`,
+      'app/urls.py',
+      readFile,
+    );
+
+    const prefixedRoutes = routes.filter((r) => r.prefix === 'account/');
+    expect(prefixedRoutes.length).toBeGreaterThanOrEqual(1);
+    expect(prefixedRoutes.some((r) => r.filePath === 'account/urls.py')).toBe(true);
+  });
+  // An empty urlpatterns list is expected to yield no routes.
+  it('does not crash on empty urlpatterns', () => {
+    const routes = extract(`
+from django.urls import path
+
+urlpatterns = []
+`);
+    expect(routes).toHaveLength(0);
+  });
+  // Only the list assigned to `urlpatterns` is expected to be scanned; OTHER_LIST is ignored.
+  it('skips non-urlpatterns assignments', () => {
+    const routes = extract(`
+from django.urls import path
+
+OTHER_LIST = [
+    path('not-a-route/', something),
+]
+
+urlpatterns = [
+    path('real/', views.real),
+]
+`);
+    expect(routes).toHaveLength(1);
+    expect(routes[0].routePath).toBe('real/');
+  });
+});