diff --git a/gitnexus/src/core/ingestion/languages/cobol.ts b/gitnexus/src/core/ingestion/languages/cobol.ts index 6e54c2309c..a28f87a8f6 100644 --- a/gitnexus/src/core/ingestion/languages/cobol.ts +++ b/gitnexus/src/core/ingestion/languages/cobol.ts @@ -6,11 +6,18 @@ * processed by cobol-processor.ts in pipeline Phase 2.6, not by the * tree-sitter pipeline. * - * This provider exists to satisfy the SupportedLanguages exhaustiveness - * checks and to declare parseStrategy: 'standalone'. + * This provider supports scope-based resolution (RFC #909 Ring 3) via + * `emitScopeCaptures` which wraps the regex tagger. COPY statements are + * interpreted as imports; there is no type system and no implicit receiver. */ import { SupportedLanguages } from 'gitnexus-shared'; import { defineLanguage } from '../language-provider.js'; +import { + emitCobolScopeCaptures, + interpretCobolImport, + cobolImportOwningScope, + cobolReceiverBinding, +} from './cobol/index.js'; export const cobolProvider = defineLanguage({ id: SupportedLanguages.Cobol, @@ -26,4 +33,10 @@ export const cobolProvider = defineLanguage({ }, exportChecker: () => false, importResolver: () => null, + + // ── Scope-resolution hooks ─────────────────────────────────────── + emitScopeCaptures: emitCobolScopeCaptures, + interpretImport: interpretCobolImport, + importOwningScope: cobolImportOwningScope, + receiverBinding: cobolReceiverBinding, }); diff --git a/gitnexus/src/core/ingestion/languages/cobol/captures.ts b/gitnexus/src/core/ingestion/languages/cobol/captures.ts new file mode 100644 index 0000000000..69a6c80b9a --- /dev/null +++ b/gitnexus/src/core/ingestion/languages/cobol/captures.ts @@ -0,0 +1,291 @@ +/** + * `emitScopeCaptures` for COBOL. + * + * Wraps the existing regex tagger (`extractCobolSymbolsWithRegex`) and + * produces parser-agnostic `CaptureMatch[]` matching the RFC §5.1 + * vocabulary. 
// ---------------------------------------------------------------------------
// Capture building helpers
// ---------------------------------------------------------------------------

/** Tags emitted, in this order, for every SECTION and paragraph header. */
const FUNCTION_TAGS: readonly string[] = [
  '@scope.function',
  '@declaration.function',
  '@declaration.name',
];

/** Tags emitted, in this order, for every COPY statement. */
const IMPORT_TAGS: readonly string[] = ['@import.statement', '@import.name'];

/** Tags emitted, in this order, for every CALL, PERFORM and GO TO. */
const CALL_TAGS: readonly string[] = ['@reference.call', '@reference.name'];

/** Structural view of a tagger program entry: only the fields read in this module. */
interface ProgramExtent {
  readonly startLine: number;
  readonly endLine: number;
  readonly procedureUsing?: readonly unknown[] | null;
}

/** Build one capture record. */
function capture(name: string, range: Range, text: string): Capture {
  return { name, range, text };
}

/** Build a `Range` from its four coordinates. */
function rangeOf(startLine: number, startCol: number, endLine: number, endCol: number): Range {
  return { startLine, startCol, endLine, endCol };
}

/**
 * Build a single CaptureMatch from a record of captures.
 * Returns null if the record is empty; otherwise the record is frozen
 * (shallowly) and returned.
 */
function matchFrom(grouped: Record<string, Capture>): CaptureMatch | null {
  if (Object.keys(grouped).length === 0) return null;
  return Object.freeze(grouped) as CaptureMatch;
}

/**
 * End column used for a whole-line capture: index of the last character,
 * or 0 for an empty line.
 */
function endColFrom(line: string): number {
  return line.length > 0 ? line.length - 1 : 0;
}

/**
 * Whole-line range for a 1-based line number, or null when the line number
 * falls outside `lines` (such entries are skipped by the caller).
 */
function lineRange(lines: readonly string[], line: number): Range | null {
  const idx = line - 1;
  if (idx < 0 || idx >= lines.length) return null;
  return rangeOf(line, 0, line, endColFrom(lines[idx] ?? ''));
}

/** Create one capture per tag, all sharing the same range and text. */
function tagAll(tags: readonly string[], range: Range, text: string): Record<string, Capture> {
  const grouped: Record<string, Capture> = {};
  for (const tag of tags) grouped[tag] = capture(tag, range, text);
  return grouped;
}

/** Append the match built from `grouped` unless it is empty. */
function pushMatch(out: CaptureMatch[], grouped: Record<string, Capture>): void {
  const m = matchFrom(grouped);
  if (m !== null) out.push(m);
}

/**
 * Build the capture group for one program (primary or additional).
 *
 * `def` may be undefined when the tagger reported a program name without a
 * matching entry in `programs`; the extent then defaults to the whole file.
 */
function programCaptures(
  name: string,
  def: ProgramExtent | undefined,
  lines: readonly string[],
  cleanedLines: readonly string[],
): Record<string, Capture> {
  const startLine = def?.startLine ?? 1;
  const endLine = def?.endLine ?? lines.length;
  const endCol = endColFrom(lines[Math.min(endLine, lines.length) - 1] ?? '');

  // NOTE(review): the name range starts at a hard-coded column 7 and ends at
  // `line.length`, whereas every other range here ends at `length - 1`.
  // Kept as-is — confirm the intended column convention of `Range`.
  const progIdLine = findProgramIdLine(cleanedLines, name);
  const nameRange =
    progIdLine !== -1
      ? rangeOf(progIdLine, 7, progIdLine, lines[progIdLine - 1]?.length ?? endCol)
      : rangeOf(startLine, 0, endLine, endCol);

  // NOTE(review): `@scope.module` is anchored to the PROGRAM-ID line, not the
  // full program extent used by `@declaration.program`. Confirm the scope
  // extractor does not need the scope capture to enclose the paragraphs.
  const grouped: Record<string, Capture> = {
    '@scope.module': capture('@scope.module', nameRange, name),
    '@declaration.program': capture(
      '@declaration.program',
      rangeOf(startLine, 0, endLine, endCol),
      name,
    ),
    '@declaration.name': capture('@declaration.name', nameRange, name),
  };

  // Parameter count comes from PROCEDURE DIVISION USING; omitted when empty.
  const using = def?.procedureUsing;
  if (using && using.length > 0) {
    grouped['@declaration.parameter-count'] = capture(
      '@declaration.parameter-count',
      nameRange,
      String(using.length),
    );
  }
  return grouped;
}

// ---------------------------------------------------------------------------
// Main entry point
// ---------------------------------------------------------------------------

/**
 * Produce scope captures for one COBOL source text by wrapping the regex
 * tagger.
 *
 * Matches are emitted in a fixed order: the primary program, any other
 * programs, sections, paragraphs, COPY statements, CALLs, PERFORMs, GO TOs.
 * Entries whose line number falls outside the source are skipped.
 *
 * @param sourceText  Raw COBOL source.
 * @param _filePath   Path forwarded to the regex tagger.
 * @param _cachedTree Unused; present for signature parity with other providers.
 * @returns The capture matches, each one frozen.
 */
export function emitCobolScopeCaptures(
  sourceText: string,
  _filePath: string,
  _cachedTree?: unknown,
): readonly CaptureMatch[] {
  const lines = sourceText.split(/\r?\n/);
  // Preprocess first (original comment: strips patch markers from columns 1-6),
  // then run the regex tagger on the preprocessed text.
  const cleaned = preprocessCobolSource(sourceText);
  const extracted = extractCobolSymbolsWithRegex(cleaned, _filePath);
  // Split once; every PROGRAM-ID lookup below reuses this array.
  const cleanedLines = cleaned.split(/\r?\n/);

  const out: CaptureMatch[] = [];

  // ── 1. PROGRAM-ID → @scope.module (primary program) ──────────────
  const primaryName = extracted.programName;
  const primaryUpper = primaryName ? primaryName.toUpperCase() : undefined;
  if (primaryName) {
    const progDef = extracted.programs.find((p) => p.name.toUpperCase() === primaryUpper);
    pushMatch(out, programCaptures(primaryName, progDef, lines, cleanedLines));
  }

  // ── 2. Nested / additional programs → @scope.module ──────────────
  for (const prog of extracted.programs) {
    // Everything sharing the primary name was covered by step 1.
    if (primaryUpper !== undefined && prog.name.toUpperCase() === primaryUpper) continue;
    pushMatch(out, programCaptures(prog.name, prog, lines, cleanedLines));
  }

  // ── 3. PROCEDURE DIVISION sections → @scope.function ─────────────
  for (const section of extracted.sections) {
    const range = lineRange(lines, section.line);
    if (range !== null) pushMatch(out, tagAll(FUNCTION_TAGS, range, section.name));
  }

  // ── 4. Paragraphs → @scope.function ──────────────────────────────
  for (const para of extracted.paragraphs) {
    const range = lineRange(lines, para.line);
    if (range !== null) pushMatch(out, tagAll(FUNCTION_TAGS, range, para.name));
  }

  // ── 5. COPY → @import.statement ──────────────────────────────────
  for (const copy of extracted.copies) {
    const range = lineRange(lines, copy.line);
    if (range !== null) pushMatch(out, tagAll(IMPORT_TAGS, range, copy.target));
  }

  // ── 6. CALL (quoted/referenced) → @reference.call ────────────────
  for (const call of extracted.calls) {
    const range = lineRange(lines, call.line);
    if (range === null) continue;
    const grouped = tagAll(CALL_TAGS, range, call.target);
    // Arity from CALL USING parameters; omitted when there are none.
    if (call.parameters && call.parameters.length > 0) {
      grouped['@reference.arity'] = capture(
        '@reference.arity',
        range,
        String(call.parameters.length),
      );
    }
    pushMatch(out, grouped);
  }

  // ── 7. PERFORM → @reference.call ─────────────────────────────────
  for (const perf of extracted.performs) {
    const range = lineRange(lines, perf.line);
    if (range !== null) pushMatch(out, tagAll(CALL_TAGS, range, perf.target));
  }

  // ── 8. GO TO → @reference.call ───────────────────────────────────
  for (const gt of extracted.gotos) {
    const range = lineRange(lines, gt.line);
    if (range !== null) pushMatch(out, tagAll(CALL_TAGS, range, gt.target));
  }

  return out;
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Find the `PROGRAM-ID.` line for a given program name.
 *
 * Accepts either the cleaned source text or its already-split lines, so a
 * caller resolving several programs can split once. Matching is
 * case-insensitive. The name must not be followed by another name character
 * (letter, digit, `_` or `-`): a plain `\b` would let `OUTER` match the
 * `PROGRAM-ID. OUTER-PROG.` line because `-` counts as a word boundary.
 *
 * @returns 1-based line number, or -1 if not found.
 */
function findProgramIdLine(cleanedSource: string | readonly string[], programName: string): number {
  const lines = typeof cleanedSource === 'string' ? cleanedSource.split(/\r?\n/) : cleanedSource;
  const upper = programName.toUpperCase();
  const re = new RegExp(`\\bPROGRAM-ID\\.\\s*${escapeRegex(upper)}(?![A-Za-z0-9_-])`, 'i');
  for (let i = 0; i < lines.length; i++) {
    if (re.test(lines[i] ?? '')) return i + 1; // 1-based
  }
  return -1;
}

/** Escape regex metacharacters so `s` can be embedded literally in a pattern. */
function escapeRegex(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
// ─── interpretImport ──────────────────────────────────────────────────────

/**
 * Turn a COPY capture match into a `ParsedImport`.
 *
 * The copybook name is read from the `@import.name` capture and reused for
 * `localName`, `importedName` and `targetRaw` of a `'named'` import.
 *
 * @returns `null` when the match has no `@import.name` capture or its text
 *          is the empty string.
 */
export function interpretCobolImport(match: CaptureMatch): ParsedImport | null {
  const copyCapture = match['@import.name'];
  if (copyCapture === undefined || copyCapture.text === '') return null;

  const { text: copybook } = copyCapture;
  return {
    kind: 'named',
    localName: copybook,
    importedName: copybook,
    targetRaw: copybook,
  };
}

// ─── interpretTypeBinding ─────────────────────────────────────────────────

/**
 * Type-binding hook for COBOL. There is nothing to interpret, so the match
 * is ignored and the result is always `null`.
 */
export function interpretCobolTypeBinding(_match: CaptureMatch): ParsedTypeBinding | null {
  return null;
}

// ─── importOwningScope ────────────────────────────────────────────────────

/**
 * Pick the scope that owns a COPY import: the nearest `Module` scope.
 *
 * If `innermost` is itself a `Module` its id is returned. Otherwise the
 * ancestors reported by the tree are checked in the order the tree yields
 * them and the first `Module` wins, so a COPY found inside a paragraph is
 * attached to the program rather than to the paragraph's Function scope.
 *
 * @returns The owning Module scope id, or `null` when no Module is found
 *          (the original comment describes this as deferring to the central
 *          default).
 */
export function cobolImportOwningScope(
  _imp: ParsedImport,
  innermost: Scope,
  tree: ScopeTree,
): ScopeId | null {
  if (innermost.kind === 'Module') {
    return innermost.id;
  }

  for (const candidateId of tree.getAncestors(innermost.id)) {
    const candidate = tree.getScope(candidateId);
    if (candidate === undefined) continue;
    if (candidate.kind === 'Module') return candidateId;
  }

  return null;
}
/**
 * Receiver-binding hook for COBOL. The function scope is ignored and the
 * result is always `null` (no implicit receiver is modelled).
 */
export function cobolReceiverBinding(_functionScope: Scope): TypeRef | null {
  return null;
}

/** Copybook file extensions for COPY name resolution (lower-case, with dot). */
const COPYBOOK_EXTENSIONS = new Set(['.cpy', '.copybook']);

/** COBOL source extensions consulted only when no copybook matches. */
const COBOL_SOURCE_EXTENSIONS = new Set(['.cbl', '.cob', '.cobol']);

/**
 * Resolve a COPY bookname to a file path.
 *
 * The comparison is on the file's base name (extension removed),
 * case-insensitively. A copybook (`.cpy`, `.copybook`) always wins; the
 * first matching COBOL source file (`.cbl`, `.cob`, `.cobol`) is used only
 * if no copybook matches anywhere in `allFilePaths`.
 *
 * The extension is lower-cased for the set lookup only. `path.basename`
 * strips its suffix case-sensitively, so it must be given the extension
 * exactly as it appears in the path — otherwise `CUSTDAT.CPY` would keep
 * its extension and never match `CUSTDAT`.
 *
 * @param targetRaw    The copybook name as written in the COPY statement.
 * @param allFilePaths Candidate file paths.
 * @returns The matching path, or `null` when nothing matches.
 */
function resolveCopyTarget(targetRaw: string, allFilePaths: Iterable<string>): string | null {
  const wanted = targetRaw.toUpperCase();
  let sourceFallback: string | null = null;

  for (const fp of allFilePaths) {
    const rawExt = path.extname(fp);
    const ext = rawExt.toLowerCase();
    const isCopybook = COPYBOOK_EXTENSIONS.has(ext);
    if (!isCopybook && !COBOL_SOURCE_EXTENSIONS.has(ext)) continue;
    if (path.basename(fp, rawExt).toUpperCase() !== wanted) continue;

    if (isCopybook) return fp;
    // Remember only the first source-file hit; keep scanning for a copybook.
    if (sourceFallback === null) sourceFallback = fp;
  }
  return sourceFallback;
}

const cobolScopeResolver: ScopeResolver = {
  language: SupportedLanguages.Cobol,
  languageProvider: cobolProvider,
  importEdgeReason: 'cobol-scope: copy',

  // ── Resolve COPY bookname to file path ─────────────────────────────
  resolveImportTarget: (targetRaw, _fromFile, allFilePaths) =>
    resolveCopyTarget(targetRaw, allFilePaths),

  // No COBOL-specific merge rule: the existing bindings are returned as a copy.
  mergeBindings: (existing) => [...existing],

  // Arity: compare the call site's arity against the definition's
  // parameterCount. Either side missing → 'unknown'; otherwise an exact
  // match is required.
  arityCompatibility: (callsite, def) => {
    if (callsite.arity === undefined) return 'unknown';
    const defParamCount = def.parameterCount;
    if (defParamCount === undefined) return 'unknown';
    if (callsite.arity === defParamCount) return 'compatible';
    return 'incompatible';
  },

  // No inheritance in COBOL — empty MRO map.
  buildMro: () => new Map(),

  // Owner population is delegated to the shared walker.
  populateOwners: (parsed: ParsedFile) => populateClassOwnedMembers(parsed),

  // COBOL has no super calls.
  isSuperReceiver: () => false,

  // ── Optional toggles ─────────────────────────────────────────────
  fieldFallbackOnMethodLookup: false,
  propagatesReturnTypesAcrossImports: false,
};

export { cobolScopeResolver };
* - Truthy values: `'true'`, `'1'`, `'yes'` (case-insensitive, * whitespace-trimmed). Anything else — including `undefined`, empty * string, or unknown tokens — is `false`. diff --git a/gitnexus/src/core/ingestion/scope-resolution/pipeline/registry.ts b/gitnexus/src/core/ingestion/scope-resolution/pipeline/registry.ts index b7c1dc5c69..2a70dd1f6b 100644 --- a/gitnexus/src/core/ingestion/scope-resolution/pipeline/registry.ts +++ b/gitnexus/src/core/ingestion/scope-resolution/pipeline/registry.ts @@ -23,6 +23,7 @@ import { rustScopeResolver } from '../../languages/rust/scope-resolver.js'; import { javascriptScopeResolver } from '../../languages/javascript/scope-resolver.js'; import { kotlinScopeResolver } from '../../languages/kotlin/scope-resolver.js'; import { rubyScopeResolver } from '../../languages/ruby/scope-resolver.js'; +import { cobolScopeResolver } from '../../languages/cobol/scope-resolver.js'; /** Map of `SupportedLanguages` → `ScopeResolver`. The phase iterates * this map intersected with `MIGRATED_LANGUAGES` (the per-language @@ -44,4 +45,5 @@ export const SCOPE_RESOLVERS: ReadonlyMap = n [SupportedLanguages.JavaScript, javascriptScopeResolver], [SupportedLanguages.Kotlin, kotlinScopeResolver], [SupportedLanguages.Ruby, rubyScopeResolver], + [SupportedLanguages.Cobol, cobolScopeResolver], ]); diff --git a/gitnexus/test/fixtures/cobol/AUDITCONST.cpy b/gitnexus/test/fixtures/cobol/AUDITCONST.cpy new file mode 100644 index 0000000000..7549829f20 --- /dev/null +++ b/gitnexus/test/fixtures/cobol/AUDITCONST.cpy @@ -0,0 +1,2 @@ + 01 WS-CONSTANT-1 PIC X(10) VALUE 'AUDIT'. + 01 WS-CONSTANT-2 PIC X(10) VALUE 'LOG'. diff --git a/gitnexus/test/fixtures/cobol/AUDITLOG.cbl b/gitnexus/test/fixtures/cobol/AUDITLOG.cbl new file mode 100644 index 0000000000..3a294ea3cd --- /dev/null +++ b/gitnexus/test/fixtures/cobol/AUDITLOG.cbl @@ -0,0 +1,77 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. AUDITLOG. + + DATA DIVISION. + WORKING-STORAGE SECTION. 
+ 01 WS-LOG-MESSAGE PIC X(80). + 01 WS-TIMESTAMP PIC X(26). + 01 WS-IDX PIC 9(4). + 01 WS-EOF-FLAG PIC 9 VALUE 0. + 01 WS-PARAM-A PIC X(10). + 01 WS-PARAM-B PIC X(10). + 01 WS-PARAM-C PIC X(10). + 01 WS-INDEX PIC 9(2). + 01 WS-FLAG PIC 9. + COPY AUDITCONST. + COPY AUDITVARS. + + LINKAGE SECTION. + 01 LS-CUST-ID PIC 9(8). + 01 LS-AMOUNT PIC 9(7)V99. + + PROCEDURE DIVISION USING LS-CUST-ID LS-AMOUNT. + MAIN-PARAGRAPH. + PERFORM WRITE-LOG + PERFORM VARYING-TEST + PERFORM UNTIL-TEST + GOBACK. + + WRITE-LOG. + STRING 'Customer ' LS-CUST-ID ' amount ' LS-AMOUNT + DELIMITED BY SIZE INTO WS-LOG-MESSAGE + DISPLAY WS-LOG-MESSAGE. + + * PERFORM VARYING I FROM 1 BY 1 UNTIL I > 10 + VARYING-TEST. + PERFORM VARYING WS-IDX FROM 1 BY 1 + UNTIL WS-IDX > 10 + DISPLAY 'COUNT ' WS-IDX + END-PERFORM. + + * PERFORM UNTIL EOF-FLAG = 1 + UNTIL-TEST. + PERFORM UNTIL WS-EOF-FLAG = 1 + DISPLAY 'LOOPING' + END-PERFORM. + + * CALL with OMITTED (3 args: WS-PARAM-A, OMITTED, WS-PARAM-C) + CALL-OMITTED-TEST. + CALL 'PROCESS' USING WS-PARAM-A OMITTED WS-PARAM-C. + + * Nested IF with CALL inside + NESTED-IF-CALL. + IF WS-FLAG = 1 + IF WS-INDEX > 5 + CALL 'DEEPPROC' + ELSE + CALL 'SHALLOW' + END-IF + END-IF. + + * GO TO DEPENDING ON with 3 targets + GOTO-DEPENDING. + GO TO PARA-ONE PARA-TWO PARA-THREE + DEPENDING ON WS-INDEX. + + PARA-ONE. + DISPLAY 'ONE'. + + PARA-TWO. + DISPLAY 'TWO'. + + PARA-THREE. + DISPLAY 'THREE'. + + ENTRY "AUDITLOG-BATCH" USING LS-CUST-ID. + DISPLAY 'Batch audit for ' LS-CUST-ID + GOBACK. diff --git a/gitnexus/test/fixtures/cobol/AUDITVARS.cpy b/gitnexus/test/fixtures/cobol/AUDITVARS.cpy new file mode 100644 index 0000000000..781bd7566d --- /dev/null +++ b/gitnexus/test/fixtures/cobol/AUDITVARS.cpy @@ -0,0 +1,2 @@ + 01 WS-VAR-1 PIC X(10). + 01 WS-VAR-2 PIC X(10). 
diff --git a/gitnexus/test/fixtures/cobol/COPYLIB.cpy b/gitnexus/test/fixtures/cobol/COPYLIB.cpy new file mode 100644 index 0000000000..e78840d896 --- /dev/null +++ b/gitnexus/test/fixtures/cobol/COPYLIB.cpy @@ -0,0 +1,3 @@ + 01 PREFIX-RECORD. + 05 PREFIX-CODE PIC X(10). + 05 PREFIX-NAME PIC X(30). diff --git a/gitnexus/test/fixtures/cobol/CUSTDAT.cpy b/gitnexus/test/fixtures/cobol/CUSTDAT.cpy new file mode 100644 index 0000000000..52428837ae --- /dev/null +++ b/gitnexus/test/fixtures/cobol/CUSTDAT.cpy @@ -0,0 +1,6 @@ + 01 WS-CUSTOMER-DATA. + 05 WS-CUST-CODE PIC X(10). + 05 WS-CUST-TYPE PIC X(3). + 88 PREMIUM-CUSTOMER VALUE 'PRM'. + 88 REGULAR-CUSTOMER VALUE 'REG'. + 05 WS-CUST-ADDR PIC X(50). diff --git a/gitnexus/test/fixtures/cobol/CUSTUPDT.cbl b/gitnexus/test/fixtures/cobol/CUSTUPDT.cbl new file mode 100644 index 0000000000..978e289d13 --- /dev/null +++ b/gitnexus/test/fixtures/cobol/CUSTUPDT.cbl @@ -0,0 +1,74 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. CUSTUPDT. + AUTHOR. TEST. + + ENVIRONMENT DIVISION. + INPUT-OUTPUT SECTION. + FILE-CONTROL. + SELECT CUSTOMER-FILE ASSIGN TO 'CUSTFILE' + ORGANIZATION IS INDEXED + ACCESS IS DYNAMIC + RECORD KEY IS CUST-ID + FILE STATUS IS WS-FILE-STATUS. + + DATA DIVISION. + FILE SECTION. + FD CUSTOMER-FILE. + 01 CUSTOMER-RECORD. + 05 CUST-ID PIC 9(8). + 05 CUST-NAME PIC X(30). + 05 CUST-BALANCE PIC 9(7)V99. + + WORKING-STORAGE SECTION. + 01 WS-FILE-STATUS PIC XX. + 01 WS-CUSTOMER-NAME PIC X(30). + 01 WS-AMOUNT PIC 9(7)V99. + 01 WS-EOF PIC 9 VALUE 0. + 88 END-OF-FILE VALUE 1. + 01 WS-AMT PIC 9(5)V99. + 01 WS-PROG-NAME PIC X(8). + 01 FIELD-A PIC 9(5)V99. + 01 FIELD-B PIC 9(5)V99. + COPY COPYLIB REPLACING ==PREFIX-== BY ==WS-==. + + LINKAGE SECTION. + 01 LS-PARAM PIC X(20). + + PROCEDURE DIVISION. + INIT-SECTION SECTION. + MAIN-PARAGRAPH. + PERFORM INIT-PARAGRAPH + PERFORM PROCESS-PARAGRAPH + PERFORM CLEANUP-PARAGRAPH + STOP RUN. + + INIT-PARAGRAPH. + OPEN I-O CUSTOMER-FILE + MOVE SPACES TO WS-CUSTOMER-NAME. 
+ + PROCESSING-SECTION SECTION. + PROCESS-PARAGRAPH. + PERFORM READ-CUSTOMER THRU WRITE-CUSTOMER + CALL "AUDITLOG" USING CUST-ID WS-AMOUNT + CALL WS-PROG-NAME. + + READ-CUSTOMER. + READ CUSTOMER-FILE + NOT AT END + MOVE CUST-NAME TO WS-CUSTOMER-NAME + END-READ. + + UPDATE-BALANCE. + ADD WS-AMOUNT TO CUST-BALANCE + MOVE WS-AMOUNT TO CUST-BALANCE + MOVE WS-AMT TO FIELD-A FIELD-B. + + WRITE-CUSTOMER. + REWRITE CUSTOMER-RECORD. + + CLEANUP-PARAGRAPH. + CLOSE CUSTOMER-FILE. + + ENTRY 'ALTENTRY' USING LS-PARAM. + DISPLAY 'ALTERNATE ENTRY POINT' + GOBACK. diff --git a/gitnexus/test/fixtures/cobol/NESTED.cbl b/gitnexus/test/fixtures/cobol/NESTED.cbl new file mode 100644 index 0000000000..60af79ef7c --- /dev/null +++ b/gitnexus/test/fixtures/cobol/NESTED.cbl @@ -0,0 +1,33 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. OUTER-PROG. + + DATA DIVISION. + WORKING-STORAGE SECTION. + 01 WS-OUTER-FLAG PIC 9 VALUE 0. + + PROCEDURE DIVISION. + OUTER-MAIN. + PERFORM OUTER-PROCESS + CALL "INNER-PROG" + STOP RUN. + + OUTER-PROCESS. + DISPLAY 'OUTER PROCESSING'. + + IDENTIFICATION DIVISION. + PROGRAM-ID. INNER-PROG. + + DATA DIVISION. + WORKING-STORAGE SECTION. + 01 WS-INNER-CODE PIC X(5). + + PROCEDURE DIVISION. + INNER-MAIN. + PERFORM INNER-PROCESS + GOBACK. + + INNER-PROCESS. + DISPLAY 'INNER PROCESSING'. + + END PROGRAM INNER-PROG. + END PROGRAM OUTER-PROG. diff --git a/gitnexus/test/fixtures/cobol/RPTGEN.cbl b/gitnexus/test/fixtures/cobol/RPTGEN.cbl new file mode 100644 index 0000000000..3ef2c137f2 --- /dev/null +++ b/gitnexus/test/fixtures/cobol/RPTGEN.cbl @@ -0,0 +1,94 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. RPTGEN. + + DATA DIVISION. + WORKING-STORAGE SECTION. + COPY CUSTDAT. + 01 WS-REPORT-LINE PIC X(132). + 01 WS-SQL-CODE PIC S9(9) COMP. + 01 WS-COUNT PIC 9(4). + 01 WS-MAP-NAME PIC X(8). + 01 WS-SORT-FILE PIC X(8). + 01 WS-QUEUE-NAME PIC X(16). + 01 WS-NEXT-PGM PIC X(8). + + PROCEDURE DIVISION. + MAIN-PARAGRAPH. 
+ PERFORM FETCH-DATA + PERFORM FORMAT-REPORT + PERFORM SEND-SCREEN + CALL "CUSTUPDT" + GO TO EXIT-PARAGRAPH. + + FETCH-DATA. + EXEC SQL + SELECT CUST_NAME, CUST_BALANCE + FROM CUSTOMER + WHERE CUST_ID = :WS-CUST-CODE + END-EXEC. + + FORMAT-REPORT. + PERFORM WS-COUNT TIMES + MOVE WS-CUST-CODE TO WS-REPORT-LINE + END-PERFORM + PERFORM MAIN-PARAGRAPH THRU FORMAT-REPORT + IF WS-COUNT > 0 PERFORM FETCH-DATA + ELSE PERFORM SEND-SCREEN + END-IF + SORT WS-SORT-FILE USING CUSTOMER-DATA + GIVING WS-REPORT-LINE. + SORT WS-SORT-FILE ON ASCENDING KEY WS-COUNT + INPUT PROCEDURE IS BUILD-SORT-INPUT + OUTPUT PROCEDURE IS WRITE-SORTED. + MOVE CORR WS-CUSTOMER-DATA TO WS-REPORT-LINE + SEARCH WS-CUSTOMER-DATA + GO TO FETCH-DATA FORMAT-REPORT SEND-SCREEN + DEPENDING ON WS-COUNT. + + SEND-SCREEN. + EXEC CICS + SEND MAP(WS-MAP-NAME) MAPSET('CUSTSET') + FROM(WS-REPORT-LINE) + END-EXEC. + + EXEC CICS + LINK PROGRAM('AUDITLOG') + END-EXEC. + + EXEC CICS + XCTL PROGRAM('CUSTUPDT') + END-EXEC. + + EXEC CICS + READ FILE('CUSTFILE') + INTO(WS-CUSTOMER-DATA) + END-EXEC. + + EXEC CICS + WRITEQ TS QUEUE('RPTQUEUE') + FROM(WS-REPORT-LINE) + END-EXEC. + + EXEC CICS + HANDLE ABEND LABEL(ABEND-HANDLER) + END-EXEC. + + EXEC CICS + RETURN TRANSID('RPTG') + END-EXEC. + + EXEC CICS + XCTL PROGRAM(WS-NEXT-PGM) + END-EXEC. + + BUILD-SORT-INPUT. + DISPLAY 'BUILDING SORT INPUT'. + + WRITE-SORTED. + DISPLAY 'WRITING SORTED OUTPUT'. + + ABEND-HANDLER. + DISPLAY 'ABEND OCCURRED'. + + EXIT-PARAGRAPH. + STOP RUN. diff --git a/gitnexus/test/fixtures/cobol/empty-file.cbl b/gitnexus/test/fixtures/cobol/empty-file.cbl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/gitnexus/test/fixtures/cobol/fixed-format.cbl b/gitnexus/test/fixtures/cobol/fixed-format.cbl new file mode 100644 index 0000000000..8ccbb18376 --- /dev/null +++ b/gitnexus/test/fixtures/cobol/fixed-format.cbl @@ -0,0 +1,21 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. FIXEDFORMAT. 
+ * Fixed-format COBOL with sequence numbers in cols 1-6 +000100 ENVIRONMENT DIVISION. +000200 DATA DIVISION. +000300 WORKING-STORAGE SECTION. +000400 01 WS-COUNTER PIC 9(4) VALUE 0. +000500 01 WS-NAME PIC X(20). +000600 +000700 PROCEDURE DIVISION. +000800 MAIN-PARA. +000900 PERFORM INIT-PARA +001000 PERFORM PROCESS-PARA +001100 STOP RUN. +001200 +001300 INIT-PARA. +001400 MOVE 1 TO WS-COUNTER. +001500 +001600 PROCESS-PARA. +001700 CALL "LOGGER" +001800 GOBACK. diff --git a/gitnexus/test/fixtures/cobol/malformed-multiline.cbl b/gitnexus/test/fixtures/cobol/malformed-multiline.cbl new file mode 100644 index 0000000000..f023d870c7 --- /dev/null +++ b/gitnexus/test/fixtures/cobol/malformed-multiline.cbl @@ -0,0 +1,20 @@ + IDENTIFICATION DIVISION. + PROGRAM-ID. MALFORMED. + DATA DIVISION. + WORKING-STORAGE SECTION. + 01 WS-PGM PIC X(8) VALUE "OTHER". + PROCEDURE DIVISION. + MAIN. + * Incomplete statement (no period) + MOVE "TEST" TO WS-PGM + * CALL USING on separate lines + CALL "TARGET" + USING WS-PGM + RETURNING WS-PGM + * CALL without END-CALL across lines + CALL "MULTILINE" + USING WS-PGM + * GO TO with multiple targets + GO TO MAIN EXIT-PARA. + EXIT-PARA. + GOBACK. diff --git a/gitnexus/test/fixtures/cobol/whitespace-only.cbl b/gitnexus/test/fixtures/cobol/whitespace-only.cbl new file mode 100644 index 0000000000..fd40910d9e --- /dev/null +++ b/gitnexus/test/fixtures/cobol/whitespace-only.cbl @@ -0,0 +1,4 @@ + + + + diff --git a/gitnexus/test/integration/resolvers/cobol-scope.test.ts b/gitnexus/test/integration/resolvers/cobol-scope.test.ts new file mode 100644 index 0000000000..bd57656c9a --- /dev/null +++ b/gitnexus/test/integration/resolvers/cobol-scope.test.ts @@ -0,0 +1,553 @@ +/** + * COBOL scope-capture integration tests. + * + * These test that `emitCobolScopeCaptures` produces correct `CaptureMatch[]` + * from real COBOL source files, covering all 11 fixture classes. 
const FIXTURES = path.resolve(process.cwd(), 'test/fixtures/cobol');

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Minimal shape the helpers need from a capture match: tag → capture. */
type MatchLike = Readonly<Record<string, unknown>>;

/** Read a fixture from `test/fixtures/cobol` (relative to the cwd) as UTF-8. */
function readFixture(name: string): string {
  return fs.readFileSync(path.join(FIXTURES, name), 'utf-8');
}

/**
 * Text of the capture stored under `tag`, or `undefined` when the match has
 * no such capture or the capture carries no string `text`.
 */
function captureText(match: MatchLike, tag: string): string | undefined {
  const cap = match[tag];
  if (typeof cap !== 'object' || cap === null) return undefined;
  const text = (cap as { text?: unknown }).text;
  return typeof text === 'string' ? text : undefined;
}

/** Count the matches that carry a capture with exactly this tag. */
function countByName(captures: readonly MatchLike[], name: string): number {
  return captures.filter((m) => name in m).length;
}

/** First match whose `@declaration.name` text equals `name` (case-sensitive). */
function findDecl<T extends MatchLike>(captures: readonly T[], name: string): T | undefined {
  return captures.find((m) => captureText(m, '@declaration.name') === name);
}

/** First match whose `@reference.name` text equals `name` (case-sensitive). */
function findRef<T extends MatchLike>(captures: readonly T[], name: string): T | undefined {
  return captures.find((m) => captureText(m, '@reference.name') === name);
}

/** First match whose `@import.name` text equals `name`, ignoring case. */
function findImport<T extends MatchLike>(captures: readonly T[], name: string): T | undefined {
  const wanted = name.toUpperCase();
  return captures.find((m) => captureText(m, '@import.name')?.toUpperCase() === wanted);
}
/**
 * Class 1 — a single program: one module scope for the PROGRAM-ID, function
 * scopes for paragraphs, and call references for PERFORM / GO TO / CALL.
 */
describe('Class 1: Basic program structure — PROGRAM-ID + paragraphs + CALL + PERFORM', () => {
  it('AUDITLOG.cbl: PROGRAM-ID, PROCEDURE DIVISION USING, PERFORM', () => {
    const result = emitCobolScopeCaptures(readFixture('AUDITLOG.cbl'), 'AUDITLOG.cbl');
    expect(result.length).toBeGreaterThan(0);

    // Should have a @scope.module for AUDITLOG
    const moduleCount = countByName(result, '@scope.module');
    expect(moduleCount).toBe(1);

    // Should have @declaration.name = 'AUDITLOG'
    const auditlog = findDecl(result, 'AUDITLOG');
    expect(auditlog).toBeDefined();

    // Should have functions for paragraphs: MAIN-PARAGRAPH, WRITE-LOG
    const funcCount = countByName(result, '@scope.function');
    expect(funcCount).toBeGreaterThanOrEqual(2);

    // Should have PERFORM references
    const perfCount = countByName(result, '@reference.call');
    expect(perfCount).toBeGreaterThanOrEqual(1);

    // WRITE-LOG paragraph should be a function scope
    const writeLog = findDecl(result, 'WRITE-LOG');
    expect(writeLog).toBeDefined();

    // Gap 1: PERFORM VARYING captures target paragraph
    expect(findDecl(result, 'VARYING-TEST')).toBeDefined();

    // Gap 2: PERFORM UNTIL captures target paragraph
    expect(findDecl(result, 'UNTIL-TEST')).toBeDefined();

    // Gap 3: GO TO DEPENDING ON with 3 branches
    expect(findRef(result, 'PARA-ONE')).toBeDefined();
    expect(findRef(result, 'PARA-TWO')).toBeDefined();
    expect(findRef(result, 'PARA-THREE')).toBeDefined();

    // Gap 4: Multiple COPY statements
    const copyConst = findImport(result, 'AUDITCONST');
    expect(copyConst).toBeDefined();
    const copyVars = findImport(result, 'AUDITVARS');
    expect(copyVars).toBeDefined();
    // The two COPY statements must come back as distinct matches.
    expect(copyConst).not.toBe(copyVars);

    // Gap 5: CALL with OMITTED — arity counts actual params, OMITTED is a keyword placeholder
    const procRef = findRef(result, 'PROCESS');
    expect(procRef).toBeDefined();
    // Narrow to the only field read here; `undefined` covers a missing tag.
    const procMatch = procRef as Record<string, { text: string } | undefined>;
    // CALL 'PROCESS' USING WS-PARAM-A OMITTED WS-PARAM-C has 2 actual params
    expect(procMatch['@reference.arity']).toBeDefined();
    expect(procMatch['@reference.arity']?.text).toBe('2');

    // Gap 6: CALLs inside nested IF blocks
    expect(findRef(result, 'DEEPPROC')).toBeDefined();
    expect(findRef(result, 'SHALLOW')).toBeDefined();
  });

  it('RPTGEN.cbl: PROGRAM-ID, PERFORM, GO TO, SORT INPUT/OUTPUT PROCEDURE', () => {
    const result = emitCobolScopeCaptures(readFixture('RPTGEN.cbl'), 'RPTGEN.cbl');
    expect(result.length).toBeGreaterThan(0);

    // PROGRAM-ID
    expect(countByName(result, '@scope.module')).toBe(1);
    expect(findDecl(result, 'RPTGEN')).toBeDefined();

    // Paragraphs: MAIN-PARAGRAPH, FETCH-DATA, FORMAT-REPORT, etc.
    const funcCount = countByName(result, '@scope.function');
    expect(funcCount).toBeGreaterThanOrEqual(6);

    // REFERENCES for PERFORM and GO TO
    const refCount = countByName(result, '@reference.call');
    expect(refCount).toBeGreaterThanOrEqual(5);

    // GO TO DEPENDING ON should create multiple reference targets
    const gotoFetch = findRef(result, 'FETCH-DATA');
    expect(gotoFetch).toBeDefined();
    const gotoFormat = findRef(result, 'FORMAT-REPORT');
    expect(gotoFormat).toBeDefined();
  });
});

// ===========================================================================
// Class 2: COPY import
// ===========================================================================

/** Class 2 — COPY statements surface as `@import.name` captures, with or without REPLACING. */
describe('Class 2: COPY import — COPY bookname, COPY REPLACING', () => {
  it('RPTGEN.cbl: COPY CUSTDAT without REPLACING', () => {
    const result = emitCobolScopeCaptures(readFixture('RPTGEN.cbl'), 'RPTGEN.cbl');
    const imp = findImport(result, 'CUSTDAT');
    expect(imp).toBeDefined();
  });

  it('CUSTUPDT.cbl: COPY COPYLIB REPLACING ==PREFIX-== BY ==WS-==', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    const imp = findImport(result, 'COPYLIB');
    expect(imp).toBeDefined();
  });
});
// ===========================================================================
// Class 3: CALL USING
// ===========================================================================

/**
 * Class 3 — parameter counts: the callee's PROCEDURE DIVISION USING count and
 * the caller's CALL ... USING arity are both emitted as capture text.
 */
describe('Class 3: CALL USING — match/mismatch arity', () => {
  it('AUDITLOG.cbl: PROCEDURE DIVISION USING with 2 params', () => {
    const result = emitCobolScopeCaptures(readFixture('AUDITLOG.cbl'), 'AUDITLOG.cbl');
    // AUDITLOG has PROCEDURE DIVISION USING LS-CUST-ID LS-AMOUNT (2 params)
    const auditlog = findDecl(result, 'AUDITLOG');
    expect(auditlog).toBeDefined();
    // Narrow to the only field read here; `undefined` covers a missing tag.
    const match = auditlog as Record<string, { text: string } | undefined>;
    // Parameter count should be captured
    expect(match['@declaration.parameter-count']).toBeDefined();
    expect(match['@declaration.parameter-count']?.text).toBe('2');
  });

  it('CUSTUPDT.cbl: CALL "AUDITLOG" USING CUST-ID WS-AMOUNT (2 args)', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    // Should have CALL reference with arity=2
    const callRef = findRef(result, 'AUDITLOG');
    expect(callRef).toBeDefined();
    const match = callRef as Record<string, { text: string } | undefined>;
    expect(match['@reference.arity']).toBeDefined();
    expect(match['@reference.arity']?.text).toBe('2');
  });
});

// ===========================================================================
// Class 4: Dynamic CALL
// ===========================================================================

/** Class 4 — a CALL through a data item (no quotes) still yields a reference capture. */
describe('Class 4: Dynamic CALL — CALL WS-VAR, stays unresolved', () => {
  it('CUSTUPDT.cbl: CALL WS-PROG-NAME (dynamic, no quotes)', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    // WS-PROG-NAME should appear as a CALL reference
    const dynCall = findRef(result, 'WS-PROG-NAME');
    expect(dynCall).toBeDefined();
  });
});

// ===========================================================================
// Class 5: Nested programs
// ===========================================================================

/** Class 5 — one file with two PROGRAM-IDs yields two module scopes. */
describe('Class 5: Nested programs — multiple PROGRAM-IDs, scope isolation', () => {
  it('NESTED.cbl: OUTER-PROG + INNER-PROG in one file', () => {
    const result = emitCobolScopeCaptures(readFixture('NESTED.cbl'), 'NESTED.cbl');
    // Two PROGRAM-IDs → 2 @scope.module captures
    const moduleCount = countByName(result, '@scope.module');
    expect(moduleCount).toBe(2);

    // Both program names should appear
    expect(findDecl(result, 'OUTER-PROG')).toBeDefined();
    expect(findDecl(result, 'INNER-PROG')).toBeDefined();

    // Paragraphs in both programs
    expect(findDecl(result, 'OUTER-MAIN')).toBeDefined();
    expect(findDecl(result, 'OUTER-PROCESS')).toBeDefined();
    expect(findDecl(result, 'INNER-MAIN')).toBeDefined();
    expect(findDecl(result, 'INNER-PROCESS')).toBeDefined();

    // CALL "INNER-PROG" reference
    expect(findRef(result, 'INNER-PROG')).toBeDefined();
  });
});

// ===========================================================================
// Class 6: SECTION vs PARAGRAPH
// ===========================================================================

/** Class 6 — sections and paragraphs are both declared and both count as function scopes. */
describe('Class 6: SECTION vs PARAGRAPH — both map to Function', () => {
  it('CUSTUPDT.cbl: Sections (INIT-SECTION, PROCESSING-SECTION) + paragraphs', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    // Sections declared as functions
    expect(findDecl(result, 'INIT-SECTION')).toBeDefined();
    expect(findDecl(result, 'PROCESSING-SECTION')).toBeDefined();

    // Paragraphs inside sections
    expect(findDecl(result, 'MAIN-PARAGRAPH')).toBeDefined();
    expect(findDecl(result, 'INIT-PARAGRAPH')).toBeDefined();
    expect(findDecl(result, 'PROCESS-PARAGRAPH')).toBeDefined();

    // Both are @scope.function
    const funcCount = countByName(result, '@scope.function');
    expect(funcCount).toBeGreaterThanOrEqual(5);
  });
});

// ===========================================================================
// Class 7: Single-quoted CALL/COPY
// ===========================================================================

/** Class 7 — smoke test that a file using a single-quoted ENTRY literal still produces captures. */
describe('Class 7: Single-quoted CALL/COPY — the #500 regression case', () => {
  it('CUSTUPDT.cbl: ENTRY with single quotes ALTENTRY', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    // ENTRY 'ALTENTRY' uses single quotes — should still produce captures
    // (ENTRY points are recognized by the regex tagger)
    // Verify the file processed without error
    // NOTE(review): nothing here asserts on ALTENTRY itself — consider pinning
    // the capture it should produce once its expected shape is confirmed.
    expect(result.length).toBeGreaterThan(0);
  });
});

// ===========================================================================
// Class 8: Fixed-format with sequence numbers
// ===========================================================================

/** Class 8 — sequence numbers in columns 1-6 must not hide the program, its paragraphs or its CALL. */
describe('Class 8: Fixed-format — sequence numbers in cols 1-6, Area A/B detection', () => {
  it('fixed-format.cbl: sequence numbers 000100-001800, Area A paragraphs', () => {
    const result = emitCobolScopeCaptures(readFixture('fixed-format.cbl'), 'fixed-format.cbl');
    expect(result.length).toBeGreaterThan(0);

    // Should detect PROGRAM-ID FIXEDFORMAT
    expect(findDecl(result, 'FIXEDFORMAT')).toBeDefined();
    expect(countByName(result, '@scope.module')).toBe(1);

    // Paragraphs MAIN-PARA, INIT-PARA, PROCESS-PARA
    expect(findDecl(result, 'MAIN-PARA')).toBeDefined();
    expect(findDecl(result, 'INIT-PARA')).toBeDefined();
    expect(findDecl(result, 'PROCESS-PARA')).toBeDefined();

    // CALL "LOGGER" reference
    expect(findRef(result, 'LOGGER')).toBeDefined();
  });
});

// ===========================================================================
// Class 9: Edge: malformed/multiline
// ===========================================================================

/** Class 9 — statements without a period and CALL ... USING split across lines are still captured. */
describe('Class 9: Edge: malformed/multiline — incomplete statements, CALL USING on separate lines', () => {
  it('malformed-multiline.cbl: multiline CALL, incomplete statements', () => {
    const result = emitCobolScopeCaptures(
      readFixture('malformed-multiline.cbl'),
      'malformed-multiline.cbl',
    );
    expect(result.length).toBeGreaterThan(0);

    // Should still detect PROGRAM-ID
    expect(findDecl(result, 'MALFORMED')).toBeDefined();

    // Paragraphs MAIN, EXIT-PARA
    expect(findDecl(result, 'MAIN')).toBeDefined();
    expect(findDecl(result, 'EXIT-PARA')).toBeDefined();

    // CALL "TARGET" should be captured (multi-line CALL with USING)
    const target = findRef(result, 'TARGET');
    expect(target).toBeDefined();

    // CALL "MULTILINE" should also be captured
    const multi = findRef(result, 'MULTILINE');
    expect(multi).toBeDefined();

    // GO TO EXIT-PARA reference
    const exitRef = findRef(result, 'EXIT-PARA');
    expect(exitRef).toBeDefined();
  });
});

// ===========================================================================
// Class 10: Edge: empty/whitespace file
// ===========================================================================

/** Class 10 — files with no COBOL content yield an empty capture list. */
describe('Class 10: Edge: empty/whitespace file — must not throw', () => {
  it('empty-file.cbl: empty file produces empty captures', () => {
    const result = emitCobolScopeCaptures(readFixture('empty-file.cbl'), 'empty-file.cbl');
    expect(result).toEqual([]);
  });

  it('whitespace-only.cbl: whitespace-only file produces empty captures', () => {
    const result = emitCobolScopeCaptures(
      readFixture('whitespace-only.cbl'),
      'whitespace-only.cbl',
    );
    expect(result).toEqual([]);
  });
});

// ===========================================================================
// Class 11: Legacy parity — legacy processor doesn't crash on fixtures
// ===========================================================================

/** Class 11 — every `.cbl` fixture can be fed through the capture emitter without throwing. */
describe('Class 11: Legacy parity — legacy processor handles fixtures', () => {
  it('all fixtures can be processed without error', () => {
    const files = fs.readdirSync(FIXTURES).filter((f) => f.endsWith('.cbl'));
    expect(files.length).toBeGreaterThan(5);

    for (const file of files) {
      const source = readFixture(file);
      // Just running the regex tagger should not throw.
      // The fixture name is passed as the failure message so a failing file is identifiable.
      expect(() => emitCobolScopeCaptures(source, file), file).not.toThrow();
    }
  });
});

// ===========================================================================
// Cross-cutting: all fixtures produce expected structure
// ===========================================================================

/** Structural invariants checked across every fixture that contains COBOL. */
describe('Cross-cutting structure verification', () => {
  // Fixtures that intentionally contain no COBOL and therefore emit no captures.
  const emptyFixtures: ReadonlySet<string> = new Set(['empty-file.cbl', 'whitespace-only.cbl']);

  /** List the `.cbl` fixtures expected to emit at least one capture. */
  const listNonEmptyFixtures = (): string[] =>
    fs.readdirSync(FIXTURES).filter((f) => f.endsWith('.cbl') && !emptyFixtures.has(f));

  it('every non-empty COBOL file produces at least one @scope.module', () => {
    for (const file of listNonEmptyFixtures()) {
      const result = emitCobolScopeCaptures(readFixture(file), file);
      const modCount = countByName(result, '@scope.module');
      // Fixture name doubles as the failure message.
      expect(modCount, file).toBeGreaterThanOrEqual(1);
    }
  });

  it('every @scope.function has a matching @declaration.name', () => {
    for (const file of listNonEmptyFixtures()) {
      const result = emitCobolScopeCaptures(readFixture(file), file);
      const funcScopes = result.filter((m) => '@scope.function' in m);
      const funcDecls = result.filter((m) => '@declaration.function' in m);
      // NOTE(review): this compares counts of `@declaration.function`, not the
      // presence of `@declaration.name` that the title mentions — confirm intent.
      expect(funcDecls.length, file).toBe(funcScopes.length);
    }
  });
});

// ===========================================================================
// Reviewer Check 1: Program-ID scope isolation
// ===========================================================================

/** Reviewer Check 1 — module-scope counts and paragraph declarations per PROGRAM-ID. */
describe('Reviewer Check 1: Program-ID scope isolation — CALLs in different PROGRAM-IDs', () => {
  it('NESTED.cbl: OUTER-PROG and INNER-PROG each have isolated paragraphs', () => {
    const result = emitCobolScopeCaptures(readFixture('NESTED.cbl'), 'NESTED.cbl');

    // Both programs produce separate @scope.module captures
    const mods = result.filter((m) => '@scope.module' in m);
    expect(mods.length).toBe(2);

    // OUTER-PROG-related paragraphs
    expect(findDecl(result, 'OUTER-MAIN')).toBeDefined();
    expect(findDecl(result, 'OUTER-PROCESS')).toBeDefined();
    // INNER-PROG-related paragraphs
    expect(findDecl(result, 'INNER-MAIN')).toBeDefined();
    expect(findDecl(result, 'INNER-PROCESS')).toBeDefined();

    // CALL "INNER-PROG" from OUTER-PROG
    expect(findRef(result, 'INNER-PROG')).toBeDefined();
  });

  it('CUSTUPDT.cbl: CALLs in one program do not cross-contaminate paragraphs', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    // CUSTUPDT is a single PROGRAM-ID; all paragraphs belong to it
    expect(countByName(result, '@scope.module')).toBe(1);
    // Verify several distinct paragraphs exist
    expect(findDecl(result, 'MAIN-PARAGRAPH')).toBeDefined();
    expect(findDecl(result, 'INIT-PARAGRAPH')).toBeDefined();
    expect(findDecl(result, 'PROCESS-PARAGRAPH')).toBeDefined();
    expect(findDecl(result, 'READ-CUSTOMER')).toBeDefined();
    expect(findDecl(result, 'UPDATE-BALANCE')).toBeDefined();
    expect(findDecl(result, 'WRITE-CUSTOMER')).toBeDefined();
    expect(findDecl(result, 'CLEANUP-PARAGRAPH')).toBeDefined();
  });
});

// ===========================================================================
// Reviewer Check 2: COPY REPLACING capture range consistency
// ===========================================================================

/**
 * Reviewer Check 2 — the `@import.statement` range must point back at the
 * line of the original fixture text that holds the COPY statement.
 */
describe('Reviewer Check 2: COPY REPLACING — capture ranges from transformed source', () => {
  it('CUSTUPDT.cbl: COPY COPYLIB REPLACING capture range matches source line', () => {
    const source = readFixture('CUSTUPDT.cbl');
    const lines = source.split('\n');
    const result = emitCobolScopeCaptures(source, 'CUSTUPDT.cbl');

    const copyMatch = findImport(result, 'COPYLIB');
    expect(copyMatch).toBeDefined();

    // Find '@import.statement' within the match
    const importCap = copyMatch as Record<
      string,
      { name: string; range: { startLine: number }; text: string }
    >;
    const stmt = importCap['@import.statement'];
    expect(stmt).toBeDefined();
    // Range should reference a valid line in the source
    // (the `- 1` treats startLine as 1-based — TODO confirm against the emitter).
    const lineIdx = stmt.range.startLine - 1;
    expect(lineIdx).toBeGreaterThanOrEqual(0);
    expect(lineIdx).toBeLessThan(lines.length);
    // The line should contain COPY...COPYLIB
    expect(lines[lineIdx].toUpperCase()).toContain('COPY');
    expect(lines[lineIdx].toUpperCase()).toContain('COPYLIB');
  });

  it('RPTGEN.cbl: COPY CUSTDAT capture range matches source line', () => {
    const source = readFixture('RPTGEN.cbl');
    const lines = source.split('\n');
    const result = emitCobolScopeCaptures(source, 'RPTGEN.cbl');

    const copyMatch = findImport(result, 'CUSTDAT');
    expect(copyMatch).toBeDefined();

    const importCap = copyMatch as Record<
      string,
      { name: string; range: { startLine: number }; text: string }
    >;
    const stmt = importCap['@import.statement'];
    expect(stmt).toBeDefined();
    const lineIdx = stmt.range.startLine - 1;
    expect(lineIdx).toBeGreaterThanOrEqual(0);
    expect(lineIdx).toBeLessThan(lines.length);
    expect(lines[lineIdx].toUpperCase()).toContain('COPY');
    expect(lines[lineIdx].toUpperCase()).toContain('CUSTDAT');
  });
});

// ===========================================================================
// Reviewer Check 3: Import ownership scope
// ===========================================================================

/**
 * Reviewer Check 3 — exercises `cobolImportOwningScope` directly against a
 * hand-built scope tree instead of going through the capture emitter.
 */
describe('Reviewer Check 3: importOwningScope returns Module scope for COPY', () => {
  it('importOwningScope walks from paragraph to enclosing Module', async () => {
    // Test the importOwningScope function directly
    const { cobolImportOwningScope } =
      await import('../../../src/core/ingestion/languages/cobol/interpret.js');
    // Simulate an import at paragraph (Function) scope:
    // the function should walk up to find the Module.
    // The mock is cast through `any` because the real tree/scope types are not imported here.
    const mockTree = {
      getScope: (id: string) => {
        if (id === 'func:test') return { id: 'func:test', kind: 'Function' } as any;
        if (id === 'mod:test') return { id: 'mod:test', kind: 'Module' } as any;
        return undefined;
      },
      getAncestors: (_id: string) => ['mod:test'],
      getParent: (_id: string) => undefined,
      getChildren: (_id: string) => [],
      has: (_id: string) => true,
      byId: new Map(),
      size: 2,
    };

    const paraScope = {
      id: 'func:test',
      kind: 'Function',
      name: 'TEST-PARA',
      range: { startLine: 1, startCol: 0, endLine: 2, endCol: 0 },
    } as any;
    const result = cobolImportOwningScope(null as any, paraScope, mockTree);
    expect(result).toBe('mod:test');
  });

  it('importOwningScope returns innermost when already in Module scope', async () => {
    const { cobolImportOwningScope } =
      await import('../../../src/core/ingestion/languages/cobol/interpret.js');
    // This tree reports no scopes and no ancestors, so the Module scope passed in is the expected result.
    const mockTree = {
      getScope: () => undefined,
      getAncestors: () => [],
      getParent: () => undefined,
      getChildren: () => [],
      has: () => true,
      byId: new Map(),
      size: 1,
    };
    const modScope = {
      id: 'mod:test',
      kind: 'Module',
      name: 'MYPROG',
      range: { startLine: 1, startCol: 0, endLine: 100, endCol: 0 },
    } as any;
    const result = cobolImportOwningScope(null as any, modScope, mockTree);
    expect(result).toBe('mod:test');
  });
});

// ===========================================================================
// Reviewer Check 5: Dynamic CALL produces no CALLS edge, CodeElement annotation
// ===========================================================================

/** Reviewer Check 5 — a dynamic CALL is emitted with a `@reference.call` capture. */
describe('Reviewer Check 5: Dynamic CALL — CALL WS-VAR captures as reference', () => {
  it('CUSTUPDT.cbl: CALL WS-PROG-NAME is captured as a dynamic reference', () => {
    const result = emitCobolScopeCaptures(readFixture('CUSTUPDT.cbl'), 'CUSTUPDT.cbl');
    // WS-PROG-NAME should appear as a CALL reference (dynamic, no quotes)
    const dynRef = findRef(result, 'WS-PROG-NAME');
    expect(dynRef).toBeDefined();

    // Verify it's a @reference.call, not a resolved edge
    const ref = dynRef as Record<string, unknown>;
    expect(ref['@reference.call']).toBeDefined();
  });
});