Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions gitnexus/src/core/ingestion/call-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import { getLanguageFromFilename } from './utils/language-detection.js';
import { isVerboseIngestionEnabled } from './utils/verbose.js';
import { yieldToEventLoop } from './utils/event-loop.js';
import { FUNCTION_NODE_TYPES, extractFunctionName, findEnclosingClassId } from './utils/ast-helpers.js';
import { isBuiltInOrNoise } from './utils/noise-filter.js';
import {
countCallArguments,
inferCallForm,
Expand Down Expand Up @@ -497,7 +496,7 @@ export const processCalls = async (
}
}

if (isBuiltInOrNoise(calledName)) return;
if (provider.isBuiltInName(calledName)) return;

const callNode = captureMap['call'];
const callForm = inferCallForm(callNode, nameNode);
Expand Down
14 changes: 13 additions & 1 deletion gitnexus/src/core/ingestion/language-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ interface LanguageProviderConfig {
* When true, the worker extracts routes via the language's route extraction logic.
* Default: undefined (no route files). */
readonly isRouteFile?: (filePath: string) => boolean;

// ── Noise filtering ────────────────────────────────────────────────
/** Built-in/stdlib names that should be filtered from the call graph for this language.
* Default: undefined (no language-specific filtering). */
readonly builtInNames?: ReadonlySet<string>;
}

/** Runtime type — same as LanguageProviderConfig but with defaults guaranteed present. */
Expand All @@ -119,6 +124,8 @@ export interface LanguageProvider extends Omit<LanguageProviderConfig,
readonly importSemantics: ImportSemantics;
readonly heritageDefaultEdge: 'EXTENDS' | 'IMPLEMENTS';
readonly mroStrategy: MroStrategy;
/** Check if a name is a built-in/stdlib function that should be filtered from the call graph. */
readonly isBuiltInName: (name: string) => boolean;
}

const DEFAULTS: Pick<LanguageProvider, 'importSemantics' | 'heritageDefaultEdge' | 'mroStrategy'> = {
Expand All @@ -129,5 +136,10 @@ const DEFAULTS: Pick<LanguageProvider, 'importSemantics' | 'heritageDefaultEdge'

/**
 * Define a language provider — required fields must be supplied, optional fields get sensible defaults.
 *
 * @param config - Language-specific configuration. `builtInNames` is optional.
 * @returns `DEFAULTS` overlaid with `config`, plus an `isBuiltInName` predicate that tests
 *   membership in `config.builtInNames` and always answers `false` when no set was supplied.
 */
export function defineLanguage(config: LanguageProviderConfig): LanguageProvider {
  // Capture the set once so the predicate closes over a narrowed, non-optional reference.
  const builtIns = config.builtInNames;
  return {
    ...DEFAULTS,
    ...config,
    // Listed after the spreads so the derived predicate is always the one callers see.
    isBuiltInName: builtIns ? (name: string) => builtIns.has(name) : () => false,
  };
}
24 changes: 24 additions & 0 deletions gitnexus/src/core/ingestion/languages/c-cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,28 @@ import { C_QUERIES, CPP_QUERIES } from '../tree-sitter-queries.js';
import { isCppInsideClassOrStruct } from '../utils/ast-helpers.js';
import type { LanguageProvider } from '../language-provider.js';

/**
 * Call names that are dropped from the call graph for C-family sources.
 * Entries are matched by exact, case-sensitive name.
 */
const C_BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Formatted output
  'printf', 'fprintf', 'sprintf', 'snprintf',
  'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
  // Formatted input
  'scanf', 'fscanf', 'sscanf',
  // Heap allocation and raw memory
  'malloc', 'calloc', 'realloc', 'free',
  'memcpy', 'memmove', 'memset', 'memcmp',
  // String handling
  'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat',
  'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
  // Numeric parsing
  'atoi', 'atol', 'atof',
  'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
  // Operator-like keywords and macros
  'sizeof', 'offsetof', 'typeof',
  // Assertions and process termination
  'assert', 'abort', 'exit', '_exit',
  // Stream I/O
  'fopen', 'fclose', 'fread', 'fwrite', 'fseek',
  'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
  // Linux-kernel style branch hints and diagnostics
  'likely', 'unlikely',
  'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
  // Error-pointer helpers
  'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
  // Container and list macros
  'ARRAY_SIZE', 'container_of',
  'list_for_each_entry', 'list_for_each_entry_safe',
  // Small numeric utilities
  'min', 'max', 'clamp', 'abs', 'swap',
  // Kernel logging
  'pr_info', 'pr_warn', 'pr_err', 'pr_debug',
  'pr_notice', 'pr_crit', 'pr_emerg',
  'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
  // Allocation flags
  'GFP_KERNEL', 'GFP_ATOMIC',
  // Locking
  'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
  'mutex_lock', 'mutex_unlock', 'mutex_init',
  // Kernel allocators
  'kfree', 'kmalloc', 'kzalloc', 'kcalloc',
  'krealloc', 'kvmalloc', 'kvfree',
  // Very generic accessor names
  'get', 'put',
]);

/** Label override shared by C and C++: skip function_definition captures inside class/struct
* bodies (they're duplicates of definition.method captures). */
const cppLabelOverride: NonNullable<LanguageProvider['labelOverride']> = (functionNode, defaultLabel) => {
Expand All @@ -34,6 +56,7 @@ export const cProvider = defineLanguage({
importResolver: resolveCImport,
importSemantics: 'wildcard',
labelOverride: cppLabelOverride,
builtInNames: C_BUILT_INS,
});

export const cppProvider = defineLanguage({
Expand All @@ -46,4 +69,5 @@ export const cppProvider = defineLanguage({
importSemantics: 'wildcard',
mroStrategy: 'leftmost-base',
labelOverride: cppLabelOverride,
builtInNames: C_BUILT_INS,
});
22 changes: 22 additions & 0 deletions gitnexus/src/core/ingestion/languages/csharp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,27 @@ import { resolveCSharpImport } from '../import-resolvers/csharp.js';
import { extractCSharpNamedBindings } from '../named-bindings/csharp.js';
import { CSHARP_QUERIES } from '../tree-sitter-queries.js';

/**
 * Call names that are dropped from the call graph for C# sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Console I/O
  'Console', 'WriteLine', 'ReadLine', 'Write',
  // Task and await plumbing
  'Task', 'Run', 'Wait', 'WhenAll', 'WhenAny',
  'FromResult', 'Delay', 'ContinueWith',
  'ConfigureAwait', 'GetAwaiter', 'GetResult',
  // Members every object exposes
  'ToString', 'GetType', 'Equals', 'GetHashCode', 'ReferenceEquals',
  // Collection membership and quantifiers
  'Add', 'Remove', 'Contains', 'Clear', 'Count', 'Any', 'All',
  // Query operators
  'Where', 'Select', 'SelectMany',
  'OrderBy', 'OrderByDescending', 'GroupBy',
  'First', 'FirstOrDefault', 'Single', 'SingleOrDefault',
  'Last', 'LastOrDefault',
  'ToList', 'ToArray', 'ToDictionary', 'AsEnumerable', 'AsQueryable',
  'Aggregate', 'Sum', 'Average', 'Min', 'Max',
  'Distinct', 'Skip', 'Take',
  // String helpers
  'String', 'Format', 'IsNullOrEmpty', 'IsNullOrWhiteSpace',
  'Concat', 'Join',
  'Trim', 'TrimStart', 'TrimEnd', 'Split', 'Replace',
  'StartsWith', 'EndsWith',
  // Type conversion
  'Convert', 'ToInt32', 'ToDouble', 'ToBoolean', 'ToByte',
  // Math
  'Math', 'Abs', 'Ceiling', 'Floor', 'Round', 'Pow', 'Sqrt',
  // Resource cleanup
  'Dispose', 'Close',
  // Parsing
  'TryParse', 'Parse',
  // List helpers
  'AddRange', 'RemoveAt', 'RemoveAll', 'FindAll', 'Exists', 'TrueForAll',
  // Dictionary helpers
  'ContainsKey', 'TryGetValue', 'AddOrUpdate',
  // Guard helpers
  'Throw', 'ThrowIfNull',
]);

export const csharpProvider = defineLanguage({
id: SupportedLanguages.CSharp,
extensions: ['.cs'],
Expand All @@ -24,4 +45,5 @@ export const csharpProvider = defineLanguage({
namedBindingExtractor: extractCSharpNamedBindings,
interfaceNamePattern: /^I[A-Z]/,
mroStrategy: 'implements-split',
builtInNames: BUILT_INS,
});
10 changes: 10 additions & 0 deletions gitnexus/src/core/ingestion/languages/dart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ import { dartExportChecker } from '../export-detection.js';
import { resolveDartImport } from '../import-resolvers/dart.js';
import { DART_QUERIES } from '../tree-sitter-queries.js';

/**
 * Call names that are dropped from the call graph for Dart sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Widget state and debug printing
  'setState', 'mounted', 'debugPrint',
  // App bootstrap and modal UI
  'runApp', 'showDialog', 'showModalBottomSheet',
  // Navigation
  'Navigator', 'push', 'pushNamed', 'pushReplacement',
  'pop', 'maybePop',
  // Snack bars
  'ScaffoldMessenger', 'showSnackBar',
  // Lifecycle hooks and debug dumps
  'deactivate', 'reassemble',
  'debugDumpApp', 'debugDumpRenderTree',
  // Asynchronous callbacks
  'then', 'catchError', 'whenComplete', 'listen',
]);

export const dartProvider = defineLanguage({
id: SupportedLanguages.Dart,
extensions: ['.dart'],
Expand All @@ -22,4 +31,5 @@ export const dartProvider = defineLanguage({
exportChecker: dartExportChecker,
importResolver: resolveDartImport,
importSemantics: 'wildcard',
builtInNames: BUILT_INS,
});
16 changes: 16 additions & 0 deletions gitnexus/src/core/ingestion/languages/kotlin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,21 @@ import { appendKotlinWildcard } from '../import-resolvers/jvm.js';
import { KOTLIN_QUERIES } from '../tree-sitter-queries.js';
import { isKotlinClassMethod } from '../utils/ast-helpers.js';

/**
 * Call names that are dropped from the call graph for Kotlin sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Console I/O, preconditions and misc top-level helpers
  'println', 'print', 'readLine',
  'require', 'requireNotNull', 'check', 'assert',
  'lazy', 'error',
  // Collection factories
  'listOf', 'mapOf', 'setOf',
  'mutableListOf', 'mutableMapOf', 'mutableSetOf',
  'arrayOf', 'sequenceOf',
  // Scope functions
  'also', 'apply', 'run', 'with', 'takeIf', 'takeUnless',
  // Placeholders and builders
  'TODO', 'buildString', 'buildList', 'buildMap', 'buildSet',
  // Looping and locking helpers
  'repeat', 'synchronized',
  // Coroutines
  'launch', 'async', 'runBlocking', 'withContext',
  'coroutineScope', 'supervisorScope', 'delay',
  // Flow construction and operators
  'flow', 'flowOf', 'collect', 'emit', 'onEach', 'catch',
  'buffer', 'conflate', 'distinctUntilChanged',
  'flatMapLatest', 'flatMapMerge', 'combine',
  'stateIn', 'shareIn', 'launchIn',
  // Pair and range builders
  'to', 'until', 'downTo', 'step',
]);

export const kotlinProvider = defineLanguage({
id: SupportedLanguages.Kotlin,
extensions: ['.kt', '.kts'],
Expand All @@ -27,6 +42,7 @@ export const kotlinProvider = defineLanguage({
namedBindingExtractor: extractKotlinNamedBindings,
importPathPreprocessor: appendKotlinWildcard,
mroStrategy: 'implements-split',
builtInNames: BUILT_INS,
labelOverride: (functionNode, defaultLabel) => {
if (defaultLabel !== 'Function') return defaultLabel;
if (isKotlinClassMethod(functionNode)) return 'Method';
Expand Down
20 changes: 20 additions & 0 deletions gitnexus/src/core/ingestion/languages/php.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@ import { PHP_QUERIES } from '../tree-sitter-queries.js';
import { findDescendant, extractStringContent } from '../utils/ast-helpers.js';
import type { NodeLabel } from '../../graph/types.js';

/**
 * Call names that are dropped from the call graph for PHP sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Language constructs and variable helpers
  'echo', 'isset', 'empty', 'unset',
  'list', 'array', 'compact', 'extract',
  // Counting and string functions
  'count', 'strlen', 'strpos', 'strrpos', 'substr',
  'strtolower', 'strtoupper',
  'trim', 'ltrim', 'rtrim',
  'str_replace', 'str_contains', 'str_starts_with', 'str_ends_with',
  // Formatting
  'sprintf', 'vsprintf', 'printf', 'number_format',
  // Array functions
  'array_map', 'array_filter', 'array_reduce',
  'array_push', 'array_pop', 'array_shift', 'array_unshift',
  'array_slice', 'array_splice', 'array_merge',
  'array_keys', 'array_values',
  'array_key_exists', 'in_array', 'array_search', 'array_unique',
  'usort', 'rsort',
  // Serialization
  'json_encode', 'json_decode', 'serialize', 'unserialize',
  // Type casts and type checks
  'intval', 'floatval', 'strval', 'boolval',
  'is_null', 'is_string', 'is_int', 'is_array',
  'is_object', 'is_numeric', 'is_bool', 'is_float',
  // Debug output
  'var_dump', 'print_r', 'var_export',
  // Date and time
  'date', 'time', 'strtotime', 'mktime', 'microtime',
  // Filesystem
  'file_exists', 'file_get_contents', 'file_put_contents',
  'is_file', 'is_dir',
  // Regular expressions
  'preg_match', 'preg_match_all', 'preg_replace', 'preg_split',
  // HTTP headers, sessions and output buffering
  'header', 'session_start', 'session_destroy',
  'ob_start', 'ob_end_clean', 'ob_get_clean',
  // Dump helpers
  'dd', 'dump',
]);

/** Eloquent model properties whose array values are worth indexing. */
const ELOQUENT_ARRAY_PROPS = new Set(['fillable', 'casts', 'hidden', 'guarded', 'with', 'appends']);

Expand Down Expand Up @@ -130,4 +149,5 @@ export const phpProvider = defineLanguage({
namedBindingExtractor: extractPhpNamedBindings,
descriptionExtractor: phpDescriptionExtractor,
isRouteFile: isPhpRouteFile,
builtInNames: BUILT_INS,
});
8 changes: 8 additions & 0 deletions gitnexus/src/core/ingestion/languages/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ import { resolvePythonImport } from '../import-resolvers/python.js';
import { extractPythonNamedBindings } from '../named-bindings/python.js';
import { PYTHON_QUERIES } from '../tree-sitter-queries.js';

/**
 * Call names that are dropped from the call graph for Python sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Output, sizing and basic type constructors
  'print', 'len', 'range',
  'str', 'int', 'float',
  'list', 'dict', 'set', 'tuple',
  // Container mutators
  'append', 'extend', 'update',
  // Introspection
  'type', 'isinstance', 'issubclass',
  'getattr', 'setattr', 'hasattr',
  // Iteration helpers and reductions
  'enumerate', 'zip', 'sorted', 'reversed',
  'min', 'max', 'sum', 'abs',
]);

export const pythonProvider = defineLanguage({
id: SupportedLanguages.Python,
extensions: ['.py'],
Expand All @@ -28,4 +35,5 @@ export const pythonProvider = defineLanguage({
namedBindingExtractor: extractPythonNamedBindings,
importSemantics: 'namespace',
mroStrategy: 'c3',
builtInNames: BUILT_INS,
});
17 changes: 17 additions & 0 deletions gitnexus/src/core/ingestion/languages/ruby.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@ import { rubyExportChecker } from '../export-detection.js';
import { resolveRubyImport } from '../import-resolvers/ruby.js';
import { RUBY_QUERIES } from '../tree-sitter-queries.js';

/**
 * Call names that are dropped from the call graph for Ruby sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Output and raising
  'puts', 'p', 'pp', 'raise', 'fail',
  // Loading code
  'require', 'require_relative', 'load', 'autoload',
  // Module composition
  'include', 'extend', 'prepend',
  // Attribute macros
  'attr_accessor', 'attr_reader', 'attr_writer',
  // Visibility modifiers
  'public', 'private', 'protected', 'module_function',
  // Blocks and procs
  'lambda', 'proc', 'block_given?',
  // Type and capability predicates
  'nil?', 'is_a?', 'kind_of?', 'instance_of?', 'respond_to?',
  // Object utilities
  'freeze', 'frozen?', 'dup', 'tap', 'yield_self',
  // Enumeration
  'each', 'select', 'reject', 'detect', 'collect',
  'inject', 'flat_map', 'each_with_object', 'each_with_index',
  'any?', 'all?', 'none?',
  'count', 'first', 'last',
  'sort_by', 'min_by', 'max_by',
  'group_by', 'partition', 'compact', 'flatten', 'uniq',
]);

export const rubyProvider = defineLanguage({
id: SupportedLanguages.Ruby,
extensions: ['.rb', '.rake', '.gemspec'],
Expand All @@ -24,4 +40,5 @@ export const rubyProvider = defineLanguage({
importResolver: resolveRubyImport,
callRouter: routeRubyCall,
importSemantics: 'wildcard',
builtInNames: BUILT_INS,
});
14 changes: 14 additions & 0 deletions gitnexus/src/core/ingestion/languages/rust.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,19 @@ import { resolveRustImport } from '../import-resolvers/rust.js';
import { extractRustNamedBindings } from '../named-bindings/rust.js';
import { RUST_QUERIES } from '../tree-sitter-queries.js';

/**
 * Call names that are dropped from the call graph for Rust sources.
 * Entries are matched by exact, case-sensitive name (so `ok` and `Ok` are separate entries).
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Unwrapping
  'unwrap', 'expect',
  'unwrap_or', 'unwrap_or_else', 'unwrap_or_default',
  // Result/Option-style combinators
  'ok', 'err', 'is_ok', 'is_err',
  'map', 'map_err', 'and_then', 'or_else',
  // Cloning and conversions
  'clone', 'to_string', 'to_owned',
  'into', 'from', 'as_ref', 'as_mut',
  // Iteration
  'iter', 'into_iter', 'collect', 'filter', 'fold', 'for_each',
  // Collection basics
  'len', 'is_empty', 'push', 'pop',
  'insert', 'remove', 'contains',
  // Formatting, panicking and placeholder names
  'format', 'write', 'writeln',
  'panic', 'unreachable', 'todo', 'unimplemented',
  'vec', 'println', 'eprintln', 'dbg',
  // Locking
  'lock', 'read', 'try_lock',
  // Threads and timing
  'spawn', 'join', 'sleep',
  // Enum-variant constructors
  'Some', 'None', 'Ok', 'Err',
]);

export const rustProvider = defineLanguage({
id: SupportedLanguages.Rust,
extensions: ['.rs'],
Expand All @@ -27,4 +40,5 @@ export const rustProvider = defineLanguage({
importResolver: resolveRustImport,
namedBindingExtractor: extractRustNamedBindings,
mroStrategy: 'qualified-syntax',
builtInNames: BUILT_INS,
});
29 changes: 29 additions & 0 deletions gitnexus/src/core/ingestion/languages/swift.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,34 @@ function wireSwiftImplicitImports(
}
}

/**
 * Call names that are dropped from the call graph for Swift sources.
 * Entries are matched by exact, case-sensitive name.
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Printing, traps and assertions
  'print', 'debugPrint', 'dump',
  'fatalError', 'precondition', 'preconditionFailure',
  'assert', 'assertionFailure', 'NSLog',
  // Free-standing numeric and sequence helpers
  'abs', 'min', 'max', 'zip',
  'stride', 'sequence', 'repeatElement',
  // Unsafe and low-level helpers
  'swap',
  'withUnsafePointer', 'withUnsafeMutablePointer', 'withUnsafeBytes',
  'autoreleasepool', 'unsafeBitCast', 'unsafeDowncast', 'numericCast',
  'type', 'MemoryLayout',
  // Sequence and collection operations
  'map', 'flatMap', 'compactMap', 'filter', 'reduce', 'forEach', 'contains',
  'first', 'last', 'prefix', 'suffix', 'dropFirst', 'dropLast',
  'sorted', 'reversed', 'enumerated', 'joined', 'split',
  'append', 'insert', 'remove',
  'removeAll', 'removeFirst', 'removeLast',
  'isEmpty', 'count', 'index', 'startIndex', 'endIndex',
  // View hierarchy and layout
  'addSubview', 'removeFromSuperview',
  'layoutSubviews', 'setNeedsLayout', 'layoutIfNeeded',
  'setNeedsDisplay', 'invalidateIntrinsicContentSize',
  // Targets, gestures and constraints
  'addTarget', 'removeTarget', 'addGestureRecognizer',
  'addConstraint', 'addConstraints',
  'removeConstraint', 'removeConstraints',
  // Localization and bundles
  'NSLocalizedString', 'Bundle',
  // Table and collection view updates
  'reloadData', 'reloadSections', 'reloadRows', 'performBatchUpdates',
  'register', 'dequeueReusableCell', 'dequeueReusableSupplementaryView',
  'beginUpdates', 'endUpdates',
  'insertRows', 'deleteRows', 'insertSections', 'deleteSections',
  // View-controller presentation and navigation
  'present', 'dismiss',
  'pushViewController', 'popViewController', 'popToRootViewController',
  'performSegue', 'prepare',
  // Dispatch queues and structured concurrency
  'DispatchQueue', 'async', 'sync', 'asyncAfter',
  'Task', 'withCheckedContinuation', 'withCheckedThrowingContinuation',
  // Publisher/subscriber style calls
  'sink', 'store', 'assign', 'receive', 'subscribe',
  // Notifications
  'addObserver', 'removeObserver', 'post', 'NotificationCenter',
]);

export const swiftProvider = defineLanguage({
id: SupportedLanguages.Swift,
extensions: ['.swift'],
Expand All @@ -111,4 +139,5 @@ export const swiftProvider = defineLanguage({
importSemantics: 'wildcard',
heritageDefaultEdge: 'IMPLEMENTS',
implicitImportWirer: wireSwiftImplicitImports,
builtInNames: BUILT_INS,
});
23 changes: 23 additions & 0 deletions gitnexus/src/core/ingestion/languages/typescript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,27 @@ import { resolveTypescriptImport, resolveJavascriptImport } from '../import-reso
import { extractTsNamedBindings } from '../named-bindings/typescript.js';
import { TYPESCRIPT_QUERIES, JAVASCRIPT_QUERIES } from '../tree-sitter-queries.js';

/**
 * Call names that are dropped from the call graph for TypeScript and JavaScript sources.
 * Entries are matched by exact, case-sensitive name (so `error` and `Error` are separate entries).
 */
const BUILT_INS: ReadonlySet<string> = new Set<string>([
  // Console logging
  'console', 'log', 'warn', 'error', 'info', 'debug',
  // Timers
  'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
  // Global number parsing and checks
  'parseInt', 'parseFloat', 'isNaN', 'isFinite',
  // URI encoding
  'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
  // JSON
  'JSON', 'parse', 'stringify',
  // Global constructors
  'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
  'Map', 'Set', 'WeakMap', 'WeakSet',
  // Promises
  'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
  // Other global objects
  'Math', 'Date', 'RegExp', 'Error',
  // Module loading and fetch
  'require', 'import', 'export',
  'fetch', 'Response', 'Request',
  // React hooks
  'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
  'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
  // React factory helpers
  'createElement', 'createContext', 'createRef',
  'forwardRef', 'memo', 'lazy',
  // Array and string methods
  'map', 'filter', 'reduce', 'forEach',
  'find', 'findIndex', 'some', 'every',
  'includes', 'indexOf', 'slice', 'splice',
  'concat', 'join', 'split',
  'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
  // Object helpers
  'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
  'hasOwnProperty', 'toString', 'valueOf',
]);

export const typescriptProvider = defineLanguage({
id: SupportedLanguages.TypeScript,
extensions: ['.ts', '.tsx'],
Expand All @@ -23,6 +44,7 @@ export const typescriptProvider = defineLanguage({
exportChecker: tsExportChecker,
importResolver: resolveTypescriptImport,
namedBindingExtractor: extractTsNamedBindings,
builtInNames: BUILT_INS,
});

export const javascriptProvider = defineLanguage({
Expand All @@ -33,4 +55,5 @@ export const javascriptProvider = defineLanguage({
exportChecker: tsExportChecker,
importResolver: resolveJavascriptImport,
namedBindingExtractor: extractTsNamedBindings,
builtInNames: BUILT_INS,
});
2 changes: 1 addition & 1 deletion gitnexus/src/core/ingestion/tree-sitter-queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,7 @@ export const RUBY_QUERIES = `
; NOTE: This may over-capture variable reads as calls (e.g. 'result' at
; statement level). Ruby's grammar makes bare identifiers ambiguous — they
; could be local variables or zero-arity method calls. Post-processing via
; isBuiltInOrNoise and symbol resolution filtering suppresses most false
; provider.isBuiltInName and symbol resolution filtering suppresses most false
; positives, but a variable name that coincidentally matches a method name
; elsewhere may produce a false CALLS edge.
(body_statement
Expand Down
Loading
Loading