Skip to content

Commit 206ffbb

Browse files
committed
new changes
1 parent 7c3a846 commit 206ffbb

File tree

10 files changed

+823
-3
lines changed

10 files changed

+823
-3
lines changed

meerkat-browser/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@devrev/meerkat-browser",
3-
"version": "0.0.90",
3+
"version": "0.0.91",
44
"dependencies": {
55
"@swc/helpers": "~0.5.0",
66
"@devrev/meerkat-core": "*",

meerkat-core/package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
{
22
"name": "@devrev/meerkat-core",
3-
"version": "0.0.90",
3+
"version": "0.0.91",
44
"dependencies": {
5-
"@swc/helpers": "~0.5.0"
5+
"@swc/helpers": "~0.5.0",
6+
"lodash": "^4.17.21"
67
},
78
"scripts": {
89
"release": "semantic-release"
Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
import { DatasetRegistry } from '../dataset-registry/dataset-registry';
2+
import { Column, Dataset } from '../dataset-registry/types';
3+
import { DimensionType } from '../types/cube-types';
4+
import { ColumnCompatibilityAnalyzer } from './column-compatibility-analyzer';
5+
import {
6+
NAME_EXACT_MATCH,
7+
NAME_PARTIAL_MATCH,
8+
SCHEMA_COMPATIBILITY_MATCH,
9+
TYPE_COMPATIBILITY_MATCH,
10+
} from './constants';
11+
12+
/**
 * Test-only subclass that re-exposes scoring helpers inherited from
 * `ColumnCompatibilityAnalyzer` as public `test*` methods.
 *
 * Every wrapper reaches the inherited member through bracket notation and
 * forwards its arguments unchanged. NOTE(review): the bracket access suggests
 * the wrapped members are not public on the base class — confirm there.
 */
class TestableColumnCompatibilityAnalyzer extends ColumnCompatibilityAnalyzer {
  /** Forwards both dimension types to `getTypeCompatibilityScore`. */
  public testGetTypeCompatibilityScore(
    sourceType: DimensionType,
    targetType: DimensionType
  ): number {
    const typeScore = this['getTypeCompatibilityScore'](sourceType, targetType);
    return typeScore;
  }

  /** Forwards both column names to `getNameSimilarityScore`. */
  public testGetNameSimilarityScore(
    sourceName: string,
    targetName: string
  ): number {
    const nameScore = this['getNameSimilarityScore'](sourceName, targetName);
    return nameScore;
  }

  /** Forwards both columns to `getSchemaCompatibilityScore`. */
  public testGetSchemaCompatibilityScore(
    sourceColumn: Column,
    targetColumn: Column
  ): number {
    const schemaScore = this['getSchemaCompatibilityScore'](
      sourceColumn,
      targetColumn
    );
    return schemaScore;
  }

  /** Forwards the raw name to `normalizeColumnName`. */
  public testNormalizeColumnName(name: string): string {
    const normalizedName = this['normalizeColumnName'](name);
    return normalizedName;
  }

  /** Forwards both columns to `assessCompatibilityScore`. */
  public testAssessCompatibility(sourceColumn: Column, targetColumn: Column) {
    const overallScore = this['assessCompatibilityScore'](
      sourceColumn,
      targetColumn
    );
    return overallScore;
  }
}
42+
43+
describe('ColumnCompatibilityAnalyzer', () => {
44+
let compatibleAnalyzer: ColumnCompatibilityAnalyzer;
45+
let mockRegistry: DatasetRegistry;
46+
47+
const mockDatasets: Dataset[] = [
48+
{
49+
id: 'dataset1',
50+
name: 'Dataset 1',
51+
columns: [
52+
{
53+
name: 'user_id',
54+
dataType: 'number',
55+
schema: { type: 'integer' },
56+
},
57+
{
58+
name: 'email',
59+
dataType: 'string',
60+
schema: { type: 'string', format: 'email' },
61+
},
62+
],
63+
},
64+
{
65+
id: 'dataset2',
66+
name: 'Dataset 2',
67+
columns: [
68+
{
69+
name: 'userId',
70+
dataType: 'number',
71+
schema: { type: 'integer' },
72+
},
73+
{
74+
name: 'name',
75+
dataType: 'string',
76+
schema: { type: 'string' },
77+
},
78+
],
79+
},
80+
];
81+
82+
beforeEach(() => {
83+
mockRegistry = new DatasetRegistry();
84+
mockDatasets.forEach((dataset) => mockRegistry.registerDataset(dataset));
85+
compatibleAnalyzer = new ColumnCompatibilityAnalyzer(mockRegistry);
86+
});
87+
88+
// Public API: lookups against the two fixture datasets registered in the
// enclosing suite's setup.
describe('findCompatibleColumns', () => {
  it('should find compatible columns based on type, name, and schema', () => {
    const matches = compatibleAnalyzer.findCompatibleColumns({
      sourceDatasetId: 'dataset1',
      sourceColumnName: 'user_id',
    });

    // Exactly one candidate is expected: dataset2's `userId` column.
    expect(matches).toHaveLength(1);
    const [onlyMatch] = matches;
    expect(onlyMatch.column.name).toBe('userId');
    expect(onlyMatch.dataset.id).toBe('dataset2');
  });

  it('should throw error when source column not found', () => {
    // `unique_column` is not declared on dataset1 in the fixtures.
    const lookupMissingColumn = () =>
      compatibleAnalyzer.findCompatibleColumns({
        sourceDatasetId: 'dataset1',
        sourceColumnName: 'unique_column',
      });

    expect(lookupMissingColumn).toThrow(
      'Column unique_column not found in dataset dataset1'
    );
  });
});
109+
110+
describe('name similarity scoring', () => {
111+
it('should match exact names ignoring case and special characters', () => {
112+
const result = compatibleAnalyzer.findCompatibleColumns({
113+
sourceDatasetId: 'dataset1',
114+
sourceColumnName: 'user_id',
115+
});
116+
117+
expect(result).toHaveLength(1);
118+
expect(result[0].column.name).toBe('userId');
119+
});
120+
});
121+
122+
describe('schema compatibility', () => {
123+
beforeEach(() => {
124+
mockRegistry.registerDataset({
125+
id: 'dataset3',
126+
name: 'Dataset 3',
127+
columns: [
128+
{
129+
name: 'email',
130+
dataType: 'string',
131+
schema: { type: 'string', format: 'email' },
132+
},
133+
],
134+
});
135+
});
136+
137+
it('should consider schema when scoring compatibility', () => {
138+
const result = compatibleAnalyzer.findCompatibleColumns({
139+
sourceDatasetId: 'dataset1',
140+
sourceColumnName: 'email',
141+
});
142+
143+
expect(result).toHaveLength(2);
144+
expect(result[0].column.name).toBe('email');
145+
expect(result[0].dataset.id).toBe('dataset3');
146+
});
147+
148+
it('should handle missing schema gracefully', () => {
149+
const noSchemaDataset: Dataset = {
150+
id: 'dataset4',
151+
name: 'Dataset 4',
152+
columns: [
153+
{
154+
name: 'id',
155+
dataType: 'number',
156+
},
157+
],
158+
};
159+
mockRegistry.registerDataset(noSchemaDataset);
160+
161+
const result = compatibleAnalyzer.findCompatibleColumns({
162+
sourceDatasetId: 'dataset4',
163+
sourceColumnName: 'id',
164+
});
165+
166+
expect(result).toHaveLength(2);
167+
});
168+
});
169+
170+
describe('ColumnCompatibilityAnalyzer PRIVATE METHODS', () => {
171+
let analyzer: TestableColumnCompatibilityAnalyzer;
172+
let registry: DatasetRegistry;
173+
174+
beforeEach(() => {
175+
registry = new DatasetRegistry();
176+
analyzer = new TestableColumnCompatibilityAnalyzer(registry);
177+
});
178+
179+
describe('getTypeCompatibilityScore', () => {
180+
it('should return full score for matching types', () => {
181+
expect(analyzer.testGetTypeCompatibilityScore('string', 'string')).toBe(
182+
TYPE_COMPATIBILITY_MATCH
183+
);
184+
expect(analyzer.testGetTypeCompatibilityScore('number', 'number')).toBe(
185+
TYPE_COMPATIBILITY_MATCH
186+
);
187+
});
188+
189+
it('should return 0 for different types', () => {
190+
expect(analyzer.testGetTypeCompatibilityScore('string', 'number')).toBe(
191+
0
192+
);
193+
expect(
194+
analyzer.testGetTypeCompatibilityScore('boolean', 'string')
195+
).toBe(0);
196+
});
197+
});
198+
199+
describe('getNameSimilarityScore', () => {
200+
it('should return exact match score for identical names', () => {
201+
expect(analyzer.testGetNameSimilarityScore('user_id', 'user_id')).toBe(
202+
NAME_EXACT_MATCH
203+
);
204+
expect(analyzer.testGetNameSimilarityScore('userId', 'userId')).toBe(
205+
NAME_EXACT_MATCH
206+
);
207+
});
208+
209+
it('should return partial match score for similar names', () => {
210+
expect(analyzer.testGetNameSimilarityScore('user_id', 'userId')).toBe(
211+
NAME_EXACT_MATCH
212+
);
213+
expect(analyzer.testGetNameSimilarityScore('customer_id', 'id')).toBe(
214+
NAME_PARTIAL_MATCH
215+
);
216+
});
217+
218+
it('should return 0 for different names', () => {
219+
expect(
220+
analyzer.testGetNameSimilarityScore('user_id', 'product_name')
221+
).toBe(0);
222+
});
223+
});
224+
225+
describe('getSchemaCompatibilityScore', () => {
226+
it('should return full score for matching schemas', () => {
227+
const schema1 = { type: 'string', length: 255 };
228+
const column1: Column = {
229+
name: 'test1',
230+
dataType: 'string',
231+
schema: schema1,
232+
};
233+
const column2: Column = {
234+
name: 'test2',
235+
dataType: 'string',
236+
schema: schema1,
237+
};
238+
239+
expect(analyzer.testGetSchemaCompatibilityScore(column1, column2)).toBe(
240+
SCHEMA_COMPATIBILITY_MATCH
241+
);
242+
});
243+
244+
it('should return 0 for different schemas', () => {
245+
const column1: Column = {
246+
name: 'test1',
247+
dataType: 'string',
248+
schema: { type: 'string', length: 255 },
249+
};
250+
const column2: Column = {
251+
name: 'test2',
252+
dataType: 'string',
253+
schema: { type: 'string', length: 100 },
254+
};
255+
256+
expect(analyzer.testGetSchemaCompatibilityScore(column1, column2)).toBe(
257+
0
258+
);
259+
});
260+
261+
it('should return 0 when schemas are missing', () => {
262+
const column1: Column = { name: 'test1', dataType: 'string' };
263+
const column2: Column = { name: 'test2', dataType: 'string' };
264+
265+
expect(analyzer.testGetSchemaCompatibilityScore(column1, column2)).toBe(
266+
0
267+
);
268+
});
269+
});
270+
271+
describe('normalizeColumnName', () => {
272+
it('should convert to lowercase and remove special characters', () => {
273+
expect(analyzer.testNormalizeColumnName('User_ID')).toBe('userid');
274+
expect(analyzer.testNormalizeColumnName('customer-id')).toBe(
275+
'customerid'
276+
);
277+
expect(analyzer.testNormalizeColumnName('ProductName')).toBe(
278+
'productname'
279+
);
280+
});
281+
});
282+
283+
describe('assessCompatibilityScore', () => {
284+
it('should calculate total compatibility score correctly', () => {
285+
const column1: Column = {
286+
name: 'user_id',
287+
dataType: 'string',
288+
schema: { type: 'string', length: 255 },
289+
};
290+
const column2: Column = {
291+
name: 'user_id',
292+
dataType: 'string',
293+
schema: { type: 'string', length: 255 },
294+
};
295+
296+
const result = analyzer.testAssessCompatibility(column1, column2);
297+
298+
expect(result).toEqual(
299+
TYPE_COMPATIBILITY_MATCH +
300+
NAME_EXACT_MATCH +
301+
SCHEMA_COMPATIBILITY_MATCH
302+
);
303+
});
304+
305+
it('should return default score when types do not match', () => {
306+
const column1: Column = { name: 'test1', dataType: 'string' };
307+
const column2: Column = { name: 'test1', dataType: 'number' };
308+
309+
const result = analyzer.testAssessCompatibility(column1, column2);
310+
311+
expect(result).toBe(0);
312+
});
313+
});
314+
});
315+
});

0 commit comments

Comments
 (0)