Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,7 @@
"papaparse": "^5.5.3",
"pbf": "3.2.1",
"pdfmake": "^0.2.15",
"piscina": "^3.2.0",
"polished": "^4.3.1",
"pretty-ms": "6.0.0",
"prop-types": "^15.8.1",
Expand Down Expand Up @@ -1891,7 +1892,6 @@
"peggy": "^4.2.0",
"picomatch": "^4.0.2",
"pirates": "^4.0.7",
"piscina": "^3.2.0",
"pixelmatch": "^5.3.0",
"playwright": "1.49.0",
"playwright-chromium": "1.49.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ export {
type DeanonymizationOutput,
type DeanonymizedMessage,
type AnonymizationSettings,
type AnonymizationRegexWorkerTaskPayload,
} from './src/chat_complete';

export type { BoundInferenceClient, InferenceClient } from './src/inference_client';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ export type {
RegexAnonymizationRule,
NamedEntityRecognitionRule,
AnonymizationSettings,
AnonymizationRegexWorkerTaskPayload,
} from './types';
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,7 @@ export interface DeanonymizationOutput {
}

export type DeanonymizedMessage = Message & { deanonymizations: Deanonymization[] };
/**
* Input for a single regex anonymization worker task: one regex rule together
* with the records it is paired with.
*
* NOTE(review): presumably this is the argument shape accepted by the regex
* worker service's `run` method — confirm against the worker service.
*/
export interface AnonymizationRegexWorkerTaskPayload {
/** The regex anonymization rule for this task. */
rule: RegexAnonymizationRule;
/** The records for this task; each record maps string keys to string values. */
records: Array<Record<string, string>>;
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,5 @@ export type {
RegexAnonymizationRule,
NamedEntityRecognitionRule,
AnonymizationSettings,
AnonymizationRegexWorkerTaskPayload,
} from './anonymization';
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/

import { MlInferenceResponseResult } from '@elastic/elasticsearch/lib/api/types';
import { loggerMock, type MockedLogger } from '@kbn/logging-mocks';
import { anonymizeMessages } from './anonymize_messages';
import {
AnonymizationRule,
Expand All @@ -16,16 +17,23 @@ import {
} from '@kbn/inference-common';
import { messageToAnonymizationRecords } from './message_to_anonymization_records';
import { getEntityMask } from './get_entity_mask';

import { RegexWorkerService } from './regex_worker_service';
import { AnonymizationWorkerConfig } from '../../config';
const mockEsClient = {
ml: {
inferTrainedModel: jest.fn(),
},
} as any;

const testConfig = {
enabled: false,
} as AnonymizationWorkerConfig;
describe('anonymizeMessages', () => {
let logger: MockedLogger;
let regexWorker: RegexWorkerService;
beforeEach(() => {
jest.resetAllMocks();
logger = loggerMock.create();
regexWorker = new RegexWorkerService(testConfig, logger);
});

const setupMockResponse = (entities: MlInferenceResponseResult[]) => {
Expand Down Expand Up @@ -88,6 +96,7 @@ describe('anonymizeMessages', () => {
const result = await anonymizeMessages({
messages,
anonymizationRules: [nerRule],
regexWorker,
esClient: mockEsClient,
});

Expand Down Expand Up @@ -126,6 +135,7 @@ describe('anonymizeMessages', () => {
anonymizeMessages({
messages,
anonymizationRules: [nerRule],
regexWorker,
esClient: mockEsClient,
})
).resolves.toBeDefined();
Expand All @@ -137,6 +147,7 @@ describe('anonymizeMessages', () => {
const result = await anonymizeMessages({
messages,
anonymizationRules: [disabledRule],
regexWorker,
esClient: mockEsClient,
});

Expand All @@ -154,6 +165,7 @@ describe('anonymizeMessages', () => {
const result = await anonymizeMessages({
messages,
anonymizationRules: [disabledRule],
regexWorker,
esClient: mockEsClient,
});

Expand Down Expand Up @@ -181,6 +193,7 @@ describe('anonymizeMessages', () => {
const result = await anonymizeMessages({
messages,
anonymizationRules: [regexRule],
regexWorker,
esClient: mockEsClient,
});

Expand Down Expand Up @@ -251,6 +264,7 @@ describe('anonymizeMessages', () => {

const result = await anonymizeMessages({
messages,
regexWorker,
anonymizationRules: [nerRule],
esClient: mockEsClient,
});
Expand Down Expand Up @@ -295,6 +309,7 @@ describe('anonymizeMessages', () => {
system: systemPrompt,
messages: [],
anonymizationRules: [nerRule],
regexWorker,
esClient: mockEsClient,
});
expect(result.system).toBe(
Expand Down Expand Up @@ -349,6 +364,7 @@ describe('anonymizeMessages', () => {
},
],
anonymizationRules: [nerRule], // nerRule allows only PER
regexWorker,
esClient: mockEsClient,
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,19 @@ import { merge } from 'lodash';
import { anonymizeRecords } from './anonymize_records';
import { messageFromAnonymizationRecords } from './message_from_anonymization_records';
import { messageToAnonymizationRecords } from './message_to_anonymization_records';
import { RegexWorkerService } from './regex_worker_service';

export async function anonymizeMessages({
system,
messages,
anonymizationRules,
regexWorker,
esClient,
}: {
system?: string | undefined;
messages: Message[];
anonymizationRules: AnonymizationRule[];
regexWorker: RegexWorkerService;
esClient: ElasticsearchClient;
}): Promise<AnonymizationOutput> {
const rules = anonymizationRules.filter((rule) => rule.enabled);
Expand All @@ -41,6 +44,7 @@ export async function anonymizeMessages({
const { records, anonymizations } = await anonymizeRecords({
input: toAnonymize,
anonymizationRules: rules,
regexWorker,
esClient,
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import { anonymizeRecords } from './anonymize_records';
import { AnonymizationRule } from '@kbn/inference-common';
import { MlInferenceResponseResult } from '@elastic/elasticsearch/lib/api/types';

import { loggerMock, type MockedLogger } from '@kbn/logging-mocks';
import { RegexWorkerService } from './regex_worker_service';
import { AnonymizationWorkerConfig } from '../../config';
const mockEsClient = {
ml: {
inferTrainedModel: jest.fn(),
Expand All @@ -20,27 +22,36 @@ const setupMockResponse = (entitiesPerDoc: MlInferenceResponseResult[]) => {
inference_results: entitiesPerDoc,
});
};
const nerRule: AnonymizationRule = {
type: 'NER',
enabled: true,
modelId: 'model-1',
};
const nerRule2: AnonymizationRule = {
type: 'NER',
enabled: true,
modelId: 'model-2',
};
const regexRule: AnonymizationRule = {
type: 'RegExp',
enabled: true,
entityClass: 'EMAIL',
pattern: '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}',
};

const testConfig = {
enabled: false,
} as AnonymizationWorkerConfig;

describe('anonymizeRecords', () => {
const nerRule: AnonymizationRule = {
type: 'NER',
enabled: true,
modelId: 'model-1',
};
const nerRule2: AnonymizationRule = {
type: 'NER',
enabled: true,
modelId: 'model-2',
};
const regexRule: AnonymizationRule = {
type: 'RegExp',
enabled: true,
entityClass: 'EMAIL',
pattern: '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}',
};
let logger: MockedLogger;

let regexWorker: RegexWorkerService;

beforeEach(() => {
jest.resetAllMocks();
logger = loggerMock.create();
regexWorker = new RegexWorkerService(testConfig, logger);
});

it('masks values using regex rule', async () => {
Expand All @@ -49,6 +60,7 @@ describe('anonymizeRecords', () => {
const { records, anonymizations } = await anonymizeRecords({
input,
anonymizationRules: [regexRule],
regexWorker,
esClient: mockEsClient,
});

Expand All @@ -63,6 +75,7 @@ describe('anonymizeRecords', () => {
await anonymizeRecords({
input: [{ content: shortText }],
anonymizationRules: [nerRule],
regexWorker,
esClient: mockEsClient,
});

Expand All @@ -80,6 +93,7 @@ describe('anonymizeRecords', () => {
const { records } = await anonymizeRecords({
input: [{ content: longText }],
anonymizationRules: [nerRule],
regexWorker,
esClient: mockEsClient,
});

Expand Down Expand Up @@ -134,6 +148,7 @@ describe('anonymizeRecords', () => {
const { records, anonymizations } = await anonymizeRecords({
input,
anonymizationRules: [nerRule, nerRule2],
regexWorker,
esClient: mockEsClient,
});

Expand All @@ -156,6 +171,7 @@ describe('anonymizeRecords', () => {
const result = await anonymizeRecords({
input,
anonymizationRules: [regexRule],
regexWorker,
esClient: mockEsClient,
});

Expand Down Expand Up @@ -190,6 +206,7 @@ describe('anonymizeRecords', () => {
const result = await anonymizeRecords({
input,
anonymizationRules: [nerRule],
regexWorker,
esClient: mockEsClient,
});

Expand Down Expand Up @@ -242,6 +259,7 @@ describe('anonymizeRecords', () => {
const result = await anonymizeRecords({
input,
anonymizationRules: [nerRule, nerRule2],
regexWorker,
esClient: mockEsClient,
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,29 @@ import { partition } from 'lodash';
import { AnonymizationState } from './types';
import { executeRegexRule } from './execute_regex_rule';
import { executeNerRule } from './execute_ner_rule';
import { RegexWorkerService } from './regex_worker_service';

export async function anonymizeRecords<T extends Record<string, string | undefined>>({
input,
anonymizationRules,
regexWorker,
esClient,
}: {
input: T[];
anonymizationRules: AnonymizationRule[];
regexWorker: RegexWorkerService;
esClient: ElasticsearchClient;
}): Promise<AnonymizationState>;

export async function anonymizeRecords({
input,
anonymizationRules,
regexWorker,
esClient,
}: {
input: Array<Record<string, string>>;
anonymizationRules: AnonymizationRule[];
regexWorker: RegexWorkerService;
esClient: ElasticsearchClient;
}): Promise<AnonymizationState> {
let state: AnonymizationState = {
Expand All @@ -42,9 +47,10 @@ export async function anonymizeRecords({
);

for (const rule of regexRules) {
state = executeRegexRule({
state = await executeRegexRule({
rule,
state,
regexWorker,
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,26 @@
* 2.0.
*/

import { Anonymization, RegexAnonymizationRule } from '@kbn/inference-common';
import { RegexAnonymizationRule } from '@kbn/inference-common';
import { AnonymizationState } from './types';
import { getEntityMask } from './get_entity_mask';
import { RegexWorkerService } from './regex_worker_service';

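/**
* Executes a regex anonymization rule by passing the rule and the current
* records to `regexWorker.run`, then returns the records it produced together
* with the existing anonymizations followed by the newly reported ones.
*/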
export function executeRegexRule({
export async function executeRegexRule({
state,
rule,
regexWorker,
}: {
state: AnonymizationState;
rule: RegexAnonymizationRule;
}): AnonymizationState {
const regex = new RegExp(rule.pattern, 'g');

const anonymizations: Anonymization[] = state.anonymizations.concat();

const nextRecords = state.records.map((record) => {
const newRecord: Record<string, string> = {};
for (const [key, value] of Object.entries(record)) {
newRecord[key] = value.replace(regex, (match) => {
const mask = getEntityMask({ value: match, class_name: rule.entityClass });

anonymizations.push({
entity: { value: match, class_name: rule.entityClass, mask },
rule: { type: rule.type },
});

return mask;
});
}
return newRecord;
regexWorker: RegexWorkerService;
}): Promise<AnonymizationState> {
const { records, anonymizations } = await regexWorker.run({
rule,
records: state.records,
});

return { records: nextRecords, anonymizations };
return { records, anonymizations: state.anonymizations.concat(anonymizations) };
}
Loading