Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,5 @@ __pycache__/
*.pyc
.pytest_cache/

# internal
# internal examples
internal_examples/
64 changes: 64 additions & 0 deletions src/__tests__/unit/checks/pii.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,68 @@ describe('pii guardrail', () => {

await expect(pii({}, '', config)).rejects.toThrow('Text cannot be empty or null');
});

it('detects valid Korean Resident Registration Number (KR_RRN)', async () => {
  // Sample follows YYMMDD-GNNNNNN: birth date 900101 (1 Jan 1990), gender digit 1.
  const text = 'Korean RRN: 900101-1234567';
  const maskOnlyConfig = PIIConfig.parse({ entities: [PIIEntity.KR_RRN], block: false });

  const { tripwireTriggered, info } = await pii({}, text, maskOnlyConfig);
  const detected = info?.detected_entities as Record<string, string[]>;

  // With block=false the match is reported and masked, but the tripwire stays off.
  expect(tripwireTriggered).toBe(false);
  expect(detected?.KR_RRN).toEqual(['900101-1234567']);
  expect(info?.checked_text).toBe('Korean RRN: <KR_RRN>');
});

it('detects multiple valid KR_RRN formats', async () => {
  const config = PIIConfig.parse({ entities: [PIIEntity.KR_RRN], block: false });
  // Three well-formed numbers with differing birth dates and gender digits 2, 3 and 4.
  const text = 'RRNs: 850315-2345678, 001231-3456789, 750628-4123456';

  const result = await pii({}, text, config);
  const found = (result.info?.detected_entities as Record<string, string[]>)?.KR_RRN;

  expect(result.tripwireTriggered).toBe(false);
  expect(found).toHaveLength(3);
  // Every number embedded in the input must be reported individually.
  for (const rrn of ['850315-2345678', '001231-3456789', '750628-4123456']) {
    expect(found).toContain(rrn);
  }
});

it('does not detect invalid KR_RRN patterns (false positives)', async () => {
  const config = PIIConfig.parse({
    entities: [PIIEntity.KR_RRN],
    block: false,
  });
  // Look-alike strings that must NOT be reported. Each entry notes the rule it violates
  // so a regression points directly at the part of the pattern that loosened.
  const invalidSamples = [
    '901301-1234567', // invalid month (13)
    '900100-1234567', // invalid day (00)
    '900132-1234567', // invalid day (32)
    '900101-0234567', // invalid gender digit (0)
    '900101-5234567', // invalid gender digit (5)
    '900101-9234567', // invalid gender digit (9)
    '123456-7890123', // random tracking number (month 34, day 56, gender digit 7)
  ];
  const text = `Invalid: ${invalidSamples.join(', ')}`;

  const result = await pii({}, text, config);

  expect(result.tripwireTriggered).toBe(false);
  expect(result.info?.detected_entities).toEqual({});
  expect(result.info?.checked_text).toBe(text); // No masking should occur
});

it('triggers tripwire for KR_RRN when block=true', async () => {
  const blockingConfig = PIIConfig.parse({ entities: [PIIEntity.KR_RRN], block: true });

  const outcome = await pii({}, 'Korean RRN: 900101-1234567', blockingConfig);
  const detected = outcome.info?.detected_entities as Record<string, string[]>;

  // In blocking mode a detected RRN must trip the guardrail and still be reported.
  expect(outcome.tripwireTriggered).toBe(true);
  expect(detected?.KR_RRN).toEqual(['900101-1234567']);
});
});
7 changes: 7 additions & 0 deletions src/checks/pii.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ export enum PIIEntity {

// Finland
FI_PERSONAL_IDENTITY_CODE = 'FI_PERSONAL_IDENTITY_CODE',

// Korea
KR_RRN = 'KR_RRN',
}

/**
Expand Down Expand Up @@ -236,6 +239,10 @@ const DEFAULT_PII_PATTERNS: Record<PIIEntity, RegExp> = {

// Finland
[PIIEntity.FI_PERSONAL_IDENTITY_CODE]: /\b\d{6}[+-A]\d{3}[A-Z0-9]\b/g,

// Korea
// Format: YYMMDD-GNNNNNN where YY=year, MM=month(01-12), DD=day(01-31), G=gender/century(1-4)
// Note: DD is not checked against the month's length, so dates such as 0230 or 0231 still match.
[PIIEntity.KR_RRN]: /\b\d{2}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01])-[1-4]\d{6}\b/g,
Copy link

Copilot AI Oct 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex pattern allows invalid dates such as February 30th (0230) or February 31st (0231), because the day validation (0[1-9]|[12]\d|3[01]) doesn't account for different month lengths. Month-specific day validation in a regex is complex, so consider whether this level of validation is acceptable or whether additional validation logic should be added.

Copilot uses AI. Check for mistakes.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now this is fine. All of the checks are basic regexes; I would recommend a separate PR that makes all of the checks more robust.

};

/**
Expand Down