Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ describe('Testing pipeline templates', () => {
target_field: '@timestamp',
formats: ["yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS'Z'", 'ISO8601'],
tag: 'date_processor_xdfsfs.ds.@timestamp',
if: 'ctx.xdfsfs?.ds?.@timestamp != null',
if: 'ctx.xdfsfs?.ds?.get("@timestamp") != null',
},
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ import { Pipeline, ESProcessorItem } from '../../../common';
import type { EcsMappingState } from '../../types';
import { ECS_TYPES } from './constants';
import { deepCopy } from '../../util/util';

interface IngestPipeline {
[key: string]: unknown;
}
import { type FieldPath, fieldPathToProcessorString } from '../../util/fields';
import { fieldPathToPainlessExpression, SafePainlessExpression } from '../../util/painless';

interface ECSField {
target: string;
Expand All @@ -24,16 +22,36 @@ interface ECSField {
type: string;
}

const KNOWN_ES_TYPES = ['long', 'float', 'scaled_float', 'ip', 'boolean', 'keyword'];
type KnownESType = (typeof KNOWN_ES_TYPES)[number];

/**
* Clarifies the types of specific fields in pipeline processors.
*
* This includes safety requirements for Painless script fields.
* Restricted to the processors that we generate in this file.
*/
interface SafeESProcessorItem extends ESProcessorItem {
[k: string]: {
field?: string;
if?: SafePainlessExpression;
ignore_missing?: boolean;
target_field?: string;
type?: KnownESType;
formats?: string[];
};
}

function generateProcessor(
currentPath: string,
currentPath: FieldPath,
ecsField: ECSField,
expectedEcsType: string,
sampleValue: unknown
): object {
): SafeESProcessorItem {
if (needsTypeConversion(sampleValue, expectedEcsType)) {
return {
convert: {
field: currentPath,
field: fieldPathToProcessorString(currentPath),
target_field: ecsField.target,
type: getConvertProcessorType(expectedEcsType),
ignore_missing: true,
Expand All @@ -44,17 +62,17 @@ function generateProcessor(
if (ecsField.type === 'date') {
return {
date: {
field: currentPath,
field: fieldPathToProcessorString(currentPath),
target_field: ecsField.target,
formats: convertIfIsoDate(ecsField.date_formats),
if: currentPath.replace(/\./g, '?.'),
if: fieldPathToPainlessExpression(currentPath),
},
};
}

return {
rename: {
field: currentPath,
field: fieldPathToProcessorString(currentPath),
target_field: ecsField.target,
ignore_missing: true,
},
Expand All @@ -74,10 +92,9 @@ function convertIfIsoDate(date: string[]): string[] {
return date;
}

function getSampleValue(key: string, samples: Record<string, any>): unknown {
const keyList = key.split('.');
function getSampleValue(fieldPath: FieldPath, samples: Record<string, any>): unknown {
let value: any = samples;
for (const k of keyList) {
for (const k of fieldPath) {
if (value === undefined || value === null) {
return null;
}
Expand All @@ -91,7 +108,7 @@ function getEcsType(ecsField: ECSField, ecsTypes: Record<string, string>): strin
return ecsTypes[ecsTarget];
}

function getConvertProcessorType(expectedEcsType: string): string {
function getConvertProcessorType(expectedEcsType: KnownESType): KnownESType {
if (expectedEcsType === 'long') {
return 'long';
}
Expand All @@ -107,7 +124,7 @@ function getConvertProcessorType(expectedEcsType: string): string {
return 'string';
}

function needsTypeConversion(sample: unknown, expected: string): boolean {
function needsTypeConversion(sample: unknown, expected: KnownESType): boolean {
if (sample === null || sample === undefined) {
return false;
}
Expand Down Expand Up @@ -136,16 +153,20 @@ function needsTypeConversion(sample: unknown, expected: string): boolean {
return false;
}

function generateProcessors(ecsMapping: object, samples: object, basePath: string = ''): object[] {
function generateProcessors(
ecsMapping: object,
samples: object,
basePath: FieldPath = []
): SafeESProcessorItem[] {
if (Object.keys(ecsMapping).length === 0) {
return [];
}
const ecsTypes = ECS_TYPES;
const valueFieldKeys = new Set(['target', 'confidence', 'date_formats', 'type']);
const results: object[] = [];
const results: SafeESProcessorItem[] = [];

for (const [key, value] of Object.entries(ecsMapping)) {
const currentPath = basePath ? `${basePath}.${key}` : key;
const currentPath = [...basePath, key];

if (value !== null && typeof value === 'object' && value?.target !== null) {
const valueKeys = new Set(Object.keys(value));
Expand All @@ -162,10 +183,11 @@ function generateProcessors(ecsMapping: object, samples: object, basePath: strin
}
}
}

return results;
}

export function createPipeline(state: EcsMappingState): IngestPipeline {
export function createPipeline(state: EcsMappingState): Pipeline {
const samples = JSON.parse(state.combinedSamples);

const processors = generateProcessors(state.finalMapping, samples);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ processors:
- {{ format }}
{% endfor %}
tag: date_processor_{{ value.field}}
if: "ctx.{{ value.if }} != null"{% endif %}
if: |-
{{ value.if }} != null{% endif %}
{% if key == 'convert' %}
- {{ key }}:
field: {{ value.field }}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { fieldPathToProcessorString } from './fields';

describe('fieldPathToProcessorString', () => {
it('should join an array of strings with dots', () => {
const result = fieldPathToProcessorString(['foo', 'bar', 'baz']);
expect(result).toBe('foo.bar.baz');
});

it('should return an empty string if array is empty', () => {
const result = fieldPathToProcessorString([]);
expect(result).toBe('');
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

/**
* Represents a path to a field as an array of string segments.
* Each element in the array represents a level in the field hierarchy.
*
* A segment might contain a character that is invalid in some contexts.
* @example ['person', 'address', 'street-level']
*/
export type FieldPath = string[];

/**
* Converts a FieldPath array into a string useable as the field in the ingest pipeline.
*
* @param fieldPath - The array of field names representing the path.
* @returns The processor string created by joining the field names with a dot.
*/
export function fieldPathToProcessorString(fieldPath: FieldPath): string {
return fieldPath.join('.');
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import {
isPainlessIdentifier,
painlessStringRepresentation,
addPainlessFieldAccess,
fieldPathToPainlessExpression,
type SafePainlessExpression,
} from './painless';

describe('isPainlessIdentifier', () => {
it('should return true for valid identifiers', () => {
expect(isPainlessIdentifier('_validIdentifier123')).toBe(true);
expect(isPainlessIdentifier('valid')).toBe(true);
});

it('should return false for invalid identifiers', () => {
expect(isPainlessIdentifier('123start')).toBe(false); // Identifiers cannot start with a number
expect(isPainlessIdentifier('new')).toBe(true); // Reserved words are valid identifiers
expect(isPainlessIdentifier('_source')).toBe(true); // Underscore-prefixed identifiers are valid
expect(isPainlessIdentifier('invalid-char!')).toBe(false); // Identifiers cannot contain special characters
});
});

describe('painlessFieldEscape', () => {
it('should return a quoted and escaped string', () => {
expect(painlessStringRepresentation('simple')).toBe('"simple"');
expect(painlessStringRepresentation('"quote"')).toBe('"\\"quote\\""');
expect(painlessStringRepresentation('back\\slash')).toBe('"back\\\\slash"');
});
});

describe('addPainlessFieldAccess', () => {
it('should add a dot-access for valid identifiers', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo', expr, false);
expect(result).toBe('root.foo');
});

it('should add a nullable dot-access for valid identifiers', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo', expr);
expect(result).toBe('root?.foo');
});

it('should add a get-access for invalid identifiers', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo-bar', expr, false);
expect(result).toContain('"foo-bar"');
expect(result).toBe('root.get("foo-bar")');
});

it('should add a nullable get-access for invalid identifiers in the chain', () => {
const expr = 'root' as SafePainlessExpression;
const result = addPainlessFieldAccess('foo-bar', expr, true);
expect(result).toContain('"foo-bar"');
expect(result).toBe('root?.get("foo-bar")');
});
});

describe('fieldPathToPainlessExpression', () => {
it('should build a nested expression from a simple field path', () => {
const result = fieldPathToPainlessExpression(['source', 'ip']);
expect(result).toBe('ctx.source?.ip');
});

it('should quote invalid identifiers', () => {
const result = fieldPathToPainlessExpression(['ip-address']);
expect(result).toContain('"ip-address"');
expect(result).toBe('ctx.get("ip-address")');
});

it('should use nullable get access for nested invalid identifiers', () => {
const result = fieldPathToPainlessExpression(['field', 'ip-address']);
expect(result).toContain('"ip-address"');
expect(result).toBe('ctx.field?.get("ip-address")');
});

it('should return just "ctx" if the path is empty', () => {
const result = fieldPathToPainlessExpression([]);
expect(result).toBe('ctx');
});
});
Loading