-
Notifications
You must be signed in to change notification settings - Fork 1
Fix sending large messages to SQS queue #86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,71 @@ | ||||||
| import { ErrorRecord } from '../types/common'; | ||||||
| import { EventData } from '../types/extraction'; | ||||||
|
|
||||||
| const MAX_EVENT_SIZE_BYTES = 200_000; | ||||||
| const EVENT_SIZE_THRESHOLD_BYTES = Math.floor(MAX_EVENT_SIZE_BYTES * 0.8); // 160_000 bytes | ||||||
|
|
||||||
| /** | ||||||
| * Get the JSON serialized size of event data in bytes | ||||||
| */ | ||||||
| export function getEventDataSize(data: EventData | undefined): number { | ||||||
| if (!data) return 0; | ||||||
| return JSON.stringify(data).length; | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Check if event data exceeds the 80% threshold (160KB) | ||||||
| */ | ||||||
| export function shouldTriggerSizeLimit(data: EventData | undefined): boolean { | ||||||
| return getEventDataSize(data) > EVENT_SIZE_THRESHOLD_BYTES; | ||||||
| } | ||||||
|
|
||||||
| /** | ||||||
| * Truncate error message to max length (default 1000 chars) | ||||||
| */ | ||||||
| export function truncateErrorMessage( | ||||||
| error: ErrorRecord | undefined, | ||||||
| maxLength: number = 1000 | ||||||
| ): ErrorRecord | undefined { | ||||||
| if (!error) return undefined; | ||||||
|
|
||||||
| return { | ||||||
| message: error.message.substring(0, maxLength), | ||||||
|
||||||
| message: error.message.substring(0, maxLength), | |
| message: error.message?.substring(0, maxLength) ?? '', |
Copilot
AI
Nov 25, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] Exporting constants with renamed aliases on a separate line from function exports reduces readability. Consider moving these constant exports to the top of the file with the constant declarations or using separate export statements for clarity.
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -7,6 +7,11 @@ import { | |||||||||||||
| STATELESS_EVENT_TYPES, | ||||||||||||||
| } from '../common/constants'; | ||||||||||||||
| import { emit } from '../common/control-protocol'; | ||||||||||||||
| import { | ||||||||||||||
| logSizeLimitWarning, | ||||||||||||||
| pruneEventData, | ||||||||||||||
| SIZE_LIMIT_THRESHOLD, | ||||||||||||||
| } from '../common/event-size-monitor'; | ||||||||||||||
| import { addReportToLoaderReport, getFilesToLoad } from '../common/helpers'; | ||||||||||||||
| import { serializeError } from '../logger/logger'; | ||||||||||||||
| import { Mappers } from '../mappers/mappers'; | ||||||||||||||
|
|
@@ -91,6 +96,9 @@ export class WorkerAdapter<ConnectorState> { | |||||||||||||
| private _mappers: Mappers; | ||||||||||||||
| private uploader: Uploader; | ||||||||||||||
|
|
||||||||||||||
| // Length of the resulting artifact JSON object string. | ||||||||||||||
| private currentLength: number = 0; | ||||||||||||||
|
|
||||||||||||||
| constructor({ | ||||||||||||||
| event, | ||||||||||||||
| adapterState, | ||||||||||||||
|
|
@@ -149,12 +157,30 @@ export class WorkerAdapter<ConnectorState> { | |||||||||||||
| itemType: repo.itemType, | ||||||||||||||
| ...(shouldNormalize && { normalize: repo.normalize }), | ||||||||||||||
| onUpload: (artifact: Artifact) => { | ||||||||||||||
| const newLength = JSON.stringify(artifact).length; | ||||||||||||||
|
||||||||||||||
| const newLength = JSON.stringify(artifact).length; | |
| // Cache the stringified artifact to avoid redundant serialization | |
| if (!('_stringified' in artifact)) { | |
| (artifact as any)._stringified = JSON.stringify(artifact); | |
| } | |
| const newLength = (artifact as any)._stringified.length; |
Copilot
AI
Nov 25, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment states '80% of 200KB = 160KB' but this should be 160,000 bytes. The comment should clarify that SIZE_LIMIT_THRESHOLD is defined elsewhere to avoid confusion about the actual numeric value being compared.
Copilot
AI
Nov 25, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The comment says 'chars' but should say 'characters' for consistency with the function documentation in event-size-monitor.ts. Also, like Comment 3, this should reference that the 1000 limit is defined in the truncateErrorMessage function.
| // Always prune error messages to 1000 chars before emit | |
| // Always prune error messages to 1000 characters before emit (limit defined in truncateErrorMessage) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
JSON.stringify().length calculates size in UTF-16 code units, not bytes. For accurate byte size calculation (especially for SQS limits), use
Buffer.byteLength(JSON.stringify(data), 'utf8')ornew TextEncoder().encode(JSON.stringify(data)).length.