-
Notifications
You must be signed in to change notification settings - Fork 1.4k
For CosmosDB bulk api added support for splitting of batch based on size. #23987
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f67284c
8d4f89c
dc00f58
17f5457
a509580
99e04a4
6f35dee
2bcce56
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,8 @@ import { PartitionKeyDefinition } from "../documents"; | |
| import { RequestOptions } from ".."; | ||
| import { PatchRequestBody } from "./patch"; | ||
| import { v4 } from "uuid"; | ||
| import { bodyFromData } from "../request/request"; | ||
| import { Constants } from "../common/constants"; | ||
| const uuid = v4; | ||
|
|
||
| export type Operation = | ||
|
|
@@ -210,6 +212,53 @@ export function decorateOperation( | |
| return operation as Operation; | ||
| } | ||
|
|
||
| /** | ||
| * Splits a batch into an array of batches based on the cumulative size of its operations, making sure the | ||
| * cumulative size of an individual batch is not larger than {@link Constants.DefaultMaxBulkRequestBodySizeInBytes}. | ||
| * If a single operation itself is larger than {@link Constants.DefaultMaxBulkRequestBodySizeInBytes}, that | ||
| * operation is placed into a batch containing only that operation. | ||
| * Each operation is measured individually with calculateObjectSizeInBytes and the per-operation sizes are | ||
| * summed; nothing else is added to the running total. Operations keep their original order, and the entry | ||
| * of originalBatch.indexes at the same position travels with each operation. | ||
| * @param originalBatch - The batch whose operations should be checked and, if needed, split. | ||
| * @returns An array of batches, each a shallow copy of originalBatch with its own operations and indexes | ||
| * arrays. An empty array is returned when originalBatch has no operations. | ||
| * @hidden | ||
| */ | ||
| export function splitBatchBasedOnBodySize(originalBatch: Batch): Batch[] { | ||
| if (originalBatch?.operations === undefined || originalBatch.operations.length < 1) return []; | ||
| let currentBatchSize = calculateObjectSizeInBytes(originalBatch.operations[0]); | ||
| let currentBatch: Batch = { | ||
| ...originalBatch, | ||
| operations: [originalBatch.operations[0]], | ||
| indexes: [originalBatch.indexes[0]], | ||
| }; | ||
| const processedBatches: Batch[] = []; | ||
| processedBatches.push(currentBatch); | ||
|
|
||
| for (let index = 1; index < originalBatch.operations.length; index++) { | ||
| const operation = originalBatch.operations[index]; | ||
| const currentOpSize = calculateObjectSizeInBytes(operation); | ||
| if (currentBatchSize + currentOpSize > Constants.DefaultMaxBulkRequestBodySizeInBytes) { | ||
| currentBatch = { | ||
| ...originalBatch, | ||
| operations: [], | ||
| indexes: [], | ||
| }; | ||
| processedBatches.push(currentBatch); | ||
| currentBatchSize = 0; | ||
| } | ||
| currentBatch.operations.push(operation); | ||
| currentBatch.indexes.push(originalBatch.indexes[index]); | ||
| currentBatchSize += currentOpSize; | ||
| } | ||
| return processedBatches; | ||
| } | ||
|
|
||
| /** | ||
| * Calculates the size of a JSON object in bytes with utf-8 encoding. | ||
| * The value is passed through bodyFromData, the result is encoded with TextEncoder, and the length of | ||
| * the encoded byte array is returned. | ||
| * NOTE(review): the whole value is serialized and encoded only to measure its length, which may be | ||
| * costly for large inputs. | ||
| * @param obj - The value to measure. | ||
| * @returns The number of bytes in the encoded form of obj. | ||
| * @hidden | ||
| */ | ||
| export function calculateObjectSizeInBytes(obj: unknown): number { | ||
| return new TextEncoder().encode(bodyFromData(obj as any)).length; | ||
|
witemple-msft marked this conversation as resolved.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just leaving a note that this feels expensive. You're basically encoding the body into a buffer, which the request pipeline must do anyway, for the sole purpose of measuring the body's length. I'm not sure if there's a great alternative to doing this to be honest, but it sticks out at me as being a costly operation. |
||
| } | ||
|
|
||
| export function decorateBatchOperation( | ||
| operation: OperationInput, | ||
| options: RequestOptions = {} | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why this specific size?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I took this constant from the Java and .NET SDKs. It is slightly lower than 2 MB, but it appears to be the value used by both SDKs.