diff --git a/packages/client-twitter/src/utils.ts b/packages/client-twitter/src/utils.ts index 26ba07c3e07..d11ed5b534f 100644 --- a/packages/client-twitter/src/utils.ts +++ b/packages/client-twitter/src/utils.ts @@ -321,11 +321,31 @@ function splitTweetContent(content: string, maxLength: number): string[] { return tweets; } -function splitParagraph(paragraph: string, maxLength: number): string[] { - // eslint-disable-next-line - const sentences = paragraph.match(/[^\.!\?]+[\.!\?]+|[^\.!\?]+$/g) || [ - paragraph, - ]; +function extractUrls(paragraph: string): { + textWithPlaceholders: string; + placeholderMap: Map; +} { + // replace https urls with placeholder + const urlRegex = /https?:\/\/[^\s]+/g; + const placeholderMap = new Map(); + + let urlIndex = 0; + const textWithPlaceholders = paragraph.replace(urlRegex, (match) => { + // twitter url would be considered as 23 characters + // <> is also 23 characters + const placeholder = `<>`; // Placeholder without . ? ! etc + placeholderMap.set(placeholder, match); + urlIndex++; + return placeholder; + }); + + return { textWithPlaceholders, placeholderMap }; +} + +function splitSentencesAndWords(text: string, maxLength: number): string[] { + // Split by periods, question marks and exclamation marks + // Note that URLs in text have been replaced with `<>` and won't be split by dots + const sentences = text.match(/[^\.!\?]+[\.!\?]+|[^\.!\?]+$/g) || [text]; const chunks: string[] = []; let currentChunk = ""; @@ -337,13 +357,16 @@ function splitParagraph(paragraph: string, maxLength: number): string[] { currentChunk = sentence; } } else { + // Can't fit more, push currentChunk to results if (currentChunk) { chunks.push(currentChunk.trim()); } + + // If current sentence itself is less than or equal to maxLength if (sentence.length <= maxLength) { currentChunk = sentence; } else { - // Split long sentence into smaller pieces + // Need to split sentence by spaces const words = sentence.split(" "); currentChunk = ""; for (const word of words) { @@ -366,9 +389,39 @@ function splitParagraph(paragraph: string, maxLength: number): string[] { } } + // Handle remaining content if (currentChunk) { chunks.push(currentChunk.trim()); } return chunks; } + +function restoreUrls( + chunks: string[], + placeholderMap: Map +): string[] { + return chunks.map((chunk) => { + // Replace all <> in chunk back to original URLs using regex + return chunk.replace(/<>/g, (match) => { + const original = placeholderMap.get(match); + return original || match; // Return placeholder if not found (theoretically won't happen) + }); + }); +} + +function splitParagraph(paragraph: string, maxLength: number): string[] { + // 1) Extract URLs and replace with placeholders + const { textWithPlaceholders, placeholderMap } = extractUrls(paragraph); + + // 2) Use first section's logic to split by sentences first, then do secondary split + const splittedChunks = splitSentencesAndWords( + textWithPlaceholders, + maxLength + ); + + // 3) Replace placeholders back to original URLs + const restoredChunks = restoreUrls(splittedChunks, placeholderMap); + + return restoredChunks; +} \ No newline at end of file