From 7ece520410913a633f95cbdf13a2bdde62ae3b6c Mon Sep 17 00:00:00 2001 From: gabriellsh <40830821+gabriellsh@users.noreply.github.com> Date: Wed, 5 Oct 2022 17:58:34 -0300 Subject: [PATCH] fix(message-parser): Message parser issues (#852) * Add first test * fix: check if is emoticon to preserve colon character * fix(message-parser): Thumbs emoji becoming phone number (#858) Co-authored-by: gabriellsh * fix(message-parser): Link/URL parser issues (#855) * feat(fuselage): Adds wordbreak in Box Component (#853) * Fixing URL/links parsing issues * restoring files from base branch * replacing backtick with single quotes Co-authored-by: Douglas Fabris * fixing domainChar accepted chars (#861) * temporarily removing emphasis support Co-authored-by: Filipe Marins Co-authored-by: Hugo Costa Co-authored-by: Douglas Fabris --- packages/message-parser/src/grammar.pegjs | 83 +++------ packages/message-parser/src/utils.ts | 11 +- packages/message-parser/tests/emoji.test.ts | 22 ++- .../message-parser/tests/emoticons.test.ts | 4 + .../message-parser/tests/inlineCode.test.ts | 10 ++ packages/message-parser/tests/link.test.ts | 162 ++++++++++++++++++ 6 files changed, 230 insertions(+), 62 deletions(-) diff --git a/packages/message-parser/src/grammar.pegjs b/packages/message-parser/src/grammar.pegjs index 82c4a03f60..71b9afb111 100644 --- a/packages/message-parser/src/grammar.pegjs +++ b/packages/message-parser/src/grammar.pegjs @@ -171,19 +171,12 @@ Space / "\t" anyText - = [\x20-\x27] /* ! " # $ % & ' ( ) */ + = [\x20-\x27] // ! " # $ % & ' / [\x2B-\x40] // + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ / [\x41-\x5A] // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z / [\x61-\x7A] // a b c d e f g h i j k l m n o p q r s t u v w x y z / nonascii -SectionText - = [-]+ - / [\x20-\x40] // ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ - / [\x41-\x60] // A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` - / [\x61-\x7A] // a b c d e f g h i j k l m n o p q r s t u v w x y z - / nonascii - utf8_names_validation = $[0-9a-zA-Z-_.]+ matrix_server_validation = ":" utf8_names_validation @@ -203,12 +196,14 @@ ChannelMention } / "#" channel:utf8_names_validation { return mentionChannel(channel); } +emoji_shortCode_name = $[0-9a-zA-Z-_+.]+ + Emoji = Emoji_shortCode / ch:unicodeEmoji { return emojiUnicode(ch); } Emoji_shortCode - = ":" shortCode:$(text:utf8_names_validation) ":" { return emoji(shortCode); } + = ":" shortCode:$(text:emoji_shortCode_name) ":" { return emoji(shortCode); } /* __Italic__ */ /* _Italic_ */ @@ -301,27 +296,6 @@ escape = unicode / "\\" ch:[^\r\n\f0-9a-f]i { return ch; } -nmstart - = [_a-z]i - / nonascii - / escape - -nmchar - = [_a-z0-9-]i - / nonascii - / escape - -string1 - = "\"" chars:$([^\n\r\f\\"] / "\\" nl:nl { return ''; } / escape)* "\"" { - return chars; - } - -nl - = "\n" - / "\r\n" - / "\r" - / "\f" - AutolinkedPhone = p:Phone { return link('tel:' + p.number, plain(p.text)); } AutolinkedURL = u:URL { return link(u); } @@ -375,11 +349,12 @@ hexByte = a:hexdigit b:hexdigit { return parseInt(a + b, 16); } domainName = "localhost" - / $(domainNameLabel ("." domainChar domainNameLabel)+) + / $(domainNameLabel ("." (!digit domainChar) domainNameLabel)+) domainNameLabel = $(domainChar+ $("-" domainChar+)*) -domainChar = !"\\" !"/" !"|" !">" !"<" !safe !extra !EndOfLine !Space . +domainChar + = !"\\" !"/" !"|" !">" !"<" !"%" !"`" !safe !extra !EndOfLine !Space . /** * @@ -409,23 +384,8 @@ phonePrefix */ URL - = $( - s:urlScheme - a:urlAuthority - p:urlPath? - q:urlQuery? - f:urlFragment? - g:urlPath? - h:urlQuery? - ) - / $( - urlAuthorityHost - p:urlPath? - q:urlQuery? - f:urlFragment? - g:urlPath? - h:urlQuery? 
- ) + = $(urlScheme urlAuthority urlBody) + / $(urlAuthorityHost urlBody) urlScheme = $( @@ -464,6 +424,25 @@ urlScheme ":" ) +urlBody + = ( + !Whitespace + ( + anyText + / "*" + / "[" + / "\/" + / "]" + / "^" + / "_" + / "`" + / "{" + / "}" + / "~" + / "(" + ) + )* + urlAuthority = $("//" urlAuthorityUserInfo? urlAuthorityHost) urlAuthorityUserInfo = $(urlAuthorityUser (":" urlAuthorityPassword)? "@") @@ -481,12 +460,6 @@ urlAuthorityHostName urlAuthorityPort = digits // TODO: from "0" to "65535" -urlPath = $("/" $(!"?" !"/" !"#" !")" !">" !"|" !" " .)* urlPath*) - -urlQuery = $("?" $(alpha_digit / safe)*) - -urlFragment = $("#" $(alpha_digit / extra / safe)*) - /** * * Email diff --git a/packages/message-parser/src/utils.ts b/packages/message-parser/src/utils.ts index 261b45673b..2f81cda29b 100644 --- a/packages/message-parser/src/utils.ts +++ b/packages/message-parser/src/utils.ts @@ -75,6 +75,7 @@ const isValidLink = (link: string) => { return false; } }; + export const link = (() => { const fn = generate('LINK'); @@ -145,9 +146,17 @@ const joinEmoji = ( const hasPlainAsNeighbor = (previous?.type === 'PLAIN_TEXT' && previous.value.trim() !== '') || (next?.type === 'PLAIN_TEXT' && next.value.trim() !== ''); + const isEmoticon = current.shortCode !== current.value.value; if (current.value && (hasEmojiAsNeighbor || hasPlainAsNeighbor)) { - return current.value; + if (isEmoticon) { + return current.value; + } + + return { + ...current.value, + value: `:${current.value.value}:`, + }; } return current; diff --git a/packages/message-parser/tests/emoji.test.ts b/packages/message-parser/tests/emoji.test.ts index 1eb0476d4e..d902148d4f 100644 --- a/packages/message-parser/tests/emoji.test.ts +++ b/packages/message-parser/tests/emoji.test.ts @@ -2,16 +2,22 @@ import { parse } from '../src'; import { emoji, bigEmoji, paragraph, plain, emojiUnicode } from '../src/utils'; test.each([ - [':smille: asd', [paragraph([emoji('smille'), plain(' asd')])]], + [':smile: asd', 
[paragraph([emoji('smile'), plain(' asd')])]], + ['text:inner:outer', [paragraph([plain('text:inner:outer')])]], + ['10:20:30', [paragraph([plain('10:20:30')])]], + ['10:20:30:', [paragraph([plain('10:20:30:')])]], + ['":smile:"', [paragraph([plain('":smile:"')])]], + ['":smile: "', [paragraph([plain('":smile: "')])]], + ['" :smile: "', [paragraph([plain('" '), emoji('smile'), plain(' "')])]], [ - `:smille: - :smille: + `:smile: + :smile: `, - [bigEmoji([emoji('smille'), emoji('smille')])], + [bigEmoji([emoji('smile'), emoji('smile')])], ], [ - 'asdas :smille: asd', - [paragraph([plain('asdas '), emoji('smille'), plain(' asd')])], + 'asdas :smile: asd', + [paragraph([plain('asdas '), emoji('smile'), plain(' asd')])], ], [ 'normal emojis :smile: :smile: :smile:', @@ -44,10 +50,13 @@ test.each([ ], [':smile::smile:', [bigEmoji([emoji('smile'), emoji('smile')])]], [':smile:', [bigEmoji([emoji('smile')])]], + ['Hi :+1:', [paragraph([plain('Hi '), emoji('+1')])]], + ['Hi :+1_tone4:', [paragraph([plain('Hi '), emoji('+1_tone4')])]], ])('parses %p', (input, output) => { expect(parse(input)).toMatchObject(output); }); +// Tests for unicode emojis test.each([ ['😀', [bigEmoji([emojiUnicode('😀')])]], ['😃', [bigEmoji([emojiUnicode('😃')])]], @@ -81,6 +90,7 @@ test.each([ [bigEmoji([emojiUnicode('👆🏽'), emojiUnicode('👆🏽'), emojiUnicode('👆🏽')])], ], ['👆🏺', [bigEmoji([emojiUnicode('👆'), emojiUnicode('🏺')])]], + ['Hi 👍', [paragraph([plain('Hi '), emojiUnicode('👍')])]], ])('parses %p', (input, output) => { expect(parse(input)).toMatchObject(output); }); diff --git a/packages/message-parser/tests/emoticons.test.ts b/packages/message-parser/tests/emoticons.test.ts index 8cc2ebc8aa..9cf58187e0 100644 --- a/packages/message-parser/tests/emoticons.test.ts +++ b/packages/message-parser/tests/emoticons.test.ts @@ -131,6 +131,10 @@ test.each([ // Should not render Emojis or BigEmojis if they are not surrounded by spaces ['normal emojis :):):)', [paragraph([plain('normal emojis :):):)')])]], 
+ [':)10:30', [paragraph([plain(':)10:30')])]], + [':smile::)text', [paragraph([plain(':smile::)text')])]], + ['text:):smile:', [paragraph([plain('text:):smile:')])]], + ['text:):)', [paragraph([plain('text:):)')])]], [':):):) normal emojis', [paragraph([plain(':):):) normal emojis')])]], [':):):):)', [paragraph([plain(':):):):)')])]], ['10:30', [paragraph([plain('10:30')])]], diff --git a/packages/message-parser/tests/inlineCode.test.ts b/packages/message-parser/tests/inlineCode.test.ts index ddbc1a4d41..3b2766e042 100644 --- a/packages/message-parser/tests/inlineCode.test.ts +++ b/packages/message-parser/tests/inlineCode.test.ts @@ -7,6 +7,16 @@ test.each([ [paragraph([inlineCode(plain('[asd](https://localhost)'))])], ], [`\`code\``, [paragraph([inlineCode(plain('code'))])]], + [ + `File extension (\`.mov\`)`, + [ + paragraph([ + plain('File extension ('), + inlineCode(plain('.mov')), + plain(')'), + ]), + ], + ], ])('parses %p', (input, output) => { expect(parse(input)).toMatchObject(output); }); diff --git a/packages/message-parser/tests/link.test.ts b/packages/message-parser/tests/link.test.ts index 572665dd01..a1aa0f8594 100644 --- a/packages/message-parser/tests/link.test.ts +++ b/packages/message-parser/tests/link.test.ts @@ -7,6 +7,10 @@ import { strike, italic, quote, + lineBreak, + unorderedList, + listItem, + orderedList, } from '../src/utils'; test.each([ @@ -259,6 +263,164 @@ test.each([ ]), ], ], + [ + '[Github link with hash](https://github.com/RocketChat/Rocket.Chat/pull/26751/files#diff-c87b108ecf1ede549f8ede68eca840fbb330180b927df0b8a0b4df5d06cbd89b)', + [ + paragraph([ + link( + 'https://github.com/RocketChat/Rocket.Chat/pull/26751/files#diff-c87b108ecf1ede549f8ede68eca840fbb330180b927df0b8a0b4df5d06cbd89b', + plain('Github link with hash') + ), + ]), + ], + ], + [ + '[Github link with hash](https://github.com/RocketChat/Rocket.Chat/pull/26751/files#diff)', + [ + paragraph([ + link( + 
'https://github.com/RocketChat/Rocket.Chat/pull/26751/files#diff', + plain('Github link with hash') + ), + ]), + ], + ], + [ + '[Github link without hash](https://github.com/RocketChat/Rocket.Chat/pull/26751/files)', + [ + paragraph([ + link( + 'https://github.com/RocketChat/Rocket.Chat/pull/26751/files', + plain('Github link without hash') + ), + ]), + ], + ], + [ + '[Link with special chars](https://github.com/RocketChat/Rocket.Chat*[/]^_`{}~)', + [ + paragraph([ + link( + 'https://github.com/RocketChat/Rocket.Chat*[/]^_`{}~', + plain('Link with special chars') + ), + ]), + ], + ], + [ + '[Google complex Link](https://www.google.com/url?rct=j&sa=t&url=https://ga.de/freizeit/region-erleben/bonn-und-region-tipps-fuers-wochenende-flohmarkt-rheinaue-weltkindertag-stadtfest_aid-53876987&ct=ga&cd=CAIyHDQ0NzEyYWE3MDA1MGNhNTQ6Y29tOmRlOkRFOlI&usg=AOvVaw3ySYrO9lM0iNSnk43gPVwZ)', + [ + paragraph([ + link( + 'https://www.google.com/url?rct=j&sa=t&url=https://ga.de/freizeit/region-erleben/bonn-und-region-tipps-fuers-wochenende-flohmarkt-rheinaue-weltkindertag-stadtfest_aid-53876987&ct=ga&cd=CAIyHDQ0NzEyYWE3MDA1MGNhNTQ6Y29tOmRlOkRFOlI&usg=AOvVaw3ySYrO9lM0iNSnk43gPVwZ', + plain('Google complex Link') + ), + ]), + ], + ], + [ + '[Rocket.Chat](https://rocket.chat) Inline Text', + [ + paragraph([ + link('https://rocket.chat', plain('Rocket.Chat')), + plain(' Inline Text'), + ]), + ], + ], + [ + 'https://analytics.zoho.com/open-view/123456789 Same Line', + [ + paragraph([ + link( + 'https://analytics.zoho.com/open-view/123456789', + plain('https://analytics.zoho.com/open-view/123456789') + ), + plain(' Same Line'), + ]), + ], + ], + [ + `[Rocket.Chat](https://rocket.chat) +Text after in a new line after link`, + [ + paragraph([link('https://rocket.chat', plain('Rocket.Chat'))]), + paragraph([plain('Text after in a new line after link')]), + ], + ], + [ + `https://analytics.zoho.com/open-view/123456789 +Second line`, + [ + paragraph([ + link( + 
'https://analytics.zoho.com/open-view/123456789', + plain('https://analytics.zoho.com/open-view/123456789') + ), + ]), + paragraph([plain('Second line')]), + ], + ], + [ + `[Rocket.Chat](https://rocket.chat) + +Text after line break`, + [ + paragraph([link('https://rocket.chat', plain('Rocket.Chat'))]), + lineBreak(), + paragraph([plain('Text after line break')]), + ], + ], + [ + ` +[List Header Link](https://rocket.chat) +- First item +- Second item +- Third item +- *Fourth item* +`.trim(), + [ + paragraph([link('https://rocket.chat', plain('List Header Link'))]), + unorderedList([ + listItem([plain('First item')]), + listItem([plain('Second item')]), + listItem([plain('Third item')]), + listItem([bold([plain('Fourth item')])]), + ]), + ], + ], + [ + `[List Header Link](https://rocket.chat) +7. First item +2. Second item +8. Third item +4. *Fourth item* +15. *Fifteenth item* +`.trim(), + [ + paragraph([link('https://rocket.chat', plain('List Header Link'))]), + orderedList([ + listItem([plain('First item')], 7), + listItem([plain('Second item')], 2), + listItem([plain('Third item')], 8), + listItem([bold([plain('Fourth item')])], 4), + listItem([bold([plain('Fifteenth item')])], 15), + ]), + ], + ], + [ + '[9gag](https://9gag.com/)', + [paragraph([link('https://9gag.com/', plain(`9gag`))])], + ], + ['[9gag](9gag.com)', [paragraph([link('9gag.com', plain(`9gag`))])]], + ['<9gag.com|9gag>', [paragraph([link('9gag.com', plain(`9gag`))])]], + ['9gag.com', [paragraph([link('9gag.com')])]], + + // Should not parse as link + ['[77.77%](77.77%)', [paragraph([plain('[77.77%](77.77%)')])]], + ['77.77%', [paragraph([plain('77.77%')])]], + ['[77.77](77.77)', [paragraph([plain('[77.77](77.77)')])]], + ['77.77', [paragraph([plain('77.77')])]], + ['test.9gag', [paragraph([plain('test.9gag')])]], ])('parses %p', (input, output) => { expect(parse(input)).toMatchObject(output); });