diff --git a/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs b/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs index ceaf1b48bd4b..f3a045aad24e 100644 --- a/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs +++ b/src/Umbraco.Infrastructure/DeliveryApi/ApiRichTextElementParser.cs @@ -1,13 +1,10 @@ -using HtmlAgilityPack; -using Microsoft.Extensions.DependencyInjection; +using HtmlAgilityPack; using Microsoft.Extensions.Logging; using Umbraco.Cms.Core; using Umbraco.Cms.Core.DeliveryApi; -using Umbraco.Cms.Core.DependencyInjection; using Umbraco.Cms.Core.Models.Blocks; using Umbraco.Cms.Core.Models.DeliveryApi; using Umbraco.Cms.Core.PublishedCache; -using Umbraco.Cms.Core.Routing; using Umbraco.Cms.Infrastructure.Extensions; using Umbraco.Extensions; @@ -101,9 +98,9 @@ private T ParseElement(HtmlNode element, IPublishedSnapshot publishedSnapshot // - non-#comment nodes // - non-#text nodes // - non-empty #text nodes - // - empty #text between inline elements (see #17037) + // - empty #text between inline elements (see #17037) but not #text with only newlines (see #19388) HtmlNode[] childNodes = element.ChildNodes - .Where(c => c.Name != CommentNodeName && (c.Name != TextNodeName || c.NextSibling is not null || string.IsNullOrWhiteSpace(c.InnerText) is false)) + .Where(c => c.Name != CommentNodeName && (c.Name != TextNodeName || IsNonEmptyElement(c))) .ToArray(); var tag = TagName(element); @@ -124,6 +121,9 @@ private T ParseElement(HtmlNode element, IPublishedSnapshot publishedSnapshot return createElement(tag, attributes, childElements); } + private static bool IsNonEmptyElement(HtmlNode htmlNode) => + string.IsNullOrWhiteSpace(htmlNode.InnerText) is false || htmlNode.InnerText.Any(c => c != '\n' && c != '\r'); + private string TagName(HtmlNode htmlNode) => htmlNode.Name; private void ReplaceLocalLinks(IPublishedSnapshot publishedSnapshot, Dictionary attributes) diff --git a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs index 2a8da7acb07d..f9f3722a0fea 100644 --- a/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs +++ b/tests/Umbraco.Tests.UnitTests/Umbraco.Core/DeliveryApi/RichTextParserTests.cs @@ -1,4 +1,4 @@ -using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging; using Moq; using NUnit.Framework; using Umbraco.Cms.Core; @@ -357,16 +357,71 @@ public void ParseElement_CanHandleMixedInlineAndBlockLevelBlocks() Assert.IsEmpty(blockLevelBlock.Elements); } + private const string TestParagraph = "What follows from here is just a bunch of text."; + [Test] public void ParseElement_CanHandleWhitespaceAroundInlineElemements() { var parser = CreateRichTextElementParser(); - var element = parser.Parse("

What follows from here is just a bunch of text.

") as RichTextRootElement; + var element = parser.Parse($"

{TestParagraph}

") as RichTextRootElement; Assert.IsNotNull(element); var paragraphElement = element.Elements.Single() as RichTextGenericElement; Assert.IsNotNull(paragraphElement); + AssertTestParagraph(paragraphElement); + } + + [TestCase(1, "\n")] + [TestCase(2, "\n")] + [TestCase(1, "\r")] + [TestCase(2, "\r")] + [TestCase(1, "\r\n")] + [TestCase(2, "\r\n")] + public void ParseElement_RemovesNewLinesAroundHtmlStructuralElements(int numberOfNewLineCharacters, string newlineCharacter) + { + var parser = CreateRichTextElementParser(); + + var newLineSeparator = string.Concat(Enumerable.Repeat(newlineCharacter, numberOfNewLineCharacters)); + var element = parser.Parse($"{newLineSeparator}{newLineSeparator}{newLineSeparator}{newLineSeparator}
{TestParagraph}
") as RichTextRootElement; + Assert.IsNotNull(element); + var tableElement = element.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(tableElement); + + var rowElement = tableElement.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(rowElement); + + var cellElement = rowElement.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(cellElement); + + AssertTestParagraph(cellElement); + } + + [TestCase(1, "\n")] + [TestCase(2, "\n")] + [TestCase(1, "\r")] + [TestCase(2, "\r")] + [TestCase(1, "\r\n")] + [TestCase(2, "\r\n")] + public void ParseElement_RemovesNewLinesAroundHtmlContentElements(int numberOfNewLineCharacters, string newlineCharacter) + { + var parser = CreateRichTextElementParser(); + + var newLineSeparator = string.Concat(Enumerable.Repeat(newlineCharacter, numberOfNewLineCharacters)); + var element = parser.Parse($"

{TestParagraph}

{newLineSeparator}

{newLineSeparator}

 

{newLineSeparator}

{TestParagraph}

") as RichTextRootElement; + Assert.IsNotNull(element); + var divElement = element.Elements.Single() as RichTextGenericElement; + Assert.IsNotNull(divElement); + + var paragraphELements = divElement.Elements; + Assert.AreEqual(4, paragraphELements.Count()); + + AssertTestParagraph(paragraphELements.First() as RichTextGenericElement); + AssertTestParagraph(paragraphELements.Last() as RichTextGenericElement); + } + + private static void AssertTestParagraph(RichTextGenericElement paragraphElement) + { var childElements = paragraphElement.Elements.ToArray(); Assert.AreEqual(7, childElements.Length);