Skip to content

Commit 2c5d145

Browse files
authored
fix: only allow first h1 to be the title (#9474)
1 parent 0a3bd2c commit 2c5d145

File tree

4 files changed

+47
-102
lines changed

4 files changed

+47
-102
lines changed

src/Docfx.Build/Conceptual/BuildConceptualDocument.cs

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@
33

44
using System.Collections.Immutable;
55
using System.Composition;
6-
6+
using System.Net;
77
using Docfx.Build.Common;
88
using Docfx.Common;
99
using Docfx.DataContracts.Common;
1010
using Docfx.Plugins;
11+
using HtmlAgilityPack;
1112

1213
namespace Docfx.Build.ConceptualDocuments;
1314

1415
[Export(nameof(ConceptualDocumentProcessor), typeof(IDocumentBuildStep))]
1516
class BuildConceptualDocument : BaseDocumentBuildStep
1617
{
17-
private const string ConceptualKey = Constants.PropertyName.Conceptual;
1818
private const string DocumentTypeKey = "documentType";
1919

2020
public override string Name => nameof(BuildConceptualDocument);
@@ -28,16 +28,16 @@ public override void Build(FileModel model, IHostService host)
2828
return;
2929
}
3030
var content = (Dictionary<string, object>)model.Content;
31-
var markdown = (string)content[ConceptualKey];
31+
var markdown = (string)content[Constants.PropertyName.Conceptual];
3232
var result = host.Markup(markdown, model.OriginalFileAndType, false);
3333

34-
var htmlInfo = HtmlDocumentUtility.SeparateHtml(result.Html);
35-
content["rawTitle"] = htmlInfo.RawTitle;
36-
if (!string.IsNullOrEmpty(htmlInfo.RawTitle))
34+
var (h1, h1Raw, conceptual) = ExtractH1(result.Html);
35+
content["rawTitle"] = h1Raw;
36+
if (!string.IsNullOrEmpty(h1Raw))
3737
{
38-
model.ManifestProperties.rawTitle = htmlInfo.RawTitle;
38+
model.ManifestProperties.rawTitle = h1Raw;
3939
}
40-
content[ConceptualKey] = htmlInfo.Content;
40+
content[Constants.PropertyName.Conceptual] = conceptual;
4141

4242
if (result.YamlHeader?.Count > 0)
4343
{
@@ -47,13 +47,15 @@ public override void Build(FileModel model, IHostService host)
4747
}
4848
}
4949

50-
(content[Constants.PropertyName.Title], model.Properties.IsUserDefinedTitle) = GetTitle(result.YamlHeader, htmlInfo);
50+
content[Constants.PropertyName.Title] = GetTitle(result.YamlHeader, h1);
51+
content["wordCount"] = WordCounter.CountWord(conceptual);
5152

5253
model.LinkToFiles = result.LinkToFiles.ToImmutableHashSet();
5354
model.LinkToUids = result.LinkToUids;
5455
model.FileLinkSources = result.FileLinkSources;
5556
model.UidLinkSources = result.UidLinkSources;
5657
model.Properties.XrefSpec = null;
58+
5759
if (model.Uids.Length > 0)
5860
{
5961
var title = content[Constants.PropertyName.Title] as string;
@@ -108,31 +110,31 @@ void HandleYamlHeaderPair(string key, object value)
108110
}
109111
}
110112

111-
(string title, bool isUserDefined) GetTitle(ImmutableDictionary<string, object> yamlHeader, SeparatedHtmlInfo info)
113+
string GetTitle(ImmutableDictionary<string, object> yamlHeader, string h1)
112114
{
113115
// title from YAML header
114116
if (yamlHeader != null
115117
&& TryGetStringValue(yamlHeader, Constants.PropertyName.Title, out var yamlHeaderTitle))
116118
{
117-
return (yamlHeaderTitle, true);
119+
return yamlHeaderTitle;
118120
}
119121

120122
// title from metadata/titleOverwriteH1
121123
if (TryGetStringValue(content, Constants.PropertyName.TitleOverwriteH1, out var titleOverwriteH1))
122124
{
123-
return (titleOverwriteH1, true);
125+
return titleOverwriteH1;
124126
}
125127

126128
// title from H1
127-
if (!string.IsNullOrEmpty(info.Title))
129+
if (!string.IsNullOrEmpty(h1))
128130
{
129-
return (info.Title, false);
131+
return h1;
130132
}
131133

132134
// title from globalMetadata or fileMetadata
133135
if (TryGetStringValue(content, Constants.PropertyName.Title, out var title))
134136
{
135-
return (title, true);
137+
return title;
136138
}
137139

138140
return default;
@@ -152,4 +154,34 @@ bool TryGetStringValue(IDictionary<string, object> dictionary, string key, out s
152154
}
153155
}
154156
}
157+
158+
/// <summary>
/// Parses <paramref name="contentHtml"/> and splits out information about its first
/// &lt;h1&gt; element: the decoded heading text, the heading's raw markup, and the
/// HTML that remains.
/// </summary>
/// <param name="contentHtml">HTML to inspect; a null value is rejected.</param>
/// <returns>
/// <c>h1</c>: the HTML-decoded inner text of the first node matched by the XPath <c>//h1</c>;
/// when no such node exists, the result of decoding a null string.
/// <c>h1Raw</c>: the outer HTML of that node, but only when it is the first child of the
/// document root that is neither a comment nor whitespace; otherwise an empty string.
/// <c>body</c>: the document's outer HTML, with the heading removed only in the case
/// where <c>h1Raw</c> was populated.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="contentHtml"/> is null.</exception>
static (string h1, string h1Raw, string body) ExtractH1(string contentHtml)
159+
{
160+
ArgumentNullException.ThrowIfNull(contentHtml);
161+
162+
var document = new HtmlDocument();
163+
document.LoadHtml(contentHtml);
164+
165+
// The heading text is looked up anywhere in the document, regardless of position.
// HtmlAgilityPack's InnerText is not HTML-decoded (this looks like a library bug),
// so the text is decoded explicitly with WebUtility.HtmlDecode.
166+
var h1Node = document.DocumentNode.SelectSingleNode("//h1");
167+
var h1 = WebUtility.HtmlDecode(h1Node?.InnerText);
168+
var h1Raw = "";
169+
// Only a heading that leads the document (ignoring comments and whitespace) is
// captured as raw markup and stripped from the body; a later heading stays in place.
if (h1Node != null && GetFirstNoneCommentChild(document.DocumentNode) == h1Node)
170+
{
171+
// Capture the markup before detaching the node from the document.
h1Raw = h1Node.OuterHtml;
172+
h1Node.Remove();
173+
}
174+
175+
return (h1, h1Raw, document.DocumentNode.OuterHtml);
176+
177+
// Returns the first child of the given node that is not a comment and whose outer HTML
// is not null/whitespace, or null when every child is skipped or there are no children.
static HtmlNode GetFirstNoneCommentChild(HtmlNode node)
178+
{
179+
var result = node.FirstChild;
180+
while (result != null && (result.NodeType == HtmlNodeType.Comment || string.IsNullOrWhiteSpace(result.OuterHtml)))
181+
{
182+
result = result.NextSibling;
183+
}
184+
return result;
185+
}
186+
}
155187
}

src/Docfx.Build/Conceptual/HtmlDocumentUtility.cs

Lines changed: 0 additions & 49 deletions
This file was deleted.

src/Docfx.Build/Conceptual/SeparatedHtmlInfo.cs

Lines changed: 0 additions & 13 deletions
This file was deleted.

src/Docfx.Build/Conceptual/CountWord.cs renamed to src/Docfx.Build/Conceptual/WordCounter.cs

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,10 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4-
using System.Collections.Immutable;
5-
using System.Composition;
6-
using Docfx.Build.Common;
7-
using Docfx.DataContracts.Common;
8-
using Docfx.Plugins;
94
using HtmlAgilityPack;
105

116
namespace Docfx.Build.ConceptualDocuments;
127

13-
/// <summary>
/// Document build step, exported under the <c>ConceptualDocumentProcessor</c> contract name,
/// that stores a word count on article models during post-build.
/// </summary>
[Export(nameof(ConceptualDocumentProcessor), typeof(IDocumentBuildStep))]
14-
class CountWord : BaseDocumentBuildStep
15-
{
16-
/// <summary>Name of this build step; the class name.</summary>
public override string Name => nameof(CountWord);
17-
18-
/// <summary>Ordering value for this step (1).</summary>
public override int BuildOrder => 1;
19-
20-
/// <summary>
/// For every model whose type is <c>DocumentType.Article</c>, computes the word count of
/// the model's conceptual content and writes it to the <c>"wordCount"</c> content key.
/// </summary>
/// <param name="models">Models to process; non-article models are left untouched.</param>
/// <param name="host">Host service; not used by this method.</param>
public override void Postbuild(ImmutableList<FileModel> models, IHostService host)
21-
{
22-
foreach (var model in models)
23-
{
24-
if (model.Type == DocumentType.Article)
25-
{
26-
// The content is cast to a string-keyed dictionary; the conceptual entry is cast to a string.
var content = (Dictionary<string, object>)model.Content;
27-
content["wordCount"] = WordCounter.CountWord((string)content[Constants.PropertyName.Conceptual]);
28-
}
29-
}
30-
}
31-
}
32-
338
internal static class WordCounter
349
{
3510
private static readonly string[] ExcludeNodeXPaths = { "//title" };

0 commit comments

Comments
 (0)