diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
index b6643b9169..69d2d66adf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -364,19 +364,23 @@ protected static string GetInterletterValues(string pat)
/// <summary>
/// LUCENENET specific helper class to force the DTD file to be read from the embedded resource
- /// rather than from the file system.
+ /// rather than from the file system. Any other external reference is rejected, so the parser
+ /// only ever resolves the known, embedded <c>hyphenation.dtd</c>.
/// </summary>
internal class DtdResolver : XmlUrlResolver
{
+ internal const string DTD_FILENAME = "hyphenation.dtd"; // the only file name GetEntity accepts
+
public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
{
- string dtdFilename = "hyphenation.dtd";
- if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.Ordinal))
+ if (DTD_FILENAME.Equals(absoluteUri?.Segments.LastOrDefault(), StringComparison.Ordinal)) // a null URI yields null here, fails the match, and falls through to the throw
{
- return typeof(PatternParser).FindAndGetManifestResourceStream(dtdFilename);
+ return typeof(PatternParser).FindAndGetManifestResourceStream(DTD_FILENAME); // served from the assembly's embedded resources; only the last URI segment is consulted
}
- return base.GetEntity(absoluteUri, role, ofObjectToReturn);
+ // Only the embedded hyphenation.dtd is a valid external reference. Reject anything
+ // else instead of delegating to the base XmlUrlResolver, which the previous code did.
+ throw new XmlException($"Unexpected external reference in hyphenation data: '{absoluteUri}'. Only '{DTD_FILENAME}' may be referenced.");
}
}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestPatternParser.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestPatternParser.cs
new file mode 100644
index 0000000000..8fe103fcec
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestPatternParser.cs
@@ -0,0 +1,108 @@
+// Lucene version compatibility level 4.8.1
+using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Attributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Xml;
+
+namespace Lucene.Net.Analysis.Compound
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ [LuceneNetSpecific]
+ public class TestPatternParser : LuceneTestCase
+ {
+        /// <summary>
+        /// Sanity check for the happy path: a well-formed hyphenation file whose only
+        /// external reference is the standard, embedded <c>hyphenation.dtd</c> must
+        /// parse without throwing.
+        /// </summary>
+        [Test]
+        public virtual void TestValidHyphenationDataParses()
+        {
+            var patternParser = new PatternParser(new NoOpPatternConsumer());
+
+            // The Danish hyphenation grammar is resolved relative to this test class.
+            using (var input = this.GetType().getResourceAsStream("da_UTF8.xml"))
+            {
+                Assert.DoesNotThrow(() => patternParser.Parse(input));
+            }
+        }
+
+        /// <summary>
+        /// A hyphenation file that references an external entity other than the
+        /// embedded <c>hyphenation.dtd</c> is rejected rather than resolving the
+        /// reference.
+        /// </summary>
+        [Test]
+        public virtual void TestExternalEntityIsRejected()
+        {
+            // Point the external reference at a real, readable file. If the reference were
+            // resolved, its contents would be pulled into the parsed document; instead the
+            // parser must refuse the reference.
+            FileInfo target = CreateTempFile("lucene_pp_", ".txt");
+            File.WriteAllText(target.FullName, "marker-contents");
+
+            // The document keeps the legitimate DOCTYPE (so the embedded DTD path is still
+            // exercised) and adds an internal-subset entity that points at the temp file.
+            string targetUri = new Uri(target.FullName).AbsoluteUri;
+            string xml =
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+                "<!DOCTYPE hyphenation-info SYSTEM \"hyphenation.dtd\" [\n" +
+                "  <!ENTITY ext SYSTEM \"" + targetUri + "\">\n" +
+                "]>\n" +
+                "<hyphenation-info>\n" +
+                "  <patterns>&ext;</patterns>\n" +
+                "</hyphenation-info>\n";
+
+            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml));
+            var parser = new PatternParser(new NoOpPatternConsumer());
+
+            // The expected exception type must be named explicitly; the resolver signals
+            // the rejection with an XmlException.
+            Assert.Throws<XmlException>(() => parser.Parse(stream));
+        }
+
+        /// <summary>
+        /// A reference to an external DTD other than the embedded
+        /// <c>hyphenation.dtd</c> is rejected.
+        /// </summary>
+        [Test]
+        public virtual void TestExternalDtdIsRejected()
+        {
+            // Give the foreign DTD well-formed content so that a parse failure points at
+            // the resolver refusing the reference rather than at a malformed fixture.
+            FileInfo target = CreateTempFile("lucene_pp_", ".dtd");
+            File.WriteAllText(target.FullName, "<!ELEMENT hyphenation-info ANY>");
+
+            // The DOCTYPE's system identifier is the temp file, whose name is not
+            // "hyphenation.dtd", so the resolver must not serve it.
+            string targetUri = new Uri(target.FullName).AbsoluteUri;
+            string xml =
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+                "<!DOCTYPE hyphenation-info SYSTEM \"" + targetUri + "\">\n" +
+                "<hyphenation-info/>\n";
+
+            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml));
+            var parser = new PatternParser(new NoOpPatternConsumer());
+
+            // The expected exception type must be named explicitly; the resolver signals
+            // the rejection with an XmlException.
+            Assert.Throws<XmlException>(() => parser.Parse(stream));
+        }
+
+ private sealed class NoOpPatternConsumer : IPatternConsumer
+ {
+ public void AddClass(string chargroup) { }
+
+ public void AddException(string word, IList