diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
index b6643b9169..69d2d66adf 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs
@@ -364,19 +364,47 @@ protected static string GetInterletterValues(string pat)
 
         /// <summary>
         /// LUCENENET specific helper class to force the DTD file to be read from the embedded resource
-        /// rather than from the file system.
+        /// rather than from the file system. Any other external reference is rejected, so the parser
+        /// only ever resolves the known, embedded hyphenation.dtd.
+        /// <para/>
+        /// Both <see cref="GetEntity"/> and <see cref="GetEntityAsync"/> are overridden so that neither
+        /// call falls through to the URL-fetching behavior inherited from <see cref="XmlUrlResolver"/>.
         /// </summary>
         internal class DtdResolver : XmlUrlResolver
         {
+            internal const string DTD_FILENAME = "hyphenation.dtd";
+
+            /// <summary>
+            /// Returns the embedded DTD resource when the last path segment of
+            /// <paramref name="absoluteUri"/> is exactly <see cref="DTD_FILENAME"/>.
+            /// </summary>
+            /// <param name="absoluteUri">URI of the external reference being resolved; may be <c>null</c>.</param>
+            /// <param name="role">Not used by this resolver.</param>
+            /// <param name="ofObjectToReturn">Not used by this resolver.</param>
+            /// <returns>The result of looking up <see cref="DTD_FILENAME"/> as an embedded resource.</returns>
+            /// <exception cref="XmlException">The reference is null, relative, or names any other file.</exception>
             public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
             {
-                string dtdFilename = "hyphenation.dtd";
-                if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.Ordinal))
+                // Uri.Segments throws InvalidOperationException on a relative URI, so test
+                // IsAbsoluteUri first and let such references reach the rejection below.
+                if (absoluteUri != null && absoluteUri.IsAbsoluteUri
+                    && DTD_FILENAME.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.Ordinal))
                 {
-                    return typeof(PatternParser).FindAndGetManifestResourceStream(dtdFilename);
+                    return typeof(PatternParser).FindAndGetManifestResourceStream(DTD_FILENAME);
                 }
 
-                return base.GetEntity(absoluteUri, role, ofObjectToReturn);
+                // Only the embedded hyphenation.dtd is a valid external reference. Reject anything
+                // else rather than resolving it from the file system or network.
+                throw new XmlException($"Unexpected external reference in hyphenation data: '{absoluteUri}'. Only '{DTD_FILENAME}' may be referenced.");
+            }
+
+            /// <summary>
+            /// Asynchronous counterpart of <see cref="GetEntity"/>; applies the same restriction and
+            /// simply wraps the synchronous result in an already-completed task.
+            /// </summary>
+            public override System.Threading.Tasks.Task<object> GetEntityAsync(Uri absoluteUri, string role, Type ofObjectToReturn)
+            {
+                return System.Threading.Tasks.Task.FromResult(GetEntity(absoluteUri, role, ofObjectToReturn));
             }
         }
 
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestPatternParser.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestPatternParser.cs
new file mode 100644
index 0000000000..8fe103fcec
--- /dev/null
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Compound/TestPatternParser.cs
@@ -0,0 +1,110 @@
+// Lucene version compatibility level 4.8.1
+using Lucene.Net.Analysis.Compound.Hyphenation;
+using Lucene.Net.Attributes;
+using Lucene.Net.Util;
+using NUnit.Framework;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text;
+using System.Xml;
+
+namespace Lucene.Net.Analysis.Compound
+{
+    /*
+     * Licensed to the Apache Software Foundation (ASF) under one or more
+     * contributor license agreements. See the NOTICE file distributed with
+     * this work for additional information regarding copyright ownership.
+     * The ASF licenses this file to You under the Apache License, Version 2.0
+     * (the "License"); you may not use this file except in compliance with
+     * the License. You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+
+    // NOTE(review): angle-bracketed text in this file (generic type arguments and the XML/DTD
+    // string literals) was reconstructed from context - confirm against the original commit.
+    [LuceneNetSpecific]
+    public class TestPatternParser : LuceneTestCase
+    {
+        /// <summary>
+        /// A well-formed hyphenation file that references the standard, embedded
+        /// hyphenation.dtd parses without error.
+        /// </summary>
+        [Test]
+        public virtual void TestValidHyphenationDataParses()
+        {
+            using var stream = this.GetType().getResourceAsStream("da_UTF8.xml");
+            var parser = new PatternParser(new NoOpPatternConsumer());
+
+            Assert.DoesNotThrow(() => parser.Parse(stream));
+        }
+
+        /// <summary>
+        /// A hyphenation file that references an external entity other than the
+        /// embedded hyphenation.dtd is rejected rather than resolving the
+        /// reference.
+        /// </summary>
+        [Test]
+        public virtual void TestExternalEntityIsRejected()
+        {
+            // Point the external reference at a real, readable file. If the reference were
+            // resolved, its contents would be pulled into the parsed document; instead the
+            // parser must refuse the reference.
+            FileInfo target = CreateTempFile("lucene_pp_", ".txt");
+            File.WriteAllText(target.FullName, "marker-contents");
+
+            string targetUri = new Uri(target.FullName).AbsoluteUri;
+            string xml =
+                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
+                "<!DOCTYPE hyphenation-info SYSTEM \"hyphenation.dtd\" [\n" +
+                "  <!ENTITY ext SYSTEM \"" + targetUri + "\">\n" +
+                "]>\n" +
+                "<hyphenation-info>\n" +
+                "  <patterns>&ext;</patterns>\n" +
+                "</hyphenation-info>\n";
+
+            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml));
+            var parser = new PatternParser(new NoOpPatternConsumer());
+
+            Assert.Throws<XmlException>(() => parser.Parse(stream));
+        }
+
+        /// <summary>
+        /// A reference to an external DTD other than the embedded
+        /// hyphenation.dtd is rejected.
+        /// </summary>
+        [Test]
+        public virtual void TestExternalDtdIsRejected()
+        {
+            FileInfo target = CreateTempFile("lucene_pp_", ".dtd");
+            File.WriteAllText(target.FullName, "<!ELEMENT hyphenation-info ANY>");
+
+            string targetUri = new Uri(target.FullName).AbsoluteUri;
+            string xml =
+                "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
+                "<!DOCTYPE hyphenation-info SYSTEM \"" + targetUri + "\">\n" +
+                "<hyphenation-info></hyphenation-info>\n";
+
+            using var stream = new MemoryStream(Encoding.UTF8.GetBytes(xml));
+            var parser = new PatternParser(new NoOpPatternConsumer());
+
+            Assert.Throws<XmlException>(() => parser.Parse(stream));
+        }
+
+        private sealed class NoOpPatternConsumer : IPatternConsumer
+        {
+            public void AddClass(string chargroup) { }
+
+            public void AddException(string word, IList<object> hyphenatedword) { }
+
+            public void AddPattern(string pattern, string values) { }
+        }
+    }
+}