Skip to content

Commit 5bf729f

Browse files
authored
Merge pull request #2328 from sparklemotion/flavorjones-GHSA-2rr5-8q37-2w7h_main
fix JRuby SAX parser entity handling
2 parents 04032e5 + 3828603 commit 5bf729f

12 files changed

+804
-868
lines changed

ext/java/nokogiri/Html4SaxPushParser.java

+14-19
Original file line number | Diff line number | Diff line change
@@ -1,31 +1,26 @@
11
package nokogiri;
22

3-
import static nokogiri.XmlSaxPushParser.terminateExecution;
4-
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
5-
import static org.jruby.runtime.Helpers.invoke;
6-
7-
import java.io.ByteArrayInputStream;
8-
import java.io.InputStream;
9-
import java.io.IOException;
10-
import java.util.concurrent.Callable;
11-
import java.util.concurrent.ExecutionException;
12-
import java.util.concurrent.ExecutorService;
13-
import java.util.concurrent.Executors;
14-
import java.util.concurrent.Future;
15-
import java.util.concurrent.FutureTask;
16-
import java.util.concurrent.ThreadFactory;
17-
18-
import nokogiri.internals.*;
19-
3+
import nokogiri.internals.ClosedStreamException;
4+
import nokogiri.internals.NokogiriBlockingQueueInputStream;
5+
import nokogiri.internals.NokogiriHelpers;
6+
import nokogiri.internals.ParserContext;
207
import org.jruby.Ruby;
218
import org.jruby.RubyClass;
229
import org.jruby.RubyObject;
2310
import org.jruby.anno.JRubyClass;
2411
import org.jruby.anno.JRubyMethod;
25-
import org.jruby.exceptions.RaiseException;
2612
import org.jruby.runtime.ThreadContext;
2713
import org.jruby.runtime.builtin.IRubyObject;
2814

15+
import java.io.ByteArrayInputStream;
16+
import java.io.IOException;
17+
import java.io.InputStream;
18+
import java.util.concurrent.*;
19+
20+
import static nokogiri.XmlSaxPushParser.terminateExecution;
21+
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
22+
import static org.jruby.runtime.Helpers.invoke;
23+
2924
/**
3025
* Class for Nokogiri::HTML4::SAX::PushParser
3126
*
@@ -134,7 +129,7 @@ public class Html4SaxPushParser extends RubyObject
134129

135130
if (!options.recover && parserTask.getErrorCount() > errorCount0) {
136131
terminateTask(context.runtime);
137-
throw parserTask.getLastError();
132+
throw parserTask.getLastError().toThrowable();
138133
}
139134

140135
return this;

ext/java/nokogiri/XmlSaxParserContext.java

+31-93
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,23 @@
11
package nokogiri;
22

3-
import static org.jruby.runtime.Helpers.invoke;
4-
5-
import java.io.IOException;
6-
import java.io.InputStream;
7-
3+
import nokogiri.internals.*;
84
import org.apache.xerces.parsers.AbstractSAXParser;
95
import org.jruby.Ruby;
106
import org.jruby.RubyClass;
117
import org.jruby.RubyFixnum;
12-
import org.jruby.RubyModule;
13-
import org.jruby.RubyObjectAdapter;
148
import org.jruby.anno.JRubyClass;
159
import org.jruby.anno.JRubyMethod;
1610
import org.jruby.exceptions.RaiseException;
17-
import org.jruby.javasupport.JavaEmbedUtils;
11+
import org.jruby.runtime.Helpers;
1812
import org.jruby.runtime.ThreadContext;
1913
import org.jruby.runtime.builtin.IRubyObject;
20-
import org.xml.sax.ContentHandler;
21-
import org.xml.sax.ErrorHandler;
2214
import org.xml.sax.SAXException;
23-
import org.xml.sax.SAXNotRecognizedException;
24-
import org.xml.sax.SAXNotSupportedException;
2515
import org.xml.sax.SAXParseException;
2616

27-
import nokogiri.internals.NokogiriHandler;
28-
import nokogiri.internals.NokogiriHelpers;
29-
import nokogiri.internals.ParserContext;
30-
import nokogiri.internals.XmlSaxParser;
17+
import java.io.IOException;
18+
import java.io.InputStream;
19+
20+
import static org.jruby.runtime.Helpers.invoke;
3121

3222
/**
3323
* Base class for the SAX parsers.
@@ -51,6 +41,7 @@ public class XmlSaxParserContext extends ParserContext
5141
protected AbstractSAXParser parser;
5242

5343
protected NokogiriHandler handler;
44+
protected NokogiriErrorHandler errorHandler;
5445
private boolean replaceEntities = true;
5546
private boolean recovery = false;
5647

@@ -168,31 +159,12 @@ public class XmlSaxParserContext extends ParserContext
168159
return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz);
169160
}
170161

171-
/**
172-
* Set a property of the underlying parser.
173-
*/
174-
protected void
175-
setProperty(String key, Object val)
176-
throws SAXNotRecognizedException, SAXNotSupportedException
177-
{
178-
parser.setProperty(key, val);
179-
}
180-
181-
protected void
182-
setContentHandler(ContentHandler handler)
183-
{
184-
parser.setContentHandler(handler);
185-
}
186-
187-
protected void
188-
setErrorHandler(ErrorHandler handler)
189-
{
190-
parser.setErrorHandler(handler);
191-
}
192-
193162
public final NokogiriHandler
194163
getNokogiriHandler() { return handler; }
195164

165+
public final NokogiriErrorHandler
166+
getNokogiriErrorHandler() { return errorHandler; }
167+
196168
/**
197169
* Perform any initialization prior to parsing with the handler
198170
* <code>handlerRuby</code>. Convenience hook for subclasses.
@@ -223,6 +195,17 @@ public class XmlSaxParserContext extends ParserContext
223195
parser.parse(getInputSource());
224196
}
225197

198+
protected static Options
199+
defaultParseOptions(ThreadContext context)
200+
{
201+
return new ParserContext.Options(
202+
RubyFixnum.fix2long(Helpers.invoke(context,
203+
((RubyClass)context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions"))
204+
.getConstant("DEFAULT_XML"),
205+
"to_i"))
206+
);
207+
}
208+
226209
@JRubyMethod
227210
public IRubyObject
228211
parse_with(ThreadContext context, IRubyObject handlerRuby)
@@ -233,14 +216,19 @@ public class XmlSaxParserContext extends ParserContext
233216
throw runtime.newArgumentError("argument must respond_to document");
234217
}
235218

236-
NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby);
237-
preParse(runtime, handlerRuby, handler);
219+
/* TODO: how should we pass in parse options? */
220+
ParserContext.Options options = defaultParseOptions(context);
221+
222+
errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning);
223+
handler = new NokogiriHandler(runtime, handlerRuby, errorHandler);
238224

239-
setContentHandler(handler);
240-
setErrorHandler(handler);
225+
preParse(runtime, handlerRuby, handler);
226+
parser.setContentHandler(handler);
227+
parser.setErrorHandler(handler);
228+
parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options));
241229

242230
try {
243-
setProperty("http://xml.org/sax/properties/lexical-handler", handler);
231+
parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
244232
} catch (Exception ex) {
245233
throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString());
246234
}
@@ -270,8 +258,6 @@ public class XmlSaxParserContext extends ParserContext
270258

271259
postParse(runtime, handlerRuby, handler);
272260

273-
//maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);
274-
275261
return runtime.getNil();
276262
}
277263

@@ -319,53 +305,6 @@ public class XmlSaxParserContext extends ParserContext
319305
return context.runtime.newBoolean(recovery);
320306
}
321307

322-
/**
323-
* If the handler's document is a FragmentHandler, attempt to trim
324-
* leading and trailing whitespace.
325-
*
326-
* This is a bit hackish and depends heavily on the internals of
327-
* FragmentHandler.
328-
*/
329-
protected void
330-
maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser)
331-
{
332-
RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter();
333-
RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler");
334-
335-
IRubyObject handler = adapter.getInstanceVariable(parser, "@document");
336-
if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) {
337-
return;
338-
}
339-
IRubyObject stack = adapter.getInstanceVariable(handler, "@stack");
340-
if (stack == null || stack.isNil()) {
341-
return;
342-
}
343-
// doc is finally a DocumentFragment whose nodes we can check
344-
IRubyObject doc = adapter.callMethod(stack, "first");
345-
if (doc == null || doc.isNil()) {
346-
return;
347-
}
348-
349-
IRubyObject children;
350-
351-
for (;;) {
352-
children = adapter.callMethod(doc, "children");
353-
IRubyObject first = adapter.callMethod(children, "first");
354-
if (NokogiriHelpers.isBlank(first)) { adapter.callMethod(first, "unlink"); }
355-
else { break; }
356-
}
357-
358-
for (;;) {
359-
children = adapter.callMethod(doc, "children");
360-
IRubyObject last = adapter.callMethod(children, "last");
361-
if (NokogiriHelpers.isBlank(last)) { adapter.callMethod(last, "unlink"); }
362-
else { break; }
363-
}
364-
365-
// While we have a document, normalize it.
366-
((XmlNode) doc).normalize();
367-
}
368-
369308
@JRubyMethod(name = "column")
370309
public IRubyObject
371310
column(ThreadContext context)
@@ -383,5 +322,4 @@ public class XmlSaxParserContext extends ParserContext
383322
if (number == null) { return context.getRuntime().getNil(); }
384323
return RubyFixnum.newFixnum(context.getRuntime(), number.longValue());
385324
}
386-
387325
}

ext/java/nokogiri/XmlSaxPushParser.java

+17-25
Original file line number | Diff line number | Diff line change
@@ -1,32 +1,24 @@
11
package nokogiri;
22

3-
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
4-
import static org.jruby.runtime.Helpers.invoke;
5-
6-
import java.io.ByteArrayInputStream;
7-
import java.io.IOException;
8-
import java.io.InputStream;
9-
import java.util.concurrent.ExecutionException;
10-
import java.util.concurrent.ExecutorService;
11-
import java.util.concurrent.Executors;
12-
import java.util.concurrent.Future;
13-
import java.util.concurrent.FutureTask;
14-
import java.util.concurrent.ThreadFactory;
15-
3+
import nokogiri.internals.*;
164
import org.jruby.Ruby;
175
import org.jruby.RubyClass;
6+
import org.jruby.RubyException;
187
import org.jruby.RubyObject;
198
import org.jruby.anno.JRubyClass;
209
import org.jruby.anno.JRubyMethod;
2110
import org.jruby.exceptions.RaiseException;
2211
import org.jruby.runtime.ThreadContext;
2312
import org.jruby.runtime.builtin.IRubyObject;
2413

25-
import nokogiri.internals.ClosedStreamException;
26-
import nokogiri.internals.NokogiriBlockingQueueInputStream;
27-
import nokogiri.internals.NokogiriHandler;
28-
import nokogiri.internals.NokogiriHelpers;
29-
import nokogiri.internals.ParserContext;
14+
import java.io.ByteArrayInputStream;
15+
import java.io.IOException;
16+
import java.io.InputStream;
17+
import java.util.List;
18+
import java.util.concurrent.*;
19+
20+
import static nokogiri.internals.NokogiriHelpers.getNokogiriClass;
21+
import static org.jruby.runtime.Helpers.invoke;
3022

3123
/**
3224
* Class for Nokogiri::XML::SAX::PushParser
@@ -159,7 +151,8 @@ public class XmlSaxPushParser extends RubyObject
159151

160152
if (!options.recover && parserTask.getErrorCount() > errorCount0) {
161153
terminateTask(context.runtime);
162-
throw ex = parserTask.getLastError();
154+
ex = parserTask.getLastError().toThrowable();
155+
throw ex;
163156
}
164157

165158
return this;
@@ -278,16 +271,15 @@ static class ParserTask extends ParserContext.ParserTask<XmlSaxParserContext>
278271
getErrorCount()
279272
{
280273
// check for null because thread may not have started yet
281-
if (parser.getNokogiriHandler() == null) { return 0; }
282-
return parser.getNokogiriHandler().getErrorCount();
274+
if (parser.getNokogiriErrorHandler() == null) { return 0; }
275+
return parser.getNokogiriErrorHandler().getErrors().size();
283276
}
284277

285-
synchronized final RaiseException
278+
synchronized final RubyException
286279
getLastError()
287280
{
288-
return parser.getNokogiriHandler().getLastError();
281+
List<RubyException> errors = parser.getNokogiriErrorHandler().getErrors();
282+
return errors.get(errors.size() - 1);
289283
}
290-
291284
}
292-
293285
}

ext/java/nokogiri/internals/NokogiriEntityResolver.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -85,7 +85,7 @@ public class NokogiriEntityResolver implements EntityResolver2
8585
private void
8686
addError(String errorMessage)
8787
{
88-
if (handler != null) { handler.errors.add(new Exception(errorMessage)); }
88+
if (handler != null) { handler.addError(new Exception(errorMessage)); }
8989
}
9090

9191
/**

ext/java/nokogiri/internals/NokogiriErrorHandler.java

+29-8
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,15 @@
11
package nokogiri.internals;
22

3-
import java.util.ArrayList;
4-
import java.util.List;
5-
3+
import nokogiri.XmlSyntaxError;
64
import org.apache.xerces.xni.parser.XMLErrorHandler;
5+
import org.jruby.Ruby;
6+
import org.jruby.RubyException;
7+
import org.jruby.exceptions.RaiseException;
78
import org.xml.sax.ErrorHandler;
89

10+
import java.util.ArrayList;
11+
import java.util.List;
12+
913
/**
1014
* Super class of error handlers.
1115
*
@@ -17,23 +21,40 @@
1721
*/
1822
public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler
1923
{
20-
protected final List<Exception> errors;
24+
private final Ruby runtime;
25+
protected final List<RubyException> errors;
2126
protected boolean noerror;
2227
protected boolean nowarning;
2328

2429
public
25-
NokogiriErrorHandler(boolean noerror, boolean nowarning)
30+
NokogiriErrorHandler(Ruby runtime, boolean noerror, boolean nowarning)
2631
{
27-
this.errors = new ArrayList<Exception>(4);
32+
this.runtime = runtime;
33+
this.errors = new ArrayList<RubyException>(4);
2834
this.noerror = noerror;
2935
this.nowarning = nowarning;
3036
}
3137

32-
List<Exception>
38+
public List<RubyException>
3339
getErrors() { return errors; }
3440

3541
public void
36-
addError(Exception ex) { errors.add(ex); }
42+
addError(Exception ex)
43+
{
44+
addError(XmlSyntaxError.createXMLSyntaxError(runtime, ex));
45+
}
46+
47+
public void
48+
addError(RubyException ex)
49+
{
50+
errors.add(ex);
51+
}
52+
53+
public void
54+
addError(RaiseException ex)
55+
{
56+
addError(ex.getException());
57+
}
3758

3859
protected boolean
3960
usesNekoHtml(String domain)

0 commit comments

Comments
 (0)