Skip to content

Commit

Permalink
[DRAFT] Set StreamReadConstraints.maxStringLength for IR parsing only
Browse files Browse the repository at this point in the history
How on earth do we get the type checking right!!
  • Loading branch information
jmarshall committed Dec 9, 2024
1 parent dcea9e9 commit 358b452
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
13 changes: 0 additions & 13 deletions hail/src/main/scala/is/hail/backend/Backend.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import scala.reflect.ClassTag
import java.io._
import java.nio.charset.StandardCharsets

import com.fasterxml.jackson.core.StreamReadConstraints
import org.json4s._
import org.json4s.jackson.{JsonMethods, Serialization}

Expand Down Expand Up @@ -55,18 +54,6 @@ trait BackendContext {
}

abstract class Backend {
// From https://github.com/hail-is/hail/issues/14580 :
// IR can get quite big, especially as it can contain an arbitrary
// amount of encoded literals from the user's python session. This
// was a (controversial) restriction imposed by Jackson and should be lifted.
//
// We remove this restriction for all backends, and we do so here, in the
// constructor since constructing a backend is one of the first things that
// happens and this constraint should be overridden as early as possible.
StreamReadConstraints.overrideDefaultStreamReadConstraints(
StreamReadConstraints.builder().maxStringLength(Integer.MAX_VALUE).build()
)

val persistedIR: mutable.Map[Int, BaseIR] = mutable.Map()

protected[this] def addJavaIR(ir: BaseIR): Int = {
Expand Down
15 changes: 13 additions & 2 deletions hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ import java.io._
import java.nio.charset.StandardCharsets
import java.util.concurrent._

import com.fasterxml.jackson.databind.DeserializationFeature.USE_BIG_INTEGER_FOR_INTS

import org.apache.log4j.Logger
import org.json4s.{DefaultFormats, Formats}
import org.json4s.JsonAST._
import org.json4s.jackson.JsonMethods

class ServiceBackendContext(
val billingProject: String,
Expand Down Expand Up @@ -464,7 +465,17 @@ object ServiceBackendAPI {

implicit val formats: Formats = DefaultFormats

val input = using(fs.openNoCompression(inputURL))(JsonMethods.parse(_))
// Parse the RPC input with a dedicated Jackson mapper whose maximum string
// length is lifted (Integer.MAX_VALUE), rather than overriding Jackson's
// process-wide default StreamReadConstraints.
import com.fasterxml.jackson.core.{JsonFactoryBuilder, StreamReadConstraints}
import com.fasterxml.jackson.databind.ObjectMapper
import org.json4s.jackson.Json4sScalaModule

// JsonFactoryBuilder is used directly (instead of JsonFactory.builder(), which
// is typed as a wildcard TSFBuilder<?, ?>) so that Scala sees concrete types
// for both the builder chain and the resulting factory.
val factory =
  new JsonFactoryBuilder()
    .streamReadConstraints(
      StreamReadConstraints.builder().maxStringLength(Integer.MAX_VALUE).build()
    )
    .build()

// Configure the mapper to produce json4s ASTs, with integers read as BigInteger.
val mapper = new ObjectMapper(factory)
mapper.registerModule(new Json4sScalaModule)
mapper.configure(USE_BIG_INTEGER_FOR_INTS, true)

val reader = mapper.readerFor(classOf[JValue])
// ObjectReader.readValue is heavily overloaded; the ascription pins the
// InputStream overload explicitly.
// NOTE(review): assumes fs.openNoCompression returns an InputStream subtype — confirm.
val input = using(fs.openNoCompression(inputURL)) { is =>
  reader.readValue[JValue](is: InputStream)
}

val rpcConfig = (input \ "config").extract[ServiceBackendRPCPayload]

// FIXME: when can the classloader be shared? (optimizer benefits!)
Expand Down

0 comments on commit 358b452

Please sign in to comment.