[SPARK-19556][core] Do not encrypt block manager data in memory. #17295
**CryptoStreamUtils.scala** (`org.apache.spark.security`)

```diff
@@ -16,20 +16,23 @@
  */
 package org.apache.spark.security
 
-import java.io.{InputStream, OutputStream}
+import java.io.{EOFException, InputStream, OutputStream}
+import java.nio.ByteBuffer
+import java.nio.channels.{ReadableByteChannel, WritableByteChannel}
 import java.util.Properties
 import javax.crypto.KeyGenerator
 import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}
 
 import scala.collection.JavaConverters._
 
+import com.google.common.io.ByteStreams
 import org.apache.commons.crypto.random._
 import org.apache.commons.crypto.stream._
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
-import org.apache.spark.network.util.CryptoUtils
+import org.apache.spark.network.util.{CryptoUtils, JavaUtils}
 
 /**
  * A util class for manipulating IO encryption and decryption streams.
```
```diff
@@ -48,12 +51,27 @@ private[spark] object CryptoStreamUtils extends Logging {
       os: OutputStream,
       sparkConf: SparkConf,
       key: Array[Byte]): OutputStream = {
-    val properties = toCryptoConf(sparkConf)
-    val iv = createInitializationVector(properties)
+    val params = new CryptoParams(key, sparkConf)
+    val iv = createInitializationVector(params.conf)
     os.write(iv)
-    val transformationStr = sparkConf.get(IO_CRYPTO_CIPHER_TRANSFORMATION)
-    new CryptoOutputStream(transformationStr, properties, os,
-      new SecretKeySpec(key, "AES"), new IvParameterSpec(iv))
+    new CryptoOutputStream(params.transformation, params.conf, os, params.keySpec,
+      new IvParameterSpec(iv))
   }
 
+  /**
+   * Wrap a `WritableByteChannel` for encryption.
+   */
+  def createWritableChannel(
+      channel: WritableByteChannel,
+      sparkConf: SparkConf,
+      key: Array[Byte]): WritableByteChannel = {
+    val params = new CryptoParams(key, sparkConf)
+    val iv = createInitializationVector(params.conf)
+    val helper = new CryptoHelperChannel(channel)
+
+    helper.write(ByteBuffer.wrap(iv))
+    new CryptoOutputStream(params.transformation, params.conf, helper, params.keySpec,
+      new IvParameterSpec(iv))
+  }
+
   /**
```
```diff
@@ -63,12 +81,27 @@ private[spark] object CryptoStreamUtils extends Logging {
       is: InputStream,
       sparkConf: SparkConf,
       key: Array[Byte]): InputStream = {
-    val properties = toCryptoConf(sparkConf)
     val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
-    is.read(iv, 0, iv.length)
-    val transformationStr = sparkConf.get(IO_CRYPTO_CIPHER_TRANSFORMATION)
-    new CryptoInputStream(transformationStr, properties, is,
-      new SecretKeySpec(key, "AES"), new IvParameterSpec(iv))
+    ByteStreams.readFully(is, iv)
+    val params = new CryptoParams(key, sparkConf)
+    new CryptoInputStream(params.transformation, params.conf, is, params.keySpec,
+      new IvParameterSpec(iv))
   }
 
+  /**
+   * Wrap a `ReadableByteChannel` for decryption.
+   */
+  def createReadableChannel(
+      channel: ReadableByteChannel,
+      sparkConf: SparkConf,
+      key: Array[Byte]): ReadableByteChannel = {
+    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
+    val buf = ByteBuffer.wrap(iv)
+    JavaUtils.readFully(channel, buf)
```
**Contributor:** why not use …

**Author:** There's no …
```diff
+
+    val params = new CryptoParams(key, sparkConf)
+    new CryptoInputStream(params.transformation, params.conf, channel, params.keySpec,
+      new IvParameterSpec(iv))
+  }
+
   def toCryptoConf(conf: SparkConf): Properties = {
```
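The new channel APIs mirror the existing stream APIs, including the on-the-wire layout: the IV is written in the clear ahead of the ciphertext, and read back first during decryption. Below is a minimal round-trip sketch, not part of the PR; it assumes code living inside the `org.apache.spark` package (as Spark's own test suites do), since `CryptoStreamUtils` is `private[spark]`.

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels

import org.apache.spark.SparkConf
import org.apache.spark.security.CryptoStreamUtils

val conf = new SparkConf()
val key = CryptoStreamUtils.createKey(conf)  // random AES key, sized by spark.io.encryption.keySizeBits

// Encrypt: the channel writes the IV first, then the ciphertext.
val sink = new ByteArrayOutputStream()
val encrypted = CryptoStreamUtils.createWritableChannel(Channels.newChannel(sink), conf, key)
encrypted.write(ByteBuffer.wrap("secret".getBytes("UTF-8")))
encrypted.close()

// Decrypt: the channel consumes the IV before handing out plaintext.
val source = Channels.newChannel(new ByteArrayInputStream(sink.toByteArray))
val decrypted = CryptoStreamUtils.createReadableChannel(source, conf, key)
val plain = ByteBuffer.allocate(6)
while (plain.hasRemaining && decrypted.read(plain) >= 0) {}
assert(new String(plain.array(), "UTF-8") == "secret")
```

Note the read loop: like `write`, `ReadableByteChannel.read` may transfer fewer bytes than requested per call.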
```diff
@@ -102,4 +135,34 @@ private[spark] object CryptoStreamUtils extends Logging {
     }
     iv
   }
 
+  /**
+   * This class is a workaround for CRYPTO-125, that forces all bytes to be written to the
```
**Contributor:** This is a lousy bug! Good thing that we don't seem to be hit by it (yet).

**Author:** There's a pretty nasty workaround for it in the network library... (the non-blocking workaround is a lot worse than this.)
```diff
+   * underlying channel. Since the callers of this API are using blocking I/O, there are no
+   * concerns with regards to CPU usage here.
```
**Contributor:** is it a separate bug fix?

**Author:** No. As the comment states, it's a workaround for a bug in the commons-crypto library, which would affect the code being added.
```diff
+   */
+  private class CryptoHelperChannel(sink: WritableByteChannel) extends WritableByteChannel {
+
+    override def write(src: ByteBuffer): Int = {
+      val count = src.remaining()
+      while (src.hasRemaining()) {
+        sink.write(src)
+      }
+      count
+    }
+
+    override def isOpen(): Boolean = sink.isOpen()
+
+    override def close(): Unit = sink.close()
+
+  }
+
+  private class CryptoParams(key: Array[Byte], sparkConf: SparkConf) {
+
+    val keySpec = new SecretKeySpec(key, "AES")
+    val transformation = sparkConf.get(IO_CRYPTO_CIPHER_TRANSFORMATION)
+    val conf = toCryptoConf(sparkConf)
+
+  }
+
 }
```
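The workaround matters because `WritableByteChannel.write` is allowed to consume only part of the buffer, while commons-crypto (before CRYPTO-125 was fixed) assumed every `write` drains it. A standalone sketch, not from the PR, showing how a `CryptoHelperChannel`-style loop hides partial writes from such a caller; `ShortWriteChannel` and `FullWriteChannel` are made-up names for illustration:

```scala
import java.io.ByteArrayOutputStream
import java.nio.ByteBuffer
import java.nio.channels.{Channels, WritableByteChannel}

// A well-behaved but inconvenient channel: writes at most 4 bytes per call,
// which is legal under the WritableByteChannel contract.
class ShortWriteChannel(sink: WritableByteChannel) extends WritableByteChannel {
  override def write(src: ByteBuffer): Int = {
    val slice = src.duplicate()
    slice.limit(math.min(src.limit(), src.position() + 4))
    val n = sink.write(slice)
    src.position(src.position() + n)  // keep the caller's buffer in sync
    n
  }
  override def isOpen(): Boolean = sink.isOpen()
  override def close(): Unit = sink.close()
}

// The workaround: loop until the buffer is drained, as CryptoHelperChannel does.
class FullWriteChannel(sink: WritableByteChannel) extends WritableByteChannel {
  override def write(src: ByteBuffer): Int = {
    val count = src.remaining()
    while (src.hasRemaining()) {
      sink.write(src)
    }
    count
  }
  override def isOpen(): Boolean = sink.isOpen()
  override def close(): Unit = sink.close()
}

val out = new ByteArrayOutputStream()
val channel = new FullWriteChannel(new ShortWriteChannel(Channels.newChannel(out)))
channel.write(ByteBuffer.wrap(new Array[Byte](100)))
assert(out.size == 100)  // a caller that ignored write()'s return value would have lost bytes
```

The busy loop is harmless here because, as the code comment and review thread note, the callers use blocking I/O: each `sink.write` blocks until it makes progress.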
**SerializerManager.scala**

```diff
@@ -148,14 +148,14 @@ private[spark] class SerializerManager(
   /**
    * Wrap an output stream for compression if block compression is enabled for its block type
    */
-  private[this] def wrapForCompression(blockId: BlockId, s: OutputStream): OutputStream = {
+  def wrapForCompression(blockId: BlockId, s: OutputStream): OutputStream = {
     if (shouldCompress(blockId)) compressionCodec.compressedOutputStream(s) else s
   }
 
   /**
    * Wrap an input stream for compression if block compression is enabled for its block type
    */
-  private[this] def wrapForCompression(blockId: BlockId, s: InputStream): InputStream = {
+  def wrapForCompression(blockId: BlockId, s: InputStream): InputStream = {
     if (shouldCompress(blockId)) compressionCodec.compressedInputStream(s) else s
   }
```
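Making `wrapForCompression` public (it was `private[this]`) lets block-manager code compose compression explicitly, now that encryption is no longer baked into the serialization helpers. A rough sketch of a caller, with the caveats that `SerializerManager` is `private[spark]` (so this only compiles inside Spark's own packages) and `compressBlockBytes` is a hypothetical helper:

```scala
import java.io.ByteArrayOutputStream

import org.apache.spark.serializer.SerializerManager
import org.apache.spark.storage.TestBlockId

// Compress raw block bytes the same way SerializerManager would for this block type.
// `sm` would typically be SparkEnv.get.serializerManager.
def compressBlockBytes(sm: SerializerManager, bytes: Array[Byte]): Array[Byte] = {
  val blockId = TestBlockId("demo")
  val sink = new ByteArrayOutputStream()
  val out = sm.wrapForCompression(blockId, sink)  // no-op if compression is off for this block type
  out.write(bytes)
  out.close()
  sink.toByteArray
}
```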
```diff
@@ -167,30 +167,26 @@
     val byteStream = new BufferedOutputStream(outputStream)
     val autoPick = !blockId.isInstanceOf[StreamBlockId]
     val ser = getSerializer(implicitly[ClassTag[T]], autoPick).newInstance()
-    ser.serializeStream(wrapStream(blockId, byteStream)).writeAll(values).close()
```
**Contributor:** the …

**Author:** They're still used in a bunch of places.
```diff
+    ser.serializeStream(wrapForCompression(blockId, byteStream)).writeAll(values).close()
   }
 
   /** Serializes into a chunked byte buffer. */
   def dataSerialize[T: ClassTag](
       blockId: BlockId,
-      values: Iterator[T],
-      allowEncryption: Boolean = true): ChunkedByteBuffer = {
-    dataSerializeWithExplicitClassTag(blockId, values, implicitly[ClassTag[T]],
-      allowEncryption = allowEncryption)
+      values: Iterator[T]): ChunkedByteBuffer = {
+    dataSerializeWithExplicitClassTag(blockId, values, implicitly[ClassTag[T]])
   }
 
   /** Serializes into a chunked byte buffer. */
   def dataSerializeWithExplicitClassTag(
       blockId: BlockId,
       values: Iterator[_],
-      classTag: ClassTag[_],
-      allowEncryption: Boolean = true): ChunkedByteBuffer = {
+      classTag: ClassTag[_]): ChunkedByteBuffer = {
     val bbos = new ChunkedByteBufferOutputStream(1024 * 1024 * 4, ByteBuffer.allocate)
     val byteStream = new BufferedOutputStream(bbos)
     val autoPick = !blockId.isInstanceOf[StreamBlockId]
     val ser = getSerializer(classTag, autoPick).newInstance()
-    val encrypted = if (allowEncryption) wrapForEncryption(byteStream) else byteStream
-    ser.serializeStream(wrapForCompression(blockId, encrypted)).writeAll(values).close()
+    ser.serializeStream(wrapForCompression(blockId, byteStream)).writeAll(values).close()
     bbos.toChunkedByteBuffer
   }
```
```diff
@@ -200,15 +196,13 @@
    */
   def dataDeserializeStream[T](
       blockId: BlockId,
-      inputStream: InputStream,
-      maybeEncrypted: Boolean = true)
+      inputStream: InputStream)
       (classTag: ClassTag[T]): Iterator[T] = {
     val stream = new BufferedInputStream(inputStream)
     val autoPick = !blockId.isInstanceOf[StreamBlockId]
-    val decrypted = if (maybeEncrypted) wrapForEncryption(inputStream) else inputStream
     getSerializer(classTag, autoPick)
       .newInstance()
-      .deserializeStream(wrapForCompression(blockId, decrypted))
+      .deserializeStream(wrapForCompression(blockId, inputStream))
       .asIterator.asInstanceOf[Iterator[T]]
   }
 }
```
**Reviewer:** ah good catch! we should dispose the blocks here
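With encryption gone from these helpers, `dataSerialize` and `dataDeserializeStream` form a plain serialize-then-compress pipeline, so a round trip needs no key material. A hedged sketch under the same Spark-internal assumptions as above (`roundTrip` is a hypothetical helper); as a side note on disposal, `ChunkedByteBuffer.toInputStream(dispose = true)` frees the chunk buffers once the stream is drained:

```scala
import scala.reflect.ClassTag

import org.apache.spark.serializer.SerializerManager
import org.apache.spark.storage.TestBlockId

// Serialize values to a ChunkedByteBuffer and read them back; no encryption
// is involved anywhere on this path anymore.
def roundTrip[T: ClassTag](sm: SerializerManager, values: Seq[T]): Seq[T] = {
  val blockId = TestBlockId("roundtrip")
  val buffer = sm.dataSerialize(blockId, values.iterator)
  // dispose = true releases the underlying chunks when the stream hits EOF.
  sm.dataDeserializeStream(blockId, buffer.toInputStream(dispose = true))(implicitly[ClassTag[T]])
    .toSeq
}
```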