Commit f9c9cd8
[SC-5599] Inline spark-redshift sources into databricks/spark
This patch ports `spark-redshift` as of databricks/spark-avro@b01a034 and updates it to run with Spark 2.1.0. I didn't make any attempt to clean up the library, remove dead code, or modernize its dependencies; each of those tasks will take some time, and I'm hoping to delegate them once we have the infrastructure pieces in place.

Credentials are stored securely in Jenkins. If you need a copy of them for local development, talk to Josh and he'll share them via ZeroBin (this will be done via LastPass in the future).

Author: Josh Rosen <[email protected]>

Closes apache#171 from JoshRosen/add-spark-redshift.
1 parent 227ff64 commit f9c9cd8
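
For orientation, here is a minimal usage sketch of the spark-redshift data source being inlined in this commit. The cluster URL, credentials, table names, and S3 tempdir below are placeholders for illustration, not values taken from this commit.

// Hedged usage sketch of the com.databricks.spark.redshift data source.
// All connection values are placeholders.
import org.apache.spark.sql.SparkSession

object RedshiftUsageSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("redshift-usage-sketch").getOrCreate()
    val jdbcUrl = "jdbc:redshift://example-cluster:5439/dev?user=USER&password=PASS" // placeholder
    val tempDir = "s3n://example-bucket/spark-redshift-scratch/"                     // placeholder

    // Read a Redshift table; rows are staged through the S3 tempdir and read back as a DataFrame.
    val df = spark.read
      .format("com.databricks.spark.redshift")
      .option("url", jdbcUrl)
      .option("dbtable", "my_table")
      .option("tempdir", tempDir)
      .load()

    // Write the DataFrame to another Redshift table via the same S3 staging area.
    df.write
      .format("com.databricks.spark.redshift")
      .option("url", jdbcUrl)
      .option("dbtable", "my_table_copy")
      .option("tempdir", tempDir)
      .mode("error")
      .save()
  }
}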

File tree: 54 files changed, +6128 −6 lines (some file contents are hidden by default for this large commit)
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2016 Databricks, Inc.
+ *
+ * Portions of this software incorporate or are derived from software contained within Apache Spark,
+ * and this modified software differs from the Apache Spark software provided under the Apache
+ * License, Version 2.0, a copy of which you may obtain at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+package org.apache.spark.tags;
+
+import java.lang.annotation.*;
+
+import org.scalatest.TagAnnotation;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface ExtendedRedshiftTest { }

dev/.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -103,3 +103,4 @@ org.apache.spark.scheduler.ExternalClusterManager
 org.apache.spark.deploy.yarn.security.ServiceCredentialProvider
 spark-warehouse
 structured-streaming/*
+install-redshift-jdbc.sh

dev/install-redshift-jdbc.sh

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+set -e
+
+SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
+SPARK_ROOT_DIR="$(dirname "$SCRIPT_DIR")"
+
+cd /tmp
+
+VERSION='1.1.7.1007'
+FILENAME="RedshiftJDBC4-$VERSION.jar"
+
+wget "https://s3.amazonaws.com/redshift-downloads/drivers/$FILENAME"
+
+$SPARK_ROOT_DIR/build/mvn install:install-file \
+  -Dfile=$FILENAME \
+  -DgroupId=com.amazonaws \
+  -DartifactId=redshift.jdbc4 \
+  -Dversion=$VERSION \
+  -Dpackaging=jar
+

dev/run-tests.py

Lines changed: 4 additions & 1 deletion
@@ -111,7 +111,8 @@ def determine_modules_to_test(changed_modules):
     >>> x = [x.name for x in determine_modules_to_test([modules.sql])]
     >>> x # doctest: +NORMALIZE_WHITESPACE
     ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'sql-kafka-0-8', 'examples',
-     'hive-thriftserver', 'pyspark-sql', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
+     'hive-thriftserver', 'pyspark-sql', 'redshift', 'sparkr', 'pyspark-mllib',
+     'redshift-integration-tests', 'pyspark-ml']
     """
     modules_to_test = set()
     for module in changed_modules:
@@ -512,6 +513,8 @@ def main():
         test_env = "amplab_jenkins"
         # add path for Python3 in Jenkins if we're calling from a Jenkins machine
         os.environ["PATH"] = "/home/anaconda/envs/py3k/bin:" + os.environ.get("PATH")
+        # Install Redshift JDBC
+        run_cmd([os.path.join(SPARK_HOME, "dev", "install-redshift-jdbc.sh")])
     else:
         # else we're running locally and can use local settings
         build_tool = "sbt"

dev/sparktestsupport/modules.py

Lines changed: 25 additions & 0 deletions
@@ -168,6 +168,31 @@ def __hash__(self):
     ]
 )
 
+redshift = Module(
+    name="redshift",
+    dependencies=[avro, sql],
+    source_file_regexes=[
+        "external/redshift",
+    ],
+    sbt_test_goals=[
+        "redshift/test",
+    ],
+    test_tags=[
+        "org.apache.spark.tags.ExtendedRedshiftTest"
+    ],
+)
+
+redshift_integration_tests = Module(
+    name="redshift-integration-tests",
+    dependencies=[redshift],
+    source_file_regexes=[
+        "external/redshift-integration-tests",
+    ],
+    sbt_test_goals=[
+        "redshift-integration-tests/test",
+    ],
+)
+
 sql_kafka = Module(
     name="sql-kafka-0-10",
     dependencies=[sql],

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/*
+ * Copyright (C) 2016 Databricks, Inc.
+ *
+ * Portions of this software incorporate or are derived from software contained within Apache Spark,
+ * and this modified software differs from the Apache Spark software provided under the Apache
+ * License, Version 2.0, a copy of which you may obtain at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.11</artifactId>
+    <version>2.1.0</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <groupId>com.databricks</groupId>
+  <artifactId>spark-redshift-integration-tests_2.11</artifactId>
+  <properties>
+    <sbt.project.name>redshift-integration-tests</sbt.project.name>
+  </properties>
+  <packaging>jar</packaging>
+  <name>Spark Redshift Integration Tests</name>
+  <url>http://spark.apache.org/</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.databricks</groupId>
+      <artifactId>spark-avro_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.databricks</groupId>
+      <artifactId>spark-redshift_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.databricks</groupId>
+      <artifactId>spark-redshift_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <!-- The AWS Java SDK version here should match (or be below) the version used in DBC images -->
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>aws-java-sdk-core</artifactId>
+      <version>1.9.40</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>aws-java-sdk-s3</artifactId>
+      <version>1.9.40</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>aws-java-sdk-sts</artifactId>
+      <version>1.9.40</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.eclipsesource.minimal-json</groupId>
+      <artifactId>minimal-json</artifactId>
+      <version>0.9.4</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-client</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>${hadoop.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-hive_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <!-- This dependency is installed using ./dev/install-redshift-jdbc.sh -->
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>redshift.jdbc4</artifactId>
+      <version>1.1.7.1007</version>
+      <type>jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-tags_${scala.binary.version}</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2016 Databricks, Inc.
+ *
+ * Portions of this software incorporate or are derived from software contained within Apache Spark,
+ * and this modified software differs from the Apache Spark software provided under the Apache
+ * License, Version 2.0, a copy of which you may obtain at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ */
+
+package com.databricks.spark.redshift
+
+import java.net.URI
+
+import org.apache.spark.SparkContext
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
+import org.apache.spark.tags.ExtendedRedshiftTest
+
+/**
+ * This suite performs basic integration tests where the AWS credentials have been
+ * encoded into the tempdir URI rather than being set in the Hadoop configuration.
+ */
+@ExtendedRedshiftTest
+class AWSCredentialsInUriIntegrationSuite extends IntegrationSuiteBase {
+
+  override protected val tempDir: String = {
+    val uri = new URI(AWS_S3_SCRATCH_SPACE + randomSuffix + "/")
+    new URI(
+      uri.getScheme,
+      s"$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY",
+      uri.getHost,
+      uri.getPort,
+      uri.getPath,
+      uri.getQuery,
+      uri.getFragment).toString
+  }
+
+
+  // Override this method so that we do not set the credentials in sc.hadoopConf.
+  override def beforeAll(): Unit = {
+    assert(tempDir.contains("AKIA"), "tempdir did not contain AWS credentials")
+    assert(!AWS_SECRET_ACCESS_KEY.contains("/"), "AWS secret key should not contain slash")
+    sc = new SparkContext("local", getClass.getSimpleName)
+    conn = DefaultJDBCWrapper.getConnector(None, jdbcUrl, None)
+  }
+
+  test("roundtrip save and load") {
+    val df = sqlContext.createDataFrame(sc.parallelize(Seq(Row(1)), 1),
+      StructType(StructField("foo", IntegerType) :: Nil))
+    testRoundtripSaveAndLoad(s"roundtrip_save_and_load_$randomSuffix", df)
+  }
+}
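
As a clarifying aside, the tempDir override above embeds the AWS access key and secret into the user-info portion of an s3n:// URI instead of setting them on the Hadoop configuration. A minimal standalone sketch of that construction follows; the bucket name and credentials are placeholders, not values from this commit.

// Hedged sketch: rebuild an S3 scratch URI with "ACCESS_KEY:SECRET_KEY" as user-info,
// mirroring the tempDir construction in AWSCredentialsInUriIntegrationSuite above.
import java.net.URI

object TempDirWithCredentialsSketch {
  def main(args: Array[String]): Unit = {
    val accessKeyId = "AKIAEXAMPLEKEY"                 // placeholder
    val secretAccessKey = "exampleSecretWithoutSlash"  // placeholder; the suite above requires no "/"
    val base = new URI("s3n://example-bucket/spark-redshift-scratch/run-1234/")

    val tempDirWithCreds = new URI(
      base.getScheme,
      s"$accessKeyId:$secretAccessKey",
      base.getHost,
      base.getPort,
      base.getPath,
      base.getQuery,
      base.getFragment).toString

    // Prints: s3n://AKIAEXAMPLEKEY:exampleSecretWithoutSlash@example-bucket/spark-redshift-scratch/run-1234/
    println(tempDirWithCreds)
  }
}

This is presumably also why beforeAll asserts that the secret key contains no slash: a "/" inside the user-info component would interfere with how the staging path is later parsed.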
