Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/src/main/scala/org/apache/spark/Partition.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,6 @@ trait Partition extends Serializable {

// A better default implementation of HashCode
override def hashCode(): Int = index

override def equals(other: Any): Boolean = super.equals(other)
}
6 changes: 3 additions & 3 deletions core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ private[spark] case class NarrowCoGroupSplitDep(
* narrowDeps should always be equal to the number of parents.
*/
private[spark] class CoGroupPartition(
idx: Int, val narrowDeps: Array[Option[NarrowCoGroupSplitDep]])
override val index: Int, val narrowDeps: Array[Option[NarrowCoGroupSplitDep]])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may upset MiMa since you effectively remove an 'idx' property but it can be ignored for our purposes as it's a private class.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sense

extends Partition with Serializable {
override val index: Int = idx
override def hashCode(): Int = idx
override def hashCode(): Int = index
override def equals(other: Any): Boolean = super.equals(other)
}

/**
Expand Down
6 changes: 3 additions & 3 deletions core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ import org.apache.spark.util.{NextIterator, SerializableConfiguration, ShutdownH
/**
* A Spark split class that wraps around a Hadoop InputSplit.
*/
private[spark] class HadoopPartition(rddId: Int, idx: Int, s: InputSplit)
private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: InputSplit)
extends Partition {

val inputSplit = new SerializableWritable[InputSplit](s)

override def hashCode(): Int = 41 * (41 + rddId) + idx
override def hashCode(): Int = 31 * (31 + rddId) + index

override val index: Int = idx
override def equals(other: Any): Boolean = super.equals(other)

/**
* Get any environment variables that should be added to the users environment when running pipes
Expand Down
5 changes: 4 additions & 1 deletion core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ private[spark] class NewHadoopPartition(
extends Partition {

val serializableHadoopSplit = new SerializableWritable(rawSplit)
override def hashCode(): Int = 41 * (41 + rddId) + index

override def hashCode(): Int = 31 * (31 + rddId) + index

override def equals(other: Any): Boolean = super.equals(other)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ import org.apache.spark.util.Utils
private[spark]
class PartitionerAwareUnionRDDPartition(
@transient val rdds: Seq[RDD[_]],
val idx: Int
override val index: Int
) extends Partition {
var parents = rdds.map(_.partitions(idx)).toArray
var parents = rdds.map(_.partitions(index)).toArray

override val index = idx
override def hashCode(): Int = idx
override def hashCode(): Int = index

override def equals(other: Any): Boolean = super.equals(other)

@throws(classOf[IOException])
private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
Expand Down
5 changes: 4 additions & 1 deletion core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ import org.apache.spark.serializer.Serializer

private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition {
override val index: Int = idx
override def hashCode(): Int = idx

override def hashCode(): Int = index

override def equals(other: Any): Boolean = super.equals(other)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.scheduler

import java.util.Arrays
import java.util.Objects

import org.apache.spark._
import org.apache.spark.rdd.RDD
Expand Down Expand Up @@ -53,6 +54,9 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A
parentPartitionMapping(parent.getPartition(key))
}

override def hashCode(): Int =
31 * Objects.hashCode(parent) + Arrays.hashCode(partitionStartIndices)

override def equals(other: Any): Boolean = other match {
case c: CoalescedPartitioner =>
c.parent == parent && Arrays.equals(c.partitionStartIndices, partitionStartIndices)
Expand All @@ -66,6 +70,8 @@ private[spark] class CustomShuffledRDDPartition(
extends Partition {

override def hashCode(): Int = index

override def equals(other: Any): Boolean = super.equals(other)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -476,13 +476,18 @@ object KryoTest {

class ClassWithNoArgConstructor {
var x: Int = 0

override def hashCode(): Int = x

override def equals(other: Any): Boolean = other match {
case c: ClassWithNoArgConstructor => x == c.x
case _ => false
}
}

class ClassWithoutNoArgConstructor(val x: Int) {
override def hashCode(): Int = x

override def equals(other: Any): Boolean = other match {
case c: ClassWithoutNoArgConstructor => x == c.x
case _ => false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ class ClosureCleanerSuite extends SparkFunSuite {
// A non-serializable class we create in closures to make sure that we aren't
// keeping references to unneeded variables from our outer closures.
class NonSerializable(val id: Int = -1) {
override def hashCode(): Int = id

override def equals(other: Any): Boolean = {
other match {
case o: NonSerializable => id == o.id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,8 @@ package org.apache.spark.util.collection
*/
case class FixedHashObject(v: Int, h: Int) extends Serializable {
override def hashCode(): Int = h
override def equals(other: Any): Boolean = other match {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although this very technically changes semantics, given the clear and specific purpose of this class, I don't think it hurts to define reasonable equality semantics instead of leaving it to reference equality.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually if we do call super.equals() it fails tests as case classes have there implementation of equals so we need to rewrite the same equals back again.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha you're right about that. Right, this actually represents no behavior change.

case that: FixedHashObject => v == that.v && h == that.h
case _ => false
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,8 @@ class SparseMatrix (
rowIndices: Array[Int],
values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false)

override def hashCode(): Int = toBreeze.hashCode()

override def equals(o: Any): Boolean = o match {
case m: Matrix => toBreeze == m.toBreeze
case _ => false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,8 @@ class DenseVector (val values: Array[Double]) extends Vector {
}
}

override def equals(other: Any): Boolean = super.equals(other)

override def hashCode(): Int = {
var result: Int = 31 + size
var i = 0
Expand Down Expand Up @@ -602,6 +604,8 @@ class SparseVector (
}
}

override def equals(other: Any): Boolean = super.equals(other)

override def hashCode(): Int = {
var result: Int = 31 + size
val end = values.length
Expand Down
22 changes: 16 additions & 6 deletions mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.ml.tree

import java.util.Objects

import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.tree.configuration.{FeatureType => OldFeatureType}
Expand Down Expand Up @@ -112,12 +114,15 @@ final class CategoricalSplit private[ml] (
}
}

override def equals(o: Any): Boolean = {
o match {
case other: CategoricalSplit => featureIndex == other.featureIndex &&
isLeft == other.isLeft && categories == other.categories
case _ => false
}
override def hashCode(): Int = {
val state = Seq(featureIndex, isLeft, categories)
state.map(Objects.hashCode).foldLeft(0)((a, b) => 31 * a + b)
}

override def equals(o: Any): Boolean = o match {
case other: CategoricalSplit => featureIndex == other.featureIndex &&
isLeft == other.isLeft && categories == other.categories
case _ => false
}

override private[tree] def toOld: OldSplit = {
Expand Down Expand Up @@ -181,6 +186,11 @@ final class ContinuousSplit private[ml] (override val featureIndex: Int, val thr
}
}

override def hashCode(): Int = {
val state = Seq(featureIndex, threshold)
state.map(Objects.hashCode).foldLeft(0)((a, b) => 31 * a + b)
}

override private[tree] def toOld: OldSplit = {
OldSplit(featureIndex, threshold, OldFeatureType.Continuous, List.empty[Double])
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,8 @@ class SparseMatrix @Since("1.3.0") (
case _ => false
}

override def hashCode(): Int = toBreeze.hashCode

private[mllib] def toBreeze: BM[Double] = {
if (!isTransposed) {
new BSM[Double](values, numRows, numCols, colPtrs, rowIndices)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,8 @@ class DenseVector @Since("1.0.0") (
}
}

override def equals(other: Any): Boolean = super.equals(other)

override def hashCode(): Int = {
var result: Int = 31 + size
var i = 0
Expand Down Expand Up @@ -751,6 +753,8 @@ class SparseVector @Since("1.0.0") (
}
}

override def equals(other: Any): Boolean = super.equals(other)

override def hashCode(): Int = {
var result: Int = 31 + size
val end = values.length
Expand Down
4 changes: 4 additions & 0 deletions project/MimaExcludes.scala
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ object MimaExcludes {
ProblemFilters.exclude[MissingMethodProblem](
"org.apache.spark.api.java.function.FlatMapGroupsFunction.call")
) ++
Seq(
// [SPARK-6429] Implement hashCode and equals together
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.Partition.org$apache$spark$Partition$$super=uals")
) ++
Seq(
// SPARK-4819 replace Guava Optional
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.api.java.JavaSparkContext.getCheckpointDir"),
Expand Down
2 changes: 1 addition & 1 deletion scalastyle-config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ This file is divided into 3 sections:
</check>

<!-- Should turn this on, but we have a few places that need to be fixed first -->
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>

<!-- ================================================================================ -->
<!-- rules we don't want -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ object AttributeSet {
class AttributeSet private (val baseSet: Set[AttributeEquals])
extends Traversable[Attribute] with Serializable {

override def hashCode: Int = baseSet.hashCode()

/** Returns true if the members of this AttributeSet and other are the same. */
override def equals(other: Any): Boolean = other match {
case otherSet: AttributeSet =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ class EquivalentExpressions {
case other: Expr => e.semanticEquals(other.e)
case _ => false
}
override val hashCode: Int = e.semanticHash()

override def hashCode: Int = e.semanticHash()
}

// For each expression, the set of equivalent expressions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
import java.util.Objects

import org.json4s.JsonAST._

Expand Down Expand Up @@ -170,6 +171,8 @@ case class Literal protected (value: Any, dataType: DataType)

override def toString: String = if (value != null) value.toString else "null"

override def hashCode(): Int = 31 * (31 * Objects.hashCode(dataType)) + Objects.hashCode(value)

override def equals(other: Any): Boolean = other match {
case o: Literal =>
dataType.equals(o.dataType) &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.expressions

import java.util.UUID
import java.util.{Objects, UUID}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
Expand Down Expand Up @@ -175,6 +175,11 @@ case class Alias(child: Expression, name: String)(
exprId :: qualifier :: explicitMetadata :: isGenerated :: Nil
}

override def hashCode(): Int = {
val state = Seq(name, exprId, child, qualifier, explicitMetadata)
state.map(Objects.hashCode).foldLeft(0)((a, b) => 31 * a + b)
}

override def equals(other: Any): Boolean = other match {
case a: Alias =>
name == a.name && exprId == a.exprId && child == a.child && qualifier == a.qualifier &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.types

import java.util.Objects

import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._

Expand Down Expand Up @@ -83,6 +85,8 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa

override def sql: String = sqlType.sql

override def hashCode(): Int = getClass.hashCode()

override def equals(other: Any): Boolean = other match {
case that: UserDefinedType[_] => this.acceptsType(that)
case _ => false
Expand Down Expand Up @@ -115,7 +119,9 @@ private[sql] class PythonUserDefinedType(
}

override def equals(other: Any): Boolean = other match {
case that: PythonUserDefinedType => this.pyUDT.equals(that.pyUDT)
case that: PythonUserDefinedType => pyUDT == that.pyUDT
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This ought to be equivalent in Scala right? just triple checking

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the same except that it works event if``pyUDT` is null.
http://stackoverflow.com/questions/7681161/whats-the-difference-between-and-equals-in-scala

case _ => false
}

override def hashCode(): Int = Objects.hashCode(pyUDT)
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ import org.apache.spark.sql.types.{ArrayType, Decimal, ObjectType, StructType}
case class RepeatedStruct(s: Seq[PrimitiveData])

case class NestedArray(a: Array[Array[Int]]) {
override def hashCode(): Int =
java.util.Arrays.deepHashCode(a.asInstanceOf[Array[AnyRef]])

override def equals(other: Any): Boolean = other match {
case NestedArray(otherArray) =>
java.util.Arrays.deepEquals(
Expand Down Expand Up @@ -64,15 +67,21 @@ case class SpecificCollection(l: List[Int])

/** For testing Kryo serialization based encoder. */
class KryoSerializable(val value: Int) {
override def equals(other: Any): Boolean = {
this.value == other.asInstanceOf[KryoSerializable].value
override def hashCode(): Int = value

override def equals(other: Any): Boolean = other match {
case that: KryoSerializable => this.value == that.value
case _ => false
}
}

/** For testing Java serialization based encoder. */
class JavaSerializable(val value: Int) extends Serializable {
override def equals(other: Any): Boolean = {
this.value == other.asInstanceOf[JavaSerializable].value
override def hashCode(): Int = value

override def equals(other: Any): Boolean = other match {
case that: JavaSerializable => this.value == that.value
case _ => false
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ private final class ShuffledRowRDDPartition(
val startPreShufflePartitionIndex: Int,
val endPreShufflePartitionIndex: Int) extends Partition {
override val index: Int = postShufflePartitionIndex

override def hashCode(): Int = postShufflePartitionIndex

override def equals(other: Any): Boolean = super.equals(other)
}

/**
Expand Down
Loading