diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index f81db8e44a1d4..4d8aafae1cd58 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -379,7 +379,6 @@ def __hash__(self): "pyspark.mllib.evaluation", "pyspark.mllib.feature", "pyspark.mllib.fpm", - "pyspark.mllib.linalg.__init__", "pyspark.mllib.linalg.distributed", "pyspark.mllib.random", "pyspark.mllib.recommendation", @@ -406,6 +405,7 @@ def __hash__(self): "pyspark.ml.feature", "pyspark.ml.classification", "pyspark.ml.clustering", + "pyspark.ml.linalg.__init__", "pyspark.ml.recommendation", "pyspark.ml.regression", "pyspark.ml.tuning", diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 8daee7b3aa1e9..5e533f797fc1f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -1207,6 +1207,7 @@ private[python] class PythonMLLibAPI extends Serializable { private[spark] object SerDe extends Serializable { val PYSPARK_PACKAGE = "pyspark.mllib" + val PYSPARK_ML_PACKAGE = "pyspark.ml" /** * Base class used for pickle @@ -1214,8 +1215,10 @@ private[spark] object SerDe extends Serializable { private[python] abstract class BasePickler[T: ClassTag] extends IObjectPickler with IObjectConstructor { + protected def packageName: String = PYSPARK_PACKAGE + private val cls = implicitly[ClassTag[T]].runtimeClass - private val module = PYSPARK_PACKAGE + "." + cls.getName.split('.')(4) + private val module = packageName + "." + cls.getName.split('.')(4) private val name = cls.getSimpleName // register this to Pickler and Unpickler @@ -1265,6 +1268,8 @@ private[spark] object SerDe extends Serializable { // Pickler for DenseVector private[python] class DenseVectorPickler extends BasePickler[DenseVector] { + override protected def packageName = PYSPARK_ML_PACKAGE + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val vector: DenseVector = obj.asInstanceOf[DenseVector] val bytes = new Array[Byte](8 * vector.size) @@ -1297,6 +1302,8 @@ private[spark] object SerDe extends Serializable { // Pickler for DenseMatrix private[python] class DenseMatrixPickler extends BasePickler[DenseMatrix] { + override protected def packageName = PYSPARK_ML_PACKAGE + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val m: DenseMatrix = obj.asInstanceOf[DenseMatrix] val bytes = new Array[Byte](8 * m.values.length) @@ -1334,6 +1341,8 @@ private[spark] object SerDe extends Serializable { // Pickler for SparseMatrix private[python] class SparseMatrixPickler extends BasePickler[SparseMatrix] { + override protected def packageName = PYSPARK_ML_PACKAGE + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val s = obj.asInstanceOf[SparseMatrix] val order = ByteOrder.nativeOrder() @@ -1389,6 +1398,8 @@ private[spark] object SerDe extends Serializable { // Pickler for SparseVector private[python] class SparseVectorPickler extends BasePickler[SparseVector] { + override protected def packageName = PYSPARK_ML_PACKAGE + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val v: SparseVector = obj.asInstanceOf[SparseVector] val n = v.indices.length @@ -1431,6 +1442,8 @@ private[spark] object SerDe extends Serializable { // Pickler for LabeledPoint private[python] class LabeledPointPickler extends 
BasePickler[LabeledPoint] { + override protected def packageName = PYSPARK_PACKAGE + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val point: LabeledPoint = obj.asInstanceOf[LabeledPoint] saveObjects(out, pickler, point.label, point.features) @@ -1447,6 +1460,8 @@ private[spark] object SerDe extends Serializable { // Pickler for Rating private[python] class RatingPickler extends BasePickler[Rating] { + override protected def packageName = PYSPARK_PACKAGE + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { val rating: Rating = obj.asInstanceOf[Rating] saveObjects(out, pickler, rating.user, rating.product, rating.rating) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index c26c2d7fa5bfc..4f2d1dd0ef77d 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -53,7 +53,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti Currently, this class only supports binary classification. >>> from pyspark.sql import Row - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sc.parallelize([ ... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)), ... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], []))]).toDF() @@ -496,7 +496,7 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred It supports both binary and multiclass labels, as well as both continuous and categorical features. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> from pyspark.ml.feature import StringIndexer >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), @@ -625,7 +625,7 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred >>> import numpy >>> from numpy import allclose - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> from pyspark.ml.feature import StringIndexer >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), @@ -752,7 +752,7 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol `SPARK-4240 `_ >>> from numpy import allclose - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> from pyspark.ml.feature import StringIndexer >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), @@ -884,7 +884,7 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H The input feature values must be nonnegative. >>> from pyspark.sql import Row - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... Row(label=0.0, features=Vectors.dense([0.0, 0.0])), ... Row(label=0.0, features=Vectors.dense([0.0, 1.0])), @@ -1023,7 +1023,7 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, Number of inputs has to be equal to the size of feature vectors. Number of outputs has to be equal to the total number of labels. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (0.0, Vectors.dense([0.0, 0.0])), ... (1.0, Vectors.dense([0.0, 1.0])), @@ -1188,7 +1188,7 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable): is picked to label the example. 
>>> from pyspark.sql import Row - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sc.parallelize([ ... Row(label=0.0, features=Vectors.dense(1.0, 0.8)), ... Row(label=1.0, features=Vectors.sparse(2, [], [])), diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index b90daf16e2f71..4c5bf139e74d8 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -65,7 +65,7 @@ class GaussianMixture(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte GaussianMixture clustering. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> data = [(Vectors.dense([-0.1, -0.05 ]),), ... (Vectors.dense([-0.01, -0.1]),), @@ -194,7 +194,7 @@ class KMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIter, HasTol K-means clustering with a k-means++ like initialization mode (the k-means|| algorithm by Bahmani et al). - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),), ... (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)] >>> df = sqlContext.createDataFrame(data, ["features"]) @@ -347,7 +347,7 @@ class BisectingKMeans(JavaEstimator, HasFeaturesCol, HasPredictionCol, HasMaxIte If bisecting all divisible clusters on the bottom level would result more than `k` leaf clusters, larger clusters get higher priority. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> data = [(Vectors.dense([0.0, 0.0]),), (Vectors.dense([1.0, 1.0]),), ... (Vectors.dense([9.0, 8.0]),), (Vectors.dense([8.0, 9.0]),)] >>> df = sqlContext.createDataFrame(data, ["features"]) @@ -625,7 +625,7 @@ class LDA(JavaEstimator, HasFeaturesCol, HasMaxIter, HasSeed, HasCheckpointInter :py:class:`pyspark.ml.feature.Tokenizer` and :py:class:`pyspark.ml.feature.CountVectorizer` can be useful for converting text to word count vectors. - >>> from pyspark.mllib.linalg import Vectors, SparseVector + >>> from pyspark.ml.linalg import Vectors, SparseVector >>> from pyspark.ml.clustering import LDA >>> df = sqlContext.createDataFrame([[1, Vectors.dense([0.0, 1.0])], ... [2, SparseVector(2, {0: 1.0})],], ["id", "features"]) diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 719c0c7d79fc9..fc9099b7ec172 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -111,7 +111,7 @@ class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPrediction The rawPrediction column can be of type double (binary 0/1 prediction, or probability of label 1) or of type vector (length-2 vector of raw predictions, scores, or label probabilities). - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> scoreAndLabels = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1]), ... 
[(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)]) >>> dataset = sqlContext.createDataFrame(scoreAndLabels, ["raw", "label"]) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 606a6e7c22b4f..983b6a5301ae1 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -23,11 +23,11 @@ from pyspark import since, keyword_only from pyspark.rdd import ignore_unicode_prefix +from pyspark.ml.linalg import _convert_to_vector from pyspark.ml.param.shared import * from pyspark.ml.util import JavaMLReadable, JavaMLWritable from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaTransformer, _jvm from pyspark.mllib.common import inherit_doc -from pyspark.mllib.linalg import _convert_to_vector __all__ = ['Binarizer', 'Bucketizer', @@ -380,7 +380,7 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit .. seealso:: `More information on Wikipedia \ `_. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df1 = sqlContext.createDataFrame([(Vectors.dense([5.0, 8.0, 6.0]),)], ["vec"]) >>> dct = DCT(inverse=False, inputCol="vec", outputCol="resultVec") >>> df2 = dct.transform(df1) @@ -447,7 +447,7 @@ class ElementwiseProduct(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReada with a provided "weight" vector. In other words, it scales each column of the dataset by a scalar multiplier. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([(Vectors.dense([2.0, 1.0, 3.0]),)], ["values"]) >>> ep = ElementwiseProduct(scalingVec=Vectors.dense([1.0, 2.0, 3.0]), ... inputCol="values", outputCol="eprod") @@ -582,7 +582,7 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab Compute the Inverse Document Frequency (IDF) given a collection of documents. - >>> from pyspark.mllib.linalg import DenseVector + >>> from pyspark.ml.linalg import DenseVector >>> df = sqlContext.createDataFrame([(DenseVector([1.0, 2.0]),), ... (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"]) >>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf") @@ -670,7 +670,7 @@ class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav absolute value in each feature. It does not shift/center the data, and thus does not destroy any sparsity. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([(Vectors.dense([1.0]),), (Vectors.dense([2.0]),)], ["a"]) >>> maScaler = MaxAbsScaler(inputCol="a", outputCol="scaled") >>> model = maScaler.fit(df) @@ -757,7 +757,7 @@ class MinMaxScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Jav Note that since zero values will probably be transformed to non-zero values, output of the transformer will be DenseVector even for sparse input. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"]) >>> mmScaler = MinMaxScaler(inputCol="a", outputCol="scaled") >>> model = mmScaler.fit(df) @@ -961,7 +961,7 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav Normalize a vector to have unit norm using the given p-norm. 
- >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> svec = Vectors.sparse(4, {1: 4.0, 3: 3.0}) >>> df = sqlContext.createDataFrame([(Vectors.dense([3.0, -4.0]), svec)], ["dense", "sparse"]) >>> normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features") @@ -1114,7 +1114,7 @@ class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol, JavaMLRead multiplication distributes over addition". Take a 2-variable feature vector as an example: `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"]) >>> px = PolynomialExpansion(degree=2, inputCol="dense", outputCol="expanded") >>> px.transform(df).head().expanded @@ -1459,7 +1459,7 @@ class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, J Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"]) >>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled") >>> model = standardScaler.fit(df) @@ -1942,7 +1942,7 @@ class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, Ja - Add warning if a categorical feature has only 1 category. - Add option for allowing unknown categories. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([(Vectors.dense([-1.0, 0.0]),), ... (Vectors.dense([0.0, 1.0]),), (Vectors.dense([0.0, 2.0]),)], ["a"]) >>> indexer = VectorIndexer(maxCategories=2, inputCol="a", outputCol="indexed") @@ -2062,7 +2062,7 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J The output vector will order features with the selected indices first (in the order given), followed by the selected names (in the order given). - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (Vectors.dense([-2.0, 2.3, 0.0, 0.0, 1.0]),), ... (Vectors.dense([0.0, 0.0, 0.0, 0.0, 0.0]),), @@ -2329,7 +2329,7 @@ class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab PCA trains a model to project vectors to a low-dimensional space using PCA. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> data = [(Vectors.sparse(5, [(1, 1.0), (3, 7.0)]),), ... (Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]),), ... (Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0]),)] @@ -2547,7 +2547,7 @@ class ChiSqSelector(JavaEstimator, HasFeaturesCol, HasOutputCol, HasLabelCol, Ja Chi-Squared feature selection, which selects categorical features to use for predicting a categorical label. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame( ... [(Vectors.dense([0.0, 0.0, 18.0, 1.0]), 1.0), ... 
(Vectors.dense([0.0, 1.0, 12.0, 0.0]), 0.0),
diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py
new file mode 100644
index 0000000000000..3135b236dbd2d
--- /dev/null
+++ b/python/pyspark/ml/linalg/__init__.py
@@ -0,0 +1,1286 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+MLlib utilities for linear algebra. For dense vectors, MLlib
+uses the NumPy C{array} type, so you can simply pass NumPy arrays
+around. For sparse vectors, users can construct a L{SparseVector}
+object from MLlib or pass SciPy C{scipy.sparse} column vectors if
+SciPy is available in their environment.
+"""
+
+import sys
+import array
+import struct
+
+if sys.version >= '3':
+    basestring = str
+    xrange = range
+    import copyreg as copy_reg
+    long = int
+else:
+    from itertools import izip as zip
+    import copy_reg
+
+import numpy as np
+
+from pyspark import since
+from pyspark.mllib.linalg import Vector as OldVector
+from pyspark.sql.types import UserDefinedType, StructField, StructType, ArrayType, DoubleType, \
+    IntegerType, ByteType, BooleanType
+
+
+__all__ = ['Vector', 'DenseVector', 'SparseVector', 'Vectors',
+           'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices',
+           'QRDecomposition']
+
+
+if sys.version_info[:2] == (2, 7):
+    # speed up pickling array in Python 2.7
+    def fast_pickle_array(ar):
+        return array.array, (ar.typecode, ar.tostring())
+    copy_reg.pickle(array.array, fast_pickle_array)
+
+
+# Check whether we have SciPy. MLlib works without it too, but if we have it, some methods,
+# such as _dot and _serialize_double_vector, start to support scipy.sparse matrices.
+
+try:
+    import scipy.sparse
+    _have_scipy = True
+except ImportError:
+    # No SciPy in environment, but that's okay
+    _have_scipy = False
+
+
+def _convert_to_vector(l):
+    if isinstance(l, Vector):
+        return l
+    # this is for compatibility because Scala mllib code still uses the mllib.linalg Vector
+    # todo: remove this when mllib.linalg Vector is removed.
+    elif isinstance(l, OldVector):
+        return l
+    elif type(l) in (array.array, np.array, np.ndarray, list, tuple, xrange):
+        return DenseVector(l)
+    elif _have_scipy and scipy.sparse.issparse(l):
+        assert l.shape[1] == 1, "Expected column vector"
+        csc = l.tocsc()
+        return SparseVector(l.shape[0], csc.indices, csc.data)
+    else:
+        raise TypeError("Cannot convert type %s into Vector" % type(l))
+
+
+def _vector_size(v):
+    """
+    Returns the size of the vector.
+
+    >>> _vector_size([1., 2., 3.])
+    3
+    >>> _vector_size((1., 2., 3.))
+    3
+    >>> _vector_size(array.array('d', [1., 2., 3.]))
+    3
+    >>> _vector_size(np.zeros(3))
+    3
+    >>> _vector_size(np.zeros((3, 1)))
+    3
+    >>> _vector_size(np.zeros((1, 3)))
+    Traceback (most recent call last):
+    ...
+ ValueError: Cannot treat an ndarray of shape (1, 3) as a vector + """ + if isinstance(v, Vector): + return len(v) + elif type(v) in (array.array, list, tuple, xrange): + return len(v) + elif type(v) == np.ndarray: + if v.ndim == 1 or (v.ndim == 2 and v.shape[1] == 1): + return len(v) + else: + raise ValueError("Cannot treat an ndarray of shape %s as a vector" % str(v.shape)) + elif _have_scipy and scipy.sparse.issparse(v): + assert v.shape[1] == 1, "Expected column vector" + return v.shape[0] + else: + raise TypeError("Cannot treat type %s as a vector" % type(v)) + + +def _format_float(f, digits=4): + s = str(round(f, digits)) + if '.' in s: + s = s[:s.index('.') + 1 + digits] + return s + + +def _format_float_list(l): + return [_format_float(x) for x in l] + + +def _double_to_long_bits(value): + if np.isnan(value): + value = float('nan') + # pack double into 64 bits, then unpack as long int + return struct.unpack('Q', struct.pack('d', value))[0] + + +class VectorUDT(UserDefinedType): + """ + SQL user-defined type (UDT) for Vector. + """ + + @classmethod + def sqlType(cls): + return StructType([ + StructField("type", ByteType(), False), + StructField("size", IntegerType(), True), + StructField("indices", ArrayType(IntegerType(), False), True), + StructField("values", ArrayType(DoubleType(), False), True)]) + + @classmethod + def module(cls): + return "pyspark.ml.linalg" + + @classmethod + def scalaUDT(cls): + # we can't change this to org.apache.spark.ml.linalg.VectorUDT before Scala mllib/ml codes + # use ml VectorUDT. + # todo: change this to org.apache.spark.ml.linalg.VectorUDT + return "org.apache.spark.mllib.linalg.VectorUDT" + + def serialize(self, obj): + if isinstance(obj, SparseVector): + indices = [int(i) for i in obj.indices] + values = [float(v) for v in obj.values] + return (0, obj.size, indices, values) + elif isinstance(obj, DenseVector): + values = [float(v) for v in obj] + return (1, None, None, values) + else: + raise TypeError("cannot serialize %r of type %r" % (obj, type(obj))) + + def deserialize(self, datum): + assert len(datum) == 4, \ + "VectorUDT.deserialize given row with length %d but requires 4" % len(datum) + tpe = datum[0] + if tpe == 0: + return SparseVector(datum[1], datum[2], datum[3]) + elif tpe == 1: + return DenseVector(datum[3]) + else: + raise ValueError("do not recognize type %r" % tpe) + + def simpleString(self): + return "vector" + + +class MatrixUDT(UserDefinedType): + """ + SQL user-defined type (UDT) for Matrix. + """ + + @classmethod + def sqlType(cls): + return StructType([ + StructField("type", ByteType(), False), + StructField("numRows", IntegerType(), False), + StructField("numCols", IntegerType(), False), + StructField("colPtrs", ArrayType(IntegerType(), False), True), + StructField("rowIndices", ArrayType(IntegerType(), False), True), + StructField("values", ArrayType(DoubleType(), False), True), + StructField("isTransposed", BooleanType(), False)]) + + @classmethod + def module(cls): + return "pyspark.ml.linalg" + + @classmethod + def scalaUDT(cls): + # we can't change this to org.apache.spark.ml.linalg.MatrixUDT before Scala mllib/ml codes + # use ml MatrixUDT. 
+ # todo: change this to org.apache.spark.ml.linalg.MatrixUDT + return "org.apache.spark.mllib.linalg.MatrixUDT" + + def serialize(self, obj): + if isinstance(obj, SparseMatrix): + colPtrs = [int(i) for i in obj.colPtrs] + rowIndices = [int(i) for i in obj.rowIndices] + values = [float(v) for v in obj.values] + return (0, obj.numRows, obj.numCols, colPtrs, + rowIndices, values, bool(obj.isTransposed)) + elif isinstance(obj, DenseMatrix): + values = [float(v) for v in obj.values] + return (1, obj.numRows, obj.numCols, None, None, values, + bool(obj.isTransposed)) + else: + raise TypeError("cannot serialize type %r" % (type(obj))) + + def deserialize(self, datum): + assert len(datum) == 7, \ + "MatrixUDT.deserialize given row with length %d but requires 7" % len(datum) + tpe = datum[0] + if tpe == 0: + return SparseMatrix(*datum[1:]) + elif tpe == 1: + return DenseMatrix(datum[1], datum[2], datum[5], datum[6]) + else: + raise ValueError("do not recognize type %r" % tpe) + + def simpleString(self): + return "matrix" + + +class Vector(object): + + __UDT__ = VectorUDT() + + """ + Abstract class for DenseVector and SparseVector + """ + def toArray(self): + """ + Convert the vector into an numpy.ndarray + + :return: numpy.ndarray + """ + raise NotImplementedError + + +class DenseVector(Vector): + """ + A dense vector represented by a value array. We use numpy array for + storage and arithmetics will be delegated to the underlying numpy + array. + + >>> v = Vectors.dense([1.0, 2.0]) + >>> u = Vectors.dense([3.0, 4.0]) + >>> v + u + DenseVector([4.0, 6.0]) + >>> 2 - v + DenseVector([1.0, 0.0]) + >>> v / 2 + DenseVector([0.5, 1.0]) + >>> v * u + DenseVector([3.0, 8.0]) + >>> u / v + DenseVector([3.0, 2.0]) + >>> u % 2 + DenseVector([1.0, 0.0]) + """ + def __init__(self, ar): + if isinstance(ar, bytes): + ar = np.frombuffer(ar, dtype=np.float64) + elif not isinstance(ar, np.ndarray): + ar = np.array(ar, dtype=np.float64) + if ar.dtype != np.float64: + ar = ar.astype(np.float64) + self.array = ar + + @staticmethod + def parse(s): + """ + Parse string representation back into the DenseVector. + + >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]') + DenseVector([0.0, 1.0, 2.0, 3.0]) + """ + start = s.find('[') + if start == -1: + raise ValueError("Array should start with '['.") + end = s.find(']') + if end == -1: + raise ValueError("Array should end with ']'.") + s = s[start + 1: end] + + try: + values = [float(val) for val in s.split(',') if val] + except ValueError: + raise ValueError("Unable to parse values from %s" % s) + return DenseVector(values) + + def __reduce__(self): + return DenseVector, (self.array.tostring(),) + + def numNonzeros(self): + """ + Number of nonzero elements. This scans all active values and count non zeros + """ + return np.count_nonzero(self.array) + + def norm(self, p): + """ + Calculates the norm of a DenseVector. + + >>> a = DenseVector([0, -1, 2, -3]) + >>> a.norm(2) + 3.7... + >>> a.norm(1) + 6.0 + """ + return np.linalg.norm(self.array, p) + + def dot(self, other): + """ + Compute the dot product of two Vectors. We support + (Numpy array, list, SparseVector, or SciPy sparse) + and a target NumPy array that is either 1- or 2-dimensional. + Equivalent to calling numpy.dot of the two vectors. + + >>> dense = DenseVector(array.array('d', [1., 2.])) + >>> dense.dot(dense) + 5.0 + >>> dense.dot(SparseVector(2, [0, 1], [2., 1.])) + 4.0 + >>> dense.dot(range(1, 3)) + 5.0 + >>> dense.dot(np.array(range(1, 3))) + 5.0 + >>> dense.dot([1.,]) + Traceback (most recent call last): + ... 
+ AssertionError: dimension mismatch + >>> dense.dot(np.reshape([1., 2., 3., 4.], (2, 2), order='F')) + array([ 5., 11.]) + >>> dense.dot(np.reshape([1., 2., 3.], (3, 1), order='F')) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + """ + if type(other) == np.ndarray: + if other.ndim > 1: + assert len(self) == other.shape[0], "dimension mismatch" + return np.dot(self.array, other) + elif _have_scipy and scipy.sparse.issparse(other): + assert len(self) == other.shape[0], "dimension mismatch" + return other.transpose().dot(self.toArray()) + else: + assert len(self) == _vector_size(other), "dimension mismatch" + if isinstance(other, SparseVector): + return other.dot(self) + elif isinstance(other, Vector): + return np.dot(self.toArray(), other.toArray()) + else: + return np.dot(self.toArray(), other) + + def squared_distance(self, other): + """ + Squared distance of two Vectors. + + >>> dense1 = DenseVector(array.array('d', [1., 2.])) + >>> dense1.squared_distance(dense1) + 0.0 + >>> dense2 = np.array([2., 1.]) + >>> dense1.squared_distance(dense2) + 2.0 + >>> dense3 = [2., 1.] + >>> dense1.squared_distance(dense3) + 2.0 + >>> sparse1 = SparseVector(2, [0, 1], [2., 1.]) + >>> dense1.squared_distance(sparse1) + 2.0 + >>> dense1.squared_distance([1.,]) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + >>> dense1.squared_distance(SparseVector(1, [0,], [1.,])) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + """ + assert len(self) == _vector_size(other), "dimension mismatch" + if isinstance(other, SparseVector): + return other.squared_distance(self) + elif _have_scipy and scipy.sparse.issparse(other): + return _convert_to_vector(other).squared_distance(self) + + if isinstance(other, Vector): + other = other.toArray() + elif not isinstance(other, np.ndarray): + other = np.array(other) + diff = self.toArray() - other + return np.dot(diff, diff) + + def toArray(self): + """ + Returns an numpy.ndarray + """ + return self.array + + @property + def values(self): + """ + Returns a list of values + """ + return self.array + + def __getitem__(self, item): + return self.array[item] + + def __len__(self): + return len(self.array) + + def __str__(self): + return "[" + ",".join([str(v) for v in self.array]) + "]" + + def __repr__(self): + return "DenseVector([%s])" % (', '.join(_format_float(i) for i in self.array)) + + def __eq__(self, other): + if isinstance(other, DenseVector): + return np.array_equal(self.array, other.array) + elif isinstance(other, SparseVector): + if len(self) != other.size: + return False + return Vectors._equals(list(xrange(len(self))), self.array, other.indices, other.values) + return False + + def __ne__(self, other): + return not self == other + + def __hash__(self): + size = len(self) + result = 31 + size + nnz = 0 + i = 0 + while i < size and nnz < 128: + if self.array[i] != 0: + result = 31 * result + i + bits = _double_to_long_bits(self.array[i]) + result = 31 * result + (bits ^ (bits >> 32)) + nnz += 1 + i += 1 + return result + + def __getattr__(self, item): + return getattr(self.array, item) + + def _delegate(op): + def func(self, other): + if isinstance(other, DenseVector): + other = other.array + return DenseVector(getattr(self.array, op)(other)) + return func + + __neg__ = _delegate("__neg__") + __add__ = _delegate("__add__") + __sub__ = _delegate("__sub__") + __mul__ = _delegate("__mul__") + __div__ = _delegate("__div__") + __truediv__ = _delegate("__truediv__") + __mod__ = 
_delegate("__mod__") + __radd__ = _delegate("__radd__") + __rsub__ = _delegate("__rsub__") + __rmul__ = _delegate("__rmul__") + __rdiv__ = _delegate("__rdiv__") + __rtruediv__ = _delegate("__rtruediv__") + __rmod__ = _delegate("__rmod__") + + +class SparseVector(Vector): + """ + A simple sparse vector class for passing data to MLlib. Users may + alternatively pass SciPy's {scipy.sparse} data types. + """ + def __init__(self, size, *args): + """ + Create a sparse vector, using either a dictionary, a list of + (index, value) pairs, or two separate arrays of indices and + values (sorted by index). + + :param size: Size of the vector. + :param args: Active entries, as a dictionary {index: value, ...}, + a list of tuples [(index, value), ...], or a list of strictly + increasing indices and a list of corresponding values [index, ...], + [value, ...]. Inactive entries are treated as zeros. + + >>> SparseVector(4, {1: 1.0, 3: 5.5}) + SparseVector(4, {1: 1.0, 3: 5.5}) + >>> SparseVector(4, [(1, 1.0), (3, 5.5)]) + SparseVector(4, {1: 1.0, 3: 5.5}) + >>> SparseVector(4, [1, 3], [1.0, 5.5]) + SparseVector(4, {1: 1.0, 3: 5.5}) + """ + self.size = int(size) + """ Size of the vector. """ + assert 1 <= len(args) <= 2, "must pass either 2 or 3 arguments" + if len(args) == 1: + pairs = args[0] + if type(pairs) == dict: + pairs = pairs.items() + pairs = sorted(pairs) + self.indices = np.array([p[0] for p in pairs], dtype=np.int32) + """ A list of indices corresponding to active entries. """ + self.values = np.array([p[1] for p in pairs], dtype=np.float64) + """ A list of values corresponding to active entries. """ + else: + if isinstance(args[0], bytes): + assert isinstance(args[1], bytes), "values should be string too" + if args[0]: + self.indices = np.frombuffer(args[0], np.int32) + self.values = np.frombuffer(args[1], np.float64) + else: + # np.frombuffer() doesn't work well with empty string in older version + self.indices = np.array([], dtype=np.int32) + self.values = np.array([], dtype=np.float64) + else: + self.indices = np.array(args[0], dtype=np.int32) + self.values = np.array(args[1], dtype=np.float64) + assert len(self.indices) == len(self.values), "index and value arrays not same length" + for i in xrange(len(self.indices) - 1): + if self.indices[i] >= self.indices[i + 1]: + raise TypeError( + "Indices %s and %s are not strictly increasing" + % (self.indices[i], self.indices[i + 1])) + + def numNonzeros(self): + """ + Number of nonzero elements. This scans all active values and count non zeros. + """ + return np.count_nonzero(self.values) + + def norm(self, p): + """ + Calculates the norm of a SparseVector. + + >>> a = SparseVector(4, [0, 1], [3., -4.]) + >>> a.norm(1) + 7.0 + >>> a.norm(2) + 5.0 + """ + return np.linalg.norm(self.values, p) + + def __reduce__(self): + return ( + SparseVector, + (self.size, self.indices.tostring(), self.values.tostring())) + + @staticmethod + def parse(s): + """ + Parse string representation back into the SparseVector. + + >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )') + SparseVector(4, {0: 4.0, 1: 5.0}) + """ + start = s.find('(') + if start == -1: + raise ValueError("Tuple should start with '('") + end = s.find(')') + if start == -1: + raise ValueError("Tuple should end with ')'") + s = s[start + 1: end].strip() + + size = s[: s.find(',')] + try: + size = int(size) + except ValueError: + raise ValueError("Cannot parse size %s." 
% size) + + ind_start = s.find('[') + if ind_start == -1: + raise ValueError("Indices array should start with '['.") + ind_end = s.find(']') + if ind_end == -1: + raise ValueError("Indices array should end with ']'") + new_s = s[ind_start + 1: ind_end] + ind_list = new_s.split(',') + try: + indices = [int(ind) for ind in ind_list if ind] + except ValueError: + raise ValueError("Unable to parse indices from %s." % new_s) + s = s[ind_end + 1:].strip() + + val_start = s.find('[') + if val_start == -1: + raise ValueError("Values array should start with '['.") + val_end = s.find(']') + if val_end == -1: + raise ValueError("Values array should end with ']'.") + val_list = s[val_start + 1: val_end].split(',') + try: + values = [float(val) for val in val_list if val] + except ValueError: + raise ValueError("Unable to parse values from %s." % s) + return SparseVector(size, indices, values) + + def dot(self, other): + """ + Dot product with a SparseVector or 1- or 2-dimensional Numpy array. + + >>> a = SparseVector(4, [1, 3], [3.0, 4.0]) + >>> a.dot(a) + 25.0 + >>> a.dot(array.array('d', [1., 2., 3., 4.])) + 22.0 + >>> b = SparseVector(4, [2], [1.0]) + >>> a.dot(b) + 0.0 + >>> a.dot(np.array([[1, 1], [2, 2], [3, 3], [4, 4]])) + array([ 22., 22.]) + >>> a.dot([1., 2., 3.]) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + >>> a.dot(np.array([1., 2.])) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + >>> a.dot(DenseVector([1., 2.])) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + >>> a.dot(np.zeros((3, 2))) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + """ + + if isinstance(other, np.ndarray): + if other.ndim not in [2, 1]: + raise ValueError("Cannot call dot with %d-dimensional array" % other.ndim) + assert len(self) == other.shape[0], "dimension mismatch" + return np.dot(self.values, other[self.indices]) + + assert len(self) == _vector_size(other), "dimension mismatch" + + if isinstance(other, DenseVector): + return np.dot(other.array[self.indices], self.values) + + elif isinstance(other, SparseVector): + # Find out common indices. + self_cmind = np.in1d(self.indices, other.indices, assume_unique=True) + self_values = self.values[self_cmind] + if self_values.size == 0: + return 0.0 + else: + other_cmind = np.in1d(other.indices, self.indices, assume_unique=True) + return np.dot(self_values, other.values[other_cmind]) + + else: + return self.dot(_convert_to_vector(other)) + + def squared_distance(self, other): + """ + Squared distance from a SparseVector or 1-dimensional NumPy array. + + >>> a = SparseVector(4, [1, 3], [3.0, 4.0]) + >>> a.squared_distance(a) + 0.0 + >>> a.squared_distance(array.array('d', [1., 2., 3., 4.])) + 11.0 + >>> a.squared_distance(np.array([1., 2., 3., 4.])) + 11.0 + >>> b = SparseVector(4, [2], [1.0]) + >>> a.squared_distance(b) + 26.0 + >>> b.squared_distance(a) + 26.0 + >>> b.squared_distance([1., 2.]) + Traceback (most recent call last): + ... + AssertionError: dimension mismatch + >>> b.squared_distance(SparseVector(3, [1,], [1.0,])) + Traceback (most recent call last): + ... 
+        AssertionError: dimension mismatch
+        """
+        assert len(self) == _vector_size(other), "dimension mismatch"
+
+        if isinstance(other, np.ndarray) or isinstance(other, DenseVector):
+            if isinstance(other, np.ndarray) and other.ndim != 1:
+                raise ValueError("Cannot call squared_distance with %d-dimensional array" %
+                                 other.ndim)
+            if isinstance(other, DenseVector):
+                other = other.array
+            sparse_ind = np.zeros(other.size, dtype=bool)
+            sparse_ind[self.indices] = True
+            dist = other[sparse_ind] - self.values
+            result = np.dot(dist, dist)
+
+            other_ind = other[~sparse_ind]
+            result += np.dot(other_ind, other_ind)
+            return result
+
+        elif isinstance(other, SparseVector):
+            result = 0.0
+            i, j = 0, 0
+            while i < len(self.indices) and j < len(other.indices):
+                if self.indices[i] == other.indices[j]:
+                    diff = self.values[i] - other.values[j]
+                    result += diff * diff
+                    i += 1
+                    j += 1
+                elif self.indices[i] < other.indices[j]:
+                    result += self.values[i] * self.values[i]
+                    i += 1
+                else:
+                    result += other.values[j] * other.values[j]
+                    j += 1
+            while i < len(self.indices):
+                result += self.values[i] * self.values[i]
+                i += 1
+            while j < len(other.indices):
+                result += other.values[j] * other.values[j]
+                j += 1
+            return result
+        else:
+            return self.squared_distance(_convert_to_vector(other))
+
+    def toArray(self):
+        """
+        Returns a copy of this SparseVector as a 1-dimensional NumPy array.
+        """
+        arr = np.zeros((self.size,), dtype=np.float64)
+        arr[self.indices] = self.values
+        return arr
+
+    def __len__(self):
+        return self.size
+
+    def __str__(self):
+        inds = "[" + ",".join([str(i) for i in self.indices]) + "]"
+        vals = "[" + ",".join([str(v) for v in self.values]) + "]"
+        return "(" + ",".join((str(self.size), inds, vals)) + ")"
+
+    def __repr__(self):
+        inds = self.indices
+        vals = self.values
+        entries = ", ".join(["{0}: {1}".format(inds[i], _format_float(vals[i]))
+                             for i in xrange(len(inds))])
+        return "SparseVector({0}, {{{1}}})".format(self.size, entries)
+
+    def __eq__(self, other):
+        if isinstance(other, SparseVector):
+            return other.size == self.size and np.array_equal(other.indices, self.indices) \
+                and np.array_equal(other.values, self.values)
+        elif isinstance(other, DenseVector):
+            if self.size != len(other):
+                return False
+            return Vectors._equals(self.indices, self.values, list(xrange(len(other))), other.array)
+        return False
+
+    def __getitem__(self, index):
+        inds = self.indices
+        vals = self.values
+        if not isinstance(index, int):
+            raise TypeError(
+                "Indices must be of type integer, got type %s" % type(index))
+
+        if index >= self.size or index < -self.size:
+            raise ValueError("Index %d out of bounds." % index)
+        if index < 0:
+            index += self.size
+
+        if (inds.size == 0) or (index > inds.item(-1)):
+            return 0.
+
+        insert_index = np.searchsorted(inds, index)
+        row_ind = inds[insert_index]
+        if row_ind == index:
+            return vals[insert_index]
+        return 0.
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __hash__(self):
+        result = 31 + self.size
+        nnz = 0
+        i = 0
+        while i < len(self.values) and nnz < 128:
+            if self.values[i] != 0:
+                result = 31 * result + int(self.indices[i])
+                bits = _double_to_long_bits(self.values[i])
+                result = 31 * result + (bits ^ (bits >> 32))
+                nnz += 1
+            i += 1
+        return result
+
+
+class Vectors(object):
+
+    """
+    Factory methods for working with vectors. Note that dense vectors
+    are simply represented as NumPy array objects, so there is no need
+    to convert them for use in MLlib.
For sparse vectors, the factory + methods in this class create an MLlib-compatible type, or users + can pass in SciPy's C{scipy.sparse} column vectors. + """ + + @staticmethod + def sparse(size, *args): + """ + Create a sparse vector, using either a dictionary, a list of + (index, value) pairs, or two separate arrays of indices and + values (sorted by index). + + :param size: Size of the vector. + :param args: Non-zero entries, as a dictionary, list of tuples, + or two sorted lists containing indices and values. + + >>> Vectors.sparse(4, {1: 1.0, 3: 5.5}) + SparseVector(4, {1: 1.0, 3: 5.5}) + >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)]) + SparseVector(4, {1: 1.0, 3: 5.5}) + >>> Vectors.sparse(4, [1, 3], [1.0, 5.5]) + SparseVector(4, {1: 1.0, 3: 5.5}) + """ + return SparseVector(size, *args) + + @staticmethod + def dense(*elements): + """ + Create a dense vector of 64-bit floats from a Python list or numbers. + + >>> Vectors.dense([1, 2, 3]) + DenseVector([1.0, 2.0, 3.0]) + >>> Vectors.dense(1.0, 2.0) + DenseVector([1.0, 2.0]) + """ + if len(elements) == 1 and not isinstance(elements[0], (float, int, long)): + # it's list, numpy.array or other iterable object. + elements = elements[0] + return DenseVector(elements) + + @staticmethod + def stringify(vector): + """ + Converts a vector into a string, which can be recognized by + Vectors.parse(). + + >>> Vectors.stringify(Vectors.sparse(2, [1], [1.0])) + '(2,[1],[1.0])' + >>> Vectors.stringify(Vectors.dense([0.0, 1.0])) + '[0.0,1.0]' + """ + return str(vector) + + @staticmethod + def squared_distance(v1, v2): + """ + Squared distance between two vectors. + a and b can be of type SparseVector, DenseVector, np.ndarray + or array.array. + + >>> a = Vectors.sparse(4, [(0, 1), (3, 4)]) + >>> b = Vectors.dense([2, 5, 4, 1]) + >>> a.squared_distance(b) + 51.0 + """ + v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2) + return v1.squared_distance(v2) + + @staticmethod + def norm(vector, p): + """ + Find norm of the given vector. + """ + return _convert_to_vector(vector).norm(p) + + @staticmethod + def parse(s): + """Parse a string representation back into the Vector. + + >>> Vectors.parse('[2,1,2 ]') + DenseVector([2.0, 1.0, 2.0]) + >>> Vectors.parse(' ( 100, [0], [2])') + SparseVector(100, {0: 2.0}) + """ + if s.find('(') == -1 and s.find('[') != -1: + return DenseVector.parse(s) + elif s.find('(') != -1: + return SparseVector.parse(s) + else: + raise ValueError( + "Cannot find tokens '[' or '(' from the input string.") + + @staticmethod + def zeros(size): + return DenseVector(np.zeros(size)) + + @staticmethod + def _equals(v1_indices, v1_values, v2_indices, v2_values): + """ + Check equality between sparse/dense vectors, + v1_indices and v2_indices assume to be strictly increasing. + """ + v1_size = len(v1_values) + v2_size = len(v2_values) + k1 = 0 + k2 = 0 + all_equal = True + while all_equal: + while k1 < v1_size and v1_values[k1] == 0: + k1 += 1 + while k2 < v2_size and v2_values[k2] == 0: + k2 += 1 + + if k1 >= v1_size or k2 >= v2_size: + return k1 >= v1_size and k2 >= v2_size + + all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2] + k1 += 1 + k2 += 1 + return all_equal + + +class Matrix(object): + + __UDT__ = MatrixUDT() + + """ + Represents a local matrix. + """ + def __init__(self, numRows, numCols, isTransposed=False): + self.numRows = numRows + self.numCols = numCols + self.isTransposed = isTransposed + + def toArray(self): + """ + Returns its elements in a NumPy ndarray. 
+ """ + raise NotImplementedError + + @staticmethod + def _convert_to_array(array_like, dtype): + """ + Convert Matrix attributes which are array-like or buffer to array. + """ + if isinstance(array_like, bytes): + return np.frombuffer(array_like, dtype=dtype) + return np.asarray(array_like, dtype=dtype) + + +class DenseMatrix(Matrix): + """ + Column-major dense matrix. + """ + def __init__(self, numRows, numCols, values, isTransposed=False): + Matrix.__init__(self, numRows, numCols, isTransposed) + values = self._convert_to_array(values, np.float64) + assert len(values) == numRows * numCols + self.values = values + + def __reduce__(self): + return DenseMatrix, ( + self.numRows, self.numCols, self.values.tostring(), + int(self.isTransposed)) + + def __str__(self): + """ + Pretty printing of a DenseMatrix + + >>> dm = DenseMatrix(2, 2, range(4)) + >>> print(dm) + DenseMatrix([[ 0., 2.], + [ 1., 3.]]) + >>> dm = DenseMatrix(2, 2, range(4), isTransposed=True) + >>> print(dm) + DenseMatrix([[ 0., 1.], + [ 2., 3.]]) + """ + # Inspired by __repr__ in scipy matrices. + array_lines = repr(self.toArray()).splitlines() + + # We need to adjust six spaces which is the difference in number + # of letters between "DenseMatrix" and "array" + x = '\n'.join([(" " * 6 + line) for line in array_lines[1:]]) + return array_lines[0].replace("array", "DenseMatrix") + "\n" + x + + def __repr__(self): + """ + Representation of a DenseMatrix + + >>> dm = DenseMatrix(2, 2, range(4)) + >>> dm + DenseMatrix(2, 2, [0.0, 1.0, 2.0, 3.0], False) + """ + # If the number of values are less than seventeen then return as it is. + # Else return first eight values and last eight values. + if len(self.values) < 17: + entries = _format_float_list(self.values) + else: + entries = ( + _format_float_list(self.values[:8]) + + ["..."] + + _format_float_list(self.values[-8:]) + ) + + entries = ", ".join(entries) + return "DenseMatrix({0}, {1}, [{2}], {3})".format( + self.numRows, self.numCols, entries, self.isTransposed) + + def toArray(self): + """ + Return an numpy.ndarray + + >>> m = DenseMatrix(2, 2, range(4)) + >>> m.toArray() + array([[ 0., 2.], + [ 1., 3.]]) + """ + if self.isTransposed: + return np.asfortranarray( + self.values.reshape((self.numRows, self.numCols))) + else: + return self.values.reshape((self.numRows, self.numCols), order='F') + + def toSparse(self): + """Convert to SparseMatrix""" + if self.isTransposed: + values = np.ravel(self.toArray(), order='F') + else: + values = self.values + indices = np.nonzero(values)[0] + colCounts = np.bincount(indices // self.numRows) + colPtrs = np.cumsum(np.hstack( + (0, colCounts, np.zeros(self.numCols - colCounts.size)))) + values = values[indices] + rowIndices = indices % self.numRows + + return SparseMatrix(self.numRows, self.numCols, colPtrs, rowIndices, values) + + def __getitem__(self, indices): + i, j = indices + if i < 0 or i >= self.numRows: + raise ValueError("Row index %d is out of range [0, %d)" + % (i, self.numRows)) + if j >= self.numCols or j < 0: + raise ValueError("Column index %d is out of range [0, %d)" + % (j, self.numCols)) + + if self.isTransposed: + return self.values[i * self.numCols + j] + else: + return self.values[i + j * self.numRows] + + def __eq__(self, other): + if (not isinstance(other, DenseMatrix) or + self.numRows != other.numRows or + self.numCols != other.numCols): + return False + + self_values = np.ravel(self.toArray(), order='F') + other_values = np.ravel(other.toArray(), order='F') + return all(self_values == other_values) + + +class 
SparseMatrix(Matrix): + """Sparse Matrix stored in CSC format.""" + def __init__(self, numRows, numCols, colPtrs, rowIndices, values, + isTransposed=False): + Matrix.__init__(self, numRows, numCols, isTransposed) + self.colPtrs = self._convert_to_array(colPtrs, np.int32) + self.rowIndices = self._convert_to_array(rowIndices, np.int32) + self.values = self._convert_to_array(values, np.float64) + + if self.isTransposed: + if self.colPtrs.size != numRows + 1: + raise ValueError("Expected colPtrs of size %d, got %d." + % (numRows + 1, self.colPtrs.size)) + else: + if self.colPtrs.size != numCols + 1: + raise ValueError("Expected colPtrs of size %d, got %d." + % (numCols + 1, self.colPtrs.size)) + if self.rowIndices.size != self.values.size: + raise ValueError("Expected rowIndices of length %d, got %d." + % (self.rowIndices.size, self.values.size)) + + def __str__(self): + """ + Pretty printing of a SparseMatrix + + >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]) + >>> print(sm1) + 2 X 2 CSCMatrix + (0,0) 2.0 + (1,0) 3.0 + (1,1) 4.0 + >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4], True) + >>> print(sm1) + 2 X 2 CSRMatrix + (0,0) 2.0 + (0,1) 3.0 + (1,1) 4.0 + """ + spstr = "{0} X {1} ".format(self.numRows, self.numCols) + if self.isTransposed: + spstr += "CSRMatrix\n" + else: + spstr += "CSCMatrix\n" + + cur_col = 0 + smlist = [] + + # Display first 16 values. + if len(self.values) <= 16: + zipindval = zip(self.rowIndices, self.values) + else: + zipindval = zip(self.rowIndices[:16], self.values[:16]) + for i, (rowInd, value) in enumerate(zipindval): + if self.colPtrs[cur_col + 1] <= i: + cur_col += 1 + if self.isTransposed: + smlist.append('({0},{1}) {2}'.format( + cur_col, rowInd, _format_float(value))) + else: + smlist.append('({0},{1}) {2}'.format( + rowInd, cur_col, _format_float(value))) + spstr += "\n".join(smlist) + + if len(self.values) > 16: + spstr += "\n.." * 2 + return spstr + + def __repr__(self): + """ + Representation of a SparseMatrix + + >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]) + >>> sm1 + SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2.0, 3.0, 4.0], False) + """ + rowIndices = list(self.rowIndices) + colPtrs = list(self.colPtrs) + + if len(self.values) <= 16: + values = _format_float_list(self.values) + + else: + values = ( + _format_float_list(self.values[:8]) + + ["..."] + + _format_float_list(self.values[-8:]) + ) + rowIndices = rowIndices[:8] + ["..."] + rowIndices[-8:] + + if len(self.colPtrs) > 16: + colPtrs = colPtrs[:8] + ["..."] + colPtrs[-8:] + + values = ", ".join(values) + rowIndices = ", ".join([str(ind) for ind in rowIndices]) + colPtrs = ", ".join([str(ptr) for ptr in colPtrs]) + return "SparseMatrix({0}, {1}, [{2}], [{3}], [{4}], {5})".format( + self.numRows, self.numCols, colPtrs, rowIndices, + values, self.isTransposed) + + def __reduce__(self): + return SparseMatrix, ( + self.numRows, self.numCols, self.colPtrs.tostring(), + self.rowIndices.tostring(), self.values.tostring(), + int(self.isTransposed)) + + def __getitem__(self, indices): + i, j = indices + if i < 0 or i >= self.numRows: + raise ValueError("Row index %d is out of range [0, %d)" + % (i, self.numRows)) + if j < 0 or j >= self.numCols: + raise ValueError("Column index %d is out of range [0, %d)" + % (j, self.numCols)) + + # If a CSR matrix is given, then the row index should be searched + # for in ColPtrs, and the column index should be searched for in the + # corresponding slice obtained from rowIndices. 
+ if self.isTransposed: + j, i = i, j + + colStart = self.colPtrs[j] + colEnd = self.colPtrs[j + 1] + nz = self.rowIndices[colStart: colEnd] + ind = np.searchsorted(nz, i) + colStart + if ind < colEnd and self.rowIndices[ind] == i: + return self.values[ind] + else: + return 0.0 + + def toArray(self): + """ + Return an numpy.ndarray + """ + A = np.zeros((self.numRows, self.numCols), dtype=np.float64, order='F') + for k in xrange(self.colPtrs.size - 1): + startptr = self.colPtrs[k] + endptr = self.colPtrs[k + 1] + if self.isTransposed: + A[k, self.rowIndices[startptr:endptr]] = self.values[startptr:endptr] + else: + A[self.rowIndices[startptr:endptr], k] = self.values[startptr:endptr] + return A + + def toDense(self): + densevals = np.ravel(self.toArray(), order='F') + return DenseMatrix(self.numRows, self.numCols, densevals) + + # TODO: More efficient implementation: + def __eq__(self, other): + return np.all(self.toArray() == other.toArray()) + + +class Matrices(object): + @staticmethod + def dense(numRows, numCols, values): + """ + Create a DenseMatrix + """ + return DenseMatrix(numRows, numCols, values) + + @staticmethod + def sparse(numRows, numCols, colPtrs, rowIndices, values): + """ + Create a SparseMatrix + """ + return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values) + + +class QRDecomposition(object): + """ + .. note:: Experimental + + Represents QR factors. + """ + def __init__(self, Q, R): + self._Q = Q + self._R = R + + @property + @since('2.0.0') + def Q(self): + """ + An orthogonal matrix Q in a QR decomposition. + May be null if not computed. + """ + return self._Q + + @property + @since('2.0.0') + def R(self): + """ + An upper triangular matrix R in a QR decomposition. + """ + return self._R + + +def _test(): + import doctest + (failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS) + if failure_count: + exit(-1) + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index d9513ca5b273d..ade4864e1d785 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -29,8 +29,8 @@ from py4j.java_gateway import JavaObject from pyspark import since +from pyspark.ml.linalg import DenseVector, Vector from pyspark.ml.util import Identifiable -from pyspark.mllib.linalg import DenseVector, Vector __all__ = ['Param', 'Params', 'TypeConverters'] diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index a2300fa49c0bf..1b838e5653e22 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -51,7 +51,7 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction - L1 (Lasso) - L2 + L1 (elastic net) - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, 2.0, Vectors.dense(1.0)), ... (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"]) @@ -407,7 +407,7 @@ class IsotonicRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti Currently implemented using parallelized pool adjacent violators algorithm. Only univariate (single feature) algorithm supported. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) @@ -635,7 +635,7 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi learning algorithm for regression. It supports both continuous and categorical features. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) @@ -791,7 +791,7 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi It supports both continuous and categorical features. >>> from numpy import allclose - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) @@ -899,7 +899,7 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, It supports both continuous and categorical features. >>> from numpy import allclose - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0)), ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) @@ -1024,7 +1024,7 @@ class AFTSurvivalRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi .. seealso:: `AFT Model `_ - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(1.0), 1.0), ... (0.0, Vectors.sparse(1, [], []), 0.0)], ["label", "features", "censor"]) @@ -1217,7 +1217,7 @@ class GeneralizedLinearRegression(JavaEstimator, HasLabelCol, HasFeaturesCol, Ha .. seealso:: `GLM `_ - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> df = sqlContext.createDataFrame([ ... (1.0, Vectors.dense(0.0, 0.0)), ... (1.0, Vectors.dense(1.0, 2.0)), diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 49d3a4a332fd1..e399a6cbb2040 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -18,7 +18,6 @@ """ Unit tests for Spark ML Python APIs. 
""" -import array import sys if sys.version > '3': xrange = range @@ -40,15 +39,21 @@ from shutil import rmtree import tempfile +import array as pyarray import numpy as np +from numpy import ( + array, array_equal, zeros, inf, random, exp, dot, all, mean, abs, arange, tile, ones) +from numpy import sum as array_sum import inspect -from pyspark import keyword_only +from pyspark import keyword_only, SparkContext from pyspark.ml import Estimator, Model, Pipeline, PipelineModel, Transformer from pyspark.ml.classification import * from pyspark.ml.clustering import * from pyspark.ml.evaluation import BinaryClassificationEvaluator, RegressionEvaluator from pyspark.ml.feature import * +from pyspark.ml.linalg import Vector, SparseVector, DenseVector, VectorUDT,\ + DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT from pyspark.ml.param import Param, Params, TypeConverters from pyspark.ml.param.shared import HasMaxIter, HasInputCol, HasSeed from pyspark.ml.recommendation import ALS @@ -56,13 +61,27 @@ from pyspark.ml.tuning import * from pyspark.ml.wrapper import JavaParams from pyspark.mllib.common import _java2py -from pyspark.mllib.linalg import Vectors, DenseVector, SparseVector +from pyspark.mllib.linalg import SparseVector as OldSparseVector, DenseVector as OldDenseVector,\ + DenseMatrix as OldDenseMatrix, SparseMatrix as OldSparseMatrix +from pyspark.mllib.regression import LabeledPoint +from pyspark.serializers import PickleSerializer from pyspark.sql import DataFrame, Row, SparkSession from pyspark.sql.functions import rand from pyspark.sql.utils import IllegalArgumentException from pyspark.storagelevel import * from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase +ser = PickleSerializer() + + +class MLlibTestCase(unittest.TestCase): + def setUp(self): + self.sc = SparkContext('local[4]', "MLlib tests") + self.spark = SparkSession(self.sc) + + def tearDown(self): + self.spark.stop() + class SparkSessionTestCase(PySparkTestCase): @classmethod @@ -149,7 +168,7 @@ def test_vector(self): def test_list(self): l = [0, 1] for lst_like in [l, np.array(l), DenseVector(l), SparseVector(len(l), range(len(l)), l), - array.array('l', l), xrange(2), tuple(l)]: + pyarray.array('l', l), xrange(2), tuple(l)]: converted = TypeConverters.toList(lst_like) self.assertEqual(type(converted), list) self.assertListEqual(converted, l) @@ -157,7 +176,7 @@ def test_list(self): def test_list_int(self): for indices in [[1.0, 2.0], np.array([1.0, 2.0]), DenseVector([1.0, 2.0]), SparseVector(2, {0: 1.0, 1: 2.0}), xrange(1, 3), (1.0, 2.0), - array.array('d', [1.0, 2.0])]: + pyarray.array('d', [1.0, 2.0])]: vs = VectorSlicer(indices=indices) self.assertListEqual(vs.getIndices(), [1, 2]) self.assertTrue(all([type(v) == int for v in vs.getIndices()])) @@ -872,7 +891,6 @@ def test_persistence(self): class TrainingSummaryTest(SparkSessionTestCase): def test_linear_regression_summary(self): - from pyspark.mllib.linalg import Vectors df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)), (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"]) @@ -910,7 +928,6 @@ def test_linear_regression_summary(self): self.assertAlmostEqual(sameSummary.explainedVariance, s.explainedVariance) def test_logistic_regression_summary(self): - from pyspark.mllib.linalg import Vectors df = self.spark.createDataFrame([(1.0, 2.0, Vectors.dense(1.0)), (0.0, 2.0, Vectors.sparse(1, [], []))], ["label", "weight", "features"]) @@ -1072,6 +1089,369 @@ def test_java_params(self): self.check_params(cls()) +def 
_squared_distance(a, b): + if isinstance(a, Vector): + return a.squared_distance(b) + else: + return b.squared_distance(a) + + +class VectorTests(MLlibTestCase): + + def _test_serialize(self, v): + self.assertEqual(v, ser.loads(ser.dumps(v))) + jvec = self.sc._jvm.SerDe.loads(bytearray(ser.dumps(v))) + nv = ser.loads(bytes(self.sc._jvm.SerDe.dumps(jvec))) + self.assertEqual(v, nv) + vs = [v] * 100 + jvecs = self.sc._jvm.SerDe.loads(bytearray(ser.dumps(vs))) + nvs = ser.loads(bytes(self.sc._jvm.SerDe.dumps(jvecs))) + self.assertEqual(vs, nvs) + + def test_serialize(self): + self._test_serialize(DenseVector(range(10))) + self._test_serialize(DenseVector(array([1., 2., 3., 4.]))) + self._test_serialize(DenseVector(pyarray.array('d', range(10)))) + self._test_serialize(SparseVector(4, {1: 1, 3: 2})) + self._test_serialize(SparseVector(3, {})) + self._test_serialize(DenseMatrix(2, 3, range(6))) + sm1 = SparseMatrix( + 3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0]) + self._test_serialize(sm1) + + def test_dot(self): + sv = SparseVector(4, {1: 1, 3: 2}) + dv = DenseVector(array([1., 2., 3., 4.])) + lst = DenseVector([1, 2, 3, 4]) + mat = array([[1., 2., 3., 4.], + [1., 2., 3., 4.], + [1., 2., 3., 4.], + [1., 2., 3., 4.]]) + arr = pyarray.array('d', [0, 1, 2, 3]) + self.assertEqual(10.0, sv.dot(dv)) + self.assertTrue(array_equal(array([3., 6., 9., 12.]), sv.dot(mat))) + self.assertEqual(30.0, dv.dot(dv)) + self.assertTrue(array_equal(array([10., 20., 30., 40.]), dv.dot(mat))) + self.assertEqual(30.0, lst.dot(dv)) + self.assertTrue(array_equal(array([10., 20., 30., 40.]), lst.dot(mat))) + self.assertEqual(7.0, sv.dot(arr)) + + def test_squared_distance(self): + sv = SparseVector(4, {1: 1, 3: 2}) + dv = DenseVector(array([1., 2., 3., 4.])) + lst = DenseVector([4, 3, 2, 1]) + lst1 = [4, 3, 2, 1] + arr = pyarray.array('d', [0, 2, 1, 3]) + narr = array([0, 2, 1, 3]) + self.assertEqual(15.0, _squared_distance(sv, dv)) + self.assertEqual(25.0, _squared_distance(sv, lst)) + self.assertEqual(20.0, _squared_distance(dv, lst)) + self.assertEqual(15.0, _squared_distance(dv, sv)) + self.assertEqual(25.0, _squared_distance(lst, sv)) + self.assertEqual(20.0, _squared_distance(lst, dv)) + self.assertEqual(0.0, _squared_distance(sv, sv)) + self.assertEqual(0.0, _squared_distance(dv, dv)) + self.assertEqual(0.0, _squared_distance(lst, lst)) + self.assertEqual(25.0, _squared_distance(sv, lst1)) + self.assertEqual(3.0, _squared_distance(sv, arr)) + self.assertEqual(3.0, _squared_distance(sv, narr)) + + def test_hash(self): + v1 = DenseVector([0.0, 1.0, 0.0, 5.5]) + v2 = SparseVector(4, [(1, 1.0), (3, 5.5)]) + v3 = DenseVector([0.0, 1.0, 0.0, 5.5]) + v4 = SparseVector(4, [(1, 1.0), (3, 2.5)]) + self.assertEqual(hash(v1), hash(v2)) + self.assertEqual(hash(v1), hash(v3)) + self.assertEqual(hash(v2), hash(v3)) + self.assertFalse(hash(v1) == hash(v4)) + self.assertFalse(hash(v2) == hash(v4)) + + def test_eq(self): + v1 = DenseVector([0.0, 1.0, 0.0, 5.5]) + v2 = SparseVector(4, [(1, 1.0), (3, 5.5)]) + v3 = DenseVector([0.0, 1.0, 0.0, 5.5]) + v4 = SparseVector(6, [(1, 1.0), (3, 5.5)]) + v5 = DenseVector([0.0, 1.0, 0.0, 2.5]) + v6 = SparseVector(4, [(1, 1.0), (3, 2.5)]) + self.assertEqual(v1, v2) + self.assertEqual(v1, v3) + self.assertFalse(v2 == v4) + self.assertFalse(v1 == v5) + self.assertFalse(v1 == v6) + + def test_equals(self): + indices = [1, 2, 4] + values = [1., 3., 2.] 
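+ # Editor's note: Vectors._equals(v1_indices, v1_values, v2_indices, v2_values) is a + # private helper that compares two vectors given in (indices, values) form, so the + # sparse encoding above (indices/values) can be checked against a dense encoding + # whose indices are simply range(5).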
+ self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 0., 2.])) + self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 1., 0., 2.])) + self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 0., 2.])) + self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 2., 2.])) + + def test_conversion(self): + # numpy arrays should be automatically upcast to float64 + # tests for fix of [SPARK-5089] + v = array([1, 2, 3, 4], dtype='float64') + dv = DenseVector(v) + self.assertTrue(dv.array.dtype == 'float64') + v = array([1, 2, 3, 4], dtype='float32') + dv = DenseVector(v) + self.assertTrue(dv.array.dtype == 'float64') + + def test_sparse_vector_indexing(self): + sv = SparseVector(5, {1: 1, 3: 2}) + self.assertEqual(sv[0], 0.) + self.assertEqual(sv[3], 2.) + self.assertEqual(sv[1], 1.) + self.assertEqual(sv[2], 0.) + self.assertEqual(sv[4], 0.) + self.assertEqual(sv[-1], 0.) + self.assertEqual(sv[-2], 2.) + self.assertEqual(sv[-3], 0.) + self.assertEqual(sv[-5], 0.) + for ind in [5, -6]: + self.assertRaises(ValueError, sv.__getitem__, ind) + for ind in [7.8, '1']: + self.assertRaises(TypeError, sv.__getitem__, ind) + + zeros = SparseVector(4, {}) + self.assertEqual(zeros[0], 0.0) + self.assertEqual(zeros[3], 0.0) + for ind in [4, -5]: + self.assertRaises(ValueError, zeros.__getitem__, ind) + + empty = SparseVector(0, {}) + for ind in [-1, 0, 1]: + self.assertRaises(ValueError, empty.__getitem__, ind) + + def test_matrix_indexing(self): + mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10]) + expected = [[0, 6], [1, 8], [4, 10]] + for i in range(3): + for j in range(2): + self.assertEqual(mat[i, j], expected[i][j]) + + def test_repr_dense_matrix(self): + mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10]) + self.assertTrue( + repr(mat), + 'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)') + + mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10], True) + self.assertTrue( + repr(mat), + 'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)') + + mat = DenseMatrix(6, 3, zeros(18)) + self.assertTrue( + repr(mat), + 'DenseMatrix(6, 3, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..., \ + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], False)') + + def test_repr_sparse_matrix(self): + sm1t = SparseMatrix( + 3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], + isTransposed=True) + self.assertTrue( + repr(sm1t), + 'SparseMatrix(3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], True)') + + indices = tile(arange(6), 3) + values = ones(18) + sm = SparseMatrix(6, 3, [0, 6, 12, 18], indices, values) + self.assertTrue( + repr(sm), "SparseMatrix(6, 3, [0, 6, 12, 18], \ + [0, 1, 2, 3, 4, 5, 0, 1, ..., 4, 5, 0, 1, 2, 3, 4, 5], \ + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..., \ + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], False)") + + self.assertTrue( + str(sm), + "6 X 3 CSCMatrix\n\ + (0,0) 1.0\n(1,0) 1.0\n(2,0) 1.0\n(3,0) 1.0\n(4,0) 1.0\n(5,0) 1.0\n\ + (0,1) 1.0\n(1,1) 1.0\n(2,1) 1.0\n(3,1) 1.0\n(4,1) 1.0\n(5,1) 1.0\n\ + (0,2) 1.0\n(1,2) 1.0\n(2,2) 1.0\n(3,2) 1.0\n..\n..") + + sm = SparseMatrix(1, 18, zeros(19), [], []) + self.assertTrue( + repr(sm), + 'SparseMatrix(1, 18, \ + [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0], [], [], False)') + + def test_sparse_matrix(self): + # Test sparse matrix creation. 
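+ # Editor's note: SparseMatrix stores CSC data: colPtrs has numCols + 1 entries, and + # column j's nonzeros are rowIndices[colPtrs[j]:colPtrs[j+1]] with the matching slice + # of values. For sm1 below, column 0 holds (row 1, 1.0) and (row 2, 2.0), column 2 + # holds (row 1, 4.0) and (row 2, 5.0), and columns 1 and 3 are empty.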
+ sm1 = SparseMatrix( + 3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0]) + self.assertEqual(sm1.numRows, 3) + self.assertEqual(sm1.numCols, 4) + self.assertEqual(sm1.colPtrs.tolist(), [0, 2, 2, 4, 4]) + self.assertEqual(sm1.rowIndices.tolist(), [1, 2, 1, 2]) + self.assertEqual(sm1.values.tolist(), [1.0, 2.0, 4.0, 5.0]) + self.assertTrue( + repr(sm1), + 'SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0], False)') + + # Test indexing + expected = [ + [0, 0, 0, 0], + [1, 0, 4, 0], + [2, 0, 5, 0]] + + for i in range(3): + for j in range(4): + self.assertEqual(expected[i][j], sm1[i, j]) + self.assertTrue(array_equal(sm1.toArray(), expected)) + + # Test conversion to dense and sparse. + smnew = sm1.toDense().toSparse() + self.assertEqual(sm1.numRows, smnew.numRows) + self.assertEqual(sm1.numCols, smnew.numCols) + self.assertTrue(array_equal(sm1.colPtrs, smnew.colPtrs)) + self.assertTrue(array_equal(sm1.rowIndices, smnew.rowIndices)) + self.assertTrue(array_equal(sm1.values, smnew.values)) + + sm1t = SparseMatrix( + 3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], + isTransposed=True) + self.assertEqual(sm1t.numRows, 3) + self.assertEqual(sm1t.numCols, 4) + self.assertEqual(sm1t.colPtrs.tolist(), [0, 2, 3, 5]) + self.assertEqual(sm1t.rowIndices.tolist(), [0, 1, 2, 0, 2]) + self.assertEqual(sm1t.values.tolist(), [3.0, 2.0, 4.0, 9.0, 8.0]) + + expected = [ + [3, 2, 0, 0], + [0, 0, 4, 0], + [9, 0, 8, 0]] + + for i in range(3): + for j in range(4): + self.assertEqual(expected[i][j], sm1t[i, j]) + self.assertTrue(array_equal(sm1t.toArray(), expected)) + + def test_dense_matrix_is_transposed(self): + mat1 = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True) + mat = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9]) + self.assertEqual(mat1, mat) + + expected = [[0, 4], [1, 6], [3, 9]] + for i in range(3): + for j in range(2): + self.assertEqual(mat1[i, j], expected[i][j]) + self.assertTrue(array_equal(mat1.toArray(), expected)) + + sm = mat1.toSparse() + self.assertTrue(array_equal(sm.rowIndices, [1, 2, 0, 1, 2])) + self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5])) + self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9])) + + def test_parse_vector(self): + a = DenseVector([]) + self.assertEqual(str(a), '[]') + self.assertEqual(Vectors.parse(str(a)), a) + a = DenseVector([3, 4, 6, 7]) + self.assertEqual(str(a), '[3.0,4.0,6.0,7.0]') + self.assertEqual(Vectors.parse(str(a)), a) + a = SparseVector(4, [], []) + self.assertEqual(str(a), '(4,[],[])') + self.assertEqual(SparseVector.parse(str(a)), a) + a = SparseVector(4, [0, 2], [3, 4]) + self.assertEqual(str(a), '(4,[0,2],[3.0,4.0])') + self.assertEqual(Vectors.parse(str(a)), a) + a = SparseVector(10, [0, 1], [4, 5]) + self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a) + + def test_norms(self): + a = DenseVector([0, 2, 3, -1]) + self.assertAlmostEqual(a.norm(2), 3.742, 3) + self.assertEqual(a.norm(1), 6.0) + self.assertEqual(a.norm(inf), 3.0) + a = SparseVector(4, [0, 2], [3, -4]) + self.assertAlmostEqual(a.norm(2), 5) + self.assertEqual(a.norm(1), 7.0) + self.assertEqual(a.norm(inf), 4.0) + + tmp = SparseVector(4, [0, 2], [3, 0]) + self.assertEqual(tmp.numNonzeros(), 1) + + +class VectorUDTTests(MLlibTestCase): + + dv0 = DenseVector([]) + dv1 = DenseVector([1.0, 2.0]) + sv0 = SparseVector(2, [], []) + sv1 = SparseVector(2, [1], [2.0]) + udt = VectorUDT() + + def test_json_schema(self): + self.assertEqual(VectorUDT.fromJson(self.udt.jsonValue()), self.udt) + + def test_serialization(self): + for v in [self.dv0,
self.dv1, self.sv0, self.sv1]: + self.assertEqual(v, self.udt.deserialize(self.udt.serialize(v))) + + def test_infer_schema(self): + rdd = self.sc.parallelize([LabeledPoint(1.0, self.dv1), LabeledPoint(0.0, self.sv1)]) + df = rdd.toDF() + schema = df.schema + field = [f for f in schema.fields if f.name == "features"][0] + self.assertEqual(field.dataType, self.udt) + vectors = df.rdd.map(lambda p: p.features).collect() + self.assertEqual(len(vectors), 2) + for v in vectors: + if isinstance(v, SparseVector): + self.assertEqual(v, self.sv1) + elif isinstance(v, DenseVector): + self.assertEqual(v, self.dv1) + # The following two branches are kept for compatibility, because the Scala + # MLlib code still uses mllib.linalg Vector. + # TODO: remove them when mllib.linalg Vector is removed. + elif isinstance(v, OldSparseVector): + self.assertTrue(array_equal(v.toArray(), self.sv1.toArray())) + elif isinstance(v, OldDenseVector): + self.assertTrue(array_equal(v.toArray(), self.dv1.toArray())) + else: + raise TypeError("expecting a vector but got %r of type %r" % (v, type(v))) + + +class MatrixUDTTests(MLlibTestCase): + + dm1 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10]) + dm2 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10], isTransposed=True) + sm1 = SparseMatrix(1, 1, [0, 1], [0], [2.0]) + sm2 = SparseMatrix(2, 1, [0, 0, 1], [0], [5.0], isTransposed=True) + udt = MatrixUDT() + + def test_json_schema(self): + self.assertEqual(MatrixUDT.fromJson(self.udt.jsonValue()), self.udt) + + def test_serialization(self): + for m in [self.dm1, self.dm2, self.sm1, self.sm2]: + self.assertEqual(m, self.udt.deserialize(self.udt.serialize(m))) + + def test_infer_schema(self): + rdd = self.sc.parallelize([("dense", self.dm1), ("sparse", self.sm1)]) + df = rdd.toDF() + schema = df.schema + self.assertEqual(schema.fields[1].dataType, self.udt) + matrices = df.rdd.map(lambda x: x._2).collect() + self.assertEqual(len(matrices), 2) + for m in matrices: + if isinstance(m, DenseMatrix): + self.assertTrue(array_equal(m.toArray(), self.dm1.toArray())) + elif isinstance(m, SparseMatrix): + self.assertTrue(array_equal(m.toArray(), self.sm1.toArray())) + # The following two branches are kept for compatibility, because the Scala + # MLlib code still uses mllib.linalg Matrix. + # TODO: remove them when mllib.linalg Matrix is removed. + elif isinstance(m, OldDenseMatrix): + self.assertTrue(array_equal(m.toArray(), self.dm1.toArray())) + elif isinstance(m, OldSparseMatrix): + self.assertTrue(array_equal(m.toArray(), self.sm1.toArray())) + else: + raise ValueError("Expected a matrix but got type %r" % type(m)) + + if __name__ == "__main__": from pyspark.ml.tests import * if xmlrunner: diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 0920ae6ea144b..75789c4d09184 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -151,7 +151,7 @@ class CrossValidator(Estimator, ValidatorParams): >>> from pyspark.ml.classification import LogisticRegression >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> dataset = sqlContext.createDataFrame( ... [(Vectors.dense([0.0]), 0.0), ... (Vectors.dense([0.4]), 1.0), @@ -310,7 +310,7 @@ class TrainValidationSplit(Estimator, ValidatorParams): >>> from pyspark.ml.classification import LogisticRegression >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> dataset = sqlContext.createDataFrame( ... [(Vectors.dense([0.0]), 0.0), ...
(Vectors.dense([0.4]), 1.0), diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index fe5b6844bfceb..6dec0c2cff881 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -23,8 +23,8 @@ from pyspark import RDD, since from pyspark.streaming import DStream +from pyspark.ml.linalg import DenseVector, SparseVector, _convert_to_vector from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py -from pyspark.mllib.linalg import DenseVector, SparseVector, _convert_to_vector from pyspark.mllib.regression import ( LabeledPoint, LinearModel, _regression_train_wrapper, StreamingLinearAlgorithm) diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 95f7278dc64ce..2ad56748185fc 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -31,8 +31,8 @@ from pyspark import SparkContext, since from pyspark.rdd import RDD, ignore_unicode_prefix +from pyspark.ml.linalg import SparseVector, _convert_to_vector, DenseVector from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, callJavaFunc, _py2java, _java2py -from pyspark.mllib.linalg import SparseVector, _convert_to_vector, DenseVector from pyspark.mllib.regression import LabeledPoint from pyspark.mllib.stat.distribution import MultivariateGaussian from pyspark.mllib.util import Saveable, Loader, inherit_doc, JavaLoader, JavaSaveable @@ -370,7 +370,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): A clustering model derived from the Gaussian Mixture Model method. - >>> from pyspark.mllib.linalg import Vectors, DenseMatrix + >>> from pyspark.ml.linalg import Vectors, DenseMatrix >>> from numpy.testing import assert_equal >>> from shutil import rmtree >>> import os, tempfile @@ -922,7 +922,7 @@ class LDAModel(JavaModelWrapper, JavaSaveable, Loader): - Original LDA paper (journal version): Blei, Ng, and Jordan. "Latent Dirichlet Allocation." JMLR, 2003. - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> from numpy.testing import assert_almost_equal, assert_equal >>> data = [ ... 
[1, Vectors.dense([0.0, 1.0])], diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 90559f6cfbe43..903dde74ee665 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -32,9 +32,9 @@ from pyspark import since from pyspark.rdd import RDD, ignore_unicode_prefix -from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import ( +from pyspark.ml.linalg import ( Vector, Vectors, DenseVector, SparseVector, _convert_to_vector) +from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper from pyspark.mllib.regression import LabeledPoint from pyspark.mllib.util import JavaLoader, JavaSaveable diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index af34ce346b0ca..ae3c5c15d4606 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -27,8 +27,8 @@ from py4j.java_gateway import JavaObject from pyspark import RDD, since +from pyspark.ml.linalg import _convert_to_vector, Matrix, QRDecomposition from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import _convert_to_vector, Matrix, QRDecomposition from pyspark.mllib.stat import MultivariateStatisticalSummary from pyspark.storagelevel import StorageLevel @@ -1186,7 +1186,7 @@ def _test(): import doctest from pyspark import SparkContext from pyspark.sql import SQLContext - from pyspark.mllib.linalg import Matrices + from pyspark.ml.linalg import Matrices import pyspark.mllib.linalg.distributed globs = pyspark.mllib.linalg.distributed.__dict__.copy() globs['sc'] = SparkContext('local[2]', 'PythonTest', batchSize=2) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 639c5eabaa23b..ee9c1aa81eac3 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -21,8 +21,8 @@ from pyspark import RDD, since from pyspark.streaming.dstream import DStream +from pyspark.ml.linalg import SparseVector, Vectors, _convert_to_vector from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc -from pyspark.mllib.linalg import SparseVector, Vectors, _convert_to_vector from pyspark.mllib.util import Saveable, Loader __all__ = ['LabeledPoint', 'LinearModel', @@ -42,7 +42,7 @@ class LabeledPoint(object): Label for this data point. :param features: Vector of features for this point (NumPy array, list, - pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). + pyspark.ml.linalg.SparseVector, or scipy.sparse column matrix). Note: 'label' and 'features' are accessible as class attributes. diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 36c8f48a4a882..5fbcb9f436e88 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -20,8 +20,8 @@ basestring = str from pyspark.rdd import RDD, ignore_unicode_prefix +from pyspark.ml.linalg import Matrix, _convert_to_vector from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import Matrix, _convert_to_vector from pyspark.mllib.regression import LabeledPoint from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult @@ -72,7 +72,7 @@ def colStats(rdd): :return: :class:`MultivariateStatisticalSummary` object containing column-wise summary statistics. 
- >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> rdd = sc.parallelize([Vectors.dense([2, 0, 0, -2]), ... Vectors.dense([4, 5, 0, 3]), ... Vectors.dense([6, 7, 0, 8])]) @@ -124,7 +124,7 @@ def corr(x, y=None, method=None): >>> from math import isnan >>> isnan(Statistics.corr(x, zeros)) True - >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.linalg import Vectors >>> rdd = sc.parallelize([Vectors.dense([1, 0, 0, -2]), Vectors.dense([4, 5, 0, 3]), ... Vectors.dense([6, 7, 0, 8]), Vectors.dense([9, 0, 0, 1])]) >>> pearsonCorr = Statistics.corr(rdd) @@ -190,7 +190,7 @@ def chiSqTest(observed, expected=None): :return: ChiSquaredTest object containing the test statistic, degrees of freedom, p-value, the method used, and the null hypothesis. - >>> from pyspark.mllib.linalg import Vectors, Matrices + >>> from pyspark.ml.linalg import Vectors, Matrices >>> observed = Vectors.dense([4, 6, 5]) >>> pearson = Statistics.chiSqTest(observed) >>> print(pearson.statistic) diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 74cf7bb8eaf9d..8536f9ec28c71 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -49,10 +49,10 @@ import unittest from pyspark import SparkContext +from pyspark.ml.linalg import Vector, SparseVector, DenseVector, _convert_to_vector,\ + Vectors, Matrices from pyspark.mllib.common import _to_java_object_rdd from pyspark.mllib.clustering import StreamingKMeans, StreamingKMeansModel -from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector,\ - DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT from pyspark.mllib.classification import StreamingLogisticRegressionWithSGD from pyspark.mllib.recommendation import Rating from pyspark.mllib.regression import LabeledPoint, StreamingLinearRegressionWithSGD @@ -145,285 +145,6 @@ def _squared_distance(a, b): return b.squared_distance(a) -class VectorTests(MLlibTestCase): - - def _test_serialize(self, v): - self.assertEqual(v, ser.loads(ser.dumps(v))) - jvec = self.sc._jvm.SerDe.loads(bytearray(ser.dumps(v))) - nv = ser.loads(bytes(self.sc._jvm.SerDe.dumps(jvec))) - self.assertEqual(v, nv) - vs = [v] * 100 - jvecs = self.sc._jvm.SerDe.loads(bytearray(ser.dumps(vs))) - nvs = ser.loads(bytes(self.sc._jvm.SerDe.dumps(jvecs))) - self.assertEqual(vs, nvs) - - def test_serialize(self): - self._test_serialize(DenseVector(range(10))) - self._test_serialize(DenseVector(array([1., 2., 3., 4.]))) - self._test_serialize(DenseVector(pyarray.array('d', range(10)))) - self._test_serialize(SparseVector(4, {1: 1, 3: 2})) - self._test_serialize(SparseVector(3, {})) - self._test_serialize(DenseMatrix(2, 3, range(6))) - sm1 = SparseMatrix( - 3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0]) - self._test_serialize(sm1) - - def test_dot(self): - sv = SparseVector(4, {1: 1, 3: 2}) - dv = DenseVector(array([1., 2., 3., 4.])) - lst = DenseVector([1, 2, 3, 4]) - mat = array([[1., 2., 3., 4.], - [1., 2., 3., 4.], - [1., 2., 3., 4.], - [1., 2., 3., 4.]]) - arr = pyarray.array('d', [0, 1, 2, 3]) - self.assertEqual(10.0, sv.dot(dv)) - self.assertTrue(array_equal(array([3., 6., 9., 12.]), sv.dot(mat))) - self.assertEqual(30.0, dv.dot(dv)) - self.assertTrue(array_equal(array([10., 20., 30., 40.]), dv.dot(mat))) - self.assertEqual(30.0, lst.dot(dv)) - self.assertTrue(array_equal(array([10., 20., 30., 40.]), lst.dot(mat))) - self.assertEqual(7.0, sv.dot(arr)) - - def test_squared_distance(self): - sv = 
SparseVector(4, {1: 1, 3: 2}) - dv = DenseVector(array([1., 2., 3., 4.])) - lst = DenseVector([4, 3, 2, 1]) - lst1 = [4, 3, 2, 1] - arr = pyarray.array('d', [0, 2, 1, 3]) - narr = array([0, 2, 1, 3]) - self.assertEqual(15.0, _squared_distance(sv, dv)) - self.assertEqual(25.0, _squared_distance(sv, lst)) - self.assertEqual(20.0, _squared_distance(dv, lst)) - self.assertEqual(15.0, _squared_distance(dv, sv)) - self.assertEqual(25.0, _squared_distance(lst, sv)) - self.assertEqual(20.0, _squared_distance(lst, dv)) - self.assertEqual(0.0, _squared_distance(sv, sv)) - self.assertEqual(0.0, _squared_distance(dv, dv)) - self.assertEqual(0.0, _squared_distance(lst, lst)) - self.assertEqual(25.0, _squared_distance(sv, lst1)) - self.assertEqual(3.0, _squared_distance(sv, arr)) - self.assertEqual(3.0, _squared_distance(sv, narr)) - - def test_hash(self): - v1 = DenseVector([0.0, 1.0, 0.0, 5.5]) - v2 = SparseVector(4, [(1, 1.0), (3, 5.5)]) - v3 = DenseVector([0.0, 1.0, 0.0, 5.5]) - v4 = SparseVector(4, [(1, 1.0), (3, 2.5)]) - self.assertEqual(hash(v1), hash(v2)) - self.assertEqual(hash(v1), hash(v3)) - self.assertEqual(hash(v2), hash(v3)) - self.assertFalse(hash(v1) == hash(v4)) - self.assertFalse(hash(v2) == hash(v4)) - - def test_eq(self): - v1 = DenseVector([0.0, 1.0, 0.0, 5.5]) - v2 = SparseVector(4, [(1, 1.0), (3, 5.5)]) - v3 = DenseVector([0.0, 1.0, 0.0, 5.5]) - v4 = SparseVector(6, [(1, 1.0), (3, 5.5)]) - v5 = DenseVector([0.0, 1.0, 0.0, 2.5]) - v6 = SparseVector(4, [(1, 1.0), (3, 2.5)]) - self.assertEqual(v1, v2) - self.assertEqual(v1, v3) - self.assertFalse(v2 == v4) - self.assertFalse(v1 == v5) - self.assertFalse(v1 == v6) - - def test_equals(self): - indices = [1, 2, 4] - values = [1., 3., 2.] - self.assertTrue(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 0., 2.])) - self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 1., 0., 2.])) - self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 3., 0., 2.])) - self.assertFalse(Vectors._equals(indices, values, list(range(5)), [0., 1., 3., 2., 2.])) - - def test_conversion(self): - # numpy arrays should be automatically upcast to float64 - # tests for fix of [SPARK-5089] - v = array([1, 2, 3, 4], dtype='float64') - dv = DenseVector(v) - self.assertTrue(dv.array.dtype == 'float64') - v = array([1, 2, 3, 4], dtype='float32') - dv = DenseVector(v) - self.assertTrue(dv.array.dtype == 'float64') - - def test_sparse_vector_indexing(self): - sv = SparseVector(5, {1: 1, 3: 2}) - self.assertEqual(sv[0], 0.) - self.assertEqual(sv[3], 2.) - self.assertEqual(sv[1], 1.) - self.assertEqual(sv[2], 0.) - self.assertEqual(sv[4], 0.) - self.assertEqual(sv[-1], 0.) - self.assertEqual(sv[-2], 2.) - self.assertEqual(sv[-3], 0.) - self.assertEqual(sv[-5], 0.) 
- for ind in [5, -6]: - self.assertRaises(ValueError, sv.__getitem__, ind) - for ind in [7.8, '1']: - self.assertRaises(TypeError, sv.__getitem__, ind) - - zeros = SparseVector(4, {}) - self.assertEqual(zeros[0], 0.0) - self.assertEqual(zeros[3], 0.0) - for ind in [4, -5]: - self.assertRaises(ValueError, zeros.__getitem__, ind) - - empty = SparseVector(0, {}) - for ind in [-1, 0, 1]: - self.assertRaises(ValueError, empty.__getitem__, ind) - - def test_matrix_indexing(self): - mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10]) - expected = [[0, 6], [1, 8], [4, 10]] - for i in range(3): - for j in range(2): - self.assertEqual(mat[i, j], expected[i][j]) - - def test_repr_dense_matrix(self): - mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10]) - self.assertTrue( - repr(mat), - 'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)') - - mat = DenseMatrix(3, 2, [0, 1, 4, 6, 8, 10], True) - self.assertTrue( - repr(mat), - 'DenseMatrix(3, 2, [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], False)') - - mat = DenseMatrix(6, 3, zeros(18)) - self.assertTrue( - repr(mat), - 'DenseMatrix(6, 3, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..., \ - 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], False)') - - def test_repr_sparse_matrix(self): - sm1t = SparseMatrix( - 3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], - isTransposed=True) - self.assertTrue( - repr(sm1t), - 'SparseMatrix(3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], True)') - - indices = tile(arange(6), 3) - values = ones(18) - sm = SparseMatrix(6, 3, [0, 6, 12, 18], indices, values) - self.assertTrue( - repr(sm), "SparseMatrix(6, 3, [0, 6, 12, 18], \ - [0, 1, 2, 3, 4, 5, 0, 1, ..., 4, 5, 0, 1, 2, 3, 4, 5], \ - [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ..., \ - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], False)") - - self.assertTrue( - str(sm), - "6 X 3 CSCMatrix\n\ - (0,0) 1.0\n(1,0) 1.0\n(2,0) 1.0\n(3,0) 1.0\n(4,0) 1.0\n(5,0) 1.0\n\ - (0,1) 1.0\n(1,1) 1.0\n(2,1) 1.0\n(3,1) 1.0\n(4,1) 1.0\n(5,1) 1.0\n\ - (0,2) 1.0\n(1,2) 1.0\n(2,2) 1.0\n(3,2) 1.0\n..\n..") - - sm = SparseMatrix(1, 18, zeros(19), [], []) - self.assertTrue( - repr(sm), - 'SparseMatrix(1, 18, \ - [0, 0, 0, 0, 0, 0, 0, 0, ..., 0, 0, 0, 0, 0, 0, 0, 0], [], [], False)') - - def test_sparse_matrix(self): - # Test sparse matrix creation. - sm1 = SparseMatrix( - 3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0]) - self.assertEqual(sm1.numRows, 3) - self.assertEqual(sm1.numCols, 4) - self.assertEqual(sm1.colPtrs.tolist(), [0, 2, 2, 4, 4]) - self.assertEqual(sm1.rowIndices.tolist(), [1, 2, 1, 2]) - self.assertEqual(sm1.values.tolist(), [1.0, 2.0, 4.0, 5.0]) - self.assertTrue( - repr(sm1), - 'SparseMatrix(3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0], False)') - - # Test indexing - expected = [ - [0, 0, 0, 0], - [1, 0, 4, 0], - [2, 0, 5, 0]] - - for i in range(3): - for j in range(4): - self.assertEqual(expected[i][j], sm1[i, j]) - self.assertTrue(array_equal(sm1.toArray(), expected)) - - # Test conversion to dense and sparse. 
- smnew = sm1.toDense().toSparse() - self.assertEqual(sm1.numRows, smnew.numRows) - self.assertEqual(sm1.numCols, smnew.numCols) - self.assertTrue(array_equal(sm1.colPtrs, smnew.colPtrs)) - self.assertTrue(array_equal(sm1.rowIndices, smnew.rowIndices)) - self.assertTrue(array_equal(sm1.values, smnew.values)) - - sm1t = SparseMatrix( - 3, 4, [0, 2, 3, 5], [0, 1, 2, 0, 2], [3.0, 2.0, 4.0, 9.0, 8.0], - isTransposed=True) - self.assertEqual(sm1t.numRows, 3) - self.assertEqual(sm1t.numCols, 4) - self.assertEqual(sm1t.colPtrs.tolist(), [0, 2, 3, 5]) - self.assertEqual(sm1t.rowIndices.tolist(), [0, 1, 2, 0, 2]) - self.assertEqual(sm1t.values.tolist(), [3.0, 2.0, 4.0, 9.0, 8.0]) - - expected = [ - [3, 2, 0, 0], - [0, 0, 4, 0], - [9, 0, 8, 0]] - - for i in range(3): - for j in range(4): - self.assertEqual(expected[i][j], sm1t[i, j]) - self.assertTrue(array_equal(sm1t.toArray(), expected)) - - def test_dense_matrix_is_transposed(self): - mat1 = DenseMatrix(3, 2, [0, 4, 1, 6, 3, 9], isTransposed=True) - mat = DenseMatrix(3, 2, [0, 1, 3, 4, 6, 9]) - self.assertEqual(mat1, mat) - - expected = [[0, 4], [1, 6], [3, 9]] - for i in range(3): - for j in range(2): - self.assertEqual(mat1[i, j], expected[i][j]) - self.assertTrue(array_equal(mat1.toArray(), expected)) - - sm = mat1.toSparse() - self.assertTrue(array_equal(sm.rowIndices, [1, 2, 0, 1, 2])) - self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5])) - self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9])) - - def test_parse_vector(self): - a = DenseVector([]) - self.assertEqual(str(a), '[]') - self.assertEqual(Vectors.parse(str(a)), a) - a = DenseVector([3, 4, 6, 7]) - self.assertEqual(str(a), '[3.0,4.0,6.0,7.0]') - self.assertEqual(Vectors.parse(str(a)), a) - a = SparseVector(4, [], []) - self.assertEqual(str(a), '(4,[],[])') - self.assertEqual(SparseVector.parse(str(a)), a) - a = SparseVector(4, [0, 2], [3, 4]) - self.assertEqual(str(a), '(4,[0,2],[3.0,4.0])') - self.assertEqual(Vectors.parse(str(a)), a) - a = SparseVector(10, [0, 1], [4, 5]) - self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a) - - def test_norms(self): - a = DenseVector([0, 2, 3, -1]) - self.assertAlmostEqual(a.norm(2), 3.742, 3) - self.assertTrue(a.norm(1), 6) - self.assertTrue(a.norm(inf), 3) - a = SparseVector(4, [0, 2], [3, -4]) - self.assertAlmostEqual(a.norm(2), 5) - self.assertTrue(a.norm(1), 7) - self.assertTrue(a.norm(inf), 4) - - tmp = SparseVector(4, [0, 2], [3, 0]) - self.assertEqual(tmp.numNonzeros(), 1) - - class ListTests(MLlibTestCase): """ @@ -684,69 +405,6 @@ def test_col_norms(self): self.assertTrue(math.fabs(summary2.normL2()[0] - expectedNormL2) < 1e-14) -class VectorUDTTests(MLlibTestCase): - - dv0 = DenseVector([]) - dv1 = DenseVector([1.0, 2.0]) - sv0 = SparseVector(2, [], []) - sv1 = SparseVector(2, [1], [2.0]) - udt = VectorUDT() - - def test_json_schema(self): - self.assertEqual(VectorUDT.fromJson(self.udt.jsonValue()), self.udt) - - def test_serialization(self): - for v in [self.dv0, self.dv1, self.sv0, self.sv1]: - self.assertEqual(v, self.udt.deserialize(self.udt.serialize(v))) - - def test_infer_schema(self): - rdd = self.sc.parallelize([LabeledPoint(1.0, self.dv1), LabeledPoint(0.0, self.sv1)]) - df = rdd.toDF() - schema = df.schema - field = [f for f in schema.fields if f.name == "features"][0] - self.assertEqual(field.dataType, self.udt) - vectors = df.rdd.map(lambda p: p.features).collect() - self.assertEqual(len(vectors), 2) - for v in vectors: - if isinstance(v, SparseVector): - self.assertEqual(v, self.sv1) - elif 
isinstance(v, DenseVector): - self.assertEqual(v, self.dv1) - else: - raise TypeError("expecting a vector but got %r of type %r" % (v, type(v))) - - -class MatrixUDTTests(MLlibTestCase): - - dm1 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10]) - dm2 = DenseMatrix(3, 2, [0, 1, 4, 5, 9, 10], isTransposed=True) - sm1 = SparseMatrix(1, 1, [0, 1], [0], [2.0]) - sm2 = SparseMatrix(2, 1, [0, 0, 1], [0], [5.0], isTransposed=True) - udt = MatrixUDT() - - def test_json_schema(self): - self.assertEqual(MatrixUDT.fromJson(self.udt.jsonValue()), self.udt) - - def test_serialization(self): - for m in [self.dm1, self.dm2, self.sm1, self.sm2]: - self.assertEqual(m, self.udt.deserialize(self.udt.serialize(m))) - - def test_infer_schema(self): - rdd = self.sc.parallelize([("dense", self.dm1), ("sparse", self.sm1)]) - df = rdd.toDF() - schema = df.schema - self.assertTrue(schema.fields[1].dataType, self.udt) - matrices = df.rdd.map(lambda x: x._2).collect() - self.assertEqual(len(matrices), 2) - for m in matrices: - if isinstance(m, DenseMatrix): - self.assertTrue(m, self.dm1) - elif isinstance(m, SparseMatrix): - self.assertTrue(m, self.sm1) - else: - raise ValueError("Expected a matrix but got type %r" % type(m)) - - @unittest.skipIf(not _have_scipy, "SciPy not installed") class SciPyTests(MLlibTestCase): diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index f7ea466b43291..48980a803c592 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -20,8 +20,8 @@ import random from pyspark import SparkContext, RDD, since +from pyspark.ml.linalg import _convert_to_vector from pyspark.mllib.common import callMLlibFunc, inherit_doc, JavaModelWrapper -from pyspark.mllib.linalg import _convert_to_vector from pyspark.mllib.regression import LabeledPoint from pyspark.mllib.util import JavaLoader, JavaSaveable @@ -258,7 +258,7 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import DecisionTree - >>> from pyspark.mllib.linalg import SparseVector + >>> from pyspark.ml.linalg import SparseVector >>> >>> sparse_data = [ ... LabeledPoint(0.0, SparseVector(2, {0: 0.0})), @@ -457,7 +457,7 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, numTrees, featureSubsetSt >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import RandomForest - >>> from pyspark.mllib.linalg import SparseVector + >>> from pyspark.ml.linalg import SparseVector >>> >>> sparse_data = [ ... LabeledPoint(0.0, SparseVector(2, {0: 1.0})), @@ -627,7 +627,7 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import GradientBoostedTrees - >>> from pyspark.mllib.linalg import SparseVector + >>> from pyspark.ml.linalg import SparseVector >>> >>> sparse_data = [ ... LabeledPoint(0.0, SparseVector(2, {0: 1.0})), diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index 39bc6586dd582..1f4348f7b1bd6 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -24,8 +24,8 @@ basestring = str from pyspark import SparkContext, since +from pyspark.ml.linalg import Vectors, SparseVector, _convert_to_vector from pyspark.mllib.common import callMLlibFunc, inherit_doc -from pyspark.mllib.linalg import Vectors, SparseVector, _convert_to_vector class MLUtils(object):
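Editor's note: with every pyspark.mllib module above now importing its vector types from pyspark.ml.linalg, the RDD-based entry points keep working against the new classes. Below is a minimal sketch of the resulting user-facing usage, assuming a live SparkContext named `sc`; it mirrors the updated DecisionTree doctests in this patch rather than adding anything new.

```python
# Vectors now come from pyspark.ml.linalg (previously pyspark.mllib.linalg);
# the RDD-based MLlib APIs accept them unchanged.
from pyspark.ml.linalg import SparseVector
from pyspark.mllib.regression import LabeledPoint
from pyspark.mllib.tree import DecisionTree

sparse_data = [
    LabeledPoint(0.0, SparseVector(2, {0: 0.0})),
    LabeledPoint(1.0, SparseVector(2, {1: 1.0})),
]
model = DecisionTree.trainClassifier(sc.parallelize(sparse_data), 2, {})
print(model.predict(SparseVector(2, {1: 1.0})))  # expected: 1.0
```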
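The new VectorUDT tests moved into pyspark/ml/tests.py above also reduce to a pure-Python round trip that needs no running cluster; a small standalone sketch, using the same fixture values as those tests:

```python
# serialize()/deserialize() on the UDT are plain Python, so this runs
# without a SparkContext.
from pyspark.ml.linalg import DenseVector, SparseVector, VectorUDT

udt = VectorUDT()
for v in [DenseVector([1.0, 2.0]), SparseVector(2, [1], [2.0])]:
    assert v == udt.deserialize(udt.serialize(v))
```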