From 0abf742d09f62b8d35099f595791e649271267f1 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Fri, 10 Jul 2020 16:56:28 -0400
Subject: [PATCH 1/7] working prototype

---
 src/quantcore/matrix/__init__.py |  2 ++
 src/quantcore/matrix/pandas.py   | 39 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 src/quantcore/matrix/pandas.py

diff --git a/src/quantcore/matrix/__init__.py b/src/quantcore/matrix/__init__.py
index 210c7a44..8ef5b54c 100644
--- a/src/quantcore/matrix/__init__.py
+++ b/src/quantcore/matrix/__init__.py
@@ -1,6 +1,7 @@
 from .categorical_matrix import CategoricalMatrix
 from .dense_matrix import DenseMatrix
 from .matrix_base import MatrixBase, one_over_var_inf_to_val
+from .pandas import from_pandas
 from .sparse_matrix import SparseMatrix
 from .split_matrix import SplitMatrix, csc_to_split
 from .standardized_mat import StandardizedMatrix
@@ -14,4 +15,5 @@
     "CategoricalMatrix",
     "csc_to_split",
     "one_over_var_inf_to_val",
+    "from_pandas",
 ]
diff --git a/src/quantcore/matrix/pandas.py b/src/quantcore/matrix/pandas.py
new file mode 100644
index 00000000..cd77624e
--- /dev/null
+++ b/src/quantcore/matrix/pandas.py
@@ -0,0 +1,39 @@
+import warnings
+
+import pandas as pd
+import scipy.sparse as sps
+
+from .categorical_matrix import CategoricalMatrix
+from .matrix_base import MatrixBase
+from .split_matrix import SplitMatrix, csc_to_split
+
+
+def from_pandas(
+    df: pd.DataFrame,
+    sparse_threshold: float = 0.1,
+    cat_threshold: int = 4,
+    object_as_cat: bool = False,
+) -> MatrixBase:
+    """
+    TODO:
+     - docstring
+     - tests
+     - efficiency
+     - consider changing filename
+    """
+    if object_as_cat:
+        for colname in df.select_dtypes("object"):
+            df[colname] = df[colname].astype("category")
+    else:
+        if not df.select_dtypes(include=object).empty:
+            warnings.warn("DataFrame contains columns with object dtypes. Ignoring")
+
+    categorical_component = df.select_dtypes(include=pd.CategoricalDtype)
+    X_cat = []
+    for colname in categorical_component:
+        X_cat.append(CategoricalMatrix(categorical_component[colname]))
+
+    numerical_component = df.select_dtypes(include="number")
+    X_noncat = csc_to_split(sps.csc_matrix(numerical_component))
+
+    return SplitMatrix([*X_noncat.matrices, *X_cat])

From 1d8bee701d7e2b8544e61019f4f7cb91ea4cacd9 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Mon, 13 Jul 2020 14:24:40 -0400
Subject: [PATCH 2/7] more efficient implementation + docstring

---
 src/quantcore/matrix/pandas.py | 95 ++++++++++++++++++++++++++++------
 1 file changed, 78 insertions(+), 17 deletions(-)

diff --git a/src/quantcore/matrix/pandas.py b/src/quantcore/matrix/pandas.py
index cd77624e..81b6d9ec 100644
--- a/src/quantcore/matrix/pandas.py
+++ b/src/quantcore/matrix/pandas.py
@@ -1,39 +1,100 @@
 import warnings
 
+import numpy as np
 import pandas as pd
-import scipy.sparse as sps
+from pandas.api.types import is_numeric_dtype
 
 from .categorical_matrix import CategoricalMatrix
+from .dense_matrix import DenseMatrix
 from .matrix_base import MatrixBase
-from .split_matrix import SplitMatrix, csc_to_split
+from .sparse_matrix import SparseMatrix
+from .split_matrix import SplitMatrix
 
 
 def from_pandas(
     df: pd.DataFrame,
+    dtype: np.dtype = np.float64,
     sparse_threshold: float = 0.1,
     cat_threshold: int = 4,
     object_as_cat: bool = False,
 ) -> MatrixBase:
     """
-    TODO:
-     - docstring
-     - tests
-     - efficiency
-     - consider changing filename
+    Transform a pandas.DataFrame into an efficient SplitMatrix
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        pandas DataFrame to be converted.
+    dtype : np.dtype, default np.float64
+        dtype of all sub-matrices of the resulting SplitMatrix.
+    sparse_threshold : float, default 0.1
+        Density threshold below which numerical columns will be stored in a sparse
+        format.
+    cat_threshold : int, default 4
+        Number of levels of a categorical column under which the column will be stored
+        as sparse one-hot-encoded columns instead of CategoricalMatrix
+    object_as_cat : bool, default False
+        If True, DataFrame columns stored as python objects will be treated as
+        categorical columns.
+
+    Returns
+    -------
+    SplitMatrix
     """
     if object_as_cat:
         for colname in df.select_dtypes("object"):
             df[colname] = df[colname].astype("category")
-    else:
-        if not df.select_dtypes(include=object).empty:
-            warnings.warn("DataFrame contains columns with object dtypes. Ignoring")
 
-    categorical_component = df.select_dtypes(include=pd.CategoricalDtype)
-    X_cat = []
-    for colname in categorical_component:
-        X_cat.append(CategoricalMatrix(categorical_component[colname]))
+    matrices = []
+    sparse_ohe_comp = []
+    sparse_idx = []
+    dense_idx = []
+    ignored_cols = []
+    for colidx, (colname, coldata) in enumerate(df.iteritems()):
+        # categorical
+        if isinstance(coldata.dtype, pd.CategoricalDtype):
+            if len(coldata.cat.categories) < cat_threshold:
+                sparse_ohe_comp.append(
+                    pd.get_dummies(coldata, prefix=colname, sparse=True)
+                )
+            else:
+                matrices.append(CategoricalMatrix(coldata, dtype=dtype))
+
+        # sparse data, keep in sparse format even if density is larger than threshold
+        elif isinstance(coldata.dtype, pd.SparseDtype):
+            sparse_idx.append(colidx)
 
-    numerical_component = df.select_dtypes(include="number")
-    X_noncat = csc_to_split(sps.csc_matrix(numerical_component))
+        # All other numerical dtypes (needs to be after pd.SparseDtype)
+        elif is_numeric_dtype(coldata):
+            # check if we want to store as sparse
+            if (coldata != 0).mean() <= sparse_threshold:
+                sparse_dtype = pd.SparseDtype(coldata.dtype, fill_value=0)
+                df.iloc[:, colidx] = df.iloc[:, colidx].astype(sparse_dtype)
+                sparse_idx.append(colidx)
+            else:
+                dense_idx.append(colidx)
 
-    return SplitMatrix([*X_noncat.matrices, *X_cat])
+        # dtype not handled yet
+        else:
+            ignored_cols.append((colidx, colname))
+
+    if len(ignored_cols) > 0:
+        warnings.warn(
+            f"Columns {ignored_cols} were ignored. Make sure they have a valid dtype."
+        )
+    if len(dense_idx) > 0:
+        dense_comp = DenseMatrix(df.iloc[:, dense_idx].astype(dtype))
+        matrices.append(dense_comp)
+    if len(sparse_idx) > 0:
+        sparse_comp = SparseMatrix(df.iloc[:, sparse_idx].sparse.to_coo(), dtype=dtype)
+        matrices.append(sparse_comp)
+    if len(sparse_ohe_comp) > 0:
+        sparse_ohe_comp = SparseMatrix(
+            pd.concat(sparse_ohe_comp, axis=1).sparse.to_coo(), dtype=dtype
+        )
+        matrices.append(sparse_ohe_comp)
+
+    if len(matrices) > 1:
+        return SplitMatrix(matrices)
+    else:
+        return matrices[0]

From 875f466a0c30ba388e0e00bdb4e985b1a810d787 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Thu, 16 Jul 2020 10:28:07 -0400
Subject: [PATCH 3/7] added simple test

---
 src/quantcore/matrix/__init__.py              |  2 +-
 .../matrix/{pandas.py => constructor.py}      |  0
 tests/test_matrices.py                        | 38 +++++++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)
 rename src/quantcore/matrix/{pandas.py => constructor.py} (100%)

diff --git a/src/quantcore/matrix/__init__.py b/src/quantcore/matrix/__init__.py
index 8ef5b54c..34887106 100644
--- a/src/quantcore/matrix/__init__.py
+++ b/src/quantcore/matrix/__init__.py
@@ -1,7 +1,7 @@
 from .categorical_matrix import CategoricalMatrix
+from .constructor import from_pandas
 from .dense_matrix import DenseMatrix
 from .matrix_base import MatrixBase, one_over_var_inf_to_val
-from .pandas import from_pandas
 from .sparse_matrix import SparseMatrix
 from .split_matrix import SplitMatrix, csc_to_split
 from .standardized_mat import StandardizedMatrix
diff --git a/src/quantcore/matrix/pandas.py b/src/quantcore/matrix/constructor.py
similarity index 100%
rename from src/quantcore/matrix/pandas.py
rename to src/quantcore/matrix/constructor.py
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index 4b3310f2..f4ebed6e 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -2,6 +2,7 @@
 from typing import List, Optional, Union
 
 import numpy as np
+import pandas as pd
 import pytest
 from scipy import sparse as sps
 
@@ -407,3 +408,40 @@ def test_indexing_range_row(mat: Union[mx.MatrixBase, mx.StandardizedMatrix]):
         res = res.A
     expected = mat.A[0:2, :]
     np.testing.assert_allclose(np.squeeze(res), expected)
+
+
+def test_pandas_to_matrix():
+    n_rows = 10
+    dense_column = np.linspace(-10, 10, num=n_rows, dtype=np.float64)
+    sparse_column = np.zeros(n_rows, dtype=np.float64)
+    sparse_column[::10] = 1.0
+    cat_column_lowdim = np.tile(["a", "b"], n_rows // 2)
+    cat_column_highdim = np.arange(n_rows)
+
+    dense_ser = pd.Series(dense_column)
+    sparse_ser = pd.Series(sparse_column, dtype=pd.SparseDtype("float", 0.0))
+    cat_ser_lowdim = pd.Categorical(cat_column_lowdim)
+    cat_ser_highdim = pd.Categorical(cat_column_highdim)
+
+    df = pd.DataFrame(
+        data={
+            "d": dense_ser,
+            "s": sparse_ser,
+            "cl": cat_ser_lowdim,
+            "ch": cat_ser_highdim,
+        }
+    )
+
+    mat = mx.from_pandas(df, sparse_threshold=0.3, cat_threshold=4)
+
+    assert mat.shape == (n_rows, 14)
+    assert len(mat.matrices) == 3
+    assert isinstance(mat, mx.SplitMatrix)
+
+    nb_col_by_type = {
+        mx.DenseMatrix: 1,
+        mx.SparseMatrix: 3,  # sparse column + low dimensional categorical
+        mx.CategoricalMatrix: n_rows,
+    }
+    for submat in mat.matrices:
+        assert submat.shape[1] == nb_col_by_type[type(submat)]

From bcc840a66621f299b5be7a16821929a5f8b85ba9 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Thu, 16 Jul 2020 18:03:30 -0400
Subject: [PATCH 4/7] keep ordering

---
 src/quantcore/matrix/constructor.py | 79 ++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 29 deletions(-)

diff --git a/src/quantcore/matrix/constructor.py b/src/quantcore/matrix/constructor.py
index 81b6d9ec..f1bae7fc 100644
--- a/src/quantcore/matrix/constructor.py
+++ b/src/quantcore/matrix/constructor.py
@@ -1,4 +1,5 @@
 import warnings
+from typing import List, Union
 
 import numpy as np
 import pandas as pd
@@ -8,7 +9,7 @@
 from .dense_matrix import DenseMatrix
 from .matrix_base import MatrixBase
 from .sparse_matrix import SparseMatrix
-from .split_matrix import SplitMatrix
+from .split_matrix import SplitMatrix, split_sparse_and_dense_parts
 
 
 def from_pandas(
@@ -45,56 +46,76 @@ def from_pandas(
         for colname in df.select_dtypes("object"):
             df[colname] = df[colname].astype("category")
 
-    matrices = []
-    sparse_ohe_comp = []
-    sparse_idx = []
-    dense_idx = []
+    matrices: List[Union[DenseMatrix, SparseMatrix, CategoricalMatrix]] = []
+    indices: List[List[int]] = []
+
+    dense_dfidx = []  # column index in original DataFrame
+    dense_mxidx = []  # index in the new SplitMatrix
+    sparse_dfidx = []  # column index in original DataFrame
+    sparse_mxidx = []  # index in the new SplitMatrix
     ignored_cols = []
-    for colidx, (colname, coldata) in enumerate(df.iteritems()):
+
+    mxcolidx = 0
+
+    for dfcolidx, (colname, coldata) in enumerate(df.iteritems()):
         # categorical
         if isinstance(coldata.dtype, pd.CategoricalDtype):
             if len(coldata.cat.categories) < cat_threshold:
-                sparse_ohe_comp.append(
-                    pd.get_dummies(coldata, prefix=colname, sparse=True)
+                (
+                    X_dense_F,
+                    X_sparse,
+                    dense_indices,
+                    sparse_indices,
+                ) = split_sparse_and_dense_parts(
+                    pd.get_dummies(coldata, prefix=colname, sparse=True),
+                    threshold=sparse_threshold,
                 )
+                matrices.append(X_dense_F)
+                indices.append(mxcolidx + dense_indices)
+                matrices.append(X_sparse)
+                indices.append(mxcolidx + sparse_indices)
+                mxcolidx += len(dense_indices) + len(sparse_indices)
             else:
-                matrices.append(CategoricalMatrix(coldata, dtype=dtype))
-
-        # sparse data, keep in sparse format even if density is larger than threshold
-        elif isinstance(coldata.dtype, pd.SparseDtype):
-            sparse_idx.append(colidx)
+                cat = CategoricalMatrix(coldata, dtype=dtype)
+                matrices.append(cat)
+                indices.append(mxcolidx + np.arange(cat.shape[1]))
+                mxcolidx += cat.shape[1]
 
         # All other numerical dtypes (needs to be after pd.SparseDtype)
         elif is_numeric_dtype(coldata):
             # check if we want to store as sparse
             if (coldata != 0).mean() <= sparse_threshold:
-                sparse_dtype = pd.SparseDtype(coldata.dtype, fill_value=0)
-                df.iloc[:, colidx] = df.iloc[:, colidx].astype(sparse_dtype)
-                sparse_idx.append(colidx)
+                if not isinstance(coldata.dtype, pd.SparseDtype):
+                    sparse_dtype = pd.SparseDtype(coldata.dtype, fill_value=0)
+                    df.iloc[:, dfcolidx] = coldata.astype(sparse_dtype)
+                sparse_dfidx.append(dfcolidx)
+                sparse_mxidx.append(mxcolidx)
+                mxcolidx += 1
             else:
-                dense_idx.append(colidx)
+                dense_dfidx.append(dfcolidx)
+                dense_mxidx.append(mxcolidx)
+                mxcolidx += 1
 
         # dtype not handled yet
         else:
-            ignored_cols.append((colidx, colname))
+            ignored_cols.append((dfcolidx, colname))
 
     if len(ignored_cols) > 0:
         warnings.warn(
             f"Columns {ignored_cols} were ignored. Make sure they have a valid dtype."
         )
-    if len(dense_idx) > 0:
-        dense_comp = DenseMatrix(df.iloc[:, dense_idx].astype(dtype))
-        matrices.append(dense_comp)
-    if len(sparse_idx) > 0:
-        sparse_comp = SparseMatrix(df.iloc[:, sparse_idx].sparse.to_coo(), dtype=dtype)
-        matrices.append(sparse_comp)
-    if len(sparse_ohe_comp) > 0:
-        sparse_ohe_comp = SparseMatrix(
-            pd.concat(sparse_ohe_comp, axis=1).sparse.to_coo(), dtype=dtype
+    if len(dense_dfidx) > 0:
+        matrices.append(DenseMatrix(df.iloc[:, dense_dfidx].astype(dtype)))
+        indices.append(dense_mxidx)
+    if len(sparse_dfidx) > 0:
+        matrices.append(
+            SparseMatrix(df.iloc[:, sparse_dfidx].sparse.to_coo(), dtype=dtype)
         )
-        matrices.append(sparse_ohe_comp)
+        indices.append(sparse_mxidx)
 
     if len(matrices) > 1:
-        return SplitMatrix(matrices)
+        return SplitMatrix(matrices, indices)
+    elif len(matrices) == 0:
+        raise ValueError("DataFrame contained no valid column")
     else:
         return matrices[0]

From ad8e9e326f3445825db35e7b09acd2930ffd0b68 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Thu, 16 Jul 2020 18:17:08 -0400
Subject: [PATCH 5/7] fix test

---
 src/quantcore/matrix/constructor.py |  6 +++++-
 tests/test_matrices.py              | 12 ++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/quantcore/matrix/constructor.py b/src/quantcore/matrix/constructor.py
index f1bae7fc..3f2ac9d2 100644
--- a/src/quantcore/matrix/constructor.py
+++ b/src/quantcore/matrix/constructor.py
@@ -67,7 +67,11 @@ def from_pandas(
                     dense_indices,
                     sparse_indices,
                 ) = split_sparse_and_dense_parts(
-                    pd.get_dummies(coldata, prefix=colname, sparse=True),
+                    pd.get_dummies(
+                        coldata, prefix=colname, sparse=True, dtype=np.float64
+                    )
+                    .sparse.to_coo()
+                    .tocsc(),
                     threshold=sparse_threshold,
                 )
                 matrices.append(X_dense_F)
diff --git a/tests/test_matrices.py b/tests/test_matrices.py
index e2c18c1a..99e61aa2 100644
--- a/tests/test_matrices.py
+++ b/tests/test_matrices.py
@@ -426,10 +426,10 @@ def test_indexing_range_row(mat: Union[mx.MatrixBase, mx.StandardizedMatrix]):
 
 
 def test_pandas_to_matrix():
-    n_rows = 10
+    n_rows = 50
     dense_column = np.linspace(-10, 10, num=n_rows, dtype=np.float64)
     sparse_column = np.zeros(n_rows, dtype=np.float64)
-    sparse_column[::10] = 1.0
+    sparse_column[0] = 1.0
     cat_column_lowdim = np.tile(["a", "b"], n_rows // 2)
     cat_column_highdim = np.arange(n_rows)
 
@@ -447,15 +447,15 @@ def test_pandas_to_matrix():
         }
     )
 
-    mat = mx.from_pandas(df, sparse_threshold=0.3, cat_threshold=4)
+    mat = mx.from_pandas(df, dtype=np.float64, sparse_threshold=0.3, cat_threshold=4)
 
-    assert mat.shape == (n_rows, 14)
+    assert mat.shape == (n_rows, n_rows + 4)
     assert len(mat.matrices) == 3
     assert isinstance(mat, mx.SplitMatrix)
 
     nb_col_by_type = {
-        mx.DenseMatrix: 1,
-        mx.SparseMatrix: 3,  # sparse column + low dimensional categorical
+        mx.DenseMatrix: 3,  # includes low-dimension categorical
+        mx.SparseMatrix: 1,  # sparse column
         mx.CategoricalMatrix: n_rows,
     }
     for submat in mat.matrices:

From b658a686154cec79171f95c1cb0baacff2166e09 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Thu, 16 Jul 2020 18:46:03 -0400
Subject: [PATCH 6/7] let user choose categorical location

---
 src/quantcore/matrix/constructor.py | 41 ++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/quantcore/matrix/constructor.py b/src/quantcore/matrix/constructor.py
index 3f2ac9d2..a8d3006b 100644
--- a/src/quantcore/matrix/constructor.py
+++ b/src/quantcore/matrix/constructor.py
@@ -18,6 +18,7 @@ def from_pandas(
     sparse_threshold: float = 0.1,
     cat_threshold: int = 4,
     object_as_cat: bool = False,
+    cat_position: str = "expand",
 ) -> MatrixBase:
     """
     Transform a pandas.DataFrame into an efficient SplitMatrix
@@ -37,6 +38,11 @@ def from_pandas(
     object_as_cat : bool, default False
         If True, DataFrame columns stored as python objects will be treated as
         categorical columns.
+    cat_position : str {'end'|'expand'}, default 'expand'
+        Position of the categorical variable in the index. If "end", all the
+        categoricals (including the ones that did not satisfy cat_threshold)
+        will be placed at the end of the index list. If "expand", all the variables
+        will remain in the same order.
 
     Returns
     -------
@@ -48,6 +54,7 @@ def from_pandas(
 
     matrices: List[Union[DenseMatrix, SparseMatrix, CategoricalMatrix]] = []
     indices: List[List[int]] = []
+    is_cat: List[bool] = []
 
     dense_dfidx = []  # column index in original DataFrame
     dense_mxidx = []  # index in the new SplitMatrix
@@ -75,16 +82,26 @@ def from_pandas(
                     threshold=sparse_threshold,
                 )
                 matrices.append(X_dense_F)
-                indices.append(mxcolidx + dense_indices)
+                is_cat.append(True)
                 matrices.append(X_sparse)
-                indices.append(mxcolidx + sparse_indices)
-                mxcolidx += len(dense_indices) + len(sparse_indices)
+                is_cat.append(True)
+                if cat_position == "expand":
+                    indices.append(mxcolidx + dense_indices)
+                    indices.append(mxcolidx + sparse_indices)
+                    mxcolidx += len(dense_indices) + len(sparse_indices)
+                elif cat_position == "end":
+                    indices.append(dense_indices)
+                    indices.append(sparse_indices)
+
             else:
                 cat = CategoricalMatrix(coldata, dtype=dtype)
                 matrices.append(cat)
-                indices.append(mxcolidx + np.arange(cat.shape[1]))
-                mxcolidx += cat.shape[1]
-
+                is_cat.append(True)
+                if cat_position == "expand":
+                    indices.append(mxcolidx + np.arange(cat.shape[1]))
+                    mxcolidx += cat.shape[1]
+                elif cat_position == "end":
+                    indices.append(np.arange(cat.shape[1]))
         # All other numerical dtypes (needs to be after pd.SparseDtype)
         elif is_numeric_dtype(coldata):
             # check if we want to store as sparse
@@ -111,11 +128,23 @@ def from_pandas(
     if len(dense_dfidx) > 0:
         matrices.append(DenseMatrix(df.iloc[:, dense_dfidx].astype(dtype)))
         indices.append(dense_mxidx)
+        is_cat.append(False)
     if len(sparse_dfidx) > 0:
         matrices.append(
             SparseMatrix(df.iloc[:, sparse_dfidx].sparse.to_coo(), dtype=dtype)
         )
         indices.append(sparse_mxidx)
+        is_cat.append(False)
+
+    if cat_position == "end":
+        new_indices = []
+        for mat_indices, is_cat_ in zip(indices, is_cat):
+            if is_cat:
+                new_indices.append(np.asarray(mat_indices) + mxcolidx)
+                mxcolidx += len(mat_indices)
+            else:
+                new_indices.append(mat_indices)
+        indices = new_indices
 
     if len(matrices) > 1:
         return SplitMatrix(matrices, indices)

From 29586bf328d7ef852cf15d94f6be43d0e5fe9f32 Mon Sep 17 00:00:00 2001
From: Marc-Antoine Schmidt <marc-antoine.schmidt@quantco.com>
Date: Thu, 16 Jul 2020 18:51:40 -0400
Subject: [PATCH 7/7] typo

---
 src/quantcore/matrix/constructor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/quantcore/matrix/constructor.py b/src/quantcore/matrix/constructor.py
index a8d3006b..4a54965c 100644
--- a/src/quantcore/matrix/constructor.py
+++ b/src/quantcore/matrix/constructor.py
@@ -139,7 +139,7 @@ def from_pandas(
     if cat_position == "end":
         new_indices = []
         for mat_indices, is_cat_ in zip(indices, is_cat):
-            if is_cat:
+            if is_cat_:
                 new_indices.append(np.asarray(mat_indices) + mxcolidx)
                 mxcolidx += len(mat_indices)
             else: