Fix drop() method to handle quoted column names consistently (apache#1242)

H0TB0X420 · timsaucer · web-flow · commit 0ec0102000f4 · 2025-09-24T08:42:06.000-04:00
* Fix drop() method to handle quoted column names consistently - Strip enclosing double quotes from column names in drop() method - Maintains consistency with other DataFrame operations - Both drop('col') and drop('"col"') now work Fixes apache#1212 * Update drop() method docstring to clarify quote handling - Document that column names are case-sensitive and don't require quotes - Clarify that both quoted and unquoted column names are accepted - Add examples showing both 'col' and '"col"' syntax work - Note difference from select() operation behavior * Fix whitespace and documentation errors --------- Co-authored-by: Tim Saucer <timsaucer@gmail.com>
diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
@@ -413,13 +413,30 @@ def select(self, *exprs: Expr | str) -> DataFrame:
     def drop(self, *columns: str) -> DataFrame:
         """Drop arbitrary amount of columns.
 
+        Column names are case-sensitive and, unlike in operations such as
+        ``select``, do not need to be wrapped in double quotes. One enclosing
+        pair of double quotes is accepted and is stripped if present.
+
         Args:
-            columns: Column names to drop from the dataframe.
+            columns: Column names to drop from the dataframe. Both ``column_name``
+                    and ``"column_name"`` are accepted.
 
         Returns:
             DataFrame with those columns removed in the projection.
+
+        Example Usage::
+
+            df.drop('ID_For_Students')      # Works
+            df.drop('"ID_For_Students"')    # Also works (quotes stripped)
         """
-        return DataFrame(self.df.drop(*columns))
+        # Remove exactly one enclosing pair of double quotes. ``str.strip('"')``
+        # would also eat quotes that belong to the name itself (``'""a""'``), and
+        # the length guard keeps a lone ``'"'`` from collapsing to an empty name.
+        normalized_columns = [
+            col[1:-1] if len(col) >= 2 and col[0] == col[-1] == '"' else col
+            for col in columns
+        ]
+        return DataFrame(self.df.drop(*normalized_columns))
 
     def filter(self, *predicates: Expr) -> DataFrame:
         """Return a DataFrame for which ``predicate`` evaluates to ``True``.
diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
@@ -220,6 +220,16 @@ def test_select(df):
     assert result.column(1) == pa.array([1, 2, 3])
 
 
+def test_drop_quoted_columns():
+    column_name = "ID_For_Students"
+    batch = pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], names=[column_name])
+    df = SessionContext().create_dataframe([[batch]])
+
+    # The quoted and the bare spelling must both remove the only column.
+    for requested in (f'"{column_name}"', column_name):
+        assert df.drop(requested).schema().names == []
+
+
 def test_select_mixed_expr_string(df):
     df = df.select(column("b"), "a")