apache · betodealmeida · Aug 17, 2022 · Aug 7, 2022 · Aug 7, 2022 · Aug 7, 2022
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -420,7 +420,7 @@ Commits to `master` trigger a rebuild and redeploy of the documentation site. Su
 Make sure your machine meets the [OS dependencies](https://superset.apache.org/docs/installation/installing-superset-from-scratch#os-dependencies) before following these steps.
 You also need to install MySQL or [MariaDB](https://mariadb.com/downloads).
 
-Ensure that you are using Python version 3.8 or 3.9, then proceed with:
+Ensure that you are using Python version 3.8, 3.9 or 3.10, then proceed with:
 
 ```bash
 # Create a virtual environment and activate it (recommended)

diff --git a/Makefile b/Makefile
@@ -15,8 +15,8 @@
 # limitations under the License.
 #
 
-# Python version installed; we need 3.8-3.9
-PYTHON=`command -v python3.9 || command -v python3.8`
+# Python version installed; we need 3.8-3.10
+PYTHON=`command -v python3.10 || command -v python3.9 || command -v python3.8`
 
 .PHONY: install superset venv pre-commit
 
@@ -70,7 +70,7 @@ update-js:
 
 venv:
 	# Create a virtual environment and activate it (recommended)
-	if ! [ -x "${PYTHON}" ]; then echo "You need Python 3.8 or 3.9 installed"; exit 1; fi
+	if ! [ -x "${PYTHON}" ]; then echo "You need Python 3.8, 3.9 or 3.10 installed"; exit 1; fi
 	test -d venv || ${PYTHON} -m venv venv # setup a python3 virtualenv
 	. venv/bin/activate
 

diff --git a/UPDATING.md b/UPDATING.md
@@ -26,6 +26,7 @@ assists people when migrating to a new version.
 
 - [20606](https://github.com/apache/superset/pull/20606): When user clicks on chart title or "Edit chart" button in Dashboard page, Explore opens in the same tab. Clicking while holding cmd/ctrl opens Explore in a new tab. To bring back the old behaviour (always opening Explore in a new tab), flip feature flag `DASHBOARD_EDIT_CHART_IN_NEW_TAB` to `True`.
 - [20799](https://github.com/apache/superset/pull/20799): Presto and Trino engine will now display tracking URL for running queries in SQL Lab. If for some reason you don't want to show the tracking URL (for example, when your data warehouse hasn't enable access for to Presto or Trino UI), update `TRACKING_URL_TRANSFORMER` in `config.py` to return `None`.
+- [21002](https://github.com/apache/superset/pull/21002): Support Python 3.10 and bump pandas to 1.4 and pyarrow to 6.
 
 ### Breaking Changes
 

diff --git a/requirements/base.txt b/requirements/base.txt
@@ -185,7 +185,7 @@ packaging==21.3
     # via
     #   bleach
     #   deprecation
-pandas==1.3.4
+pandas==1.4.3
     # via apache-superset
 parsedatetime==2.6
     # via apache-superset
@@ -197,7 +197,7 @@ prison==0.2.1
     # via flask-appbuilder
 prompt-toolkit==3.0.28
     # via click-repl
-pyarrow==5.0.0
+pyarrow==6.0.1
     # via apache-superset
 pycparser==2.20
     # via cffi

diff --git a/setup.py b/setup.py
@@ -99,15 +99,15 @@ def get_git_sha() -> str:
         "markdown>=3.0",
         "msgpack>=1.0.0, <1.1",
         "numpy==1.22.1",
-        "pandas>=1.3.0, <1.4",
+        "pandas>=1.4.3, <1.5",
         "parsedatetime",
         "pgsanity",
         "polyline",
         "pyparsing>=3.0.6, <4",
         "python-dateutil",
         "python-dotenv",
         "python-geohash",
-        "pyarrow>=5.0.0, <6.0",
+        "pyarrow>=6.0.1, <7",
         "pyyaml>=5.4",
         "PyJWT>=2.4.0, <3.0",
         "redis",
@@ -182,5 +182,6 @@ def get_git_sha() -> str:
     classifiers=[
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
     ],
 )
diff --git a/superset/examples/bart_lines.py b/superset/examples/bart_lines.py
@@ -34,7 +34,7 @@ def load_bart_lines(only_metadata: bool = False, force: bool = False) -> None:
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        content = get_example_data("bart-lines.json.gz")
+        content = get_example_data("bart-lines.json.gz").decode("utf-8")
         df = pd.read_json(content, encoding="latin-1")
         df["path_json"] = df.path.map(json.dumps)
         df["polyline"] = df.path.map(polyline.encode)

diff --git a/superset/examples/birth_names.py b/superset/examples/birth_names.py
@@ -66,7 +66,7 @@ def gen_filter(
 
 
 def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
-    pdf = pd.read_json(get_example_data("birth_names2.json.gz"))
+    pdf = pd.read_json(get_example_data("birth_names2.json.gz").decode("utf-8"))
     # TODO(bkyryliuk): move load examples data into the pytest fixture
     if database.backend == "presto":
         pdf.ds = pd.to_datetime(pdf.ds, unit="ms")

diff --git a/superset/examples/energy.py b/superset/examples/energy.py
@@ -46,7 +46,7 @@ def load_energy(
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("energy.json.gz")
+        data = get_example_data("energy.json.gz").decode("utf-8")
         pdf = pd.read_json(data)
         pdf = pdf.head(100) if sample else pdf
         pdf.to_sql(

diff --git a/superset/examples/helpers.py b/superset/examples/helpers.py
@@ -19,7 +19,7 @@
 import os
 import zlib
 from io import BytesIO
-from typing import Any, Dict, List, Set
+from typing import Union, Any, Dict, List, Set
 from urllib import request
 
 from superset import app, db
@@ -75,7 +75,7 @@ def get_slice_json(defaults: Dict[Any, Any], **kwargs: Any) -> str:
 
 def get_example_data(
     filepath: str, is_gzip: bool = True, make_bytes: bool = False
-) -> BytesIO:
+) -> Union[bytes, BytesIO]:
     content = request.urlopen(  # pylint: disable=consider-using-with
         f"{BASE_URL}{filepath}?raw=true"
     ).read()

diff --git a/superset/examples/multiformat_time_series.py b/superset/examples/multiformat_time_series.py
@@ -44,7 +44,7 @@ def load_multiformat_time_series(  # pylint: disable=too-many-locals
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("multiformat_time_series.json.gz")
+        data = get_example_data("multiformat_time_series.json.gz").decode("utf-8")
         pdf = pd.read_json(data)
         # TODO(bkyryliuk): move load examples data into the pytest fixture
         if database.backend == "presto":

diff --git a/superset/examples/paris.py b/superset/examples/paris.py
@@ -33,7 +33,7 @@ def load_paris_iris_geojson(only_metadata: bool = False, force: bool = False) ->
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("paris_iris.json.gz")
+        data = get_example_data("paris_iris.json.gz").decode("utf-8")
         df = pd.read_json(data)
         df["features"] = df.features.map(json.dumps)
 

diff --git a/superset/examples/random_time_series.py b/superset/examples/random_time_series.py
@@ -42,7 +42,7 @@ def load_random_time_series_data(
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("random_time_series.json.gz")
+        data = get_example_data("random_time_series.json.gz").decode("utf-8")
         pdf = pd.read_json(data)
         if database.backend == "presto":
             pdf.ds = pd.to_datetime(pdf.ds, unit="s")

diff --git a/superset/examples/sf_population_polygons.py b/superset/examples/sf_population_polygons.py
@@ -35,7 +35,7 @@ def load_sf_population_polygons(
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("sf_population.json.gz")
+        data = get_example_data("sf_population.json.gz").decode("utf-8")
         df = pd.read_json(data)
         df["contour"] = df.contour.map(json.dumps)
 

diff --git a/superset/examples/world_bank.py b/superset/examples/world_bank.py
@@ -56,7 +56,7 @@ def load_world_bank_health_n_pop(  # pylint: disable=too-many-locals, too-many-s
     table_exists = database.has_table_by_name(tbl_name)
 
     if not only_metadata and (not table_exists or force):
-        data = get_example_data("countries.json.gz")
+        data = get_example_data("countries.json.gz").decode("utf-8")
         pdf = pd.read_json(data)
         pdf.columns = [col.replace(".", "_") for col in pdf.columns]
         if database.backend == "presto":

diff --git a/superset/result_set.py b/superset/result_set.py
@@ -161,6 +161,9 @@ def __init__(  # pylint: disable=too-many-locals
                         except Exception as ex:  # pylint: disable=broad-except
                             logger.exception(ex)
 
+        if not pa_data:
+            column_names = []
+
         self.table = pa.Table.from_arrays(pa_data, names=column_names)
         self._type_dict: Dict[str, Any] = {}
         try:

diff --git a/superset/viz.py b/superset/viz.py
@@ -2172,14 +2172,14 @@ def get_data(self, df: pd.DataFrame) -> VizData:
             if df is not None and not df.empty:
                 if metric:
                     df = df.sort_values(
-                        utils.get_metric_name(metric), ascending=flt.get("asc")
+                        utils.get_metric_name(metric), ascending=flt.get("asc", False)
                     )
                     data[col] = [
                         {"id": row[0], "text": row[0], "metric": row[1]}
                         for row in df.itertuples(index=False)
                     ]
                 else:
-                    df = df.sort_values(col, ascending=flt.get("asc"))
+                    df = df.sort_values(col, ascending=flt.get("asc", False))
                     data[col] = [
                         {"id": row[0], "text": row[0]}
                         for row in df.itertuples(index=False)

diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py
@@ -74,7 +74,7 @@ def test_contribution():
         rename_columns=["pct_a"],
     )
     assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"]
-    assert_array_equal(processed_df["a"].tolist(), [1, 3, nan])
-    assert_array_equal(processed_df["b"].tolist(), [1, 9, nan])
-    assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
+    assert_array_equal(processed_df["a"].tolist(), [1, 3, 0])
+    assert_array_equal(processed_df["b"].tolist(), [1, 9, 0])
+    assert_array_equal(processed_df["c"].tolist(), [0, 0, 0])
     assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0]