From 6157db9a2bcdafac7725064acd0dbb9198062904 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 28 Nov 2023 15:25:50 +0800 Subject: [PATCH 1/4] [SPARK-46135][PYTHON][DOCS] Fix table format error in ipynb docs --- .../getting_started/quickstart_connect.ipynb | 2 +- .../getting_started/quickstart_df.ipynb | 110 +- .../getting_started/quickstart_ps.ipynb | 1807 ++--------------- 3 files changed, 246 insertions(+), 1673 deletions(-) diff --git a/python/docs/source/getting_started/quickstart_connect.ipynb b/python/docs/source/getting_started/quickstart_connect.ipynb index 15a2ab749d2a..1d994bde907f 100644 --- a/python/docs/source/getting_started/quickstart_connect.ipynb +++ b/python/docs/source/getting_started/quickstart_connect.ipynb @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "!$HOME/sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_2.12:$SPARK_VERSION" + "!$HOME/sbin/start-connect-server.sh --packages org.apache.spark:spark-connect_2.13:$SPARK_VERSION" ] }, { diff --git a/python/docs/source/getting_started/quickstart_df.ipynb b/python/docs/source/getting_started/quickstart_df.ipynb index f1c04c8bf118..6b7716dcc422 100644 --- a/python/docs/source/getting_started/quickstart_df.ipynb +++ b/python/docs/source/getting_started/quickstart_df.ipynb @@ -218,27 +218,23 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
abcde
12.0string12000-01-012000-01-01 12:00:00
23.0string22000-02-012000-01-02 12:00:00
34.0string32000-03-012000-01-03 12:00:00
\n" - ], - "text/plain": [ - "DataFrame[a: bigint, b: double, c: string, d: date, e: timestamp]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "+---+---+-------+----------+-------------------+\n", + "| a| b| c| d| e|\n", + "+---+---+-------+----------+-------------------+\n", + "| 1|2.0|string1|2000-01-01|2000-01-01 12:00:00|\n", + "| 2|3.0|string2|2000-02-01|2000-01-02 12:00:00|\n", + "| 4|5.0|string3|2000-03-01|2000-01-03 12:00:00|\n", + "+---+---+-------+----------+-------------------+\n", + "\n" + ] } ], "source": [ "spark.conf.set('spark.sql.repl.eagerEval.enabled', True)\n", - "df" + "df.show()" ] }, { @@ -424,76 +420,22 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcde
012.0string12000-01-012000-01-01 12:00:00
123.0string22000-02-012000-01-02 12:00:00
234.0string32000-03-012000-01-03 12:00:00
\n", - "
" - ], - "text/plain": [ - " a b c d e\n", - "0 1 2.0 string1 2000-01-01 2000-01-01 12:00:00\n", - "1 2 3.0 string2 2000-02-01 2000-01-02 12:00:00\n", - "2 3 4.0 string3 2000-03-01 2000-01-03 12:00:00" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "+----+-----+-----+---------+------------+---------------------+\n", + "| | a | b | c | d | e |\n", + "|----+-----+-----+---------+------------+---------------------|\n", + "| 0 | 1 | 2 | string1 | 2000-01-01 | 2000-01-01 12:00:00 |\n", + "| 1 | 2 | 3 | string2 | 2000-02-01 | 2000-01-02 12:00:00 |\n", + "| 2 | 4 | 5 | string3 | 2000-03-01 | 2000-01-03 12:00:00 |\n", + "+----+-----+-----+---------+------------+---------------------+\n" + ] } ], "source": [ - "df.toPandas()" + "from tabulate import tabulate\n", + "print(tabulate(df.toPandas(), headers = 'keys', tablefmt = 'psql'))" ] }, { diff --git a/python/docs/source/getting_started/quickstart_ps.ipynb b/python/docs/source/getting_started/quickstart_ps.ipynb index 02884a00859d..a1febbc40d89 100644 --- a/python/docs/source/getting_started/quickstart_ps.ipynb +++ b/python/docs/source/getting_started/quickstart_ps.ipynb @@ -99,89 +99,21 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
101100one
202200two
303300three
404400four
505500five
606600six
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "10 1 100 one\n", - "20 2 200 two\n", - "30 3 300 three\n", - "40 4 400 four\n", - "50 5 500 five\n", - "60 6 600 six" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " a b c\n", + "10 1 100 one\n", + "20 2 200 two\n", + "30 3 300 three\n", + "40 4 400 four\n", + "50 5 500 five\n", + "60 6 600 six\n" + ] } ], "source": [ - "psdf" + "print(psdf)" ] }, { @@ -237,96 +169,21 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
2013-01-010.912558-0.795645-0.2891150.187606
2013-01-02-0.059703-1.2338970.316625-1.226828
2013-01-030.332871-1.262010-0.434844-0.579920
2013-01-040.924016-1.022019-0.405249-1.036021
2013-01-05-0.772209-1.2280990.0689010.896679
2013-01-061.485582-0.709306-0.202637-0.248766
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", - "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", - "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", + "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", + "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" + ] } ], "source": [ - "pdf" + "print(pdf)" ] }, { @@ -380,96 +237,21 @@ }, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
2013-01-010.912558-0.795645-0.2891150.187606
2013-01-02-0.059703-1.2338970.316625-1.226828
2013-01-030.332871-1.262010-0.434844-0.579920
2013-01-040.924016-1.022019-0.405249-1.036021
2013-01-05-0.772209-1.2280990.0689010.896679
2013-01-061.485582-0.709306-0.202637-0.248766
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", - "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", - "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", + "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", + "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" + ] } ], "source": [ - "psdf" + "print(psdf)" ] }, { @@ -548,96 +330,21 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
00.912558-0.795645-0.2891150.187606
1-0.059703-1.2338970.316625-1.226828
20.332871-1.262010-0.434844-0.579920
30.924016-1.022019-0.405249-1.036021
4-0.772209-1.2280990.0689010.896679
51.485582-0.709306-0.202637-0.248766
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 0.912558 -0.795645 -0.289115 0.187606\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "4 -0.772209 -1.228099 0.068901 0.896679\n", - "5 1.485582 -0.709306 -0.202637 -0.248766" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "0 0.912558 -0.795645 -0.289115 0.187606\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "4 -0.772209 -1.228099 0.068901 0.896679\n", + "5 1.485582 -0.709306 -0.202637 -0.248766" + ] } ], "source": [ - "psdf" + "print(psdf)" ] }, { @@ -686,88 +393,20 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
00.912558-0.795645-0.2891150.187606
1-0.059703-1.2338970.316625-1.226828
20.332871-1.262010-0.434844-0.579920
30.924016-1.022019-0.405249-1.036021
4-0.772209-1.2280990.0689010.896679
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 0.912558 -0.795645 -0.289115 0.187606\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "4 -0.772209 -1.228099 0.068901 0.896679" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "0 0.912558 -0.795645 -0.289115 0.187606\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "4 -0.772209 -1.228099 0.068901 0.896679" + ] } ], "source": [ - "psdf.head()" + "print(psdf.head())" ] }, { @@ -855,112 +494,23 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
count6.0000006.0000006.0000006.000000
mean0.470519-1.041829-0.157720-0.334542
std0.8094280.2415110.2945200.793014
min-0.772209-1.262010-0.434844-1.226828
25%-0.059703-1.233897-0.405249-1.036021
50%0.332871-1.228099-0.289115-0.579920
75%0.924016-0.7956450.0689010.187606
max1.485582-0.7093060.3166250.896679
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "count 6.000000 6.000000 6.000000 6.000000\n", - "mean 0.470519 -1.041829 -0.157720 -0.334542\n", - "std 0.809428 0.241511 0.294520 0.793014\n", - "min -0.772209 -1.262010 -0.434844 -1.226828\n", - "25% -0.059703 -1.233897 -0.405249 -1.036021\n", - "50% 0.332871 -1.228099 -0.289115 -0.579920\n", - "75% 0.924016 -0.795645 0.068901 0.187606\n", - "max 1.485582 -0.709306 0.316625 0.896679" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "count 6.000000 6.000000 6.000000 6.000000\n", + "mean 0.470519 -1.041829 -0.157720 -0.334542\n", + "std 0.809428 0.241511 0.294520 0.793014\n", + "min -0.772209 -1.262010 -0.434844 -1.226828\n", + "25% -0.059703 -1.233897 -0.405249 -1.036021\n", + "50% 0.332871 -1.228099 -0.289115 -0.579920\n", + "75% 0.924016 -0.795645 0.068901 0.187606\n", + "max 1.485582 -0.709306 0.316625 0.896679" + ] } ], "source": [ - "psdf.describe()" + "print(psdf.describe())" ] }, { @@ -976,90 +526,19 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
012345
A0.912558-0.0597030.3328710.924016-0.7722091.485582
B-0.795645-1.233897-1.262010-1.022019-1.228099-0.709306
C-0.2891150.316625-0.434844-0.4052490.068901-0.202637
D0.187606-1.226828-0.579920-1.0360210.896679-0.248766
\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5\n", - "A 0.912558 -0.059703 0.332871 0.924016 -0.772209 1.485582\n", - "B -0.795645 -1.233897 -1.262010 -1.022019 -1.228099 -0.709306\n", - "C -0.289115 0.316625 -0.434844 -0.405249 0.068901 -0.202637\n", - "D 0.187606 -1.226828 -0.579920 -1.036021 0.896679 -0.248766" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4 5\n", + "A 0.912558 -0.059703 0.332871 0.924016 -0.772209 1.485582\n", + "B -0.795645 -1.233897 -1.262010 -1.022019 -1.228099 -0.709306\n", + "C -0.289115 0.316625 -0.434844 -0.405249 0.068901 -0.202637\n", + "D 0.187606 -1.226828 -0.579920 -1.036021 0.896679 -0.248766" + ] } ], "source": [ - "psdf.T" + "print(psdf.T)" ] }, { @@ -1075,96 +554,21 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
51.485582-0.709306-0.202637-0.248766
4-0.772209-1.2280990.0689010.896679
30.924016-1.022019-0.405249-1.036021
20.332871-1.262010-0.434844-0.579920
1-0.059703-1.2338970.316625-1.226828
00.912558-0.795645-0.2891150.187606
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "5 1.485582 -0.709306 -0.202637 -0.248766\n", - "4 -0.772209 -1.228099 0.068901 0.896679\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "0 0.912558 -0.795645 -0.289115 0.187606" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "5 1.485582 -0.709306 -0.202637 -0.248766\n", + "4 -0.772209 -1.228099 0.068901 0.896679\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "0 0.912558 -0.795645 -0.289115 0.187606" + ] } ], "source": [ - "psdf.sort_index(ascending=False)" + "print(psdf.sort_index(ascending=False))" ] }, { @@ -1180,96 +584,21 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
20.332871-1.262010-0.434844-0.579920
1-0.059703-1.2338970.316625-1.226828
4-0.772209-1.2280990.0689010.896679
30.924016-1.022019-0.405249-1.036021
00.912558-0.795645-0.2891150.187606
51.485582-0.709306-0.202637-0.248766
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "4 -0.772209 -1.228099 0.068901 0.896679\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "0 0.912558 -0.795645 -0.289115 0.187606\n", - "5 1.485582 -0.709306 -0.202637 -0.248766" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "4 -0.772209 -1.228099 0.068901 0.896679\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "0 0.912558 -0.795645 -0.289115 0.187606\n", + "5 1.485582 -0.709306 -0.202637 -0.248766" + ] } ], "source": [ - "psdf.sort_values(by='B')" + "print(psdf.sort_values(by='B'))" ] }, { @@ -1313,85 +642,19 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCDE
2013-01-010.912558-0.795645-0.2891150.1876061.0
2013-01-02-0.059703-1.2338970.316625-1.2268281.0
2013-01-030.332871-1.262010-0.434844-0.579920NaN
2013-01-040.924016-1.022019-0.405249-1.036021NaN
\n", - "
" - ], - "text/plain": [ - " A B C D E\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 NaN\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 NaN" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D E\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 NaN\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 NaN" + ] } ], "source": [ - "psdf1" + "print(psdf1)" ] }, { @@ -1407,67 +670,17 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCDE
2013-01-010.912558-0.795645-0.2891150.1876061.0
2013-01-02-0.059703-1.2338970.316625-1.2268281.0
\n", - "
" - ], - "text/plain": [ - " A B C D E\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D E\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0" + ] } ], "source": [ - "psdf1.dropna(how='any')" + "print(psdf1.dropna(how='any'))" ] }, { @@ -1483,85 +696,19 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCDE
2013-01-010.912558-0.795645-0.2891150.1876061.0
2013-01-02-0.059703-1.2338970.316625-1.2268281.0
2013-01-030.332871-1.262010-0.434844-0.5799205.0
2013-01-040.924016-1.022019-0.405249-1.0360215.0
\n", - "
" - ], - "text/plain": [ - " A B C D E\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 5.0\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 5.0" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D E\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 5.0\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 5.0" + ] } ], "source": [ - "psdf1.fillna(value=5)" + "print(psdf1.fillna(value=5))" ] }, { @@ -1703,112 +850,23 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0fooone1.039632-0.571950
1barone0.9720891.085353
2footwo-1.931621-2.579164
3barthree-0.654371-0.340704
4footwo-0.1570800.893736
5bartwo0.8827950.024978
6fooone-0.1493840.201667
7foothree-1.3551360.693883
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 foo one 1.039632 -0.571950\n", - "1 bar one 0.972089 1.085353\n", - "2 foo two -1.931621 -2.579164\n", - "3 bar three -0.654371 -0.340704\n", - "4 foo two -0.157080 0.893736\n", - "5 bar two 0.882795 0.024978\n", - "6 foo one -0.149384 0.201667\n", - "7 foo three -1.355136 0.693883" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "0 foo one 1.039632 -0.571950\n", + "1 bar one 0.972089 1.085353\n", + "2 foo two -1.931621 -2.579164\n", + "3 bar three -0.654371 -0.340704\n", + "4 foo two -0.157080 0.893736\n", + "5 bar two 0.882795 0.024978\n", + "6 foo one -0.149384 0.201667\n", + "7 foo three -1.355136 0.693883" + ] } ], "source": [ - "psdf" + "print(psdf)" ] }, { @@ -1824,64 +882,18 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CD
A
bar1.2005130.769627
foo-2.553589-1.361828
\n", - "
" - ], - "text/plain": [ - " C D\n", - "A \n", - "bar 1.200513 0.769627\n", - "foo -2.553589 -1.361828" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " C D\n", + "A \n", + "bar 1.200513 0.769627\n", + "foo -2.553589 -1.361828" + ] } ], "source": [ - "psdf.groupby('A').sum()" + "print(psdf.groupby('A').sum())" ] }, { @@ -1897,94 +909,22 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CD
AB
fooone0.890248-0.370283
two-2.088701-1.685428
barthree-0.654371-0.340704
foothree-1.3551360.693883
bartwo0.8827950.024978
one0.9720891.085353
\n", - "
" - ], - "text/plain": [ - " C D\n", - "A B \n", - "foo one 0.890248 -0.370283\n", - " two -2.088701 -1.685428\n", - "bar three -0.654371 -0.340704\n", - "foo three -1.355136 0.693883\n", - "bar two 0.882795 0.024978\n", - " one 0.972089 1.085353" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " C D\n", + "A B \n", + "foo one 0.890248 -0.370283\n", + " two -2.088701 -1.685428\n", + "bar three -0.654371 -0.340704\n", + "foo three -1.355136 0.693883\n", + "bar two 0.882795 0.024978\n", + " one 0.972089 1.085353" + ] } ], "source": [ - "psdf.groupby(['A', 'B']).sum()" + "print(psdf.groupby(['A', 'B']).sum())" ] }, { @@ -14052,129 +12992,26 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0-1.187097-0.1346450.377094-0.627217
10.3317410.1662180.377094-0.627217
20.3317410.4394500.3770940.365970
30.6216200.4394501.1901800.365970
40.6216200.4394501.1901800.365970
52.1691981.0691831.3956420.365970
62.7557381.0691831.3956421.045868
72.7557381.0691831.3956421.045868
82.7557381.0691831.3956421.045868
92.7557381.5087321.3956421.556933
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 -1.187097 -0.134645 0.377094 -0.627217\n", - "1 0.331741 0.166218 0.377094 -0.627217\n", - "2 0.331741 0.439450 0.377094 0.365970\n", - "3 0.621620 0.439450 1.190180 0.365970\n", - "4 0.621620 0.439450 1.190180 0.365970\n", - "5 2.169198 1.069183 1.395642 0.365970\n", - "6 2.755738 1.069183 1.395642 1.045868\n", - "7 2.755738 1.069183 1.395642 1.045868\n", - "8 2.755738 1.069183 1.395642 1.045868\n", - "9 2.755738 1.508732 1.395642 1.556933" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "0 -1.187097 -0.134645 0.377094 -0.627217\n", + "1 0.331741 0.166218 0.377094 -0.627217\n", + "2 0.331741 0.439450 0.377094 0.365970\n", + "3 0.621620 0.439450 1.190180 0.365970\n", + "4 0.621620 0.439450 1.190180 0.365970\n", + "5 2.169198 1.069183 1.395642 0.365970\n", + "6 2.755738 1.069183 1.395642 1.045868\n", + "7 2.755738 1.069183 1.395642 1.045868\n", + "8 2.755738 1.069183 1.395642 1.045868\n", + "9 2.755738 1.508732 1.395642 1.556933" + ] } ], "source": [ "psdf.to_csv('foo.csv')\n", - "ps.read_csv('foo.csv').head(10)" + "print(ps.read_csv('foo.csv').head(10))" ] }, { @@ -14192,129 +13029,26 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0-1.187097-0.1346450.377094-0.627217
10.3317410.1662180.377094-0.627217
20.3317410.4394500.3770940.365970
30.6216200.4394501.1901800.365970
40.6216200.4394501.1901800.365970
52.1691981.0691831.3956420.365970
62.7557381.0691831.3956421.045868
72.7557381.0691831.3956421.045868
82.7557381.0691831.3956421.045868
92.7557381.5087321.3956421.556933
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 -1.187097 -0.134645 0.377094 -0.627217\n", - "1 0.331741 0.166218 0.377094 -0.627217\n", - "2 0.331741 0.439450 0.377094 0.365970\n", - "3 0.621620 0.439450 1.190180 0.365970\n", - "4 0.621620 0.439450 1.190180 0.365970\n", - "5 2.169198 1.069183 1.395642 0.365970\n", - "6 2.755738 1.069183 1.395642 1.045868\n", - "7 2.755738 1.069183 1.395642 1.045868\n", - "8 2.755738 1.069183 1.395642 1.045868\n", - "9 2.755738 1.508732 1.395642 1.556933" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "0 -1.187097 -0.134645 0.377094 -0.627217\n", + "1 0.331741 0.166218 0.377094 -0.627217\n", + "2 0.331741 0.439450 0.377094 0.365970\n", + "3 0.621620 0.439450 1.190180 0.365970\n", + "4 0.621620 0.439450 1.190180 0.365970\n", + "5 2.169198 1.069183 1.395642 0.365970\n", + "6 2.755738 1.069183 1.395642 1.045868\n", + "7 2.755738 1.069183 1.395642 1.045868\n", + "8 2.755738 1.069183 1.395642 1.045868\n", + "9 2.755738 1.508732 1.395642 1.556933" + ] } ], "source": [ "psdf.to_parquet('bar.parquet')\n", - "ps.read_parquet('bar.parquet').head(10)" + "print(ps.read_parquet('bar.parquet').head(10))" ] }, { @@ -14332,129 +13066,26 @@ "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABCD
0-1.187097-0.1346450.377094-0.627217
10.3317410.1662180.377094-0.627217
20.3317410.4394500.3770940.365970
30.6216200.4394501.1901800.365970
40.6216200.4394501.1901800.365970
52.1691981.0691831.3956420.365970
62.7557381.0691831.3956421.045868
72.7557381.0691831.3956421.045868
82.7557381.0691831.3956421.045868
92.7557381.5087321.3956421.556933
\n", - "
" - ], - "text/plain": [ - " A B C D\n", - "0 -1.187097 -0.134645 0.377094 -0.627217\n", - "1 0.331741 0.166218 0.377094 -0.627217\n", - "2 0.331741 0.439450 0.377094 0.365970\n", - "3 0.621620 0.439450 1.190180 0.365970\n", - "4 0.621620 0.439450 1.190180 0.365970\n", - "5 2.169198 1.069183 1.395642 0.365970\n", - "6 2.755738 1.069183 1.395642 1.045868\n", - "7 2.755738 1.069183 1.395642 1.045868\n", - "8 2.755738 1.069183 1.395642 1.045868\n", - "9 2.755738 1.508732 1.395642 1.556933" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + " A B C D\n", + "0 -1.187097 -0.134645 0.377094 -0.627217\n", + "1 0.331741 0.166218 0.377094 -0.627217\n", + "2 0.331741 0.439450 0.377094 0.365970\n", + "3 0.621620 0.439450 1.190180 0.365970\n", + "4 0.621620 0.439450 1.190180 0.365970\n", + "5 2.169198 1.069183 1.395642 0.365970\n", + "6 2.755738 1.069183 1.395642 1.045868\n", + "7 2.755738 1.069183 1.395642 1.045868\n", + "8 2.755738 1.069183 1.395642 1.045868\n", + "9 2.755738 1.508732 1.395642 1.556933" + ] } ], "source": [ "psdf.to_spark_io('zoo.orc', format=\"orc\")\n", - "ps.read_spark_io('zoo.orc', format=\"orc\").head(10)" + "print(ps.read_spark_io('zoo.orc', format=\"orc\").head(10))" ] }, { From 899921e9ae1fb7fe0e3ebe0a319e04dad83da1b1 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 28 Nov 2023 19:08:57 +0800 Subject: [PATCH 2/4] [SPARK-46135][PYTHON][DOCS] Fix table format error in ipynb docs --- .../getting_started/quickstart_df.ipynb | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/python/docs/source/getting_started/quickstart_df.ipynb b/python/docs/source/getting_started/quickstart_df.ipynb index 6b7716dcc422..6c93d3de0f62 100644 --- a/python/docs/source/getting_started/quickstart_df.ipynb +++ b/python/docs/source/getting_started/quickstart_df.ipynb @@ -218,23 +218,27 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---+---+-------+----------+-------------------+\n", - "| a| b| c| d| e|\n", - "+---+---+-------+----------+-------------------+\n", - "| 1|2.0|string1|2000-01-01|2000-01-01 12:00:00|\n", - "| 2|3.0|string2|2000-02-01|2000-01-02 12:00:00|\n", - "| 4|5.0|string3|2000-03-01|2000-01-03 12:00:00|\n", - "+---+---+-------+----------+-------------------+\n", - "\n" - ] + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
abcde
12.0string12000-01-012000-01-01 12:00:00
23.0string22000-02-012000-01-02 12:00:00
34.0string32000-03-012000-01-03 12:00:00
\n" + ], + "text/plain": [ + "DataFrame[a: bigint, b: double, c: string, d: date, e: timestamp]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "spark.conf.set('spark.sql.repl.eagerEval.enabled', True)\n", - "df.show()" + "df" ] }, { From f52c1c81d020b90e82dabd452e110c5bdd53fd06 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 29 Nov 2023 10:32:49 +0800 Subject: [PATCH 3/4] [SPARK-46135][PYTHON][DOCS] Fix table format error in ipynb docs --- .../getting_started/quickstart_df.ipynb | 80 ++++++++++++++++--- 1 file changed, 67 insertions(+), 13 deletions(-) diff --git a/python/docs/source/getting_started/quickstart_df.ipynb b/python/docs/source/getting_started/quickstart_df.ipynb index 6c93d3de0f62..5590ed5cc041 100644 --- a/python/docs/source/getting_started/quickstart_df.ipynb +++ b/python/docs/source/getting_started/quickstart_df.ipynb @@ -424,22 +424,76 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----+-----+-----+---------+------------+---------------------+\n", - "| | a | b | c | d | e |\n", - "|----+-----+-----+---------+------------+---------------------|\n", - "| 0 | 1 | 2 | string1 | 2000-01-01 | 2000-01-01 12:00:00 |\n", - "| 1 | 2 | 3 | string2 | 2000-02-01 | 2000-01-02 12:00:00 |\n", - "| 2 | 4 | 5 | string3 | 2000-03-01 | 2000-01-03 12:00:00 |\n", - "+----+-----+-----+---------+------------+---------------------+\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcde
012.0string12000-01-012000-01-01 12:00:00
123.0string22000-02-012000-01-02 12:00:00
234.0string32000-03-012000-01-03 12:00:00
\n", + "
" + ], + "text/plain": [ + " a b c d e\n", + "0 1 2.0 string1 2000-01-01 2000-01-01 12:00:00\n", + "1 2 3.0 string2 2000-02-01 2000-01-02 12:00:00\n", + "2 3 4.0 string3 2000-03-01 2000-01-03 12:00:00" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "from tabulate import tabulate\n", - "print(tabulate(df.toPandas(), headers = 'keys', tablefmt = 'psql'))" + "df.toPandas()" ] }, { From b6fb6db5f737c1d4be091c5094eb675aa5a432de Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 30 Nov 2023 17:50:54 +0800 Subject: [PATCH 4/4] [SPARK-46135][PYTHON][DOCS] Fix table format error in ipynb docs --- .../getting_started/quickstart_ps.ipynb | 1811 +++++++++++++++-- 1 file changed, 1592 insertions(+), 219 deletions(-) diff --git a/python/docs/source/getting_started/quickstart_ps.ipynb b/python/docs/source/getting_started/quickstart_ps.ipynb index a1febbc40d89..2b6b3f8142c7 100644 --- a/python/docs/source/getting_started/quickstart_ps.ipynb +++ b/python/docs/source/getting_started/quickstart_ps.ipynb @@ -99,21 +99,89 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " a b c\n", - "10 1 100 one\n", - "20 2 200 two\n", - "30 3 300 three\n", - "40 4 400 four\n", - "50 5 500 five\n", - "60 6 600 six\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
101100one
202200two
303300three
404400four
505500five
606600six
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "10 1 100 one\n", + "20 2 200 two\n", + "30 3 300 three\n", + "40 4 400 four\n", + "50 5 500 five\n", + "60 6 600 six" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf)" + "psdf" ] }, { @@ -169,21 +237,96 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", - "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", - "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
2013-01-010.912558-0.795645-0.2891150.187606
2013-01-02-0.059703-1.2338970.316625-1.226828
2013-01-030.332871-1.262010-0.434844-0.579920
2013-01-040.924016-1.022019-0.405249-1.036021
2013-01-05-0.772209-1.2280990.0689010.896679
2013-01-061.485582-0.709306-0.202637-0.248766
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", + "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", + "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(pdf)" + "pdf" ] }, { @@ -237,21 +380,96 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", - "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", - "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
2013-01-010.912558-0.795645-0.2891150.187606
2013-01-02-0.059703-1.2338970.316625-1.226828
2013-01-030.332871-1.262010-0.434844-0.579920
2013-01-040.924016-1.022019-0.405249-1.036021
2013-01-05-0.772209-1.2280990.0689010.896679
2013-01-061.485582-0.709306-0.202637-0.248766
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021\n", + "2013-01-05 -0.772209 -1.228099 0.068901 0.896679\n", + "2013-01-06 1.485582 -0.709306 -0.202637 -0.248766" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf)" + "psdf" ] }, { @@ -330,21 +548,96 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "0 0.912558 -0.795645 -0.289115 0.187606\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "4 -0.772209 -1.228099 0.068901 0.896679\n", - "5 1.485582 -0.709306 -0.202637 -0.248766" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
00.912558-0.795645-0.2891150.187606
1-0.059703-1.2338970.316625-1.226828
20.332871-1.262010-0.434844-0.579920
30.924016-1.022019-0.405249-1.036021
4-0.772209-1.2280990.0689010.896679
51.485582-0.709306-0.202637-0.248766
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 0.912558 -0.795645 -0.289115 0.187606\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "4 -0.772209 -1.228099 0.068901 0.896679\n", + "5 1.485582 -0.709306 -0.202637 -0.248766" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf)" + "psdf" ] }, { @@ -393,20 +686,88 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "0 0.912558 -0.795645 -0.289115 0.187606\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "4 -0.772209 -1.228099 0.068901 0.896679" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
00.912558-0.795645-0.2891150.187606
1-0.059703-1.2338970.316625-1.226828
20.332871-1.262010-0.434844-0.579920
30.924016-1.022019-0.405249-1.036021
4-0.772209-1.2280990.0689010.896679
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 0.912558 -0.795645 -0.289115 0.187606\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "4 -0.772209 -1.228099 0.068901 0.896679" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.head())" + "psdf.head()" ] }, { @@ -494,23 +855,112 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "count 6.000000 6.000000 6.000000 6.000000\n", - "mean 0.470519 -1.041829 -0.157720 -0.334542\n", - "std 0.809428 0.241511 0.294520 0.793014\n", - "min -0.772209 -1.262010 -0.434844 -1.226828\n", - "25% -0.059703 -1.233897 -0.405249 -1.036021\n", - "50% 0.332871 -1.228099 -0.289115 -0.579920\n", - "75% 0.924016 -0.795645 0.068901 0.187606\n", - "max 1.485582 -0.709306 0.316625 0.896679" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
count6.0000006.0000006.0000006.000000
mean0.470519-1.041829-0.157720-0.334542
std0.8094280.2415110.2945200.793014
min-0.772209-1.262010-0.434844-1.226828
25%-0.059703-1.233897-0.405249-1.036021
50%0.332871-1.228099-0.289115-0.579920
75%0.924016-0.7956450.0689010.187606
max1.485582-0.7093060.3166250.896679
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "count 6.000000 6.000000 6.000000 6.000000\n", + "mean 0.470519 -1.041829 -0.157720 -0.334542\n", + "std 0.809428 0.241511 0.294520 0.793014\n", + "min -0.772209 -1.262010 -0.434844 -1.226828\n", + "25% -0.059703 -1.233897 -0.405249 -1.036021\n", + "50% 0.332871 -1.228099 -0.289115 -0.579920\n", + "75% 0.924016 -0.795645 0.068901 0.187606\n", + "max 1.485582 -0.709306 0.316625 0.896679" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.describe())" + "psdf.describe()" ] }, { @@ -526,19 +976,90 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " 0 1 2 3 4 5\n", - "A 0.912558 -0.059703 0.332871 0.924016 -0.772209 1.485582\n", - "B -0.795645 -1.233897 -1.262010 -1.022019 -1.228099 -0.709306\n", - "C -0.289115 0.316625 -0.434844 -0.405249 0.068901 -0.202637\n", - "D 0.187606 -1.226828 -0.579920 -1.036021 0.896679 -0.248766" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012345
A0.912558-0.0597030.3328710.924016-0.7722091.485582
B-0.795645-1.233897-1.262010-1.022019-1.228099-0.709306
C-0.2891150.316625-0.434844-0.4052490.068901-0.202637
D0.187606-1.226828-0.579920-1.0360210.896679-0.248766
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5\n", + "A 0.912558 -0.059703 0.332871 0.924016 -0.772209 1.485582\n", + "B -0.795645 -1.233897 -1.262010 -1.022019 -1.228099 -0.709306\n", + "C -0.289115 0.316625 -0.434844 -0.405249 0.068901 -0.202637\n", + "D 0.187606 -1.226828 -0.579920 -1.036021 0.896679 -0.248766" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.T)" + "psdf.T" ] }, { @@ -554,21 +1075,96 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "5 1.485582 -0.709306 -0.202637 -0.248766\n", - "4 -0.772209 -1.228099 0.068901 0.896679\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "0 0.912558 -0.795645 -0.289115 0.187606" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
51.485582-0.709306-0.202637-0.248766
4-0.772209-1.2280990.0689010.896679
30.924016-1.022019-0.405249-1.036021
20.332871-1.262010-0.434844-0.579920
1-0.059703-1.2338970.316625-1.226828
00.912558-0.795645-0.2891150.187606
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "5 1.485582 -0.709306 -0.202637 -0.248766\n", + "4 -0.772209 -1.228099 0.068901 0.896679\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "0 0.912558 -0.795645 -0.289115 0.187606" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.sort_index(ascending=False))" + "psdf.sort_index(ascending=False)" ] }, { @@ -584,21 +1180,96 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "2 0.332871 -1.262010 -0.434844 -0.579920\n", - "1 -0.059703 -1.233897 0.316625 -1.226828\n", - "4 -0.772209 -1.228099 0.068901 0.896679\n", - "3 0.924016 -1.022019 -0.405249 -1.036021\n", - "0 0.912558 -0.795645 -0.289115 0.187606\n", - "5 1.485582 -0.709306 -0.202637 -0.248766" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
20.332871-1.262010-0.434844-0.579920
1-0.059703-1.2338970.316625-1.226828
4-0.772209-1.2280990.0689010.896679
30.924016-1.022019-0.405249-1.036021
00.912558-0.795645-0.2891150.187606
51.485582-0.709306-0.202637-0.248766
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "2 0.332871 -1.262010 -0.434844 -0.579920\n", + "1 -0.059703 -1.233897 0.316625 -1.226828\n", + "4 -0.772209 -1.228099 0.068901 0.896679\n", + "3 0.924016 -1.022019 -0.405249 -1.036021\n", + "0 0.912558 -0.795645 -0.289115 0.187606\n", + "5 1.485582 -0.709306 -0.202637 -0.248766" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.sort_values(by='B'))" + "psdf.sort_values(by='B')" ] }, { @@ -642,19 +1313,85 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D E\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 NaN\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 NaN" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDE
2013-01-010.912558-0.795645-0.2891150.1876061.0
2013-01-02-0.059703-1.2338970.316625-1.2268281.0
2013-01-030.332871-1.262010-0.434844-0.579920NaN
2013-01-040.924016-1.022019-0.405249-1.036021NaN
\n", + "
" + ], + "text/plain": [ + " A B C D E\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 NaN\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 NaN" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf1)" + "psdf1" ] }, { @@ -670,17 +1407,67 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D E\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDE
2013-01-010.912558-0.795645-0.2891150.1876061.0
2013-01-02-0.059703-1.2338970.316625-1.2268281.0
\n", + "
" + ], + "text/plain": [ + " A B C D E\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf1.dropna(how='any'))" + "psdf1.dropna(how='any')" ] }, { @@ -696,19 +1483,85 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D E\n", - "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", - "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", - "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 5.0\n", - "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 5.0" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDE
2013-01-010.912558-0.795645-0.2891150.1876061.0
2013-01-02-0.059703-1.2338970.316625-1.2268281.0
2013-01-030.332871-1.262010-0.434844-0.5799205.0
2013-01-040.924016-1.022019-0.405249-1.0360215.0
\n", + "
" + ], + "text/plain": [ + " A B C D E\n", + "2013-01-01 0.912558 -0.795645 -0.289115 0.187606 1.0\n", + "2013-01-02 -0.059703 -1.233897 0.316625 -1.226828 1.0\n", + "2013-01-03 0.332871 -1.262010 -0.434844 -0.579920 5.0\n", + "2013-01-04 0.924016 -1.022019 -0.405249 -1.036021 5.0" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf1.fillna(value=5))" + "psdf1.fillna(value=5)" ] }, { @@ -850,23 +1703,112 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "0 foo one 1.039632 -0.571950\n", - "1 bar one 0.972089 1.085353\n", - "2 foo two -1.931621 -2.579164\n", - "3 bar three -0.654371 -0.340704\n", - "4 foo two -0.157080 0.893736\n", - "5 bar two 0.882795 0.024978\n", - "6 foo one -0.149384 0.201667\n", - "7 foo three -1.355136 0.693883" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0fooone1.039632-0.571950
1barone0.9720891.085353
2footwo-1.931621-2.579164
3barthree-0.654371-0.340704
4footwo-0.1570800.893736
5bartwo0.8827950.024978
6fooone-0.1493840.201667
7foothree-1.3551360.693883
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 foo one 1.039632 -0.571950\n", + "1 bar one 0.972089 1.085353\n", + "2 foo two -1.931621 -2.579164\n", + "3 bar three -0.654371 -0.340704\n", + "4 foo two -0.157080 0.893736\n", + "5 bar two 0.882795 0.024978\n", + "6 foo one -0.149384 0.201667\n", + "7 foo three -1.355136 0.693883" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf)" + "psdf" ] }, { @@ -882,18 +1824,68 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " C D\n", - "A \n", - "bar 1.200513 0.769627\n", - "foo -2.553589 -1.361828" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BCD
A
baronethreetwo1.2005130.769627
fooonetwotwoonethree-2.553589-1.361828
\n", + "
" + ], + "text/plain": [ + " C D\n", + "A \n", + "bar 1.200513 0.769627\n", + "foo -2.553589 -1.361828" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.groupby('A').sum())" + "psdf.groupby('A').sum()" ] }, { @@ -909,22 +1901,94 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " C D\n", - "A B \n", - "foo one 0.890248 -0.370283\n", - " two -2.088701 -1.685428\n", - "bar three -0.654371 -0.340704\n", - "foo three -1.355136 0.693883\n", - "bar two 0.882795 0.024978\n", - " one 0.972089 1.085353" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CD
AB
fooone0.890248-0.370283
two-2.088701-1.685428
barthree-0.654371-0.340704
foothree-1.3551360.693883
bartwo0.8827950.024978
one0.9720891.085353
\n", + "
" + ], + "text/plain": [ + " C D\n", + "A B \n", + "foo one 0.890248 -0.370283\n", + " two -2.088701 -1.685428\n", + "bar three -0.654371 -0.340704\n", + "foo three -1.355136 0.693883\n", + "bar two 0.882795 0.024978\n", + " one 0.972089 1.085353" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(psdf.groupby(['A', 'B']).sum())" + "psdf.groupby(['A', 'B']).sum()" ] }, { @@ -12992,26 +14056,129 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "0 -1.187097 -0.134645 0.377094 -0.627217\n", - "1 0.331741 0.166218 0.377094 -0.627217\n", - "2 0.331741 0.439450 0.377094 0.365970\n", - "3 0.621620 0.439450 1.190180 0.365970\n", - "4 0.621620 0.439450 1.190180 0.365970\n", - "5 2.169198 1.069183 1.395642 0.365970\n", - "6 2.755738 1.069183 1.395642 1.045868\n", - "7 2.755738 1.069183 1.395642 1.045868\n", - "8 2.755738 1.069183 1.395642 1.045868\n", - "9 2.755738 1.508732 1.395642 1.556933" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0-1.187097-0.1346450.377094-0.627217
10.3317410.1662180.377094-0.627217
20.3317410.4394500.3770940.365970
30.6216200.4394501.1901800.365970
40.6216200.4394501.1901800.365970
52.1691981.0691831.3956420.365970
62.7557381.0691831.3956421.045868
72.7557381.0691831.3956421.045868
82.7557381.0691831.3956421.045868
92.7557381.5087321.3956421.556933
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 -1.187097 -0.134645 0.377094 -0.627217\n", + "1 0.331741 0.166218 0.377094 -0.627217\n", + "2 0.331741 0.439450 0.377094 0.365970\n", + "3 0.621620 0.439450 1.190180 0.365970\n", + "4 0.621620 0.439450 1.190180 0.365970\n", + "5 2.169198 1.069183 1.395642 0.365970\n", + "6 2.755738 1.069183 1.395642 1.045868\n", + "7 2.755738 1.069183 1.395642 1.045868\n", + "8 2.755738 1.069183 1.395642 1.045868\n", + "9 2.755738 1.508732 1.395642 1.556933" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "psdf.to_csv('foo.csv')\n", - "print(ps.read_csv('foo.csv').head(10))" + "ps.read_csv('foo.csv').head(10)" ] }, { @@ -13029,26 +14196,129 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "0 -1.187097 -0.134645 0.377094 -0.627217\n", - "1 0.331741 0.166218 0.377094 -0.627217\n", - "2 0.331741 0.439450 0.377094 0.365970\n", - "3 0.621620 0.439450 1.190180 0.365970\n", - "4 0.621620 0.439450 1.190180 0.365970\n", - "5 2.169198 1.069183 1.395642 0.365970\n", - "6 2.755738 1.069183 1.395642 1.045868\n", - "7 2.755738 1.069183 1.395642 1.045868\n", - "8 2.755738 1.069183 1.395642 1.045868\n", - "9 2.755738 1.508732 1.395642 1.556933" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0-1.187097-0.1346450.377094-0.627217
10.3317410.1662180.377094-0.627217
20.3317410.4394500.3770940.365970
30.6216200.4394501.1901800.365970
40.6216200.4394501.1901800.365970
52.1691981.0691831.3956420.365970
62.7557381.0691831.3956421.045868
72.7557381.0691831.3956421.045868
82.7557381.0691831.3956421.045868
92.7557381.5087321.3956421.556933
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 -1.187097 -0.134645 0.377094 -0.627217\n", + "1 0.331741 0.166218 0.377094 -0.627217\n", + "2 0.331741 0.439450 0.377094 0.365970\n", + "3 0.621620 0.439450 1.190180 0.365970\n", + "4 0.621620 0.439450 1.190180 0.365970\n", + "5 2.169198 1.069183 1.395642 0.365970\n", + "6 2.755738 1.069183 1.395642 1.045868\n", + "7 2.755738 1.069183 1.395642 1.045868\n", + "8 2.755738 1.069183 1.395642 1.045868\n", + "9 2.755738 1.508732 1.395642 1.556933" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "psdf.to_parquet('bar.parquet')\n", - "print(ps.read_parquet('bar.parquet').head(10))" + "ps.read_parquet('bar.parquet').head(10)" ] }, { @@ -13066,26 +14336,129 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - " A B C D\n", - "0 -1.187097 -0.134645 0.377094 -0.627217\n", - "1 0.331741 0.166218 0.377094 -0.627217\n", - "2 0.331741 0.439450 0.377094 0.365970\n", - "3 0.621620 0.439450 1.190180 0.365970\n", - "4 0.621620 0.439450 1.190180 0.365970\n", - "5 2.169198 1.069183 1.395642 0.365970\n", - "6 2.755738 1.069183 1.395642 1.045868\n", - "7 2.755738 1.069183 1.395642 1.045868\n", - "8 2.755738 1.069183 1.395642 1.045868\n", - "9 2.755738 1.508732 1.395642 1.556933" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCD
0-1.187097-0.1346450.377094-0.627217
10.3317410.1662180.377094-0.627217
20.3317410.4394500.3770940.365970
30.6216200.4394501.1901800.365970
40.6216200.4394501.1901800.365970
52.1691981.0691831.3956420.365970
62.7557381.0691831.3956421.045868
72.7557381.0691831.3956421.045868
82.7557381.0691831.3956421.045868
92.7557381.5087321.3956421.556933
\n", + "
" + ], + "text/plain": [ + " A B C D\n", + "0 -1.187097 -0.134645 0.377094 -0.627217\n", + "1 0.331741 0.166218 0.377094 -0.627217\n", + "2 0.331741 0.439450 0.377094 0.365970\n", + "3 0.621620 0.439450 1.190180 0.365970\n", + "4 0.621620 0.439450 1.190180 0.365970\n", + "5 2.169198 1.069183 1.395642 0.365970\n", + "6 2.755738 1.069183 1.395642 1.045868\n", + "7 2.755738 1.069183 1.395642 1.045868\n", + "8 2.755738 1.069183 1.395642 1.045868\n", + "9 2.755738 1.508732 1.395642 1.556933" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "psdf.to_spark_io('zoo.orc', format=\"orc\")\n", - "print(ps.read_spark_io('zoo.orc', format=\"orc\").head(10))" + "ps.read_spark_io('zoo.orc', format=\"orc\").head(10)" ] }, {