
Commit 892a339 (2.9.0)
Parent commit: f8afec6

19 files changed (+261, -27 lines)

files/install-rstudio.sh (+4, -4)

```diff
@@ -47,11 +47,11 @@ if [ "$RSTUDIO_VERSION" = "latest" ]; then
 fi
 
 if [ "$(uname -m)" != "aarch64" ]; then
-    # wget "https://s3.amazonaws.com/rstudio-ide-build/server/$UBUNTU_VERSION/amd64/rstudio-server-2022.12.0-daily-259-amd64.deb" -O "$DOWNLOAD_FILE"
-    wget "https://s3.amazonaws.com/rstudio-ide-build/server/$UBUNTU_VERSION/amd64/rstudio-server-2023.05.0-daily-293-amd64.deb" -O "$DOWNLOAD_FILE"
+    # wget "https://s3.amazonaws.com/rstudio-ide-build/server/$UBUNTU_VERSION/amd64/rstudio-server-2023.12.0-daily-330-amd64.deb" -O "$DOWNLOAD_FILE"
+    wget "https://s3.amazonaws.com/rstudio-ide-build/server/jammy/amd64/rstudio-server-2023.12.0-daily-330-amd64.deb" -O "$DOWNLOAD_FILE"
 else
-    # wget "https://s3.amazonaws.com/rstudio-ide-build/server/$UBUNTU_VERSION/arm64/rstudio-server-2022.12.0-daily-295-arm64.deb" -O "$DOWNLOAD_FILE"
-    wget "https://s3.amazonaws.com/rstudio-ide-build/server/$UBUNTU_VERSION/arm64/rstudio-server-2023.05.0-daily-293-arm64.deb" -O "$DOWNLOAD_FILE"
+    # wget "https://s3.amazonaws.com/rstudio-ide-build/server/$UBUNTU_VERSION/arm64/rstudio-server-2023.05.0-daily-293-arm64.deb" -O "$DOWNLOAD_FILE"
+    wget "https://s3.amazonaws.com/rstudio-ide-build/server/jammy/arm64/rstudio-server-2023.12.0-daily-330-arm64.deb" -O "$DOWNLOAD_FILE"
 fi
 
 dpkg -i "$DOWNLOAD_FILE"
```
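Note that the new URLs pin the Ubuntu release to `jammy` instead of interpolating `$UBUNTU_VERSION`. For reference, a minimal invocation sketch, assuming the script reads `RSTUDIO_VERSION` and `DOWNLOAD_FILE` from the environment (only this hunk is shown, so the defaults are assumptions):

```bash
# hypothetical invocation; variable names match those used in the hunk above
export RSTUDIO_VERSION="latest"
export DOWNLOAD_FILE="/tmp/rstudio-server.deb"
sudo -E bash files/install-rstudio.sh
```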
files/scalable_analytics/core-site.xml (new file, +24)

```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
```
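This points the default filesystem at a single-node HDFS instance on port 9000. A quick check that the setting is picked up inside the container, a sketch assuming `$HADOOP_HOME/bin` is on the `PATH` (the Dockerfiles below arrange this):

```bash
# print the effective default filesystem from the active Hadoop configuration
hdfs getconf -confKey fs.defaultFS
# expected output: hdfs://localhost:9000
```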

files/scalable_analytics/hadoop.sh (new file, +12)

```bash
#!/bin/bash
set -e

# cd /opt/hadoop/
# chmod +x init-dfs.sh
$HADOOP_HOME/init-dfs.sh
$HADOOP_HOME/bin/hdfs --daemon start namenode
$HADOOP_HOME/bin/hdfs --daemon start datanode
$HADOOP_HOME/bin/hdfs --daemon start secondarynamenode

# cd /home
# jupyter lab --ip="0.0.0.0" --port=8888 --no-browser --allow-root --NotebookApp.password_required='False'
```
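The Dockerfiles below install this script as `/usr/bin/hadoop`, so formatting and starting the single-node HDFS daemons reduces to one command. A usage sketch (the `jps` check assumes the JDK's tools are on the `PATH`):

```bash
# inside a running container: (re)format the namenode and start the HDFS daemons
hadoop

# list running JVMs to confirm the daemons came up (assumes jps is on the PATH)
jps
# expected to include: NameNode, DataNode, SecondaryNameNode
```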
files/scalable_analytics/hdfs-site.xml (new file, +24)

```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
```
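A replication factor of 1 fits this single-datanode setup; the default of 3 would leave every block permanently under-replicated. A smoke test once the daemons are running (the HDFS path is illustrative):

```bash
# round-trip a small file through HDFS; /user/jovyan is an illustrative path
hdfs dfs -mkdir -p /user/jovyan
echo "hello hdfs" > /tmp/hello.txt
hdfs dfs -put /tmp/hello.txt /user/jovyan/
hdfs dfs -cat /user/jovyan/hello.txt
```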

files/scalable_analytics/init-dfs.sh (new file, +4)

```bash
#!/bin/bash

$HADOOP_HOME/bin/hdfs namenode -format
echo `${HADOOP_HOME}/bin/hdfs getconf -confKey dfs.datanode.data.dir` | cut -c8- | xargs rm -r
```
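The second line is terse: `dfs.datanode.data.dir` comes back as a `file://` URI, and `cut -c8-` drops the 7-character scheme prefix so the bare directory can be passed to `rm -r`. A standalone illustration (the directory is hypothetical):

```bash
# dfs.datanode.data.dir is returned as a URI, e.g. file:///opt/hadoop/dfs/data
echo "file:///opt/hadoop/dfs/data" | cut -c8-
# -> /opt/hadoop/dfs/data
```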

files/scalable_analytics/start-dfs.sh (new file, +8)

```bash
#!/bin/bash

# hdfs namenode -format
# echo `hdfs getconf -confKey dfs.datanode.data.dir` | cut -c8- | xargs rm -r
$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh start datanode
$HADOOP_HOME/sbin/hadoop-daemon.sh start secondarynamenode
```
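Note that Hadoop 3.x deprecates the `hadoop-daemon.sh` wrapper; it delegates to the `hdfs --daemon` form that `hadoop.sh` above already uses, e.g.:

```bash
# Hadoop 3.x replacement for the hadoop-daemon.sh lines above
$HADOOP_HOME/bin/hdfs --daemon start namenode
```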

files/scalable_analytics/stop-dfs.sh (new file, +6)

```bash
#!/bin/bash

$HADOOP_HOME/sbin/hadoop-daemon.sh stop namenode
$HADOOP_HOME/sbin/hadoop-daemon.sh stop datanode
$HADOOP_HOME/sbin/hadoop-daemon.sh stop secondarynamenode
```

files/setup-arrow.sh (+2, -1)

```diff
@@ -20,7 +20,8 @@ NCPUS=${NCPUS:--1}
 # R -e "Sys.setenv(ARROW_PARQUET = 'ON', ARROW_WITH_SNAPPY = 'ON', ARROW_R_DEV = TRUE, ARROW_USE_PKG_CONFIG=TRUE); install.packages('arrow', repo='${CRAN}', Ncpus=${NCPUS})"
 # R -e "Sys.setenv(ARROW_PARQUET = 'ON', ARROW_WITH_SNAPPY = 'ON', ARROW_R_DEV = TRUE); devtools::install_version('arrow', version='10.0.1', repos='${CRAN}', Ncpus=${NCPUS})"
 # R -e "Sys.setenv(ARROW_PARQUET = 'ON', ARROW_WITH_SNAPPY = 'ON', ARROW_R_DEV = TRUE); devtools::install_version('arrow', version='11.0.0.3', repos='${CRAN}', Ncpus=${NCPUS})"
-R -e "Sys.setenv(ARROW_PARQUET = 'ON', ARROW_WITH_SNAPPY = 'ON', ARROW_R_DEV = TRUE); devtools::install_version('arrow', version='${PYARROW_VERSION}', repos='${CRAN}', Ncpus=${NCPUS})"
+# R -e "Sys.setenv(ARROW_PARQUET = 'ON', ARROW_WITH_SNAPPY = 'ON', ARROW_R_DEV = TRUE); remotes::install_version('arrow', version='${PYARROW_VERSION}', repos='${CRAN}', Ncpus=${NCPUS})"
+R -e "Sys.setenv(ARROW_PARQUET = 'ON', ARROW_WITH_SNAPPY = 'ON', ARROW_R_DEV = TRUE); install.packages('arrow', version='${PYARROW_VERSION}', repos='${CRAN}', Ncpus=${NCPUS})"
 
 # these run into issues earlier in the install process so we install them here
 R -e "install.packages(c('systemfonts', 'textshaping', 'ragg', 'httpgd', 'tidyverse', 'svglite'), repos='${CRAN}', Ncpus=${NCPUS})"
```
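One caveat: base R's `install.packages()` has no `version` argument, so `version='${PYARROW_VERSION}'` is silently absorbed by its `...` argument and the latest CRAN `arrow` is installed regardless. If pinning is actually needed, the commented `remotes::install_version()` form is the one that works, e.g. (a sketch; the version mirrors the `PYARROW_VERSION` set in the Dockerfiles below):

```bash
# pin the R arrow package to a specific version (requires the remotes package)
R -e "remotes::install_version('arrow', version='14.0.1', repos='https://cran.r-project.org')"
```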

files/setup-hadoop.sh (new file, +14)

```bash
#!/bin/bash
set -e

if [ ! -d "${HADOOP_HOME}" ]; then
    mkdir $HADOOP_HOME
fi

curl -sL --retry 3 \
    "http://archive.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz" \
    | gunzip \
    | tar -x --strip-components=1 -C $HADOOP_HOME \
    && rm -rf $HADOOP_HOME/share/doc \
    && chown -R ${NB_USER} $HADOOP_HOME \
    && mkdir "${HADOOP_HOME}/logs"
```
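The script expects `HADOOP_VERSION`, `HADOOP_HOME`, and `NB_USER` in the environment; the Dockerfiles below set all three before running it. A standalone invocation sketch with the same values:

```bash
# values mirror the ENV lines in the Dockerfiles below; NB_USER comes from the base image
HADOOP_VERSION=3.3.4 HADOOP_HOME=/opt/hadoop NB_USER=jovyan bash files/setup-hadoop.sh
```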

files/setup-quarto.sh (+3, -2)

```diff
@@ -11,18 +11,19 @@ NCPUS=${NCPUS:--1}
 UBUNTU_VERSION=${UBUNTU_VERSION:-`lsb_release -sc`}
 CRAN=${CRAN:-https://cran.r-project.org}
 CRAN_SOURCE=${CRAN/"__linux__/$UBUNTU_VERSION/"/""}
+QUARTO_VERSION="1.4.506"
 
 if [ "$(uname -m)" != "aarch64" ]; then
     # ln -fs /usr/lib/rstudio-server/bin/quarto/bin/quarto /usr/local/bin
     # need pre-release for inline python
     sudo apt-get update -qq && apt-get -y install gdebi-core
-    wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.4.376/quarto-1.4.376-linux-amd64.deb -O quarto.deb
+    wget https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-amd64.deb -O quarto.deb
     sudo gdebi -n quarto.deb # adding -n to run non-interactively
 
 else
     # need pre-release for inline python
     sudo apt-get update -qq && apt-get -y install gdebi-core
-    wget https://github.com/quarto-dev/quarto-cli/releases/download/v1.4.376/quarto-1.4.376-linux-arm64.deb -O quarto.deb
+    wget https://github.com/quarto-dev/quarto-cli/releases/download/v${QUARTO_VERSION}/quarto-${QUARTO_VERSION}-linux-arm64.deb -O quarto.deb
     sudo gdebi -n quarto.deb # adding -n to run non-interactively
     CRAN=$CRAN_SOURCE
 fi
```
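Factoring the release into `QUARTO_VERSION` means future bumps touch a single line. To confirm the expected build after installation:

```bash
quarto --version
# expected output: 1.4.506
```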

files/setup-radiant.sh (-1)

```diff
@@ -56,7 +56,6 @@ R -e "install.packages('igraph', repo='${CRAN}', Ncpus=${NCPUS})" \
     -e "remotes::install_github('radiant-rstats/radiant.multivariate', upgrade = 'never')" \
     -e "remotes::install_github('radiant-rstats/radiant', upgrade = 'never')" \
     -e "remotes::install_github('radiant-rstats/radiant.update', upgrade = 'never')" \
-    -e "remotes::install_version('shiny', version='1.7.4.1', repos='${CRAN}', Ncpus=${NCPUS})" \
     -e "install.packages('duckdb', repo='${CRAN}', Ncpus=${NCPUS})"
 
 rm -rf /tmp/downloaded_packages
```

files/supervisord.conf (+8, -1)

```diff
@@ -22,4 +22,11 @@ command=sudo /usr/sbin/sshd -D
 stdout_logfile=/var/log/supervisor/%(program_name)s.log
 stderr_logfile=/var/log/supervisor/%(program_name)s.log
 startsecs=0
-autorestart=false
+autorestart=false
+
+# [program:hadoop]
+# command=sudo /usr/bin/hadoop
+# stdout_logfile=/var/log/supervisor/%(program_name)s.log
+# stderr_logfile=/var/log/supervisor/%(program_name)s.log
+# startsecs=0
+# autorestart=false
```
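The `hadoop` program block is left commented out, so HDFS is started on demand via `/usr/bin/hadoop` rather than supervised at boot. If it were enabled, the daemons could be inspected alongside `sshd` with `supervisorctl` (a sketch; the config path matches the `CMD` in the Dockerfiles below):

```bash
# list supervised programs and their states inside the container
supervisorctl -c /etc/supervisor/conf.d/supervisord.conf status
```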

install/rsm-msba-windows.md (+5, -3)

````diff
@@ -45,16 +45,16 @@ wsl --set-default-version 2
 wsl --install -d Ubuntu-22.04
 ```
 
-> Important: Make sure to enter the same username and password you use to login to your computer
-
-
+> Important: Make sure to enter the same username and password you use to login to your computer. The username should **not** have any spaces or special characters.
 
 Check your username for Windows and Ubuntu by executing the command below in both (1) a Windows PowerShell and (2) an Ubuntu terminal. The output in both cases should be the same.
 
 ```bash
 whoami
 ```
 
+> Important: If you see `root` as the username please review the discussion in **step 4** below. You will need to reset your username for WSL2.
+
 Next, restart your computer and re-open PowerShell to check that Ubuntu is set as the default linux distribution:
 
 ```bash
@@ -308,6 +308,8 @@ For a more extensive example using Python see: <a href="https://github.com/radia
 
 ### Trouble shooting
 
+If you see `root` as the username when you type `whoami` in an Ubuntu terminal you will need to reset your username for WSL2. Please review step 4 in the install process for more guidance.
+
 If you cannot connect to postgresql it is most likely due to an issue with the docker volume that contains the data. The volume can become corrupted if the container is not properly stopped using `q + Enter` in the launch menu. To create a clean volume for postgres (1) stop the running container using `q + Enter`, (2) run the code below in a terminal, and (3) restart the container. If you are still having issues connecting to the postgresql server, please reach out for support through Piazza.
 
 ```bash
````
rsm-msba-arm/Dockerfile (+37, -5)

```diff
@@ -5,7 +5,8 @@
 # python 3.11 version posted on 6-25-2023 (arm64)
 # FROM jupyter/pyspark-notebook@sha256:b18721c3abe68c5316aa71c1d783c459a18968b29154ab235ca33cef374c3b9a
 # 2023-09-18 version (arm64)
-FROM jupyter/pyspark-notebook@sha256:8b143afc893bb1838c608e6b3bc863d2a07e79c1ebac8a12c2f59b64bbeb195c
+# FROM jupyter/pyspark-notebook@sha256:8b143afc893bb1838c608e6b3bc863d2a07e79c1ebac8a12c2f59b64bbeb195c
+FROM quay.io/jupyter/pyspark-notebook@sha256:c6d64565bfd40b04ad649e31cea2d6a0c06cbed9b5c763b31e9d5523c8e28174
 
 LABEL Vincent Nijs "[email protected]"
@@ -32,11 +33,13 @@ RUN apt-get update -qq && apt-get -y --no-install-recommends install \
     lsb-release \
     libcurl4-openssl-dev \
     git \
-    netcat
+    netcat \
+    htop
 
 ENV CMDSTAN_VERSION="2.33.1"
 ENV PANDAS_VERSION="2.1.1"
-ENV PYARROW_VERSION="13.0.0"
+# ENV PANDAS_VERSION="2.0.3" # pyspark image still using 2.0.3
+ENV PYARROW_VERSION="14.0.1"
 RUN mamba install --quiet --yes -c conda-forge \
     pandas=${PANDAS_VERSION} \
     cmdstan=${CMDSTAN_VERSION} \
@@ -79,6 +82,12 @@ RUN mamba install --quiet --yes -c conda-forge \
     bash_kernel \
     sympy \
     simpy \
+    awscli \
+    bokeh \
+    dask-kubernetes \
+    dask-ml \
+    findspark \
+    plotly \
     && python -m bash_kernel.install
 
 # causing issues with 1/12/2023 update
@@ -90,7 +99,7 @@ RUN chmod 755 setup.sh \
     && rm setup.sh
 
 # make system (conda) R the first choice
-ENV R_VERSION=4.3.1
+ENV R_VERSION=4.3.2
 # ENV R_VERSION=4.2.3
 ENV TERM=xterm
 ENV R_HOME=/opt/conda/lib/R
@@ -124,6 +133,7 @@ RUN pip install \
     xlsx2csv \
     jupysql \
     shiny \
+    shinywidgets \
     pyrsm
 
 # catboost # not available for arm64
@@ -290,6 +300,28 @@ RUN chmod +x setup.sh \
     && ./setup.sh \
     && rm setup.sh
 
+# setup hadoop
+ENV JAVA_HOME "/usr/lib/jvm/java-17-openjdk-arm64/"
+ENV HADOOP_VERSION 3.3.4
+ENV HADOOP_HOME /opt/hadoop
+ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+COPY files/setup-hadoop.sh setup.sh
+RUN chmod +x setup.sh \
+    && ./setup.sh \
+    && rm setup.sh
+
+# hadoop configuration
+ADD files/scalable_analytics/core-site.xml $HADOOP_HOME/etc/hadoop/
+ADD files/scalable_analytics/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
+ADD files/scalable_analytics/init-dfs.sh /opt/hadoop/
+ADD files/scalable_analytics/start-dfs.sh /opt/hadoop/
+ADD files/scalable_analytics/stop-dfs.sh /opt/hadoop/
+ADD files/scalable_analytics/hadoop.sh /usr/bin/hadoop
+RUN chown -R ${NB_USER} ${HADOOP_HOME} \
+    && chmod 755 ${HADOOP_HOME}/*.sh \
+    && chmod 755 /usr/bin/hadoop
+ENV PATH $PATH:$HADOOP_HOME/bin
+
 # setting up ssh connection
 RUN mkdir -p /var/run/sshd \
     && ssh-keygen -A \
@@ -298,7 +330,7 @@ RUN mkdir -p /var/run/sshd \
     && echo 'PubkeyAuthentication yes' >> /etc/ssh/sshd_config \
     && chsh -s $(which zsh) ${NB_USER}
 
-EXPOSE 22 8181 8282 8765 8989 8501 8000
+EXPOSE 22 4040 4041 8181 8282 8765 8989 8501 8000
 CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
 
 # Switch back to jovyan to avoid accidental container runs as root
```
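The ports added to `EXPOSE`, 4040 and 4041, correspond to the Spark web UI (4041 is used when a second Spark context is active). A hedged build-and-run sketch (the image tag and port selection are illustrative):

```bash
# build the arm64 image from the repo root; the tag is illustrative
docker build -t rsm-msba-arm rsm-msba-arm/
# publish JupyterLab and the Spark UI; 8888 comes from the base notebook image
docker run -it --rm -p 8888:8888 -p 4040:4040 rsm-msba-arm
```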

rsm-msba-intel/Dockerfile (+37, -6)

```diff
@@ -6,6 +6,8 @@
 # FROM jupyter/pyspark-notebook@sha256:cc4e4a6d19b0e05124e6340473346acfcd9bafb27bfcba153644810041a99c4d
 # 2023-09-18 version (amd64)
 FROM jupyter/pyspark-notebook@sha256:a66f0b98a323c274097fbbf4fbbacb3b90edcffea1aa279579b17b6f1003798c
+# same sha256 for amd64 and arm64
+FROM quay.io/jupyter/pyspark-notebook@sha256:c6d64565bfd40b04ad649e31cea2d6a0c06cbed9b5c763b31e9d5523c8e28174
 
 LABEL Vincent Nijs "[email protected]"
@@ -32,11 +34,13 @@ RUN apt-get update -qq && apt-get -y --no-install-recommends install \
     lsb-release \
     libcurl4-openssl-dev \
     git \
-    netcat
+    netcat \
+    htop
 
 ENV CMDSTAN_VERSION="2.33.1"
 ENV PANDAS_VERSION="2.1.1"
-ENV PYARROW_VERSION="13.0.0"
+# ENV PANDAS_VERSION="2.0.3" # pyspark image still using 2.0.3
+ENV PYARROW_VERSION="14.0.1"
 RUN mamba install --quiet --yes -c conda-forge \
     pandas=${PANDAS_VERSION} \
     cmdstan=${CMDSTAN_VERSION} \
@@ -79,6 +83,12 @@ RUN mamba install --quiet --yes -c conda-forge \
     bash_kernel \
     sympy \
     simpy \
+    awscli \
+    bokeh \
+    dask-kubernetes \
+    dask-ml \
+    findspark \
+    plotly \
     && python -m bash_kernel.install
 
 # causing issues with 1/12/2023 update
@@ -90,7 +100,7 @@ RUN chmod 755 setup.sh \
     && rm setup.sh
 
 # make system (conda) R the first choice
-ENV R_VERSION=4.3.1
+ENV R_VERSION=4.3.2
 # ENV R_VERSION=4.2.3
 ENV TERM=xterm
 ENV R_HOME=/opt/conda/lib/R
@@ -124,6 +134,7 @@ RUN pip install \
     xlsx2csv \
     jupysql \
     shiny \
+    shinywidgets \
     pyrsm
 
 # catboost # not available for arm64
@@ -275,7 +286,6 @@ RUN pip install git+https://github.com/vnijs/jupyter-pgweb-proxy.git \
 
 # packages need for radiant a reproducible analysis
 COPY files/setup-extra.sh setup.sh
-COPY files/setup-extra.sh setup.sh
 RUN chmod +x setup.sh \
     && ./setup.sh \
     && rm setup.sh
@@ -291,16 +301,37 @@ RUN chmod +x setup.sh \
     && ./setup.sh \
     && rm setup.sh
 
+# setup hadoop
+ENV JAVA_HOME "/usr/lib/jvm/java-17-openjdk-arm64/"
+ENV HADOOP_VERSION 3.3.4
+ENV HADOOP_HOME /opt/hadoop
+ENV HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+COPY files/setup-hadoop.sh setup.sh
+RUN chmod +x setup.sh \
+    && ./setup.sh \
+    && rm setup.sh
+
+# hadoop configuration
+ADD files/scalable_analytics/core-site.xml $HADOOP_HOME/etc/hadoop/
+ADD files/scalable_analytics/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
+ADD files/scalable_analytics/init-dfs.sh /opt/hadoop/
+ADD files/scalable_analytics/start-dfs.sh /opt/hadoop/
+ADD files/scalable_analytics/stop-dfs.sh /opt/hadoop/
+ADD files/scalable_analytics/hadoop.sh /usr/bin/hadoop
+RUN chown -R ${NB_USER} ${HADOOP_HOME} \
+    && chmod 755 ${HADOOP_HOME}/*.sh \
+    && chmod 755 /usr/bin/hadoop
+ENV PATH $PATH:$HADOOP_HOME/bin
+
 # setting up ssh connection
 RUN mkdir -p /var/run/sshd \
-    && mkdir -p /var/run/sshd \
     && ssh-keygen -A \
     && echo 'PasswordAuthentication no' >> /etc/ssh/sshd_config \
     && echo 'PermitRootLogin no' >> /etc/ssh/sshd_config \
     && echo 'PubkeyAuthentication yes' >> /etc/ssh/sshd_config \
     && chsh -s $(which zsh) ${NB_USER}
 
-EXPOSE 22 8181 8282 8765 8989 8501 8000
+EXPOSE 22 4040 4041 8181 8282 8765 8989 8501 8000
 CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
 
 # Switch back to jovyan to avoid accidental container runs as root
```
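Two things to watch in this file: the earlier `FROM jupyter/pyspark-notebook...` line is kept uncommented, so Docker treats the `quay.io` line as starting a new (final) build stage, and `JAVA_HOME` points at the arm64 JDK path even though this is the amd64 image. A quick sanity check inside a running container (a sketch):

```bash
# confirm which JDK directory actually exists in the amd64 image;
# the arm64 path in ENV JAVA_HOME may need its amd64 counterpart
ls /usr/lib/jvm/
echo "$JAVA_HOME"
```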
