Skip to content

Commit

Permalink
updated slurm for reservoir container
Browse files Browse the repository at this point in the history
  • Loading branch information
espirado committed Jun 30, 2023
1 parent 54e12bc commit 7695e8b
Show file tree
Hide file tree
Showing 33 changed files with 969 additions and 9 deletions.
30 changes: 30 additions & 0 deletions Dockerfile.base
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Base "reservoir" image: starts from rocker/geospatial and layers on Python,
# system tools, compiled libraries, R packages and server configuration.
FROM rocker/geospatial:4.3.0
# MAINTAINER is deprecated; the author is recorded as an OCI label instead.
LABEL org.opencontainers.image.authors="Noam Ross [email protected]"

# NOTE(review): presumably read by install_python.sh to decide where the
# virtualenv is created -- confirm against the rocker script.
ENV PYTHON_VENV_PATH=/opt/venv/reticulate
RUN /rocker_scripts/install_python.sh

# Each install script is copied and run in its own COPY/RUN pair so that
# editing one script only invalidates the build cache from that step onward.
COPY scripts/install_system_tools.sh /reservoir_scripts/install_system_tools.sh
RUN /reservoir_scripts/install_system_tools.sh

COPY scripts/install_eigen_stan_inla.sh /reservoir_scripts/install_eigen_stan_inla.sh
RUN /reservoir_scripts/install_eigen_stan_inla.sh

COPY scripts/install_r_pkgs_compiled.sh /reservoir_scripts/install_r_pkgs_compiled.sh
RUN /reservoir_scripts/install_r_pkgs_compiled.sh

COPY scripts/install_r_pkgs_other.sh /reservoir_scripts/install_r_pkgs_other.sh
RUN /reservoir_scripts/install_r_pkgs_other.sh

# Set the system time zone; the expansions are quoted so the value is never
# word-split or globbed by the shell.
ENV TZ=America/New_York
RUN ln -snf "/usr/share/zoneinfo/${TZ}" /etc/localtime \
 && echo "${TZ}" > /etc/timezone \
 && dpkg-reconfigure --frontend noninteractive tzdata

# Source path is relative to the build context, like the other COPY lines.
COPY scripts/server_config.sh /reservoir_scripts/server_config.sh
COPY config /reservoir_config
RUN /reservoir_scripts/server_config.sh

RUN locale-gen en_US.UTF-8

# Documentation only (does not publish the ports).
# NOTE(review): presumably 22 = SSH and 8787 = RStudio Server -- confirm.
EXPOSE 22 8787
7 changes: 0 additions & 7 deletions Reservoir /Dockerfile.gpu → Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@ MAINTAINER "Noam Ross" [email protected]
ENV CUDAToolkit_ROOT=/usr/local/cuda
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64

# Install SLURM client
RUN apt-get update && apt-get install -y slurm-client

# Add SLURM configuration file
ADD slurm.conf /etc/slurm-llnl/slurm.conf

# All your previous installation scripts and configurations
COPY scripts/install_xgboost.sh /reservoir_scripts/install_xgboost.sh
RUN /reservoir_scripts/install_xgboost.sh

Expand Down
21 changes: 19 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
# slurm-containerized-hpc-environment
The goal of this project is to outline the strategies for integrating Simple Linux Utility for Resource Management(SLURM) into our existing High-Performance Computing(HPC) platform.
# Reservoir: The EHA Data Science Container

A docker image for EHA's modeling and analytics work servers.

[![GitHub Actions CI](https://github.com/ecohealthalliance/reservoir/actions/workflows/build-containers.yml/badge.svg)](https://github.com/ecohealthalliance/reservoir/actions/workflows/build-containers.yml)
[![license](https://img.shields.io/badge/license-GPLv2-blue.svg)](https://opensource.org/licenses/GPL-2.0)



*reservoir* is an image built for the modeling and analytics workflow at [EcoHealth Alliance](https://www.ecohealthalliance.org). It builds on top of the [rocker project](https://www.rocker-project.org/) `geospatial` and GPU-enabled `ml-verse` images and adds other commonly used R packages, system tools, SSH and mosh access, and GPU-compiled tools.

Get the images at

```
docker pull ghcr.io/ecohealthalliance/reservoir:base # The primary image
docker pull ghcr.io/ecohealthalliance/reservoir:gpu # for hosts with GPUs
```

Empty file removed Reservoir /Dockerfile.base
Empty file.
12 changes: 12 additions & 0 deletions build-and-push.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Build the base and gpu images from Dockerfile.<tag> and push each one.
# The previously published image is pulled first so that it can be used as
# a layer cache (--cache-from) for the build.

set -e
set -x

for tag in base gpu
do
  image="ecohealthalliance/reservoir:${tag}"
  # The pull only warms the cache, so a failure (e.g. the tag has not been
  # published yet) must not abort the build under `set -e`.
  time docker pull "$image" > /dev/null \
    || echo "warning: could not pull $image; building without a remote cache" >&2
  time docker build -f "Dockerfile.${tag}" --cache-from "$image" -t "$image" .
  docker push "$image" > /dev/null
done
11 changes: 11 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
# Local build only: builds Dockerfile.base and Dockerfile.gpu in turn and tags
# the results; nothing is pulled or pushed.

set -e
set -x

for tag in base gpu; do
    image="ecohealthalliance/reservoir:${tag}"
    # time docker pull $image > /dev/null
    time docker build -f "Dockerfile.${tag}" --cache-from "${image}" -t "${image}" .
done
Binary file added config/EHA_Logo_369C.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions config/Makevars.site
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Site-wide make settings used when R compiles packages.

# CPU count as reported by nproc; falls back to 1 if nproc is unavailable.
NPROC:=$(shell nproc 2>/dev/null || echo 1)
# Parallel job count: NPROC capped at 4. The arithmetic has to be evaluated by
# the shell, so its `$` is doubled; a single `$(( ... ))` would be consumed by
# make as a (nonexistent) variable reference and yield an empty value.
NPROC_MIN:=$(shell n=$(NPROC); echo $$((n>4?4:n)))
# Run builds at reduced scheduling priority.
MAKE=nice -n 10 make
# No quotes here: make keeps quote characters literally in variable values.
MAKEFLAGS+=-j$(NPROC_MIN)
# VER selects a compiler version suffix (e.g. -12); CCACHE may be set to
# `ccache` to wrap the compilers. Both are empty, so plain gcc/g++/gfortran run.
VER=
CCACHE=
CC=$(CCACHE) gcc$(VER)
CXX=$(CCACHE) g++$(VER)
CXX11=$(CCACHE) g++$(VER)
CXX14=$(CCACHE) g++$(VER)
FC=$(CCACHE) gfortran$(VER)
F77=$(CCACHE) gfortran$(VER)

3 changes: 3 additions & 0 deletions config/Renviron.site
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Site-wide environment variables for R sessions.

# Marker variable, set to "true" in this image.
# NOTE(review): presumably lets R code detect that it runs inside reservoir -- confirm.
IS_RESERVOIR=true
# NOTE(review): Rprofile.site creates and uses ~/tmp, not ~/.tmp, and this
# variable is named TEMPDIR rather than TMPDIR -- confirm that anything reads it.
TEMPDIR=~/.tmp
# Per-user package library under the home directory.
# NOTE(review): %p and %v presumably expand to the platform and R version -- confirm.
R_LIBS_USER=~/R/%p-library/%v
52 changes: 52 additions & 0 deletions config/Rprofile.site
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# R Startup configuration

## Default package repository: a single CRAN entry pointing at the
## packagemanager.rstudio.com "jammy" Linux URL
options(repos = c(CRAN = "https://packagemanager.rstudio.com/all/__linux__/jammy/latest"))

## Reduce raster tempfile generation
options(rasterTmpTime = 1)
options(rasterMaxMemory = 1e10)

## Set temp directory inside user folders
## Creates ~/tmp if needed and, when the unixtools package is installed,
## makes it the session's temporary directory.
local({
  user_tmp <- path.expand("~/tmp")
  if (!dir.exists(user_tmp)) dir.create(user_tmp)

  ## system.file() returns "" when the package is not installed. This avoids
  ## building the full installed.packages() matrix on every R startup, and
  ## avoids matching "unixtools" in an unrelated cell of that matrix.
  if (nzchar(system.file(package = "unixtools"))) {
    unixtools::set.tempdir(user_tmp)
  }
})

## Ensure the per-user library named by R_LIBS_USER exists, then put it on
## the library search path
local({
  user_lib <- Sys.getenv("R_LIBS_USER")
  if (!dir.exists(user_lib)) {
    dir.create(user_lib, recursive = TRUE)
  }
  .libPaths(user_lib)
})

# Default httr to out-of-band authentication when HTTR_LOCALHOST is not set:
# a redirect to localhost may not work, depending on where this Docker
# container is running.
local({
  localhost_setting <- Sys.getenv("HTTR_LOCALHOST", unset = NA)
  if (is.na(localhost_setting)) {
    options(httr_oob_default = TRUE)
  }
})

## Default BLAS to a single thread, unless the user has already set
## OPENBLAS_NUM_THREADS or OMP_NUM_THREADS in the environment

local({
  blas_ctl_loaded <- require("RhpcBLASctl", quietly = TRUE)
  threads_unset <- !nzchar(Sys.getenv("OPENBLAS_NUM_THREADS")) &&
    !nzchar(Sys.getenv("OMP_NUM_THREADS"))
  if (blas_ctl_loaded && threads_unset) {
    RhpcBLASctl::blas_set_num_threads(1)
  }
})

## Startup message
#.First <- function() {
# ("\014")
# message("Welcome to *reservoir*, the EcoHealth Alliance Modeling &\n",
# "Analytics machine image. Please read our users' guide at\n",
# "https://github.com/ecohealthalliance/eha-servers.\n",
# "\n",
# R.version$version.string, "\n",
# "Cores available: ", parallel::detectCores(),"\n"
# )
#
#}
54 changes: 54 additions & 0 deletions config/bash_settings.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# set a fancy prompt (non-color, overwrite the one in /etc/profile)
#PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '

# Commented out, don't overwrite xterm -T "title" -n "icontitle" by default.
# If this is an xterm set the title to user@host:dir
#case "$TERM" in
#xterm*|rxvt*)
# PROMPT_COMMAND='echo -ne "\033]0;${USER}@${HOSTNAME}: ${PWD}\007"'
# ;;
#*)
# ;;
#esac

# Load programmable completion unless the shell is in POSIX mode; the
# bash-completion package location is preferred over the legacy /etc path.
if ! shopt -oq posix; then
    if [[ -f /usr/share/bash-completion/bash_completion ]]; then
        source /usr/share/bash-completion/bash_completion
    elif [[ -f /etc/bash_completion ]]; then
        source /etc/bash_completion
    fi
fi

# if the command-not-found package is installed, use it
# Defines command_not_found_handle, passing the unknown command name ($1) to
# whichever command-not-found helper is executable.
# Two separate tests joined with || replace the obsolescent `[ a -o b ]` form.
if [ -x /usr/lib/command-not-found ] || [ -x /usr/share/command-not-found/command-not-found ]; then
    command_not_found_handle() {
        # check because c-n-f could've been removed in the meantime
        if [ -x /usr/lib/command-not-found ]; then
            /usr/lib/command-not-found -- "$1"
            return $?
        elif [ -x /usr/share/command-not-found/command-not-found ]; then
            /usr/share/command-not-found/command-not-found -- "$1"
            return $?
        else
            # Neither helper is available any more: print the plain message
            # and return 127.
            printf "%s: command not found\n" "$1" >&2
            return 127
        fi
    }
fi

# Colour prompt: user@host, working directory (green) and the current git
# branch in parentheses (yellow).
# parse_git_branch prints " (<branch>)" for the line that `git branch` marks
# with "*", and nothing when git reports no branches or fails.
parse_git_branch() {
    git branch 2> /dev/null \
        | sed -e '/^[^*]/d' \
              -e 's/* \(.*\)/ (\1)/'
}
export PS1="\u@\h \[\033[32m\]\w\[\033[33m\]\$(parse_git_branch)\[\033[00m\]$ "

# Auto-login to byobu by default
#export LC_BYOBU=1

# Essential gurobi variables
export GUROBI_HOME="/opt/gurobi752/linux64"
# ${VAR:+${VAR}:} adds the separator only when VAR is non-empty. An unset or
# empty LD_LIBRARY_PATH would otherwise produce a leading ":", i.e. an empty
# entry, which is searched as the current directory.
export PATH="${PATH:+${PATH}:}${GUROBI_HOME}/bin"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${GUROBI_HOME}/lib"
# Default editor
export EDITOR=/usr/bin/micro
# Set GPG_TTY to the name of the current terminal, as printed by tty
export GPG_TTY=$(tty)
37 changes: 37 additions & 0 deletions config/byobu_status
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# status - Byobu's default status enabled/disabled settings
#
# Override these in $BYOBU_CONFIG_DIR/status
# where BYOBU_CONFIG_DIR is XDG_CONFIG_HOME if defined,
# and $HOME/.byobu otherwise.
#
# Copyright (C) 2009-2011 Canonical Ltd.
#
# Authors: Dustin Kirkland <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Status items beginning with '#' are disabled.

# Each value below is a space-separated list of status items; an item with a
# leading '#' inside the quotes is disabled, and a line that itself starts
# with '#' is a comment.

# Screen has two status lines, with 4 quadrants for status
screen_upper_left="color"
screen_upper_right="color whoami hostname ip_address menu"
screen_lower_left="color logo distro release #arch session"
screen_lower_right="color network #disk_io custom #entropy raid reboot_required updates_available #apport #services #mail users uptime #ec2_cost #rcs_cost #fan_speed #cpu_temp battery wifi_quality #processes load_average cpu_count cpu_freq memory #swap #disk #time_utc date time"

# Tmux has one status line, with 2 halves for status
tmux_left=" #logo #distro #release #arch session"
# You can have as many tmux right lines below here, and cycle through them using Shift-F5
# Only one tmux_right line is uncommented here; the others are kept as
# commented-out alternatives.
#tmux_right=" #network disk_io #custom #entropy raid #reboot_required #updates_available #apport #services #mail users #uptime #ec2_cost #rcs_cost #fan_speed cpu_temp #battery #wifi_quality processes load_average cpu_count cpu_freq memory swap #disk whoami #hostname #ip_address #time_utc date time"
tmux_right=" #network #disk_io #custom #entropy #raid #reboot_required #updates_available #apport #services #mail users #uptime #ec2_cost #rcs_cost #fan_speed #cpu_temp #battery #wifi_quality #processes cpu_count load_average #cpu_freq memory #swap disk whoami hostname #ip_address #time_utc date time"
#tmux_right="network #disk_io custom #entropy raid reboot_required updates_available #apport #services #mail users uptime #ec2_cost #rcs_cost #fan_speed #cpu_temp battery wifi_quality #processes load_average cpu_count cpu_freq memory #swap #disk #whoami #hostname ip_address #time_utc date time"
#tmux_right="#network disk_io #custom entropy #raid #reboot_required #updates_available #apport #services #mail #users #uptime #ec2_cost #rcs_cost fan_speed cpu_temp #battery #wifi_quality #processes #load_average #cpu_count #cpu_freq #memory #swap whoami hostname ip_address #time_utc disk date time"
78 changes: 78 additions & 0 deletions config/byobu_statusrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# statusrc - Byobu's default status configurations
#
# Override these in $BYOBU_CONFIG_DIR/statusrc
# where BYOBU_CONFIG_DIR is XDG_CONFIG_HOME if defined,
# and $HOME/.byobu otherwise.
#
# Copyright (C) 2009-2011 Canonical Ltd.
#
# Authors: Dustin Kirkland <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Configurations that you can override; if you leave these commented out,
# Byobu will try to auto-detect them.

# This should be auto-detected for most distros, but setting it here will save
# some calls to lsb_release and the like.
#BYOBU_DISTRO=Ubuntu

# Default: depends on the distro (which is either auto-detected or set
# via $DISTRO)
#LOGO="\o/"

# Abbreviate the release to N characters
# By default, this is disabled. But if you set RELEASE_ABBREVIATED=1
# and your lsb_release is "precise", only "p" will be displayed
#RELEASE_ABBREVIATED=1

# NOTE(review): presumably the mount point that the "disk" status item
# reports on -- confirm against the byobu documentation.
# Default: /
MONITORED_DISK=/home

# Minimum disk throughput that triggers the notification (in kB/s)
# Default: 50
#DISK_IO_THRESHOLD=50

# Default: eth0
#MONITORED_NETWORK=eth0

# Unit used for network throughput (either bits per second or bytes per second)
# Default: bits
#NETWORK_UNITS=bytes

# Minimum network throughput that triggers the notification (in kbit/s)
# Default: 20
#NETWORK_THRESHOLD=20

# You can add an additional source of temperature here
#MONITORED_TEMP=/proc/acpi/thermal_zone/THM0/temperature

# Default: C
#TEMP=F

#SERVICES="eucalyptus-nc|NC eucalyptus-cloud|CLC eucalyptus-walrus eucalyptus-cc|CC eucalyptus-sc|SC"

#FAN=$(find /sys -type f -name fan1_input | head -n1)

# You can set this to 1 to report your external/public ip address
# Default: 0
#IP_EXTERNAL=0

# The users notification normally counts ssh sessions; set this configuration to '1'
# to instead count number of distinct users logged onto the system
# Default: 0
#USERS_DISTINCT=0

# Set this to zero to hide seconds in the time display
# Default 1
#TIME_SECONDS=0
29 changes: 29 additions & 0 deletions config/byoburc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
###############################################################################
# Load:
# * the stock byobu profile
# * any windows
# * and the local byoburc (instead of .screenrc)
# Used at startup but not profile refresh
#
# Copyright (C) 2009-2011 Canonical Ltd.
#
# Authors: Dustin Kirkland <[email protected]>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###############################################################################

# Turn the mouse option on globally (-g).
# NOTE(review): `set -g mouse on` looks like tmux syntax, while the last line
# below sources a .screenrc -- confirm which backend reads this file.
set -g mouse on

# Load, in order: the profile from the byobu config directory, the file named
# by $BYOBU_WINDOWS, and the .screenrc in the byobu config directory.
source $BYOBU_CONFIG_DIR/profile
source $BYOBU_WINDOWS
source $BYOBU_CONFIG_DIR/.screenrc
6 changes: 6 additions & 0 deletions config/ccache.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# ccache settings.
# NOTE(review): presumably the upper bound on the total cache size -- confirm.
max_size = 5.0G
# important for R CMD INSTALL *.tar.gz as tarballs are expanded freshly -> fresh ctime
sloppiness = include_file_ctime
# also important as the (temp.) directory name will differ
hash_dir = false
# Optional shared cache location, currently disabled:
# cache_dir = /shared/ccache_shared_cache
11 changes: 11 additions & 0 deletions config/motd.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh
# Print the reservoir welcome banner (message of the day) in colour.
#
# The escape character is generated with printf and the lines are printed with
# printf '%s\n'. Whether `echo` expands "\033" differs between shells, so the
# banner no longer depends on which shell /bin/sh is.

ESC=$(printf '\033')
B="${ESC}[34m"      # blue
R="${ESC}[31m"      # red (defined but not used below)
S="${ESC}[34;1m"    # bold blue
N="${ESC}[0m"       # reset attributes

printf '%s\n' "$B Welcome to$S reservoir$N$B, the EcoHealth Alliance Modeling &"
printf '%s\n' "$B Analytics machine image. Please read our users' guide at"
printf '%s\n' "$B https://github.com/ecohealthalliance/eha-servers."
# Reset colours so that the user's prompt is not tinted.
printf '%s\n' "$N"
4 changes: 4 additions & 0 deletions config/rserver.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# R Studio server configuration file. Put in /etc/rstudio/rserver.conf

# NOTE(review): 0 presumably disables the idle sign-in timeout -- confirm
# against the RStudio Server administration guide.
auth-timeout-minutes=0
# NOTE(review): presumably how long a "stay signed in" login remains valid,
# in days -- confirm.
auth-stay-signed-in-days=30
Loading

0 comments on commit 7695e8b

Please sign in to comment.