Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SNOW-103] Create a streamlit app template #68

Merged
merged 32 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
4fc91ad
Initial commit for streamlit app template
jaymedina Jul 19, 2024
4bf59d4
example_secrets.toml
jaymedina Jul 19, 2024
df7a308
refactor main app script
jaymedina Jul 19, 2024
619d2c3
update example_secrets.toml
jaymedina Jul 19, 2024
efad37b
New queries.py. Working with snowflake data. Moved out sample data.
jaymedina Jul 19, 2024
40693fd
New widgets.py
jaymedina Jul 20, 2024
ad77817
New utils.py. Some reformatting of app.py
jaymedina Jul 22, 2024
d61dc38
Fixing imports
jaymedina Jul 22, 2024
387ffb6
Turn queries.py vars into global vars. Finish appy.py
jaymedina Jul 22, 2024
41ce43c
New tests/ and toolkit/ folder. Moving files
jaymedina Jul 22, 2024
dbd08c2
New requirements.txt. Small updates to app.py
jaymedina Jul 22, 2024
bb6e076
New Dockerfile
jaymedina Jul 22, 2024
76e86cb
Updated requirements.txt
jaymedina Jul 23, 2024
865eba4
Updated Dockerfile to use specific server address and port
jaymedina Jul 24, 2024
63818e5
Create README.md
jaymedina Jul 24, 2024
957d67e
Add examples
jaymedina Jul 24, 2024
5c262b9
New .dockerignore to prevent secrets.toml from being added to docker …
jaymedina Jul 24, 2024
5836a7e
Update README.md
jaymedina Jul 24, 2024
9c03639
Add docker-compose file
jaymedina Jul 24, 2024
b2017e3
Adding steps to "Launch your app"
jaymedina Jul 24, 2024
11e6044
Update EC2 instructions
jaymedina Jul 24, 2024
7207fde
Update to build your app section
jaymedina Jul 25, 2024
e53376e
.
jaymedina Jul 31, 2024
748a814
New test suite. Updated documentation
jaymedina Aug 3, 2024
02bf472
Final updates to README
jaymedina Aug 3, 2024
3ecf2c7
Ignore pycache
jaymedina Aug 3, 2024
68cd091
Adding pre-commit hook for black and isort
jaymedina Aug 13, 2024
c741ba1
Introduce SYNID global var for queries.py
jaymedina Aug 13, 2024
a72097b
Updated example in README.md
jaymedina Aug 13, 2024
903a3df
Separate .gitignore
jaymedina Aug 13, 2024
36aaa59
Untracked __pycache__ directories as specified in .gitignore
jaymedina Aug 13, 2024
d96317e
Updated syntax in .gitignore
jaymedina Aug 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions streamlit_template/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Pre-commit hooks: keep imports sorted (isort) and code formatted (black).
# Versions are pinned to match requirements.txt (isort==5.13.2, black==24.3.0).
repos:
  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort
        name: isort (python)

  - repo: https://github.com/psf/black
    rev: 24.3.0
    hooks:
      - id: black
        language_version: python3
8 changes: 3 additions & 5 deletions streamlit_template/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,11 @@ QUERY_NUMBER_OF_FILES = """
select
count(*) as number_of_files
from
node_latest
synapse_data_warehouse.synapse.node_latest
where
project_id = '53214489'
and
node_type = 'file' // we want files, not folders or any other entity
and
annotations is not NULL;
node_type = 'file';
"""
```

Expand Down Expand Up @@ -114,7 +112,7 @@ as you see fit.
### 6. Dockerize your Application

- Update the `requirements.txt` file with the packages used in any of the scripts above.
- Ensure you have pushed all your changes to your fork of the repository that you are working in.
- Ensure you have pushed all your changes to your fork of the repository that you are working in (remember not to commit your `secrets.toml` file).
jaymedina marked this conversation as resolved.
Show resolved Hide resolved
- **_(Optional)_** You can choose to push a Docker image to the GitHub Container Registry to pull it directly from the container registry when ready to deploy.
For instructions on how to deploy your Docker image to the GitHub Container Registry, [see here](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry).

Expand Down
Empty file.
35 changes: 23 additions & 12 deletions streamlit_template/app.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import numpy as np
import streamlit as st

# Project-local helpers: SQL query strings, Snowflake access, and chart widgets.
# NOTE(review): the pre-isort import lines were left in alongside the formatted
# ones, producing duplicate imports — deduplicated here.
from toolkit.queries import (
    QUERY_ENTITY_DISTRIBUTION,
    QUERY_PROJECT_DOWNLOADS,
    QUERY_PROJECT_SIZES,
    QUERY_UNIQUE_USERS,
)
from toolkit.utils import get_data_from_snowflake
from toolkit.widgets import plot_download_sizes, plot_unique_users_trend

# Custom CSS for styling: inject style.css into the page once at startup.
# (The pre-black single-quoted version of this block was duplicated alongside
# the formatted one — only one read/inject is needed.)
with open("style.css") as f:
    st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


def main():

Expand All @@ -21,14 +24,21 @@ def main():

# 2. Transform the data as needed
convert_to_gib = 1024 * 1024 * 1024
project_sizes = dict(PROJECT_ID=list(project_sizes_df['PROJECT_ID']), TOTAL_CONTENT_SIZE=list(project_sizes_df['TOTAL_CONTENT_SIZE']))
total_data_size = sum(project_sizes['TOTAL_CONTENT_SIZE']) #round(sum(project_sizes['TOTAL_CONTENT_SIZE']) / convert_to_gib, 2)
average_project_size = round(np.mean(project_sizes['TOTAL_CONTENT_SIZE']) / convert_to_gib, 2)
project_sizes = dict(
PROJECT_ID=list(project_sizes_df["PROJECT_ID"]),
TOTAL_CONTENT_SIZE=list(project_sizes_df["TOTAL_CONTENT_SIZE"]),
)
total_data_size = sum(
project_sizes["TOTAL_CONTENT_SIZE"]
) # round(sum(project_sizes['TOTAL_CONTENT_SIZE']) / convert_to_gib, 2)
average_project_size = round(
np.mean(project_sizes["TOTAL_CONTENT_SIZE"]) / convert_to_gib, 2
)

# 3. Format the app, and visualize the data with your widgets in widgets.py
# -------------------------------------------------------------------------
# Row 1 -------------------------------------------------------------------
st.markdown('### Monthly Overview :calendar:')
st.markdown("### Monthly Overview :calendar:")
col1, col2, col3 = st.columns([1, 1, 1])
col1.metric("Total Storage Occupied", f"{total_data_size} GB", "7.2 GB")
col2.metric("Avg. Project Size", f"{average_project_size} GB", "8.0 GB")
Expand All @@ -45,5 +55,6 @@ def main():
st.markdown("### Entity Trends :pencil:")
st.dataframe(entity_distribution_df)


if __name__ == "__main__":
    # Run the app exactly once; the duplicated call (old line retained next to
    # the reformatted one) would have executed main() twice.
    main()
3 changes: 3 additions & 0 deletions streamlit_template/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
black==24.3.0
isort==5.13.2
numpy==1.26.3
streamlit==1.36.0
pandas==2.2.2
plotly==5.22.0
pytest==8.3.2
pre-commit==3.6.0
snowflake-connector-python==3.9.1
snowflake-snowpark-python==1.15.0
Empty file.
10 changes: 8 additions & 2 deletions streamlit_template/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,18 @@
from streamlit.testing.v1 import AppTest

# Ensure that the base directory is in PYTHONPATH so ``toolkit`` and other
# project modules can be imported by the app under test.
# (The pre-format duplicate of this append was retained in the diff — the
# path only needs to be added once.)
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

# Timeout (in seconds) to wait for the app to load before shutdown.
DEFAULT_TIMEOUT = 30


@pytest.fixture(scope="module")
def app():
    """Load and run the main Streamlit app once per test module.

    Returns the executed ``AppTest`` instance so individual tests can
    inspect rendered elements. The old single-line return was left in
    above the reformatted one, making the latter unreachable — removed.
    """
    # Point to your main Streamlit app file
    return AppTest.from_file(
        "app.py", default_timeout=DEFAULT_TIMEOUT
    ).run()


def test_monthly_overview(app):
"""
Expand All @@ -44,6 +48,7 @@ def test_monthly_overview(app):
assert avg_project_size.label == "Avg. Project Size"
assert annual_cost.label == "Annual Cost"


def test_plotly_charts(app):
"""Ensure both plotly charts are being displayed."""

Expand All @@ -52,6 +57,7 @@ def test_plotly_charts(app):
assert plotly_charts is not None
assert len(plotly_charts) == 2


def test_dataframe(app):
"""Ensure that the dataframe is being displayed."""

Expand Down
2 changes: 1 addition & 1 deletion streamlit_template/toolkit/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Package initializer. The diff retained both the old (no trailing newline)
# and new copies of this line — a single import suffices.
# NOTE(review): importing ``toolkit`` from within toolkit/__init__.py is a
# self-import; confirm whether this line is needed at all.
import toolkit
7 changes: 4 additions & 3 deletions streamlit_template/toolkit/queries.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
QUERY_ENTITY_DISTRIBUTION = """
SYNID = 20446927
QUERY_ENTITY_DISTRIBUTION = f"""
with htan_projects as (
// select distinct cast(replace(NF.projectid, 'syn', '') as INTEGER) as project_id from sage.portal_raw.HTAN
select
Expand All @@ -7,7 +8,7 @@
synapse_data_warehouse.synapse.node_latest,
lateral flatten(input => node_latest.scope_ids) scopes
where
id = 20446927
id = {SYNID}
)
SELECT
node_type,
Expand Down Expand Up @@ -163,4 +164,4 @@
ORDER BY
project_id,
access_month;
"""
"""
4 changes: 3 additions & 1 deletion streamlit_template/toolkit/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import streamlit as st
from snowflake.snowpark import Session


@st.cache_resource
def connect_to_snowflake():
    """Build a Snowpark session from the credentials in Streamlit secrets.

    ``st.cache_resource`` caches the session object, so all callers share
    a single connection across Streamlit reruns.
    """
    return Session.builder.configs(st.secrets.snowflake).create()


@st.cache_data
def get_data_from_snowflake(query=""):
    """Execute *query* against Snowflake and return the result.

    Parameters
    ----------
    query : str
        SQL text to run via the cached Snowpark session.

    Returns
    -------
    pandas.DataFrame
        The query result converted with ``to_pandas()``; results are
        cached by ``st.cache_data`` keyed on the query text.
    """
    session = connect_to_snowflake()
    node_latest = session.sql(query).to_pandas()
    # The diff retained a duplicated, unreachable second ``return`` — removed.
    return node_latest
Loading