-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexplore_page.py
75 lines (59 loc) · 2.18 KB
/
explore_page.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
def shorten_categories(categories, cutoff):
    """Build a mapping that collapses infrequent categories into 'Other'.

    Args:
        categories: Label -> count pairs exposing ``.items()``, e.g. the
            Series returned by ``value_counts()`` or a plain dict.
        cutoff: Minimum count a label needs in order to keep its own name.

    Returns:
        dict mapping every label to itself when its count is at least
        ``cutoff``, and to the string 'Other' otherwise.
    """
    # Iterate label/count pairs directly instead of indexing positionally
    # into .index and .values.
    return {
        label: (label if count >= cutoff else 'Other')
        for label, count in categories.items()
    }
def clean_experience(x):
    """Convert a years-of-experience survey answer to a number.

    The two textual answers are mapped to fixed values ('More than 50 years'
    -> 50, 'Less than 1 year' -> 0.5); anything else is passed to ``float``.
    """
    textual_answers = (
        ('More than 50 years', 50),
        ('Less than 1 year', 0.5),
    )
    for answer, years in textual_answers:
        if x == answer:
            return years
    return float(x)
def clean_education(x):
    """Collapse a detailed education answer into one of four buckets.

    The first matching substring wins, checked in this order: Bachelor’s,
    Master’s, Professional. Answers matching none of them are labelled
    "Less than a Bachelors". Note the survey text uses a typographic
    apostrophe (’), so a straight apostrophe does not match.
    """
    buckets = (
        ("Bachelor’s degree", "Bachelor’s degree"),
        ("Master’s degree", "Master’s degree"),
        ("Professional degree", "Post grad"),
    )
    return next(
        (label for needle, label in buckets if needle in x),
        "Less than a Bachelors",
    )
@st.cache_data
def load_data():
    """Load and clean the survey results used by the explore page.

    Reads ``survey_results_public.csv`` from the working directory and:

    * keeps only the Country, EdLevel, YearsCodePro, Employment and
      ConvertedCompYearly columns (the last renamed to ``Salary``);
    * drops rows with a missing value in any of those columns;
    * keeps rows whose Employment is exactly "Employed, full-time", then
      drops the Employment column;
    * removes countries with fewer than 400 remaining rows;
    * keeps salaries between 10000 and 250000 inclusive;
    * converts YearsCodePro to numbers and EdLevel to coarse buckets.

    The result is cached by Streamlit via ``st.cache_data``.

    Returns:
        pandas.DataFrame with columns Country, EdLevel, YearsCodePro, Salary.
    """
    columns = ["Country", "EdLevel", "YearsCodePro", "Employment", "ConvertedCompYearly"]
    # Parse only the columns that are used instead of the whole CSV.
    df = pd.read_csv("survey_results_public.csv", usecols=columns)
    # usecols returns columns in file order; re-select to fix the order.
    df = df[columns].rename({"ConvertedCompYearly": "Salary"}, axis=1)
    # dropna() already removes rows without a Salary, so no separate
    # not-null filter on that column is needed.
    df = df.dropna()
    df = df[df["Employment"] == "Employed, full-time"]
    df = df.drop("Employment", axis=1)

    # Country counts are taken before the salary filter, so the 400-row
    # cutoff applies to all full-time rows, not just in-range salaries.
    country_map = shorten_categories(df["Country"].value_counts(), 400)
    df["Country"] = df["Country"].map(country_map)
    df = df[df["Salary"] <= 250000]
    df = df[df["Salary"] >= 10000]
    df = df[df["Country"] != "Other"]

    df["YearsCodePro"] = df["YearsCodePro"].apply(clean_experience)
    df["EdLevel"] = df["EdLevel"].apply(clean_education)
    return df
# Load the cleaned survey data when this module is imported;
# show_explore_page() reads this module-level DataFrame.
df=load_data()
def show_explore_page():
    """Render the salary exploration page.

    Draws three views of the module-level ``df``: a pie chart of row counts
    per country, a bar chart of mean salary per country, and a line chart of
    mean salary per years of professional coding experience.
    """
    st.title("Explore Software Engineer Salaries")
    st.write("\n### Stack Overflow Developer Survey 2023\n")

    country_counts = df["Country"].value_counts()
    fig1, ax1 = plt.subplots()
    try:
        ax1.pie(
            country_counts,
            labels=country_counts.index,
            autopct="%1.1f%%",
            shadow=True,
            startangle=230,
        )
        st.write("#### Number of Data from different countries")
        st.pyplot(fig1)
    finally:
        # pyplot keeps every figure it creates in a global registry; without
        # an explicit close a new figure would pile up on every rerun.
        plt.close(fig1)

    st.write("#### Mean Salary Based On Country")
    salary_by_country = (
        df.groupby(["Country"])["Salary"].mean().sort_values(ascending=True)
    )
    st.bar_chart(salary_by_country)

    st.write("#### Mean Salary Based On Experience")
    salary_by_experience = (
        df.groupby(["YearsCodePro"])["Salary"].mean().sort_values(ascending=True)
    )
    st.line_chart(salary_by_experience)