-
Notifications
You must be signed in to change notification settings - Fork 1
/
rate.py
105 lines (86 loc) · 4.25 KB
/
rate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import json
import argparse
import sys
import matplotlib.pyplot as plt
def initialize_language_statistics_dictionary(languages):
    """Create zeroed statistics counters for every language.

    Args:
        languages: iterable of language names used as dictionary keys.

    Returns:
        A dict mapping each language name to its own fresh counter dict
        ``{'vacancy_count': 0, 'payment_sum': 0}``.
    """
    # The counter dict is built inside the comprehension so that every
    # language gets an independent object rather than a shared one.
    return {name: {'vacancy_count': 0, 'payment_sum': 0} for name in languages}
def is_language_found_in_text(language_synonym_list, text):
    """Tell whether any synonym of a language occurs in ``text``.

    The comparison is case-insensitive and substring based, so a short
    synonym also matches inside longer words (for example 'Java' matches
    'JavaScript').

    Args:
        language_synonym_list: iterable of strings naming one language.
        text: the string to search; ``None`` is treated as "no match".

    Returns:
        True if at least one synonym is a substring of ``text``,
        False otherwise.
    """
    if text is None:
        return False
    # Lower-case the haystack once instead of once per synonym.
    lowered_text = text.lower()
    return any(synonym.lower() in lowered_text for synonym in language_synonym_list)
def determine_vacancy_languages(vacancy, language_search_keywords):
    """List the languages whose keywords appear in a vacancy.

    Both the 'profession' and the 'candidat' fields of the vacancy are
    searched; a language is reported when its keywords are found in
    either of them.

    Args:
        vacancy: mapping with 'profession' and 'candidat' entries;
            'candidat' may be None.
        language_search_keywords: mapping of language name to the list
            of keywords that identify it.

    Returns:
        A list of matching language names, in the iteration order of
        ``language_search_keywords``.
    """
    def is_mentioned(keywords):
        in_title = is_language_found_in_text(keywords, vacancy['profession'])
        summary = vacancy['candidat']
        # A missing summary (None) simply contributes no match.
        in_summary = (summary is not None
                      and is_language_found_in_text(keywords, summary))
        return in_title or in_summary

    return [
        language
        for language, keywords in language_search_keywords.items()
        if is_mentioned(keywords)
    ]
def get_language_statistics(vacancy_list):
    """Aggregate vacancy counts and payments per programming language.

    A vacancy that mentions several languages contributes to each of
    them.

    Args:
        vacancy_list: iterable of vacancy mappings; each one must have a
            'payment' entry in addition to the fields read by
            ``determine_vacancy_languages``.

    Returns:
        A dict mapping every known language name to a dict with the keys
        'vacancy_count', 'payment_sum' and 'average_payment'. The average
        is 0 for languages without any matching vacancy.
    """
    # programming languages mentioned in SuperJob research
    # https://www.superjob.ru/research/articles/111800/samye-vysokie-zarplaty-v-sfere-it/
    language_search_keywords = {
        'Java': ['Java'],
        'Python': ['Python'],
        'C/C++': ['C++'],
        'Objective-C': ['Objective-C', 'Obj-C'],
        'C#': ['C#'],
        'PHP': ['PHP'],
        'JavaScript': ['JavaScript', 'JS'],
        'Ruby': ['Ruby'],
        'Delphi': ['Delphi'],
        'Perl': ['Perl'],
    }
    stats = initialize_language_statistics_dictionary(language_search_keywords.keys())

    # First pass: accumulate counts and payment totals.
    for vacancy in vacancy_list:
        for language in determine_vacancy_languages(vacancy, language_search_keywords):
            entry = stats[language]
            entry['vacancy_count'] += 1
            entry['payment_sum'] += vacancy['payment']

    # Second pass: derive the averages, avoiding division by zero.
    for entry in stats.values():
        total_vacancies = entry['vacancy_count']
        entry['average_payment'] = (
            0 if total_vacancies == 0 else entry['payment_sum'] / total_vacancies
        )
    return stats
def print_statistics_for_each_language(language_statistics, outfile):
    """Write a three-line text report for every language.

    Languages are reported in sorted name order. Both numbers are
    formatted with ``%d``, so a fractional average is truncated.

    Args:
        language_statistics: mapping of language name to a dict with
            'vacancy_count' and 'average_payment' entries.
        outfile: object with a ``write`` method that receives the text.
    """
    emit = outfile.write
    for name in sorted(language_statistics):
        counters = language_statistics[name]
        emit('Name: %s\n' % name)
        emit(' Number of vacancies: %d\n' % counters['vacancy_count'])
        emit(' Average payment: %d\n' % counters['average_payment'])
def show_statistics_histogram(statistics):
    """Display a bar chart of the average payment for each language.

    Bars are drawn in sorted language-name order and the chart is shown
    with ``plt.show()``.

    Args:
        statistics: mapping of language name to a dict holding an
            'average_payment' entry.
    """
    language_names = sorted(statistics)
    average_payments = [statistics[name]['average_payment'] for name in language_names]
    bar_coordinates = range(len(language_names))

    # Keep the handle of the figure we create instead of assuming that it
    # is figure number 1.
    figure = plt.figure(figsize=(12, 7))
    # The canvas-level set_window_title() was deprecated in Matplotlib 3.4
    # and later removed; the manager-level method works on old and new
    # releases alike. The manager can be None for figures without a window.
    manager = figure.canvas.manager
    if manager is not None:
        manager.set_window_title('language salaries')

    plt.bar(bar_coordinates, average_payments, tick_label=language_names, align='center')
    plt.ylabel('Average salary')
    plt.xlabel('Language name')
    plt.title('Comparison of average salaries among different programming languages')
    plt.show()
def get_cli_arguments():
    """Parse the command-line options of the script.

    Returns:
        The ``argparse`` namespace with three attributes: ``infile``
        (readable file, ``sys.stdin`` by default), ``outfile`` (writable
        file, ``sys.stdout`` by default) and ``graph`` (bool, False
        unless ``-g``/``--graph`` is given).
    """
    parser = argparse.ArgumentParser()

    # The two stream options differ only in flags, mode, default and help.
    stream_options = (
        ('-i', '--infile', 'r', sys.stdin, 'input JSON file, stdin by default'),
        ('-o', '--outfile', 'w', sys.stdout, 'output file, stdout by default'),
    )
    for short_flag, long_flag, mode, fallback, description in stream_options:
        parser.add_argument(
            short_flag,
            long_flag,
            type=argparse.FileType(mode),
            default=fallback,
            help=description,
        )

    parser.add_argument(
        '-g',
        '--graph',
        action='store_true',
        help='in addition to text output, provide '
             'graphical representation of the data',
    )
    return parser.parse_args()
def main():
    """Run the script: load vacancies, print the report, optionally plot.

    Reads a JSON document from the input stream chosen on the command
    line, writes the per-language text report to the chosen output
    stream and, when ``--graph`` was given, shows the histogram.
    """
    args = get_cli_arguments()
    try:
        vacancies = json.load(args.infile)
        stats = get_language_statistics(vacancies)
        print_statistics_for_each_language(stats, args.outfile)
    finally:
        # argparse.FileType opens the files but nothing closes them, so
        # release them here. The standard streams are left open; stdout is
        # only flushed so the report is visible before the plot window
        # blocks.
        if args.infile is not sys.stdin:
            args.infile.close()
        if args.outfile is sys.stdout:
            args.outfile.flush()
        else:
            args.outfile.close()
    if args.graph:
        show_statistics_histogram(stats)


if __name__ == '__main__':
    main()