-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgutenwords_0.py
executable file
·49 lines (38 loc) · 1.29 KB
/
gutenwords_0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python3
"""
A very simple script returning the top N words in a text file
Arguments:
- n
- filename
"""
import sys
import os
# The program requires two arguments: let's check we got them
if len(sys.argv) != 3:
print("Usage: python3 words.py n filename")
print("ERROR: Supply two arguments")
exit(1)
# Retrieve the two parameters from the command line using sys.argv
filename, n = sys.argv[1], int(sys.argv[2])
# Check that the file exists
if not os.path.exists(filename):
print(f"ERROR: File {filename} not found")
exit(2)
# Read the file
with open(filename, mode="r", encoding="latin-1") as f:
text = f.read()
# Split the text into words
words = text.split()
# Create a dictionary of word:count
word_counts = {}
for word in words:
# TODO: We can consider doing some cleaning of the words here (e.g. stripping punctuation)
if word not in word_counts:
word_counts[word] = 0
word_counts[word] += 1
# Sort the dictionary by value: see for example https://www.freecodecamp.org/news/sort-dictionary-by-value-in-python/
sorted_word_counts = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)
# Print the top n words
for word, count in sorted_word_counts[:n]:
frequency = count / len(words) * 100
print(f"{word}\t{count}\t{frequency:.2f}%")