main compiled + tests passed
This commit is contained in:
parent
0ac6024b63
commit
a6e6f7405f
3 changed files with 57 additions and 4 deletions
BIN
__pycache__/wordfreq.cpython-313.pyc
Normal file
BIN
__pycache__/wordfreq.cpython-313.pyc
Normal file
Binary file not shown.
32
topmost.py
Normal file
32
topmost.py
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
import sys
|
||||||
|
import wordfreq
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print the most frequent words from a file or URL.

    Command-line arguments:
        1: path to the stop-word file (one stop word per line)
        2: path or URL of the word file to analyze
        3: number of top words to print
    """
    stop_word_file = sys.argv[1]
    word_file = sys.argv[2]
    amount_of_words = int(sys.argv[3])

    # Treat the source as a URL only when it starts with a scheme; the old
    # substring test ("https" in word_file) misclassified any local file
    # whose name happened to contain "https", and missed plain http:// URLs.
    if word_file.startswith(("http://", "https://")):
        response = urllib.request.urlopen(word_file)
        lines = response.read().decode("utf8").splitlines()
    else:
        # Context manager guarantees the handle is closed even on error.
        with open(word_file, encoding="utf-8") as inp_file:
            lines = inp_file.readlines()
    tokenized = wordfreq.tokenize(lines)

    with open(stop_word_file, encoding="utf-8") as stop_word_lines:
        stop_words = stop_word_lines.read().split("\n")

    counted_words = wordfreq.countWords(tokenized, stop_words)
    wordfreq.printTopMost(counted_words, amount_of_words)


main()
|
||||||
29
wordfreq.py
29
wordfreq.py
|
|
@ -5,7 +5,7 @@ class CharType(Enum):
|
||||||
NONE = 0
|
NONE = 0
|
||||||
DIGIT = 1
|
DIGIT = 1
|
||||||
ALPHA = 2
|
ALPHA = 2
|
||||||
SPACE = 3
|
WHITESPACE = 3
|
||||||
SYMBOL = 4
|
SYMBOL = 4
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -14,8 +14,8 @@ def char_type(char: str) -> CharType:
|
||||||
return CharType.DIGIT
|
return CharType.DIGIT
|
||||||
elif char.isalpha():
|
elif char.isalpha():
|
||||||
return CharType.ALPHA
|
return CharType.ALPHA
|
||||||
elif char == " ":
|
elif char == " " or char == " ":
|
||||||
return CharType.SPACE
|
return CharType.WHITESPACE
|
||||||
return CharType.SYMBOL
|
return CharType.SYMBOL
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -27,7 +27,7 @@ def tokenize(lines: list[str]) -> list[str]:
|
||||||
for i, char in enumerate(line.strip().lower()):
|
for i, char in enumerate(line.strip().lower()):
|
||||||
if len(current_word) == 0:
|
if len(current_word) == 0:
|
||||||
ct = char_type(char)
|
ct = char_type(char)
|
||||||
if ct == CharType.SPACE:
|
if ct == CharType.WHITESPACE:
|
||||||
continue
|
continue
|
||||||
if ct == CharType.SYMBOL:
|
if ct == CharType.SYMBOL:
|
||||||
result.append(char)
|
result.append(char)
|
||||||
|
|
@ -38,3 +38,24 @@ def tokenize(lines: list[str]) -> list[str]:
|
||||||
result.append("".join(current_word))
|
result.append("".join(current_word))
|
||||||
current_word = []
|
current_word = []
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def countWords(words, stopWords=None):
    """Count occurrences of each word, ignoring stop words.

    Parameters:
        words: iterable of word strings to count.
        stopWords: optional iterable of words to exclude (default: none).

    Returns:
        dict mapping each non-stop word to its occurrence count.
    """
    # None sentinel instead of a mutable default list (shared-state pitfall).
    excluded = set(stopWords) if stopWords is not None else set()
    counts = {}  # renamed from 'dict', which shadowed the builtin
    for word in words:
        if word not in excluded:
            # dict.get avoids the double lookup of `in dict.keys()` + index
            counts[word] = counts.get(word, 0) + 1
    return counts
|
||||||
|
|
||||||
|
def printTopMost(frequencies, n):
    """Print the n most frequent words, one per line.

    Parameters:
        frequencies: dict mapping word -> count.
        n: maximum number of entries to print.

    Each line is the word left-justified to 19 columns followed by the
    count right-justified to 5 columns.
    """
    # sorted() is stable, so ties keep dict insertion order, exactly as the
    # original append-then-sort loop did. 'ranked' replaces the old local
    # named 'list', which shadowed the builtin.
    ranked = sorted(frequencies.items(), key=lambda item: -item[1])
    # max(n, 0) preserves the original range(0, n) behavior for negative n
    # (print nothing) — a bare ranked[:n] would drop from the end instead.
    for word, freq in ranked[: max(n, 0)]:
        print(word.ljust(19), str(freq).rjust(5))
|
||||||
Loading…
Add table
Add a link
Reference in a new issue