diff --git a/__pycache__/wordfreq.cpython-312.pyc b/__pycache__/wordfreq.cpython-312.pyc
new file mode 100644
index 0000000..9345f56
Binary files /dev/null and b/__pycache__/wordfreq.cpython-312.pyc differ
diff --git a/tokenize.py b/tokenize.py
deleted file mode 100644
index e69de29..0000000
diff --git a/wordfreq.py b/wordfreq.py
new file mode 100644
index 0000000..c2b5016
--- /dev/null
+++ b/wordfreq.py
@@ -0,0 +1,40 @@
+from enum import Enum
+
+
+class CharType(Enum):
+	NONE = 0
+	DIGIT = 1
+	ALPHA = 2
+	SPACE = 3
+	SYMBOL = 4
+
+
+def char_type(char: str) -> CharType:
+	if char.isdigit():
+		return CharType.DIGIT
+	elif char.isalpha():
+		return CharType.ALPHA
+	elif char == " ":
+		return CharType.SPACE
+	return CharType.SYMBOL
+
+
+def tokenize(lines: list[str]) -> list[str]:
+	result = []
+	for line in lines:
+		word_type = CharType.NONE
+		current_word = []
+		for i, char in enumerate(line.lower()):
+			if len(current_word) == 0:
+				ct = char_type(char)
+				if ct == CharType.SPACE:
+					continue
+				if ct == CharType.SYMBOL:
+					result.append(char)
+					continue
+				word_type = ct
+			current_word.append(char)
+			if i+1 >= len(line) or word_type != char_type(line[i+1]):
+				result.append("".join(current_word))
+				current_word = []
+	return result