For anyone studying Japanese: you might like this. I asked ChatGPT to rewrite the code. It now finds every kanji in the Anki deck, counts how many times each one appears, and lists them all in descending order of frequency.
import re
import sys
from collections import Counter
# Matches a whole <div id=tag>...</div> span; DOTALL lets one span cross line
# breaks. Kanji inside these spans are deliberately left out of the count.
_TAG_DIV_RE = re.compile(r'<div id=tag>.*?</div>', flags=re.DOTALL)

# Matches a single kanji. The basic block alone (U+4E00-U+9FFF) misses
# characters such as U+20B9F (𠮟), so the neighbouring ideograph ranges are
# included as well.
_KANJI_RE = re.compile(
    "["
    "\u3400-\u4dbf"          # CJK Unified Ideographs Extension A
    "\u4e00-\u9fff"          # CJK Unified Ideographs (basic block)
    "\uf900-\ufaff"          # CJK Compatibility Ideographs
    "\U00020000-\U0002fa1f"  # supplementary-plane ideographs (Extension B onward)
    "]"
)


def main(input_file: str = "kanji.txt", output_file: str = "output.txt") -> None:
    """Count the kanji in a text file and write a frequency report.

    The input is read as UTF-8, every ``<div id=tag>...</div>`` span is
    removed, and each remaining kanji is tallied. The report starts with the
    number of distinct kanji, followed by one ``kanji: count`` line per kanji,
    most frequent first. Kanji with equal counts keep the order in which they
    first appear in the input.

    Args:
        input_file: Path of the UTF-8 text file to scan.
        output_file: Path of the report to write; an existing file is
            overwritten.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            written (e.g. ``FileNotFoundError`` for a missing input file).
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    with open(input_file, "r", encoding="utf-8") as f:
        text = f.read()

    # Drop the tag spans first so the kanji inside them are never counted.
    text = _TAG_DIV_RE.sub("", text)

    kanji_count = Counter(_KANJI_RE.findall(text))

    # most_common() sorts by count, descending, and is stable, so ties stay in
    # first-appearance order.
    kanji_freq = kanji_count.most_common()

    report = [f"Total Unique Kanji: {len(kanji_freq)}\n\n"]
    report.extend(f"{kanji}: {freq}\n" for kanji, freq in kanji_freq)

    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(report)
if __name__ == "__main__":
    # The script takes no command-line arguments: anything beyond the script
    # name itself gets the usage line instead of a run.
    if len(sys.argv) != 1:
        print("Usage: python kanji.py")
    else:
        main()
Name the script kanji.py.
Name the exported list of cards kanji.txt.
The results are written to output.txt.
Kanji appearing in tags are not included.
If you don't know how to run the script, ask ChatGPT; it taught me when I asked.