From 1b3aed2e15e17b3efb5951609e511c249ea2dc70 Mon Sep 17 00:00:00 2001
From: arne314 <73391160+arne314@users.noreply.github.com>
Date: Fri, 3 Jan 2025 17:09:05 +0100
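Subject: [PATCH] fix(anki): unicode characters in flashcards

FileHandler now reads, stores and writes file content as bytes instead
of str. Node row/column positions are applied to the raw bytes and the
resulting slice is decoded afterwards, so files containing multi-byte
UTF-8 characters are sliced and rewritten at the correct positions.

Also drop the sorted() calls around the glob loops in parser.py and
annotate the preamble parameter of _parse_file as `str | None`.
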
---
 src/anki/file_handler.py | 31 ++++++++++++++++---------------
 src/anki/parser.py       |  8 +++-----
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/src/anki/file_handler.py b/src/anki/file_handler.py
index 50f31f8..dc31438 100644
--- a/src/anki/file_handler.py
+++ b/src/anki/file_handler.py
@@ -1,5 +1,4 @@
 import hashlib
-
 from pathlib import Path
 from typing import List
 
@@ -8,7 +7,7 @@ import tree_sitter
 
 class FileHandler:
     file_path: Path
-    file_content: List[str]
+    file_content: List[bytes]
 
     def __init__(self, path: Path):
         self.file_path = path
@@ -19,17 +18,19 @@ class FileHandler:
         return self.file_path.parent
 
     def get_bytes(self) -> bytes:
-        return bytes("".join(self.file_content), encoding="utf-8")
+        return b"".join(self.file_content)
 
     def get_file_hash(self) -> str:
-        return hashlib.md5("".join(self.file_content).encode(), usedforsecurity=False).hexdigest()
+        return hashlib.md5(self.get_bytes(), usedforsecurity=False).hexdigest()
 
-    def get_node_content(self, node: tree_sitter.Node, remove_outer=False):
-        content = "".join(self.file_content[node.start_point.row : node.end_point.row + 1])[
-            node.start_point.column : -(
-                len(self.file_content[node.end_point.row]) - node.end_point.column
-            )
-        ]
+    def get_node_content(self, node: tree_sitter.Node, remove_outer=False) -> str:
+        content = (
+            b"".join(self.file_content[node.start_point.row : node.end_point.row + 1])[
+                node.start_point.column : -(
+                    len(self.file_content[node.end_point.row]) - node.end_point.column
+                )
+            ]
+        ).decode()
         return content[1:-1] if remove_outer else content
 
     def update_node_content(self, node: tree_sitter.Node, value):
@@ -38,18 +39,18 @@ class FileHandler:
         last_line = self.file_content[node.end_point.row][node.end_point.column :]
         new_lines.extend(
             (
-                line + "\n"
-                for line in (first_line + str(value) + last_line).split("\n")
-                if line != ""
+                line + b"\n"
+                for line in (first_line + str(value).encode() + last_line).split(b"\n")
+                if line != b""
             )
         )
         new_lines.extend(self.file_content[node.end_point.row + 1 :])
         self.file_content = new_lines
 
     def read(self):
-        with self.file_path.open(encoding="utf-8") as f:
+        with self.file_path.open("rb") as f:
             self.file_content = f.readlines()
 
     def write(self):
-        with self.file_path.open("w", encoding="utf-8") as f:
+        with self.file_path.open("wb") as f:
             f.writelines(self.file_content)
diff --git a/src/anki/parser.py b/src/anki/parser.py
index 308413d..21b0090 100644
--- a/src/anki/parser.py
+++ b/src/anki/parser.py
@@ -56,7 +56,7 @@ class FlashcardParser:
         self.file_handlers = []
         self._load_file_hashes()
 
-    def _parse_file(self, file: FileHandler, preamble: str) -> List[Flashcard]:
+    def _parse_file(self, file: FileHandler, preamble: str | None) -> List[Flashcard]:
         cards = []
         tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8")
         card_captures = self.flashcard_query.captures(tree.root_node)
@@ -133,14 +133,12 @@ class FlashcardParser:
                     return preamble
                 path = path.parent
 
-        for file in sorted(
-            glob.glob(f"{root_dir}/**/.anki.typ", include_hidden=True, recursive=True)
-        ):
+        for file in glob.glob(f"{root_dir}/**/.anki.typ", include_hidden=True, recursive=True):
             file = Path(file)
             if file.name == ".anki.typ":
                 preambles[file.parent] = file.read_text(encoding="utf-8")
 
-        for file in sorted(glob.glob(f"{scan_dir}/**/**.typ", recursive=True)):
+        for file in glob.glob(f"{scan_dir}/**/**.typ", recursive=True):
             file = Path(file)
             if single_file is not None and file != single_file:
                 continue