From 1b3aed2e15e17b3efb5951609e511c249ea2dc70 Mon Sep 17 00:00:00 2001 From: arne314 <73391160+arne314@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:09:05 +0100 Subject: [PATCH] fix(anki): unicode characters in flashcards --- src/anki/file_handler.py | 31 ++++++++++++++++--------------- src/anki/parser.py | 8 +++----- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/anki/file_handler.py b/src/anki/file_handler.py index 50f31f8..dc31438 100644 --- a/src/anki/file_handler.py +++ b/src/anki/file_handler.py @@ -1,5 +1,4 @@ import hashlib - from pathlib import Path from typing import List @@ -8,7 +7,7 @@ import tree_sitter class FileHandler: file_path: Path - file_content: List[str] + file_content: List[bytes] def __init__(self, path: Path): self.file_path = path @@ -19,17 +18,19 @@ class FileHandler: return self.file_path.parent def get_bytes(self) -> bytes: - return bytes("".join(self.file_content), encoding="utf-8") + return b"".join(self.file_content) def get_file_hash(self) -> str: - return hashlib.md5("".join(self.file_content).encode(), usedforsecurity=False).hexdigest() + return hashlib.md5(self.get_bytes(), usedforsecurity=False).hexdigest() - def get_node_content(self, node: tree_sitter.Node, remove_outer=False): - content = "".join(self.file_content[node.start_point.row : node.end_point.row + 1])[ - node.start_point.column : -( - len(self.file_content[node.end_point.row]) - node.end_point.column - ) - ] + def get_node_content(self, node: tree_sitter.Node, remove_outer=False) -> str: + content = ( + b"".join(self.file_content[node.start_point.row : node.end_point.row + 1])[ + node.start_point.column : -( + len(self.file_content[node.end_point.row]) - node.end_point.column + ) + ] + ).decode() return content[1:-1] if remove_outer else content def update_node_content(self, node: tree_sitter.Node, value): @@ -38,18 +39,18 @@ class FileHandler: last_line = self.file_content[node.end_point.row][node.end_point.column :] new_lines.extend( ( - line + "\n" - for line in (first_line + str(value) + last_line).split("\n") - if line != "" + line + b"\n" + for line in (first_line + str(value).encode() + last_line).split(b"\n") + if line != b"" ) ) new_lines.extend(self.file_content[node.end_point.row + 1 :]) self.file_content = new_lines def read(self): - with self.file_path.open(encoding="utf-8") as f: + with self.file_path.open("rb") as f: self.file_content = f.readlines() def write(self): - with self.file_path.open("w", encoding="utf-8") as f: + with self.file_path.open("wb") as f: f.writelines(self.file_content) diff --git a/src/anki/parser.py b/src/anki/parser.py index 308413d..21b0090 100644 --- a/src/anki/parser.py +++ b/src/anki/parser.py @@ -56,7 +56,7 @@ class FlashcardParser: self.file_handlers = [] self._load_file_hashes() - def _parse_file(self, file: FileHandler, preamble: str) -> List[Flashcard]: + def _parse_file(self, file: FileHandler, preamble: str | None) -> List[Flashcard]: cards = [] tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8") card_captures = self.flashcard_query.captures(tree.root_node) @@ -133,14 +133,12 @@ class FlashcardParser: return preamble path = path.parent - for file in sorted( - glob.glob(f"{root_dir}/**/.anki.typ", include_hidden=True, recursive=True) - ): + for file in glob.glob(f"{root_dir}/**/.anki.typ", include_hidden=True, recursive=True): file = Path(file) if file.name == ".anki.typ": preambles[file.parent] = file.read_text(encoding="utf-8") - for file in sorted(glob.glob(f"{scan_dir}/**/**.typ", recursive=True)): + for file in glob.glob(f"{scan_dir}/**/**.typ", recursive=True): file = Path(file) if single_file is not None and file != single_file: continue