fix(anki): unicode characters in flashcards

This commit is contained in:
arne314
2025-01-03 17:09:05 +01:00
parent f4378dd9e1
commit 1b3aed2e15
2 changed files with 19 additions and 20 deletions

View File

@@ -1,5 +1,4 @@
import hashlib import hashlib
from pathlib import Path from pathlib import Path
from typing import List from typing import List
@@ -8,7 +7,7 @@ import tree_sitter
class FileHandler: class FileHandler:
file_path: Path file_path: Path
file_content: List[str] file_content: List[bytes]
def __init__(self, path: Path): def __init__(self, path: Path):
self.file_path = path self.file_path = path
@@ -19,17 +18,19 @@ class FileHandler:
return self.file_path.parent return self.file_path.parent
def get_bytes(self) -> bytes: def get_bytes(self) -> bytes:
return bytes("".join(self.file_content), encoding="utf-8") return b"".join(self.file_content)
def get_file_hash(self) -> str: def get_file_hash(self) -> str:
return hashlib.md5("".join(self.file_content).encode(), usedforsecurity=False).hexdigest() return hashlib.md5(self.get_bytes(), usedforsecurity=False).hexdigest()
def get_node_content(self, node: tree_sitter.Node, remove_outer=False): def get_node_content(self, node: tree_sitter.Node, remove_outer=False) -> str:
content = "".join(self.file_content[node.start_point.row : node.end_point.row + 1])[ content = (
b"".join(self.file_content[node.start_point.row : node.end_point.row + 1])[
node.start_point.column : -( node.start_point.column : -(
len(self.file_content[node.end_point.row]) - node.end_point.column len(self.file_content[node.end_point.row]) - node.end_point.column
) )
] ]
).decode()
return content[1:-1] if remove_outer else content return content[1:-1] if remove_outer else content
def update_node_content(self, node: tree_sitter.Node, value): def update_node_content(self, node: tree_sitter.Node, value):
@@ -38,18 +39,18 @@ class FileHandler:
last_line = self.file_content[node.end_point.row][node.end_point.column :] last_line = self.file_content[node.end_point.row][node.end_point.column :]
new_lines.extend( new_lines.extend(
( (
line + "\n" line + b"\n"
for line in (first_line + str(value) + last_line).split("\n") for line in (first_line + str(value).encode() + last_line).split(b"\n")
if line != "" if line != b""
) )
) )
new_lines.extend(self.file_content[node.end_point.row + 1 :]) new_lines.extend(self.file_content[node.end_point.row + 1 :])
self.file_content = new_lines self.file_content = new_lines
def read(self): def read(self):
with self.file_path.open(encoding="utf-8") as f: with self.file_path.open("rb") as f:
self.file_content = f.readlines() self.file_content = f.readlines()
def write(self): def write(self):
with self.file_path.open("w", encoding="utf-8") as f: with self.file_path.open("wb") as f:
f.writelines(self.file_content) f.writelines(self.file_content)

View File

@@ -56,7 +56,7 @@ class FlashcardParser:
self.file_handlers = [] self.file_handlers = []
self._load_file_hashes() self._load_file_hashes()
def _parse_file(self, file: FileHandler, preamble: str) -> List[Flashcard]: def _parse_file(self, file: FileHandler, preamble: str | None) -> List[Flashcard]:
cards = [] cards = []
tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8") tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8")
card_captures = self.flashcard_query.captures(tree.root_node) card_captures = self.flashcard_query.captures(tree.root_node)
@@ -133,14 +133,12 @@ class FlashcardParser:
return preamble return preamble
path = path.parent path = path.parent
for file in sorted( for file in glob.glob(f"{root_dir}/**/.anki.typ", include_hidden=True, recursive=True):
glob.glob(f"{root_dir}/**/.anki.typ", include_hidden=True, recursive=True)
):
file = Path(file) file = Path(file)
if file.name == ".anki.typ": if file.name == ".anki.typ":
preambles[file.parent] = file.read_text(encoding="utf-8") preambles[file.parent] = file.read_text(encoding="utf-8")
for file in sorted(glob.glob(f"{scan_dir}/**/**.typ", recursive=True)): for file in glob.glob(f"{scan_dir}/**/**.typ", recursive=True):
file = Path(file) file = Path(file)
if single_file is not None and file != single_file: if single_file is not None and file != single_file:
continue continue