Files
typstar/src/anki/parser.py
2024-12-30 23:47:43 +01:00

159 lines
5.2 KiB
Python

import glob
import json
import re
from functools import cache
from pathlib import Path
from typing import List, Tuple
import appdirs
import tree_sitter
from tree_sitter_language_pack import get_language, get_parser
from .file_handler import FileHandler
from .flashcard import Flashcard
ts_flashcard_query = """
(call
item: [
(call
item: (call
item: (ident) @fncall
(group
(number) @id))
(content) @front)
(call
item: (ident) @fncall
(group
(number) @id
(string) @front))
]
(#eq? @fncall "flashcard")
((content) @back
) @flashcard)
"""
ts_deck_query = """
((comment) @deck)
"""
deck_regex = re.compile(r"\W+ANKI:\s*(\S*)")
class FlashcardParser:
typst_language: tree_sitter.Language
typst_parser: tree_sitter.Parser
flashcard_query: tree_sitter.Query
deck_query: tree_sitter.Query
file_handlers: List[tuple[FileHandler, List[Flashcard]]]
file_hashes: dict[str, str]
file_hashes_store_path: Path = Path(appdirs.user_state_dir("typstar") + "/file_hashes.json")
def __init__(self):
self.typst_language = get_language("typst")
self.typst_parser = get_parser("typst")
self.flashcard_query = self.typst_language.query(ts_flashcard_query)
self.deck_query = self.typst_language.query(ts_deck_query)
self.file_handlers = []
self._load_file_hashes()
def _parse_file(self, file: FileHandler, preamble: str) -> List[Flashcard]:
cards = []
tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8")
card_captures = self.flashcard_query.captures(tree.root_node)
if not card_captures:
return cards
deck_captures = self.deck_query.captures(tree.root_node)
def row_compare(node):
return node.start_point.row
card_captures["id"].sort(key=row_compare)
card_captures["front"].sort(key=row_compare)
card_captures["back"].sort(key=row_compare)
deck_refs: List[Tuple[int, str]] = []
deck_refs_idx = -1
current_deck = None
if deck_captures:
deck_captures["deck"].sort(key=row_compare)
for comment in deck_captures["deck"]:
if match := deck_regex.match(file.get_node_content(comment)):
deck_refs.append((comment.start_point.row, None if match[1].isspace() else match[1]))
for note_id, front, back in zip(card_captures["id"], card_captures["front"], card_captures["back"]):
while deck_refs_idx < len(deck_refs) - 1 and back.end_point.row >= deck_refs[deck_refs_idx + 1][0]:
deck_refs_idx += 1
current_deck = deck_refs[deck_refs_idx][1]
card = Flashcard(
file.get_node_content(front, True),
file.get_node_content(back, True),
current_deck,
int(file.get_node_content(note_id)),
preamble,
file,
)
card.set_ts_nodes(front, back, note_id)
cards.append(card)
return cards
def parse_directory(self, root_dir):
print(f"Parsing flashcards in {root_dir}...")
root_dir = Path(root_dir)
preambles = {}
flashcards = []
@cache
def get_preamble(path: Path) -> str | None:
while path != root_dir:
if preamble := preambles.get(path):
return preamble
path = path.parent
for file in sorted(glob.glob(f"{root_dir}/**/**.typ", include_hidden=True, recursive=True)):
file = Path(file)
if file.name == ".anki.typ":
preambles[file.parent] = file.read_text(encoding="utf-8")
continue
fh = FileHandler(file)
if self._hash_changed(fh):
cards = self._parse_file(fh, get_preamble(file.parent))
self.file_handlers.append((fh, cards))
flashcards.extend(cards)
return flashcards
def _hash_changed(self, file: FileHandler) -> bool:
file_hash = file.get_file_hash()
cached = self.file_hashes.get(str(file.file_path))
self.file_hashes[str(file.file_path)] = file_hash
return file_hash != cached
def _load_file_hashes(self):
self.file_hashes_store_path.parent.mkdir(parents=True, exist_ok=True)
self.file_hashes_store_path.touch()
content = self.file_hashes_store_path.read_text()
if content:
self.file_hashes = json.loads(content)
else:
self.file_hashes = {}
def save_file_hashes(self):
self.file_hashes_store_path.write_text(json.dumps(self.file_hashes))
def clear_file_hashes(self):
self.file_hashes = {}
self.save_file_hashes()
def update_ids_in_source(self):
print("Updating ids in source...")
for fh, cards in self.file_handlers:
file_updated = False
for c in cards:
if c.id_updated:
fh.update_node_content(c.note_id_node, c.note_id)
file_updated = True
if file_updated:
fh.write()
self.file_hashes[str(fh.file_path)] = fh.get_file_hash()