mirror of
https://github.com/Ascyii/typstar.git
synced 2026-01-01 05:24:24 -05:00
perf(anki): skip unchanged files using file hashes
This commit is contained in:
@@ -13,6 +13,7 @@ readme = "README.md"
|
|||||||
requires-python = ">=3.11.10"
|
requires-python = ">=3.11.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aiohttp>=3.11.11",
|
"aiohttp>=3.11.11",
|
||||||
|
"appdirs>=1.4.4",
|
||||||
"tree-sitter-language-pack>=0.2.0",
|
"tree-sitter-language-pack>=0.2.0",
|
||||||
"typer>=0.15.1",
|
"typer>=0.15.1",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ class AnkiConnectError(Exception):
|
|||||||
class AnkiConnectApi:
|
class AnkiConnectApi:
|
||||||
url: str
|
url: str
|
||||||
api_key: str
|
api_key: str
|
||||||
|
semaphore: asyncio.Semaphore
|
||||||
|
|
||||||
def __init__(self, url: str, api_key: str):
|
def __init__(self, url: str, api_key: str):
|
||||||
self.url = url
|
self.url = url
|
||||||
@@ -30,7 +31,8 @@ class AnkiConnectApi:
|
|||||||
else:
|
else:
|
||||||
update.append(card)
|
update.append(card)
|
||||||
print(f"Pushing {len(add)} new flashcards and {len(update)} updated flashcards to Anki...")
|
print(f"Pushing {len(add)} new flashcards and {len(update)} updated flashcards to Anki...")
|
||||||
await asyncio.gather(self._add(add), self._update(update))
|
await self._add(add)
|
||||||
|
await self._update(update)
|
||||||
|
|
||||||
async def _request_api(self, action, **params):
|
async def _request_api(self, action, **params):
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
|
|||||||
@@ -1,24 +1,29 @@
|
|||||||
import os.path
|
import hashlib
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import tree_sitter
|
import tree_sitter
|
||||||
|
|
||||||
|
|
||||||
class FileHandler:
|
class FileHandler:
|
||||||
file_path: str
|
file_path: Path
|
||||||
file_content: List[str]
|
file_content: List[str]
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path: Path):
|
||||||
self.file_path = path
|
self.file_path = path
|
||||||
self.read()
|
self.read()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def directory_path(self) -> str:
|
def directory_path(self) -> Path:
|
||||||
return os.path.dirname(self.file_path)
|
return self.file_path.parent
|
||||||
|
|
||||||
def get_bytes(self) -> bytes:
|
def get_bytes(self) -> bytes:
|
||||||
return bytes("".join(self.file_content), encoding="utf-8")
|
return bytes("".join(self.file_content), encoding="utf-8")
|
||||||
|
|
||||||
|
def get_file_hash(self) -> str:
|
||||||
|
return hashlib.md5("".join(self.file_content).encode(), usedforsecurity=False).hexdigest()
|
||||||
|
|
||||||
def get_node_content(self, node: tree_sitter.Node, remove_outer=False):
|
def get_node_content(self, node: tree_sitter.Node, remove_outer=False):
|
||||||
content = "".join(
|
content = "".join(
|
||||||
self.file_content[node.start_point.row:node.end_point.row + 1]
|
self.file_content[node.start_point.row:node.end_point.row + 1]
|
||||||
@@ -37,9 +42,9 @@ class FileHandler:
|
|||||||
self.file_content = new_lines
|
self.file_content = new_lines
|
||||||
|
|
||||||
def read(self):
|
def read(self):
|
||||||
with open(self.file_path, encoding="utf-8") as f:
|
with self.file_path.open(encoding="utf-8") as f:
|
||||||
self.file_content = f.readlines()
|
self.file_content = f.readlines()
|
||||||
|
|
||||||
def write(self):
|
def write(self):
|
||||||
with open(self.file_path, "w", encoding="utf-8") as f:
|
with self.file_path.open("w", encoding="utf-8") as f:
|
||||||
f.writelines(self.file_content)
|
f.writelines(self.file_content)
|
||||||
|
|||||||
@@ -1,24 +1,24 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import glob
|
|
||||||
import os
|
import os
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
from anki.anki_api import AnkiConnectApi
|
from anki.anki_api import AnkiConnectApi
|
||||||
from anki.file_handler import FileHandler
|
|
||||||
from anki.parser import FlashcardParser
|
from anki.parser import FlashcardParser
|
||||||
from anki.typst_compiler import TypstCompiler
|
from anki.typst_compiler import TypstCompiler
|
||||||
|
|
||||||
cli = typer.Typer(name="typstar-anki")
|
cli = typer.Typer(name="typstar-anki")
|
||||||
|
|
||||||
|
|
||||||
async def export_flashcards(root_dir, typst_cmd, anki_url, anki_key):
|
async def export_flashcards(root_dir, clear_cache, typst_cmd, anki_url, anki_key):
|
||||||
parser = FlashcardParser()
|
parser = FlashcardParser()
|
||||||
compiler = TypstCompiler(root_dir, typst_cmd)
|
compiler = TypstCompiler(root_dir, typst_cmd)
|
||||||
api = AnkiConnectApi(anki_url, anki_key)
|
api = AnkiConnectApi(anki_url, anki_key)
|
||||||
|
|
||||||
# parse flashcards
|
# parse flashcards
|
||||||
|
if clear_cache:
|
||||||
|
parser.clear_file_hashes()
|
||||||
flashcards = parser.parse_directory(root_dir)
|
flashcards = parser.parse_directory(root_dir)
|
||||||
|
|
||||||
# async typst compilation
|
# async typst compilation
|
||||||
@@ -30,6 +30,7 @@ async def export_flashcards(root_dir, typst_cmd, anki_url, anki_key):
|
|||||||
finally:
|
finally:
|
||||||
# write id updates to files
|
# write id updates to files
|
||||||
parser.update_ids_in_source()
|
parser.update_ids_in_source()
|
||||||
|
parser.save_file_hashes()
|
||||||
print("Done")
|
print("Done")
|
||||||
|
|
||||||
|
|
||||||
@@ -37,10 +38,12 @@ async def export_flashcards(root_dir, typst_cmd, anki_url, anki_key):
|
|||||||
def cmd(root_dir: Annotated[
|
def cmd(root_dir: Annotated[
|
||||||
str, typer.Option(help="Directory scanned for flashcards and passed over to typst compile command")] = os.getcwd(),
|
str, typer.Option(help="Directory scanned for flashcards and passed over to typst compile command")] = os.getcwd(),
|
||||||
typst_cmd: Annotated[str, typer.Option(help="Typst command used for flashcard compilation")] = "typst",
|
typst_cmd: Annotated[str, typer.Option(help="Typst command used for flashcard compilation")] = "typst",
|
||||||
|
clear_cache: Annotated[bool, typer.Option(help="Clear stored file hashes and force compilation and "
|
||||||
|
"push of all flashcards (e.g. on preamble change)")] = False,
|
||||||
anki_url: Annotated[str, typer.Option(help="Url for Anki-Connect")] = "http://127.0.0.1:8765",
|
anki_url: Annotated[str, typer.Option(help="Url for Anki-Connect")] = "http://127.0.0.1:8765",
|
||||||
anki_key: Annotated[str, typer.Option(help="Api key for Anki-Connect")] = None,
|
anki_key: Annotated[str, typer.Option(help="Api key for Anki-Connect")] = None,
|
||||||
):
|
):
|
||||||
asyncio.run(export_flashcards(root_dir, typst_cmd, anki_url, anki_key))
|
asyncio.run(export_flashcards(root_dir, clear_cache, typst_cmd, anki_url, anki_key))
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
import glob
|
import glob
|
||||||
import os.path
|
import json
|
||||||
|
|
||||||
from functools import cache
|
from functools import cache
|
||||||
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
import appdirs
|
||||||
import tree_sitter
|
import tree_sitter
|
||||||
from tree_sitter_language_pack import get_language, get_parser
|
from tree_sitter_language_pack import get_language, get_parser
|
||||||
|
|
||||||
@@ -37,14 +39,17 @@ class FlashcardParser:
|
|||||||
flashcard_query: tree_sitter.Query
|
flashcard_query: tree_sitter.Query
|
||||||
|
|
||||||
file_handlers: List[tuple[FileHandler, List[Flashcard]]]
|
file_handlers: List[tuple[FileHandler, List[Flashcard]]]
|
||||||
|
file_hashes: dict[str, str]
|
||||||
|
file_hashes_store_path: Path = Path(appdirs.user_state_dir("typstar") + "/file_hashes.json")
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.typst_language = get_language("typst")
|
self.typst_language = get_language("typst")
|
||||||
self.typst_parser = get_parser("typst")
|
self.typst_parser = get_parser("typst")
|
||||||
self.flashcard_query = self.typst_language.query(ts_flashcard_query)
|
self.flashcard_query = self.typst_language.query(ts_flashcard_query)
|
||||||
self.file_handlers = []
|
self.file_handlers = []
|
||||||
|
self._load_file_hashes()
|
||||||
|
|
||||||
def parse_file(self, file: FileHandler, preamble: str) -> List[Flashcard]:
|
def _parse_file(self, file: FileHandler, preamble: str) -> List[Flashcard]:
|
||||||
cards = []
|
cards = []
|
||||||
tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8")
|
tree = self.typst_parser.parse(file.get_bytes(), encoding="utf8")
|
||||||
captures = self.flashcard_query.captures(tree.root_node)
|
captures = self.flashcard_query.captures(tree.root_node)
|
||||||
@@ -73,27 +78,51 @@ class FlashcardParser:
|
|||||||
|
|
||||||
def parse_directory(self, root_dir):
|
def parse_directory(self, root_dir):
|
||||||
print(f"Parsing flashcards in {root_dir}...")
|
print(f"Parsing flashcards in {root_dir}...")
|
||||||
|
root_dir = Path(root_dir)
|
||||||
preambles = {}
|
preambles = {}
|
||||||
flashcards = []
|
flashcards = []
|
||||||
|
|
||||||
@cache
|
@cache
|
||||||
def get_preamble(path) -> str | None:
|
def get_preamble(path: Path) -> str | None:
|
||||||
while len(path) > len(root_dir):
|
while path != root_dir:
|
||||||
if preamble := preambles.get(path):
|
if preamble := preambles.get(path):
|
||||||
return preamble
|
return preamble
|
||||||
path = os.path.dirname(path)
|
path = path.parent
|
||||||
|
|
||||||
for file in sorted(glob.glob(f"{root_dir}/**/**.typ", include_hidden=True, recursive=True)):
|
for file in sorted(glob.glob(f"{root_dir}/**/**.typ", include_hidden=True, recursive=True)):
|
||||||
if os.path.basename(file) == ".anki.typ":
|
file = Path(file)
|
||||||
with open(file, encoding="utf-8") as f:
|
if file.name == ".anki.typ":
|
||||||
preambles[os.path.dirname(file)] = f.read()
|
preambles[file.parent] = file.read_text(encoding="utf-8")
|
||||||
continue
|
continue
|
||||||
fh = FileHandler(file)
|
fh = FileHandler(file)
|
||||||
cards = self.parse_file(fh, get_preamble(os.path.dirname(file)))
|
if self._hash_changed(fh):
|
||||||
self.file_handlers.append((fh, cards))
|
cards = self._parse_file(fh, get_preamble(file.parent))
|
||||||
flashcards.extend(cards)
|
self.file_handlers.append((fh, cards))
|
||||||
|
flashcards.extend(cards)
|
||||||
return flashcards
|
return flashcards
|
||||||
|
|
||||||
|
def _hash_changed(self, file: FileHandler) -> bool:
|
||||||
|
file_hash = file.get_file_hash()
|
||||||
|
cached = self.file_hashes.get(str(file.file_path))
|
||||||
|
self.file_hashes[str(file.file_path)] = file_hash
|
||||||
|
return file_hash != cached
|
||||||
|
|
||||||
|
def _load_file_hashes(self):
|
||||||
|
self.file_hashes_store_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.file_hashes_store_path.touch()
|
||||||
|
content = self.file_hashes_store_path.read_text()
|
||||||
|
if content:
|
||||||
|
self.file_hashes = json.loads(content)
|
||||||
|
else:
|
||||||
|
self.file_hashes = {}
|
||||||
|
|
||||||
|
def save_file_hashes(self):
|
||||||
|
self.file_hashes_store_path.write_text(json.dumps(self.file_hashes))
|
||||||
|
|
||||||
|
def clear_file_hashes(self):
|
||||||
|
self.file_hashes = {}
|
||||||
|
self.save_file_hashes()
|
||||||
|
|
||||||
def update_ids_in_source(self):
|
def update_ids_in_source(self):
|
||||||
print("Updating ids in source...")
|
print("Updating ids in source...")
|
||||||
for fh, cards in self.file_handlers:
|
for fh, cards in self.file_handlers:
|
||||||
@@ -104,3 +133,4 @@ class FlashcardParser:
|
|||||||
file_updated = True
|
file_updated = True
|
||||||
if file_updated:
|
if file_updated:
|
||||||
fh.write()
|
fh.write()
|
||||||
|
self.file_hashes[str(fh.file_path)] = fh.get_file_hash()
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from .flashcard import Flashcard
|
from .flashcard import Flashcard
|
||||||
@@ -31,7 +33,7 @@ class TypstCompiler:
|
|||||||
self.typst_root_dir = typst_root_dir
|
self.typst_root_dir = typst_root_dir
|
||||||
self.max_processes = round(os.cpu_count() * 1.5)
|
self.max_processes = round(os.cpu_count() * 1.5)
|
||||||
|
|
||||||
async def _compile(self, src: str, directory: str) -> bytes:
|
async def _compile(self, src: str, directory: Path) -> bytes:
|
||||||
tmp_path = f"{directory}/tmp_{random.randint(1, 1000000000)}.typ"
|
tmp_path = f"{directory}/tmp_{random.randint(1, 1000000000)}.typ"
|
||||||
with open(tmp_path, "w", encoding="utf-8") as f:
|
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||||
f.write(src)
|
f.write(src)
|
||||||
|
|||||||
11
uv.lock
generated
11
uv.lock
generated
@@ -84,6 +84,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 },
|
{ url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "appdirs"
|
||||||
|
version = "1.4.4"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/d7/d8/05696357e0311f5b5c316d7b95f46c669dd9c15aaeecbb48c7d0aeb88c40/appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41", size = 13470 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "attrs"
|
name = "attrs"
|
||||||
version = "24.3.0"
|
version = "24.3.0"
|
||||||
@@ -507,6 +516,7 @@ version = "1.0.0"
|
|||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "aiohttp" },
|
{ name = "aiohttp" },
|
||||||
|
{ name = "appdirs" },
|
||||||
{ name = "tree-sitter-language-pack" },
|
{ name = "tree-sitter-language-pack" },
|
||||||
{ name = "typer" },
|
{ name = "typer" },
|
||||||
]
|
]
|
||||||
@@ -514,6 +524,7 @@ dependencies = [
|
|||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "aiohttp", specifier = ">=3.11.11" },
|
{ name = "aiohttp", specifier = ">=3.11.11" },
|
||||||
|
{ name = "appdirs", specifier = ">=1.4.4" },
|
||||||
{ name = "tree-sitter-language-pack", specifier = ">=0.2.0" },
|
{ name = "tree-sitter-language-pack", specifier = ">=0.2.0" },
|
||||||
{ name = "typer", specifier = ">=0.15.1" },
|
{ name = "typer", specifier = ">=0.15.1" },
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user