Compare commits

..

No commits in common. "7196f1493d2e4aa0bf44cc3e5469362f85f7c76b" and "9c4e32006352e6a96c04fc7a3fe4feaa4460b9e0" have entirely different histories.

8 changed files with 56 additions and 139 deletions

3
.gitignore vendored
View File

@ -40,6 +40,3 @@ media/
# OS
.DS_Store
Thumbs.db
output/
data/

View File

@ -33,6 +33,7 @@ A Python tool to automatically generate Anki flashcards for language learning.
uv run -m anki_generator --input words.csv --output deck.apkg
```
## Enabling TTS
See the [Anki Manual](https://docs.ankiweb.net/templates/fields.html#text-to-speech-for-individual-fields) for how to enable text-to-speech on individual fields.
## To do
- [ ] Add error logging
- [ ] Fix image rendering
- [ ] If word already exists, do not recreate cards

View File

@ -1,6 +1,5 @@
import os
import random
import logging
import genanki
from datetime import datetime
from typing import List, Tuple, Optional
@ -8,21 +7,12 @@ from .models import GermanWord, UnsplashImage
from .clients.llm import AnthropicClient
from .clients.unsplash import UnsplashClient
logger = logging.getLogger(__name__)
class GermanDeckPackage:
"""Package class to create Anki deck with media files"""
class GermanDeckPackage(genanki.Package):
"""Custom Package class to include media files"""
def __init__(self, deck, media_files):
self.deck = deck
super().__init__(deck)
self.media_files = media_files
def write_to_file(self, file):
"""Write deck to a file"""
package = genanki.Package([self.deck])
package.media_files = self.media_files
logger.info("Writing deck with media files: %s", self.media_files)
package.write_to_file(file)
class CardGenerator:
def __init__(self, llm_client: AnthropicClient, unsplash_client: UnsplashClient):
self.llm_client = llm_client
@ -53,12 +43,10 @@ class CardGenerator:
{
'name': 'German Vocabulary Card',
'qfmt': '''
<div style="font-size: 24px;">
{{German_Word}} {{tts de_DE:German_Word}}
</div>
<div style="font-size: 24px;">{{German_Word}}</div>
<div style="font-size: 18px;">Part of speech: {{Part_of_Speech}}</div>
<div style="font-size: 14px;">Source: {{Source}}</div>
{{Image}}
{{#Image}}<div><img src="{{text:Image}}" style="max-width: 300px; max-height: 200px;"></div>{{/Image}}
''',
'afmt': '''
{{FrontSide}}
@ -66,9 +54,7 @@ class CardGenerator:
<div><b>English meaning:</b> {{English_Meaning}}</div>
<div><b>Article:</b> {{Article}}</div>
<div><b>Plural Form:</b> {{Plural_Form}}</div>
<div>
<b>Example:</b> {{Example_Sentence}} {{tts de_DE:Example_Sentence}}
</div>
<div><b>Example:</b> {{Example_Sentence}}</div>
<div><b>Translation:</b> {{Sentence_Translation}}</div>
<div><b>Usage notes:</b> {{Usage_Notes}}</div>
<div><b>Related words:</b> {{Related_Words}}</div>
@ -93,65 +79,29 @@ class CardGenerator:
'''
)
def _load_processed_words(self) -> set:
    """Load previously processed words from the tracking file.

    Reads ``processed_words.txt`` inside the ``output`` directory, one word
    per line, ignoring blank lines and surrounding whitespace.

    Returns:
        The set of words found, or an empty set when the tracking file does
        not exist.
    """
    tracking_file = os.path.join("output", "processed_words.txt")
    try:
        # Open directly instead of testing for existence first, so a file that
        # disappears between the check and the open cannot raise.
        with open(tracking_file, 'r', encoding='utf-8') as f:
            stripped_lines = (line.strip() for line in f)
            processed_words = {word for word in stripped_lines if word}
    except FileNotFoundError:
        # No tracking file yet: nothing has been processed.
        return set()
    logger.info("Found %d previously processed words", len(processed_words))
    return processed_words
def _add_to_processed_words(self, word: str):
    """Append *word* as a new line of the processed-words tracking file.

    The ``output`` directory is created when missing, and the file is opened
    in append mode so earlier entries are kept.
    """
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    record_path = os.path.join(output_dir, "processed_words.txt")
    with open(record_path, mode='a', encoding='utf-8') as handle:
        handle.write(f"{word}\n")
    logger.debug("Added %s to processed words", word)
def create_deck(self, word_list: List[Tuple[str, str]], deck_name: str = "German Vocabulary") -> Tuple[genanki.Deck, List[str]]:
"""Create an Anki deck from a list of words"""
deck_id = random.randrange(1 << 30, 1 << 31)
deck = genanki.Deck(deck_id, deck_name)
media_files = []
# Load previously processed words
processed_words = self._load_processed_words()
for word, source in word_list:
# Skip if word has been processed before
if word in processed_words:
logger.info("Skipping %s - already exists in previous deck", word)
continue
try:
card_info_dict = self.llm_client.get_card_info(word, source)
card_info = GermanWord(**card_info_dict)
image_filename = f"{word.lower().replace(' ', '_')}.jpg"
# Create output directory if it doesn't exist
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
media_dir = "media"
os.makedirs(media_dir, exist_ok=True)
# Save image directly to media directory
image_path = os.path.join(media_dir, image_filename)
# Get image
image_filename = f"media/{word.lower().replace(' ', '_')}.jpg"
image = self.unsplash_client.get_image(
card_info.image_search_term,
image_path
image_filename
)
if image and image.local_path and os.path.exists(image.local_path):
# Use the full path for the media file
if image and image.local_path:
media_files.append(image.local_path)
logger.info("Added image: %s", image.local_path)
image_filename = os.path.basename(image.local_path)
else:
image_path = ""
logger.warning("No image for: %s", word)
image_filename = ""
# Create note
note = genanki.Note(
@ -167,17 +117,15 @@ class CardGenerator:
card_info.sentence_translation,
card_info.usage_notes,
card_info.related_words,
f'<img src="{os.path.basename(image.local_path) if image and image.local_path else ""}" />',
image_filename,
image.photographer if image else "",
' '.join(card_info.tags)
]
)
deck.add_note(note)
# Track the word after successfully creating the note
self._add_to_processed_words(word)
logger.info("Added card for: %s", word)
print(f"Added card for: {word}")
except Exception as e:
logger.error("Error creating card for %s: %s", word, str(e))
print(f"Error creating card for {word}: {str(e)}")
return deck, media_files

View File

@ -2,7 +2,6 @@ from typing import Optional
import requests
from PIL import Image
from io import BytesIO
import os
from ..models import Settings, UnsplashImage
from anki_generator.settings import get_settings, Settings
@ -12,7 +11,7 @@ class UnsplashClient:
self.settings = settings or get_settings()
self.api_key = self.settings.unsplash_api_key.get_secret_value()
def get_image(self, search_term: str, filename: str) -> Optional[UnsplashImage]:
def get_image(self, search_term: str, save_path: str) -> Optional[UnsplashImage]:
"""Fetch and save an image from Unsplash"""
url = "https://api.unsplash.com/search/photos"
headers = {"Authorization": f"Client-ID {self.api_key}"}
@ -38,18 +37,15 @@ class UnsplashClient:
img_response = requests.get(image_url)
img_response.raise_for_status()
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
# Process and save image
img = Image.open(BytesIO(img_response.content))
img.thumbnail((800, 600))
img.save(filename, "JPEG", quality=85)
img.save(save_path, "JPEG", quality=85)
return UnsplashImage(
url=image_url,
photographer=photographer,
local_path=filename
local_path=save_path
)
except Exception as e:

View File

@ -1,29 +1,24 @@
import os
import argparse
import logging
from datetime import datetime
import shutil
import glob
from typing import List, Tuple, Optional
from typing import List, Tuple
from pathlib import Path
from anki_generator.clients.llm import AnthropicClient
from anki_generator.clients.unsplash import UnsplashClient
from anki_generator.card_generator import CardGenerator, GermanDeckPackage
logger = logging.getLogger(__name__)
def read_word_list(file_path: str) -> List[Tuple[str, str]]:
    """Read ``word,source`` pairs from a text file.

    Each non-blank line is split at its first comma and both halves are
    stripped of surrounding whitespace. Progress is reported through the
    module logger and echoed on stdout.

    Args:
        file_path: Path of the UTF-8 encoded word list.

    Returns:
        One ``(word, source)`` tuple per non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line contains no comma.
    """
    words = []
    logger.info("Reading file: %s", file_path)
    print(f"Reading file: {file_path}")
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no entry;
                # unpacking them would otherwise fail.
                continue
            logger.debug("Processing line: %s", line)
            print(f"Processing line: {line}")
            # Expected format: word,source
            word, separator, source = line.partition(',')
            if not separator:
                # Same exception type as a failed unpack, but it names the
                # offending file and line.
                raise ValueError(
                    f"{file_path}:{line_number}: expected 'word,source', got {line!r}"
                )
            word, source = word.strip(), source.strip()
            words.append((word, source))
            logger.debug("Added word: %s with source: %s", word, source)
            print(f"Added word: {word} with source: {source}")
    return words
def main():
@ -34,41 +29,35 @@ def main():
args = parser.parse_args()
logger.info("Starting Anki card generation")
print("Loading program")
try:
# Create media directory
os.makedirs('media', exist_ok=True)
# Initialize clients
llm_client = AnthropicClient()
unsplash_client = UnsplashClient()
# Initialize card generator
generator = CardGenerator(llm_client, unsplash_client)
# Read word list
words = read_word_list(args.input)
# Generate deck
deck, media_files = generator.create_deck(words, args.deck_name)
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
# Generate output filename if not provided
if not args.output:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = os.path.join(output_dir, f"german_vocab_{timestamp}.apkg")
output_file = f"german_vocab_{timestamp}.apkg"
else:
output_file = os.path.join(output_dir, args.output)
output_file = args.output
media_paths = []
for media_file in media_files:
media_path = os.path.join(output_dir, os.path.basename(media_file))
if os.path.exists(media_file) and media_file != media_path:
shutil.copy2(media_file, media_path)
media_paths.append(media_path)
package = GermanDeckPackage(deck, media_paths)
# Save deck
package = GermanDeckPackage(deck, media_files)
package.write_to_file(output_file)
logger.info("Deck and media files saved in: %s", output_dir)
except Exception as e:
logger.error("Error: %s", str(e))
raise
print(f"Deck saved as: {output_file}")
if __name__ == "__main__":
main()

View File

@ -1,4 +1,3 @@
import os
from typing import List, Optional
from pydantic import BaseModel, Field, HttpUrl
from pydantic_settings import BaseSettings

View File

@ -1,17 +1,8 @@
import logging
from typing import Optional
from functools import lru_cache
from pydantic import SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict
def setup_logging():
    """Set up application logging through ``logging.basicConfig``.

    Uses the INFO level with a record layout of timestamp, logger name,
    level name and message.
    """
    options = {
        "level": logging.INFO,
        "format": '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        "datefmt": '%Y-%m-%d %H:%M:%S',
    }
    logging.basicConfig(**options)
class Settings(BaseSettings):
"""Application settings that are loaded from environment variables."""
@ -53,5 +44,4 @@ def get_settings() -> Settings:
Returns:
Settings: Application settings instance
"""
setup_logging()
return Settings()

View File

@ -1,8 +1,5 @@
Bauch,Babbel
Kopf,Babbel
Rücken,Babbel
verlinken, Easy German 546: Die Brandmauer
Lieblingsthema, Easy German 546: Die Brandmauer
Zahnvorsorge, Easy German 546: Die Brandmauer
Vorsorgeuntersuchung, Easy German 546: Die Brandmauer
Untersuchung, Easy German 546: Die Brandmauer
Katze,Duolingo
Hund,Book: German Made Simple
Apfel,Language Exchange
Brot,DW Learn German
Wasser,Memrise

1 Bauch Katze Babbel Duolingo
2 Kopf Hund Babbel Book: German Made Simple
3 Rücken Apfel Babbel Language Exchange
4 verlinken Brot Easy German 546: Die Brandmauer DW Learn German
5 Lieblingsthema Wasser Easy German 546: Die Brandmauer Memrise
Zahnvorsorge Easy German 546: Die Brandmauer
Vorsorgeuntersuchung Easy German 546: Die Brandmauer
Untersuchung Easy German 546: Die Brandmauer