Compare commits

..

No commits in common. "7196f1493d2e4aa0bf44cc3e5469362f85f7c76b" and "9c4e32006352e6a96c04fc7a3fe4feaa4460b9e0" have entirely different histories.

8 changed files with 56 additions and 139 deletions

3
.gitignore vendored
View File

@@ -40,6 +40,3 @@ media/
# OS # OS
.DS_Store .DS_Store
Thumbs.db Thumbs.db
output/
data/

View File

@@ -33,6 +33,7 @@ A Python tool to automatically generate Anki flashcards for language learning.
uv run -m anki_generator --input words.csv --output deck.apkg uv run -m anki_generator --input words.csv --output deck.apkg
``` ```
## Enabling TTS ## To do
- [ ] Add error logging
See [Anki Manual](https://docs.ankiweb.net/templates/fields.html#text-to-speech-for-individual-fields). - [ ] Fix image rendering
- [ ] If word already exists, do not recreate cards

View File

@@ -1,6 +1,5 @@
import os import os
import random import random
import logging
import genanki import genanki
from datetime import datetime from datetime import datetime
from typing import List, Tuple, Optional from typing import List, Tuple, Optional
@@ -8,21 +7,12 @@ from .models import GermanWord, UnsplashImage
from .clients.llm import AnthropicClient from .clients.llm import AnthropicClient
from .clients.unsplash import UnsplashClient from .clients.unsplash import UnsplashClient
logger = logging.getLogger(__name__) class GermanDeckPackage(genanki.Package):
"""Custom Package class to include media files"""
class GermanDeckPackage:
"""Package class to create Anki deck with media files"""
def __init__(self, deck, media_files): def __init__(self, deck, media_files):
self.deck = deck super().__init__(deck)
self.media_files = media_files self.media_files = media_files
def write_to_file(self, file):
"""Write deck to a file"""
package = genanki.Package([self.deck])
package.media_files = self.media_files
logger.info("Writing deck with media files: %s", self.media_files)
package.write_to_file(file)
class CardGenerator: class CardGenerator:
def __init__(self, llm_client: AnthropicClient, unsplash_client: UnsplashClient): def __init__(self, llm_client: AnthropicClient, unsplash_client: UnsplashClient):
self.llm_client = llm_client self.llm_client = llm_client
@@ -53,12 +43,10 @@ class CardGenerator:
{ {
'name': 'German Vocabulary Card', 'name': 'German Vocabulary Card',
'qfmt': ''' 'qfmt': '''
<div style="font-size: 24px;"> <div style="font-size: 24px;">{{German_Word}}</div>
{{German_Word}} {{tts de_DE:German_Word}}
</div>
<div style="font-size: 18px;">Part of speech: {{Part_of_Speech}}</div> <div style="font-size: 18px;">Part of speech: {{Part_of_Speech}}</div>
<div style="font-size: 14px;">Source: {{Source}}</div> <div style="font-size: 14px;">Source: {{Source}}</div>
{{Image}} {{#Image}}<div><img src="{{text:Image}}" style="max-width: 300px; max-height: 200px;"></div>{{/Image}}
''', ''',
'afmt': ''' 'afmt': '''
{{FrontSide}} {{FrontSide}}
@@ -66,9 +54,7 @@ class CardGenerator:
<div><b>English meaning:</b> {{English_Meaning}}</div> <div><b>English meaning:</b> {{English_Meaning}}</div>
<div><b>Article:</b> {{Article}}</div> <div><b>Article:</b> {{Article}}</div>
<div><b>Plural Form:</b> {{Plural_Form}}</div> <div><b>Plural Form:</b> {{Plural_Form}}</div>
<div> <div><b>Example:</b> {{Example_Sentence}}</div>
<b>Example:</b> {{Example_Sentence}} {{tts de_DE:Example_Sentence}}
</div>
<div><b>Translation:</b> {{Sentence_Translation}}</div> <div><b>Translation:</b> {{Sentence_Translation}}</div>
<div><b>Usage notes:</b> {{Usage_Notes}}</div> <div><b>Usage notes:</b> {{Usage_Notes}}</div>
<div><b>Related words:</b> {{Related_Words}}</div> <div><b>Related words:</b> {{Related_Words}}</div>
@@ -93,65 +79,29 @@ class CardGenerator:
''' '''
) )
def _load_processed_words(self) -> set:
"""Load previously processed words from tracking file"""
processed_words = set()
tracking_file = os.path.join("output", "processed_words.txt")
if os.path.exists(tracking_file):
with open(tracking_file, 'r', encoding='utf-8') as f:
processed_words = set(line.strip() for line in f if line.strip())
logger.info("Found %d previously processed words", len(processed_words))
return processed_words
def _add_to_processed_words(self, word: str):
"""Add a word to the tracking file"""
tracking_file = os.path.join("output", "processed_words.txt")
os.makedirs("output", exist_ok=True)
with open(tracking_file, 'a', encoding='utf-8') as f:
f.write(f"{word}\n")
logger.debug("Added %s to processed words", word)
def create_deck(self, word_list: List[Tuple[str, str]], deck_name: str = "German Vocabulary") -> Tuple[genanki.Deck, List[str]]: def create_deck(self, word_list: List[Tuple[str, str]], deck_name: str = "German Vocabulary") -> Tuple[genanki.Deck, List[str]]:
"""Create an Anki deck from a list of words""" """Create an Anki deck from a list of words"""
deck_id = random.randrange(1 << 30, 1 << 31) deck_id = random.randrange(1 << 30, 1 << 31)
deck = genanki.Deck(deck_id, deck_name) deck = genanki.Deck(deck_id, deck_name)
media_files = [] media_files = []
# Load previously processed words
processed_words = self._load_processed_words()
for word, source in word_list: for word, source in word_list:
# Skip if word has been processed before
if word in processed_words:
logger.info("Skipping %s - already exists in previous deck", word)
continue
try: try:
card_info_dict = self.llm_client.get_card_info(word, source) card_info_dict = self.llm_client.get_card_info(word, source)
card_info = GermanWord(**card_info_dict) card_info = GermanWord(**card_info_dict)
image_filename = f"{word.lower().replace(' ', '_')}.jpg" # Get image
image_filename = f"media/{word.lower().replace(' ', '_')}.jpg"
# Create output directory if it doesn't exist
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
media_dir = "media"
os.makedirs(media_dir, exist_ok=True)
# Save image directly to media directory
image_path = os.path.join(media_dir, image_filename)
image = self.unsplash_client.get_image( image = self.unsplash_client.get_image(
card_info.image_search_term, card_info.image_search_term,
image_path image_filename
) )
if image and image.local_path and os.path.exists(image.local_path): if image and image.local_path:
# Use the full path for the media file
media_files.append(image.local_path) media_files.append(image.local_path)
logger.info("Added image: %s", image.local_path) image_filename = os.path.basename(image.local_path)
else: else:
image_path = "" image_filename = ""
logger.warning("No image for: %s", word)
# Create note # Create note
note = genanki.Note( note = genanki.Note(
@@ -167,17 +117,15 @@ class CardGenerator:
card_info.sentence_translation, card_info.sentence_translation,
card_info.usage_notes, card_info.usage_notes,
card_info.related_words, card_info.related_words,
f'<img src="{os.path.basename(image.local_path) if image and image.local_path else ""}" />', image_filename,
image.photographer if image else "", image.photographer if image else "",
' '.join(card_info.tags) ' '.join(card_info.tags)
] ]
) )
deck.add_note(note) deck.add_note(note)
# Track the word after successfully creating the note print(f"Added card for: {word}")
self._add_to_processed_words(word)
logger.info("Added card for: %s", word)
except Exception as e: except Exception as e:
logger.error("Error creating card for %s: %s", word, str(e)) print(f"Error creating card for {word}: {str(e)}")
return deck, media_files return deck, media_files

View File

@@ -2,7 +2,6 @@ from typing import Optional
import requests import requests
from PIL import Image from PIL import Image
from io import BytesIO from io import BytesIO
import os
from ..models import Settings, UnsplashImage from ..models import Settings, UnsplashImage
from anki_generator.settings import get_settings, Settings from anki_generator.settings import get_settings, Settings
@@ -12,7 +11,7 @@ class UnsplashClient:
self.settings = settings or get_settings() self.settings = settings or get_settings()
self.api_key = self.settings.unsplash_api_key.get_secret_value() self.api_key = self.settings.unsplash_api_key.get_secret_value()
def get_image(self, search_term: str, filename: str) -> Optional[UnsplashImage]: def get_image(self, search_term: str, save_path: str) -> Optional[UnsplashImage]:
"""Fetch and save an image from Unsplash""" """Fetch and save an image from Unsplash"""
url = "https://api.unsplash.com/search/photos" url = "https://api.unsplash.com/search/photos"
headers = {"Authorization": f"Client-ID {self.api_key}"} headers = {"Authorization": f"Client-ID {self.api_key}"}
@@ -38,18 +37,15 @@ class UnsplashClient:
img_response = requests.get(image_url) img_response = requests.get(image_url)
img_response.raise_for_status() img_response.raise_for_status()
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
# Process and save image # Process and save image
img = Image.open(BytesIO(img_response.content)) img = Image.open(BytesIO(img_response.content))
img.thumbnail((800, 600)) img.thumbnail((800, 600))
img.save(filename, "JPEG", quality=85) img.save(save_path, "JPEG", quality=85)
return UnsplashImage( return UnsplashImage(
url=image_url, url=image_url,
photographer=photographer, photographer=photographer,
local_path=filename local_path=save_path
) )
except Exception as e: except Exception as e:

View File

@@ -1,29 +1,24 @@
import os import os
import argparse import argparse
import logging
from datetime import datetime from datetime import datetime
import shutil from typing import List, Tuple
import glob
from typing import List, Tuple, Optional
from pathlib import Path from pathlib import Path
from anki_generator.clients.llm import AnthropicClient from anki_generator.clients.llm import AnthropicClient
from anki_generator.clients.unsplash import UnsplashClient from anki_generator.clients.unsplash import UnsplashClient
from anki_generator.card_generator import CardGenerator, GermanDeckPackage from anki_generator.card_generator import CardGenerator, GermanDeckPackage
logger = logging.getLogger(__name__)
def read_word_list(file_path: str) -> List[Tuple[str, str]]: def read_word_list(file_path: str) -> List[Tuple[str, str]]:
"""Read word list from file""" """Read word list from file"""
words = [] words = []
logger.info("Reading file: %s", file_path) print(f"Reading file: {file_path}")
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
for line in f: for line in f:
logger.debug("Processing line: %s", line.strip()) print(f"Processing line: {line.strip()}")
# Expected format: word,source # Expected format: word,source
word, source = line.strip().split(',', 1) word, source = line.strip().split(',', 1)
words.append((word.strip(), source.strip())) words.append((word.strip(), source.strip()))
logger.debug("Added word: %s with source: %s", word.strip(), source.strip()) print(f"Added word: {word.strip()} with source: {source.strip()}")
return words return words
def main(): def main():
@@ -34,41 +29,35 @@ def main():
args = parser.parse_args() args = parser.parse_args()
logger.info("Starting Anki card generation") print("Loading program")
try: # Create media directory
os.makedirs('media', exist_ok=True)
# Initialize clients
llm_client = AnthropicClient() llm_client = AnthropicClient()
unsplash_client = UnsplashClient() unsplash_client = UnsplashClient()
# Initialize card generator
generator = CardGenerator(llm_client, unsplash_client) generator = CardGenerator(llm_client, unsplash_client)
# Read word list
words = read_word_list(args.input) words = read_word_list(args.input)
# Generate deck
deck, media_files = generator.create_deck(words, args.deck_name) deck, media_files = generator.create_deck(words, args.deck_name)
output_dir = "output" # Generate output filename if not provided
os.makedirs(output_dir, exist_ok=True)
if not args.output: if not args.output:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = os.path.join(output_dir, f"german_vocab_{timestamp}.apkg") output_file = f"german_vocab_{timestamp}.apkg"
else: else:
output_file = os.path.join(output_dir, args.output) output_file = args.output
media_paths = [] # Save deck
for media_file in media_files: package = GermanDeckPackage(deck, media_files)
media_path = os.path.join(output_dir, os.path.basename(media_file))
if os.path.exists(media_file) and media_file != media_path:
shutil.copy2(media_file, media_path)
media_paths.append(media_path)
package = GermanDeckPackage(deck, media_paths)
package.write_to_file(output_file) package.write_to_file(output_file)
logger.info("Deck and media files saved in: %s", output_dir) print(f"Deck saved as: {output_file}")
except Exception as e:
logger.error("Error: %s", str(e))
raise
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -1,4 +1,3 @@
import os
from typing import List, Optional from typing import List, Optional
from pydantic import BaseModel, Field, HttpUrl from pydantic import BaseModel, Field, HttpUrl
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings

View File

@@ -1,17 +1,8 @@
import logging
from typing import Optional from typing import Optional
from functools import lru_cache from functools import lru_cache
from pydantic import SecretStr from pydantic import SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
def setup_logging():
"""Configure logging for the application."""
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
class Settings(BaseSettings): class Settings(BaseSettings):
"""Application settings that are loaded from environment variables.""" """Application settings that are loaded from environment variables."""
@@ -53,5 +44,4 @@ def get_settings() -> Settings:
Returns: Returns:
Settings: Application settings instance Settings: Application settings instance
""" """
setup_logging()
return Settings() return Settings()

View File

@@ -1,8 +1,5 @@
Bauch,Babbel Katze,Duolingo
Kopf,Babbel Hund,Book: German Made Simple
Rücken,Babbel Apfel,Language Exchange
verlinken, Easy German 546: Die Brandmauer Brot,DW Learn German
Lieblingsthema, Easy German 546: Die Brandmauer Wasser,Memrise
Zahnvorsorge, Easy German 546: Die Brandmauer
Vorsorgeuntersuchung, Easy German 546: Die Brandmauer
Untersuchung, Easy German 546: Die Brandmauer

1 Bauch Katze Babbel Duolingo
2 Kopf Hund Babbel Book: German Made Simple
3 Rücken Apfel Babbel Language Exchange
4 verlinken Brot Easy German 546: Die Brandmauer DW Learn German
5 Lieblingsthema Wasser Easy German 546: Die Brandmauer Memrise
Zahnvorsorge Easy German 546: Die Brandmauer
Vorsorgeuntersuchung Easy German 546: Die Brandmauer
Untersuchung Easy German 546: Die Brandmauer