diff --git a/.gitignore b/.gitignore index 7cce00e..9917d60 100644 --- a/.gitignore +++ b/.gitignore @@ -39,4 +39,6 @@ media/ # OS .DS_Store -Thumbs.db \ No newline at end of file +Thumbs.db + +output/ \ No newline at end of file diff --git a/anki_generator/card_generator.py b/anki_generator/card_generator.py index d17d588..1a8b81a 100644 --- a/anki_generator/card_generator.py +++ b/anki_generator/card_generator.py @@ -1,5 +1,6 @@ import os import random +import logging import genanki from datetime import datetime from typing import List, Tuple, Optional @@ -7,12 +8,21 @@ from .models import GermanWord, UnsplashImage from .clients.llm import AnthropicClient from .clients.unsplash import UnsplashClient -class GermanDeckPackage(genanki.Package): - """Custom Package class to include media files""" +logger = logging.getLogger(__name__) + +class GermanDeckPackage: + """Package class to create Anki deck with media files""" def __init__(self, deck, media_files): - super().__init__(deck) + self.deck = deck self.media_files = media_files + def write_to_file(self, file): + """Write deck to a file""" + package = genanki.Package([self.deck]) + package.media_files = self.media_files + logger.info("Writing deck with media files: %s", self.media_files) + package.write_to_file(file) + class CardGenerator: def __init__(self, llm_client: AnthropicClient, unsplash_client: UnsplashClient): self.llm_client = llm_client @@ -46,7 +56,7 @@ class CardGenerator:
{{German_Word}}
Part of speech: {{Part_of_Speech}}
Source: {{Source}}
- {{#Image}}
{{/Image}} + {{Image}} ''', 'afmt': ''' {{FrontSide}} @@ -79,29 +89,65 @@ class CardGenerator: ''' ) + def _load_processed_words(self) -> set: + """Load previously processed words from tracking file""" + processed_words = set() + tracking_file = os.path.join("output", "processed_words.txt") + if os.path.exists(tracking_file): + with open(tracking_file, 'r', encoding='utf-8') as f: + processed_words = set(line.strip() for line in f if line.strip()) + logger.info("Found %d previously processed words", len(processed_words)) + return processed_words + + def _add_to_processed_words(self, word: str): + """Add a word to the tracking file""" + tracking_file = os.path.join("output", "processed_words.txt") + os.makedirs("output", exist_ok=True) + with open(tracking_file, 'a', encoding='utf-8') as f: + f.write(f"{word}\n") + logger.debug("Added %s to processed words", word) + def create_deck(self, word_list: List[Tuple[str, str]], deck_name: str = "German Vocabulary") -> Tuple[genanki.Deck, List[str]]: """Create an Anki deck from a list of words""" deck_id = random.randrange(1 << 30, 1 << 31) deck = genanki.Deck(deck_id, deck_name) media_files = [] + + # Load previously processed words + processed_words = self._load_processed_words() for word, source in word_list: + # Skip if word has been processed before + if word in processed_words: + logger.info("Skipping %s - already exists in previous deck", word) + continue try: card_info_dict = self.llm_client.get_card_info(word, source) card_info = GermanWord(**card_info_dict) + + image_filename = f"{word.lower().replace(' ', '_')}.jpg" - # Get image - image_filename = f"media/{word.lower().replace(' ', '_')}.jpg" + # Create output directory if it doesn't exist + output_dir = "output" + os.makedirs(output_dir, exist_ok=True) + + media_dir = "media" + os.makedirs(media_dir, exist_ok=True) + + # Save image directly to media directory + image_path = os.path.join(media_dir, image_filename) image = self.unsplash_client.get_image( card_info.image_search_term, - image_filename + image_path ) - if image and image.local_path: + if image and image.local_path and os.path.exists(image.local_path): + # Use the full path for the media file media_files.append(image.local_path) - image_filename = os.path.basename(image.local_path) + logger.info("Added image: %s", image.local_path) else: - image_filename = "" + image_path = "" + logger.warning("No image for: %s", word) # Create note note = genanki.Note( @@ -117,15 +163,17 @@ class CardGenerator: card_info.sentence_translation, card_info.usage_notes, card_info.related_words, - image_filename, + f'', image.photographer if image else "", ' '.join(card_info.tags) ] ) deck.add_note(note) - print(f"Added card for: {word}") + # Track the word after successfully creating the note + self._add_to_processed_words(word) + logger.info("Added card for: %s", word) except Exception as e: - print(f"Error creating card for {word}: {str(e)}") + logger.error("Error creating card for %s: %s", word, str(e)) return deck, media_files diff --git a/anki_generator/clients/unsplash.py b/anki_generator/clients/unsplash.py index 40e6a38..5e3985a 100644 --- a/anki_generator/clients/unsplash.py +++ b/anki_generator/clients/unsplash.py @@ -2,6 +2,7 @@ from typing import Optional import requests from PIL import Image from io import BytesIO +import os from ..models import Settings, UnsplashImage from anki_generator.settings import get_settings, Settings @@ -11,7 +12,7 @@ class UnsplashClient: self.settings = settings or get_settings() self.api_key = self.settings.unsplash_api_key.get_secret_value() - def get_image(self, search_term: str, save_path: str) -> Optional[UnsplashImage]: + def get_image(self, search_term: str, filename: str) -> Optional[UnsplashImage]: """Fetch and save an image from Unsplash""" url = "https://api.unsplash.com/search/photos" headers = {"Authorization": f"Client-ID {self.api_key}"} @@ -37,15 +38,18 @@ class UnsplashClient: img_response = requests.get(image_url) img_response.raise_for_status() + # Create directory if it doesn't exist + os.makedirs(os.path.dirname(filename) or '.', exist_ok=True) + # Process and save image img = Image.open(BytesIO(img_response.content)) img.thumbnail((800, 600)) - img.save(save_path, "JPEG", quality=85) + img.save(filename, "JPEG", quality=85) return UnsplashImage( url=image_url, photographer=photographer, - local_path=save_path + local_path=filename ) except Exception as e: diff --git a/anki_generator/main.py b/anki_generator/main.py index 472068a..63df1a3 100644 --- a/anki_generator/main.py +++ b/anki_generator/main.py @@ -1,24 +1,29 @@ import os import argparse +import logging from datetime import datetime -from typing import List, Tuple +import shutil +import glob +from typing import List, Tuple, Optional from pathlib import Path from anki_generator.clients.llm import AnthropicClient from anki_generator.clients.unsplash import UnsplashClient from anki_generator.card_generator import CardGenerator, GermanDeckPackage +logger = logging.getLogger(__name__) + def read_word_list(file_path: str) -> List[Tuple[str, str]]: """Read word list from file""" words = [] - print(f"Reading file: {file_path}") + logger.info("Reading file: %s", file_path) with open(file_path, 'r', encoding='utf-8') as f: for line in f: - print(f"Processing line: {line.strip()}") + logger.debug("Processing line: %s", line.strip()) # Expected format: word,source word, source = line.strip().split(',', 1) words.append((word.strip(), source.strip())) - print(f"Added word: {word.strip()} with source: {source.strip()}") + logger.debug("Added word: %s with source: %s", word.strip(), source.strip()) return words def main(): @@ -29,35 +34,41 @@ def main(): args = parser.parse_args() - print("Loading program") + logger.info("Starting Anki card generation") - # Create media directory - os.makedirs('media', exist_ok=True) + try: + llm_client = AnthropicClient() + unsplash_client = UnsplashClient() - # Initialize clients - llm_client = AnthropicClient() - unsplash_client = UnsplashClient() + generator = CardGenerator(llm_client, unsplash_client) - # Initialize card generator - generator = CardGenerator(llm_client, unsplash_client) + words = read_word_list(args.input) - # Read word list - words = read_word_list(args.input) + deck, media_files = generator.create_deck(words, args.deck_name) - # Generate deck - deck, media_files = generator.create_deck(words, args.deck_name) + output_dir = "output" + os.makedirs(output_dir, exist_ok=True) - # Generate output filename if not provided - if not args.output: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - output_file = f"german_vocab_{timestamp}.apkg" - else: - output_file = args.output + if not args.output: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + output_file = os.path.join(output_dir, f"german_vocab_{timestamp}.apkg") + else: + output_file = os.path.join(output_dir, args.output) - # Save deck - package = GermanDeckPackage(deck, media_files) - package.write_to_file(output_file) - print(f"Deck saved as: {output_file}") + media_paths = [] + for media_file in media_files: + media_path = os.path.join(output_dir, os.path.basename(media_file)) + if os.path.exists(media_file) and media_file != media_path: + shutil.copy2(media_file, media_path) + media_paths.append(media_path) + + package = GermanDeckPackage(deck, media_paths) + package.write_to_file(output_file) + logger.info("Deck and media files saved in: %s", output_dir) + + except Exception as e: + logger.error("Error: %s", str(e)) + raise if __name__ == "__main__": main() diff --git a/anki_generator/settings.py b/anki_generator/settings.py index be387a3..120b57d 100644 --- a/anki_generator/settings.py +++ b/anki_generator/settings.py @@ -1,8 +1,17 @@ +import logging from typing import Optional from functools import lru_cache from pydantic import SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict +def setup_logging(): + """Configure logging for the application.""" + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S' + ) + class Settings(BaseSettings): """Application settings that are loaded from environment variables.""" @@ -44,4 +53,5 @@ def get_settings() -> Settings: Returns: Settings: Application settings instance """ - return Settings() \ No newline at end of file + setup_logging() + return Settings() diff --git a/data/words.csv b/data/words.csv index 4924e7f..65d905d 100644 --- a/data/words.csv +++ b/data/words.csv @@ -1,5 +1,3 @@ -Katze,Duolingo -Hund,Book: German Made Simple -Apfel,Language Exchange -Brot,DW Learn German -Wasser,Memrise +Bauch,Babbel +Kopf,Babbel +Rücken,Babbel