refactor: fix image rendering and add logging

This commit is contained in:
Gal 2025-02-10 23:26:17 +01:00
parent 9c4e320063
commit 25e80a7b2e
Signed by: gal
GPG Key ID: F035BC65003BC00B
6 changed files with 122 additions and 49 deletions

4
.gitignore vendored
View File

@ -39,4 +39,6 @@ media/
# OS # OS
.DS_Store .DS_Store
Thumbs.db Thumbs.db
output/

View File

@ -1,5 +1,6 @@
import os import os
import random import random
import logging
import genanki import genanki
from datetime import datetime from datetime import datetime
from typing import List, Tuple, Optional from typing import List, Tuple, Optional
@ -7,12 +8,21 @@ from .models import GermanWord, UnsplashImage
from .clients.llm import AnthropicClient from .clients.llm import AnthropicClient
from .clients.unsplash import UnsplashClient from .clients.unsplash import UnsplashClient
class GermanDeckPackage(genanki.Package): logger = logging.getLogger(__name__)
"""Custom Package class to include media files"""
class GermanDeckPackage:
    """Pair an Anki deck with the media files that belong in its package."""

    def __init__(self, deck, media_files):
        # Both values are stored as given; nothing is validated or copied here.
        self.deck = deck
        self.media_files = media_files

    def write_to_file(self, file):
        """Wrap the deck in a ``genanki.Package`` and write it to *file*.

        The stored media file list is attached to the package before it is
        written, and the list is logged at INFO level.
        """
        anki_package = genanki.Package([self.deck])
        anki_package.media_files = self.media_files
        logger.info("Writing deck with media files: %s", self.media_files)
        anki_package.write_to_file(file)
class CardGenerator: class CardGenerator:
def __init__(self, llm_client: AnthropicClient, unsplash_client: UnsplashClient): def __init__(self, llm_client: AnthropicClient, unsplash_client: UnsplashClient):
self.llm_client = llm_client self.llm_client = llm_client
@ -46,7 +56,7 @@ class CardGenerator:
<div style="font-size: 24px;">{{German_Word}}</div> <div style="font-size: 24px;">{{German_Word}}</div>
<div style="font-size: 18px;">Part of speech: {{Part_of_Speech}}</div> <div style="font-size: 18px;">Part of speech: {{Part_of_Speech}}</div>
<div style="font-size: 14px;">Source: {{Source}}</div> <div style="font-size: 14px;">Source: {{Source}}</div>
{{#Image}}<div><img src="{{text:Image}}" style="max-width: 300px; max-height: 200px;"></div>{{/Image}} {{Image}}
''', ''',
'afmt': ''' 'afmt': '''
{{FrontSide}} {{FrontSide}}
@ -79,29 +89,65 @@ class CardGenerator:
''' '''
) )
def _load_processed_words(self) -> set:
    """Load previously processed words from the tracking file.

    Reads ``output/processed_words.txt`` one line at a time, stripping
    surrounding whitespace and ignoring blank lines.

    Returns:
        The set of words found in the tracking file, or an empty set when
        the file does not exist.
    """
    tracking_file = os.path.join("output", "processed_words.txt")
    try:
        # Open directly instead of checking os.path.exists() first: the file
        # could disappear between the check and the open() call.
        with open(tracking_file, 'r', encoding='utf-8') as f:
            processed_words = {line.strip() for line in f if line.strip()}
    except FileNotFoundError:
        # No tracking file yet means nothing has been processed.
        return set()
    logger.info("Found %d previously processed words", len(processed_words))
    return processed_words
def _add_to_processed_words(self, word: str):
    """Append *word* on its own line to the processed-words tracking file.

    The ``output`` directory is created first when it does not exist.
    """
    os.makedirs("output", exist_ok=True)
    tracking_path = os.path.join("output", "processed_words.txt")
    with open(tracking_path, 'a', encoding='utf-8') as handle:
        handle.write(f"{word}\n")
    logger.debug("Added %s to processed words", word)
def create_deck(self, word_list: List[Tuple[str, str]], deck_name: str = "German Vocabulary") -> Tuple[genanki.Deck, List[str]]: def create_deck(self, word_list: List[Tuple[str, str]], deck_name: str = "German Vocabulary") -> Tuple[genanki.Deck, List[str]]:
"""Create an Anki deck from a list of words""" """Create an Anki deck from a list of words"""
deck_id = random.randrange(1 << 30, 1 << 31) deck_id = random.randrange(1 << 30, 1 << 31)
deck = genanki.Deck(deck_id, deck_name) deck = genanki.Deck(deck_id, deck_name)
media_files = [] media_files = []
# Load previously processed words
processed_words = self._load_processed_words()
for word, source in word_list: for word, source in word_list:
# Skip if word has been processed before
if word in processed_words:
logger.info("Skipping %s - already exists in previous deck", word)
continue
try: try:
card_info_dict = self.llm_client.get_card_info(word, source) card_info_dict = self.llm_client.get_card_info(word, source)
card_info = GermanWord(**card_info_dict) card_info = GermanWord(**card_info_dict)
image_filename = f"{word.lower().replace(' ', '_')}.jpg"
# Get image # Create output directory if it doesn't exist
image_filename = f"media/{word.lower().replace(' ', '_')}.jpg" output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
media_dir = "media"
os.makedirs(media_dir, exist_ok=True)
# Save image directly to media directory
image_path = os.path.join(media_dir, image_filename)
image = self.unsplash_client.get_image( image = self.unsplash_client.get_image(
card_info.image_search_term, card_info.image_search_term,
image_filename image_path
) )
if image and image.local_path: if image and image.local_path and os.path.exists(image.local_path):
# Use the full path for the media file
media_files.append(image.local_path) media_files.append(image.local_path)
image_filename = os.path.basename(image.local_path) logger.info("Added image: %s", image.local_path)
else: else:
image_filename = "" image_path = ""
logger.warning("No image for: %s", word)
# Create note # Create note
note = genanki.Note( note = genanki.Note(
@ -117,15 +163,17 @@ class CardGenerator:
card_info.sentence_translation, card_info.sentence_translation,
card_info.usage_notes, card_info.usage_notes,
card_info.related_words, card_info.related_words,
image_filename, f'<img src="{os.path.basename(image.local_path) if image and image.local_path else ""}" />',
image.photographer if image else "", image.photographer if image else "",
' '.join(card_info.tags) ' '.join(card_info.tags)
] ]
) )
deck.add_note(note) deck.add_note(note)
print(f"Added card for: {word}") # Track the word after successfully creating the note
self._add_to_processed_words(word)
logger.info("Added card for: %s", word)
except Exception as e: except Exception as e:
print(f"Error creating card for {word}: {str(e)}") logger.error("Error creating card for %s: %s", word, str(e))
return deck, media_files return deck, media_files

View File

@ -2,6 +2,7 @@ from typing import Optional
import requests import requests
from PIL import Image from PIL import Image
from io import BytesIO from io import BytesIO
import os
from ..models import Settings, UnsplashImage from ..models import Settings, UnsplashImage
from anki_generator.settings import get_settings, Settings from anki_generator.settings import get_settings, Settings
@ -11,7 +12,7 @@ class UnsplashClient:
self.settings = settings or get_settings() self.settings = settings or get_settings()
self.api_key = self.settings.unsplash_api_key.get_secret_value() self.api_key = self.settings.unsplash_api_key.get_secret_value()
def get_image(self, search_term: str, save_path: str) -> Optional[UnsplashImage]: def get_image(self, search_term: str, filename: str) -> Optional[UnsplashImage]:
"""Fetch and save an image from Unsplash""" """Fetch and save an image from Unsplash"""
url = "https://api.unsplash.com/search/photos" url = "https://api.unsplash.com/search/photos"
headers = {"Authorization": f"Client-ID {self.api_key}"} headers = {"Authorization": f"Client-ID {self.api_key}"}
@ -37,15 +38,18 @@ class UnsplashClient:
img_response = requests.get(image_url) img_response = requests.get(image_url)
img_response.raise_for_status() img_response.raise_for_status()
# Create directory if it doesn't exist
os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
# Process and save image # Process and save image
img = Image.open(BytesIO(img_response.content)) img = Image.open(BytesIO(img_response.content))
img.thumbnail((800, 600)) img.thumbnail((800, 600))
img.save(save_path, "JPEG", quality=85) img.save(filename, "JPEG", quality=85)
return UnsplashImage( return UnsplashImage(
url=image_url, url=image_url,
photographer=photographer, photographer=photographer,
local_path=save_path local_path=filename
) )
except Exception as e: except Exception as e:

View File

@ -1,24 +1,29 @@
import os import os
import argparse import argparse
import logging
from datetime import datetime from datetime import datetime
from typing import List, Tuple import shutil
import glob
from typing import List, Tuple, Optional
from pathlib import Path from pathlib import Path
from anki_generator.clients.llm import AnthropicClient from anki_generator.clients.llm import AnthropicClient
from anki_generator.clients.unsplash import UnsplashClient from anki_generator.clients.unsplash import UnsplashClient
from anki_generator.card_generator import CardGenerator, GermanDeckPackage from anki_generator.card_generator import CardGenerator, GermanDeckPackage
logger = logging.getLogger(__name__)
def read_word_list(file_path: str) -> List[Tuple[str, str]]:
    """Read ``word,source`` pairs from a text file.

    Each non-blank line must have the form ``word,source``. Only the first
    comma separates the two fields, so the source itself may contain commas.
    Blank lines are skipped.

    Args:
        file_path: Path to the UTF-8 encoded word list.

    Returns:
        A list of ``(word, source)`` tuples with surrounding whitespace
        stripped from both fields.

    Raises:
        ValueError: If a non-blank line contains no comma.
    """
    words = []
    logger.info("Reading file: %s", file_path)
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # Blank lines used to break the two-name unpacking of
                # split(',', 1); they carry no data, so skip them.
                continue
            logger.debug("Processing line: %s", line)
            # Expected format: word,source
            word, separator, source = line.partition(',')
            if not separator:
                # Same exception type as the failed unpacking raised before,
                # but now it says which line is malformed.
                raise ValueError(
                    f"{file_path}:{line_number}: expected 'word,source', got {line!r}"
                )
            word, source = word.strip(), source.strip()
            words.append((word, source))
            logger.debug("Added word: %s with source: %s", word, source)
    return words
def main(): def main():
@ -29,35 +34,41 @@ def main():
args = parser.parse_args() args = parser.parse_args()
print("Loading program") logger.info("Starting Anki card generation")
# Create media directory try:
os.makedirs('media', exist_ok=True) llm_client = AnthropicClient()
unsplash_client = UnsplashClient()
# Initialize clients generator = CardGenerator(llm_client, unsplash_client)
llm_client = AnthropicClient()
unsplash_client = UnsplashClient()
# Initialize card generator words = read_word_list(args.input)
generator = CardGenerator(llm_client, unsplash_client)
# Read word list deck, media_files = generator.create_deck(words, args.deck_name)
words = read_word_list(args.input)
# Generate deck output_dir = "output"
deck, media_files = generator.create_deck(words, args.deck_name) os.makedirs(output_dir, exist_ok=True)
# Generate output filename if not provided if not args.output:
if not args.output: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_file = os.path.join(output_dir, f"german_vocab_{timestamp}.apkg")
output_file = f"german_vocab_{timestamp}.apkg" else:
else: output_file = os.path.join(output_dir, args.output)
output_file = args.output
# Save deck media_paths = []
package = GermanDeckPackage(deck, media_files) for media_file in media_files:
package.write_to_file(output_file) media_path = os.path.join(output_dir, os.path.basename(media_file))
print(f"Deck saved as: {output_file}") if os.path.exists(media_file) and media_file != media_path:
shutil.copy2(media_file, media_path)
media_paths.append(media_path)
package = GermanDeckPackage(deck, media_paths)
package.write_to_file(output_file)
logger.info("Deck and media files saved in: %s", output_dir)
except Exception as e:
logger.error("Error: %s", str(e))
raise
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -1,8 +1,17 @@
import logging
from typing import Optional from typing import Optional
from functools import lru_cache from functools import lru_cache
from pydantic import SecretStr from pydantic import SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
def setup_logging():
    """Configure application logging through ``logging.basicConfig``.

    Sets the level to INFO and uses a timestamped
    ``time - logger name - level - message`` record layout.
    """
    record_layout = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    timestamp_layout = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(
        level=logging.INFO,
        format=record_layout,
        datefmt=timestamp_layout,
    )
class Settings(BaseSettings): class Settings(BaseSettings):
"""Application settings that are loaded from environment variables.""" """Application settings that are loaded from environment variables."""
@ -44,4 +53,5 @@ def get_settings() -> Settings:
Returns: Returns:
Settings: Application settings instance Settings: Application settings instance
""" """
return Settings() setup_logging()
return Settings()

View File

@ -1,5 +1,3 @@
Katze,Duolingo Bauch,Babbel
Hund,Book: German Made Simple Kopf,Babbel
Apfel,Language Exchange Rücken,Babbel
Brot,DW Learn German
Wasser,Memrise

1 Katze Bauch Duolingo Babbel
2 Hund Kopf Book: German Made Simple Babbel
3 Apfel Rücken Language Exchange Babbel
Brot DW Learn German
Wasser Memrise