diff --git a/Makefile b/Makefile index 3fd8027..b05c45c 100644 --- a/Makefile +++ b/Makefile @@ -11,9 +11,9 @@ help: # Target-specific variables for the publish target publish: @if [ "$(clean)" = "True" ]; then \ - ./scripts/publish.sh "$(SOURCE_DIR)" "$(DEST_DIR)" True; \ + ./scripts/publish.py clean; \ else \ - ./scripts/publish.sh "$(SOURCE_DIR)" "$(DEST_DIR)"; \ + ./scripts/publish.py; \ fi @if [ "$(local)" = "True" ]; then \ echo "Running npx quartz build --serve"; \ diff --git a/scripts/publish.py b/scripts/publish.py new file mode 100755 index 0000000..3413f44 --- /dev/null +++ b/scripts/publish.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 + +import os +import shutil +import sys +from pathlib import Path +import re +from typing import Set, Dict +import logging +from datetime import datetime + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(), + logging.FileHandler(f"/tmp/publish_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log") + ] +) + +def load_env_vars() -> tuple[Path, Path]: + """Load environment variables and return source and destination directories.""" + source_dir = os.getenv('SOURCE_DIR') + dest_dir = os.getenv('DEST_DIR') + + if not source_dir or not dest_dir: + logging.error("SOURCE_DIR and DEST_DIR must be set in environment variables") + sys.exit(1) + + # Expand environment variables like $HOME + source_dir = os.path.expandvars(source_dir) + dest_dir = os.path.expandvars(dest_dir) + + return Path(source_dir), Path(dest_dir) + +def get_publish_status(file_path: Path) -> bool | None: + """Check if a markdown file has publish: true/false in its frontmatter.""" + try: + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + match = re.search(r'^---\n.*?publish:\s*(true|false).*?\n---', content, re.DOTALL | re.MULTILINE) + if match: + return match.group(1).lower() == 'true' + except Exception as e: + logging.error(f"Error reading {file_path}: {e}") + return None + +def extract_image_references(content: str) -> Set[str]: + """Extract image references from markdown content.""" + # Match both ![[image.png]] and ![alt](image.png) syntax + obsidian_images = set(re.findall(r'!\[\[(.*?)\]\]', content)) + markdown_images = set(re.findall(r'!\[.*?\]\((.*?)\)', content)) + return obsidian_images.union(markdown_images) + +def find_image_in_source(image_ref: str, source_dir: Path) -> Path | None: + """Find an image file in the source directory.""" + image_name = Path(image_ref).name + + # Search for the image recursively + for path in source_dir.rglob(image_name): + if path.is_file(): + return path + return None + +def process_images(content: str, source_dir: Path, dest_dir: Path) -> str: + """Process and copy images, updating references in content.""" + images_dir = dest_dir / "images" + images_dir.mkdir(exist_ok=True) + + # Get all image references + image_refs = extract_image_references(content) + + # Process each image reference + for img_ref in image_refs: + img_path = find_image_in_source(img_ref, source_dir) + if img_path: + # Copy image to images directory + dest_image = images_dir / img_path.name + if not dest_image.exists(): + shutil.copy2(img_path, dest_image) + logging.info(f"Copied image: {img_path.name}") + + # Update reference in content + if '![[' in content: + # Convert Obsidian to Markdown style + content = content.replace(f'![[{img_ref}]]', f'![{img_path.stem}](/images/{img_path.name})') + else: + # Update regular Markdown style + content = content.replace(f'({img_ref})', f'(/images/{img_path.name})') + else: + logging.warning(f"Image not found: {img_ref}") + + return content + +def clean_destination(dest_dir: Path): + """Clean the destination directory.""" + for item in dest_dir.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir() and item.name != "images": + shutil.rmtree(item) + logging.info(f"Cleaned destination directory: {dest_dir}") + +def sync_files(source_dir: Path, dest_dir: Path, clean: bool = False): + """Synchronize files between source and destination directories.""" + if clean: + clean_destination(dest_dir) + + # Create images directory if it doesn't exist + images_dir = dest_dir / "images" + images_dir.mkdir(exist_ok=True) + + # Track files to keep in destination + files_to_keep = set() + + # Process markdown files + for source_file in source_dir.rglob("*.md"): + publish_status = get_publish_status(source_file) + if publish_status is None: + continue + + # Calculate relative path and destination path + rel_path = source_file.relative_to(source_dir) + dest_file = dest_dir / rel_path + + if publish_status: + # Ensure parent directories exist + dest_file.parent.mkdir(parents=True, exist_ok=True) + + # Read and process content + with open(source_file, 'r', encoding='utf-8') as f: + content = f.read() + + # Process images and update references + content = process_images(content, source_dir, dest_dir) + + # Write processed content + with open(dest_file, 'w', encoding='utf-8') as f: + f.write(content) + + logging.info(f"Processed: {rel_path}") + files_to_keep.add(dest_file) + else: + # Remove if publish is false and file exists + if dest_file.exists(): + dest_file.unlink() + logging.info(f"Removed: {rel_path}") + + # Clean up old files in destination that weren't in source + for dest_file in dest_dir.rglob("*.md"): + if dest_file not in files_to_keep: + dest_file.unlink() + logging.info(f"Removed old file: {dest_file.relative_to(dest_dir)}") + + # Clean up unused images + used_images = set() + for dest_file in dest_dir.rglob("*.md"): + with open(dest_file, 'r', encoding='utf-8') as f: + content = f.read() + # Extract image filenames from markdown links + used_images.update(re.findall(r'!\[.*?\]\(/images/(.*?)\)', content)) + + # Remove unused images + for image_file in images_dir.iterdir(): + if image_file.name not in used_images: + image_file.unlink() + logging.info(f"Removed unused image: {image_file.name}") + +def main(): + """Main function.""" + # Parse command line arguments + clean = len(sys.argv) > 1 and sys.argv[1].lower() == 'clean' + + try: + # Load environment variables + source_dir, dest_dir = load_env_vars() + + # Validate directories + if not source_dir.exists(): + logging.error(f"Source directory does not exist: {source_dir}") + sys.exit(1) + + # Create destination directory if it doesn't exist + dest_dir.mkdir(parents=True, exist_ok=True) + + # Sync files + sync_files(source_dir, dest_dir, clean) + + logging.info("Sync completed successfully") + + except Exception as e: + logging.error(f"An error occurred: {e}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/publish.sh b/scripts/publish.sh deleted file mode 100755 index e5e1f1a..0000000 --- a/scripts/publish.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/bash - -# Check if both SOURCE_DIR and DEST_DIR are provided as arguments -if [ "$#" -lt 2 ]; then - echo "Usage: $0 [clean=True/False]" - exit 1 -fi - -# Capture SOURCE_DIR and DEST_DIR from arguments -SOURCE_DIR="$1" -DEST_DIR="$2" -CLEAN_FLAG="${3:-False}" # Default value is False if not provided -IMAGE_DIR="$DEST_DIR/images" - -# Ensure destination directories exist -mkdir -p "$DEST_DIR" - -# Function to clean DEST_DIR -clean_destination() { - echo "Cleaning $DEST_DIR" - rm -rf "$DEST_DIR"/* -} - -# Check if CLEAN_FLAG is set to True -if [ "$CLEAN_FLAG" == "True" ]; then - clean_destination -fi - -mkdir -p "$IMAGE_DIR" - -# Temporary log for copied files -LOG_FILE=/tmp/copied_files.log - -# Clear the log file -> "$LOG_FILE" - -# Find Markdown files with "publish: true" and process them -find -L "$SOURCE_DIR" -type f -name "*.md" -exec grep -q "publish: true" {} \; -exec grep -l "publish: true" {} + | while read -r markdown_file; do - # Extract relative path from the source directory - relative_path="${markdown_file#$SOURCE_DIR/}" - - # Destination path - dest_path="$DEST_DIR/$relative_path" - - # Ensure the directory exists before copying the Markdown file - dest_dir=$(dirname "$dest_path") - mkdir -p "$dest_dir" - - # Check if the destination file already exists - if [ -f "$dest_path" ]; then - # Compare the source and destination files - if cmp -s "$markdown_file" "$dest_path"; then - echo "Skipped $markdown_file as it already exists and has the same content in $dest_path" >> "$LOG_FILE" - else - # Replace the destination file with the source file - cp -p "$markdown_file" "$dest_path" - echo "Replaced $dest_path with $markdown_file" >> "$LOG_FILE" - - # Extract and copy referenced images - grep -o '\!\[\[.*\]\]' "$markdown_file" | sed 's/\!\[\[\(.*\)\]\]/\1/' | while IFS= read -r image_ref; do - # Check if image_ref is a relative path - if [[ ! "$image_ref" =~ ^/ ]]; then - image_ref="$SOURCE_DIR/$image_ref" - fi - - # Try to find the image recursively within SOURCE_DIR - image_path=$(find -L "$SOURCE_DIR" -type f -name "$(basename "$image_ref")" -print -quit) - - # Debugging: Print image_path - echo "Checking image_path: $image_path" >> "$LOG_FILE" - - if [ -f "$image_path" ]; then - # Check if the image already exists in IMAGE_DIR - if [ ! -f "$IMAGE_DIR/$(basename "$image_ref")" ]; then - cp "$image_path" "$IMAGE_DIR/" - echo "Copied image $image_ref to $IMAGE_DIR/" >> "$LOG_FILE" - else - echo "Skipped copying image $image_ref as it already exists in $IMAGE_DIR/" >> "$LOG_FILE" - fi - else - echo "Image reference $image_ref in $markdown_file does not exist." >> "$LOG_FILE" - fi - done - fi - else - # Copy the Markdown file since it doesn't exist in the destination - cp -p "$markdown_file" "$dest_path" - echo "Copied $markdown_file to $dest_path" >> "$LOG_FILE" - - # Extract and copy referenced images - grep -o '\!\[\[.*\]\]' "$markdown_file" | sed 's/\!\[\[\(.*\)\]\]/\1/' | while IFS= read -r image_ref; do - # Check if image_ref is a relative path - if [[ ! "$image_ref" =~ ^/ ]]; then - image_ref="$SOURCE_DIR/$image_ref" - fi - - # Try to find the image recursively within SOURCE_DIR - image_path=$(find -L "$SOURCE_DIR" -type f -name "$(basename "$image_ref")" -print -quit) - - # Debugging: Print image_path - echo "Checking image_path: $image_path" >> "$LOG_FILE" - - if [ -f "$image_path" ]; then - # Check if the image already exists in IMAGE_DIR - if [ ! -f "$IMAGE_DIR/$(basename "$image_ref")" ]; then - cp "$image_path" "$IMAGE_DIR/" - echo "Copied image $image_ref to $IMAGE_DIR/" >> "$LOG_FILE" - else - echo "Skipped copying image $image_ref as it already exists in $IMAGE_DIR/" >> "$LOG_FILE" - fi - else - echo "Image reference $image_ref in $markdown_file does not exist." >> "$LOG_FILE" - fi - done - fi -done - - -# Find Markdown files with "publish: false" and process them -find -L "$SOURCE_DIR" -type f -name "*.md" -exec grep -q "publish: false" {} \; -exec grep -l "publish: false" {} + | while read -r markdown_file; do - # Extract relative path from the source directory - relative_path="${markdown_file#$SOURCE_DIR/}" - - # Destination path - dest_path="$DEST_DIR/$relative_path" - - # Check if the destination file exists - if [ -f "$dest_path" ]; then - # Remove the file from DEST_DIR - rm "$dest_path" - echo "Removed $markdown_file from $DEST_DIR" >> "$LOG_FILE" - else - echo "File $markdown_file not found in $DEST_DIR" >> "$LOG_FILE" - fi -done - -# Print the log file -cat "$LOG_FILE" - -# Debugging: Indicate completion -echo "Script execution completed."