refactor: update publish script
This commit is contained in:
parent
9300e1a362
commit
00b048f177
4
Makefile
4
Makefile
|
@ -11,9 +11,9 @@ help:
|
|||
# Target-specific variables for the publish target
|
||||
publish:
|
||||
@if [ "$(clean)" = "True" ]; then \
|
||||
./scripts/publish.sh "$(SOURCE_DIR)" "$(DEST_DIR)" True; \
|
||||
./scripts/publish.py clean; \
|
||||
else \
|
||||
./scripts/publish.sh "$(SOURCE_DIR)" "$(DEST_DIR)"; \
|
||||
./scripts/publish.py; \
|
||||
fi
|
||||
@if [ "$(local)" = "True" ]; then \
|
||||
echo "Running npx quartz build --serve"; \
|
||||
|
|
|
@ -0,0 +1,197 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import re
|
||||
from typing import Set, Dict
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
# Configure logging
|
||||
# Every run reports to the console and to its own timestamped file in /tmp.
logging.basicConfig(
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(
            "/tmp/publish_log_{}.log".format(datetime.now().strftime('%Y%m%d_%H%M%S'))
        ),
    ],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
||||
|
||||
def load_env_vars() -> tuple[Path, Path]:
    """Return the source and destination directories named by the environment.

    Reads ``SOURCE_DIR`` and ``DEST_DIR``. Environment-variable references
    (``$HOME``, ``${HOME}``) and a leading ``~`` are both expanded, so
    ``$HOME/vault`` and ``~/vault`` resolve to the same location.

    Returns:
        A ``(source_dir, dest_dir)`` pair of paths.

    Raises:
        SystemExit: With status 1 when either variable is unset or empty.
    """
    source_dir = os.getenv('SOURCE_DIR')
    dest_dir = os.getenv('DEST_DIR')

    if not source_dir or not dest_dir:
        logging.error("SOURCE_DIR and DEST_DIR must be set in environment variables")
        sys.exit(1)

    def _expand(raw: str) -> Path:
        # Variables are expanded first so that a variable whose value starts
        # with "~" still has its home directory resolved afterwards.
        return Path(os.path.expanduser(os.path.expandvars(raw)))

    return _expand(source_dir), _expand(dest_dir)
|
||||
|
||||
def get_publish_status(file_path: Path) -> bool | None:
|
||||
"""Check if a markdown file has publish: true/false in its frontmatter."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
match = re.search(r'^---\n.*?publish:\s*(true|false).*?\n---', content, re.DOTALL | re.MULTILINE)
|
||||
if match:
|
||||
return match.group(1).lower() == 'true'
|
||||
except Exception as e:
|
||||
logging.error(f"Error reading {file_path}: {e}")
|
||||
return None
|
||||
|
||||
def extract_image_references(content: str) -> Set[str]:
    """Collect the target of every image embed found in markdown text.

    Two syntaxes are recognised: Obsidian embeds (``![[image.png]]``) and
    standard markdown images (``![alt](image.png)``). Each target is returned
    exactly as written between the delimiters.
    """
    embed_patterns = (
        r'!\[\[(.*?)\]\]',     # Obsidian: ![[target]]
        r'!\[.*?\]\((.*?)\)',  # Markdown: ![alt](target)
    )
    references = set()
    for pattern in embed_patterns:
        references.update(re.findall(pattern, content))
    return references
|
||||
|
||||
def find_image_in_source(image_ref: str, source_dir: Path) -> Path | None:
|
||||
"""Find an image file in the source directory."""
|
||||
image_name = Path(image_ref).name
|
||||
|
||||
# Search for the image recursively
|
||||
for path in source_dir.rglob(image_name):
|
||||
if path.is_file():
|
||||
return path
|
||||
return None
|
||||
|
||||
def process_images(content: str, source_dir: Path, dest_dir: Path) -> str:
    """Copy referenced images into ``dest_dir/images`` and rewrite their links.

    Every image embed found in ``content`` is looked up by file name under
    ``source_dir``. Images that are found are copied (once) into
    ``dest_dir/images`` and the embed is rewritten to a markdown image that
    points at ``/images/<file name>``. Obsidian embeds (``![[name]]``) are
    converted to markdown syntax in the process. References that cannot be
    resolved are left untouched and logged as warnings.

    Args:
        content: Markdown text of the note being published.
        source_dir: Root directory searched for image files.
        dest_dir: Publish root; images are stored in its ``images`` folder.

    Returns:
        The markdown text with resolved image references rewritten.
    """
    images_dir = dest_dir / "images"
    images_dir.mkdir(parents=True, exist_ok=True)

    for img_ref in extract_image_references(content):
        # Obsidian allows a display suffix after "|" (e.g. "photo.png|300");
        # only the part before it names the file.
        target = img_ref.split('|', 1)[0].strip()
        img_path = find_image_in_source(target, source_dir) if target else None
        if img_path is None:
            logging.warning(f"Image not found: {img_ref}")
            continue

        dest_image = images_dir / img_path.name
        if not dest_image.exists():
            shutil.copy2(img_path, dest_image)
            logging.info(f"Copied image: {img_path.name}")

        published_url = f'/images/{img_path.name}'
        # A note may mix both embed syntaxes, so each reference is rewritten
        # in whichever form it occurs rather than deciding once per document.
        content = content.replace(
            f'![[{img_ref}]]', f'![{img_path.stem}]({published_url})'
        )
        content = content.replace(f'({img_ref})', f'({published_url})')

    return content
|
||||
|
||||
def clean_destination(dest_dir: Path):
    """Empty ``dest_dir`` while leaving a top-level ``images`` directory alone.

    Files directly inside ``dest_dir`` are deleted and every sub-directory
    other than ``images`` is removed together with its contents.
    """
    for entry in dest_dir.iterdir():
        if entry.is_file():
            entry.unlink()
            continue
        if not entry.is_dir() or entry.name == "images":
            continue
        shutil.rmtree(entry)
    logging.info(f"Cleaned destination directory: {dest_dir}")
|
||||
|
||||
def sync_files(source_dir: Path, dest_dir: Path, clean: bool = False):
    """Mirror publishable markdown from ``source_dir`` into ``dest_dir``.

    The sync runs in four passes:

    1. Every ``*.md`` under ``source_dir`` with a publish flag is handled:
       published notes are written to the same relative path in ``dest_dir``
       after their images have been processed; unpublished notes are removed
       from ``dest_dir`` if present. Notes without a flag are skipped.
    2. Any ``*.md`` in ``dest_dir`` that was not written in pass 1 is deleted.
    3. The remaining destination notes are scanned for ``/images/...`` links.
    4. Files in ``dest_dir/images`` that no note links to are deleted.

    Args:
        source_dir: Root of the notes to publish from.
        dest_dir: Root of the published tree (must already exist).
        clean: When true, the destination is cleaned before syncing.
    """
    if clean:
        clean_destination(dest_dir)

    # Create images directory if it doesn't exist
    images_dir = dest_dir / "images"
    images_dir.mkdir(exist_ok=True)

    # Destination paths written during this run; everything else is stale.
    files_to_keep = set()

    # Pass 1: publish or retract each flagged source note.
    for source_file in source_dir.rglob("*.md"):
        publish_status = get_publish_status(source_file)
        if publish_status is None:
            continue

        rel_path = source_file.relative_to(source_dir)
        dest_file = dest_dir / rel_path

        if publish_status:
            dest_file.parent.mkdir(parents=True, exist_ok=True)

            with open(source_file, 'r', encoding='utf-8') as f:
                content = f.read()

            # Copies referenced images and rewrites their links.
            content = process_images(content, source_dir, dest_dir)

            with open(dest_file, 'w', encoding='utf-8') as f:
                f.write(content)

            logging.info(f"Processed: {rel_path}")
            files_to_keep.add(dest_file)
        elif dest_file.exists():
            # publish: false - make sure an earlier copy does not linger.
            dest_file.unlink()
            logging.info(f"Removed: {rel_path}")

    # Pass 2: drop destination notes that were not written above. The listing
    # is snapshotted first so files are not deleted while the tree is still
    # being walked.
    for dest_file in list(dest_dir.rglob("*.md")):
        if dest_file not in files_to_keep:
            dest_file.unlink()
            logging.info(f"Removed old file: {dest_file.relative_to(dest_dir)}")

    # Pass 3: collect the image file names still linked from published notes.
    used_images = set()
    for dest_file in dest_dir.rglob("*.md"):
        with open(dest_file, 'r', encoding='utf-8') as f:
            content = f.read()
        used_images.update(re.findall(r'!\[.*?\]\(/images/(.*?)\)', content))

    # Pass 4: remove unreferenced images. Only regular files are candidates;
    # unlink() on a sub-directory would raise and abort the whole sync.
    for image_file in images_dir.iterdir():
        if image_file.is_file() and image_file.name not in used_images:
            image_file.unlink()
            logging.info(f"Removed unused image: {image_file.name}")
|
||||
|
||||
def main():
    """Run one publish sync using the environment and command line.

    The first command-line argument, when it equals ``clean``
    (case-insensitive), requests that the destination be cleaned before
    syncing. The directories come from ``load_env_vars()``.

    Raises:
        SystemExit: With status 1 when the source directory does not exist or
            any unexpected error occurs during the sync.
    """
    clean = len(sys.argv) > 1 and sys.argv[1].lower() == 'clean'

    try:
        source_dir, dest_dir = load_env_vars()

        if not source_dir.exists():
            logging.error(f"Source directory does not exist: {source_dir}")
            # SystemExit is not an Exception subclass, so it passes straight
            # through the handler below.
            sys.exit(1)

        # Create destination directory if it doesn't exist
        dest_dir.mkdir(parents=True, exist_ok=True)

        sync_files(source_dir, dest_dir, clean)

        logging.info("Sync completed successfully")

    except Exception as e:
        # logging.exception records the traceback along with the message, so
        # a failure in the log file can be traced to its origin.
        logging.exception(f"An error occurred: {e}")
        sys.exit(1)
|
||||
|
||||
# Run the sync only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@ -1,141 +0,0 @@
|
|||
#!/bin/bash
#
# Publish markdown notes from SOURCE_DIR into DEST_DIR.
#
#   * Notes containing "publish: true" are copied to the same relative path
#     under DEST_DIR (skipped when an identical copy is already there), and
#     the images they embed with ![[...]] are copied into DEST_DIR/images.
#   * Notes containing "publish: false" are removed from DEST_DIR.
#
# Usage: publish.sh <SOURCE_DIR> <DEST_DIR> [clean=True/False]
# A log of every action is written to /tmp/copied_files.log and printed at
# the end.

# Check if both SOURCE_DIR and DEST_DIR are provided as arguments
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <SOURCE_DIR> <DEST_DIR> [clean=True/False]"
    exit 1
fi

# Capture SOURCE_DIR and DEST_DIR from arguments
SOURCE_DIR="$1"
DEST_DIR="$2"
CLEAN_FLAG="${3:-False}" # Default value is False if not provided
IMAGE_DIR="$DEST_DIR/images"

# Temporary log for copied files
LOG_FILE=/tmp/copied_files.log

# Ensure destination directories exist
mkdir -p "$DEST_DIR"

# Remove everything inside DEST_DIR.
clean_destination() {
    echo "Cleaning $DEST_DIR"
    # ":?" aborts rather than expanding to "/*" if DEST_DIR is ever empty.
    rm -rf "${DEST_DIR:?}"/*
}

# Copy every image embedded in the given markdown file into IMAGE_DIR.
# $1 - path of the markdown file to scan for ![[...]] embeds.
copy_referenced_images() {
    local markdown_file="$1"
    local image_ref image_name image_path

    # "[^]]*" keeps each embed separate when a line holds more than one.
    grep -o '!\[\[[^]]*\]\]' "$markdown_file" \
        | sed 's/^!\[\[\(.*\)\]\]$/\1/' \
        | while IFS= read -r image_ref; do
            # Relative references are reported relative to SOURCE_DIR
            if [[ ! "$image_ref" =~ ^/ ]]; then
                image_ref="$SOURCE_DIR/$image_ref"
            fi
            image_name=$(basename "$image_ref")

            # Try to find the image recursively within SOURCE_DIR
            image_path=$(find -L "$SOURCE_DIR" -type f -name "$image_name" -print -quit)

            # Debugging: Print image_path
            echo "Checking image_path: $image_path" >> "$LOG_FILE"

            if [ ! -f "$image_path" ]; then
                echo "Image reference $image_ref in $markdown_file does not exist." >> "$LOG_FILE"
            elif [ -f "$IMAGE_DIR/$image_name" ]; then
                echo "Skipped copying image $image_ref as it already exists in $IMAGE_DIR/" >> "$LOG_FILE"
            else
                cp "$image_path" "$IMAGE_DIR/"
                echo "Copied image $image_ref to $IMAGE_DIR/" >> "$LOG_FILE"
            fi
        done
}

# Check if CLEAN_FLAG is set to True
if [ "$CLEAN_FLAG" == "True" ]; then
    clean_destination
fi

mkdir -p "$IMAGE_DIR"

# Clear the log file
> "$LOG_FILE"

# Find Markdown files with "publish: true" and process them.
# "IFS=" keeps leading/trailing whitespace in file names intact.
find -L "$SOURCE_DIR" -type f -name "*.md" -exec grep -l "publish: true" {} + \
    | while IFS= read -r markdown_file; do
        # Path relative to SOURCE_DIR; quoted so it is stripped literally
        relative_path="${markdown_file#"$SOURCE_DIR"/}"
        dest_path="$DEST_DIR/$relative_path"

        # Ensure the directory exists before copying the Markdown file
        mkdir -p "$(dirname "$dest_path")"

        if [ -f "$dest_path" ]; then
            if cmp -s "$markdown_file" "$dest_path"; then
                echo "Skipped $markdown_file as it already exists and has the same content in $dest_path" >> "$LOG_FILE"
                continue
            fi
            action="Replaced $dest_path with $markdown_file"
        else
            action="Copied $markdown_file to $dest_path"
        fi

        cp -p "$markdown_file" "$dest_path"
        echo "$action" >> "$LOG_FILE"
        copy_referenced_images "$markdown_file"
    done

# Find Markdown files with "publish: false" and remove them from DEST_DIR
find -L "$SOURCE_DIR" -type f -name "*.md" -exec grep -l "publish: false" {} + \
    | while IFS= read -r markdown_file; do
        relative_path="${markdown_file#"$SOURCE_DIR"/}"
        dest_path="$DEST_DIR/$relative_path"

        if [ -f "$dest_path" ]; then
            rm "$dest_path"
            echo "Removed $markdown_file from $DEST_DIR" >> "$LOG_FILE"
        else
            echo "File $markdown_file not found in $DEST_DIR" >> "$LOG_FILE"
        fi
    done

# Print the log file
cat "$LOG_FILE"

# Debugging: Indicate completion
echo "Script execution completed."
|
Loading…
Reference in New Issue