# berlin-picnic-api/scripts/inspect_street_trees.py

#!/usr/bin/env python3
"""
Inspect the street trees JSON file structure without loading the entire file.
"""

import json
import sys

def _skip_whitespace(text, pos):
    """Return the index of the first non-whitespace character at or after pos."""
    while pos < len(text) and text[pos] in " \t\r\n":
        pos += 1
    return pos


def _read_json_value(text, key):
    """Return the decoded JSON value that follows ``"key":`` in text.

    Returns None when the key is absent, or when its value is cut off or
    malformed (a JSON ``null`` is therefore indistinguishable from "absent").
    """
    marker = f'"{key}":'
    key_pos = text.find(marker)
    if key_pos == -1:
        return None
    # raw_decode does not skip leading whitespace, so do it explicitly.
    value_pos = _skip_whitespace(text, key_pos + len(marker))
    try:
        value, _ = json.JSONDecoder().raw_decode(text, value_pos)
    except json.JSONDecodeError:
        return None
    return value


def inspect_street_trees(file_path="app/data/processed/street_trees.json",
                         head_chars=65536):
    """Inspect the street trees JSON file structure without loading it fully.

    Only the first ``head_chars`` characters of the file are read. From that
    head the function prints the file size, the ``count`` /
    ``processed_count`` / ``skipped_count`` metadata found before the
    ``"street_trees"`` key, and the fields of the first tree object.

    Args:
        file_path: Path of the JSON file to inspect.
        head_chars: Number of characters to read from the start of the file.

    Returns:
        True when the ``"street_trees"`` key was found in the head of the
        file, False when it was not found or the file could not be read.
    """
    import os  # local import: only this function needs it

    try:
        size_bytes = os.path.getsize(file_path)
        with open(file_path, 'r', encoding='utf-8') as f:
            # Text-mode read(n) returns n decoded characters, so no manual
            # byte/character offset arithmetic (or seek) is needed later.
            head = f.read(head_chars)
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 content.
        print(f"Error inspecting file: {e}")
        return False

    trees_key = '"street_trees":'
    trees_pos = head.find(trees_key)
    if trees_pos == -1:
        print(
            f"Error inspecting file: {trees_key} not found in the first "
            f"{head_chars} characters"
        )
        return False

    print("File structure inspection:")
    print(f"File size: ~{size_bytes / (1024 * 1024):.1f}MB")

    # Search only the text before the trees array so that a tree field with
    # the same name is never mistaken for file-level metadata.
    metadata = head[:trees_pos]
    for key, label in (
        ("count", "Tree count"),
        ("processed_count", "Processed count"),
        ("skipped_count", "Skipped count"),
    ):
        value = _read_json_value(metadata, key)
        if value is not None:
            print(f"{label}: {value}")

    # Locate the first element of the array, tolerating any whitespace
    # between the key, the opening bracket and the first object.
    array_pos = _skip_whitespace(head, trees_pos + len(trees_key))
    if array_pos < len(head) and head[array_pos] == "[":
        tree_pos = _skip_whitespace(head, array_pos + 1)
        if tree_pos < len(head) and head[tree_pos] == "{":
            try:
                # raw_decode parses exactly one JSON value and ignores the
                # rest, and it handles braces inside string values correctly.
                first_tree, _ = json.JSONDecoder().raw_decode(head, tree_pos)
            except json.JSONDecodeError:
                # Typically the first tree extends past the head we read.
                print("\nCould not parse first tree, but file exists and has data")
            else:
                print("\nFirst tree structure:")
                for key, value in first_tree.items():
                    print(f"  {key}: {type(value).__name__} = {value}")

    print("\nFile appears to be processed successfully!")
    return True

if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and CI can
    # detect a failed inspection; any falsy result counts as failure.
    sys.exit(0 if inspect_street_trees() else 1)