berlin-picnic-api/scripts/inspect_street_trees.py

90 lines
3.8 KiB
Python

#!/usr/bin/env python3
"""
Inspect the street trees JSON file structure without loading the entire file.
"""
import json
import os
import re
import sys
def _raw_metadata_value(text, key):
    """Return the raw text of the scalar stored under *key* in *text*, or None.

    The value is taken as the run of characters after ``"key":`` up to the
    next comma, closing bracket/brace or whitespace, so it is reported as
    written in the file rather than parsed.
    """
    pattern = '"' + re.escape(key) + r'"\s*:\s*([^,}\]\s]+)'
    match = re.search(pattern, text)
    return match.group(1) if match else None


def inspect_street_trees(file_path="app/data/processed/street_trees.json", *,
                         head_chars=65536):
    """Inspect the street trees JSON file structure.

    Only the first ``head_chars`` characters of the file are read, so the
    (potentially very large) trees array is never loaded in full. The
    function prints the file size, the ``count`` / ``processed_count`` /
    ``skipped_count`` metadata values found before the ``"street_trees"``
    key, and the keys, value types and values of the first tree object.

    Args:
        file_path: Path of the JSON file to inspect.
        head_chars: Number of characters to read from the start of the
            file; must be positive. The ``"street_trees"`` key and the whole
            first tree object have to fit inside this window to be reported.

    Returns:
        True if the ``"street_trees"`` key was found in the inspected window,
        False if it was not found or the file could not be read.
    """
    try:
        size_bytes = os.path.getsize(file_path)
        with open(file_path, 'r', encoding='utf-8') as f:
            head = f.read(head_chars)
    except (OSError, UnicodeError) as e:
        print(f"Error inspecting file: {e}")
        return False

    # Tolerate any whitespace around the colon so both pretty-printed and
    # compact JSON are recognised.
    key_match = re.search(r'"street_trees"\s*:\s*', head)
    if key_match is None:
        print(f'No "street_trees" key found in the first {head_chars} characters')
        return False

    print("File structure inspection:")
    print(f"File size: ~{size_bytes / (1024 * 1024):.1f}MB")

    # Only look at the text before the trees array, so that fields of the
    # individual tree objects are never mistaken for file-level metadata.
    metadata = head[:key_match.start()]
    for key, label in (
        ("count", "Tree count"),
        ("processed_count", "Processed count"),
        ("skipped_count", "Skipped count"),
    ):
        raw_value = _raw_metadata_value(metadata, key)
        if raw_value is not None:
            print(f"{label}: {raw_value}")

    # Work on the already-decoded text instead of seeking in the file:
    # character indices are not valid byte offsets for a UTF-8 text stream.
    array_open = re.compile(r'\[\s*').match(head, key_match.end())
    if array_open is not None and head.startswith('{', array_open.end()):
        try:
            # raw_decode parses exactly one JSON value and ignores what
            # follows; unlike brace counting it handles braces inside strings.
            first_tree, _ = json.JSONDecoder().raw_decode(head, array_open.end())
        except json.JSONDecodeError:
            # Typically the first object is cut off by the read window.
            print("\nCould not parse first tree, but file exists and has data")
        else:
            print("\nFirst tree structure:")
            for key, value in first_tree.items():
                print(f" {key}: {type(value).__name__} = {value}")

    print("\nFile appears to be processed successfully!")
    return True
if __name__ == "__main__":
    # Turn the inspection result into the process exit status so shell
    # callers can detect a failed inspection (0 = success, 1 = failure).
    sys.exit(0 if inspect_street_trees() else 1)