Add real green space data and neighborhood filtering
This commit is contained in:
parent
c14f5ead38
commit
49e3d8c29d
File diff suppressed because it is too large
Load Diff
|
@ -185,3 +185,142 @@ async def get_current_conditions(
|
|||
return conditions
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get conditions: {str(e)}")
|
||||
|
||||
@router.get("/all")
async def get_all_green_spaces(
    personality: Optional[PersonalityType] = Query(None, description="Personality type for scoring"),
    min_score: int = Query(0, ge=0, le=100, description="Minimum personality score (only applies if personality is provided)"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results"),
):
    """
    Get all available green spaces in Berlin.

    Optionally score them for a specific personality type.
    Perfect for frontend dropdowns or full dataset access.
    """
    try:
        spaces = await berlin_data.search_green_spaces()

        if personality:
            # Score every space for the requested personality and keep only
            # those at or above the minimum score.
            matching = []
            for candidate in spaces:
                personality_score = await green_space_service.scoring_engine.score_green_space(
                    candidate, personality.value
                )
                if personality_score.score >= min_score:
                    candidate.current_personality_score = personality_score
                    matching.append(candidate)

            # Highest-scoring spaces first.
            matching.sort(
                key=lambda s: s.current_personality_score.score if s.current_personality_score else 0,
                reverse=True,
            )
            spaces = matching

        # Apply limit
        page = spaces[:limit]

        return {
            "green_spaces": page,
            "total_available": len(spaces),
            "returned_count": len(page),
            "personality": personality.value if personality else None,
            "min_score_applied": min_score if personality else None,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get green spaces: {str(e)}")
|
||||
|
||||
@router.get("/recommendations/{personality}")
async def get_personality_recommendations(
    personality: PersonalityType,
    limit: int = Query(20, ge=1, le=50, description="Number of recommendations"),
    neighborhood: Optional[str] = Query(None, description="Preferred neighborhood"),
    min_score: int = Query(70, ge=50, le=100, description="Minimum personality score"),
):
    """
    Get personalized green space recommendations.

    Returns the best green spaces for a specific personality type,
    with explanations of why each space is recommended.
    """
    try:
        candidates = await berlin_data.search_green_spaces(neighborhood=neighborhood)

        recommendations = []
        for space in candidates:
            personality_score = await green_space_service.scoring_engine.score_green_space(
                space, personality.value
            )
            # Skip spaces below the requested score threshold.
            if personality_score.score < min_score:
                continue

            space.current_personality_score = personality_score

            # Collect the standout features of this space.
            highlights = []
            if space.environmental.tree_coverage_percent > 70:
                highlights.append("Excellent tree coverage")
            if space.environmental.water_features:
                highlights.append("Water features")
            if space.recreation.playground_quality > 60:
                highlights.append("Good playground facilities")
            if space.recreation.sports_facilities:
                highlights.append("Sports facilities")
            if space.environmental.noise_level.value <= 2:
                highlights.append("Peaceful atmosphere")

            recommendations.append({
                "green_space": space,
                "score": personality_score.score,
                "explanation": personality_score.explanation,
                "best_features": highlights[:3],  # Top 3 features
                "visit_recommendation": _get_visit_recommendation(space, personality.value),
            })

        # Best matches first.
        recommendations.sort(key=lambda rec: rec["score"], reverse=True)

        return {
            "recommendations": recommendations[:limit],
            "personality": personality.value,
            "total_matches": len(recommendations),
            "search_filters": {
                "neighborhood": neighborhood,
                "min_score": min_score,
            },
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get recommendations: {str(e)}")
|
||||
|
||||
def _get_visit_recommendation(space, personality: str) -> str:
|
||||
"""Get a personalized visit recommendation"""
|
||||
if personality == "little_adventurers":
|
||||
if space.recreation.playground_quality > 60:
|
||||
return "Perfect for family adventures with great playground facilities"
|
||||
return "Great for exploring with kids"
|
||||
elif personality == "date_night":
|
||||
if space.environmental.noise_level.value <= 2:
|
||||
return "Romantic and peaceful setting for couples"
|
||||
return "Nice atmosphere for a romantic stroll"
|
||||
elif personality == "zen_masters":
|
||||
if space.environmental.tree_coverage_percent > 70:
|
||||
return "Ideal for peaceful meditation under the trees"
|
||||
return "Perfect for quiet contemplation"
|
||||
elif personality == "active_lifestyle":
|
||||
if space.recreation.sports_facilities:
|
||||
return "Great for workouts and active recreation"
|
||||
return "Perfect for running and outdoor activities"
|
||||
elif personality == "wildlife_lover":
|
||||
if space.environmental.wildlife_diversity_score > 70:
|
||||
return "Excellent biodiversity for nature observation"
|
||||
return "Good spot for wildlife watching"
|
||||
else:
|
||||
return "Highly recommended for your personality type"
|
||||
|
|
|
@ -20,6 +20,7 @@ class BerlinDataService:
|
|||
self.cache = {}
|
||||
self.last_refresh = None
|
||||
self._toilets_cache = None
|
||||
self._green_spaces_cache = None
|
||||
self._street_trees_index = None
|
||||
self.data_dir = Path("app/data")
|
||||
self.street_tree_service = StreetTreeService()
|
||||
|
@ -46,9 +47,16 @@ class BerlinDataService:
|
|||
if distance > radius:
|
||||
continue
|
||||
|
||||
# Apply neighborhood filter
|
||||
if neighborhood and space.neighborhood.lower() != neighborhood.lower():
|
||||
continue
|
||||
# Apply neighborhood filter with flexible matching
|
||||
if neighborhood:
|
||||
neighborhood_lower = neighborhood.lower()
|
||||
space_neighborhood_lower = space.neighborhood.lower()
|
||||
|
||||
# Check for exact match or partial match (useful for compound neighborhood names)
|
||||
if (neighborhood_lower != space_neighborhood_lower and
|
||||
neighborhood_lower not in space_neighborhood_lower and
|
||||
space_neighborhood_lower not in neighborhood_lower):
|
||||
continue
|
||||
|
||||
# Apply other filters
|
||||
if filters:
|
||||
|
@ -214,43 +222,60 @@ class BerlinDataService:
|
|||
|
||||
async def get_neighborhood_stats(self) -> Dict[str, Any]:
    """Get statistics for Berlin neighborhoods.

    Groups the loaded green spaces by neighborhood and derives per-
    neighborhood personality scores from the aggregated space features,
    sorted by number of green spaces (descending).
    """
    # Get all green spaces to calculate real neighborhood stats
    green_spaces = await self._get_mock_green_spaces()

    # Group green spaces by neighborhood.
    grouped: Dict[str, list] = {}
    for space in green_spaces:
        grouped.setdefault(space.neighborhood, []).append(space)

    # Generate neighborhood stats
    neighborhoods = []
    for name, spaces in grouped.items():
        count = len(spaces)

        # Aggregate the concrete features of this neighborhood's spaces.
        total_tree_coverage = sum(s.environmental.tree_coverage_percent for s in spaces)
        total_playgrounds = sum(s.recreation.playground_quality for s in spaces)
        total_water_features = sum(1 for s in spaces if s.environmental.water_features)
        total_sports = sum(1 for s in spaces if s.recreation.sports_facilities)

        avg_tree_coverage = total_tree_coverage / count if count > 0 else 0
        avg_playground = total_playgrounds / count if count > 0 else 0
        water_ratio = total_water_features / count if count > 0 else 0
        sports_ratio = total_sports / count if count > 0 else 0

        # Derive personality scores from the aggregated features
        # (heuristic weights, capped at 100).
        little_adventurers = min(100, int(avg_playground * 0.8 + sports_ratio * 30 + 40))
        date_night = min(100, int(avg_tree_coverage * 0.6 + water_ratio * 25 + 45))
        squad_goals = min(100, int(sports_ratio * 40 + avg_tree_coverage * 0.4 + 35))
        zen_masters = min(100, int(avg_tree_coverage * 0.7 + water_ratio * 20 + 30))

        neighborhoods.append({
            "name": name.lower().replace(' ', '_').replace('-', '_'),
            "display_name": name,
            "green_space_count": count,
            "avg_personality_scores": {
                "little_adventurers": little_adventurers,
                "date_night": date_night,
                "squad_goals": squad_goals,
                "zen_masters": zen_masters,
            },
        })

    # Sort by green space count (most spaces first)
    neighborhoods.sort(key=lambda x: x["green_space_count"], reverse=True)

    return {"neighborhoods": neighborhoods}
|
||||
|
||||
async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
|
||||
"""Get current conditions at a location."""
|
||||
|
@ -394,122 +419,76 @@ class BerlinDataService:
|
|||
# Return original space if enhancement fails
|
||||
return green_space
|
||||
|
||||
def _load_green_spaces(self) -> List[Dict]:
    """Load green spaces data from JSON file.

    Reads app/data/processed/quick_berlin_green_spaces.json once and
    memoizes the result in self._green_spaces_cache; returns an empty
    list (with a warning) when the file is missing.
    """
    if self._green_spaces_cache is not None:
        return self._green_spaces_cache

    source = self.data_dir / "processed" / "quick_berlin_green_spaces.json"
    if source.exists():
        with open(source, 'r', encoding='utf-8') as fh:
            payload = json.load(fh)
        self._green_spaces_cache = payload.get("green_spaces", [])
    else:
        print("Warning: quick_berlin_green_spaces.json not found.")
        self._green_spaces_cache = []

    return self._green_spaces_cache
|
||||
|
||||
def _convert_json_to_green_space(self, json_data: Dict) -> GreenSpace:
    """Convert a raw JSON dict into a GreenSpace model, with safe defaults."""
    from datetime import datetime

    # Hoist the nested sections once instead of re-fetching them per field.
    coords = json_data.get("coordinates", {})
    env = json_data.get("environmental", {})
    acc = json_data.get("accessibility", {})
    rec = json_data.get("recreation", {})

    return GreenSpace(
        id=json_data.get("id", ""),
        name=json_data.get("name", ""),
        description=json_data.get("description", ""),
        type=GreenSpaceType.PARK,  # Default to PARK, could be enhanced later
        coordinates=Coordinates(
            lat=coords.get("lat", 0.0),
            lng=coords.get("lng", 0.0),
        ),
        neighborhood=json_data.get("neighborhood", "Berlin"),
        address=f"{json_data.get('name', 'Unknown')}, Berlin",
        area_sqm=json_data.get("area_sqm", 0),
        perimeter_m=json_data.get("perimeter_m", None),
        environmental=EnvironmentalFeatures(
            tree_coverage_percent=env.get("tree_coverage_percent", 0),
            shade_quality=env.get("shade_quality", 0),
            noise_level=NoiseLevel(env.get("noise_level", 1)),
            wildlife_diversity_score=env.get("wildlife_diversity_score", 0),
            water_features=env.get("water_features", False),
            natural_surface_percent=env.get("natural_surface_percent", 0),
        ),
        accessibility=AccessibilityFeatures(
            wheelchair_accessible=acc.get("wheelchair_accessible", True),
            public_transport_score=acc.get("public_transport_score", 3),
            cycling_infrastructure=acc.get("cycling_infrastructure", True),
            parking_availability=acc.get("parking_availability", 2),
            lighting_quality=acc.get("lighting_quality", 3),
        ),
        recreation=RecreationFeatures(
            playground_quality=rec.get("playground_quality", 0),
            sports_facilities=rec.get("sports_facilities", False),
            running_paths=rec.get("running_paths", True),
            cycling_paths=rec.get("cycling_paths", True),
            dog_friendly=rec.get("dog_friendly", True),
            bbq_allowed=rec.get("bbq_allowed", False),
        ),
        nearby_amenities=[],
        last_updated=datetime.fromisoformat(json_data.get("last_updated", datetime.now().isoformat())),
        data_sources=json_data.get("data_sources", []),
        confidence_score=json_data.get("confidence_score", 85),
    )
|
||||
|
||||
async def _get_mock_green_spaces(self) -> List[GreenSpace]:
    """Get green spaces data from JSON file.

    NOTE(review): despite the legacy name, this now loads real data via
    _load_green_spaces(); entries that fail model conversion are logged
    and skipped rather than aborting the whole load.
    """
    spaces: List[GreenSpace] = []

    for entry in self._load_green_spaces():
        try:
            spaces.append(self._convert_json_to_green_space(entry))
        except Exception as e:
            print(f"Error converting green space {entry.get('id', 'unknown')}: {e}")
            continue

    return spaces
|
||||
|
|
|
@ -4,6 +4,10 @@ from pathlib import Path
|
|||
from typing import List, Optional, Tuple, Dict, Any
|
||||
from datetime import datetime
|
||||
from geopy.distance import geodesic
|
||||
from rtree import index
|
||||
import asyncio
|
||||
import aiofiles
|
||||
from functools import lru_cache
|
||||
|
||||
from app.models.street_tree import (
|
||||
StreetTree, TreeDensityMetrics, TreeShadeAnalysis, TreesSearchFilters,
|
||||
|
@ -14,24 +18,58 @@ from app.models.green_space import Coordinates
|
|||
class StreetTreeService:
|
||||
"""Service for accessing and analyzing Berlin street trees data."""
|
||||
|
||||
def __init__(self):
|
||||
self._trees_cache = None
|
||||
self._trees_index = None
|
||||
self.data_dir = Path("app/data")
|
||||
_instance = None
|
||||
_initialized = False
|
||||
|
||||
def _load_trees(self) -> List[Dict]:
|
||||
"""Load street trees data from JSON file."""
|
||||
def __new__(cls):
    """Create the shared singleton instance on first call, then reuse it."""
    if cls._instance is None:
        cls._instance = super().__new__(cls)
    return cls._instance
|
||||
|
||||
def __init__(self):
    """Initialize caches on first construction only (singleton __init__ is
    re-invoked on every StreetTreeService() call, so guard with a flag)."""
    if self._initialized:
        return
    self._trees_cache = None        # raw tree dicts from JSON
    self._spatial_index = None      # R-tree over tree coordinates
    self._tree_id_to_data = {}      # R-tree id -> raw tree dict
    self.data_dir = Path("app/data")
    self.__class__._initialized = True
|
||||
|
||||
async def _load_trees(self) -> List[Dict]:
    """Load street trees data from JSON file and build spatial index.

    Memoized: the file is read (asynchronously) only on the first call;
    missing data degrades to an empty list with a warning.
    """
    if self._trees_cache is not None:
        return self._trees_cache

    trees_file = self.data_dir / "processed" / "street_trees.json"
    if not trees_file.exists():
        print("Warning: street_trees.json not found. Run process_street_trees.py first.")
        self._trees_cache = []
        return self._trees_cache

    print("🔄 Loading trees data and building spatial index...")
    async with aiofiles.open(trees_file, 'r', encoding='utf-8') as f:
        raw = await f.read()
    data = json.loads(raw)
    self._trees_cache = data.get("street_trees", [])
    await self._build_spatial_index()
    print(f"✅ Loaded {len(self._trees_cache)} trees with spatial index")

    return self._trees_cache
|
||||
|
||||
async def _build_spatial_index(self):
    """Build R-tree spatial index for fast location queries.

    No-op when the index already exists or no trees are loaded. Trees
    without coordinates are silently excluded from the index.
    """
    if self._spatial_index is not None or not self._trees_cache:
        return

    print("🔨 Building spatial index...")
    self._spatial_index = index.Index()
    self._tree_id_to_data = {}

    for tree_id, tree_data in enumerate(self._trees_cache):
        lat = tree_data.get('lat')
        lng = tree_data.get('lng')
        if lat is None or lng is None:
            continue
        # R-tree expects (minx, miny, maxx, maxy); a point is a degenerate box.
        self._spatial_index.insert(tree_id, (lng, lat, lng, lat))
        self._tree_id_to_data[tree_id] = tree_data

    print(f"✅ Spatial index built for {len(self._tree_id_to_data)} trees")
|
||||
|
||||
def _create_tree_from_dict(self, tree_data: Dict) -> StreetTree:
|
||||
"""Convert tree dictionary to StreetTree model."""
|
||||
|
||||
|
@ -94,6 +132,11 @@ class StreetTreeService:
|
|||
last_updated=datetime.now()
|
||||
)
|
||||
|
||||
@staticmethod
@lru_cache(maxsize=1000)
def _distance_cache(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
    """Return the geodesic distance in meters between two points, memoized.

    Declared as a staticmethod so the lru_cache key does not include
    `self`: caching a bound instance method keys every entry on the
    instance and pins it in the cache for the cache's lifetime (ruff
    B019). Call sites using `self._distance_cache(...)` keep working.
    """
    return geodesic((lat1, lng1), (lat2, lng2)).meters
|
||||
|
||||
async def get_trees_near_location(
|
||||
self,
|
||||
lat: float,
|
||||
|
@ -101,31 +144,48 @@ class StreetTreeService:
|
|||
radius_m: int = 500,
|
||||
limit: Optional[int] = None
|
||||
) -> TreesNearLocationResponse:
|
||||
"""Get street trees within a radius of a location."""
|
||||
"""Get street trees within a radius of a location using spatial index."""
|
||||
start_time = datetime.now()
|
||||
|
||||
trees_data = self._load_trees()
|
||||
await self._load_trees()
|
||||
nearby_trees = []
|
||||
|
||||
for tree_data in trees_data:
|
||||
if self._spatial_index is None:
|
||||
# Fallback to linear search if index failed
|
||||
return await self._get_trees_linear_search(lat, lng, radius_m, limit)
|
||||
|
||||
# Convert radius to approximate bounding box for R-tree query
|
||||
# Rough approximation: 1 degree ≈ 111km
|
||||
radius_deg = radius_m / 111000
|
||||
bbox = (lng - radius_deg, lat - radius_deg, lng + radius_deg, lat + radius_deg)
|
||||
|
||||
# Query spatial index for candidates
|
||||
candidate_ids = list(self._spatial_index.intersection(bbox))
|
||||
|
||||
# Filter candidates by exact distance
|
||||
tree_distances = []
|
||||
for tree_id in candidate_ids:
|
||||
tree_data = self._tree_id_to_data.get(tree_id)
|
||||
if not tree_data:
|
||||
continue
|
||||
|
||||
tree_lat = tree_data.get('lat')
|
||||
tree_lng = tree_data.get('lng')
|
||||
|
||||
if tree_lat is None or tree_lng is None:
|
||||
continue
|
||||
|
||||
distance = geodesic((lat, lng), (tree_lat, tree_lng)).meters
|
||||
distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
|
||||
if distance <= radius_m:
|
||||
tree = self._create_tree_from_dict(tree_data)
|
||||
nearby_trees.append(tree)
|
||||
tree_distances.append((tree, distance))
|
||||
|
||||
if limit and len(nearby_trees) >= limit:
|
||||
if limit and len(tree_distances) >= limit:
|
||||
break
|
||||
|
||||
# Sort by distance
|
||||
nearby_trees.sort(
|
||||
key=lambda t: geodesic((lat, lng), (t.coordinates.lat, t.coordinates.lng)).meters
|
||||
)
|
||||
tree_distances.sort(key=lambda x: x[1])
|
||||
nearby_trees = [tree for tree, _ in tree_distances]
|
||||
|
||||
# Calculate metrics
|
||||
metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
|
||||
|
@ -212,7 +272,7 @@ class StreetTreeService:
|
|||
large_trees = []
|
||||
|
||||
for tree in trees:
|
||||
distance = geodesic((lat, lng), (tree.coordinates.lat, tree.coordinates.lng)).meters
|
||||
distance = self._distance_cache(lat, lng, tree.coordinates.lat, tree.coordinates.lng)
|
||||
|
||||
if distance <= 50:
|
||||
trees_50m += 1
|
||||
|
@ -259,9 +319,58 @@ class StreetTreeService:
|
|||
canopy_density=len(large_trees) / max(1, len(trees)) if trees else 0
|
||||
)
|
||||
|
||||
async def _get_trees_linear_search(
    self,
    lat: float,
    lng: float,
    radius_m: int = 500,
    limit: Optional[int] = None
) -> TreesNearLocationResponse:
    """Fallback linear search method (used when the spatial index is
    unavailable): scans every tree, keeps those within radius_m, sorted
    by distance, and reports density/shade metrics plus query time."""
    start_time = datetime.now()

    # (tree, distance) pairs inside the radius.
    matches = []
    for tree_data in await self._load_trees():
        tree_lat = tree_data.get('lat')
        tree_lng = tree_data.get('lng')
        if tree_lat is None or tree_lng is None:
            continue

        distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
        if distance > radius_m:
            continue

        matches.append((self._create_tree_from_dict(tree_data), distance))
        if limit and len(matches) >= limit:
            break

    # Nearest trees first; distances were recorded during the scan.
    matches.sort(key=lambda pair: pair[1])
    nearby_trees = [tree for tree, _ in matches]

    # Calculate metrics
    metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
    shade_analysis = self._analyze_shade_coverage(lat, lng, nearby_trees)

    query_time = (datetime.now() - start_time).total_seconds() * 1000

    return TreesNearLocationResponse(
        location=Coordinates(lat=lat, lng=lng),
        radius_m=radius_m,
        trees=nearby_trees,
        metrics=metrics,
        shade_analysis=shade_analysis,
        total_found=len(nearby_trees),
        query_time_ms=int(query_time)
    )
|
||||
|
||||
async def search_trees(self, filters: TreesSearchFilters) -> List[StreetTree]:
|
||||
"""Search trees with filters."""
|
||||
trees_data = self._load_trees()
|
||||
trees_data = await self._load_trees()
|
||||
filtered_trees = []
|
||||
|
||||
for tree_data in trees_data:
|
||||
|
@ -272,10 +381,10 @@ class StreetTreeService:
|
|||
if tree_lat is None or tree_lng is None:
|
||||
continue
|
||||
|
||||
distance = geodesic(
|
||||
(filters.center_lat, filters.center_lng),
|
||||
(tree_lat, tree_lng)
|
||||
).meters
|
||||
distance = self._distance_cache(
|
||||
filters.center_lat, filters.center_lng,
|
||||
tree_lat, tree_lng
|
||||
)
|
||||
if distance > filters.within_radius_m:
|
||||
continue
|
||||
|
||||
|
@ -305,7 +414,7 @@ class StreetTreeService:
|
|||
|
||||
async def get_tree_stats(self) -> Dict[str, Any]:
|
||||
"""Get overall statistics about Berlin street trees."""
|
||||
trees_data = self._load_trees()
|
||||
trees_data = await self._load_trees()
|
||||
|
||||
if not trees_data:
|
||||
return {"error": "No tree data available"}
|
||||
|
|
|
@ -35,6 +35,8 @@ dependencies = [
|
|||
"redis>=5.0.0",
|
||||
"aiofiles>=23.2.0",
|
||||
"openpyxl>=3.1.5",
|
||||
"tqdm>=4.67.1",
|
||||
"rtree>=1.4.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
|
@ -0,0 +1,467 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced Berlin green space processor using existing tree and toilet services.
|
||||
Downloads OSM green space boundaries and enhances them with real data using existing services.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import zipfile
|
||||
import requests
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional
|
||||
import sys
|
||||
|
||||
# Add the app directory to Python path to import services
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
class RealDataGreenSpaceProcessor:
|
||||
def __init__(self, data_dir: str = "app/data"):
    """Set up data directories and the services used for enhancement."""
    self.data_dir = Path(data_dir)
    self.raw_dir = self.data_dir / "geo-raw"
    self.processed_dir = self.data_dir / "processed"

    # Create directories
    for directory in (self.raw_dir, self.processed_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # Initialize existing services
    self.tree_service = StreetTreeService()
    self.berlin_data = BerlinDataService()
|
||||
|
||||
def download_berlin_districts(self):
    """Download Berlin district boundaries.

    Skips the download when the GeoJSON is already on disk; returns the
    path to the local file. Re-raises any download/write failure after
    logging it.
    """
    json_file = self.raw_dir / "bezirksgrenzen.geojson"

    if json_file.exists():
        print(f"Berlin district data already exists: {json_file}")
        return json_file

    link = "https://tsb-opendata.s3.eu-central-1.amazonaws.com/bezirksgrenzen/bezirksgrenzen.geojson"
    print(f"Downloading Berlin district data from {link}")

    try:
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        json_file.write_bytes(response.content)
        print(f"Downloaded to {json_file}")
        return json_file
    except Exception as e:
        print(f"Error downloading districts: {e}")
        raise
|
||||
|
||||
def download_osm_data(self):
    """Download Berlin OpenStreetMap data.

    Downloads the Geofabrik shapefile archive (unless already present)
    and extracts it; both steps are skipped when the required shapefiles
    already exist. Returns the extraction directory.
    """
    zip_file = self.raw_dir / "berlin_shapes.zip"
    shp_dir = self.raw_dir / "berlin_shapes"

    # Check if already extracted
    required_files = ["gis_osm_landuse_a_free_1.shp", "gis_osm_natural_a_free_1.shp", "gis_osm_leisure_a_free_1.shp"]
    if all((shp_dir / name).exists() for name in required_files):
        print(f"Berlin OSM data already exists: {shp_dir}")
        return shp_dir

    if not zip_file.exists():
        link = "https://download.geofabrik.de/europe/germany/berlin-latest-free.shp.zip"
        print(f"Downloading Berlin OSM data from {link} (this may take several minutes...)")
        try:
            response = requests.get(link, stream=True, timeout=300)  # 5 min timeout
            response.raise_for_status()
            # Stream to disk in chunks to keep memory bounded.
            with open(zip_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"Download completed: {zip_file}")
        except Exception as e:
            print(f"Error downloading OSM data: {e}")
            raise

    print(f"Extracting Berlin OSM data to {shp_dir}")
    try:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(shp_dir)
        print(f"Extracted to {shp_dir}")
    except Exception as e:
        print(f"Error extracting OSM data: {e}")
        raise

    return shp_dir
|
||||
|
||||
def load_osm_green_spaces(self):
    """Load OSM green-space polygons clipped to Berlin.

    Downloads district boundaries and the OSM shapefile bundle if
    needed, filters the landuse/natural/leisure layers to green
    categories, clips to the district boundaries, drops areas smaller
    than 1000 sqm, and returns a GeoDataFrame with district names and
    centroid coordinates attached.

    Raises:
        ValueError: if no green-space features survive filtering.
    """
    print("Loading OSM green space boundaries...")

    # Download required data
    districts_file = self.download_berlin_districts()
    shp_dir = self.download_osm_data()

    # Load Berlin districts for clipping
    districts = gpd.read_file(districts_file)

    # Green-space categories we want, per OSM layer
    green_categories = {
        'landuse': ['forest', 'grass', 'meadow', 'recreation_ground', 'village_green', 'allotments'],
        'natural': ['forest', 'grass', 'meadow', 'scrub', 'heath', 'wood'],
        'leisure': ['park', 'garden', 'nature_reserve', 'playground', 'pitch', 'common', 'golf_course']
    }

    all_green_spaces = []

    # Process each category layer
    for category, subcategories in green_categories.items():
        shapefile = shp_dir / f"gis_osm_{category}_a_free_1.shp"

        if not shapefile.exists():
            print(f"Warning: {shapefile} not found, skipping")
            continue

        print(f"Processing {category} data...")
        try:
            gdf = gpd.read_file(shapefile)

            # Filter to relevant subcategories
            gdf_filtered = gdf[gdf['fclass'].isin(subcategories)].copy()

            if len(gdf_filtered) == 0:
                print(f"No {category} features found in subcategories")
                continue

            # Clip to Berlin boundaries
            gdf_clipped = gpd.clip(gdf_filtered, districts)

            # BUG FIX: .area on a geographic CRS (WGS84 lat/lon) returns
            # square *degrees* (~1e-7 for a typical park), so comparing
            # against 1000 "sqm" silently discarded every feature.
            # Measure area in a metric CRS instead (ETRS89 / UTM zone 33N
            # covers Berlin).
            gdf_clipped['area_sqm'] = gdf_clipped.geometry.to_crs(epsg=25833).area
            gdf_clipped = gdf_clipped[gdf_clipped['area_sqm'] >= 1000]

            if len(gdf_clipped) > 0:
                all_green_spaces.append(gdf_clipped)
                print(f"Found {len(gdf_clipped)} {category} features")

        except Exception as e:
            print(f"Error processing {category}: {e}")
            continue

    if not all_green_spaces:
        raise ValueError("No green space data found")

    # Combine all green spaces
    green_spaces = gpd.GeoDataFrame(pd.concat(all_green_spaces, ignore_index=True))

    # Add district information.
    # NOTE(review): assumes the district GeoJSON exposes a 'Bezirk'
    # column — verify against the downloaded file's actual schema.
    green_spaces = gpd.sjoin(green_spaces, districts[['Bezirk', 'geometry']], how='left')

    # Centroids for point lookups (computed on the geographic CRS;
    # approximate, but adequate for nearest-feature queries).
    green_spaces['centroid'] = green_spaces.geometry.centroid
    green_spaces['centroid_lat'] = green_spaces.centroid.y
    green_spaces['centroid_lng'] = green_spaces.centroid.x

    print(f"Total green spaces found: {len(green_spaces)}")
    return green_spaces
|
||||
|
||||
async def enhance_green_space_with_real_data(self, row):
    """Enhance a single green space with real tree and toilet data.

    Args:
        row: A GeoDataFrame row (pandas Series) providing at least
            'centroid_lat', 'centroid_lng', 'area_sqm' and a geometry;
            'fclass', 'name' and 'Bezirk' are read with fallbacks.
            ``row.name`` is the DataFrame index label, used in the ID.

    Returns:
        A dict describing the enhanced green space, or None when any
        step fails (the error is printed and swallowed so one bad row
        does not abort the whole batch).
    """
    try:
        lat = row['centroid_lat']
        lng = row['centroid_lng']
        area_sqm = int(row['area_sqm'])

        # Use existing tree service to get real tree data.
        # Query radius scales with the space's size (sqrt(area) is
        # roughly the side length), capped at 400 m.
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=min(400, int((area_sqm ** 0.5) * 1.5))  # Adaptive radius
        )

        # Use existing toilet service to get real toilet data (800 m radius)
        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 800)

        # Calculate toilet accessibility score
        toilet_score = self._score_toilet_accessibility(nearby_toilets)

        # Map OSM type to our enum
        space_type = self._map_osm_to_space_type(row.get('fclass', ''))

        # Generate ID from the feature class and the row's index label
        space_id = f"real_{row.get('fclass', 'unknown')}_{row.name}"

        # Create enhanced green space using real data
        enhanced_space = {
            "id": space_id,
            "name": row.get('name') or f"{row.get('fclass', 'Green Space').title()} in {row.get('Bezirk', 'Berlin')}",
            "description": f"Real Berlin {row.get('fclass', 'green space')} enhanced with tree and toilet data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": row.get('Bezirk', 'Unknown'),
            "area_sqm": area_sqm,
            # NOTE(review): geometry.length on a geographic CRS is in
            # degrees, not meters — "perimeter_m" is likely mislabeled
            # unless the geometry was reprojected upstream; confirm CRS.
            "perimeter_m": int(row.geometry.length) if hasattr(row.geometry, 'length') else 0,

            # Environmental features using real tree data
            "environmental": {
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(row.get('fclass', ''), row.get('Bezirk', '')),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                # Heuristic: infer water from the feature class or name only
                "water_features": 'water' in str(row.get('fclass', '')).lower() or 'river' in str(row.get('name', '')).lower(),
                "natural_surface_percent": self._estimate_natural_surface(row.get('fclass', ''))
            },

            # Real tree metrics from existing service
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species
            },

            # Real toilet accessibility from existing service
            # (assumes nearby_toilets is sorted nearest-first — TODO confirm)
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Standard accessibility features (static defaults, not measured)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": 3,  # Could be enhanced with real transit data
                "cycling_infrastructure": area_sqm > 5000,
                "parking_availability": 2,
                "lighting_quality": 2
            },

            # Recreation features based on OSM data and size
            "recreation": {
                "playground_quality": self._estimate_playground_quality(row.get('fclass', ''), tree_response.metrics.total_trees),
                "sports_facilities": 'pitch' in str(row.get('fclass', '')).lower() or 'sport' in str(row.get('name', '')).lower(),
                "running_paths": area_sqm > 8000,
                "cycling_paths": area_sqm > 15000,
                "dog_friendly": True,
                "bbq_allowed": row.get('fclass') in ['park', 'recreation_ground'] and area_sqm > 5000
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["openstreetmap", "berlin_tree_cadastre", "berlin_toilets"],
            "confidence_score": 95
        }

        return enhanced_space

    except Exception as e:
        # Best-effort: log and skip this row rather than failing the batch.
        print(f"Error enhancing green space {row.name}: {e}")
        return None
|
||||
|
||||
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||
"""Score toilet accessibility using existing toilet data."""
|
||||
if not nearby_toilets:
|
||||
return 20
|
||||
|
||||
nearest_distance = nearby_toilets[0]['distance_meters']
|
||||
|
||||
# Distance-based scoring
|
||||
if nearest_distance <= 200:
|
||||
score = 100
|
||||
elif nearest_distance <= 400:
|
||||
score = 80
|
||||
elif nearest_distance <= 600:
|
||||
score = 60
|
||||
else:
|
||||
score = 40
|
||||
|
||||
# Bonuses for quality
|
||||
free_toilets = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||
accessible_toilets = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||
|
||||
score += min(20, free_toilets * 5 + accessible_toilets * 3)
|
||||
|
||||
return min(100, score)
|
||||
|
||||
def _map_osm_to_space_type(self, fclass: str) -> str:
|
||||
"""Map OSM feature class to green space types."""
|
||||
mapping = {
|
||||
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN',
|
||||
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||
'wood': 'FOREST', 'heath': 'HEATH', 'pitch': 'SPORTS_AREA',
|
||||
'golf_course': 'SPORTS_AREA', 'common': 'PARK', 'village_green': 'GRASS',
|
||||
'allotments': 'GARDEN'
|
||||
}
|
||||
return mapping.get(fclass, 'PARK')
|
||||
|
||||
def _estimate_noise_level(self, fclass: str, district: str) -> int:
|
||||
"""Estimate noise level (1=very quiet, 5=very noisy)."""
|
||||
base_noise = {
|
||||
'forest': 1, 'nature_reserve': 1, 'wood': 1,
|
||||
'meadow': 2, 'grass': 2, 'heath': 2,
|
||||
'park': 2, 'garden': 2, 'common': 2,
|
||||
'recreation_ground': 3, 'playground': 3, 'pitch': 3,
|
||||
'golf_course': 2, 'allotments': 2
|
||||
}
|
||||
|
||||
# Central districts are noisier
|
||||
central_districts = ['Mitte', 'Kreuzberg', 'Friedrichshain']
|
||||
district_modifier = 1 if district in central_districts else 0
|
||||
|
||||
return min(5, base_noise.get(fclass, 2) + district_modifier)
|
||||
|
||||
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||
"""Estimate percentage of natural surface."""
|
||||
surface_map = {
|
||||
'forest': 95, 'nature_reserve': 95, 'wood': 95,
|
||||
'meadow': 95, 'grass': 90, 'heath': 90,
|
||||
'park': 75, 'garden': 65, 'common': 80,
|
||||
'recreation_ground': 60, 'playground': 40, 'pitch': 20,
|
||||
'golf_course': 70, 'allotments': 85
|
||||
}
|
||||
return surface_map.get(fclass, 70)
|
||||
|
||||
def _estimate_playground_quality(self, fclass: str, tree_count: int) -> int:
|
||||
"""Estimate playground quality score."""
|
||||
base_scores = {
|
||||
'playground': 85,
|
||||
'park': 65,
|
||||
'recreation_ground': 70,
|
||||
'garden': 40,
|
||||
'common': 50
|
||||
}
|
||||
|
||||
base = base_scores.get(fclass, 25)
|
||||
|
||||
# Trees improve playground appeal for families
|
||||
tree_bonus = min(15, tree_count // 5) # +3 per 5 trees, max 15
|
||||
|
||||
return min(100, base + tree_bonus)
|
||||
|
||||
async def process_all_green_spaces(self):
    """Run the full enhancement pipeline over every OSM green space.

    Loads boundaries, enhances each space with tree/toilet data in
    batches of 50 (with a short pause between batches), and returns
    the list of successfully enhanced space dicts.
    """
    print("Starting enhanced green space processing with real data...")

    # Load OSM green space boundaries
    osm_green_spaces = self.load_osm_green_spaces()

    enhanced_green_spaces = []
    total = len(osm_green_spaces)

    print(f"Enhancing {total} green spaces with real tree and toilet data...")

    # Process in batches to avoid overwhelming the backing services.
    batch_size = 50
    done = 0

    for start in range(0, total, batch_size):
        chunk = osm_green_spaces.iloc[start:start + batch_size]

        for _, row in chunk.iterrows():
            enhanced = await self.enhance_green_space_with_real_data(row)
            if enhanced:
                enhanced_green_spaces.append(enhanced)

            done += 1
            if done % 25 == 0:
                print(f"Processed {done}/{total} green spaces...")

        # Brief pause between batches.
        await asyncio.sleep(0.1)

    print(f"Successfully enhanced {len(enhanced_green_spaces)} green spaces with real data")
    return enhanced_green_spaces
|
||||
|
||||
def save_enhanced_data(self, enhanced_green_spaces: List[Dict]):
    """Write enhanced green spaces plus summary stats to a JSON file.

    Args:
        enhanced_green_spaces: space dicts produced by the enhancement
            pipeline; may be empty.

    Returns:
        Path to the written JSON file under ``self.processed_dir``.
    """
    output_file = self.processed_dir / "real_berlin_green_spaces.json"

    # Summary statistics
    total_count = len(enhanced_green_spaces)
    spaces_with_trees = len([gs for gs in enhanced_green_spaces if gs["tree_data"]["total_trees"] > 0])
    spaces_with_toilets = len([gs for gs in enhanced_green_spaces if gs["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(gs["tree_data"]["total_trees"] for gs in enhanced_green_spaces)
    avg_species_per_space = (
        sum(gs["tree_data"]["species_count"] for gs in enhanced_green_spaces) / total_count
        if total_count else 0
    )
    # BUG FIX: the summary prints below used to recompute these
    # percentages without the empty-list guard, raising
    # ZeroDivisionError for an empty input. Compute once, guarded,
    # and reuse everywhere.
    tree_pct = round((spaces_with_trees / total_count) * 100, 1) if total_count else 0
    toilet_pct = round((spaces_with_toilets / total_count) * 100, 1) if total_count else 0

    data = {
        "green_spaces": enhanced_green_spaces,
        "total_count": total_count,
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "openstreetmap_boundaries",
            "berlin_tree_cadastre_via_service",
            "berlin_toilet_locations_via_service",
            "berlin_districts"
        ],
        "processing_info": {
            "script_version": "1.0",
            "coordinate_system": "WGS84",
            "uses_existing_services": True,
            "tree_analysis_via": "StreetTreeService",
            "toilet_analysis_via": "BerlinDataService"
        },
        "summary_stats": {
            "spaces_with_trees": spaces_with_trees,
            "spaces_with_nearby_toilets": spaces_with_toilets,
            "total_trees_in_all_spaces": total_trees,
            "average_species_per_space": round(avg_species_per_space, 1),
            "coverage_percentage": {
                "with_tree_data": tree_pct,
                "with_toilet_data": toilet_pct
            }
        }
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"✅ Saved {total_count} enhanced green spaces to {output_file}")
    print(f"📊 Summary:")
    print(f"   - {spaces_with_trees} spaces have tree data ({tree_pct}%)")
    print(f"   - {spaces_with_toilets} spaces have nearby toilets ({toilet_pct}%)")
    print(f"   - {total_trees} total trees analyzed")
    print(f"   - {avg_species_per_space:.1f} average species per space")

    return output_file
|
||||
|
||||
|
||||
async def main():
    """Entry point: run the processor and persist its results."""
    processor = RealDataGreenSpaceProcessor()

    try:
        # Build the dataset via the existing services, then write it out.
        enhanced = await processor.process_all_green_spaces()
        output_file = processor.save_enhanced_data(enhanced)

        print(f"\n🎉 Successfully created real data enhanced Berlin green spaces!")
        print(f"📁 Output: {output_file}")

    except KeyboardInterrupt:
        print("\n⚠️ Processing interrupted by user")
    except Exception as e:
        print(f"❌ Error processing data: {e}")
        raise
|
||||
|
||||
|
||||
# Script entry point: kick off the async processing pipeline.
if __name__ == "__main__":
    asyncio.run(main())
|
|
@ -0,0 +1,613 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Filtered OSM processor for significant Berlin green spaces.
|
||||
Processes only meaningful green spaces (>1000 sqm) with real tree and toilet data.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import math
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
|
||||
# Add the app directory to Python path
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
class FilteredOSMProcessor:
|
||||
def __init__(self, data_dir: str = "app/data"):
    """Set up data paths, backing services, and filter thresholds.

    Args:
        data_dir: Root data directory containing the ``osm-raw`` and
            ``processed`` subdirectories.
    """
    self.data_dir = Path(data_dir)
    self.osm_raw_dir = self.data_dir / "osm-raw"
    self.processed_dir = self.data_dir / "processed"

    # Services supplying real tree and toilet data.
    self.tree_service = StreetTreeService()
    self.berlin_data = BerlinDataService()

    # Rough Berlin bounding box, used to drop out-of-town nodes.
    self.berlin_bbox = {
        'min_lat': 52.3370,
        'max_lat': 52.6755,
        'min_lon': 13.0882,
        'max_lon': 13.7611,
    }

    # Filtering criteria: only sizeable spaces, capped result count.
    self.min_area_sqm = 1000  # Minimum area to be considered significant
    self.max_spaces = 800  # Maximum number of spaces to process
|
||||
|
||||
def parse_and_filter_osm_data(self) -> List[Dict]:
    """Parse the raw OSM XML and keep only significant green spaces.

    Reads ``berlin_green_spaces.osm``, converts each ``way`` into a
    green-space dict via ``_process_osm_way``, keeps spaces of at
    least ``self.min_area_sqm``, stops once ``self.max_spaces`` are
    collected, and returns them sorted by area (largest first).
    Returns [] when the file is missing or parsing fails.
    """
    osm_file = self.osm_raw_dir / "berlin_green_spaces.osm"

    if not osm_file.exists():
        print(f"❌ OSM file not found: {osm_file}")
        print("Please run the download first or ensure the file exists.")
        return []

    print(f"📂 Parsing OSM data from {osm_file}")

    try:
        tree = ET.parse(osm_file)
        root = tree.getroot()
        ways = root.findall('.//way')

        print(f"📊 Found {len(ways)} total ways in OSM file")
        print(f"🔍 Filtering for significant green spaces (≥{self.min_area_sqm} sqm)...")

        filtered_spaces = []
        processed_count = 0

        for way in ways:
            processed_count += 1

            if processed_count % 5000 == 0:
                print(f"   Processed {processed_count}/{len(ways)} ways... Found {len(filtered_spaces)} significant spaces")

            try:
                space_data = self._process_osm_way(way, root)
                if space_data and space_data['area_sqm'] >= self.min_area_sqm:
                    filtered_spaces.append(space_data)

                    # Stop if we have enough spaces
                    if len(filtered_spaces) >= self.max_spaces:
                        print(f"✅ Reached target of {self.max_spaces} significant spaces")
                        break

            except Exception:
                # One malformed way must not abort the whole parse.
                continue

        # Sort by area (largest first) to prioritize important spaces
        filtered_spaces.sort(key=lambda x: x['area_sqm'], reverse=True)

        print(f"🎯 Filtered to {len(filtered_spaces)} significant green spaces")
        # BUG FIX: this print indexed [0]/[-1] unconditionally, raising
        # IndexError for an empty result (which the outer except then
        # misreported as a parse failure).
        if filtered_spaces:
            print(f"📏 Area range: {filtered_spaces[-1]['area_sqm']:,} - {filtered_spaces[0]['area_sqm']:,} sqm")

        return filtered_spaces

    except Exception as e:
        print(f"❌ Error parsing OSM file: {e}")
        return []
|
||||
|
||||
def _process_osm_way(self, way, root) -> Optional[Dict]:
|
||||
"""Process a single OSM way into green space format."""
|
||||
# Get tags
|
||||
tags = {}
|
||||
for tag in way.findall('tag'):
|
||||
tags[tag.get('k')] = tag.get('v')
|
||||
|
||||
# Check if it's a significant green space
|
||||
green_space_type = self._get_green_space_type(tags)
|
||||
if not green_space_type:
|
||||
return None
|
||||
|
||||
# Skip certain types that are usually small or not parks
|
||||
skip_types = ['grave_yard', 'cemetery', 'allotments']
|
||||
if green_space_type in skip_types:
|
||||
return None
|
||||
|
||||
# Get node references
|
||||
nd_refs = [nd.get('ref') for nd in way.findall('nd')]
|
||||
if len(nd_refs) < 3: # Need at least 3 points for an area
|
||||
return None
|
||||
|
||||
# Find node coordinates
|
||||
coordinates = []
|
||||
for nd_ref in nd_refs:
|
||||
node = root.find(f".//node[@id='{nd_ref}']")
|
||||
if node is not None:
|
||||
lat = float(node.get('lat'))
|
||||
lon = float(node.get('lon'))
|
||||
|
||||
# Check if within Berlin bounds
|
||||
if (self.berlin_bbox['min_lat'] <= lat <= self.berlin_bbox['max_lat'] and
|
||||
self.berlin_bbox['min_lon'] <= lon <= self.berlin_bbox['max_lon']):
|
||||
coordinates.append((lat, lon))
|
||||
|
||||
if len(coordinates) < 3:
|
||||
return None
|
||||
|
||||
# Calculate centroid and area
|
||||
centroid_lat, centroid_lon = self._calculate_centroid(coordinates)
|
||||
area_sqm = self._calculate_area(coordinates)
|
||||
|
||||
# Skip if too small
|
||||
if area_sqm < self.min_area_sqm:
|
||||
return None
|
||||
|
||||
# Get name
|
||||
name = tags.get('name')
|
||||
if not name:
|
||||
name = f"{green_space_type.title()} near {centroid_lat:.3f}, {centroid_lon:.3f}"
|
||||
|
||||
# Estimate district
|
||||
district = self._estimate_district(centroid_lat, centroid_lon)
|
||||
|
||||
return {
|
||||
'id': f"osm_way_{way.get('id')}",
|
||||
'name': name,
|
||||
'fclass': green_space_type,
|
||||
'lat': centroid_lat,
|
||||
'lng': centroid_lon,
|
||||
'area_sqm': int(area_sqm),
|
||||
'district': district,
|
||||
'osm_tags': tags,
|
||||
'osm_id': way.get('id'),
|
||||
'has_name': bool(tags.get('name')) # Track if it has a real name
|
||||
}
|
||||
|
||||
def _get_green_space_type(self, tags: Dict) -> Optional[str]:
|
||||
"""Determine if tags represent a significant green space."""
|
||||
# Prioritize leisure tags (usually parks)
|
||||
leisure = tags.get('leisure', '')
|
||||
if leisure in ['park', 'garden', 'nature_reserve', 'recreation_ground', 'playground', 'common']:
|
||||
return leisure
|
||||
|
||||
# Check landuse tags
|
||||
landuse = tags.get('landuse', '')
|
||||
if landuse in ['forest', 'grass', 'meadow', 'recreation_ground', 'village_green']:
|
||||
return landuse
|
||||
|
||||
# Check natural tags (forests, etc.)
|
||||
natural = tags.get('natural', '')
|
||||
if natural in ['forest', 'wood', 'heath']:
|
||||
return natural
|
||||
|
||||
return None
|
||||
|
||||
def _calculate_centroid(self, coordinates: List[Tuple[float, float]]) -> Tuple[float, float]:
|
||||
"""Calculate centroid of polygon."""
|
||||
lat_sum = sum(coord[0] for coord in coordinates)
|
||||
lon_sum = sum(coord[1] for coord in coordinates)
|
||||
count = len(coordinates)
|
||||
return lat_sum / count, lon_sum / count
|
||||
|
||||
def _calculate_area(self, coordinates: List[Tuple[float, float]]) -> float:
|
||||
"""Calculate area using shoelace formula (approximate for Berlin)."""
|
||||
if len(coordinates) < 3:
|
||||
return 0
|
||||
|
||||
# Convert to approximate meters for Berlin latitude
|
||||
lat_to_m = 111000 # meters per degree latitude
|
||||
lon_to_m = 111000 * math.cos(math.radians(52.5)) # adjust for Berlin
|
||||
|
||||
# Convert to meters
|
||||
coords_m = [(lat * lat_to_m, lon * lon_to_m) for lat, lon in coordinates]
|
||||
|
||||
# Shoelace formula
|
||||
area = 0
|
||||
n = len(coords_m)
|
||||
|
||||
for i in range(n):
|
||||
j = (i + 1) % n
|
||||
area += coords_m[i][0] * coords_m[j][1]
|
||||
area -= coords_m[j][0] * coords_m[i][1]
|
||||
|
||||
return abs(area) / 2
|
||||
|
||||
def _estimate_district(self, lat: float, lng: float) -> str:
|
||||
"""Estimate Berlin district from coordinates."""
|
||||
# Simplified district boundaries
|
||||
if lat > 52.55:
|
||||
return "Pankow" if lng < 13.45 else "Lichtenberg"
|
||||
elif lat > 52.52:
|
||||
if lng < 13.25:
|
||||
return "Charlottenburg-Wilmersdorf"
|
||||
elif lng < 13.42:
|
||||
return "Mitte"
|
||||
else:
|
||||
return "Friedrichshain-Kreuzberg"
|
||||
elif lat > 52.45:
|
||||
if lng < 13.25:
|
||||
return "Steglitz-Zehlendorf"
|
||||
elif lng < 13.42:
|
||||
return "Tempelhof-Schöneberg"
|
||||
else:
|
||||
return "Neukölln"
|
||||
else:
|
||||
return "Treptow-Köpenick"
|
||||
|
||||
async def enhance_green_space_with_real_data(self, space_data: Dict):
    """Enhance a parsed OSM green space with real tree and toilet data.

    Args:
        space_data: dict produced by ``_process_osm_way`` with 'lat',
            'lng', 'area_sqm', and optional 'fclass', 'district',
            'name', 'osm_tags', 'osm_id', 'has_name' keys.

    Returns:
        A dict in the app's green-space shape, or None when any step
        fails (the error is printed and swallowed so one bad space
        does not abort the run).
    """
    try:
        lat = space_data['lat']
        lng = space_data['lng']
        area_sqm = space_data['area_sqm']

        # Adaptive radius based on space size: sqrt(area) approximates
        # the side length; clamp to 150–400 m.
        radius = min(400, max(150, int((area_sqm ** 0.5) * 0.8)))

        # Get real data using existing services
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=radius
        )

        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 600)

        # Calculate scores
        toilet_score = self._score_toilet_accessibility(nearby_toilets)
        space_type = self._map_to_space_type(space_data.get('fclass', ''))

        enhanced_space = {
            "id": space_data['id'],
            "name": space_data['name'],
            "description": f"Significant Berlin {space_data.get('fclass', 'green space')} from OSM data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": space_data.get('district', 'Unknown'),
            "area_sqm": area_sqm,
            # Rough estimate: perimeter of a square with this area
            "perimeter_m": int(4 * (area_sqm ** 0.5)),

            # Environmental features from real tree data
            "environmental": {
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(space_data),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                "water_features": self._detect_water_features(space_data),
                "natural_surface_percent": self._estimate_natural_surface(space_data.get('fclass', ''))
            },

            # Real tree metrics from your existing service
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species[:3]  # Top 3
            },

            # Real toilet accessibility from your existing service
            # (assumes nearby_toilets is sorted nearest-first — TODO confirm)
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Accessibility features (heuristic, not measured)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": self._estimate_transport_score(space_data.get('district', '')),
                "cycling_infrastructure": area_sqm > 5000,
                "parking_availability": 3 if area_sqm > 50000 else 2,
                "lighting_quality": 3 if 'mitte' in space_data.get('district', '').lower() else 2
            },

            # Recreation features (heuristics over OSM tags and size)
            "recreation": {
                "playground_quality": self._estimate_playground_quality(space_data),
                "sports_facilities": self._estimate_sports_facilities(space_data),
                "running_paths": area_sqm > 8000,
                "cycling_paths": area_sqm > 15000,
                "dog_friendly": True,
                "bbq_allowed": self._allows_bbq(space_data)
            },

            # OSM metadata
            "osm_metadata": {
                "osm_id": space_data.get('osm_id'),
                "has_official_name": space_data.get('has_name', False),
                "tags": space_data.get('osm_tags', {}),
                "source": "filtered_osm_extract"
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["filtered_osm_extract", "berlin_tree_cadastre", "berlin_toilets"],
            # Named spaces are considered more trustworthy
            "confidence_score": 95 if space_data.get('has_name') else 85
        }

        return enhanced_space

    except Exception as e:
        # Best-effort: log and skip this space rather than failing the run.
        print(f"❌ Error enhancing {space_data['name']}: {e}")
        return None
|
||||
|
||||
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||
if not nearby_toilets:
|
||||
return 25
|
||||
|
||||
nearest = nearby_toilets[0]['distance_meters']
|
||||
if nearest <= 200:
|
||||
score = 95
|
||||
elif nearest <= 400:
|
||||
score = 80
|
||||
elif nearest <= 600:
|
||||
score = 65
|
||||
else:
|
||||
score = 45
|
||||
|
||||
# Quality bonuses
|
||||
free = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||
accessible = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||
score += min(10, free * 5 + accessible * 3)
|
||||
|
||||
return min(100, score)
|
||||
|
||||
def _map_to_space_type(self, fclass: str) -> str:
|
||||
mapping = {
|
||||
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN', 'wood': 'FOREST',
|
||||
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||
'common': 'PARK', 'village_green': 'GRASS', 'heath': 'HEATH'
|
||||
}
|
||||
return mapping.get(fclass, 'PARK')
|
||||
|
||||
def _detect_water_features(self, space_data: Dict) -> bool:
|
||||
name = space_data.get('name', '').lower()
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
water_keywords = ['see', 'teich', 'pond', 'lake', 'bach', 'spree', 'wasser', 'fluss']
|
||||
return (any(keyword in name for keyword in water_keywords) or
|
||||
'water' in str(tags.values()).lower())
|
||||
|
||||
def _estimate_noise_level(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
district = space_data.get('district', '')
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
base = {'forest': 1, 'wood': 1, 'nature_reserve': 1, 'heath': 1,
|
||||
'meadow': 2, 'grass': 2, 'park': 2, 'garden': 2,
|
||||
'playground': 3, 'recreation_ground': 3}.get(fclass, 2)
|
||||
|
||||
# Central districts are noisier
|
||||
if any(busy in district.lower() for busy in ['mitte', 'kreuzberg', 'friedrichshain']):
|
||||
base += 1
|
||||
|
||||
# Larger spaces are usually quieter inside
|
||||
if area > 50000:
|
||||
base = max(1, base - 1)
|
||||
|
||||
return min(5, base)
|
||||
|
||||
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||
return {'forest': 95, 'wood': 95, 'nature_reserve': 95, 'heath': 90,
|
||||
'meadow': 95, 'grass': 90, 'park': 80, 'garden': 70,
|
||||
'playground': 45, 'recreation_ground': 75}.get(fclass, 75)
|
||||
|
||||
def _estimate_transport_score(self, district: str) -> int:
|
||||
district_lower = district.lower()
|
||||
if 'mitte' in district_lower:
|
||||
return 5
|
||||
elif any(name in district_lower for name in ['charlottenburg', 'kreuzberg', 'friedrichshain', 'pankow']):
|
||||
return 4
|
||||
else:
|
||||
return 3
|
||||
|
||||
def _estimate_playground_quality(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
if fclass == 'playground':
|
||||
return 85
|
||||
elif 'playground' in str(tags.values()).lower():
|
||||
return 75
|
||||
elif fclass == 'park':
|
||||
# Larger parks more likely to have good playgrounds
|
||||
return 60 if area > 10000 else 45
|
||||
else:
|
||||
return 30
|
||||
|
||||
def _estimate_sports_facilities(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
name = space_data.get('name', '').lower()
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
# Explicit indicators
|
||||
if (fclass == 'recreation_ground' or
|
||||
'sport' in str(tags.values()).lower() or
|
||||
any(term in name for term in ['sport', 'football', 'tennis', 'recreation'])):
|
||||
return True
|
||||
|
||||
# Large parks often have sports facilities
|
||||
return fclass == 'park' and area > 20000
|
||||
|
||||
def _allows_bbq(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
# Check explicit BBQ tags
|
||||
bbq_tag = tags.get('bbq', '').lower()
|
||||
if bbq_tag == 'yes':
|
||||
return True
|
||||
elif bbq_tag == 'no':
|
||||
return False
|
||||
|
||||
# Default based on type and size
|
||||
return fclass in ['park', 'recreation_ground'] and area > 5000
|
||||
|
||||
async def process_filtered_green_spaces(self):
    """Main processing pipeline for filtered green spaces.

    Parses the local OSM extract, keeps only spaces above the configured
    size threshold (self.min_area_sqm), then enhances each survivor with
    real Berlin tree and toilet data via the instance's services.

    Returns:
        list[dict]: enhanced green-space records; empty list when nothing
        passed the filter.
    """
    print("🌳 Processing Significant Berlin Green Spaces")
    print("=" * 55)
    print(f"• Filtering for spaces ≥ {self.min_area_sqm:,} sqm")
    print(f"• Processing up to {self.max_spaces} significant spaces")
    print(f"• Enhancing with real Berlin tree + toilet data")
    print("=" * 55)

    # Step 1: Parse and filter OSM data
    filtered_spaces = self.parse_and_filter_osm_data()

    if not filtered_spaces:
        print("❌ No significant green spaces found")
        return []

    print(f"\n🔧 Enhancing {len(filtered_spaces)} significant spaces with real data...")

    # Step 2: Enhance with real data
    enhanced_spaces = []

    for i, space_data in enumerate(filtered_spaces, 1):
        # Show area in hectares for readability in the progress line.
        area_ha = space_data['area_sqm'] / 10000
        print(f"[{i:3d}/{len(filtered_spaces)}] {space_data['name'][:40]:40} ({area_ha:.1f} ha)")

        # Enhancement returns None on failure; only successes are kept.
        result = await self.enhance_green_space_with_real_data(space_data)
        if result:
            enhanced_spaces.append(result)
            trees = result["tree_data"]["total_trees"]
            toilets = result["toilet_accessibility"]["nearby_toilets_count"]
            print(f" ✅ {trees:3d} trees, {toilets} toilets")
        else:
            print(f" ❌ Enhancement failed")

        # Progress update every 50 spaces
        if i % 50 == 0:
            print(f"\n 📊 Progress: {len(enhanced_spaces)}/{i} enhanced successfully")

        # Small delay to be nice to services
        await asyncio.sleep(0.1)

    print(f"\n🎉 Successfully enhanced {len(enhanced_spaces)} significant green spaces!")
    return enhanced_spaces
|
||||
|
||||
def save_enhanced_data(self, enhanced_spaces: List[Dict]):
    """Save the filtered and enhanced dataset.

    Writes one JSON file combining the enhanced spaces with summary
    statistics (coverage rates, size categories, district breakdown)
    and prints a human-readable report.

    Args:
        enhanced_spaces: non-empty list of enhanced green-space dicts.
            NOTE(review): coverage-rate percentages divide by
            len(enhanced_spaces) — an empty list raises ZeroDivisionError.
            The caller guards against this; confirm before reusing.

    Returns:
        Path of the written JSON file.
    """
    output_file = self.processed_dir / "significant_berlin_green_spaces.json"

    # Calculate comprehensive statistics
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)
    total_area = sum(s["area_sqm"] for s in enhanced_spaces)

    # Named vs unnamed spaces
    named_spaces = len([s for s in enhanced_spaces if s["osm_metadata"]["has_official_name"]])

    # Area distribution
    large_spaces = len([s for s in enhanced_spaces if s["area_sqm"] > 50000])  # > 5 hectares
    medium_spaces = len([s for s in enhanced_spaces if 10000 <= s["area_sqm"] <= 50000])  # 1-5 hectares
    small_spaces = len([s for s in enhanced_spaces if s["area_sqm"] < 10000])  # < 1 hectare

    # District breakdown: group full space records by their neighborhood.
    by_district = {}
    for space in enhanced_spaces:
        district = space['neighborhood']
        if district not in by_district:
            by_district[district] = []
        by_district[district].append(space)

    # Full serialisable payload: data + provenance + summary stats.
    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "filtered_osm_extract_significant_spaces_only",
            "berlin_tree_cadastre_via_street_tree_service",
            "berlin_toilet_locations_via_berlin_data_service"
        ],
        "processing_info": {
            "filtering_criteria": {
                "minimum_area_sqm": self.min_area_sqm,
                "maximum_spaces_processed": self.max_spaces,
                "includes_only_significant_spaces": True
            },
            "enhancement_method": "real_berlin_tree_and_toilet_data",
            "coordinate_system": "WGS84"
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "total_area_hectares": round(total_area / 10000, 1),
            "coverage_rates": {
                "tree_data": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%",
                "toilet_data": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%"
            },
            "space_categories": {
                "named_spaces": named_spaces,
                "unnamed_spaces": len(enhanced_spaces) - named_spaces,
                "large_spaces_over_5ha": large_spaces,
                "medium_spaces_1_5ha": medium_spaces,
                "smaller_spaces_under_1ha": small_spaces
            }
        },
        "district_breakdown": {
            district: len(spaces) for district, spaces in by_district.items()
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n📁 Comprehensive dataset saved: {output_file}")
    print(f"\n📊 Final Statistics:")
    print(f" 🌳 {len(enhanced_spaces)} significant green spaces")
    print(f" 📛 {named_spaces} with official names, {len(enhanced_spaces) - named_spaces} discovered areas")
    print(f" 🌲 {with_trees} spaces with tree data ({round((with_trees/len(enhanced_spaces))*100)}%)")
    print(f" 🚻 {with_toilets} spaces with toilet data ({round((with_toilets/len(enhanced_spaces))*100)}%)")
    print(f" 🌿 {total_trees:,} total trees analyzed")
    print(f" 📏 {round(total_area/10000, 1)} hectares total area")

    # Districts listed busiest-first.
    print(f"\n🏙️ District Distribution:")
    for district, spaces in sorted(by_district.items(), key=lambda x: len(x[1]), reverse=True):
        print(f" • {district}: {len(spaces)} spaces")

    print(f"\n📈 Size Categories:")
    print(f" • Large (>5 ha): {large_spaces} spaces")
    print(f" • Medium (1-5 ha): {medium_spaces} spaces")
    print(f" • Smaller (<1 ha): {small_spaces} spaces")

    print(f"\n✨ This dataset provides comprehensive coverage of Berlin's")
    print(f" significant green spaces with real tree and toilet data!")

    return output_file
|
||||
|
||||
|
||||
async def main():
    """Run the filtered-OSM pipeline end to end and persist the results."""
    processor = FilteredOSMProcessor()

    try:
        spaces = await processor.process_filtered_green_spaces()

        # Nothing survived the pipeline — report and bail out.
        if not spaces:
            print("❌ No spaces were successfully processed.")
            return

        processor.save_enhanced_data(spaces)
        print(f"\n🎯 SUCCESS! Ready to use in your API for accurate personality scoring!")
    except KeyboardInterrupt:
        print("\n⚠️ Process interrupted by user")
    except Exception as e:
        print(f"❌ Error: {e}")
        raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: drive the async pipeline to completion.
    asyncio.run(main())
|
|
@ -0,0 +1,613 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Process Berlin green spaces from local OSM data file.
|
||||
Downloads Berlin OSM extract once, then processes locally without API dependencies.
|
||||
"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
import sys
|
||||
import gzip
|
||||
import math
|
||||
|
||||
# Add the app directory to Python path to import services
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
class LocalOSMProcessor:
    """Builds an enhanced Berlin green-space dataset from a local OSM extract,
    combining it with the project's tree and toilet data services."""

    def __init__(self, data_dir: str = "app/data"):
        """Set up data directories, backing services, and the Berlin bounding box.

        Args:
            data_dir: root data directory; raw downloads go to ``osm-raw/``,
                results to ``processed/``.
        """
        self.data_dir = Path(data_dir)
        self.raw_dir = self.data_dir / "osm-raw"
        self.processed_dir = self.data_dir / "processed"

        # Create directories (idempotent — safe to re-run).
        self.raw_dir.mkdir(parents=True, exist_ok=True)
        self.processed_dir.mkdir(parents=True, exist_ok=True)

        # Initialize existing services
        self.tree_service = StreetTreeService()
        self.berlin_data = BerlinDataService()

        # Berlin bounding box for filtering (WGS84 degrees).
        self.berlin_bbox = {
            'min_lat': 52.3370, 'max_lat': 52.6755,
            'min_lon': 13.0882, 'max_lon': 13.7611
        }
|
||||
|
||||
def download_berlin_osm_extract(self):
    """Download Berlin OSM extract from Geofabrik.

    Tries the PBF URL first, then the bz2 fallback. Streams to disk with a
    simple progress indicator.

    Returns:
        Path to the local OSM file (cached or freshly downloaded).

    Raises:
        Exception: when no source could be downloaded.

    NOTE(review): the cache check only looks for ``berlin-latest.osm.pbf``;
    a previously downloaded ``.bz2`` fallback is not detected and would be
    re-downloaded on the next run — confirm whether that is intended.
    """
    osm_file = self.raw_dir / "berlin-latest.osm.pbf"

    if osm_file.exists():
        print(f"✅ OSM file already exists: {osm_file}")
        return osm_file

    # Try PBF format first (smaller), fallback to XML
    urls = [
        "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
        "https://download.geofabrik.de/europe/germany/berlin-latest.osm.bz2"
    ]

    for url in urls:
        try:
            print(f"Downloading Berlin OSM data from {url}")
            print("This is a one-time download (~50MB)...")

            # stream=True avoids loading the whole archive into memory.
            response = requests.get(url, stream=True, timeout=300)
            response.raise_for_status()

            filename = url.split('/')[-1]
            local_file = self.raw_dir / filename

            # Download with progress
            total_size = int(response.headers.get('content-length', 0))
            downloaded = 0

            with open(local_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            percent = (downloaded / total_size) * 100
                            print(f"\rDownload progress: {percent:.1f}%", end="")

            print(f"\n✅ Downloaded: {local_file}")
            return local_file

        except Exception as e:
            # Fall through to the next mirror/format on any failure.
            print(f"❌ Failed to download {url}: {e}")
            continue

    raise Exception("Could not download OSM data from any source")
|
||||
|
||||
def download_simple_osm_extract(self):
    """Download simpler XML format if PBF tools not available.

    Performs a one-time Overpass API export of all green-space ways inside
    the Berlin bounding box and caches the result on disk.

    Returns:
        Path to the cached/downloaded ``.osm`` XML file.

    Raises:
        requests.RequestException (or other network errors): when the
        Overpass request fails; the error is printed and re-raised.
    """
    osm_file = self.raw_dir / "berlin_green_spaces.osm"

    if osm_file.exists():
        print(f"✅ OSM file already exists: {osm_file}")
        return osm_file

    # Use Overpass API to get a one-time export of green spaces
    print("Downloading Berlin green spaces extract...")

    overpass_url = "http://overpass-api.de/api/interpreter"

    # Query for all green spaces in Berlin (one-time download).
    # Three way-selectors: leisure, landuse, and natural tag families,
    # each restricted to the Berlin bounding box.
    query = f"""
    [out:xml][timeout:120];
    (
    way["leisure"~"^(park|garden|nature_reserve|recreation_ground|playground|common)$"]
    ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
    way["landuse"~"^(forest|grass|meadow|recreation_ground|village_green|allotments)$"]
    ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
    way["natural"~"^(forest|grass|meadow|scrub|heath|wood)$"]
    ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
    );
    out geom meta;
    """

    try:
        response = requests.post(overpass_url, data=query, timeout=180)
        response.raise_for_status()

        with open(osm_file, 'w', encoding='utf-8') as f:
            f.write(response.text)

        print(f"✅ Downloaded green spaces extract: {osm_file}")
        return osm_file

    except Exception as e:
        print(f"❌ Failed to download OSM extract: {e}")
        raise
|
||||
|
||||
def parse_osm_xml(self, osm_file: Path) -> List[Dict]:
    """Parse OSM XML file to extract green spaces.

    Args:
        osm_file: path to a plain or gzip-compressed OSM XML file.

    Returns:
        List of green-space dicts produced by ``_process_osm_way``;
        empty list on parse failure.
    """
    print(f"Parsing OSM data from {osm_file}...")

    green_spaces = []

    try:
        # Handle different file formats (transparent gzip support).
        if osm_file.suffix == '.gz':
            with gzip.open(osm_file, 'rt', encoding='utf-8') as f:
                tree = ET.parse(f)
        else:
            tree = ET.parse(osm_file)

        root = tree.getroot()

        # Parse ways (areas)
        ways = root.findall('.//way')
        print(f"Found {len(ways)} ways in OSM data")

        for way in ways:
            try:
                processed_space = self._process_osm_way(way, root)
                if processed_space:
                    green_spaces.append(processed_space)
            except Exception as e:
                # Malformed ways are silently skipped so one bad element
                # cannot abort the whole parse; `e` intentionally unused.
                continue

        print(f"✅ Extracted {len(green_spaces)} green spaces from OSM data")
        return green_spaces

    except Exception as e:
        print(f"❌ Error parsing OSM file: {e}")
        return []
|
||||
|
||||
def _process_osm_way(self, way, root) -> Optional[Dict]:
    """Process a single OSM way into green space format.

    Args:
        way: an OSM ``<way>`` XML element.
        root: the document root, used to resolve node references.

    Returns:
        A green-space dict, or None when the way is not a green space,
        has too few in-bounds vertices, or is smaller than 500 m².
    """
    # Get tags
    tags = {}
    for tag in way.findall('tag'):
        tags[tag.get('k')] = tag.get('v')

    # Check if it's a green space
    green_space_type = self._get_green_space_type(tags)
    if not green_space_type:
        return None

    # Get node references
    nd_refs = [nd.get('ref') for nd in way.findall('nd')]
    if len(nd_refs) < 3:  # Need at least 3 points for an area
        return None

    # Find node coordinates.
    # NOTE(review): root.find with an XPath per node reference scans the
    # whole document each time — O(ways x nodes) overall. A node-id index
    # built once in the caller would make this linear; confirm before
    # running on full-city extracts.
    coordinates = []
    for nd_ref in nd_refs:
        node = root.find(f".//node[@id='{nd_ref}']")
        if node is not None:
            lat = float(node.get('lat'))
            lon = float(node.get('lon'))

            # Check if within Berlin bounds
            if (self.berlin_bbox['min_lat'] <= lat <= self.berlin_bbox['max_lat'] and
                self.berlin_bbox['min_lon'] <= lon <= self.berlin_bbox['max_lon']):
                coordinates.append((lat, lon))

    if len(coordinates) < 3:
        return None

    # Calculate centroid and area
    centroid_lat, centroid_lon = self._calculate_centroid(coordinates)
    area_sqm = self._calculate_area(coordinates)

    # Skip very small areas
    if area_sqm < 500:
        return None

    # Get name (synthesise one from type + centroid when OSM has none).
    name = tags.get('name', f"{green_space_type.title()} near {centroid_lat:.3f}, {centroid_lon:.3f}")

    # Estimate district
    district = self._estimate_district(centroid_lat, centroid_lon)

    return {
        'id': f"osm_way_{way.get('id')}",
        'name': name,
        'fclass': green_space_type,
        'lat': centroid_lat,
        'lng': centroid_lon,
        'area_sqm': int(area_sqm),
        'district': district,
        'osm_tags': tags,
        'osm_id': way.get('id')
    }
|
||||
|
||||
def _get_green_space_type(self, tags: Dict) -> Optional[str]:
|
||||
"""Determine if tags represent a green space and what type."""
|
||||
# Check leisure tags
|
||||
leisure = tags.get('leisure', '')
|
||||
if leisure in ['park', 'garden', 'nature_reserve', 'recreation_ground',
|
||||
'playground', 'common', 'golf_course']:
|
||||
return leisure
|
||||
|
||||
# Check landuse tags
|
||||
landuse = tags.get('landuse', '')
|
||||
if landuse in ['forest', 'grass', 'meadow', 'recreation_ground',
|
||||
'village_green', 'allotments']:
|
||||
return landuse
|
||||
|
||||
# Check natural tags
|
||||
natural = tags.get('natural', '')
|
||||
if natural in ['forest', 'grass', 'meadow', 'scrub', 'heath', 'wood']:
|
||||
return natural
|
||||
|
||||
return None
|
||||
|
||||
def _calculate_centroid(self, coordinates: List[Tuple[float, float]]) -> Tuple[float, float]:
|
||||
"""Calculate centroid of polygon."""
|
||||
lat_sum = sum(coord[0] for coord in coordinates)
|
||||
lon_sum = sum(coord[1] for coord in coordinates)
|
||||
count = len(coordinates)
|
||||
|
||||
return lat_sum / count, lon_sum / count
|
||||
|
||||
def _calculate_area(self, coordinates: List[Tuple[float, float]]) -> float:
|
||||
"""Calculate area of polygon using shoelace formula."""
|
||||
if len(coordinates) < 3:
|
||||
return 0
|
||||
|
||||
# Convert to approximate meters for Berlin
|
||||
lat_to_m = 111000 # meters per degree latitude
|
||||
lon_to_m = 111000 * math.cos(math.radians(52.5)) # adjust for Berlin latitude
|
||||
|
||||
# Convert coordinates to meters
|
||||
coords_m = [(lat * lat_to_m, lon * lon_to_m) for lat, lon in coordinates]
|
||||
|
||||
# Shoelace formula
|
||||
area = 0
|
||||
n = len(coords_m)
|
||||
|
||||
for i in range(n):
|
||||
j = (i + 1) % n
|
||||
area += coords_m[i][0] * coords_m[j][1]
|
||||
area -= coords_m[j][0] * coords_m[i][1]
|
||||
|
||||
return abs(area) / 2
|
||||
|
||||
def _estimate_district(self, lat: float, lng: float) -> str:
|
||||
"""Rough district estimation from coordinates."""
|
||||
# Very rough Berlin district boundaries
|
||||
if lat > 52.55:
|
||||
return "Pankow" if lng < 13.45 else "Lichtenberg"
|
||||
elif lat > 52.52:
|
||||
if lng < 13.25:
|
||||
return "Charlottenburg-Wilmersdorf"
|
||||
elif lng < 13.42:
|
||||
return "Mitte"
|
||||
else:
|
||||
return "Friedrichshain-Kreuzberg"
|
||||
elif lat > 52.45:
|
||||
if lng < 13.25:
|
||||
return "Steglitz-Zehlendorf"
|
||||
elif lng < 13.42:
|
||||
return "Tempelhof-Schöneberg"
|
||||
else:
|
||||
return "Neukölln"
|
||||
else:
|
||||
return "Treptow-Köpenick"
|
||||
|
||||
async def enhance_green_space_with_real_data(self, space_data: Dict):
    """Enhance green space with real tree and toilet data.

    Queries the street-tree and Berlin-data services around the space's
    centroid and assembles the full enhanced record expected by the API.

    Args:
        space_data: dict from ``_process_osm_way`` with at least
            'lat', 'lng', 'area_sqm', 'id', 'name' keys.

    Returns:
        The enhanced green-space dict, or None on any failure (the error
        is printed, never raised).
    """
    try:
        lat = space_data['lat']
        lng = space_data['lng']
        area_sqm = space_data['area_sqm']

        print(f"Enhancing {space_data['name']} ({space_data['district']})...")

        # Adaptive radius: scales with sqrt(area), clamped to 100-350 m.
        radius = min(350, max(100, int((area_sqm ** 0.5) * 0.7)))

        # Get real data using existing services
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=radius
        )

        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 600)

        # Calculate scores
        toilet_score = self._score_toilet_accessibility(nearby_toilets)
        space_type = self._map_to_space_type(space_data.get('fclass', ''))

        enhanced_space = {
            "id": space_data['id'],
            "name": space_data['name'],
            "description": f"Berlin {space_data.get('fclass', 'green space')} from local OSM data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": space_data.get('district', 'Unknown'),
            "area_sqm": area_sqm,
            # Perimeter approximated as if the space were a square.
            "perimeter_m": int(4 * (area_sqm ** 0.5)),

            # Environmental features from real tree data
            "environmental": {
                # Floor of 5% so sparse data never reports zero coverage.
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(space_data),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                "water_features": self._detect_water_features(space_data),
                "natural_surface_percent": self._estimate_natural_surface(space_data.get('fclass', ''))
            },

            # Real tree metrics
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species[:3]
            },

            # Real toilet accessibility
            # Assumes nearby_toilets is sorted by distance — TODO confirm
            # against BerlinDataService.get_toilets_near_point.
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Standard features (heuristics, not measured data)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": self._estimate_transport_score(space_data.get('district', '')),
                "cycling_infrastructure": area_sqm > 4000,
                "parking_availability": 2 if area_sqm > 20000 else 1,
                "lighting_quality": 3 if 'mitte' in space_data.get('district', '').lower() else 2
            },

            "recreation": {
                "playground_quality": self._estimate_playground_quality(space_data),
                "sports_facilities": self._estimate_sports_facilities(space_data),
                "running_paths": area_sqm > 6000,
                "cycling_paths": area_sqm > 12000,
                "dog_friendly": True,
                "bbq_allowed": self._allows_bbq(space_data)
            },

            # OSM metadata
            "osm_metadata": {
                "osm_id": space_data.get('osm_id'),
                "tags": space_data.get('osm_tags', {}),
                "source": "local_osm_extract"
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["local_osm_extract", "berlin_tree_cadastre", "berlin_toilets"],
            "confidence_score": 92
        }

        trees = tree_response.metrics.total_trees
        toilets = len(nearby_toilets)
        print(f"✅ {space_data['name']}: {trees} trees, {toilets} toilets")

        return enhanced_space

    except Exception as e:
        # Best-effort enrichment: report and signal failure to the caller.
        print(f"❌ Error enhancing {space_data['name']}: {e}")
        return None
|
||||
|
||||
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||
if not nearby_toilets:
|
||||
return 25
|
||||
|
||||
nearest = nearby_toilets[0]['distance_meters']
|
||||
if nearest <= 200:
|
||||
score = 90
|
||||
elif nearest <= 400:
|
||||
score = 70
|
||||
else:
|
||||
score = 50
|
||||
|
||||
# Quality bonuses
|
||||
free = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||
accessible = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||
score += min(10, free * 5 + accessible * 3)
|
||||
|
||||
return min(100, score)
|
||||
|
||||
def _map_to_space_type(self, fclass: str) -> str:
|
||||
mapping = {
|
||||
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN', 'wood': 'FOREST',
|
||||
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||
'common': 'PARK', 'village_green': 'GRASS', 'allotments': 'GARDEN'
|
||||
}
|
||||
return mapping.get(fclass, 'PARK')
|
||||
|
||||
def _detect_water_features(self, space_data: Dict) -> bool:
|
||||
name = space_data.get('name', '').lower()
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
water_keywords = ['see', 'teich', 'pond', 'lake', 'bach', 'spree', 'wasser']
|
||||
return any(keyword in name for keyword in water_keywords) or 'water' in tags.values()
|
||||
|
||||
def _estimate_noise_level(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
district = space_data.get('district', '')
|
||||
|
||||
base = {'forest': 1, 'wood': 1, 'nature_reserve': 1, 'meadow': 2,
|
||||
'park': 2, 'garden': 2, 'playground': 3}.get(fclass, 2)
|
||||
|
||||
if any(busy in district.lower() for busy in ['mitte', 'kreuzberg', 'friedrichshain']):
|
||||
base += 1
|
||||
|
||||
return min(5, base)
|
||||
|
||||
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||
return {'forest': 95, 'wood': 95, 'nature_reserve': 90, 'meadow': 95,
|
||||
'grass': 85, 'park': 75, 'garden': 65, 'playground': 40}.get(fclass, 70)
|
||||
|
||||
def _estimate_transport_score(self, district: str) -> int:
|
||||
district_lower = district.lower()
|
||||
if 'mitte' in district_lower:
|
||||
return 5
|
||||
elif any(name in district_lower for name in ['charlottenburg', 'kreuzberg', 'friedrichshain']):
|
||||
return 4
|
||||
else:
|
||||
return 3
|
||||
|
||||
def _estimate_playground_quality(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
if fclass == 'playground':
|
||||
return 80
|
||||
elif 'playground' in tags.values():
|
||||
return 75
|
||||
elif fclass == 'park':
|
||||
return 55
|
||||
else:
|
||||
return 30
|
||||
|
||||
def _estimate_sports_facilities(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
name = space_data.get('name', '').lower()
|
||||
|
||||
return (fclass == 'recreation_ground' or
|
||||
'sport' in str(tags.values()).lower() or
|
||||
any(term in name for term in ['sport', 'football', 'tennis']))
|
||||
|
||||
def _allows_bbq(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
area = space_data.get('area_sqm', 0)
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
# Check explicit BBQ tags
|
||||
if tags.get('bbq') == 'yes':
|
||||
return True
|
||||
elif tags.get('bbq') == 'no':
|
||||
return False
|
||||
|
||||
# Default based on type and size
|
||||
return fclass in ['park', 'recreation_ground'] and area > 5000
|
||||
|
||||
async def process_all_green_spaces(self):
    """Main processing pipeline.

    Downloads (once) and parses the OSM green-space extract, then enhances
    every space with real Berlin tree and toilet data.

    Returns:
        list[dict]: enhanced green-space records; empty list when the
        download or parse step fails.
    """
    print("🌳 Processing Berlin green spaces from local OSM data...")

    # Step 1: Get OSM data
    try:
        osm_file = self.download_simple_osm_extract()  # More reliable than PBF
    except Exception as e:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit, defeating the Ctrl-C handling in main().
        print(f"❌ Could not download OSM data: {e}")
        return []

    # Step 2: Parse green spaces
    green_spaces = self.parse_osm_xml(osm_file)

    if not green_spaces:
        print("❌ No green spaces found in OSM data")
        return []

    print(f"📊 Found {len(green_spaces)} green spaces to enhance")

    # Step 3: Enhance with real data
    enhanced_spaces = []

    for i, space_data in enumerate(green_spaces, 1):
        print(f"[{i}/{len(green_spaces)}]", end=" ")

        # Failed enhancements return None and are simply skipped.
        result = await self.enhance_green_space_with_real_data(space_data)
        if result:
            enhanced_spaces.append(result)

        # Periodic progress report.
        if i % 20 == 0:
            print(f"\n Progress: {len(enhanced_spaces)} enhanced so far...")

        # Small delay to avoid hammering the backing services.
        await asyncio.sleep(0.1)

    print(f"\n✅ Enhanced {len(enhanced_spaces)} spaces with real data!")
    return enhanced_spaces
|
||||
|
||||
def save_enhanced_data(self, enhanced_spaces: List[Dict]):
    """Save the final dataset.

    Serialises the enhanced spaces plus summary statistics into one JSON
    file under the processed-data directory.

    Args:
        enhanced_spaces: non-empty list of enhanced green-space dicts.
            NOTE(review): the coverage percentages divide by
            len(enhanced_spaces); an empty list raises ZeroDivisionError.
            The caller in main() guards this — confirm before reusing.

    Returns:
        Path of the written JSON file.
    """
    output_file = self.processed_dir / "osm_berlin_green_spaces_enhanced.json"

    # Calculate statistics
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)

    # Full payload: data + provenance + summary stats.
    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "local_osm_extract_processed_offline",
            "berlin_tree_cadastre",
            "berlin_toilets"
        ],
        "processing_info": {
            "method": "local_osm_processing_no_api_dependency",
            "includes_all_osm_green_spaces": True,
            "enhanced_with_real_berlin_data": True
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "tree_coverage": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%",
            "toilet_coverage": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%"
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Saved comprehensive dataset: {output_file}")
    print(f"📊 {len(enhanced_spaces)} total green spaces")
    print(f"🌲 {with_trees} with tree data, 🚻 {with_toilets} with toilet data")
    print(f"🌿 {total_trees} total trees analyzed")
    print(f"\n✨ Ready to replace mock data in your API!")

    return output_file
|
||||
|
||||
|
||||
async def main():
    """Entry point: run the full local-OSM processing pipeline."""
    processor = LocalOSMProcessor()

    # Intro banner describing what the run will do.
    banner = (
        "🚀 Berlin Green Spaces: Local OSM Processing",
        "=" * 50,
        "• Downloads OSM data once (no API dependency)",
        "• Processes locally for all green spaces",
        "• Enhances with real Berlin tree + toilet data",
        "=" * 50,
    )

    try:
        for line in banner:
            print(line)

        spaces = await processor.process_all_green_spaces()
        if spaces:
            processor.save_enhanced_data(spaces)
    except KeyboardInterrupt:
        print("\n⚠️ Interrupted")
    except Exception as e:
        print(f"❌ Error: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: drive the async pipeline to completion.
    asyncio.run(main())
|
|
@ -0,0 +1,558 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick Berlin green spaces processor.
|
||||
Pre-filters OSM data efficiently, then processes only the best candidates.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import re
|
||||
import math
|
||||
# from tqdm.asyncio import tqdm # Not available, remove tqdm dependency
|
||||
from xml.etree.ElementTree import iterparse
|
||||
|
||||
# Add the app directory to Python path
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
def calculate_polygon_area_sqm(coords):
    """Approximate the area (m²) of a lat/lng polygon via the shoelace formula.

    Uses a planar approximation scaled to the polygon's own centroid latitude,
    which is adequate for park-sized areas around Berlin. Degenerate or
    implausible results are clamped to sane defaults.

    Args:
        coords: sequence of (lat, lng) vertex pairs.

    Returns:
        int: area in whole square metres; 5000 for invalid (<3 vertices) or
        degenerate (<100 m²) polygons, capped at 500000 for implausibly
        large (>10 km²) ones.
    """
    if len(coords) < 3:
        return 5000  # Default for invalid polygons

    # Use simple planar approximation for small areas: metres per degree
    # at the polygon's centroid. (FIX: replaced a hand-rolled nested
    # to_radians() helper with stdlib math.radians.)
    lat_center = sum(lat for lat, lng in coords) / len(coords)
    lng_center = sum(lng for lat, lng in coords) / len(coords)

    meters_per_lat = 111320  # roughly constant worldwide
    meters_per_lng = 111320 * math.cos(math.radians(lat_center))  # shrinks with latitude

    # Vertices in metres relative to the centroid (keeps magnitudes small).
    meter_coords = []
    for lat, lng in coords:
        x = (lng - lng_center) * meters_per_lng
        y = (lat - lat_center) * meters_per_lat
        meter_coords.append((x, y))

    # Shoelace formula
    area = 0
    n = len(meter_coords)
    for i in range(n):
        j = (i + 1) % n
        area += meter_coords[i][0] * meter_coords[j][1]
        area -= meter_coords[j][0] * meter_coords[i][1]

    area = abs(area) / 2

    # Reasonable bounds check
    if area < 100:  # Too small
        return 5000
    elif area > 10000000:  # Too large (10 km²)
        return 500000  # Cap at reasonable park size

    return int(area)
|
||||
|
||||
|
||||
def calculate_search_radius(area_sqm):
    """Pick a tree-search radius (metres) appropriate for the park's size.

    Small pocket parks need only a tight radius, while large parks such as
    Treptower Park need a wide one to capture their full tree stock.
    """
    # (exclusive upper area bound in m², radius in m), checked in ascending order.
    size_tiers = (
        (10000, 150),   # < 1 hectare
        (50000, 300),   # < 5 hectares
        (200000, 500),  # < 20 hectares
    )
    for upper_bound, radius in size_tiers:
        if area_sqm < upper_bound:
            return radius
    return 800  # Large parks like Treptower Park
|
||||
|
||||
|
||||
def calculate_enhanced_shade_quality(tree_response, area_sqm):
    """Derive a 0-100 shade quality score from real tree measurements.

    Sums five weighted factors taken from the tree service response: shade
    coverage, mature-tree count, tree density, average height, and average
    crown diameter. ``area_sqm`` is accepted for interface compatibility but
    is not used in the calculation.
    """
    def points_for(value, tiers):
        # tiers: (threshold, points) pairs, highest threshold first; the
        # first threshold the value reaches wins, otherwise 0 points.
        for threshold, points in tiers:
            if value >= threshold:
                return points
        return 0

    metrics = tree_response.metrics
    shade_analysis = tree_response.shade_analysis

    score = 0

    # Factor 1: actual shade coverage (crown-area based)
    score += points_for(metrics.shade_coverage_percent or 0,
                        ((60, 40), (40, 30), (20, 20), (10, 10)))

    # Factor 2: large mature trees (they cast the best shade)
    score += points_for(len(shade_analysis.nearby_large_trees or []),
                        ((10, 25), (5, 20), (3, 15), (1, 10)))

    # Factor 3: tree density per hectare
    score += points_for(metrics.trees_per_hectare or 0,
                        ((50, 20), (30, 15), (20, 10), (10, 5)))

    # Factor 4: average tree height (taller = better shade)
    score += points_for(metrics.average_height or 0,
                        ((20, 10), (15, 8), (10, 5), (5, 3)))

    # Factor 5: crown diameter quality
    score += points_for(metrics.average_crown_diameter or 0,
                        ((12, 5), (8, 3), (5, 1)))

    return min(100, score)
|
||||
|
||||
|
||||
def detect_water_features(candidate):
    """Heuristically decide whether a green-space candidate has water.

    Combines three signals: OSM tag values on water-related keys, German or
    English water words in the feature name, and fountain indicators. Any
    single hit counts as a water feature.
    """
    tags = candidate.get('tags', {})
    name = candidate.get('name', '').lower()

    # Signal 1: OSM tag values that denote open water on the relevant keys.
    water_values = {'water', 'lake', 'pond', 'reservoir', 'river', 'stream'}
    tagged_as_water = any(
        tags.get(key, '').lower() in water_values
        for key in ('water', 'waterway', 'natural')
    )

    # Signal 2: German/English water words appearing in the name.
    named_after_water = any(
        word in name
        for word in ('see', 'teich', 'weiher', 'water', 'lake', 'pond',
                     'fluss', 'river', 'bach', 'creek')
    )

    # Signal 3: fountains (Brunnen) count as water features too.
    has_fountain = any(
        word in name for word in ('brunnen', 'fountain', 'springbrunnen')
    )

    return tagged_as_water or named_after_water or has_fountain
|
||||
|
||||
|
||||
def estimate_berlin_district(lat: float, lng: float) -> str:
    """Estimate which Berlin district contains a coordinate.

    Works from coarse latitude bands (north to south), each with longitude
    cut-offs read west to east. A ``None`` bound acts as a catch-all, so the
    function always returns a district name.
    """
    # (exclusive lower latitude bound, ((exclusive upper lng bound, district), ...))
    latitude_bands = (
        # Northern districts
        (52.55, ((13.25, "Reinickendorf"),
                 (13.45, "Pankow"),
                 (None, "Lichtenberg"))),
        # Central-north districts
        (52.52, ((13.20, "Spandau"),
                 (13.30, "Charlottenburg-Wilmersdorf"),
                 (13.42, "Mitte"),
                 (13.48, "Friedrichshain-Kreuzberg"),
                 (None, "Lichtenberg"))),
        # Central districts
        (52.48, ((13.20, "Spandau"),
                 (13.30, "Charlottenburg-Wilmersdorf"),
                 (13.35, "Tempelhof-Schöneberg"),
                 (13.42, "Mitte"),
                 (13.48, "Friedrichshain-Kreuzberg"),
                 (None, "Lichtenberg"))),
        # Southern-central districts
        (52.45, ((13.20, "Steglitz-Zehlendorf"),
                 (13.35, "Tempelhof-Schöneberg"),
                 (13.45, "Neukölln"),
                 (13.55, "Treptow-Köpenick"),
                 (None, "Marzahn-Hellersdorf"))),
        # Southern districts (catch-all band)
        (None, ((13.35, "Steglitz-Zehlendorf"),
                (None, "Treptow-Köpenick"))),
    )

    for lat_floor, lng_cutoffs in latitude_bands:
        if lat_floor is not None and lat <= lat_floor:
            continue
        for lng_ceiling, district in lng_cutoffs:
            if lng_ceiling is None or lng < lng_ceiling:
                return district
|
||||
|
||||
|
||||
def get_specific_neighborhood(district: str, lat: float, lng: float) -> str:
    """Resolve a finer-grained neighborhood inside a district, if known.

    Each district maps to an ordered sequence of bounding boxes
    ``(min_lat, max_lat, min_lng, max_lng)``; the first box containing the
    point wins. Falls back to the district name when no box matches or the
    district has no neighborhood table.
    """
    # Order matters: some boxes overlap (e.g. Moabit/Tiergarten), and the
    # earlier entry takes precedence, matching the original lookup order.
    boxes_by_district = {
        "Mitte": (
            ((52.540, 52.560, 13.33, 13.38), "Wedding"),
            ((52.515, 52.530, 13.33, 13.38), "Moabit"),
            ((52.510, 52.520, 13.35, 13.38), "Tiergarten"),
            ((52.525, 52.545, 13.40, 13.43), "Prenzlauer Berg"),
        ),
        "Charlottenburg-Wilmersdorf": (
            ((52.485, 52.505, 13.30, 13.33), "Wilmersdorf"),
            ((52.505, 52.525, 13.25, 13.33), "Charlottenburg"),
        ),
        "Friedrichshain-Kreuzberg": (
            ((52.490, 52.510, 13.38, 13.42), "Kreuzberg"),
            ((52.510, 52.525, 13.42, 13.48), "Friedrichshain"),
        ),
        "Tempelhof-Schöneberg": (
            ((52.480, 52.500, 13.33, 13.37), "Schöneberg"),
            ((52.460, 52.480, 13.37, 13.42), "Tempelhof"),
        ),
        "Steglitz-Zehlendorf": (
            ((52.430, 52.450, 13.23, 13.30), "Zehlendorf"),
            ((52.450, 52.470, 13.30, 13.35), "Steglitz"),
        ),
        "Treptow-Köpenick": (
            ((52.430, 52.460, 13.55, 13.65), "Köpenick"),
            ((52.480, 52.500, 13.45, 13.50), "Treptow"),
        ),
    }

    for (min_lat, max_lat, min_lng, max_lng), neighborhood in boxes_by_district.get(district, ()):
        if min_lat <= lat <= max_lat and min_lng <= lng <= max_lng:
            return neighborhood

    return district
|
||||
|
||||
|
||||
async def quick_process():
    """Quick processing of significant Berlin green spaces.

    Pipeline: single-pass streaming scan of the raw OSM XML for promising
    ways (capped at 100 candidates), then batched async enrichment of each
    candidate with real tree and toilet data, finally writing the combined
    dataset plus summary stats to a JSON file.
    """
    print("🚀 Quick Berlin Green Spaces Processor")
    print("=" * 45)

    # Initialize services
    tree_service = StreetTreeService()
    berlin_data = BerlinDataService()

    # Pre-load and index trees once to avoid repeated indexing
    # NOTE(review): calls a private method of StreetTreeService — consider a
    # public warm-up API on the service instead.
    print("🔄 Pre-loading tree data and building spatial index...")
    await tree_service._load_trees()

    osm_file = Path("app/data/osm-raw/berlin_green_spaces.osm")

    if not osm_file.exists():
        print("❌ OSM file not found. Please ensure data is downloaded.")
        return

    print("🔍 Quick filtering for named parks and significant areas...")
    print(f"📁 OSM file size: {osm_file.stat().st_size / (1024*1024):.1f} MB")

    # Quick scan for good candidates
    candidates = []

    try:
        processed = 0

        print("🔍 Single-pass XML parsing - ways with embedded coordinates...")

        # Single pass: parse ways with embedded coordinates.
        # State machine over iterparse events: while inside a <way>, collect
        # its <tag> key/values and <nd> coordinates; evaluate the way when its
        # end event arrives.
        ways_processed = 0
        current_way_tags = {}
        current_way_coordinates = []
        in_way = False

        for event, elem in iterparse(osm_file, events=('start', 'end')):
            if event == 'start':
                if elem.tag == 'way':
                    in_way = True
                    current_way_tags = {}
                    current_way_coordinates = []
                    ways_processed += 1
                    if ways_processed % 1000 == 0:
                        print(f"Processed {ways_processed} ways, found {len(candidates)} candidates so far...")
                elif in_way and elem.tag == 'tag':
                    k = elem.get('k')
                    v = elem.get('v')
                    if k and v:
                        current_way_tags[k] = v
                elif in_way and elem.tag == 'nd':
                    # Extract coordinates directly from nd element
                    # NOTE(review): standard OSM <nd> elements carry only a
                    # node ref; this expects lat/lon embedded in the file —
                    # confirm the download/preprocessing step produces them.
                    lat = elem.get('lat')
                    lon = elem.get('lon')
                    if lat and lon:
                        current_way_coordinates.append((float(lat), float(lon)))
                continue

            # From here on we only see 'end' events.
            if elem.tag == 'way' and in_way:
                in_way = False
                tags = current_way_tags
                coordinates = current_way_coordinates

                # Quick filters for promising spaces - be more lenient
                has_name = 'name' in tags
                is_park = (tags.get('leisure') in ['park', 'garden', 'nature_reserve'] or
                           tags.get('landuse') in ['forest', 'grass', 'recreation_ground'])

                # Also accept common green space tags
                has_green_tags = any(key in tags for key in ['leisure', 'landuse', 'natural', 'amenity'])

                if not (has_name or is_park or has_green_tags):
                    elem.clear()  # Free memory
                    continue

                # Use embedded coordinates directly
                if not coordinates:
                    elem.clear()  # Free memory
                    continue

                # Get center coordinate and all coordinates for area calculation
                lat, lng = coordinates[0] if len(coordinates) == 1 else (
                    sum(lat for lat, lng in coordinates) / len(coordinates),
                    sum(lng for lat, lng in coordinates) / len(coordinates)
                )

                # Basic Berlin bounds check
                if not (52.3 <= lat <= 52.7 and 13.0 <= lng <= 13.8):
                    elem.clear()  # Free memory
                    continue

                name = tags.get('name', f"Unnamed {tags.get('leisure', tags.get('landuse', 'area'))}")
                space_type = tags.get('leisure') or tags.get('landuse') or 'park'

                candidate = {
                    'id': f"quick_{elem.get('id')}",
                    'name': name,
                    'type': space_type,
                    'lat': lat,
                    'lng': lng,
                    'has_name': has_name,
                    'tags': tags,
                    'coordinates': coordinates  # Store all coordinates for area calculation
                }

                candidates.append(candidate)
                processed += 1

                # Limit for quick processing
                if len(candidates) >= 100:
                    elem.clear()  # Free memory
                    break

                elem.clear()  # Free memory
            else:
                elem.clear()  # Free memory

        print(f"✅ Found {len(candidates)} promising green spaces")

    except Exception as e:
        print(f"❌ Error in quick filtering: {e}")
        return

    if not candidates:
        print("No candidates found")
        return

    # Sort by having names (better quality)
    candidates.sort(key=lambda x: x['has_name'], reverse=True)

    print(f"\n🔧 Enhancing top {len(candidates)} spaces with real data...")

    # Process candidates in parallel with batching
    batch_size = 10  # Process 10 candidates at a time
    enhanced_spaces = []

    async def process_candidate(candidate):
        """Process a single candidate with tree and toilet data.

        Returns (enhanced_space_dict, tree_count, toilet_count), or
        (None, 0, 0) when enrichment fails — failures are logged, not raised.
        """
        try:
            # Calculate actual area from OSM polygon coordinates
            area_sqm = calculate_polygon_area_sqm(candidate.get('coordinates', []))
            search_radius = calculate_search_radius(area_sqm)

            # Get real tree data and toilet data concurrently with dynamic radius
            tree_task = tree_service.get_trees_near_location(
                candidate['lat'], candidate['lng'], radius_m=search_radius
            )
            toilet_task = berlin_data.get_toilets_near_point(
                candidate['lat'], candidate['lng'], 500
            )

            print(f"🔍 Getting data for {candidate['name'][:30]}... (area: {area_sqm/10000:.1f}ha, radius: {search_radius}m)")
            tree_response, nearby_toilets = await asyncio.gather(tree_task, toilet_task)

            # Create enhanced space
            enhanced_space = {
                "id": candidate['id'],
                "name": candidate['name'],
                "description": f"Berlin {candidate['type']} discovered via quick OSM processing",
                "type": "PARK",  # Simplified for now
                "coordinates": {
                    "lat": candidate['lat'],
                    "lng": candidate['lng']
                },
                "neighborhood": get_specific_neighborhood(estimate_berlin_district(candidate['lat'], candidate['lng']), candidate['lat'], candidate['lng']),
                "area_sqm": area_sqm,  # Real calculated area

                # Environmental features from real tree data
                "environmental": {
                    "tree_coverage_percent": max(5, int(tree_response.metrics.shade_coverage_percent)),  # Use actual crown area calculation
                    "shade_quality": calculate_enhanced_shade_quality(tree_response, area_sqm),
                    "noise_level": 2,  # Default
                    "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                    "water_features": detect_water_features(candidate),
                    "natural_surface_percent": 80
                },

                # Real tree data
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "species_diversity_score": tree_response.metrics.species_diversity_score,
                    "mature_trees_count": tree_response.metrics.mature_trees_count,
                    "young_trees_count": tree_response.metrics.young_trees_count,
                    "average_tree_age": tree_response.metrics.average_tree_age,
                    "average_height": tree_response.metrics.average_height,
                    "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                    "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                    "dominant_species": tree_response.metrics.dominant_species[:3]
                },

                # Real toilet data
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "accessibility_score": 80 if nearby_toilets else 30,
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                    "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                    "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
                },

                # Standard features
                # NOTE(review): accessibility and recreation values below are
                # heuristic defaults, not sourced from real data.
                "accessibility": {
                    "wheelchair_accessible": True,
                    "public_transport_score": 3,
                    "cycling_infrastructure": True,
                    "parking_availability": 2,
                    "lighting_quality": 3
                },

                "recreation": {
                    "playground_quality": 60 if candidate['type'] == 'park' else 30,
                    "sports_facilities": candidate['type'] == 'recreation_ground',
                    "running_paths": True,
                    "cycling_paths": True,
                    "dog_friendly": True,
                    "bbq_allowed": candidate['type'] in ['park', 'recreation_ground']
                },

                "osm_metadata": {
                    "has_official_name": candidate['has_name'],
                    "tags": candidate['tags'],
                    "source": "quick_osm_processing"
                },

                "last_updated": datetime.now().isoformat(),
                "data_sources": ["quick_osm_scan", "berlin_tree_cadastre", "berlin_toilets"],
                "confidence_score": 90 if candidate['has_name'] else 75
            }

            return enhanced_space, tree_response.metrics.total_trees, len(nearby_toilets)

        except Exception as e:
            print(f"❌ Error processing {candidate['name']}: {e}")
            return None, 0, 0

    # Process candidates in batches with progress bar
    for i in range(0, len(candidates), batch_size):
        batch = candidates[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}/{(len(candidates) + batch_size - 1)//batch_size}")

        # Process batch concurrently with progress bar
        tasks = [process_candidate(candidate) for candidate in batch]
        results = await asyncio.gather(*tasks)

        # Collect results
        for result, trees, toilets in results:
            if result:
                enhanced_spaces.append(result)
                print(f"✅ {result['name'][:40]:40} - {trees:3d} trees, {toilets} toilets")

        # Small delay between batches to be respectful to APIs
        if i + batch_size < len(candidates):
            await asyncio.sleep(0.5)

    # Save results
    output_file = Path("app/data/processed/quick_berlin_green_spaces.json")

    # Coverage statistics over the enriched spaces.
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)

    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": ["quick_osm_processing", "berlin_tree_cadastre", "berlin_toilets"],
        "processing_info": {
            "method": "quick_scan_for_named_and_significant_spaces",
            "prioritizes_named_spaces": True,
            "enhanced_with_real_berlin_data": True
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "tree_coverage": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%" if enhanced_spaces else "0%",
            "toilet_coverage": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%" if enhanced_spaces else "0%"
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output JSON.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Quick processing complete!")
    print(f"📁 Saved: {output_file}")
    print(f"📊 {len(enhanced_spaces)} spaces enhanced")
    print(f"🌲 {with_trees} with tree data, 🚻 {with_toilets} with toilet data")
    print(f"🌿 {total_trees} total trees analyzed")
    print(f"\n✨ Ready to use! This gives you real Berlin green spaces")
    print(f"   with actual tree and toilet data for personality scoring!")
|
||||
|
||||
|
||||
# Script entry point: run the full quick-processing pipeline on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(quick_process())
|
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test OSM processing with a small sample to verify it works.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import math
|
||||
|
||||
# Add the app directory to Python path
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
async def test_processing():
    """Test the processing with a small sample.

    Parses the raw OSM file with a (non-streaming) full ElementTree parse,
    picks the first 5 green-space ways, enriches them with real tree and
    toilet data, and writes the results to a JSON file as a smoke test.
    Returns True on completion.
    """
    print("🧪 Testing OSM processing with sample data...")

    # Initialize services
    tree_service = StreetTreeService()
    berlin_data = BerlinDataService()

    # Parse OSM file and get first 5 green spaces as test
    osm_file = Path("app/data/osm-raw/berlin_green_spaces.osm")

    if not osm_file.exists():
        print("❌ OSM file not found")
        return

    # Full in-memory parse — acceptable for a test, unlike the streaming
    # iterparse used by the production processor.
    tree = ET.parse(osm_file)
    root = tree.getroot()
    ways = root.findall('.//way')

    print(f"📊 Found {len(ways)} total ways in OSM file")

    # Process first 5 green spaces as test
    sample_spaces = []
    processed_count = 0

    for way in ways:
        if processed_count >= 5:
            break

        # Get tags
        tags = {}
        for tag in way.findall('tag'):
            tags[tag.get('k')] = tag.get('v')

        # Check if it's a green space (leisure takes precedence over
        # landuse, which takes precedence over natural).
        green_space_type = None
        leisure = tags.get('leisure', '')
        landuse = tags.get('landuse', '')
        natural = tags.get('natural', '')

        if leisure in ['park', 'garden', 'nature_reserve']:
            green_space_type = leisure
        elif landuse in ['forest', 'grass', 'park']:
            green_space_type = landuse
        elif natural in ['forest', 'wood']:
            green_space_type = natural

        if not green_space_type:
            continue

        # Get coordinates from first and last node to estimate center
        nd_refs = [nd.get('ref') for nd in way.findall('nd')]
        if len(nd_refs) < 3:
            continue

        # Find first node coordinates
        # NOTE(review): O(document) XPath scan per way — fine for 5 samples,
        # too slow for a full run.
        first_node = root.find(f".//node[@id='{nd_refs[0]}']")
        if first_node is None:
            continue

        lat = float(first_node.get('lat'))
        lng = float(first_node.get('lon'))

        # Simple space data
        space_data = {
            'id': f"test_{way.get('id')}",
            'name': tags.get('name', f"Test {green_space_type} {processed_count + 1}"),
            'fclass': green_space_type,
            'lat': lat,
            'lng': lng,
            'area_sqm': 5000,  # Default for test
            'district': 'Test District'
        }

        sample_spaces.append(space_data)
        processed_count += 1

    print(f"🌳 Testing with {len(sample_spaces)} sample green spaces...")

    # Test enhancement with real data
    enhanced_spaces = []

    for i, space_data in enumerate(sample_spaces, 1):
        print(f"\n[{i}/{len(sample_spaces)}] Testing {space_data['name']}...")

        try:
            # Get real tree data
            tree_response = await tree_service.get_trees_near_location(
                space_data['lat'], space_data['lng'], radius_m=200
            )

            # Get real toilet data
            nearby_toilets = await berlin_data.get_toilets_near_point(
                space_data['lat'], space_data['lng'], 500
            )

            # Create enhanced data
            enhanced_space = {
                "id": space_data['id'],
                "name": space_data['name'],
                "type": "PARK",
                "coordinates": {
                    "lat": space_data['lat'],
                    "lng": space_data['lng']
                },
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "dominant_species": tree_response.metrics.dominant_species
                },
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None
                }
            }

            enhanced_spaces.append(enhanced_space)

            trees = tree_response.metrics.total_trees
            toilets = len(nearby_toilets)
            print(f"✅ Success: {trees} trees, {toilets} toilets nearby")

        except Exception as e:
            # Best-effort: log the failure and continue with remaining samples.
            print(f"❌ Error: {e}")

    # Save test results
    output_file = Path("app/data/processed/test_green_spaces.json")

    test_data = {
        "test_results": enhanced_spaces,
        "total_tested": len(enhanced_spaces),
        "osm_ways_available": len(ways),
        "processing_successful": True,
        "timestamp": datetime.now().isoformat()
    }

    with open(output_file, 'w') as f:
        json.dump(test_data, f, indent=2)

    print(f"\n🎉 Test completed successfully!")
    print(f"📁 Test results saved: {output_file}")
    print(f"📊 Enhanced {len(enhanced_spaces)} sample spaces")
    print(f"💡 Ready to process all {len(ways)} green spaces!")

    return True
|
||||
|
||||
|
||||
# Script entry point: run the sample smoke test on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(test_processing())
|
Loading…
Reference in New Issue