Add real green space data and neighborhood filtering
This commit is contained in:
parent
c14f5ead38
commit
49e3d8c29d
File diff suppressed because it is too large
Load Diff
|
@ -185,3 +185,142 @@ async def get_current_conditions(
|
||||||
return conditions
|
return conditions
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to get conditions: {str(e)}")
|
raise HTTPException(status_code=500, detail=f"Failed to get conditions: {str(e)}")
|
||||||
|
|
||||||
|
@router.get("/all")
|
||||||
|
async def get_all_green_spaces(
|
||||||
|
personality: Optional[PersonalityType] = Query(None, description="Personality type for scoring"),
|
||||||
|
min_score: int = Query(0, ge=0, le=100, description="Minimum personality score (only applies if personality is provided)"),
|
||||||
|
limit: int = Query(50, ge=1, le=200, description="Maximum results"),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Get all available green spaces in Berlin.
|
||||||
|
|
||||||
|
Optionally score them for a specific personality type.
|
||||||
|
Perfect for frontend dropdowns or full dataset access.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get all green spaces
|
||||||
|
all_spaces = await berlin_data.search_green_spaces()
|
||||||
|
|
||||||
|
# If personality is specified, score and filter
|
||||||
|
if personality:
|
||||||
|
scored_spaces = []
|
||||||
|
for space in all_spaces:
|
||||||
|
personality_score = await green_space_service.scoring_engine.score_green_space(
|
||||||
|
space, personality.value
|
||||||
|
)
|
||||||
|
|
||||||
|
if personality_score.score >= min_score:
|
||||||
|
space.current_personality_score = personality_score
|
||||||
|
scored_spaces.append(space)
|
||||||
|
|
||||||
|
# Sort by score (highest first)
|
||||||
|
scored_spaces.sort(
|
||||||
|
key=lambda x: x.current_personality_score.score if x.current_personality_score else 0,
|
||||||
|
reverse=True
|
||||||
|
)
|
||||||
|
all_spaces = scored_spaces
|
||||||
|
|
||||||
|
# Apply limit
|
||||||
|
limited_spaces = all_spaces[:limit]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"green_spaces": limited_spaces,
|
||||||
|
"total_available": len(all_spaces),
|
||||||
|
"returned_count": len(limited_spaces),
|
||||||
|
"personality": personality.value if personality else None,
|
||||||
|
"min_score_applied": min_score if personality else None
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to get green spaces: {str(e)}")
|
||||||
|
|
||||||
|
@router.get("/recommendations/{personality}")
|
||||||
|
async def get_personality_recommendations(
|
||||||
|
personality: PersonalityType,
|
||||||
|
limit: int = Query(20, ge=1, le=50, description="Number of recommendations"),
|
||||||
|
neighborhood: Optional[str] = Query(None, description="Preferred neighborhood"),
|
||||||
|
min_score: int = Query(70, ge=50, le=100, description="Minimum personality score"),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Get personalized green space recommendations.
|
||||||
|
|
||||||
|
Returns the best green spaces for a specific personality type,
|
||||||
|
with explanations of why each space is recommended.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get all green spaces
|
||||||
|
all_spaces = await berlin_data.search_green_spaces(neighborhood=neighborhood)
|
||||||
|
|
||||||
|
# Score and rank for personality
|
||||||
|
recommendations = []
|
||||||
|
for space in all_spaces:
|
||||||
|
personality_score = await green_space_service.scoring_engine.score_green_space(
|
||||||
|
space, personality.value
|
||||||
|
)
|
||||||
|
|
||||||
|
if personality_score.score >= min_score:
|
||||||
|
space.current_personality_score = personality_score
|
||||||
|
|
||||||
|
# Get additional insights
|
||||||
|
best_features = []
|
||||||
|
if space.environmental.tree_coverage_percent > 70:
|
||||||
|
best_features.append("Excellent tree coverage")
|
||||||
|
if space.environmental.water_features:
|
||||||
|
best_features.append("Water features")
|
||||||
|
if space.recreation.playground_quality > 60:
|
||||||
|
best_features.append("Good playground facilities")
|
||||||
|
if space.recreation.sports_facilities:
|
||||||
|
best_features.append("Sports facilities")
|
||||||
|
if space.environmental.noise_level.value <= 2:
|
||||||
|
best_features.append("Peaceful atmosphere")
|
||||||
|
|
||||||
|
recommendation = {
|
||||||
|
"green_space": space,
|
||||||
|
"score": personality_score.score,
|
||||||
|
"explanation": personality_score.explanation,
|
||||||
|
"best_features": best_features[:3], # Top 3 features
|
||||||
|
"visit_recommendation": _get_visit_recommendation(space, personality.value)
|
||||||
|
}
|
||||||
|
recommendations.append(recommendation)
|
||||||
|
|
||||||
|
# Sort by score
|
||||||
|
recommendations.sort(key=lambda x: x["score"], reverse=True)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"recommendations": recommendations[:limit],
|
||||||
|
"personality": personality.value,
|
||||||
|
"total_matches": len(recommendations),
|
||||||
|
"search_filters": {
|
||||||
|
"neighborhood": neighborhood,
|
||||||
|
"min_score": min_score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to get recommendations: {str(e)}")
|
||||||
|
|
||||||
|
def _get_visit_recommendation(space, personality: str) -> str:
|
||||||
|
"""Get a personalized visit recommendation"""
|
||||||
|
if personality == "little_adventurers":
|
||||||
|
if space.recreation.playground_quality > 60:
|
||||||
|
return "Perfect for family adventures with great playground facilities"
|
||||||
|
return "Great for exploring with kids"
|
||||||
|
elif personality == "date_night":
|
||||||
|
if space.environmental.noise_level.value <= 2:
|
||||||
|
return "Romantic and peaceful setting for couples"
|
||||||
|
return "Nice atmosphere for a romantic stroll"
|
||||||
|
elif personality == "zen_masters":
|
||||||
|
if space.environmental.tree_coverage_percent > 70:
|
||||||
|
return "Ideal for peaceful meditation under the trees"
|
||||||
|
return "Perfect for quiet contemplation"
|
||||||
|
elif personality == "active_lifestyle":
|
||||||
|
if space.recreation.sports_facilities:
|
||||||
|
return "Great for workouts and active recreation"
|
||||||
|
return "Perfect for running and outdoor activities"
|
||||||
|
elif personality == "wildlife_lover":
|
||||||
|
if space.environmental.wildlife_diversity_score > 70:
|
||||||
|
return "Excellent biodiversity for nature observation"
|
||||||
|
return "Good spot for wildlife watching"
|
||||||
|
else:
|
||||||
|
return "Highly recommended for your personality type"
|
||||||
|
|
|
@ -20,6 +20,7 @@ class BerlinDataService:
|
||||||
self.cache = {}
|
self.cache = {}
|
||||||
self.last_refresh = None
|
self.last_refresh = None
|
||||||
self._toilets_cache = None
|
self._toilets_cache = None
|
||||||
|
self._green_spaces_cache = None
|
||||||
self._street_trees_index = None
|
self._street_trees_index = None
|
||||||
self.data_dir = Path("app/data")
|
self.data_dir = Path("app/data")
|
||||||
self.street_tree_service = StreetTreeService()
|
self.street_tree_service = StreetTreeService()
|
||||||
|
@ -46,9 +47,16 @@ class BerlinDataService:
|
||||||
if distance > radius:
|
if distance > radius:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Apply neighborhood filter
|
# Apply neighborhood filter with flexible matching
|
||||||
if neighborhood and space.neighborhood.lower() != neighborhood.lower():
|
if neighborhood:
|
||||||
continue
|
neighborhood_lower = neighborhood.lower()
|
||||||
|
space_neighborhood_lower = space.neighborhood.lower()
|
||||||
|
|
||||||
|
# Check for exact match or partial match (useful for compound neighborhood names)
|
||||||
|
if (neighborhood_lower != space_neighborhood_lower and
|
||||||
|
neighborhood_lower not in space_neighborhood_lower and
|
||||||
|
space_neighborhood_lower not in neighborhood_lower):
|
||||||
|
continue
|
||||||
|
|
||||||
# Apply other filters
|
# Apply other filters
|
||||||
if filters:
|
if filters:
|
||||||
|
@ -214,43 +222,60 @@ class BerlinDataService:
|
||||||
|
|
||||||
async def get_neighborhood_stats(self) -> Dict[str, Any]:
|
async def get_neighborhood_stats(self) -> Dict[str, Any]:
|
||||||
"""Get statistics for Berlin neighborhoods."""
|
"""Get statistics for Berlin neighborhoods."""
|
||||||
return {
|
# Get all green spaces to calculate real neighborhood stats
|
||||||
"neighborhoods": [
|
green_spaces = await self._get_mock_green_spaces()
|
||||||
{
|
|
||||||
"name": "mitte",
|
# Count green spaces per neighborhood
|
||||||
"display_name": "Mitte",
|
neighborhood_counts = {}
|
||||||
"green_space_count": 15,
|
neighborhood_spaces = {}
|
||||||
"avg_personality_scores": {
|
|
||||||
"little_adventurers": 75,
|
for space in green_spaces:
|
||||||
"date_night": 80,
|
neighborhood = space.neighborhood
|
||||||
"squad_goals": 70,
|
if neighborhood not in neighborhood_counts:
|
||||||
"zen_masters": 65
|
neighborhood_counts[neighborhood] = 0
|
||||||
}
|
neighborhood_spaces[neighborhood] = []
|
||||||
},
|
neighborhood_counts[neighborhood] += 1
|
||||||
{
|
neighborhood_spaces[neighborhood].append(space)
|
||||||
"name": "kreuzberg",
|
|
||||||
"display_name": "Kreuzberg",
|
# Generate neighborhood stats
|
||||||
"green_space_count": 12,
|
neighborhoods = []
|
||||||
"avg_personality_scores": {
|
for neighborhood, count in neighborhood_counts.items():
|
||||||
"little_adventurers": 70,
|
# Calculate average personality scores based on green space features
|
||||||
"date_night": 75,
|
spaces = neighborhood_spaces[neighborhood]
|
||||||
"squad_goals": 85,
|
|
||||||
"zen_masters": 60
|
# Calculate scores based on actual features
|
||||||
}
|
total_tree_coverage = sum(s.environmental.tree_coverage_percent for s in spaces)
|
||||||
},
|
total_playgrounds = sum(s.recreation.playground_quality for s in spaces)
|
||||||
{
|
total_water_features = sum(1 for s in spaces if s.environmental.water_features)
|
||||||
"name": "prenzlauer_berg",
|
total_sports = sum(1 for s in spaces if s.recreation.sports_facilities)
|
||||||
"display_name": "Prenzlauer Berg",
|
|
||||||
"green_space_count": 18,
|
avg_tree_coverage = total_tree_coverage / count if count > 0 else 0
|
||||||
"avg_personality_scores": {
|
avg_playground = total_playgrounds / count if count > 0 else 0
|
||||||
"little_adventurers": 90,
|
water_ratio = total_water_features / count if count > 0 else 0
|
||||||
"date_night": 70,
|
sports_ratio = total_sports / count if count > 0 else 0
|
||||||
"squad_goals": 75,
|
|
||||||
"zen_masters": 70
|
# Calculate personality scores based on features
|
||||||
}
|
little_adventurers = min(100, int(avg_playground * 0.8 + sports_ratio * 30 + 40))
|
||||||
|
date_night = min(100, int(avg_tree_coverage * 0.6 + water_ratio * 25 + 45))
|
||||||
|
squad_goals = min(100, int(sports_ratio * 40 + avg_tree_coverage * 0.4 + 35))
|
||||||
|
zen_masters = min(100, int(avg_tree_coverage * 0.7 + water_ratio * 20 + 30))
|
||||||
|
|
||||||
|
neighborhoods.append({
|
||||||
|
"name": neighborhood.lower().replace(' ', '_').replace('-', '_'),
|
||||||
|
"display_name": neighborhood,
|
||||||
|
"green_space_count": count,
|
||||||
|
"avg_personality_scores": {
|
||||||
|
"little_adventurers": little_adventurers,
|
||||||
|
"date_night": date_night,
|
||||||
|
"squad_goals": squad_goals,
|
||||||
|
"zen_masters": zen_masters
|
||||||
}
|
}
|
||||||
]
|
})
|
||||||
}
|
|
||||||
|
# Sort by green space count (most spaces first)
|
||||||
|
neighborhoods.sort(key=lambda x: x["green_space_count"], reverse=True)
|
||||||
|
|
||||||
|
return {"neighborhoods": neighborhoods}
|
||||||
|
|
||||||
async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
|
async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
|
||||||
"""Get current conditions at a location."""
|
"""Get current conditions at a location."""
|
||||||
|
@ -394,122 +419,76 @@ class BerlinDataService:
|
||||||
# Return original space if enhancement fails
|
# Return original space if enhancement fails
|
||||||
return green_space
|
return green_space
|
||||||
|
|
||||||
|
def _load_green_spaces(self) -> List[Dict]:
|
||||||
|
"""Load green spaces data from JSON file"""
|
||||||
|
if self._green_spaces_cache is None:
|
||||||
|
green_spaces_file = self.data_dir / "processed" / "quick_berlin_green_spaces.json"
|
||||||
|
if green_spaces_file.exists():
|
||||||
|
with open(green_spaces_file, 'r', encoding='utf-8') as f:
|
||||||
|
data = json.load(f)
|
||||||
|
self._green_spaces_cache = data.get("green_spaces", [])
|
||||||
|
else:
|
||||||
|
print("Warning: quick_berlin_green_spaces.json not found.")
|
||||||
|
self._green_spaces_cache = []
|
||||||
|
return self._green_spaces_cache
|
||||||
|
|
||||||
|
def _convert_json_to_green_space(self, json_data: Dict) -> GreenSpace:
|
||||||
|
"""Convert JSON data to GreenSpace model"""
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
return GreenSpace(
|
||||||
|
id=json_data.get("id", ""),
|
||||||
|
name=json_data.get("name", ""),
|
||||||
|
description=json_data.get("description", ""),
|
||||||
|
type=GreenSpaceType.PARK, # Default to PARK, could be enhanced later
|
||||||
|
coordinates=Coordinates(
|
||||||
|
lat=json_data.get("coordinates", {}).get("lat", 0.0),
|
||||||
|
lng=json_data.get("coordinates", {}).get("lng", 0.0)
|
||||||
|
),
|
||||||
|
neighborhood=json_data.get("neighborhood", "Berlin"),
|
||||||
|
address=f"{json_data.get('name', 'Unknown')}, Berlin",
|
||||||
|
area_sqm=json_data.get("area_sqm", 0),
|
||||||
|
perimeter_m=json_data.get("perimeter_m", None),
|
||||||
|
environmental=EnvironmentalFeatures(
|
||||||
|
tree_coverage_percent=json_data.get("environmental", {}).get("tree_coverage_percent", 0),
|
||||||
|
shade_quality=json_data.get("environmental", {}).get("shade_quality", 0),
|
||||||
|
noise_level=NoiseLevel(json_data.get("environmental", {}).get("noise_level", 1)),
|
||||||
|
wildlife_diversity_score=json_data.get("environmental", {}).get("wildlife_diversity_score", 0),
|
||||||
|
water_features=json_data.get("environmental", {}).get("water_features", False),
|
||||||
|
natural_surface_percent=json_data.get("environmental", {}).get("natural_surface_percent", 0)
|
||||||
|
),
|
||||||
|
accessibility=AccessibilityFeatures(
|
||||||
|
wheelchair_accessible=json_data.get("accessibility", {}).get("wheelchair_accessible", True),
|
||||||
|
public_transport_score=json_data.get("accessibility", {}).get("public_transport_score", 3),
|
||||||
|
cycling_infrastructure=json_data.get("accessibility", {}).get("cycling_infrastructure", True),
|
||||||
|
parking_availability=json_data.get("accessibility", {}).get("parking_availability", 2),
|
||||||
|
lighting_quality=json_data.get("accessibility", {}).get("lighting_quality", 3)
|
||||||
|
),
|
||||||
|
recreation=RecreationFeatures(
|
||||||
|
playground_quality=json_data.get("recreation", {}).get("playground_quality", 0),
|
||||||
|
sports_facilities=json_data.get("recreation", {}).get("sports_facilities", False),
|
||||||
|
running_paths=json_data.get("recreation", {}).get("running_paths", True),
|
||||||
|
cycling_paths=json_data.get("recreation", {}).get("cycling_paths", True),
|
||||||
|
dog_friendly=json_data.get("recreation", {}).get("dog_friendly", True),
|
||||||
|
bbq_allowed=json_data.get("recreation", {}).get("bbq_allowed", False)
|
||||||
|
),
|
||||||
|
nearby_amenities=[],
|
||||||
|
last_updated=datetime.fromisoformat(json_data.get("last_updated", datetime.now().isoformat())),
|
||||||
|
data_sources=json_data.get("data_sources", []),
|
||||||
|
confidence_score=json_data.get("confidence_score", 85)
|
||||||
|
)
|
||||||
|
|
||||||
async def _get_mock_green_spaces(self) -> List[GreenSpace]:
|
async def _get_mock_green_spaces(self) -> List[GreenSpace]:
|
||||||
"""Get mock green spaces data for development."""
|
"""Get green spaces data from JSON file."""
|
||||||
# This would be replaced with real data fetching in production
|
json_data = self._load_green_spaces()
|
||||||
return [
|
green_spaces = []
|
||||||
GreenSpace(
|
|
||||||
id="tiergarten_1",
|
for space_data in json_data:
|
||||||
name="Tiergarten",
|
try:
|
||||||
description="Berlin's most famous park in the heart of the city",
|
green_space = self._convert_json_to_green_space(space_data)
|
||||||
type=GreenSpaceType.PARK,
|
green_spaces.append(green_space)
|
||||||
coordinates=Coordinates(lat=52.5145, lng=13.3501),
|
except Exception as e:
|
||||||
neighborhood="Mitte",
|
print(f"Error converting green space {space_data.get('id', 'unknown')}: {e}")
|
||||||
address="Tiergarten, 10557 Berlin",
|
continue
|
||||||
area_sqm=210000,
|
|
||||||
perimeter_m=5800,
|
return green_spaces
|
||||||
environmental=EnvironmentalFeatures(
|
|
||||||
tree_coverage_percent=85,
|
|
||||||
shade_quality=90,
|
|
||||||
noise_level=NoiseLevel.MODERATE,
|
|
||||||
wildlife_diversity_score=80,
|
|
||||||
water_features=True,
|
|
||||||
natural_surface_percent=95
|
|
||||||
),
|
|
||||||
accessibility=AccessibilityFeatures(
|
|
||||||
wheelchair_accessible=True,
|
|
||||||
public_transport_score=5,
|
|
||||||
cycling_infrastructure=True,
|
|
||||||
parking_availability=3,
|
|
||||||
lighting_quality=4
|
|
||||||
),
|
|
||||||
recreation=RecreationFeatures(
|
|
||||||
playground_quality=70,
|
|
||||||
sports_facilities=True,
|
|
||||||
running_paths=True,
|
|
||||||
cycling_paths=True,
|
|
||||||
dog_friendly=True,
|
|
||||||
bbq_allowed=False
|
|
||||||
),
|
|
||||||
nearby_amenities=[],
|
|
||||||
last_updated=datetime.now(),
|
|
||||||
data_sources=["berlin_open_data", "osm"],
|
|
||||||
confidence_score=95
|
|
||||||
),
|
|
||||||
GreenSpace(
|
|
||||||
id="volkspark_friedrichshain",
|
|
||||||
name="Volkspark Friedrichshain",
|
|
||||||
description="Historic park with fairy tale fountain and sports facilities",
|
|
||||||
type=GreenSpaceType.PARK,
|
|
||||||
coordinates=Coordinates(lat=52.5263, lng=13.4317),
|
|
||||||
neighborhood="Friedrichshain",
|
|
||||||
address="Friedrichshain, 10249 Berlin",
|
|
||||||
area_sqm=49000,
|
|
||||||
perimeter_m=2800,
|
|
||||||
environmental=EnvironmentalFeatures(
|
|
||||||
tree_coverage_percent=70,
|
|
||||||
shade_quality=75,
|
|
||||||
noise_level=NoiseLevel.QUIET,
|
|
||||||
wildlife_diversity_score=65,
|
|
||||||
water_features=True,
|
|
||||||
natural_surface_percent=80
|
|
||||||
),
|
|
||||||
accessibility=AccessibilityFeatures(
|
|
||||||
wheelchair_accessible=True,
|
|
||||||
public_transport_score=4,
|
|
||||||
cycling_infrastructure=True,
|
|
||||||
parking_availability=2,
|
|
||||||
lighting_quality=3
|
|
||||||
),
|
|
||||||
recreation=RecreationFeatures(
|
|
||||||
playground_quality=85,
|
|
||||||
sports_facilities=True,
|
|
||||||
running_paths=True,
|
|
||||||
cycling_paths=True,
|
|
||||||
dog_friendly=True,
|
|
||||||
bbq_allowed=True
|
|
||||||
),
|
|
||||||
nearby_amenities=[],
|
|
||||||
last_updated=datetime.now(),
|
|
||||||
data_sources=["berlin_open_data", "osm"],
|
|
||||||
confidence_score=90
|
|
||||||
),
|
|
||||||
GreenSpace(
|
|
||||||
id="tempelhofer_feld",
|
|
||||||
name="Tempelhofer Feld",
|
|
||||||
description="Former airport turned into unique urban park",
|
|
||||||
type=GreenSpaceType.PARK,
|
|
||||||
coordinates=Coordinates(lat=52.4732, lng=13.4015),
|
|
||||||
neighborhood="Tempelhof",
|
|
||||||
address="Tempelhofer Damm, 12101 Berlin",
|
|
||||||
area_sqm=300000,
|
|
||||||
perimeter_m=6200,
|
|
||||||
environmental=EnvironmentalFeatures(
|
|
||||||
tree_coverage_percent=15,
|
|
||||||
shade_quality=20,
|
|
||||||
noise_level=NoiseLevel.MODERATE,
|
|
||||||
wildlife_diversity_score=40,
|
|
||||||
water_features=False,
|
|
||||||
natural_surface_percent=60
|
|
||||||
),
|
|
||||||
accessibility=AccessibilityFeatures(
|
|
||||||
wheelchair_accessible=True,
|
|
||||||
public_transport_score=4,
|
|
||||||
cycling_infrastructure=True,
|
|
||||||
parking_availability=4,
|
|
||||||
lighting_quality=2
|
|
||||||
),
|
|
||||||
recreation=RecreationFeatures(
|
|
||||||
playground_quality=30,
|
|
||||||
sports_facilities=False,
|
|
||||||
running_paths=True,
|
|
||||||
cycling_paths=True,
|
|
||||||
dog_friendly=True,
|
|
||||||
bbq_allowed=True
|
|
||||||
),
|
|
||||||
nearby_amenities=[],
|
|
||||||
last_updated=datetime.now(),
|
|
||||||
data_sources=["berlin_open_data", "osm"],
|
|
||||||
confidence_score=85
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
|
@ -4,6 +4,10 @@ from pathlib import Path
|
||||||
from typing import List, Optional, Tuple, Dict, Any
|
from typing import List, Optional, Tuple, Dict, Any
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from geopy.distance import geodesic
|
from geopy.distance import geodesic
|
||||||
|
from rtree import index
|
||||||
|
import asyncio
|
||||||
|
import aiofiles
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
from app.models.street_tree import (
|
from app.models.street_tree import (
|
||||||
StreetTree, TreeDensityMetrics, TreeShadeAnalysis, TreesSearchFilters,
|
StreetTree, TreeDensityMetrics, TreeShadeAnalysis, TreesSearchFilters,
|
||||||
|
@ -14,24 +18,58 @@ from app.models.green_space import Coordinates
|
||||||
class StreetTreeService:
|
class StreetTreeService:
|
||||||
"""Service for accessing and analyzing Berlin street trees data."""
|
"""Service for accessing and analyzing Berlin street trees data."""
|
||||||
|
|
||||||
def __init__(self):
|
_instance = None
|
||||||
self._trees_cache = None
|
_initialized = False
|
||||||
self._trees_index = None
|
|
||||||
self.data_dir = Path("app/data")
|
|
||||||
|
|
||||||
def _load_trees(self) -> List[Dict]:
|
def __new__(cls):
|
||||||
"""Load street trees data from JSON file."""
|
if cls._instance is None:
|
||||||
|
cls._instance = super().__new__(cls)
|
||||||
|
return cls._instance
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
if not self._initialized:
|
||||||
|
self._trees_cache = None
|
||||||
|
self._spatial_index = None
|
||||||
|
self._tree_id_to_data = {}
|
||||||
|
self.data_dir = Path("app/data")
|
||||||
|
self.__class__._initialized = True
|
||||||
|
|
||||||
|
async def _load_trees(self) -> List[Dict]:
|
||||||
|
"""Load street trees data from JSON file and build spatial index."""
|
||||||
if self._trees_cache is None:
|
if self._trees_cache is None:
|
||||||
trees_file = self.data_dir / "processed" / "street_trees.json"
|
trees_file = self.data_dir / "processed" / "street_trees.json"
|
||||||
if trees_file.exists():
|
if trees_file.exists():
|
||||||
with open(trees_file, 'r', encoding='utf-8') as f:
|
print("🔄 Loading trees data and building spatial index...")
|
||||||
data = json.load(f)
|
async with aiofiles.open(trees_file, 'r', encoding='utf-8') as f:
|
||||||
|
content = await f.read()
|
||||||
|
data = json.loads(content)
|
||||||
self._trees_cache = data.get("street_trees", [])
|
self._trees_cache = data.get("street_trees", [])
|
||||||
|
await self._build_spatial_index()
|
||||||
|
print(f"✅ Loaded {len(self._trees_cache)} trees with spatial index")
|
||||||
else:
|
else:
|
||||||
print("Warning: street_trees.json not found. Run process_street_trees.py first.")
|
print("Warning: street_trees.json not found. Run process_street_trees.py first.")
|
||||||
self._trees_cache = []
|
self._trees_cache = []
|
||||||
return self._trees_cache
|
return self._trees_cache
|
||||||
|
|
||||||
|
async def _build_spatial_index(self):
|
||||||
|
"""Build R-tree spatial index for fast location queries."""
|
||||||
|
if self._spatial_index is None and self._trees_cache:
|
||||||
|
print("🔨 Building spatial index...")
|
||||||
|
self._spatial_index = index.Index()
|
||||||
|
self._tree_id_to_data = {}
|
||||||
|
|
||||||
|
for i, tree_data in enumerate(self._trees_cache):
|
||||||
|
lat = tree_data.get('lat')
|
||||||
|
lng = tree_data.get('lng')
|
||||||
|
|
||||||
|
if lat is not None and lng is not None:
|
||||||
|
# R-tree expects (minx, miny, maxx, maxy)
|
||||||
|
bbox = (lng, lat, lng, lat)
|
||||||
|
self._spatial_index.insert(i, bbox)
|
||||||
|
self._tree_id_to_data[i] = tree_data
|
||||||
|
|
||||||
|
print(f"✅ Spatial index built for {len(self._tree_id_to_data)} trees")
|
||||||
|
|
||||||
def _create_tree_from_dict(self, tree_data: Dict) -> StreetTree:
|
def _create_tree_from_dict(self, tree_data: Dict) -> StreetTree:
|
||||||
"""Convert tree dictionary to StreetTree model."""
|
"""Convert tree dictionary to StreetTree model."""
|
||||||
|
|
||||||
|
@ -94,6 +132,11 @@ class StreetTreeService:
|
||||||
last_updated=datetime.now()
|
last_updated=datetime.now()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1000)
|
||||||
|
def _distance_cache(self, lat1: float, lng1: float, lat2: float, lng2: float) -> float:
|
||||||
|
"""Cache distance calculations."""
|
||||||
|
return geodesic((lat1, lng1), (lat2, lng2)).meters
|
||||||
|
|
||||||
async def get_trees_near_location(
|
async def get_trees_near_location(
|
||||||
self,
|
self,
|
||||||
lat: float,
|
lat: float,
|
||||||
|
@ -101,31 +144,48 @@ class StreetTreeService:
|
||||||
radius_m: int = 500,
|
radius_m: int = 500,
|
||||||
limit: Optional[int] = None
|
limit: Optional[int] = None
|
||||||
) -> TreesNearLocationResponse:
|
) -> TreesNearLocationResponse:
|
||||||
"""Get street trees within a radius of a location."""
|
"""Get street trees within a radius of a location using spatial index."""
|
||||||
start_time = datetime.now()
|
start_time = datetime.now()
|
||||||
|
|
||||||
trees_data = self._load_trees()
|
await self._load_trees()
|
||||||
nearby_trees = []
|
nearby_trees = []
|
||||||
|
|
||||||
for tree_data in trees_data:
|
if self._spatial_index is None:
|
||||||
|
# Fallback to linear search if index failed
|
||||||
|
return await self._get_trees_linear_search(lat, lng, radius_m, limit)
|
||||||
|
|
||||||
|
# Convert radius to approximate bounding box for R-tree query
|
||||||
|
# Rough approximation: 1 degree ≈ 111km
|
||||||
|
radius_deg = radius_m / 111000
|
||||||
|
bbox = (lng - radius_deg, lat - radius_deg, lng + radius_deg, lat + radius_deg)
|
||||||
|
|
||||||
|
# Query spatial index for candidates
|
||||||
|
candidate_ids = list(self._spatial_index.intersection(bbox))
|
||||||
|
|
||||||
|
# Filter candidates by exact distance
|
||||||
|
tree_distances = []
|
||||||
|
for tree_id in candidate_ids:
|
||||||
|
tree_data = self._tree_id_to_data.get(tree_id)
|
||||||
|
if not tree_data:
|
||||||
|
continue
|
||||||
|
|
||||||
tree_lat = tree_data.get('lat')
|
tree_lat = tree_data.get('lat')
|
||||||
tree_lng = tree_data.get('lng')
|
tree_lng = tree_data.get('lng')
|
||||||
|
|
||||||
if tree_lat is None or tree_lng is None:
|
if tree_lat is None or tree_lng is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
distance = geodesic((lat, lng), (tree_lat, tree_lng)).meters
|
distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
|
||||||
if distance <= radius_m:
|
if distance <= radius_m:
|
||||||
tree = self._create_tree_from_dict(tree_data)
|
tree = self._create_tree_from_dict(tree_data)
|
||||||
nearby_trees.append(tree)
|
tree_distances.append((tree, distance))
|
||||||
|
|
||||||
if limit and len(nearby_trees) >= limit:
|
if limit and len(tree_distances) >= limit:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Sort by distance
|
# Sort by distance
|
||||||
nearby_trees.sort(
|
tree_distances.sort(key=lambda x: x[1])
|
||||||
key=lambda t: geodesic((lat, lng), (t.coordinates.lat, t.coordinates.lng)).meters
|
nearby_trees = [tree for tree, _ in tree_distances]
|
||||||
)
|
|
||||||
|
|
||||||
# Calculate metrics
|
# Calculate metrics
|
||||||
metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
|
metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
|
||||||
|
@ -212,7 +272,7 @@ class StreetTreeService:
|
||||||
large_trees = []
|
large_trees = []
|
||||||
|
|
||||||
for tree in trees:
|
for tree in trees:
|
||||||
distance = geodesic((lat, lng), (tree.coordinates.lat, tree.coordinates.lng)).meters
|
distance = self._distance_cache(lat, lng, tree.coordinates.lat, tree.coordinates.lng)
|
||||||
|
|
||||||
if distance <= 50:
|
if distance <= 50:
|
||||||
trees_50m += 1
|
trees_50m += 1
|
||||||
|
@ -259,9 +319,58 @@ class StreetTreeService:
|
||||||
canopy_density=len(large_trees) / max(1, len(trees)) if trees else 0
|
canopy_density=len(large_trees) / max(1, len(trees)) if trees else 0
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _get_trees_linear_search(
|
||||||
|
self,
|
||||||
|
lat: float,
|
||||||
|
lng: float,
|
||||||
|
radius_m: int = 500,
|
||||||
|
limit: Optional[int] = None
|
||||||
|
) -> TreesNearLocationResponse:
|
||||||
|
"""Fallback linear search method."""
|
||||||
|
start_time = datetime.now()
|
||||||
|
|
||||||
|
trees_data = await self._load_trees()
|
||||||
|
nearby_trees = []
|
||||||
|
|
||||||
|
for tree_data in trees_data:
|
||||||
|
tree_lat = tree_data.get('lat')
|
||||||
|
tree_lng = tree_data.get('lng')
|
||||||
|
|
||||||
|
if tree_lat is None or tree_lng is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
|
||||||
|
if distance <= radius_m:
|
||||||
|
tree = self._create_tree_from_dict(tree_data)
|
||||||
|
nearby_trees.append(tree)
|
||||||
|
|
||||||
|
if limit and len(nearby_trees) >= limit:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Sort by distance
|
||||||
|
nearby_trees.sort(
|
||||||
|
key=lambda t: self._distance_cache(lat, lng, t.coordinates.lat, t.coordinates.lng)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Calculate metrics
|
||||||
|
metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
|
||||||
|
shade_analysis = self._analyze_shade_coverage(lat, lng, nearby_trees)
|
||||||
|
|
||||||
|
query_time = (datetime.now() - start_time).total_seconds() * 1000
|
||||||
|
|
||||||
|
return TreesNearLocationResponse(
|
||||||
|
location=Coordinates(lat=lat, lng=lng),
|
||||||
|
radius_m=radius_m,
|
||||||
|
trees=nearby_trees,
|
||||||
|
metrics=metrics,
|
||||||
|
shade_analysis=shade_analysis,
|
||||||
|
total_found=len(nearby_trees),
|
||||||
|
query_time_ms=int(query_time)
|
||||||
|
)
|
||||||
|
|
||||||
async def search_trees(self, filters: TreesSearchFilters) -> List[StreetTree]:
|
async def search_trees(self, filters: TreesSearchFilters) -> List[StreetTree]:
|
||||||
"""Search trees with filters."""
|
"""Search trees with filters."""
|
||||||
trees_data = self._load_trees()
|
trees_data = await self._load_trees()
|
||||||
filtered_trees = []
|
filtered_trees = []
|
||||||
|
|
||||||
for tree_data in trees_data:
|
for tree_data in trees_data:
|
||||||
|
@ -272,10 +381,10 @@ class StreetTreeService:
|
||||||
if tree_lat is None or tree_lng is None:
|
if tree_lat is None or tree_lng is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
distance = geodesic(
|
distance = self._distance_cache(
|
||||||
(filters.center_lat, filters.center_lng),
|
filters.center_lat, filters.center_lng,
|
||||||
(tree_lat, tree_lng)
|
tree_lat, tree_lng
|
||||||
).meters
|
)
|
||||||
if distance > filters.within_radius_m:
|
if distance > filters.within_radius_m:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -305,7 +414,7 @@ class StreetTreeService:
|
||||||
|
|
||||||
async def get_tree_stats(self) -> Dict[str, Any]:
|
async def get_tree_stats(self) -> Dict[str, Any]:
|
||||||
"""Get overall statistics about Berlin street trees."""
|
"""Get overall statistics about Berlin street trees."""
|
||||||
trees_data = self._load_trees()
|
trees_data = await self._load_trees()
|
||||||
|
|
||||||
if not trees_data:
|
if not trees_data:
|
||||||
return {"error": "No tree data available"}
|
return {"error": "No tree data available"}
|
||||||
|
|
|
@ -35,6 +35,8 @@ dependencies = [
|
||||||
"redis>=5.0.0",
|
"redis>=5.0.0",
|
||||||
"aiofiles>=23.2.0",
|
"aiofiles>=23.2.0",
|
||||||
"openpyxl>=3.1.5",
|
"openpyxl>=3.1.5",
|
||||||
|
"tqdm>=4.67.1",
|
||||||
|
"rtree>=1.4.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
|
@ -0,0 +1,467 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Enhanced Berlin green space processor using existing tree and toilet services.
|
||||||
|
Downloads OSM green space boundaries and enhances them with real data using existing services.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import zipfile
|
||||||
|
import requests
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
import geopandas as gpd
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Dict, Optional
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Add the app directory to Python path to import services
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.services.street_tree_service import StreetTreeService
|
||||||
|
from app.services.berlin_data_service import BerlinDataService
|
||||||
|
|
||||||
|
|
||||||
|
class RealDataGreenSpaceProcessor:
|
||||||
|
def __init__(self, data_dir: str = "app/data"):
    """Set up data directories and the existing data services.

    Args:
        data_dir: Root data directory; raw downloads go to ``geo-raw`` and
            the final JSON output to ``processed`` underneath it.
    """
    self.data_dir = Path(data_dir)
    self.raw_dir = self.data_dir / "geo-raw"        # downloaded source files (OSM, districts)
    self.processed_dir = self.data_dir / "processed"  # enhanced JSON output

    # Create directories
    self.raw_dir.mkdir(parents=True, exist_ok=True)
    self.processed_dir.mkdir(parents=True, exist_ok=True)

    # Initialize existing services (reused for tree and toilet lookups)
    self.tree_service = StreetTreeService()
    self.berlin_data = BerlinDataService()
||||||
|
|
||||||
|
def download_berlin_districts(self):
    """Download Berlin district boundaries.

    Returns the path to the cached GeoJSON file; the download is skipped
    when the file is already on disk.
    """
    target = self.raw_dir / "bezirksgrenzen.geojson"

    # Reuse a previously downloaded copy when present.
    if target.exists():
        print(f"Berlin district data already exists: {target}")
        return target

    url = "https://tsb-opendata.s3.eu-central-1.amazonaws.com/bezirksgrenzen/bezirksgrenzen.geojson"
    print(f"Downloading Berlin district data from {url}")

    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
        target.write_bytes(resp.content)
        print(f"Downloaded to {target}")
        return target
    except Exception as exc:
        print(f"Error downloading districts: {exc}")
        raise
||||||
|
|
||||||
|
def download_osm_data(self):
    """Download and extract the Geofabrik Berlin OSM shapefile bundle.

    Returns the directory containing the extracted shapefiles. Both the
    download and the extraction are skipped when their results already
    exist on disk.

    Fix: the archive is now downloaded to a ``.part`` file and renamed only
    after the stream completes, so an interrupted download no longer leaves
    a truncated zip that later runs would treat as complete.
    """
    zip_file = self.raw_dir / "berlin_shapes.zip"
    shp_dir = self.raw_dir / "berlin_shapes"

    # Check if already extracted
    required_files = ["gis_osm_landuse_a_free_1.shp", "gis_osm_natural_a_free_1.shp", "gis_osm_leisure_a_free_1.shp"]
    if all((shp_dir / f).exists() for f in required_files):
        print(f"Berlin OSM data already exists: {shp_dir}")
        return shp_dir

    if not zip_file.exists():
        link = "https://download.geofabrik.de/europe/germany/berlin-latest-free.shp.zip"
        print(f"Downloading Berlin OSM data from {link} (this may take several minutes...)")

        # Stream into a temp file; publish under the final name only on success.
        part_file = zip_file.with_name(zip_file.name + ".part")
        try:
            response = requests.get(link, stream=True, timeout=300)  # 5 min timeout
            response.raise_for_status()

            with open(part_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

            part_file.replace(zip_file)
            print(f"Download completed: {zip_file}")
        except Exception as e:
            print(f"Error downloading OSM data: {e}")
            raise

    print(f"Extracting Berlin OSM data to {shp_dir}")
    try:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(shp_dir)
        print(f"Extracted to {shp_dir}")
    except Exception as e:
        print(f"Error extracting OSM data: {e}")
        raise

    return shp_dir
||||||
|
|
||||||
|
def load_osm_green_spaces(self):
    """Load OSM green space polygons clipped to Berlin.

    Returns a GeoDataFrame with ``area_sqm``, district (``Bezirk``) and
    centroid lat/lng columns, filtered to areas of at least 1000 sqm.

    Raises:
        ValueError: if no green space features could be loaded at all.
    """
    print("Loading OSM green space boundaries...")

    # Download required data
    districts_file = self.download_berlin_districts()
    shp_dir = self.download_osm_data()

    # Load Berlin districts for clipping
    districts = gpd.read_file(districts_file)

    # Define green space categories we want
    green_categories = {
        'landuse': ['forest', 'grass', 'meadow', 'recreation_ground', 'village_green', 'allotments'],
        'natural': ['forest', 'grass', 'meadow', 'scrub', 'heath', 'wood'],
        'leisure': ['park', 'garden', 'nature_reserve', 'playground', 'pitch', 'common', 'golf_course']
    }

    all_green_spaces = []

    # Process each category
    for category, subcategories in green_categories.items():
        shapefile = shp_dir / f"gis_osm_{category}_a_free_1.shp"

        if not shapefile.exists():
            print(f"Warning: {shapefile} not found, skipping")
            continue

        print(f"Processing {category} data...")
        try:
            gdf = gpd.read_file(shapefile)

            # Filter to relevant subcategories
            gdf_filtered = gdf[gdf['fclass'].isin(subcategories)].copy()

            if len(gdf_filtered) == 0:
                print(f"No {category} features found in subcategories")
                continue

            # Clip to Berlin boundaries
            gdf_clipped = gpd.clip(gdf_filtered, districts)

            # BUG FIX: Geofabrik shapefiles are in WGS84 (degrees), and
            # GeoSeries.area on a geographic CRS yields square *degrees*, not
            # square meters, which broke the 1000-sqm filter. Project to a
            # metric equal-area CRS (EPSG:3035) before measuring.
            gdf_clipped['area_sqm'] = gdf_clipped.geometry.to_crs(epsg=3035).area
            gdf_clipped = gdf_clipped[gdf_clipped['area_sqm'] >= 1000]

            if len(gdf_clipped) > 0:
                all_green_spaces.append(gdf_clipped)
                print(f"Found {len(gdf_clipped)} {category} features")

        except Exception as e:
            print(f"Error processing {category}: {e}")
            continue

    if not all_green_spaces:
        raise ValueError("No green space data found")

    # Combine all green spaces
    green_spaces = gpd.GeoDataFrame(pd.concat(all_green_spaces, ignore_index=True))

    # Add district information
    # NOTE(review): assumes the district file exposes a 'Bezirk' column — confirm
    # against the downloaded GeoJSON schema.
    green_spaces = gpd.sjoin(green_spaces, districts[['Bezirk', 'geometry']], how='left')

    # Calculate centroids for analysis (in the source lat/lng CRS)
    green_spaces['centroid'] = green_spaces.geometry.centroid
    green_spaces['centroid_lat'] = green_spaces.centroid.y
    green_spaces['centroid_lng'] = green_spaces.centroid.x

    print(f"Total green spaces found: {len(green_spaces)}")
    return green_spaces
||||||
|
|
||||||
|
async def enhance_green_space_with_real_data(self, row):
    """Enhance a single green space with real tree and toilet data.

    Args:
        row: one GeoDataFrame row (pandas Series) as produced by
            ``load_osm_green_spaces`` — must carry ``centroid_lat``,
            ``centroid_lng`` and ``area_sqm``; ``row.name`` is the index label.

    Returns:
        A dict describing the enhanced green space, or ``None`` when
        enhancement fails (errors are printed, not raised).
    """
    try:
        lat = row['centroid_lat']
        lng = row['centroid_lng']
        area_sqm = int(row['area_sqm'])

        # Use existing tree service to get real tree data
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=min(400, int((area_sqm ** 0.5) * 1.5))  # Adaptive radius
        )

        # Use existing toilet service to get real toilet data
        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 800)

        # Calculate toilet accessibility score
        toilet_score = self._score_toilet_accessibility(nearby_toilets)

        # Map OSM type to our enum
        space_type = self._map_osm_to_space_type(row.get('fclass', ''))

        # Generate ID
        space_id = f"real_{row.get('fclass', 'unknown')}_{row.name}"

        # Create enhanced green space using real data
        enhanced_space = {
            "id": space_id,
            "name": row.get('name') or f"{row.get('fclass', 'Green Space').title()} in {row.get('Bezirk', 'Berlin')}",
            "description": f"Real Berlin {row.get('fclass', 'green space')} enhanced with tree and toilet data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": row.get('Bezirk', 'Unknown'),
            "area_sqm": area_sqm,
            # NOTE(review): geometry.length on a WGS84 geometry is in degrees,
            # not meters — TODO confirm intended units.
            "perimeter_m": int(row.geometry.length) if hasattr(row.geometry, 'length') else 0,

            # Environmental features using real tree data
            "environmental": {
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(row.get('fclass', ''), row.get('Bezirk', '')),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                # Heuristic: flag water if the OSM class or name mentions it.
                "water_features": 'water' in str(row.get('fclass', '')).lower() or 'river' in str(row.get('name', '')).lower(),
                "natural_surface_percent": self._estimate_natural_surface(row.get('fclass', ''))
            },

            # Real tree metrics from existing service
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species
            },

            # Real toilet accessibility from existing service
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                # assumes toilets are returned nearest-first — TODO confirm
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Standard accessibility features (static defaults, not measured)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": 3,  # Could be enhanced with real transit data
                "cycling_infrastructure": area_sqm > 5000,
                "parking_availability": 2,
                "lighting_quality": 2
            },

            # Recreation features based on OSM data and size
            "recreation": {
                "playground_quality": self._estimate_playground_quality(row.get('fclass', ''), tree_response.metrics.total_trees),
                "sports_facilities": 'pitch' in str(row.get('fclass', '')).lower() or 'sport' in str(row.get('name', '')).lower(),
                "running_paths": area_sqm > 8000,
                "cycling_paths": area_sqm > 15000,
                "dog_friendly": True,
                "bbq_allowed": row.get('fclass') in ['park', 'recreation_ground'] and area_sqm > 5000
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["openstreetmap", "berlin_tree_cadastre", "berlin_toilets"],
            "confidence_score": 95
        }

        return enhanced_space

    except Exception as e:
        # Best-effort: one failing space must not abort the batch run.
        print(f"Error enhancing green space {row.name}: {e}")
        return None
||||||
|
|
||||||
|
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||||
|
"""Score toilet accessibility using existing toilet data."""
|
||||||
|
if not nearby_toilets:
|
||||||
|
return 20
|
||||||
|
|
||||||
|
nearest_distance = nearby_toilets[0]['distance_meters']
|
||||||
|
|
||||||
|
# Distance-based scoring
|
||||||
|
if nearest_distance <= 200:
|
||||||
|
score = 100
|
||||||
|
elif nearest_distance <= 400:
|
||||||
|
score = 80
|
||||||
|
elif nearest_distance <= 600:
|
||||||
|
score = 60
|
||||||
|
else:
|
||||||
|
score = 40
|
||||||
|
|
||||||
|
# Bonuses for quality
|
||||||
|
free_toilets = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||||
|
accessible_toilets = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||||
|
|
||||||
|
score += min(20, free_toilets * 5 + accessible_toilets * 3)
|
||||||
|
|
||||||
|
return min(100, score)
|
||||||
|
|
||||||
|
def _map_osm_to_space_type(self, fclass: str) -> str:
|
||||||
|
"""Map OSM feature class to green space types."""
|
||||||
|
mapping = {
|
||||||
|
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN',
|
||||||
|
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||||
|
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||||
|
'wood': 'FOREST', 'heath': 'HEATH', 'pitch': 'SPORTS_AREA',
|
||||||
|
'golf_course': 'SPORTS_AREA', 'common': 'PARK', 'village_green': 'GRASS',
|
||||||
|
'allotments': 'GARDEN'
|
||||||
|
}
|
||||||
|
return mapping.get(fclass, 'PARK')
|
||||||
|
|
||||||
|
def _estimate_noise_level(self, fclass: str, district: str) -> int:
|
||||||
|
"""Estimate noise level (1=very quiet, 5=very noisy)."""
|
||||||
|
base_noise = {
|
||||||
|
'forest': 1, 'nature_reserve': 1, 'wood': 1,
|
||||||
|
'meadow': 2, 'grass': 2, 'heath': 2,
|
||||||
|
'park': 2, 'garden': 2, 'common': 2,
|
||||||
|
'recreation_ground': 3, 'playground': 3, 'pitch': 3,
|
||||||
|
'golf_course': 2, 'allotments': 2
|
||||||
|
}
|
||||||
|
|
||||||
|
# Central districts are noisier
|
||||||
|
central_districts = ['Mitte', 'Kreuzberg', 'Friedrichshain']
|
||||||
|
district_modifier = 1 if district in central_districts else 0
|
||||||
|
|
||||||
|
return min(5, base_noise.get(fclass, 2) + district_modifier)
|
||||||
|
|
||||||
|
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||||
|
"""Estimate percentage of natural surface."""
|
||||||
|
surface_map = {
|
||||||
|
'forest': 95, 'nature_reserve': 95, 'wood': 95,
|
||||||
|
'meadow': 95, 'grass': 90, 'heath': 90,
|
||||||
|
'park': 75, 'garden': 65, 'common': 80,
|
||||||
|
'recreation_ground': 60, 'playground': 40, 'pitch': 20,
|
||||||
|
'golf_course': 70, 'allotments': 85
|
||||||
|
}
|
||||||
|
return surface_map.get(fclass, 70)
|
||||||
|
|
||||||
|
def _estimate_playground_quality(self, fclass: str, tree_count: int) -> int:
|
||||||
|
"""Estimate playground quality score."""
|
||||||
|
base_scores = {
|
||||||
|
'playground': 85,
|
||||||
|
'park': 65,
|
||||||
|
'recreation_ground': 70,
|
||||||
|
'garden': 40,
|
||||||
|
'common': 50
|
||||||
|
}
|
||||||
|
|
||||||
|
base = base_scores.get(fclass, 25)
|
||||||
|
|
||||||
|
# Trees improve playground appeal for families
|
||||||
|
tree_bonus = min(15, tree_count // 5) # +3 per 5 trees, max 15
|
||||||
|
|
||||||
|
return min(100, base + tree_bonus)
|
||||||
|
|
||||||
|
async def process_all_green_spaces(self):
    """Process all green spaces with real data enhancement.

    Loads OSM boundaries, then enhances every space (sequentially, in
    batches) with tree and toilet data via the existing services.
    Failed spaces are dropped silently (``enhance_green_space_with_real_data``
    returns ``None`` on error).

    Returns:
        List of enhanced green space dicts.
    """
    print("Starting enhanced green space processing with real data...")

    # Load OSM green space boundaries
    osm_green_spaces = self.load_osm_green_spaces()

    enhanced_green_spaces = []

    print(f"Enhancing {len(osm_green_spaces)} green spaces with real tree and toilet data...")

    # Process in batches to avoid overwhelming the system
    batch_size = 50
    total_processed = 0

    for i in range(0, len(osm_green_spaces), batch_size):
        batch = osm_green_spaces.iloc[i:i+batch_size]
        batch_results = []

        # Awaited one-by-one: enhancement is sequential, not concurrent.
        for idx, row in batch.iterrows():
            result = await self.enhance_green_space_with_real_data(row)
            if result:
                batch_results.append(result)

            total_processed += 1
            if total_processed % 25 == 0:
                print(f"Processed {total_processed}/{len(osm_green_spaces)} green spaces...")

        enhanced_green_spaces.extend(batch_results)

        # Small delay between batches
        await asyncio.sleep(0.1)

    print(f"Successfully enhanced {len(enhanced_green_spaces)} green spaces with real data")
    return enhanced_green_spaces
||||||
|
|
||||||
|
def save_enhanced_data(self, enhanced_green_spaces: List[Dict]):
    """Save enhanced green spaces to JSON file.

    Writes ``real_berlin_green_spaces.json`` into the processed directory
    together with summary statistics, and prints a short report.

    Fix: the summary ``print``s previously divided by
    ``len(enhanced_green_spaces)`` unguarded and crashed with
    ZeroDivisionError when the list was empty; percentages are now computed
    once, with an empty-list guard, and reused everywhere.

    Returns:
        Path to the written JSON file.
    """
    output_file = self.processed_dir / "real_berlin_green_spaces.json"

    # Calculate summary statistics (all guarded against an empty input list)
    total = len(enhanced_green_spaces)
    spaces_with_trees = len([gs for gs in enhanced_green_spaces if gs["tree_data"]["total_trees"] > 0])
    spaces_with_toilets = len([gs for gs in enhanced_green_spaces if gs["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(gs["tree_data"]["total_trees"] for gs in enhanced_green_spaces)
    avg_species_per_space = sum(gs["tree_data"]["species_count"] for gs in enhanced_green_spaces) / total if total else 0
    tree_pct = round((spaces_with_trees / total) * 100, 1) if total else 0
    toilet_pct = round((spaces_with_toilets / total) * 100, 1) if total else 0

    data = {
        "green_spaces": enhanced_green_spaces,
        "total_count": total,
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "openstreetmap_boundaries",
            "berlin_tree_cadastre_via_service",
            "berlin_toilet_locations_via_service",
            "berlin_districts"
        ],
        "processing_info": {
            "script_version": "1.0",
            "coordinate_system": "WGS84",
            "uses_existing_services": True,
            "tree_analysis_via": "StreetTreeService",
            "toilet_analysis_via": "BerlinDataService"
        },
        "summary_stats": {
            "spaces_with_trees": spaces_with_trees,
            "spaces_with_nearby_toilets": spaces_with_toilets,
            "total_trees_in_all_spaces": total_trees,
            "average_species_per_space": round(avg_species_per_space, 1),
            "coverage_percentage": {
                "with_tree_data": tree_pct,
                "with_toilet_data": toilet_pct
            }
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"✅ Saved {total} enhanced green spaces to {output_file}")
    print(f"📊 Summary:")
    print(f"   - {spaces_with_trees} spaces have tree data ({tree_pct}%)")
    print(f"   - {spaces_with_toilets} spaces have nearby toilets ({toilet_pct}%)")
    print(f"   - {total_trees} total trees analyzed")
    print(f"   - {avg_species_per_space:.1f} average species per space")

    return output_file
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Main processing function: run the pipeline and save the result."""
    processor = RealDataGreenSpaceProcessor()

    try:
        # Enhance every OSM green space via the existing services, then persist.
        spaces = await processor.process_all_green_spaces()
        result_path = processor.save_enhanced_data(spaces)

        print(f"\n🎉 Successfully created real data enhanced Berlin green spaces!")
        print(f"📁 Output: {result_path}")

    except KeyboardInterrupt:
        print("\n⚠️ Processing interrupted by user")
    except Exception as e:
        print(f"❌ Error processing data: {e}")
        raise
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the async processing pipeline.
if __name__ == "__main__":
    asyncio.run(main())
|
@ -0,0 +1,613 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Filtered OSM processor for significant Berlin green spaces.
|
||||||
|
Processes only meaningful green spaces (>1000 sqm) with real tree and toilet data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import sys
|
||||||
|
import math
|
||||||
|
from typing import List, Dict, Optional, Tuple
|
||||||
|
|
||||||
|
# Add the app directory to Python path
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.services.street_tree_service import StreetTreeService
|
||||||
|
from app.services.berlin_data_service import BerlinDataService
|
||||||
|
|
||||||
|
|
||||||
|
class FilteredOSMProcessor:
|
||||||
|
def __init__(self, data_dir: str = "app/data"):
    """Set up directories, services, and the filtering configuration.

    Args:
        data_dir: Root data directory; raw OSM XML is read from ``osm-raw``
            and results are written under ``processed``.
    """
    self.data_dir = Path(data_dir)
    self.osm_raw_dir = self.data_dir / "osm-raw"      # input: raw .osm XML
    self.processed_dir = self.data_dir / "processed"  # output: processed JSON

    # Initialize services (reused for tree and toilet lookups)
    self.tree_service = StreetTreeService()
    self.berlin_data = BerlinDataService()

    # Berlin bounding box (WGS84) used to discard out-of-city nodes
    self.berlin_bbox = {
        'min_lat': 52.3370, 'max_lat': 52.6755,
        'min_lon': 13.0882, 'max_lon': 13.7611
    }

    # Filtering criteria
    self.min_area_sqm = 1000  # Minimum area to be considered significant
    self.max_spaces = 800  # Maximum number of spaces to process
||||||
|
|
||||||
|
def parse_and_filter_osm_data(self) -> List[Dict]:
    """Parse OSM data and filter for significant green spaces.

    Reads ``berlin_green_spaces.osm``, keeps ways of at least
    ``self.min_area_sqm`` (up to ``self.max_spaces``), and returns them
    sorted by area, largest first. Returns an empty list when the input
    file is missing or unparsable.

    Fix: the area-range report indexed ``filtered_spaces[0]``/``[-1]``
    unconditionally and raised IndexError when no space survived
    filtering; the report is now skipped for an empty result.
    """
    osm_file = self.osm_raw_dir / "berlin_green_spaces.osm"

    if not osm_file.exists():
        print(f"❌ OSM file not found: {osm_file}")
        print("Please run the download first or ensure the file exists.")
        return []

    print(f"📂 Parsing OSM data from {osm_file}")

    try:
        tree = ET.parse(osm_file)
        root = tree.getroot()
        ways = root.findall('.//way')

        print(f"📊 Found {len(ways)} total ways in OSM file")
        print(f"🔍 Filtering for significant green spaces (≥{self.min_area_sqm} sqm)...")

        filtered_spaces = []
        processed_count = 0

        for way in ways:
            processed_count += 1

            if processed_count % 5000 == 0:
                print(f"   Processed {processed_count}/{len(ways)} ways... Found {len(filtered_spaces)} significant spaces")

            try:
                space_data = self._process_osm_way(way, root)
                if space_data and space_data['area_sqm'] >= self.min_area_sqm:
                    filtered_spaces.append(space_data)

                    # Stop if we have enough spaces
                    if len(filtered_spaces) >= self.max_spaces:
                        print(f"✅ Reached target of {self.max_spaces} significant spaces")
                        break

            except Exception:
                # A single malformed way must not abort the whole run.
                continue

        # Sort by area (largest first) to prioritize important spaces
        filtered_spaces.sort(key=lambda x: x['area_sqm'], reverse=True)

        print(f"🎯 Filtered to {len(filtered_spaces)} significant green spaces")
        # Guard the range report: indexing an empty list would raise.
        if filtered_spaces:
            print(f"📏 Area range: {filtered_spaces[-1]['area_sqm']:,} - {filtered_spaces[0]['area_sqm']:,} sqm")

        return filtered_spaces

    except Exception as e:
        print(f"❌ Error parsing OSM file: {e}")
        return []
||||||
|
|
||||||
|
def _process_osm_way(self, way, root) -> Optional[Dict]:
    """Process a single OSM way into green space format.

    Args:
        way: an ElementTree ``<way>`` element.
        root: the document root, used to resolve ``<nd ref=...>`` node ids.

    Returns:
        A dict describing the green space, or ``None`` when the way is not
        a relevant green space, has too few in-bounds nodes, or is smaller
        than ``self.min_area_sqm``.
    """
    # Get tags
    tags = {}
    for tag in way.findall('tag'):
        tags[tag.get('k')] = tag.get('v')

    # Check if it's a significant green space
    green_space_type = self._get_green_space_type(tags)
    if not green_space_type:
        return None

    # Skip certain types that are usually small or not parks
    skip_types = ['grave_yard', 'cemetery', 'allotments']
    if green_space_type in skip_types:
        return None

    # Get node references
    nd_refs = [nd.get('ref') for nd in way.findall('nd')]
    if len(nd_refs) < 3:  # Need at least 3 points for an area
        return None

    # Find node coordinates
    # NOTE(review): root.find scans the whole document per node ref (O(N)
    # each) — consider building an id->node index once per file if parsing
    # becomes slow on large extracts.
    coordinates = []
    for nd_ref in nd_refs:
        node = root.find(f".//node[@id='{nd_ref}']")
        if node is not None:
            lat = float(node.get('lat'))
            lon = float(node.get('lon'))

            # Check if within Berlin bounds
            if (self.berlin_bbox['min_lat'] <= lat <= self.berlin_bbox['max_lat'] and
                    self.berlin_bbox['min_lon'] <= lon <= self.berlin_bbox['max_lon']):
                coordinates.append((lat, lon))

    if len(coordinates) < 3:
        return None

    # Calculate centroid and area
    centroid_lat, centroid_lon = self._calculate_centroid(coordinates)
    area_sqm = self._calculate_area(coordinates)

    # Skip if too small
    if area_sqm < self.min_area_sqm:
        return None

    # Get name (fall back to a synthetic type+location label)
    name = tags.get('name')
    if not name:
        name = f"{green_space_type.title()} near {centroid_lat:.3f}, {centroid_lon:.3f}"

    # Estimate district
    district = self._estimate_district(centroid_lat, centroid_lon)

    return {
        'id': f"osm_way_{way.get('id')}",
        'name': name,
        'fclass': green_space_type,
        'lat': centroid_lat,
        'lng': centroid_lon,
        'area_sqm': int(area_sqm),
        'district': district,
        'osm_tags': tags,
        'osm_id': way.get('id'),
        'has_name': bool(tags.get('name'))  # Track if it has a real name
    }
||||||
|
|
||||||
|
def _get_green_space_type(self, tags: Dict) -> Optional[str]:
|
||||||
|
"""Determine if tags represent a significant green space."""
|
||||||
|
# Prioritize leisure tags (usually parks)
|
||||||
|
leisure = tags.get('leisure', '')
|
||||||
|
if leisure in ['park', 'garden', 'nature_reserve', 'recreation_ground', 'playground', 'common']:
|
||||||
|
return leisure
|
||||||
|
|
||||||
|
# Check landuse tags
|
||||||
|
landuse = tags.get('landuse', '')
|
||||||
|
if landuse in ['forest', 'grass', 'meadow', 'recreation_ground', 'village_green']:
|
||||||
|
return landuse
|
||||||
|
|
||||||
|
# Check natural tags (forests, etc.)
|
||||||
|
natural = tags.get('natural', '')
|
||||||
|
if natural in ['forest', 'wood', 'heath']:
|
||||||
|
return natural
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _calculate_centroid(self, coordinates: List[Tuple[float, float]]) -> Tuple[float, float]:
|
||||||
|
"""Calculate centroid of polygon."""
|
||||||
|
lat_sum = sum(coord[0] for coord in coordinates)
|
||||||
|
lon_sum = sum(coord[1] for coord in coordinates)
|
||||||
|
count = len(coordinates)
|
||||||
|
return lat_sum / count, lon_sum / count
|
||||||
|
|
||||||
|
def _calculate_area(self, coordinates: List[Tuple[float, float]]) -> float:
|
||||||
|
"""Calculate area using shoelace formula (approximate for Berlin)."""
|
||||||
|
if len(coordinates) < 3:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Convert to approximate meters for Berlin latitude
|
||||||
|
lat_to_m = 111000 # meters per degree latitude
|
||||||
|
lon_to_m = 111000 * math.cos(math.radians(52.5)) # adjust for Berlin
|
||||||
|
|
||||||
|
# Convert to meters
|
||||||
|
coords_m = [(lat * lat_to_m, lon * lon_to_m) for lat, lon in coordinates]
|
||||||
|
|
||||||
|
# Shoelace formula
|
||||||
|
area = 0
|
||||||
|
n = len(coords_m)
|
||||||
|
|
||||||
|
for i in range(n):
|
||||||
|
j = (i + 1) % n
|
||||||
|
area += coords_m[i][0] * coords_m[j][1]
|
||||||
|
area -= coords_m[j][0] * coords_m[i][1]
|
||||||
|
|
||||||
|
return abs(area) / 2
|
||||||
|
|
||||||
|
def _estimate_district(self, lat: float, lng: float) -> str:
|
||||||
|
"""Estimate Berlin district from coordinates."""
|
||||||
|
# Simplified district boundaries
|
||||||
|
if lat > 52.55:
|
||||||
|
return "Pankow" if lng < 13.45 else "Lichtenberg"
|
||||||
|
elif lat > 52.52:
|
||||||
|
if lng < 13.25:
|
||||||
|
return "Charlottenburg-Wilmersdorf"
|
||||||
|
elif lng < 13.42:
|
||||||
|
return "Mitte"
|
||||||
|
else:
|
||||||
|
return "Friedrichshain-Kreuzberg"
|
||||||
|
elif lat > 52.45:
|
||||||
|
if lng < 13.25:
|
||||||
|
return "Steglitz-Zehlendorf"
|
||||||
|
elif lng < 13.42:
|
||||||
|
return "Tempelhof-Schöneberg"
|
||||||
|
else:
|
||||||
|
return "Neukölln"
|
||||||
|
else:
|
||||||
|
return "Treptow-Köpenick"
|
||||||
|
|
||||||
|
    async def enhance_green_space_with_real_data(self, space_data: Dict):
        """Enhance one parsed OSM green space with real tree and toilet data.

        Combines the OSM-derived fields in ``space_data`` (id, name, fclass,
        lat/lng, area_sqm, district, osm_tags, has_name) with live lookups
        against the project's tree and toilet services, producing the
        API-ready green-space dict.  Returns the enriched dict, or ``None``
        if any step fails (the error is printed, not raised).
        """
        try:
            lat = space_data['lat']
            lng = space_data['lng']
            area_sqm = space_data['area_sqm']

            # Adaptive radius based on space size:
            # sqrt(area) scales with the space's side length; clamp to 150-400 m.
            radius = min(400, max(150, int((area_sqm ** 0.5) * 0.8)))

            # Get real data using existing services
            tree_response = await self.tree_service.get_trees_near_location(
                lat, lng, radius_m=radius
            )

            # Toilets are searched in a fixed 600 m radius regardless of space size.
            nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 600)

            # Calculate scores
            toilet_score = self._score_toilet_accessibility(nearby_toilets)
            space_type = self._map_to_space_type(space_data.get('fclass', ''))

            enhanced_space = {
                "id": space_data['id'],
                "name": space_data['name'],
                "description": f"Significant Berlin {space_data.get('fclass', 'green space')} from OSM data",
                "type": space_type,
                "coordinates": {
                    "lat": float(lat),
                    "lng": float(lng)
                },
                "neighborhood": space_data.get('district', 'Unknown'),
                "area_sqm": area_sqm,
                "perimeter_m": int(4 * (area_sqm ** 0.5)),  # Rough estimate (treats the space as a square)

                # Environmental features from real tree data
                "environmental": {
                    "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                    "shade_quality": tree_response.shade_analysis.shade_quality_score,
                    "noise_level": self._estimate_noise_level(space_data),
                    "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                    "water_features": self._detect_water_features(space_data),
                    "natural_surface_percent": self._estimate_natural_surface(space_data.get('fclass', ''))
                },

                # Real tree metrics from your existing service
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "species_diversity_score": tree_response.metrics.species_diversity_score,
                    "mature_trees_count": tree_response.metrics.mature_trees_count,
                    "young_trees_count": tree_response.metrics.young_trees_count,
                    "average_tree_age": tree_response.metrics.average_tree_age,
                    "average_height": tree_response.metrics.average_height,
                    "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                    "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                    "dominant_species": tree_response.metrics.dominant_species[:3]  # Top 3
                },

                # Real toilet accessibility from your existing service
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "accessibility_score": toilet_score,
                    # NOTE(review): assumes the toilet list is sorted nearest-first — confirm in BerlinDataService.
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                    "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                    "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
                },

                # Accessibility features (heuristics, not measured data)
                "accessibility": {
                    "wheelchair_accessible": True,
                    "public_transport_score": self._estimate_transport_score(space_data.get('district', '')),
                    "cycling_infrastructure": area_sqm > 5000,
                    "parking_availability": 3 if area_sqm > 50000 else 2,
                    "lighting_quality": 3 if 'mitte' in space_data.get('district', '').lower() else 2
                },

                # Recreation features (heuristics keyed on size/type/tags)
                "recreation": {
                    "playground_quality": self._estimate_playground_quality(space_data),
                    "sports_facilities": self._estimate_sports_facilities(space_data),
                    "running_paths": area_sqm > 8000,
                    "cycling_paths": area_sqm > 15000,
                    "dog_friendly": True,
                    "bbq_allowed": self._allows_bbq(space_data)
                },

                # OSM metadata
                "osm_metadata": {
                    "osm_id": space_data.get('osm_id'),
                    "has_official_name": space_data.get('has_name', False),
                    "tags": space_data.get('osm_tags', {}),
                    "source": "filtered_osm_extract"
                },

                "last_updated": datetime.now().isoformat(),
                "data_sources": ["filtered_osm_extract", "berlin_tree_cadastre", "berlin_toilets"],
                # Named spaces are treated as more trustworthy than inferred ones.
                "confidence_score": 95 if space_data.get('has_name') else 85
            }

            return enhanced_space

        except Exception as e:
            # Best-effort enrichment: log and skip this space rather than abort the batch.
            print(f"❌ Error enhancing {space_data['name']}: {e}")
            return None
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||||
|
if not nearby_toilets:
|
||||||
|
return 25
|
||||||
|
|
||||||
|
nearest = nearby_toilets[0]['distance_meters']
|
||||||
|
if nearest <= 200:
|
||||||
|
score = 95
|
||||||
|
elif nearest <= 400:
|
||||||
|
score = 80
|
||||||
|
elif nearest <= 600:
|
||||||
|
score = 65
|
||||||
|
else:
|
||||||
|
score = 45
|
||||||
|
|
||||||
|
# Quality bonuses
|
||||||
|
free = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||||
|
accessible = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||||
|
score += min(10, free * 5 + accessible * 3)
|
||||||
|
|
||||||
|
return min(100, score)
|
||||||
|
|
||||||
|
def _map_to_space_type(self, fclass: str) -> str:
|
||||||
|
mapping = {
|
||||||
|
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN', 'wood': 'FOREST',
|
||||||
|
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||||
|
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||||
|
'common': 'PARK', 'village_green': 'GRASS', 'heath': 'HEATH'
|
||||||
|
}
|
||||||
|
return mapping.get(fclass, 'PARK')
|
||||||
|
|
||||||
|
def _detect_water_features(self, space_data: Dict) -> bool:
|
||||||
|
name = space_data.get('name', '').lower()
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
|
||||||
|
water_keywords = ['see', 'teich', 'pond', 'lake', 'bach', 'spree', 'wasser', 'fluss']
|
||||||
|
return (any(keyword in name for keyword in water_keywords) or
|
||||||
|
'water' in str(tags.values()).lower())
|
||||||
|
|
||||||
|
def _estimate_noise_level(self, space_data: Dict) -> int:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
district = space_data.get('district', '')
|
||||||
|
area = space_data.get('area_sqm', 0)
|
||||||
|
|
||||||
|
base = {'forest': 1, 'wood': 1, 'nature_reserve': 1, 'heath': 1,
|
||||||
|
'meadow': 2, 'grass': 2, 'park': 2, 'garden': 2,
|
||||||
|
'playground': 3, 'recreation_ground': 3}.get(fclass, 2)
|
||||||
|
|
||||||
|
# Central districts are noisier
|
||||||
|
if any(busy in district.lower() for busy in ['mitte', 'kreuzberg', 'friedrichshain']):
|
||||||
|
base += 1
|
||||||
|
|
||||||
|
# Larger spaces are usually quieter inside
|
||||||
|
if area > 50000:
|
||||||
|
base = max(1, base - 1)
|
||||||
|
|
||||||
|
return min(5, base)
|
||||||
|
|
||||||
|
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||||
|
return {'forest': 95, 'wood': 95, 'nature_reserve': 95, 'heath': 90,
|
||||||
|
'meadow': 95, 'grass': 90, 'park': 80, 'garden': 70,
|
||||||
|
'playground': 45, 'recreation_ground': 75}.get(fclass, 75)
|
||||||
|
|
||||||
|
def _estimate_transport_score(self, district: str) -> int:
|
||||||
|
district_lower = district.lower()
|
||||||
|
if 'mitte' in district_lower:
|
||||||
|
return 5
|
||||||
|
elif any(name in district_lower for name in ['charlottenburg', 'kreuzberg', 'friedrichshain', 'pankow']):
|
||||||
|
return 4
|
||||||
|
else:
|
||||||
|
return 3
|
||||||
|
|
||||||
|
def _estimate_playground_quality(self, space_data: Dict) -> int:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
area = space_data.get('area_sqm', 0)
|
||||||
|
|
||||||
|
if fclass == 'playground':
|
||||||
|
return 85
|
||||||
|
elif 'playground' in str(tags.values()).lower():
|
||||||
|
return 75
|
||||||
|
elif fclass == 'park':
|
||||||
|
# Larger parks more likely to have good playgrounds
|
||||||
|
return 60 if area > 10000 else 45
|
||||||
|
else:
|
||||||
|
return 30
|
||||||
|
|
||||||
|
def _estimate_sports_facilities(self, space_data: Dict) -> bool:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
name = space_data.get('name', '').lower()
|
||||||
|
area = space_data.get('area_sqm', 0)
|
||||||
|
|
||||||
|
# Explicit indicators
|
||||||
|
if (fclass == 'recreation_ground' or
|
||||||
|
'sport' in str(tags.values()).lower() or
|
||||||
|
any(term in name for term in ['sport', 'football', 'tennis', 'recreation'])):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# Large parks often have sports facilities
|
||||||
|
return fclass == 'park' and area > 20000
|
||||||
|
|
||||||
|
def _allows_bbq(self, space_data: Dict) -> bool:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
area = space_data.get('area_sqm', 0)
|
||||||
|
|
||||||
|
# Check explicit BBQ tags
|
||||||
|
bbq_tag = tags.get('bbq', '').lower()
|
||||||
|
if bbq_tag == 'yes':
|
||||||
|
return True
|
||||||
|
elif bbq_tag == 'no':
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Default based on type and size
|
||||||
|
return fclass in ['park', 'recreation_ground'] and area > 5000
|
||||||
|
|
||||||
|
    async def process_filtered_green_spaces(self):
        """Main processing pipeline for filtered green spaces.

        Parses the local OSM extract, keeps only spaces meeting the size
        cut-off, enriches each with real tree/toilet data, and returns the
        list of enhanced dicts (empty list when nothing qualifies).
        """
        print("🌳 Processing Significant Berlin Green Spaces")
        print("=" * 55)
        print(f"• Filtering for spaces ≥ {self.min_area_sqm:,} sqm")
        print(f"• Processing up to {self.max_spaces} significant spaces")
        print(f"• Enhancing with real Berlin tree + toilet data")
        print("=" * 55)

        # Step 1: Parse and filter OSM data
        filtered_spaces = self.parse_and_filter_osm_data()

        if not filtered_spaces:
            print("❌ No significant green spaces found")
            return []

        print(f"\n🔧 Enhancing {len(filtered_spaces)} significant spaces with real data...")

        # Step 2: Enhance with real data
        enhanced_spaces = []

        for i, space_data in enumerate(filtered_spaces, 1):
            area_ha = space_data['area_sqm'] / 10000
            print(f"[{i:3d}/{len(filtered_spaces)}] {space_data['name'][:40]:40} ({area_ha:.1f} ha)")

            # Enhancement returns None on failure; failed spaces are skipped.
            result = await self.enhance_green_space_with_real_data(space_data)
            if result:
                enhanced_spaces.append(result)
                trees = result["tree_data"]["total_trees"]
                toilets = result["toilet_accessibility"]["nearby_toilets_count"]
                print(f" ✅ {trees:3d} trees, {toilets} toilets")
            else:
                print(f" ❌ Enhancement failed")

            # Progress update every 50 spaces
            if i % 50 == 0:
                print(f"\n 📊 Progress: {len(enhanced_spaces)}/{i} enhanced successfully")

            # Small delay to be nice to services
            await asyncio.sleep(0.1)

        print(f"\n🎉 Successfully enhanced {len(enhanced_spaces)} significant green spaces!")
        return enhanced_spaces
    def save_enhanced_data(self, enhanced_spaces: List[Dict]):
        """Save the filtered and enhanced dataset.

        Writes ``significant_berlin_green_spaces.json`` into the processed
        directory, including summary statistics and a per-district
        breakdown, prints a final report, and returns the output file path.

        NOTE(review): divides by ``len(enhanced_spaces)`` for coverage rates —
        callers must not pass an empty list (main() already guards this).
        """
        output_file = self.processed_dir / "significant_berlin_green_spaces.json"

        # Calculate comprehensive statistics
        with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
        with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
        total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)
        total_area = sum(s["area_sqm"] for s in enhanced_spaces)

        # Named vs unnamed spaces
        named_spaces = len([s for s in enhanced_spaces if s["osm_metadata"]["has_official_name"]])

        # Area distribution
        large_spaces = len([s for s in enhanced_spaces if s["area_sqm"] > 50000])  # > 5 hectares
        medium_spaces = len([s for s in enhanced_spaces if 10000 <= s["area_sqm"] <= 50000])  # 1-5 hectares
        small_spaces = len([s for s in enhanced_spaces if s["area_sqm"] < 10000])  # < 1 hectare

        # District breakdown
        by_district = {}
        for space in enhanced_spaces:
            district = space['neighborhood']
            if district not in by_district:
                by_district[district] = []
            by_district[district].append(space)

        # Full output document: data + provenance + derived statistics.
        data = {
            "green_spaces": enhanced_spaces,
            "total_count": len(enhanced_spaces),
            "last_updated": datetime.now().isoformat(),
            "data_sources": [
                "filtered_osm_extract_significant_spaces_only",
                "berlin_tree_cadastre_via_street_tree_service",
                "berlin_toilet_locations_via_berlin_data_service"
            ],
            "processing_info": {
                "filtering_criteria": {
                    "minimum_area_sqm": self.min_area_sqm,
                    "maximum_spaces_processed": self.max_spaces,
                    "includes_only_significant_spaces": True
                },
                "enhancement_method": "real_berlin_tree_and_toilet_data",
                "coordinate_system": "WGS84"
            },
            "summary_stats": {
                "total_spaces": len(enhanced_spaces),
                "spaces_with_tree_data": with_trees,
                "spaces_with_toilet_data": with_toilets,
                "total_trees_analyzed": total_trees,
                "total_area_hectares": round(total_area / 10000, 1),
                "coverage_rates": {
                    "tree_data": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%",
                    "toilet_data": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%"
                },
                "space_categories": {
                    "named_spaces": named_spaces,
                    "unnamed_spaces": len(enhanced_spaces) - named_spaces,
                    "large_spaces_over_5ha": large_spaces,
                    "medium_spaces_1_5ha": medium_spaces,
                    "smaller_spaces_under_1ha": small_spaces
                }
            },
            "district_breakdown": {
                district: len(spaces) for district, spaces in by_district.items()
            }
        }

        # ensure_ascii=False keeps German umlauts readable in the JSON file.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"\n📁 Comprehensive dataset saved: {output_file}")
        print(f"\n📊 Final Statistics:")
        print(f" 🌳 {len(enhanced_spaces)} significant green spaces")
        print(f" 📛 {named_spaces} with official names, {len(enhanced_spaces) - named_spaces} discovered areas")
        print(f" 🌲 {with_trees} spaces with tree data ({round((with_trees/len(enhanced_spaces))*100)}%)")
        print(f" 🚻 {with_toilets} spaces with toilet data ({round((with_toilets/len(enhanced_spaces))*100)}%)")
        print(f" 🌿 {total_trees:,} total trees analyzed")
        print(f" 📏 {round(total_area/10000, 1)} hectares total area")

        print(f"\n🏙️ District Distribution:")
        # Districts sorted by how many spaces they contain, descending.
        for district, spaces in sorted(by_district.items(), key=lambda x: len(x[1]), reverse=True):
            print(f" • {district}: {len(spaces)} spaces")

        print(f"\n📈 Size Categories:")
        print(f" • Large (>5 ha): {large_spaces} spaces")
        print(f" • Medium (1-5 ha): {medium_spaces} spaces")
        print(f" • Smaller (<1 ha): {small_spaces} spaces")

        print(f"\n✨ This dataset provides comprehensive coverage of Berlin's")
        print(f" significant green spaces with real tree and toilet data!")

        return output_file
async def main():
    """CLI entry point: run the full pipeline and persist the results."""
    processor = FilteredOSMProcessor()

    try:
        enhanced_spaces = await processor.process_filtered_green_spaces()

        if enhanced_spaces:
            processor.save_enhanced_data(enhanced_spaces)
            print(f"\n🎯 SUCCESS! Ready to use in your API for accurate personality scoring!")
        else:
            print("❌ No spaces were successfully processed.")

    except KeyboardInterrupt:
        # Allow a clean Ctrl-C without a traceback.
        print("\n⚠️ Process interrupted by user")
    except Exception as e:
        # Surface the error, then re-raise so the process exits non-zero.
        print(f"❌ Error: {e}")
        raise
# Script entry point: run the async pipeline when executed directly.
if __name__ == "__main__":
    asyncio.run(main())
|
|
@ -0,0 +1,613 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Process Berlin green spaces from local OSM data file.
|
||||||
|
Downloads Berlin OSM extract once, then processes locally without API dependencies.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import asyncio
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import List, Dict, Optional, Tuple
|
||||||
|
import sys
|
||||||
|
import gzip
|
||||||
|
import math
|
||||||
|
|
||||||
|
# Add the app directory to Python path to import services
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.services.street_tree_service import StreetTreeService
|
||||||
|
from app.services.berlin_data_service import BerlinDataService
|
||||||
|
|
||||||
|
|
||||||
|
class LocalOSMProcessor:
|
||||||
|
def __init__(self, data_dir: str = "app/data"):
|
||||||
|
self.data_dir = Path(data_dir)
|
||||||
|
self.raw_dir = self.data_dir / "osm-raw"
|
||||||
|
self.processed_dir = self.data_dir / "processed"
|
||||||
|
|
||||||
|
# Create directories
|
||||||
|
self.raw_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
self.processed_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Initialize existing services
|
||||||
|
self.tree_service = StreetTreeService()
|
||||||
|
self.berlin_data = BerlinDataService()
|
||||||
|
|
||||||
|
# Berlin bounding box for filtering
|
||||||
|
self.berlin_bbox = {
|
||||||
|
'min_lat': 52.3370, 'max_lat': 52.6755,
|
||||||
|
'min_lon': 13.0882, 'max_lon': 13.7611
|
||||||
|
}
|
||||||
|
|
||||||
|
    def download_berlin_osm_extract(self):
        """Download the Berlin OSM extract from Geofabrik (one-time, cached).

        Tries the PBF URL first, then the bz2 fallback; streams the body to
        disk with a progress indicator.  Returns the local file path; raises
        if no mirror succeeds.

        NOTE(review): the cache check only looks for the .pbf filename, so a
        previous .bz2 download would be re-fetched — confirm this is intended.
        """
        osm_file = self.raw_dir / "berlin-latest.osm.pbf"

        # Cached from a previous run — skip the download entirely.
        if osm_file.exists():
            print(f"✅ OSM file already exists: {osm_file}")
            return osm_file

        # Try PBF format first (smaller), fallback to XML
        urls = [
            "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
            "https://download.geofabrik.de/europe/germany/berlin-latest.osm.bz2"
        ]

        for url in urls:
            try:
                print(f"Downloading Berlin OSM data from {url}")
                print("This is a one-time download (~50MB)...")

                # stream=True avoids holding the full archive in memory.
                response = requests.get(url, stream=True, timeout=300)
                response.raise_for_status()

                filename = url.split('/')[-1]
                local_file = self.raw_dir / filename

                # Download with progress
                total_size = int(response.headers.get('content-length', 0))
                downloaded = 0

                with open(local_file, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
                            downloaded += len(chunk)
                            if total_size > 0:
                                percent = (downloaded / total_size) * 100
                                print(f"\rDownload progress: {percent:.1f}%", end="")

                print(f"\n✅ Downloaded: {local_file}")
                return local_file

            except Exception as e:
                # Move on to the next mirror/format before giving up.
                print(f"❌ Failed to download {url}: {e}")
                continue

        raise Exception("Could not download OSM data from any source")
    def download_simple_osm_extract(self):
        """Download a plain-XML green-space extract via the Overpass API.

        Fallback for environments without PBF tooling: performs a one-time
        Overpass export of Berlin's green-space ways (leisure/landuse/natural
        tags within the bounding box) and caches it on disk.  Returns the
        local file path; re-raises on download failure.
        """
        osm_file = self.raw_dir / "berlin_green_spaces.osm"

        # Cached from a previous run — skip the download entirely.
        if osm_file.exists():
            print(f"✅ OSM file already exists: {osm_file}")
            return osm_file

        # Use Overpass API to get a one-time export of green spaces
        print("Downloading Berlin green spaces extract...")

        overpass_url = "http://overpass-api.de/api/interpreter"

        # Query for all green spaces in Berlin (one-time download)
        # 'out geom meta;' inlines node coordinates into each way element.
        query = f"""
        [out:xml][timeout:120];
        (
          way["leisure"~"^(park|garden|nature_reserve|recreation_ground|playground|common)$"]
            ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
          way["landuse"~"^(forest|grass|meadow|recreation_ground|village_green|allotments)$"]
            ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
          way["natural"~"^(forest|grass|meadow|scrub|heath|wood)$"]
            ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
        );
        out geom meta;
        """

        try:
            response = requests.post(overpass_url, data=query, timeout=180)
            response.raise_for_status()

            with open(osm_file, 'w', encoding='utf-8') as f:
                f.write(response.text)

            print(f"✅ Downloaded green spaces extract: {osm_file}")
            return osm_file

        except Exception as e:
            print(f"❌ Failed to download OSM extract: {e}")
            raise
    def parse_osm_xml(self, osm_file: Path) -> List[Dict]:
        """Parse OSM XML file to extract green spaces.

        Accepts a plain ``.osm`` XML file or a gzip-compressed one.  Each
        qualifying ``<way>`` is converted via ``_process_osm_way``; ways that
        fail to parse are silently skipped (best-effort).  Returns the list
        of extracted green-space dicts, or an empty list on a parse failure.
        """
        print(f"Parsing OSM data from {osm_file}...")

        green_spaces = []

        try:
            # Handle different file formats
            if osm_file.suffix == '.gz':
                with gzip.open(osm_file, 'rt', encoding='utf-8') as f:
                    tree = ET.parse(f)
            else:
                tree = ET.parse(osm_file)

            root = tree.getroot()

            # Parse ways (areas)
            ways = root.findall('.//way')
            print(f"Found {len(ways)} ways in OSM data")

            for way in ways:
                try:
                    processed_space = self._process_osm_way(way, root)
                    if processed_space:
                        green_spaces.append(processed_space)
                except Exception as e:
                    # Best-effort: a malformed way must not abort the whole parse.
                    continue

            print(f"✅ Extracted {len(green_spaces)} green spaces from OSM data")
            return green_spaces

        except Exception as e:
            print(f"❌ Error parsing OSM file: {e}")
            return []
    def _process_osm_way(self, way, root) -> Optional[Dict]:
        """Process a single OSM way into green space format.

        Returns a dict with id, name, fclass, centroid lat/lng, area_sqm,
        district, and raw OSM tags — or ``None`` when the way is not a green
        space, has fewer than 3 in-bounds vertices, or is smaller than 500 m².
        """
        # Get tags
        tags = {}
        for tag in way.findall('tag'):
            tags[tag.get('k')] = tag.get('v')

        # Check if it's a green space
        green_space_type = self._get_green_space_type(tags)
        if not green_space_type:
            return None

        # Get node references
        nd_refs = [nd.get('ref') for nd in way.findall('nd')]
        if len(nd_refs) < 3:  # Need at least 3 points for an area
            return None

        # Find node coordinates
        # NOTE(review): root.find with an id predicate scans the whole tree per
        # ref — O(ways × nodes) overall. A prebuilt {id: node} index would be
        # much faster on large extracts.
        coordinates = []
        for nd_ref in nd_refs:
            node = root.find(f".//node[@id='{nd_ref}']")
            if node is not None:
                lat = float(node.get('lat'))
                lon = float(node.get('lon'))

                # Check if within Berlin bounds
                if (self.berlin_bbox['min_lat'] <= lat <= self.berlin_bbox['max_lat'] and
                    self.berlin_bbox['min_lon'] <= lon <= self.berlin_bbox['max_lon']):
                    coordinates.append((lat, lon))

        if len(coordinates) < 3:
            return None

        # Calculate centroid and area
        centroid_lat, centroid_lon = self._calculate_centroid(coordinates)
        area_sqm = self._calculate_area(coordinates)

        # Skip very small areas
        if area_sqm < 500:
            return None

        # Get name (synthesize one from type + centroid when unnamed)
        name = tags.get('name', f"{green_space_type.title()} near {centroid_lat:.3f}, {centroid_lon:.3f}")

        # Estimate district
        district = self._estimate_district(centroid_lat, centroid_lon)

        return {
            'id': f"osm_way_{way.get('id')}",
            'name': name,
            'fclass': green_space_type,
            'lat': centroid_lat,
            'lng': centroid_lon,
            'area_sqm': int(area_sqm),
            'district': district,
            'osm_tags': tags,
            'osm_id': way.get('id')
        }
def _get_green_space_type(self, tags: Dict) -> Optional[str]:
|
||||||
|
"""Determine if tags represent a green space and what type."""
|
||||||
|
# Check leisure tags
|
||||||
|
leisure = tags.get('leisure', '')
|
||||||
|
if leisure in ['park', 'garden', 'nature_reserve', 'recreation_ground',
|
||||||
|
'playground', 'common', 'golf_course']:
|
||||||
|
return leisure
|
||||||
|
|
||||||
|
# Check landuse tags
|
||||||
|
landuse = tags.get('landuse', '')
|
||||||
|
if landuse in ['forest', 'grass', 'meadow', 'recreation_ground',
|
||||||
|
'village_green', 'allotments']:
|
||||||
|
return landuse
|
||||||
|
|
||||||
|
# Check natural tags
|
||||||
|
natural = tags.get('natural', '')
|
||||||
|
if natural in ['forest', 'grass', 'meadow', 'scrub', 'heath', 'wood']:
|
||||||
|
return natural
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _calculate_centroid(self, coordinates: List[Tuple[float, float]]) -> Tuple[float, float]:
|
||||||
|
"""Calculate centroid of polygon."""
|
||||||
|
lat_sum = sum(coord[0] for coord in coordinates)
|
||||||
|
lon_sum = sum(coord[1] for coord in coordinates)
|
||||||
|
count = len(coordinates)
|
||||||
|
|
||||||
|
return lat_sum / count, lon_sum / count
|
||||||
|
|
||||||
|
def _calculate_area(self, coordinates: List[Tuple[float, float]]) -> float:
|
||||||
|
"""Calculate area of polygon using shoelace formula."""
|
||||||
|
if len(coordinates) < 3:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Convert to approximate meters for Berlin
|
||||||
|
lat_to_m = 111000 # meters per degree latitude
|
||||||
|
lon_to_m = 111000 * math.cos(math.radians(52.5)) # adjust for Berlin latitude
|
||||||
|
|
||||||
|
# Convert coordinates to meters
|
||||||
|
coords_m = [(lat * lat_to_m, lon * lon_to_m) for lat, lon in coordinates]
|
||||||
|
|
||||||
|
# Shoelace formula
|
||||||
|
area = 0
|
||||||
|
n = len(coords_m)
|
||||||
|
|
||||||
|
for i in range(n):
|
||||||
|
j = (i + 1) % n
|
||||||
|
area += coords_m[i][0] * coords_m[j][1]
|
||||||
|
area -= coords_m[j][0] * coords_m[i][1]
|
||||||
|
|
||||||
|
return abs(area) / 2
|
||||||
|
|
||||||
|
def _estimate_district(self, lat: float, lng: float) -> str:
|
||||||
|
"""Rough district estimation from coordinates."""
|
||||||
|
# Very rough Berlin district boundaries
|
||||||
|
if lat > 52.55:
|
||||||
|
return "Pankow" if lng < 13.45 else "Lichtenberg"
|
||||||
|
elif lat > 52.52:
|
||||||
|
if lng < 13.25:
|
||||||
|
return "Charlottenburg-Wilmersdorf"
|
||||||
|
elif lng < 13.42:
|
||||||
|
return "Mitte"
|
||||||
|
else:
|
||||||
|
return "Friedrichshain-Kreuzberg"
|
||||||
|
elif lat > 52.45:
|
||||||
|
if lng < 13.25:
|
||||||
|
return "Steglitz-Zehlendorf"
|
||||||
|
elif lng < 13.42:
|
||||||
|
return "Tempelhof-Schöneberg"
|
||||||
|
else:
|
||||||
|
return "Neukölln"
|
||||||
|
else:
|
||||||
|
return "Treptow-Köpenick"
|
||||||
|
|
||||||
|
    async def enhance_green_space_with_real_data(self, space_data: Dict):
        """Enhance one parsed OSM green space with real tree and toilet data.

        Combines OSM-derived fields (id, name, fclass, lat/lng, area_sqm,
        district, osm_tags) with live tree/toilet service lookups into the
        API-ready dict.  Returns the enriched dict, or ``None`` when any step
        fails (the error is printed, not raised).
        """
        try:
            lat = space_data['lat']
            lng = space_data['lng']
            area_sqm = space_data['area_sqm']

            print(f"Enhancing {space_data['name']} ({space_data['district']})...")

            # Adaptive radius: sqrt(area) tracks side length; clamp to 100-350 m.
            radius = min(350, max(100, int((area_sqm ** 0.5) * 0.7)))

            # Get real data using existing services
            tree_response = await self.tree_service.get_trees_near_location(
                lat, lng, radius_m=radius
            )

            # Toilets searched in a fixed 600 m radius regardless of space size.
            nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 600)

            # Calculate scores
            toilet_score = self._score_toilet_accessibility(nearby_toilets)
            space_type = self._map_to_space_type(space_data.get('fclass', ''))

            enhanced_space = {
                "id": space_data['id'],
                "name": space_data['name'],
                "description": f"Berlin {space_data.get('fclass', 'green space')} from local OSM data",
                "type": space_type,
                "coordinates": {
                    "lat": float(lat),
                    "lng": float(lng)
                },
                "neighborhood": space_data.get('district', 'Unknown'),
                "area_sqm": area_sqm,
                "perimeter_m": int(4 * (area_sqm ** 0.5)),  # rough estimate (treats the space as a square)

                # Environmental features from real tree data
                "environmental": {
                    "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                    "shade_quality": tree_response.shade_analysis.shade_quality_score,
                    "noise_level": self._estimate_noise_level(space_data),
                    "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                    "water_features": self._detect_water_features(space_data),
                    "natural_surface_percent": self._estimate_natural_surface(space_data.get('fclass', ''))
                },

                # Real tree metrics
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "species_diversity_score": tree_response.metrics.species_diversity_score,
                    "mature_trees_count": tree_response.metrics.mature_trees_count,
                    "young_trees_count": tree_response.metrics.young_trees_count,
                    "average_tree_age": tree_response.metrics.average_tree_age,
                    "average_height": tree_response.metrics.average_height,
                    "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                    "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                    "dominant_species": tree_response.metrics.dominant_species[:3]
                },

                # Real toilet accessibility
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "accessibility_score": toilet_score,
                    # NOTE(review): assumes the toilet list is sorted nearest-first — confirm in BerlinDataService.
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                    "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                    "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
                },

                # Standard features (heuristics, not measured data)
                "accessibility": {
                    "wheelchair_accessible": True,
                    "public_transport_score": self._estimate_transport_score(space_data.get('district', '')),
                    "cycling_infrastructure": area_sqm > 4000,
                    "parking_availability": 2 if area_sqm > 20000 else 1,
                    "lighting_quality": 3 if 'mitte' in space_data.get('district', '').lower() else 2
                },

                "recreation": {
                    "playground_quality": self._estimate_playground_quality(space_data),
                    "sports_facilities": self._estimate_sports_facilities(space_data),
                    "running_paths": area_sqm > 6000,
                    "cycling_paths": area_sqm > 12000,
                    "dog_friendly": True,
                    "bbq_allowed": self._allows_bbq(space_data)
                },

                # OSM metadata
                "osm_metadata": {
                    "osm_id": space_data.get('osm_id'),
                    "tags": space_data.get('osm_tags', {}),
                    "source": "local_osm_extract"
                },

                "last_updated": datetime.now().isoformat(),
                "data_sources": ["local_osm_extract", "berlin_tree_cadastre", "berlin_toilets"],
                "confidence_score": 92
            }

            trees = tree_response.metrics.total_trees
            toilets = len(nearby_toilets)
            print(f"✅ {space_data['name']}: {trees} trees, {toilets} toilets")

            return enhanced_space

        except Exception as e:
            # Best-effort enrichment: log and skip this space rather than abort the batch.
            print(f"❌ Error enhancing {space_data['name']}: {e}")
            return None
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||||
|
if not nearby_toilets:
|
||||||
|
return 25
|
||||||
|
|
||||||
|
nearest = nearby_toilets[0]['distance_meters']
|
||||||
|
if nearest <= 200:
|
||||||
|
score = 90
|
||||||
|
elif nearest <= 400:
|
||||||
|
score = 70
|
||||||
|
else:
|
||||||
|
score = 50
|
||||||
|
|
||||||
|
# Quality bonuses
|
||||||
|
free = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||||
|
accessible = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||||
|
score += min(10, free * 5 + accessible * 3)
|
||||||
|
|
||||||
|
return min(100, score)
|
||||||
|
|
||||||
|
def _map_to_space_type(self, fclass: str) -> str:
|
||||||
|
mapping = {
|
||||||
|
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN', 'wood': 'FOREST',
|
||||||
|
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||||
|
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||||
|
'common': 'PARK', 'village_green': 'GRASS', 'allotments': 'GARDEN'
|
||||||
|
}
|
||||||
|
return mapping.get(fclass, 'PARK')
|
||||||
|
|
||||||
|
def _detect_water_features(self, space_data: Dict) -> bool:
|
||||||
|
name = space_data.get('name', '').lower()
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
|
||||||
|
water_keywords = ['see', 'teich', 'pond', 'lake', 'bach', 'spree', 'wasser']
|
||||||
|
return any(keyword in name for keyword in water_keywords) or 'water' in tags.values()
|
||||||
|
|
||||||
|
def _estimate_noise_level(self, space_data: Dict) -> int:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
district = space_data.get('district', '')
|
||||||
|
|
||||||
|
base = {'forest': 1, 'wood': 1, 'nature_reserve': 1, 'meadow': 2,
|
||||||
|
'park': 2, 'garden': 2, 'playground': 3}.get(fclass, 2)
|
||||||
|
|
||||||
|
if any(busy in district.lower() for busy in ['mitte', 'kreuzberg', 'friedrichshain']):
|
||||||
|
base += 1
|
||||||
|
|
||||||
|
return min(5, base)
|
||||||
|
|
||||||
|
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||||
|
return {'forest': 95, 'wood': 95, 'nature_reserve': 90, 'meadow': 95,
|
||||||
|
'grass': 85, 'park': 75, 'garden': 65, 'playground': 40}.get(fclass, 70)
|
||||||
|
|
||||||
|
def _estimate_transport_score(self, district: str) -> int:
|
||||||
|
district_lower = district.lower()
|
||||||
|
if 'mitte' in district_lower:
|
||||||
|
return 5
|
||||||
|
elif any(name in district_lower for name in ['charlottenburg', 'kreuzberg', 'friedrichshain']):
|
||||||
|
return 4
|
||||||
|
else:
|
||||||
|
return 3
|
||||||
|
|
||||||
|
def _estimate_playground_quality(self, space_data: Dict) -> int:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
|
||||||
|
if fclass == 'playground':
|
||||||
|
return 80
|
||||||
|
elif 'playground' in tags.values():
|
||||||
|
return 75
|
||||||
|
elif fclass == 'park':
|
||||||
|
return 55
|
||||||
|
else:
|
||||||
|
return 30
|
||||||
|
|
||||||
|
def _estimate_sports_facilities(self, space_data: Dict) -> bool:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
name = space_data.get('name', '').lower()
|
||||||
|
|
||||||
|
return (fclass == 'recreation_ground' or
|
||||||
|
'sport' in str(tags.values()).lower() or
|
||||||
|
any(term in name for term in ['sport', 'football', 'tennis']))
|
||||||
|
|
||||||
|
def _allows_bbq(self, space_data: Dict) -> bool:
|
||||||
|
fclass = space_data.get('fclass', '')
|
||||||
|
area = space_data.get('area_sqm', 0)
|
||||||
|
tags = space_data.get('osm_tags', {})
|
||||||
|
|
||||||
|
# Check explicit BBQ tags
|
||||||
|
if tags.get('bbq') == 'yes':
|
||||||
|
return True
|
||||||
|
elif tags.get('bbq') == 'no':
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Default based on type and size
|
||||||
|
return fclass in ['park', 'recreation_ground'] and area > 5000
|
||||||
|
|
||||||
|
async def process_all_green_spaces(self):
    """Main processing pipeline.

    Downloads a local OSM extract, parses its green spaces, then enhances
    each one with real Berlin tree and toilet data.

    Returns:
        list: the enhanced space dicts (empty list on any failure).
    """
    print("🌳 Processing Berlin green spaces from local OSM data...")

    # Step 1: Get OSM data
    try:
        osm_file = self.download_simple_osm_extract()  # More reliable than PBF
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Narrowed to Exception so Ctrl-C still works.
        print("❌ Could not download OSM data")
        return []

    # Step 2: Parse green spaces
    green_spaces = self.parse_osm_xml(osm_file)

    if not green_spaces:
        print("❌ No green spaces found in OSM data")
        return []

    print(f"📊 Found {len(green_spaces)} green spaces to enhance")

    # Step 3: Enhance with real data
    enhanced_spaces = []

    for i, space_data in enumerate(green_spaces, 1):
        print(f"[{i}/{len(green_spaces)}]", end=" ")

        result = await self.enhance_green_space_with_real_data(space_data)
        if result:
            enhanced_spaces.append(result)

        if i % 20 == 0:
            print(f"\n   Progress: {len(enhanced_spaces)} enhanced so far...")

        # Brief pause so downstream data services aren't hammered.
        await asyncio.sleep(0.1)

    print(f"\n✅ Enhanced {len(enhanced_spaces)} spaces with real data!")
    return enhanced_spaces
|
||||||
|
|
||||||
|
def save_enhanced_data(self, enhanced_spaces: List[Dict]):
    """Save the final enhanced dataset to JSON.

    Writes the spaces plus provenance and summary statistics to
    `osm_berlin_green_spaces_enhanced.json` under `self.processed_dir`.

    Args:
        enhanced_spaces: fully-enhanced space dicts (may be empty).

    Returns:
        Path: the output file that was written.
    """
    output_file = self.processed_dir / "osm_berlin_green_spaces_enhanced.json"

    # Calculate statistics
    total_count = len(enhanced_spaces)
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)

    # FIX: guard against an empty dataset so the coverage percentages cannot
    # raise ZeroDivisionError (the quick processor already guards this way).
    tree_coverage = f"{round((with_trees / total_count) * 100, 1)}%" if total_count else "0%"
    toilet_coverage = f"{round((with_toilets / total_count) * 100, 1)}%" if total_count else "0%"

    data = {
        "green_spaces": enhanced_spaces,
        "total_count": total_count,
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "local_osm_extract_processed_offline",
            "berlin_tree_cadastre",
            "berlin_toilets"
        ],
        "processing_info": {
            "method": "local_osm_processing_no_api_dependency",
            "includes_all_osm_green_spaces": True,
            "enhanced_with_real_berlin_data": True
        },
        "summary_stats": {
            "total_spaces": total_count,
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "tree_coverage": tree_coverage,
            "toilet_coverage": toilet_coverage
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Saved comprehensive dataset: {output_file}")
    print(f"📊 {total_count} total green spaces")
    print(f"🌲 {with_trees} with tree data, 🚻 {with_toilets} with toilet data")
    print(f"🌿 {total_trees} total trees analyzed")
    print(f"\n✨ Ready to replace mock data in your API!")

    return output_file
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Entry point: run the full local OSM processing pipeline."""
    processor = LocalOSMProcessor()

    try:
        banner = "=" * 50
        print("🚀 Berlin Green Spaces: Local OSM Processing")
        print(banner)
        print("• Downloads OSM data once (no API dependency)")
        print("• Processes locally for all green spaces")
        print("• Enhances with real Berlin tree + toilet data")
        print(banner)

        enhanced_spaces = await processor.process_all_green_spaces()

        if enhanced_spaces:
            processor.save_enhanced_data(enhanced_spaces)

    except KeyboardInterrupt:
        print("\n⚠️ Interrupted")
    except Exception as e:
        # Top-level boundary: report and exit cleanly.
        print(f"❌ Error: {e}")


if __name__ == "__main__":
    asyncio.run(main())
|
|
@ -0,0 +1,558 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Quick Berlin green spaces processor.
|
||||||
|
Pre-filters OSM data efficiently, then processes only the best candidates.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import math
|
||||||
|
# from tqdm.asyncio import tqdm # Not available, remove tqdm dependency
|
||||||
|
from xml.etree.ElementTree import iterparse
|
||||||
|
|
||||||
|
# Add the app directory to Python path
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.services.street_tree_service import StreetTreeService
|
||||||
|
from app.services.berlin_data_service import BerlinDataService
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_polygon_area_sqm(coords):
    """Approximate a lat/lng polygon's area in square meters.

    Uses a planar projection around the polygon's centroid plus the
    Shoelace formula. `coords` is a sequence of (lat, lng) pairs.
    Degenerate or implausible results are snapped to sensible defaults.
    """
    if len(coords) < 3:
        return 5000  # Default for invalid polygons

    # Planar approximation: scale degrees to meters around the polygon center.
    count = len(coords)
    lat_center = sum(pt[0] for pt in coords) / count
    lng_center = sum(pt[1] for pt in coords) / count
    meters_per_lat = 111320  # roughly constant worldwide
    meters_per_lng = 111320 * math.cos(math.radians(lat_center))

    # Project every vertex to meters relative to the center.
    xs = [(lng - lng_center) * meters_per_lng for _, lng in coords]
    ys = [(lat - lat_center) * meters_per_lat for lat, _ in coords]

    # Shoelace formula over the projected coordinates.
    twice_area = 0.0
    for i in range(count):
        j = (i + 1) % count
        twice_area += xs[i] * ys[j] - xs[j] * ys[i]
    area = abs(twice_area) / 2

    # Clamp implausible results.
    if area < 100:          # degenerate / tiny polygon
        return 5000
    if area > 10000000:     # > 10 km² is suspicious for a single space
        return 500000       # cap at a large-park size
    return int(area)
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_search_radius(area_sqm):
    """Pick a tree-search radius (meters) proportional to the park's size."""
    # Ascending size thresholds: 1 ha, 5 ha, 20 ha.
    for threshold, radius in ((10000, 150), (50000, 300), (200000, 500)):
        if area_sqm < threshold:
            return radius
    return 800  # very large parks like Treptower Park
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_enhanced_shade_quality(tree_response, area_sqm):
    """Score shade quality 0-100 from real tree metrics.

    Combines crown-based shade coverage, counts of large mature trees,
    tree density, average height and crown diameter. `area_sqm` is kept
    for interface compatibility (density is already per-hectare).
    """
    metrics = tree_response.metrics
    shade_analysis = tree_response.shade_analysis

    def tiered(value, tiers):
        # tiers: descending (threshold, points) pairs; first match wins.
        for threshold, points in tiers:
            if value >= threshold:
                return points
        return 0

    score = 0

    # Factor 1: actual shade coverage (crown area based)
    score += tiered(metrics.shade_coverage_percent or 0,
                    ((60, 40), (40, 30), (20, 20), (10, 10)))

    # Factor 2: large mature trees cast the best shade
    score += tiered(len(shade_analysis.nearby_large_trees or []),
                    ((10, 25), (5, 20), (3, 15), (1, 10)))

    # Factor 3: tree density per area
    score += tiered(metrics.trees_per_hectare or 0,
                    ((50, 20), (30, 15), (20, 10), (10, 5)))

    # Factor 4: taller trees give better shade
    score += tiered(metrics.average_height or 0,
                    ((20, 10), (15, 8), (10, 5), (5, 3)))

    # Factor 5: crown diameter quality
    score += tiered(metrics.average_crown_diameter or 0,
                    ((12, 5), (8, 3), (5, 1)))

    return min(100, score)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_water_features(candidate):
    """Detect water features via OSM tags and name keywords."""
    osm_tags = candidate.get('tags', {})
    lowered_name = candidate.get('name', '').lower()

    # Any of these OSM keys carrying a watery value counts.
    watery_values = {'water', 'lake', 'pond', 'reservoir', 'river', 'stream'}
    tag_hit = any(
        osm_tags.get(key, '').lower() in watery_values
        for key in ('water', 'waterway', 'natural')
    )

    # Name-based indicators (German and English).
    name_hit = any(word in lowered_name for word in (
        'see', 'teich', 'weiher', 'water', 'lake', 'pond',
        'fluss', 'river', 'bach', 'creek',
    ))

    # Fountains ("Brunnen") count as water features too.
    fountain_hit = any(word in lowered_name
                       for word in ('brunnen', 'fountain', 'springbrunnen'))

    return tag_hit or name_hit or fountain_hit
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_berlin_district(lat: float, lng: float) -> str:
    """Estimate the Berlin district for a coordinate via coarse lat/lng bands.

    Each latitude band maps ascending longitude cut-offs to districts; the
    last element of each band is the catch-all for everything further east.
    """
    # (min_lat exclusive, ((lng upper bound, district), ...), easternmost fallback)
    bands = (
        (52.55,
         ((13.25, "Reinickendorf"), (13.45, "Pankow")),
         "Lichtenberg"),
        (52.52,
         ((13.20, "Spandau"), (13.30, "Charlottenburg-Wilmersdorf"),
          (13.42, "Mitte"), (13.48, "Friedrichshain-Kreuzberg")),
         "Lichtenberg"),
        (52.48,
         ((13.20, "Spandau"), (13.30, "Charlottenburg-Wilmersdorf"),
          (13.35, "Tempelhof-Schöneberg"), (13.42, "Mitte"),
          (13.48, "Friedrichshain-Kreuzberg")),
         "Lichtenberg"),
        (52.45,
         ((13.20, "Steglitz-Zehlendorf"), (13.35, "Tempelhof-Schöneberg"),
          (13.45, "Neukölln"), (13.55, "Treptow-Köpenick")),
         "Marzahn-Hellersdorf"),
        (float("-inf"),
         ((13.35, "Steglitz-Zehlendorf"),),
         "Treptow-Köpenick"),
    )

    for min_lat, lng_cuts, fallback in bands:
        if lat > min_lat:
            for upper_lng, district in lng_cuts:
                if lng < upper_lng:
                    return district
            return fallback
    return "Treptow-Köpenick"  # unreachable: last band accepts any latitude
|
||||||
|
|
||||||
|
|
||||||
|
def get_specific_neighborhood(district: str, lat: float, lng: float) -> str:
    """Refine a district to a neighborhood using coordinate bounding boxes.

    Falls back to the district name when no box matches (or the district
    has no sub-neighborhood boxes defined). First matching box wins.
    """
    # district -> ordered list of (min_lat, max_lat, min_lng, max_lng, name)
    boxes_by_district = {
        "Mitte": [
            (52.540, 52.560, 13.33, 13.38, "Wedding"),
            (52.515, 52.530, 13.33, 13.38, "Moabit"),
            (52.510, 52.520, 13.35, 13.38, "Tiergarten"),
            (52.525, 52.545, 13.40, 13.43, "Prenzlauer Berg"),
        ],
        "Charlottenburg-Wilmersdorf": [
            (52.485, 52.505, 13.30, 13.33, "Wilmersdorf"),
            (52.505, 52.525, 13.25, 13.33, "Charlottenburg"),
        ],
        "Friedrichshain-Kreuzberg": [
            (52.490, 52.510, 13.38, 13.42, "Kreuzberg"),
            (52.510, 52.525, 13.42, 13.48, "Friedrichshain"),
        ],
        "Tempelhof-Schöneberg": [
            (52.480, 52.500, 13.33, 13.37, "Schöneberg"),
            (52.460, 52.480, 13.37, 13.42, "Tempelhof"),
        ],
        "Steglitz-Zehlendorf": [
            (52.430, 52.450, 13.23, 13.30, "Zehlendorf"),
            (52.450, 52.470, 13.30, 13.35, "Steglitz"),
        ],
        "Treptow-Köpenick": [
            (52.430, 52.460, 13.55, 13.65, "Köpenick"),
            (52.480, 52.500, 13.45, 13.50, "Treptow"),
        ],
    }

    for min_lat, max_lat, min_lng, max_lng, neighborhood in boxes_by_district.get(district, []):
        if min_lat <= lat <= max_lat and min_lng <= lng <= max_lng:
            return neighborhood
    return district
|
||||||
|
|
||||||
|
|
||||||
|
async def quick_process():
    """Quick processing of significant Berlin green spaces.

    Pipeline: (1) single-pass iterparse scan of a local OSM XML extract to
    collect up to 100 candidate ways, (2) concurrent batched enhancement of
    each candidate with real tree and toilet data, (3) JSON dump of the
    enhanced dataset plus summary statistics.
    """
    print("🚀 Quick Berlin Green Spaces Processor")
    print("=" * 45)

    # Initialize services
    tree_service = StreetTreeService()
    berlin_data = BerlinDataService()

    # Pre-load and index trees once to avoid repeated indexing
    print("🔄 Pre-loading tree data and building spatial index...")
    await tree_service._load_trees()

    osm_file = Path("app/data/osm-raw/berlin_green_spaces.osm")

    if not osm_file.exists():
        print("❌ OSM file not found. Please ensure data is downloaded.")
        return

    print("🔍 Quick filtering for named parks and significant areas...")
    print(f"📁 OSM file size: {osm_file.stat().st_size / (1024*1024):.1f} MB")

    # Quick scan for good candidates
    candidates = []

    try:
        processed = 0

        print("🔍 Single-pass XML parsing - ways with embedded coordinates...")

        # Single pass: parse ways with embedded coordinates.
        # State machine over iterparse events: on 'start' we accumulate
        # tags/coords for the current <way>; on the way's 'end' we decide
        # whether it becomes a candidate.
        ways_processed = 0
        current_way_tags = {}
        current_way_coordinates = []
        in_way = False

        for event, elem in iterparse(osm_file, events=('start', 'end')):
            if event == 'start':
                if elem.tag == 'way':
                    in_way = True
                    current_way_tags = {}
                    current_way_coordinates = []
                    ways_processed += 1
                    if ways_processed % 1000 == 0:
                        print(f"Processed {ways_processed} ways, found {len(candidates)} candidates so far...")
                elif in_way and elem.tag == 'tag':
                    k = elem.get('k')
                    v = elem.get('v')
                    if k and v:
                        current_way_tags[k] = v
                elif in_way and elem.tag == 'nd':
                    # Extract coordinates directly from nd element
                    # NOTE(review): plain OSM <nd> elements only carry a ref,
                    # not lat/lon — this assumes a pre-processed extract with
                    # embedded coordinates; verify against the data file.
                    lat = elem.get('lat')
                    lon = elem.get('lon')
                    if lat and lon:
                        current_way_coordinates.append((float(lat), float(lon)))
                continue

            # 'end' events from here on.
            if elem.tag == 'way' and in_way:
                in_way = False
                tags = current_way_tags
                coordinates = current_way_coordinates

                # Quick filters for promising spaces - be more lenient
                has_name = 'name' in tags
                is_park = (tags.get('leisure') in ['park', 'garden', 'nature_reserve'] or
                           tags.get('landuse') in ['forest', 'grass', 'recreation_ground'])

                # Also accept common green space tags
                has_green_tags = any(key in tags for key in ['leisure', 'landuse', 'natural', 'amenity'])

                if not (has_name or is_park or has_green_tags):
                    elem.clear()  # Free memory
                    continue

                # Use embedded coordinates directly
                if not coordinates:
                    elem.clear()  # Free memory
                    continue

                # Get center coordinate and all coordinates for area calculation
                lat, lng = coordinates[0] if len(coordinates) == 1 else (
                    sum(lat for lat, lng in coordinates) / len(coordinates),
                    sum(lng for lat, lng in coordinates) / len(coordinates)
                )

                # Basic Berlin bounds check
                if not (52.3 <= lat <= 52.7 and 13.0 <= lng <= 13.8):
                    elem.clear()  # Free memory
                    continue

                name = tags.get('name', f"Unnamed {tags.get('leisure', tags.get('landuse', 'area'))}")
                space_type = tags.get('leisure') or tags.get('landuse') or 'park'

                candidate = {
                    'id': f"quick_{elem.get('id')}",
                    'name': name,
                    'type': space_type,
                    'lat': lat,
                    'lng': lng,
                    'has_name': has_name,
                    'tags': tags,
                    'coordinates': coordinates  # Store all coordinates for area calculation
                }

                candidates.append(candidate)
                processed += 1

                # Limit for quick processing
                if len(candidates) >= 100:
                    elem.clear()  # Free memory
                    break

                elem.clear()  # Free memory
            else:
                elem.clear()  # Free memory

        print(f"✅ Found {len(candidates)} promising green spaces")

    except Exception as e:
        print(f"❌ Error in quick filtering: {e}")
        return

    if not candidates:
        print("No candidates found")
        return

    # Sort by having names (better quality)
    candidates.sort(key=lambda x: x['has_name'], reverse=True)

    print(f"\n🔧 Enhancing top {len(candidates)} spaces with real data...")

    # Process candidates in parallel with batching
    batch_size = 10  # Process 10 candidates at a time
    enhanced_spaces = []

    async def process_candidate(candidate):
        """Process a single candidate with tree and toilet data.

        Returns (enhanced_space_dict, tree_count, toilet_count), or
        (None, 0, 0) if enhancement failed for this candidate.
        """
        try:
            # Calculate actual area from OSM polygon coordinates
            area_sqm = calculate_polygon_area_sqm(candidate.get('coordinates', []))
            search_radius = calculate_search_radius(area_sqm)

            # Get real tree data and toilet data concurrently with dynamic radius
            tree_task = tree_service.get_trees_near_location(
                candidate['lat'], candidate['lng'], radius_m=search_radius
            )
            toilet_task = berlin_data.get_toilets_near_point(
                candidate['lat'], candidate['lng'], 500
            )

            print(f"🔍 Getting data for {candidate['name'][:30]}... (area: {area_sqm/10000:.1f}ha, radius: {search_radius}m)")
            tree_response, nearby_toilets = await asyncio.gather(tree_task, toilet_task)

            # Create enhanced space
            enhanced_space = {
                "id": candidate['id'],
                "name": candidate['name'],
                "description": f"Berlin {candidate['type']} discovered via quick OSM processing",
                "type": "PARK",  # Simplified for now
                "coordinates": {
                    "lat": candidate['lat'],
                    "lng": candidate['lng']
                },
                "neighborhood": get_specific_neighborhood(estimate_berlin_district(candidate['lat'], candidate['lng']), candidate['lat'], candidate['lng']),
                "area_sqm": area_sqm,  # Real calculated area

                # Environmental features from real tree data
                "environmental": {
                    "tree_coverage_percent": max(5, int(tree_response.metrics.shade_coverage_percent)),  # Use actual crown area calculation
                    "shade_quality": calculate_enhanced_shade_quality(tree_response, area_sqm),
                    "noise_level": 2,  # Default
                    "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                    "water_features": detect_water_features(candidate),
                    "natural_surface_percent": 80
                },

                # Real tree data
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "species_diversity_score": tree_response.metrics.species_diversity_score,
                    "mature_trees_count": tree_response.metrics.mature_trees_count,
                    "young_trees_count": tree_response.metrics.young_trees_count,
                    "average_tree_age": tree_response.metrics.average_tree_age,
                    "average_height": tree_response.metrics.average_height,
                    "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                    "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                    "dominant_species": tree_response.metrics.dominant_species[:3]
                },

                # Real toilet data
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "accessibility_score": 80 if nearby_toilets else 30,
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                    "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                    "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
                },

                # Standard features (fixed defaults; not derived from data)
                "accessibility": {
                    "wheelchair_accessible": True,
                    "public_transport_score": 3,
                    "cycling_infrastructure": True,
                    "parking_availability": 2,
                    "lighting_quality": 3
                },

                "recreation": {
                    "playground_quality": 60 if candidate['type'] == 'park' else 30,
                    "sports_facilities": candidate['type'] == 'recreation_ground',
                    "running_paths": True,
                    "cycling_paths": True,
                    "dog_friendly": True,
                    "bbq_allowed": candidate['type'] in ['park', 'recreation_ground']
                },

                "osm_metadata": {
                    "has_official_name": candidate['has_name'],
                    "tags": candidate['tags'],
                    "source": "quick_osm_processing"
                },

                "last_updated": datetime.now().isoformat(),
                "data_sources": ["quick_osm_scan", "berlin_tree_cadastre", "berlin_toilets"],
                "confidence_score": 90 if candidate['has_name'] else 75
            }

            return enhanced_space, tree_response.metrics.total_trees, len(nearby_toilets)

        except Exception as e:
            print(f"❌ Error processing {candidate['name']}: {e}")
            return None, 0, 0

    # Process candidates in batches with progress bar
    for i in range(0, len(candidates), batch_size):
        batch = candidates[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}/{(len(candidates) + batch_size - 1)//batch_size}")

        # Process batch concurrently with progress bar
        tasks = [process_candidate(candidate) for candidate in batch]
        results = await asyncio.gather(*tasks)

        # Collect results
        for result, trees, toilets in results:
            if result:
                enhanced_spaces.append(result)
                print(f"✅ {result['name'][:40]:40} - {trees:3d} trees, {toilets} toilets")

        # Small delay between batches to be respectful to APIs
        if i + batch_size < len(candidates):
            await asyncio.sleep(0.5)

    # Save results
    output_file = Path("app/data/processed/quick_berlin_green_spaces.json")

    # Summary statistics over the enhanced dataset.
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)

    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": ["quick_osm_processing", "berlin_tree_cadastre", "berlin_toilets"],
        "processing_info": {
            "method": "quick_scan_for_named_and_significant_spaces",
            "prioritizes_named_spaces": True,
            "enhanced_with_real_berlin_data": True
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            # Guarded so an empty result set cannot divide by zero.
            "tree_coverage": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%" if enhanced_spaces else "0%",
            "toilet_coverage": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%" if enhanced_spaces else "0%"
        }
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Quick processing complete!")
    print(f"📁 Saved: {output_file}")
    print(f"📊 {len(enhanced_spaces)} spaces enhanced")
    print(f"🌲 {with_trees} with tree data, 🚻 {with_toilets} with toilet data")
    print(f"🌿 {total_trees} total trees analyzed")
    print(f"\n✨ Ready to use! This gives you real Berlin green spaces")
    print(f"   with actual tree and toilet data for personality scoring!")


if __name__ == "__main__":
    asyncio.run(quick_process())
|
|
@ -0,0 +1,169 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test OSM processing with a small sample to verify it works.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import sys
|
||||||
|
import math
|
||||||
|
|
||||||
|
# Add the app directory to Python path
|
||||||
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from app.services.street_tree_service import StreetTreeService
|
||||||
|
from app.services.berlin_data_service import BerlinDataService
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_green_space(tags):
    """Return the green-space category for an OSM tag dict, or None.

    Checks ``leisure``, then ``landuse``, then ``natural`` — the same
    priority order used when processing the full dataset.
    """
    leisure = tags.get('leisure', '')
    landuse = tags.get('landuse', '')
    natural = tags.get('natural', '')
    if leisure in ('park', 'garden', 'nature_reserve'):
        return leisure
    if landuse in ('forest', 'grass', 'park'):
        return landuse
    if natural in ('forest', 'wood'):
        return natural
    return None


async def test_processing(sample_limit: int = 5):
    """Smoke-test the OSM enhancement pipeline on a small sample.

    Parses the raw Berlin OSM extract, picks the first ``sample_limit``
    ways that look like green spaces, enriches each with real tree and
    toilet data from the project services, and writes the results to
    ``app/data/processed/test_green_spaces.json``.

    Args:
        sample_limit: How many green spaces to sample (default 5, matching
            the original hard-coded behavior).

    Returns:
        True when the run completes; None (early) if the OSM extract is
        missing.
    """
    print("🧪 Testing OSM processing with sample data...")

    # Initialize the project data services.
    tree_service = StreetTreeService()
    berlin_data = BerlinDataService()

    osm_file = Path("app/data/osm-raw/berlin_green_spaces.osm")
    if not osm_file.exists():
        print("❌ OSM file not found")
        return

    root = ET.parse(osm_file).getroot()
    ways = root.findall('.//way')
    print(f"📊 Found {len(ways)} total ways in OSM file")

    # Index nodes by id ONCE. The previous per-way XPath lookup
    # (root.find(".//node[@id=...]")) rescanned every node element for
    # each candidate way, which is O(ways * nodes) on a city-sized file.
    node_index = {node.get('id'): node for node in root.findall('.//node')}

    # Collect the first `sample_limit` ways that classify as green spaces.
    sample_spaces = []
    for way in ways:
        if len(sample_spaces) >= sample_limit:
            break

        tags = {tag.get('k'): tag.get('v') for tag in way.findall('tag')}
        green_space_type = _classify_green_space(tags)
        if not green_space_type:
            continue

        # Require at least 3 node refs so the way plausibly outlines an area.
        nd_refs = [nd.get('ref') for nd in way.findall('nd')]
        if len(nd_refs) < 3:
            continue

        # Use the way's first node as a rough center estimate.
        first_node = node_index.get(nd_refs[0])
        if first_node is None:
            continue

        sample_spaces.append({
            'id': f"test_{way.get('id')}",
            'name': tags.get('name', f"Test {green_space_type} {len(sample_spaces) + 1}"),
            'fclass': green_space_type,
            'lat': float(first_node.get('lat')),
            'lng': float(first_node.get('lon')),
            'area_sqm': 5000,  # placeholder area for the test run
            'district': 'Test District',
        })

    print(f"🌳 Testing with {len(sample_spaces)} sample green spaces...")

    # Enhance each sample with real tree and toilet data.
    enhanced_spaces = []
    for i, space_data in enumerate(sample_spaces, 1):
        print(f"\n[{i}/{len(sample_spaces)}] Testing {space_data['name']}...")

        try:
            tree_response = await tree_service.get_trees_near_location(
                space_data['lat'], space_data['lng'], radius_m=200
            )
            nearby_toilets = await berlin_data.get_toilets_near_point(
                space_data['lat'], space_data['lng'], 500
            )

            enhanced_spaces.append({
                "id": space_data['id'],
                "name": space_data['name'],
                "type": "PARK",
                "coordinates": {
                    "lat": space_data['lat'],
                    "lng": space_data['lng'],
                },
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "dominant_species": tree_response.metrics.dominant_species,
                },
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    # Toilets are assumed sorted nearest-first by the
                    # service — TODO confirm against BerlinDataService.
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                },
            })

            print(
                f"✅ Success: {tree_response.metrics.total_trees} trees, "
                f"{len(nearby_toilets)} toilets nearby"
            )
        except Exception as e:
            # Best-effort test run: report the failure and keep going.
            print(f"❌ Error: {e}")

    # Save test results; ensure the output directory exists first.
    output_file = Path("app/data/processed/test_green_spaces.json")
    output_file.parent.mkdir(parents=True, exist_ok=True)

    test_data = {
        "test_results": enhanced_spaces,
        "total_tested": len(enhanced_spaces),
        "osm_ways_available": len(ways),
        # Was hard-coded True; only claim success when at least one space
        # was actually enhanced.
        "processing_successful": bool(enhanced_spaces),
        "timestamp": datetime.now().isoformat(),
    }

    # Match the main processing script's save style (utf-8, readable unicode).
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(test_data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Test completed successfully!")
    print(f"📁 Test results saved: {output_file}")
    print(f"📊 Enhanced {len(enhanced_spaces)} sample spaces")
    print(f"💡 Ready to process all {len(ways)} green spaces!")

    return True
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the small-sample smoke test.
if __name__ == "__main__":
    asyncio.run(test_processing())
|
Loading…
Reference in New Issue