berlin-picnic-api/app/services/berlin_data_service.py

495 lines
21 KiB
Python

# app/services/berlin_data_service.py
from typing import List, Optional, Tuple, Dict, Any
import asyncio
import json
from datetime import datetime
from pathlib import Path
from geopy.distance import geodesic
from app.models.green_space import (
GreenSpace, Coordinates, Amenity, AmenityType, GreenSpaceType,
EnvironmentalFeatures, AccessibilityFeatures, RecreationFeatures,
NoiseLevel, LocationScore
)
from app.services.street_tree_service import StreetTreeService
class BerlinDataService:
    """Service for accessing Berlin open data and external APIs."""

    def __init__(self):
        """Set up empty caches, the data directory and the street-tree helper."""
        # Collaborator that answers street-tree queries for a location.
        self.street_tree_service = StreetTreeService()

        # Data files are resolved relative to the current working directory.
        self.data_dir = Path("app/data")

        # File-backed caches; ``None`` means "not loaded yet" for the two
        # JSON loaders (_load_toilets / _load_green_spaces).
        self._toilets_cache: Optional[List[Dict]] = None
        self._green_spaces_cache: Optional[List[Dict]] = None
        self._street_trees_index = None

        # Generic cache plus the moment it was last cleared by a refresh.
        self.cache: Dict[str, Any] = {}
        self.last_refresh: Optional[datetime] = None
async def search_green_spaces(
    self,
    location: Optional[Tuple[float, float]] = None,
    radius: int = 2000,
    neighborhood: Optional[str] = None,
    filters: Optional[Dict[str, Any]] = None
) -> List[GreenSpace]:
    """Search for green spaces based on criteria.

    Args:
        location: Optional ``(lat, lng)`` pair. When given, spaces whose
            centre is farther than ``radius`` metres away are dropped.
        radius: Maximum distance in metres; only used with ``location``.
        neighborhood: Optional neighborhood name. Matching is
            case-insensitive and succeeds when either name contains the
            other (useful for compound neighborhood names).
        filters: Optional mapping. Recognised keys are ``min_size``,
            ``has_water``, ``has_playground`` and ``max_noise_level``;
            a missing or falsy value disables that filter.

    Returns:
        The matching green spaces, in the order the data source yields them.
    """
    # This is a mock implementation - in a real app, this would query Berlin's open data
    candidates = await self._get_mock_green_spaces()

    # Resolve everything that does not depend on the individual space once,
    # instead of once per loop iteration.
    wanted = neighborhood.lower() if neighborhood else None
    criteria = filters or {}
    min_size = criteria.get("min_size")
    needs_water = criteria.get("has_water")
    needs_playground = criteria.get("has_playground")
    max_noise = criteria.get("max_noise_level")

    matches = []
    for space in candidates:
        # Location filter: distance from the query point to the space centre.
        if location:
            distance = geodesic(
                location,
                (space.coordinates.lat, space.coordinates.lng)
            ).meters
            if distance > radius:
                continue

        # Neighborhood filter: equality is already covered by the substring
        # checks, so only the two containment tests are needed.
        if wanted is not None:
            actual = space.neighborhood.lower()
            if wanted not in actual and actual not in wanted:
                continue

        # Spaces with an unknown (falsy) area are not excluded by min_size.
        if min_size and space.area_sqm and space.area_sqm < min_size:
            continue
        if needs_water and not space.environmental.water_features:
            continue
        if needs_playground and space.recreation.playground_quality == 0:
            continue
        if max_noise and space.environmental.noise_level.value > max_noise:
            continue

        matches.append(space)
    return matches
async def get_green_space_by_id(self, space_id: str) -> Optional[GreenSpace]:
    """Return the first green space whose ``id`` equals ``space_id``.

    Returns ``None`` when no space carries that id.
    """
    candidates = await self._get_mock_green_spaces()
    return next(
        (candidate for candidate in candidates if candidate.id == space_id),
        None,
    )
async def get_green_space_at_location(self, lat: float, lng: float) -> Optional[GreenSpace]:
    """Find the green space a location belongs to, enriched with tree data.

    Containment is approximated: the first space (in data order) whose
    centre lies less than 500 m from ``(lat, lng)`` is taken and passed
    through ``_enhance_green_space_with_real_trees``. Returns ``None`` when
    no space is that close.
    """
    query_point = (lat, lng)
    for candidate in await self._get_mock_green_spaces():
        centre = (candidate.coordinates.lat, candidate.coordinates.lng)
        # Simple distance check - in reality you'd use proper polygon containment
        metres_from_centre = geodesic(query_point, centre).meters
        if metres_from_centre < 500:
            return await self._enhance_green_space_with_real_trees(candidate, lat, lng)
    return None
async def get_green_spaces_within_radius(
    self,
    lat: float,
    lng: float,
    radius: int
) -> List[GreenSpace]:
    """Return every green space whose centre is at most ``radius`` metres from ``(lat, lng)``.

    The result keeps the order of the underlying data.
    """
    origin = (lat, lng)
    candidates = await self._get_mock_green_spaces()
    return [
        candidate
        for candidate in candidates
        if geodesic(
            origin,
            (candidate.coordinates.lat, candidate.coordinates.lng)
        ).meters <= radius
    ]
async def get_amenities_near_point(
    self,
    lat: float,
    lng: float,
    radius: int = 500,
    amenity_types: Optional[List[str]] = None
) -> List[Amenity]:
    """Return mock amenities around ``(lat, lng)``.

    Three fixed amenities (toilet, café, playground) are created at small
    coordinate offsets from the query point. They are optionally narrowed
    to the requested ``amenity_types`` (compared against ``type.value``)
    and then to those within ``radius`` metres. ``distance_meters`` of
    every returned amenity is overwritten with the computed distance.
    """
    origin = (lat, lng)

    # Mock amenities data
    toilet = Amenity(
        id="toilet_1",
        name="Public Toilet",
        type=AmenityType.TOILET,
        coordinates=Coordinates(lat=lat + 0.001, lng=lng + 0.001),
        distance_meters=100,
        rating=3.5
    )
    cafe = Amenity(
        id="cafe_1",
        name="Park Café",
        type=AmenityType.CAFE,
        coordinates=Coordinates(lat=lat + 0.002, lng=lng - 0.001),
        distance_meters=200,
        rating=4.2,
        opening_hours="8:00-18:00"
    )
    playground = Amenity(
        id="playground_1",
        name="Children's Playground",
        type=AmenityType.PLAYGROUND,
        coordinates=Coordinates(lat=lat - 0.001, lng=lng + 0.002),
        distance_meters=150,
        rating=4.0
    )
    candidates = [toilet, cafe, playground]

    # Keep only the requested types, if any were specified
    if amenity_types:
        candidates = [
            item for item in candidates
            if item.type.value in amenity_types
        ]

    # Keep only what lies inside the radius, recording the real distance
    nearby = []
    for item in candidates:
        position = (item.coordinates.lat, item.coordinates.lng)
        metres = geodesic(origin, position).meters
        if metres <= radius:
            item.distance_meters = int(metres)
            nearby.append(item)
    return nearby
async def calculate_distance(
    self,
    lat1: float,
    lng1: float,
    lat2: float,
    lng2: float
) -> int:
    """Return the geodesic distance between two points, in whole metres (truncated)."""
    start = (lat1, lng1)
    end = (lat2, lng2)
    metres = geodesic(start, end).meters
    return int(metres)
async def find_similar_green_spaces(
    self,
    green_space: GreenSpace,
    limit: int = 5
) -> List[GreenSpace]:
    """Find green spaces similar to the given one.

    Every other space is scored against ``green_space``:

    * same type: 30 points
    * same neighborhood: 20 points
    * same ``water_features`` value: 15 points
    * tree coverage within 20 percentage points: 15 points
    * same ``sports_facilities`` value: 10 points

    Spaces scoring at least 50 qualify.

    Args:
        green_space: The reference space; it is never part of the result.
        limit: Maximum number of spaces to return.

    Returns:
        Qualifying spaces ordered from most to least similar. Spaces with
        equal scores keep their data-source order.
    """
    reference_env = green_space.environmental
    reference_rec = green_space.recreation

    scored = []
    for space in await self._get_mock_green_spaces():
        if space.id == green_space.id:
            continue

        # Simple similarity based on type and features
        similarity_score = 0
        if space.type == green_space.type:
            similarity_score += 30
        if space.neighborhood == green_space.neighborhood:
            similarity_score += 20
        if space.environmental.water_features == reference_env.water_features:
            similarity_score += 15
        if abs(space.environmental.tree_coverage_percent - reference_env.tree_coverage_percent) < 20:
            similarity_score += 15
        if space.recreation.sports_facilities == reference_rec.sports_facilities:
            similarity_score += 10

        if similarity_score >= 50:  # Threshold for similarity
            scored.append((similarity_score, space))

    # Rank before truncating so that ``limit`` keeps the best matches rather
    # than whichever qualifying spaces happen to come first in the data.
    # list.sort is stable, also with reverse=True, so ties keep data order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [space for _, space in scored[:limit]]
async def get_neighborhood_stats(self) -> Dict[str, Any]:
    """Get statistics for Berlin neighborhoods.

    Green spaces are grouped by their ``neighborhood``. For every group the
    average tree coverage, average playground quality and the share of
    spaces with water features / sports facilities are turned into four
    personality scores, each capped at 100.

    Returns:
        ``{"neighborhoods": [...]}`` with one entry per neighborhood,
        ordered by green space count, largest first.
    """
    # Group the spaces; dict insertion order keeps first-seen order.
    grouped = {}
    for space in await self._get_mock_green_spaces():
        grouped.setdefault(space.neighborhood, []).append(space)

    def capped(raw_score):
        # Scores are truncated to int and never exceed 100.
        return min(100, int(raw_score))

    entries = []
    for display_name, members in grouped.items():
        member_count = len(members)  # always >= 1 for an existing group

        # Averages / ratios derived from the actual features
        mean_tree = sum(m.environmental.tree_coverage_percent for m in members) / member_count
        mean_playground = sum(m.recreation.playground_quality for m in members) / member_count
        share_water = sum(1 for m in members if m.environmental.water_features) / member_count
        share_sports = sum(1 for m in members if m.recreation.sports_facilities) / member_count

        scores = {
            "little_adventurers": capped(mean_playground * 0.8 + share_sports * 30 + 40),
            "date_night": capped(mean_tree * 0.6 + share_water * 25 + 45),
            "squad_goals": capped(share_sports * 40 + mean_tree * 0.4 + 35),
            "zen_masters": capped(mean_tree * 0.7 + share_water * 20 + 30)
        }
        slug = display_name.lower().replace(' ', '_').replace('-', '_')
        entries.append({
            "name": slug,
            "display_name": display_name,
            "green_space_count": member_count,
            "avg_personality_scores": scores
        })

    # Most green spaces first; the stable sort keeps first-seen order on ties
    entries.sort(key=lambda entry: entry["green_space_count"], reverse=True)
    return {"neighborhoods": entries}
async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
    """Get current conditions at a location.

    This is mock data: the same fixed weather, crowd, air-quality and noise
    values are returned for every ``(lat, lng)``; only ``last_updated``
    (the current local time in ISO format) varies.
    """
    # Mock current conditions
    weather = {
        "temperature": 22,
        "condition": "partly_cloudy",
        "humidity": 65,
        "wind_speed": 10
    }
    conditions = {"weather": weather}
    conditions["crowd_level"] = "moderate"
    conditions["air_quality"] = "good"
    conditions["noise_level"] = 2
    conditions["last_updated"] = datetime.now().isoformat()
    return conditions
async def refresh_all_data(self) -> Dict[str, str]:
    """Refresh all cached data.

    Clears the generic cache and drops the lazily loaded toilet and
    green-space payloads so that the next access re-reads the JSON files
    from disk, then records the refresh time.

    Returns:
        A status mapping for each data category plus the ISO ``timestamp``
        of this refresh.
    """
    self.cache.clear()
    # The JSON loaders only read from disk while their cache is None;
    # without this reset a "refresh" would keep serving the old payloads.
    self._toilets_cache = None
    self._green_spaces_cache = None
    self.last_refresh = datetime.now()
    return {
        "green_spaces": "refreshed",
        "amenities": "refreshed",
        "neighborhoods": "refreshed",
        "timestamp": self.last_refresh.isoformat()
    }
def summarize_amenities(self, amenities: List[Amenity]) -> Dict[str, int]:
    """Count amenities per type.

    Returns a mapping from each amenity's ``type.value`` to the number of
    amenities of that type, keyed in first-seen order.
    """
    counts = {}
    for kind in (entry.type.value for entry in amenities):
        if kind in counts:
            counts[kind] += 1
        else:
            counts[kind] = 1
    return counts
def _load_toilets(self) -> List[Dict]:
"""Load toilets data from JSON file"""
if self._toilets_cache is None:
toilets_file = self.data_dir / "processed" / "toilets.json"
if toilets_file.exists():
with open(toilets_file, 'r', encoding='utf-8') as f:
data = json.load(f)
self._toilets_cache = data.get("toilets", [])
else:
print("Warning: toilets.json not found. Run process_toilet_csv.py first.")
self._toilets_cache = []
return self._toilets_cache
async def get_toilets_near_point(
    self,
    lat: float,
    lng: float,
    radius: int = 500
) -> List[Dict]:
    """Get toilets near a point for picnic scoring

    Records whose coordinates are missing, not convertible to ``float``,
    NaN, or outside the latitude/longitude ranges are skipped. Every
    returned record is a copy of the stored one with an added
    ``distance_meters`` key.

    Returns:
        Toilets within ``radius`` metres of ``(lat, lng)``, nearest first.
    """
    origin = (lat, lng)
    matches = []
    for record in self._load_toilets():
        raw_lat = record.get('lat')
        raw_lng = record.get('lng')
        if raw_lat is None or raw_lng is None:
            continue

        # Only the conversion itself can raise; keep the try block minimal.
        try:
            point_lat = float(raw_lat)
            point_lng = float(raw_lng)
        except (ValueError, TypeError):
            continue

        # NaN is the only float that differs from itself.
        if point_lat != point_lat or point_lng != point_lng:
            continue
        # Out-of-range values (this also rejects +/- infinity).
        if abs(point_lat) > 90 or abs(point_lng) > 180:
            continue

        metres = geodesic(origin, (point_lat, point_lng)).meters
        if metres <= radius:
            enriched = record.copy()
            enriched['distance_meters'] = int(metres)
            matches.append(enriched)

    matches.sort(key=lambda item: item['distance_meters'])
    return matches
async def _enhance_green_space_with_real_trees(self, green_space: GreenSpace, actual_lat: float, actual_lng: float) -> GreenSpace:
    """Enhance green space environmental features with real tree data.

    Street-tree data within 300 m of the queried location is fetched and,
    for tree coverage, shade quality and wildlife diversity, the higher of
    the stored value and the tree-derived value is kept (capped at 100).

    Args:
        green_space: The space to enhance; it is never modified.
        actual_lat: Latitude of the queried location.
        actual_lng: Longitude of the queried location.

    Returns:
        A copy of ``green_space`` with updated environmental features, its
        coordinates set to the queried location and ``"real_street_trees"``
        listed in ``data_sources``. If anything fails, the error is printed
        and the original ``green_space`` is returned unchanged.
    """
    try:
        # Get real tree data for the actual location (not just the park center)
        tree_response = await self.street_tree_service.get_trees_near_location(
            actual_lat, actual_lng, radius_m=300
        )

        # Take the better of the stored and the tree-derived value for each metric
        baseline = green_space.environmental
        tree_coverage = max(
            baseline.tree_coverage_percent,
            int(tree_response.shade_analysis.estimated_shade_coverage)
        )
        shade_quality = max(
            baseline.shade_quality,
            tree_response.shade_analysis.shade_quality_score
        )
        wildlife_diversity = max(
            baseline.wildlife_diversity_score,
            tree_response.metrics.species_diversity_score
        )

        enhanced_environmental = EnvironmentalFeatures(
            tree_coverage_percent=min(100, tree_coverage),
            shade_quality=min(100, shade_quality),
            noise_level=baseline.noise_level,
            wildlife_diversity_score=min(100, wildlife_diversity),
            water_features=baseline.water_features,
            natural_surface_percent=baseline.natural_surface_percent
        )

        # model_copy() makes a shallow copy by default, so the copy would share
        # its data_sources list with the original. Build a separate list so
        # that tagging the copy does not also mutate the input space.
        data_sources = list(green_space.data_sources)
        if "real_street_trees" not in data_sources:
            data_sources.append("real_street_trees")

        return green_space.model_copy(update={
            "environmental": enhanced_environmental,
            "coordinates": Coordinates(lat=actual_lat, lng=actual_lng),  # Use actual query location
            "data_sources": data_sources
        })
    except Exception as e:
        # Best effort: enhancement is optional, so fall back to the input.
        print(f"Error enhancing green space with real tree data: {e}")
        return green_space
def _load_green_spaces(self) -> List[Dict]:
"""Load green spaces data from JSON file"""
if self._green_spaces_cache is None:
green_spaces_file = self.data_dir / "processed" / "quick_berlin_green_spaces.json"
if green_spaces_file.exists():
with open(green_spaces_file, 'r', encoding='utf-8') as f:
data = json.load(f)
self._green_spaces_cache = data.get("green_spaces", [])
else:
print("Warning: quick_berlin_green_spaces.json not found.")
self._green_spaces_cache = []
return self._green_spaces_cache
def _convert_json_to_green_space(self, json_data: Dict) -> GreenSpace:
    """Convert JSON data to GreenSpace model

    Missing keys fall back to the defaults spelled out below. The type is
    always ``GreenSpaceType.PARK`` and the address is derived from the name.
    """
    # Nested sections of the record; an absent section behaves like an empty one.
    coords_raw = json_data.get("coordinates", {})
    env_raw = json_data.get("environmental", {})
    access_raw = json_data.get("accessibility", {})
    rec_raw = json_data.get("recreation", {})

    coordinates = Coordinates(
        lat=coords_raw.get("lat", 0.0),
        lng=coords_raw.get("lng", 0.0)
    )
    environmental = EnvironmentalFeatures(
        tree_coverage_percent=env_raw.get("tree_coverage_percent", 0),
        shade_quality=env_raw.get("shade_quality", 0),
        noise_level=NoiseLevel(env_raw.get("noise_level", 1)),
        wildlife_diversity_score=env_raw.get("wildlife_diversity_score", 0),
        water_features=env_raw.get("water_features", False),
        natural_surface_percent=env_raw.get("natural_surface_percent", 0)
    )
    accessibility = AccessibilityFeatures(
        wheelchair_accessible=access_raw.get("wheelchair_accessible", True),
        public_transport_score=access_raw.get("public_transport_score", 3),
        cycling_infrastructure=access_raw.get("cycling_infrastructure", True),
        parking_availability=access_raw.get("parking_availability", 2),
        lighting_quality=access_raw.get("lighting_quality", 3)
    )
    recreation = RecreationFeatures(
        playground_quality=rec_raw.get("playground_quality", 0),
        sports_facilities=rec_raw.get("sports_facilities", False),
        running_paths=rec_raw.get("running_paths", True),
        cycling_paths=rec_raw.get("cycling_paths", True),
        dog_friendly=rec_raw.get("dog_friendly", True),
        bbq_allowed=rec_raw.get("bbq_allowed", False)
    )
    # Records without a timestamp are stamped with the conversion time.
    last_updated = datetime.fromisoformat(
        json_data.get("last_updated", datetime.now().isoformat())
    )

    return GreenSpace(
        id=json_data.get("id", ""),
        name=json_data.get("name", ""),
        description=json_data.get("description", ""),
        type=GreenSpaceType.PARK,  # Default to PARK, could be enhanced later
        coordinates=coordinates,
        neighborhood=json_data.get("neighborhood", "Berlin"),
        address=f"{json_data.get('name', 'Unknown')}, Berlin",
        area_sqm=json_data.get("area_sqm", 0),
        perimeter_m=json_data.get("perimeter_m", None),
        environmental=environmental,
        accessibility=accessibility,
        recreation=recreation,
        nearby_amenities=[],
        last_updated=last_updated,
        data_sources=json_data.get("data_sources", []),
        confidence_score=json_data.get("confidence_score", 85)
    )
async def _get_mock_green_spaces(self) -> List[GreenSpace]:
"""Get green spaces data from JSON file."""
json_data = self._load_green_spaces()
green_spaces = []
for space_data in json_data:
try:
green_space = self._convert_json_to_green_space(space_data)
green_spaces.append(green_space)
except Exception as e:
print(f"Error converting green space {space_data.get('id', 'unknown')}: {e}")
continue
return green_spaces