# berlin-picnic-api/app/services/berlin_data_service.py
#
# 460 lines
# 17 KiB
# Python

# app/services/berlin_data_service.py
from typing import List, Optional, Tuple, Dict, Any
import asyncio
import json
from datetime import datetime
from pathlib import Path
from geopy.distance import geodesic
from app.models.green_space import (
GreenSpace, Coordinates, Amenity, AmenityType, GreenSpaceType,
EnvironmentalFeatures, AccessibilityFeatures, RecreationFeatures,
NoiseLevel, LocationScore
)
class BerlinDataService:
"""Service for accessing Berlin open data and external APIs."""
def __init__(self) -> None:
    """Create the service with empty caches; no data is read here."""
    # Root folder of the bundled data files (relative path, so it is
    # resolved against the process's working directory).
    self.data_dir = Path("app/data")
    # Toilet records parsed from disk; None means "not loaded yet".
    self._toilets_cache = None
    # Set to the current time by refresh_all_data().
    self.last_refresh = None
    # General-purpose cache, emptied by refresh_all_data().
    self.cache = {}
async def search_green_spaces(
    self,
    location: Optional[Tuple[float, float]] = None,
    radius: int = 2000,
    neighborhood: Optional[str] = None,
    filters: Optional[Dict[str, Any]] = None
) -> List[GreenSpace]:
    """Search for green spaces based on criteria.

    Args:
        location: Optional ``(lat, lng)`` centre point. When given, spaces
            whose centre lies farther than ``radius`` away are dropped.
        radius: Maximum geodesic distance in meters from ``location``.
            Has no effect when ``location`` is not given.
        neighborhood: Optional neighborhood name, compared
            case-insensitively with ``space.neighborhood``.
        filters: Optional mapping of extra criteria. Recognised keys are
            ``min_size`` (compared with ``area_sqm``), ``has_water``,
            ``has_playground`` and ``max_noise_level`` (compared with
            ``noise_level.value``).

    Returns:
        The matching spaces, in the order the data source returns them.
    """
    # This is a mock implementation - in a real app, this would query Berlin's open data
    criteria = filters or {}
    min_size = criteria.get("min_size")
    max_noise_level = criteria.get("max_noise_level")
    wanted_neighborhood = neighborhood.lower() if neighborhood else None

    matches = []
    for space in await self._get_mock_green_spaces():
        # Location filter: distance from the query point to the space centre.
        if location:
            distance = geodesic(
                location,
                (space.coordinates.lat, space.coordinates.lng)
            ).meters
            if distance > radius:
                continue

        # Neighborhood filter; a space without a neighborhood never matches
        # a requested one instead of raising AttributeError.
        if wanted_neighborhood is not None:
            if (space.neighborhood or "").lower() != wanted_neighborhood:
                continue

        # Size filter; spaces with an unknown (falsy) area are never rejected.
        if min_size and space.area_sqm and space.area_sqm < min_size:
            continue
        if criteria.get("has_water") and not space.environmental.water_features:
            continue
        if criteria.get("has_playground") and space.recreation.playground_quality == 0:
            continue
        # Test against None rather than truthiness so that a threshold of 0
        # is still enforced instead of silently disabling the filter.
        if max_noise_level is not None and space.environmental.noise_level.value > max_noise_level:
            continue

        matches.append(space)
    return matches
async def get_green_space_by_id(self, space_id: str) -> Optional[GreenSpace]:
    """Return the first green space whose ``id`` equals ``space_id``.

    Returns ``None`` when no space carries that identifier.
    """
    candidates = await self._get_mock_green_spaces()
    return next(
        (candidate for candidate in candidates if candidate.id == space_id),
        None,
    )
async def get_green_space_at_location(self, lat: float, lng: float) -> Optional[GreenSpace]:
    """Return a green space whose centre is less than 100 m from the point.

    The first such space in data order is returned (not necessarily the
    nearest one); ``None`` is returned when there is none.
    """
    origin = (lat, lng)
    for candidate in await self._get_mock_green_spaces():
        centre = (candidate.coordinates.lat, candidate.coordinates.lng)
        # Simple distance check - in reality you'd use proper polygon containment
        if geodesic(origin, centre).meters < 100:  # Within 100m of center
            return candidate
    return None
async def get_green_spaces_within_radius(
    self,
    lat: float,
    lng: float,
    radius: int
) -> List[GreenSpace]:
    """Return every green space whose centre is at most ``radius`` meters away.

    Distances are geodesic, measured from ``(lat, lng)`` to each space's
    centre coordinates. Results keep the data-source order.
    """
    origin = (lat, lng)
    candidates = await self._get_mock_green_spaces()
    return [
        candidate
        for candidate in candidates
        if geodesic(
            origin,
            (candidate.coordinates.lat, candidate.coordinates.lng)
        ).meters <= radius
    ]
async def get_amenities_near_point(
    self,
    lat: float,
    lng: float,
    radius: int = 500,
    amenity_types: Optional[List[str]] = None
) -> List[Amenity]:
    """Return mock amenities around a point, filtered by type and radius.

    Three placeholder amenities are generated at small fixed offsets from
    ``(lat, lng)``. An amenity is kept when its type value is listed in
    ``amenity_types`` (no type filtering if that is empty or ``None``) and
    its geodesic distance is at most ``radius`` meters. Each returned
    amenity has ``distance_meters`` overwritten with the computed distance.
    """
    origin = (lat, lng)

    # Mock amenities data
    toilet = Amenity(
        id="toilet_1",
        name="Public Toilet",
        type=AmenityType.TOILET,
        coordinates=Coordinates(lat=lat + 0.001, lng=lng + 0.001),
        distance_meters=100,
        rating=3.5
    )
    cafe = Amenity(
        id="cafe_1",
        name="Park Café",
        type=AmenityType.CAFE,
        coordinates=Coordinates(lat=lat + 0.002, lng=lng - 0.001),
        distance_meters=200,
        rating=4.2,
        opening_hours="8:00-18:00"
    )
    playground = Amenity(
        id="playground_1",
        name="Children's Playground",
        type=AmenityType.PLAYGROUND,
        coordinates=Coordinates(lat=lat - 0.001, lng=lng + 0.002),
        distance_meters=150,
        rating=4.0
    )

    in_range = []
    for item in (toilet, cafe, playground):
        # Type filter applies only when the caller supplied types.
        if amenity_types and item.type.value not in amenity_types:
            continue
        metres = geodesic(
            origin,
            (item.coordinates.lat, item.coordinates.lng)
        ).meters
        if metres > radius:
            continue
        # Replace the placeholder distance with the measured one.
        item.distance_meters = int(metres)
        in_range.append(item)
    return in_range
async def calculate_distance(
    self,
    lat1: float,
    lng1: float,
    lat2: float,
    lng2: float
) -> int:
    """Return the geodesic distance between two points in whole meters.

    The fractional part is truncated by ``int()``, not rounded.
    """
    start = (lat1, lng1)
    end = (lat2, lng2)
    metres = geodesic(start, end).meters
    return int(metres)
async def find_similar_green_spaces(
    self,
    green_space: GreenSpace,
    limit: int = 5
) -> List[GreenSpace]:
    """Find green spaces similar to the given one.

    Every other space is scored against ``green_space``:

    * same type: +30
    * same neighborhood: +20
    * same ``water_features`` flag: +15
    * tree coverage within 20 percentage points: +15
    * same ``sports_facilities`` flag: +10

    Spaces scoring at least 50 count as similar.

    Args:
        green_space: The reference space; it is never part of the result.
        limit: Maximum number of spaces to return. Values of zero or less
            yield an empty list.

    Returns:
        Up to ``limit`` similar spaces, most similar first. Spaces with
        equal scores keep the order of the data source.
    """
    if limit <= 0:
        # A negative limit would otherwise slice off the tail of the list.
        return []

    reference_env = green_space.environmental
    scored = []
    for space in await self._get_mock_green_spaces():
        if space.id == green_space.id:
            continue

        # Simple similarity based on type and features
        score = 0
        if space.type == green_space.type:
            score += 30
        if space.neighborhood == green_space.neighborhood:
            score += 20
        if space.environmental.water_features == reference_env.water_features:
            score += 15
        if abs(space.environmental.tree_coverage_percent - reference_env.tree_coverage_percent) < 20:
            score += 15
        if space.recreation.sports_facilities == green_space.recreation.sports_facilities:
            score += 10

        if score >= 50:  # Threshold for similarity
            scored.append((score, space))

    # Rank best-first before truncating; list.sort is stable, so ties stay
    # in data-source order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [space for _, space in scored[:limit]]
async def get_neighborhood_stats(self) -> Dict[str, Any]:
    """Return hard-coded statistics for three Berlin neighborhoods.

    The result is ``{"neighborhoods": [...]}`` where each entry carries
    ``name``, ``display_name``, ``green_space_count`` and an
    ``avg_personality_scores`` mapping.
    """
    persona_keys = ("little_adventurers", "date_night", "squad_goals", "zen_masters")
    # (name, display_name, green_space_count, scores in persona_keys order)
    rows = (
        ("mitte", "Mitte", 15, (75, 80, 70, 65)),
        ("kreuzberg", "Kreuzberg", 12, (70, 75, 85, 60)),
        ("prenzlauer_berg", "Prenzlauer Berg", 18, (90, 70, 75, 70)),
    )
    entries = []
    for slug, label, count, scores in rows:
        entries.append({
            "name": slug,
            "display_name": label,
            "green_space_count": count,
            "avg_personality_scores": dict(zip(persona_keys, scores)),
        })
    return {"neighborhoods": entries}
async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
    """Return mock current conditions for a location.

    ``lat`` and ``lng`` are accepted but not used: the same fixed values
    are returned for every point. Only ``last_updated`` varies; it is the
    ISO-8601 string of the local time at the moment of the call.
    """
    # Mock current conditions
    weather = {
        "temperature": 22,
        "condition": "partly_cloudy",
        "humidity": 65,
        "wind_speed": 10
    }
    conditions = {"weather": weather}
    conditions["crowd_level"] = "moderate"
    conditions["air_quality"] = "good"
    conditions["noise_level"] = 2
    conditions["last_updated"] = datetime.now().isoformat()
    return conditions
async def refresh_all_data(self) -> Dict[str, str]:
    """Refresh all cached data.

    Empties ``self.cache``, discards the lazily loaded toilet list so the
    next toilet lookup re-reads it from disk, and records the refresh time
    in ``self.last_refresh``.

    Returns:
        A status mapping with ``"refreshed"`` for ``green_spaces``,
        ``amenities`` and ``neighborhoods``, plus ``timestamp`` holding the
        ISO-8601 string of ``self.last_refresh``.
    """
    self.cache.clear()
    # The toilet list lives in its own attribute; without resetting it a
    # refresh would keep serving the stale copy of toilets.json forever.
    self._toilets_cache = None
    self.last_refresh = datetime.now()
    return {
        "green_spaces": "refreshed",
        "amenities": "refreshed",
        "neighborhoods": "refreshed",
        "timestamp": self.last_refresh.isoformat()
    }
def summarize_amenities(self, amenities: List[Amenity]) -> Dict[str, int]:
    """Count amenities per type.

    Returns a mapping from each amenity's ``type.value`` to the number of
    amenities of that type, with keys in first-seen order.
    """
    counts: Dict[str, int] = {}
    for key in (item.type.value for item in amenities):
        if key in counts:
            counts[key] += 1
        else:
            counts[key] = 1
    return counts
def _load_toilets(self) -> List[Dict]:
    """Load toilets data from JSON file, caching the result.

    The file is ``<data_dir>/processed/toilets.json`` and is expected to
    hold an object with a ``"toilets"`` list. It is read at most once per
    cache lifetime: whatever is loaded (including an empty list after a
    failure) is stored in ``self._toilets_cache`` and returned on later
    calls.

    Loading is best-effort. A missing, unreadable or malformed file, or a
    payload without a usable ``"toilets"`` list, produces a printed warning
    and an empty result rather than an exception.

    Returns:
        The list of toilet records, possibly empty.
    """
    if self._toilets_cache is not None:
        return self._toilets_cache

    toilets_file = self.data_dir / "processed" / "toilets.json"
    toilets: List[Dict] = []
    try:
        # Open directly instead of checking exists() first: one fewer
        # filesystem call and no window for the file to vanish in between.
        with open(toilets_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print("Warning: toilets.json not found. Run process_toilet_csv.py first.")
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Warning: could not read toilets.json: {exc}")
    else:
        records = data.get("toilets", []) if isinstance(data, dict) else None
        if isinstance(records, list):
            toilets = records
        else:
            # e.g. a top-level array, or "toilets": null
            print("Warning: toilets.json has an unexpected structure; ignoring it.")

    self._toilets_cache = toilets
    return toilets
async def get_toilets_near_point(
    self,
    lat: float,
    lng: float,
    radius: int = 500
) -> List[Dict]:
    """Get toilets near a point for picnic scoring

    Records whose ``lat``/``lng`` are missing, not convertible to float,
    NaN, or outside the valid latitude/longitude range are skipped. Each
    remaining record within ``radius`` meters (geodesic) is returned as a
    shallow copy with an added integer ``distance_meters`` key, nearest
    first.
    """
    origin = (lat, lng)
    matches = []
    for record in self._load_toilets():
        raw_lat, raw_lng = record.get('lat'), record.get('lng')
        if raw_lat is None or raw_lng is None:
            continue

        try:
            toilet_lat, toilet_lng = float(raw_lat), float(raw_lng)
        except (ValueError, TypeError):
            continue

        # NaN fails every comparison, so this one range test rejects NaN,
        # infinities and out-of-range coordinates alike.
        if not (-90 <= toilet_lat <= 90 and -180 <= toilet_lng <= 180):
            continue

        metres = geodesic(origin, (toilet_lat, toilet_lng)).meters
        if metres <= radius:
            # Copy so the cached record is not mutated.
            entry = record.copy()
            entry['distance_meters'] = int(metres)
            matches.append(entry)

    matches.sort(key=lambda entry: entry['distance_meters'])
    return matches
async def _get_mock_green_spaces(self) -> List[GreenSpace]:
    """Build the hard-coded development data set of three Berlin parks.

    Fresh ``GreenSpace`` objects are created on every call, in the order
    Tiergarten, Volkspark Friedrichshain, Tempelhofer Feld. Each gets its
    own ``last_updated`` timestamp taken at construction time.
    """
    # This would be replaced with real data fetching in production
    # NOTE(review): the area_sqm figures look about 10x smaller than the
    # sizes usually quoted for these parks - confirm before relying on the
    # min_size filter.
    tiergarten = GreenSpace(
        id="tiergarten_1",
        name="Tiergarten",
        description="Berlin's most famous park in the heart of the city",
        type=GreenSpaceType.PARK,
        coordinates=Coordinates(lat=52.5145, lng=13.3501),
        neighborhood="Mitte",
        address="Tiergarten, 10557 Berlin",
        area_sqm=210000,
        perimeter_m=5800,
        environmental=EnvironmentalFeatures(
            tree_coverage_percent=85,
            shade_quality=90,
            noise_level=NoiseLevel.MODERATE,
            wildlife_diversity_score=80,
            water_features=True,
            natural_surface_percent=95
        ),
        accessibility=AccessibilityFeatures(
            wheelchair_accessible=True,
            public_transport_score=5,
            cycling_infrastructure=True,
            parking_availability=3,
            lighting_quality=4
        ),
        recreation=RecreationFeatures(
            playground_quality=70,
            sports_facilities=True,
            running_paths=True,
            cycling_paths=True,
            dog_friendly=True,
            bbq_allowed=False
        ),
        nearby_amenities=[],
        last_updated=datetime.now(),
        data_sources=["berlin_open_data", "osm"],
        confidence_score=95
    )

    volkspark_friedrichshain = GreenSpace(
        id="volkspark_friedrichshain",
        name="Volkspark Friedrichshain",
        description="Historic park with fairy tale fountain and sports facilities",
        type=GreenSpaceType.PARK,
        coordinates=Coordinates(lat=52.5263, lng=13.4317),
        neighborhood="Friedrichshain",
        address="Friedrichshain, 10249 Berlin",
        area_sqm=49000,
        perimeter_m=2800,
        environmental=EnvironmentalFeatures(
            tree_coverage_percent=70,
            shade_quality=75,
            noise_level=NoiseLevel.QUIET,
            wildlife_diversity_score=65,
            water_features=True,
            natural_surface_percent=80
        ),
        accessibility=AccessibilityFeatures(
            wheelchair_accessible=True,
            public_transport_score=4,
            cycling_infrastructure=True,
            parking_availability=2,
            lighting_quality=3
        ),
        recreation=RecreationFeatures(
            playground_quality=85,
            sports_facilities=True,
            running_paths=True,
            cycling_paths=True,
            dog_friendly=True,
            bbq_allowed=True
        ),
        nearby_amenities=[],
        last_updated=datetime.now(),
        data_sources=["berlin_open_data", "osm"],
        confidence_score=90
    )

    tempelhofer_feld = GreenSpace(
        id="tempelhofer_feld",
        name="Tempelhofer Feld",
        description="Former airport turned into unique urban park",
        type=GreenSpaceType.PARK,
        coordinates=Coordinates(lat=52.4732, lng=13.4015),
        neighborhood="Tempelhof",
        address="Tempelhofer Damm, 12101 Berlin",
        area_sqm=300000,
        perimeter_m=6200,
        environmental=EnvironmentalFeatures(
            tree_coverage_percent=15,
            shade_quality=20,
            noise_level=NoiseLevel.MODERATE,
            wildlife_diversity_score=40,
            water_features=False,
            natural_surface_percent=60
        ),
        accessibility=AccessibilityFeatures(
            wheelchair_accessible=True,
            public_transport_score=4,
            cycling_infrastructure=True,
            parking_availability=4,
            lighting_quality=2
        ),
        recreation=RecreationFeatures(
            playground_quality=30,
            sports_facilities=False,
            running_paths=True,
            cycling_paths=True,
            dog_friendly=True,
            bbq_allowed=True
        ),
        nearby_amenities=[],
        last_updated=datetime.now(),
        data_sources=["berlin_open_data", "osm"],
        confidence_score=85
    )

    return [tiergarten, volkspark_friedrichshain, tempelhofer_feld]