# Listing metadata: 495 lines, 21 KiB, Python
# app/services/berlin_data_service.py
from typing import List, Optional, Tuple, Dict, Any
import asyncio
import json
import math
from datetime import datetime
from pathlib import Path

from geopy.distance import geodesic

from app.models.green_space import (
    GreenSpace, Coordinates, Amenity, AmenityType, GreenSpaceType,
    EnvironmentalFeatures, AccessibilityFeatures, RecreationFeatures,
    NoiseLevel, LocationScore
)
from app.services.street_tree_service import StreetTreeService


class BerlinDataService:
    """Service for accessing Berlin open data and external APIs.

    Green-space and toilet records are read lazily from JSON files under
    ``self.data_dir / "processed"`` and memoised on the instance. Amenities
    and current conditions are still hard-coded mock data.
    """

    def __init__(self):
        """Set up empty caches, the data directory and the street-tree helper."""
        self.cache = {}
        self.last_refresh = None
        # File-backed caches; ``None`` means "not loaded yet".
        self._toilets_cache = None
        self._green_spaces_cache = None
        self._street_trees_index = None
        # NOTE(review): relative path, so it resolves against the process CWD.
        self.data_dir = Path("app/data")
        self.street_tree_service = StreetTreeService()

    async def search_green_spaces(
        self,
        location: Optional[Tuple[float, float]] = None,
        radius: int = 2000,
        neighborhood: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[GreenSpace]:
        """Search for green spaces based on criteria.

        Args:
            location: Optional ``(lat, lng)`` centre; spaces whose coordinates
                are farther than ``radius`` metres from it are dropped.
            radius: Search radius in metres, only used with ``location``.
            neighborhood: Optional case-insensitive name; a space matches when
                either name contains the other.
            filters: Optional mapping with the keys ``min_size``,
                ``has_water``, ``has_playground`` and ``max_noise_level``.

        Returns:
            The matching green spaces, in data order.
        """
        candidates = await self._get_mock_green_spaces()
        wanted = neighborhood.lower() if neighborhood else None

        matches = []
        for space in candidates:
            if location:
                distance = geodesic(
                    location,
                    (space.coordinates.lat, space.coordinates.lng)
                ).meters
                if distance > radius:
                    continue

            if wanted:
                # Substring match in either direction covers exact matches
                # as well as compound neighborhood names.
                actual = space.neighborhood.lower()
                if wanted not in actual and actual not in wanted:
                    continue

            if filters and not self._passes_filters(space, filters):
                continue

            matches.append(space)

        return matches

    @staticmethod
    def _passes_filters(space: GreenSpace, filters: Dict[str, Any]) -> bool:
        """Return True when ``space`` satisfies every active entry in ``filters``."""
        min_size = filters.get("min_size")
        # Spaces with a falsy area (unknown/0) are never rejected on size.
        if min_size and space.area_sqm and space.area_sqm < min_size:
            return False
        if filters.get("has_water") and not space.environmental.water_features:
            return False
        if filters.get("has_playground") and space.recreation.playground_quality == 0:
            return False
        max_noise = filters.get("max_noise_level")
        if max_noise and space.environmental.noise_level.value > max_noise:
            return False
        return True

    async def get_green_space_by_id(self, space_id: str) -> Optional[GreenSpace]:
        """Return the green space whose ``id`` equals ``space_id``, or None."""
        spaces = await self._get_mock_green_spaces()
        return next((space for space in spaces if space.id == space_id), None)

    async def get_green_space_at_location(self, lat: float, lng: float) -> Optional[GreenSpace]:
        """Return the green space at a location, enhanced with real tree data.

        Containment is approximated by distance: a space qualifies when its
        coordinates are less than 500 m from ``(lat, lng)``. When several
        qualify, the nearest one is chosen (ties keep data order) instead of
        whichever happens to come first in the data file.

        Returns:
            The enhanced green space, or None when nothing is within 500 m.
        """
        spaces = await self._get_mock_green_spaces()

        nearest = None
        nearest_distance = 500  # metres; only strictly closer spaces qualify
        for space in spaces:
            distance = geodesic(
                (lat, lng),
                (space.coordinates.lat, space.coordinates.lng)
            ).meters
            if distance < nearest_distance:
                nearest, nearest_distance = space, distance

        if nearest is None:
            return None
        return await self._enhance_green_space_with_real_trees(nearest, lat, lng)

    async def get_green_spaces_within_radius(
        self,
        lat: float,
        lng: float,
        radius: int
    ) -> List[GreenSpace]:
        """Return all green spaces at most ``radius`` metres from ``(lat, lng)``."""
        spaces = await self._get_mock_green_spaces()
        origin = (lat, lng)
        return [
            space for space in spaces
            if geodesic(origin, (space.coordinates.lat, space.coordinates.lng)).meters <= radius
        ]

    async def get_amenities_near_point(
        self,
        lat: float,
        lng: float,
        radius: int = 500,
        amenity_types: Optional[List[str]] = None
    ) -> List[Amenity]:
        """Get amenities near a specific point.

        The amenities are mock records placed at fixed offsets from the query
        point. They are filtered by ``amenity_types`` (compared against
        ``amenity.type.value``) and by ``radius`` in metres; each returned
        amenity has ``distance_meters`` set to the computed distance.
        """
        mock_amenities = [
            Amenity(
                id="toilet_1",
                name="Public Toilet",
                type=AmenityType.TOILET,
                coordinates=Coordinates(lat=lat + 0.001, lng=lng + 0.001),
                distance_meters=100,
                rating=3.5
            ),
            Amenity(
                id="cafe_1",
                name="Park Café",
                type=AmenityType.CAFE,
                coordinates=Coordinates(lat=lat + 0.002, lng=lng - 0.001),
                distance_meters=200,
                rating=4.2,
                opening_hours="8:00-18:00"
            ),
            Amenity(
                id="playground_1",
                name="Children's Playground",
                type=AmenityType.PLAYGROUND,
                coordinates=Coordinates(lat=lat - 0.001, lng=lng + 0.002),
                distance_meters=150,
                rating=4.0
            )
        ]

        if amenity_types:
            mock_amenities = [
                amenity for amenity in mock_amenities
                if amenity.type.value in amenity_types
            ]

        filtered_amenities = []
        for amenity in mock_amenities:
            distance = geodesic(
                (lat, lng),
                (amenity.coordinates.lat, amenity.coordinates.lng)
            ).meters
            if distance <= radius:
                # Replace the placeholder distance with the computed one.
                amenity.distance_meters = int(distance)
                filtered_amenities.append(amenity)

        return filtered_amenities

    async def calculate_distance(
        self,
        lat1: float,
        lng1: float,
        lat2: float,
        lng2: float
    ) -> int:
        """Return the geodesic distance between two points, truncated to whole metres."""
        return int(geodesic((lat1, lng1), (lat2, lng2)).meters)

    async def find_similar_green_spaces(
        self,
        green_space: GreenSpace,
        limit: int = 5
    ) -> List[GreenSpace]:
        """Find green spaces similar to the given one.

        Each other space is scored on shared type (30), neighborhood (20),
        water features (15), tree coverage within 20 points (15) and sports
        facilities (10). Spaces scoring at least 50 are returned, most
        similar first, truncated to ``limit``. Ties keep data order.
        """
        all_spaces = await self._get_mock_green_spaces()
        reference_env = green_space.environmental

        scored = []
        for space in all_spaces:
            if space.id == green_space.id:
                continue

            score = 0
            if space.type == green_space.type:
                score += 30
            if space.neighborhood == green_space.neighborhood:
                score += 20
            if space.environmental.water_features == reference_env.water_features:
                score += 15
            if abs(space.environmental.tree_coverage_percent - reference_env.tree_coverage_percent) < 20:
                score += 15
            if space.recreation.sports_facilities == green_space.recreation.sports_facilities:
                score += 10

            if score >= 50:  # Threshold for similarity
                scored.append((score, space))

        # Rank by score before truncating; sorting on the score alone keeps
        # the sort stable and never compares the space objects themselves.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [space for _, space in scored[:limit]]

    async def get_neighborhood_stats(self) -> Dict[str, Any]:
        """Get statistics for Berlin neighborhoods.

        Groups the loaded green spaces by ``neighborhood`` and derives four
        personality scores (each capped at 100) from average tree coverage,
        average playground quality and the share of spaces with water or
        sports facilities.

        Returns:
            ``{"neighborhoods": [...]}``, sorted by green-space count with
            the largest first.
        """
        grouped = {}
        for space in await self._get_mock_green_spaces():
            grouped.setdefault(space.neighborhood, []).append(space)

        neighborhoods = []
        for neighborhood, spaces in grouped.items():
            count = len(spaces)  # always >= 1: groups are created on first member

            avg_tree_coverage = sum(s.environmental.tree_coverage_percent for s in spaces) / count
            avg_playground = sum(s.recreation.playground_quality for s in spaces) / count
            water_ratio = sum(1 for s in spaces if s.environmental.water_features) / count
            sports_ratio = sum(1 for s in spaces if s.recreation.sports_facilities) / count

            neighborhoods.append({
                "name": neighborhood.lower().replace(' ', '_').replace('-', '_'),
                "display_name": neighborhood,
                "green_space_count": count,
                "avg_personality_scores": {
                    "little_adventurers": min(100, int(avg_playground * 0.8 + sports_ratio * 30 + 40)),
                    "date_night": min(100, int(avg_tree_coverage * 0.6 + water_ratio * 25 + 45)),
                    "squad_goals": min(100, int(sports_ratio * 40 + avg_tree_coverage * 0.4 + 35)),
                    "zen_masters": min(100, int(avg_tree_coverage * 0.7 + water_ratio * 20 + 30))
                }
            })

        neighborhoods.sort(key=lambda entry: entry["green_space_count"], reverse=True)
        return {"neighborhoods": neighborhoods}

    async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
        """Get current conditions at a location.

        Mock implementation: ``lat`` and ``lng`` are ignored and fixed values
        are returned, stamped with the current local time.
        """
        return {
            "weather": {
                "temperature": 22,
                "condition": "partly_cloudy",
                "humidity": 65,
                "wind_speed": 10
            },
            "crowd_level": "moderate",
            "air_quality": "good",
            "noise_level": 2,
            "last_updated": datetime.now().isoformat()
        }

    async def refresh_all_data(self) -> Dict[str, str]:
        """Refresh all cached data.

        Clears the generic cache and invalidates the file-backed toilet and
        green-space caches, so the next access re-reads the JSON files.

        Returns:
            A status mapping including the refresh timestamp (ISO format).
        """
        self.cache.clear()
        # Without these resets a "refresh" would keep serving stale file data.
        self._toilets_cache = None
        self._green_spaces_cache = None
        self.last_refresh = datetime.now()

        return {
            "green_spaces": "refreshed",
            "amenities": "refreshed",
            "neighborhoods": "refreshed",
            "timestamp": self.last_refresh.isoformat()
        }

    def summarize_amenities(self, amenities: List[Amenity]) -> Dict[str, int]:
        """Count amenities per type, keyed by ``amenity.type.value``."""
        summary = {}
        for amenity in amenities:
            key = amenity.type.value
            summary[key] = summary.get(key, 0) + 1
        return summary

    def _read_processed_list(self, filename: str, key: str, missing_warning: str) -> List[Dict]:
        """Read ``data_dir/processed/<filename>`` and return the list under ``key``.

        Prints ``missing_warning`` and returns an empty list when the file
        does not exist. Read and JSON errors propagate to the caller.
        """
        path = self.data_dir / "processed" / filename
        if not path.exists():
            print(missing_warning)
            return []
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return data.get(key, [])

    def _load_toilets(self) -> List[Dict]:
        """Load toilets data from the JSON file, memoised on the instance."""
        if self._toilets_cache is None:
            self._toilets_cache = self._read_processed_list(
                "toilets.json",
                "toilets",
                "Warning: toilets.json not found. Run process_toilet_csv.py first."
            )
        return self._toilets_cache

    @staticmethod
    def _valid_coordinates(raw_lat: Any, raw_lng: Any) -> Optional[Tuple[float, float]]:
        """Return ``(lat, lng)`` as floats, or None when the pair is unusable.

        A pair is rejected when a value is missing, not convertible to
        float, NaN or infinite, or outside +/-90 (lat) and +/-180 (lng).
        """
        if raw_lat is None or raw_lng is None:
            return None
        try:
            lat = float(raw_lat)
            lng = float(raw_lng)
        except (ValueError, TypeError, OverflowError):
            return None
        if not (math.isfinite(lat) and math.isfinite(lng)):
            return None
        if abs(lat) > 90 or abs(lng) > 180:
            return None
        return lat, lng

    async def get_toilets_near_point(
        self,
        lat: float,
        lng: float,
        radius: int = 500
    ) -> List[Dict]:
        """Get toilets near a point for picnic scoring.

        Records with unusable coordinates are skipped. Each returned dict is
        a copy of the stored record with ``distance_meters`` added.

        Returns:
            Toilets at most ``radius`` metres away, nearest first.
        """
        origin = (lat, lng)
        nearby_toilets = []

        for toilet in self._load_toilets():
            coords = self._valid_coordinates(toilet.get('lat'), toilet.get('lng'))
            if coords is None:
                continue

            distance = geodesic(origin, coords).meters
            if distance <= radius:
                # Copy so the cached record is never mutated.
                toilet_copy = toilet.copy()
                toilet_copy['distance_meters'] = int(distance)
                nearby_toilets.append(toilet_copy)

        nearby_toilets.sort(key=lambda entry: entry['distance_meters'])
        return nearby_toilets

    async def _enhance_green_space_with_real_trees(self, green_space: GreenSpace, actual_lat: float, actual_lng: float) -> GreenSpace:
        """Enhance green space environmental features with real tree data.

        Queries the street-tree service within 300 m of the actual query
        location and keeps, per metric, the larger of the stored and the
        measured value (capped at 100). The returned copy carries the query
        location as its coordinates and lists ``"real_street_trees"`` among
        its data sources.

        Best effort: on any error the problem is printed and the original
        ``green_space`` is returned unchanged.
        """
        try:
            tree_response = await self.street_tree_service.get_trees_near_location(
                actual_lat, actual_lng, radius_m=300
            )
            current = green_space.environmental

            tree_coverage = max(
                current.tree_coverage_percent,
                int(tree_response.shade_analysis.estimated_shade_coverage)
            )
            shade_quality = max(
                current.shade_quality,
                tree_response.shade_analysis.shade_quality_score
            )
            wildlife_diversity = max(
                current.wildlife_diversity_score,
                tree_response.metrics.species_diversity_score
            )

            enhanced_environmental = EnvironmentalFeatures(
                tree_coverage_percent=min(100, tree_coverage),
                shade_quality=min(100, shade_quality),
                noise_level=current.noise_level,
                wildlife_diversity_score=min(100, wildlife_diversity),
                water_features=current.water_features,
                natural_surface_percent=current.natural_surface_percent
            )

            # model_copy() is shallow, so build a fresh list rather than
            # appending to the one still shared with the source object.
            data_sources = list(green_space.data_sources)
            if "real_street_trees" not in data_sources:
                data_sources.append("real_street_trees")

            return green_space.model_copy(update={
                "environmental": enhanced_environmental,
                "coordinates": Coordinates(lat=actual_lat, lng=actual_lng),  # Use actual query location
                "data_sources": data_sources
            })

        except Exception as e:
            print(f"Error enhancing green space with real tree data: {e}")
            # Return original space if enhancement fails
            return green_space

    def _load_green_spaces(self) -> List[Dict]:
        """Load green spaces data from the JSON file, memoised on the instance."""
        if self._green_spaces_cache is None:
            self._green_spaces_cache = self._read_processed_list(
                "quick_berlin_green_spaces.json",
                "green_spaces",
                "Warning: quick_berlin_green_spaces.json not found."
            )
        return self._green_spaces_cache

    def _convert_json_to_green_space(self, json_data: Dict) -> GreenSpace:
        """Convert one JSON record to a GreenSpace model.

        Missing keys fall back to defaults, and nested sections that are
        absent or ``null`` are treated as empty. The type is always
        ``GreenSpaceType.PARK`` and the address is derived from the name.
        A missing ``last_updated`` becomes the current time.
        """
        coordinates = json_data.get("coordinates") or {}
        environmental = json_data.get("environmental") or {}
        accessibility = json_data.get("accessibility") or {}
        recreation = json_data.get("recreation") or {}

        raw_updated = json_data.get("last_updated")
        last_updated = datetime.fromisoformat(raw_updated) if raw_updated else datetime.now()

        return GreenSpace(
            id=json_data.get("id", ""),
            name=json_data.get("name", ""),
            description=json_data.get("description", ""),
            type=GreenSpaceType.PARK,  # Default to PARK, could be enhanced later
            coordinates=Coordinates(
                lat=coordinates.get("lat", 0.0),
                lng=coordinates.get("lng", 0.0)
            ),
            neighborhood=json_data.get("neighborhood", "Berlin"),
            address=f"{json_data.get('name', 'Unknown')}, Berlin",
            area_sqm=json_data.get("area_sqm", 0),
            perimeter_m=json_data.get("perimeter_m", None),
            environmental=EnvironmentalFeatures(
                tree_coverage_percent=environmental.get("tree_coverage_percent", 0),
                shade_quality=environmental.get("shade_quality", 0),
                noise_level=NoiseLevel(environmental.get("noise_level", 1)),
                wildlife_diversity_score=environmental.get("wildlife_diversity_score", 0),
                water_features=environmental.get("water_features", False),
                natural_surface_percent=environmental.get("natural_surface_percent", 0)
            ),
            accessibility=AccessibilityFeatures(
                wheelchair_accessible=accessibility.get("wheelchair_accessible", True),
                public_transport_score=accessibility.get("public_transport_score", 3),
                cycling_infrastructure=accessibility.get("cycling_infrastructure", True),
                parking_availability=accessibility.get("parking_availability", 2),
                lighting_quality=accessibility.get("lighting_quality", 3)
            ),
            recreation=RecreationFeatures(
                playground_quality=recreation.get("playground_quality", 0),
                sports_facilities=recreation.get("sports_facilities", False),
                running_paths=recreation.get("running_paths", True),
                cycling_paths=recreation.get("cycling_paths", True),
                dog_friendly=recreation.get("dog_friendly", True),
                bbq_allowed=recreation.get("bbq_allowed", False)
            ),
            nearby_amenities=[],
            last_updated=last_updated,
            data_sources=json_data.get("data_sources", []),
            confidence_score=json_data.get("confidence_score", 85)
        )

    async def _get_mock_green_spaces(self) -> List[GreenSpace]:
        """Get green spaces built from the JSON file.

        Records that fail conversion are reported with ``print`` and skipped,
        so one bad record does not hide the rest.
        """
        green_spaces = []
        for space_data in self._load_green_spaces():
            try:
                green_spaces.append(self._convert_json_to_green_space(space_data))
            except Exception as e:
                print(f"Error converting green space {space_data.get('id', 'unknown')}: {e}")
        return green_spaces
|