# app/services/berlin_data_service.py
import asyncio
import json
import logging
import math
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple, Dict, Any

from geopy.distance import geodesic

from app.models.green_space import (
    GreenSpace, Coordinates, Amenity, AmenityType, GreenSpaceType,
    EnvironmentalFeatures, AccessibilityFeatures, RecreationFeatures,
    NoiseLevel, LocationScore
)
from app.services.street_tree_service import StreetTreeService

logger = logging.getLogger(__name__)


class BerlinDataService:
    """Service for accessing Berlin open data and external APIs.

    Green spaces, amenities, neighborhood statistics and current conditions
    are served from hard-coded mock data. Toilets are read from a JSON file
    under ``data_dir``, and tree data comes from ``StreetTreeService``.
    """

    def __init__(self):
        self.cache = {}
        self.last_refresh = None
        # Populated on first use by _load_toilets().
        self._toilets_cache = None
        self._street_trees_index = None
        self.data_dir = Path("app/data")
        self.street_tree_service = StreetTreeService()

    async def search_green_spaces(
        self,
        location: Optional[Tuple[float, float]] = None,
        radius: int = 2000,
        neighborhood: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[GreenSpace]:
        """Search for green spaces based on criteria.

        Args:
            location: Optional ``(lat, lng)`` pair; when given, spaces whose
                center is farther than ``radius`` meters away are dropped.
            radius: Search radius in meters, used only with ``location``.
            neighborhood: Optional neighborhood name, compared
                case-insensitively.
            filters: Optional mapping; the keys read are ``min_size``,
                ``has_water``, ``has_playground`` and ``max_noise_level``.
                A falsy value disables the corresponding filter.

        Returns:
            The matching green spaces, in the order of the mock data.
        """
        # This is a mock implementation - in a real app, this would query
        # Berlin's open data
        mock_spaces = await self._get_mock_green_spaces()

        filtered_spaces = []
        for space in mock_spaces:
            # Apply location filter
            if location:
                distance = geodesic(
                    location,
                    (space.coordinates.lat, space.coordinates.lng)
                ).meters
                if distance > radius:
                    continue

            # Apply neighborhood filter
            if neighborhood and space.neighborhood.lower() != neighborhood.lower():
                continue

            # Apply other filters
            if filters:
                # A space with no known area is not excluded by min_size.
                if (filters.get("min_size") and space.area_sqm
                        and space.area_sqm < filters["min_size"]):
                    continue
                if filters.get("has_water") and not space.environmental.water_features:
                    continue
                if filters.get("has_playground") and space.recreation.playground_quality == 0:
                    continue
                if (filters.get("max_noise_level")
                        and space.environmental.noise_level.value > filters["max_noise_level"]):
                    continue

            filtered_spaces.append(space)

        return filtered_spaces

    async def get_green_space_by_id(self, space_id: str) -> Optional[GreenSpace]:
        """Get a specific green space by ID, or ``None`` if no space matches."""
        spaces = await self._get_mock_green_spaces()
        for space in spaces:
            if space.id == space_id:
                return space
        return None

    async def get_green_space_at_location(self, lat: float, lng: float) -> Optional[GreenSpace]:
        """Check if a location is within a green space and enhance it with real tree data.

        Returns the first green space whose center lies less than 500 m from
        ``(lat, lng)``, passed through
        ``_enhance_green_space_with_real_trees``; ``None`` if there is none.
        """
        spaces = await self._get_mock_green_spaces()

        for space in spaces:
            # Simple distance check - in reality you'd use proper polygon containment
            distance = geodesic(
                (lat, lng),
                (space.coordinates.lat, space.coordinates.lng)
            ).meters

            if distance < 500:  # Within 500m of center (larger radius for better coverage)
                # Enhance the green space with real tree data
                return await self._enhance_green_space_with_real_trees(space, lat, lng)

        return None

    async def get_green_spaces_within_radius(
        self,
        lat: float,
        lng: float,
        radius: int
    ) -> List[GreenSpace]:
        """Get all green spaces whose center is within ``radius`` meters of the point."""
        spaces = await self._get_mock_green_spaces()
        origin = (lat, lng)
        return [
            space for space in spaces
            if geodesic(
                origin, (space.coordinates.lat, space.coordinates.lng)
            ).meters <= radius
        ]

    async def get_amenities_near_point(
        self,
        lat: float,
        lng: float,
        radius: int = 500,
        amenity_types: Optional[List[str]] = None
    ) -> List[Amenity]:
        """Get amenities near a specific point.

        Args:
            lat: Latitude of the query point.
            lng: Longitude of the query point.
            radius: Maximum distance in meters.
            amenity_types: Optional list of amenity type values to keep;
                a falsy value keeps every type.

        Returns:
            The mock amenities within ``radius``, each with
            ``distance_meters`` set to the computed distance truncated to
            an int.
        """
        # Mock amenities data, placed at fixed offsets from the query point
        mock_amenities = [
            Amenity(
                id="toilet_1",
                name="Public Toilet",
                type=AmenityType.TOILET,
                coordinates=Coordinates(lat=lat + 0.001, lng=lng + 0.001),
                distance_meters=100,
                rating=3.5
            ),
            Amenity(
                id="cafe_1",
                name="Park Café",
                type=AmenityType.CAFE,
                coordinates=Coordinates(lat=lat + 0.002, lng=lng - 0.001),
                distance_meters=200,
                rating=4.2,
                opening_hours="8:00-18:00"
            ),
            Amenity(
                id="playground_1",
                name="Children's Playground",
                type=AmenityType.PLAYGROUND,
                coordinates=Coordinates(lat=lat - 0.001, lng=lng + 0.002),
                distance_meters=150,
                rating=4.0
            )
        ]

        # Filter by types if specified
        if amenity_types:
            mock_amenities = [
                amenity for amenity in mock_amenities
                if amenity.type.value in amenity_types
            ]

        # Filter by radius
        filtered_amenities = []
        for amenity in mock_amenities:
            distance = geodesic(
                (lat, lng),
                (amenity.coordinates.lat, amenity.coordinates.lng)
            ).meters

            if distance <= radius:
                # Replace the placeholder distance with the computed one.
                amenity.distance_meters = int(distance)
                filtered_amenities.append(amenity)

        return filtered_amenities

    async def calculate_distance(
        self,
        lat1: float,
        lng1: float,
        lat2: float,
        lng2: float
    ) -> int:
        """Calculate distance between two points in meters (truncated to int)."""
        return int(geodesic((lat1, lng1), (lat2, lng2)).meters)

    async def find_similar_green_spaces(
        self,
        green_space: GreenSpace,
        limit: int = 5
    ) -> List[GreenSpace]:
        """Find green spaces similar to the given one.

        Each candidate is scored: +30 for the same type, +20 for the same
        neighborhood, +15 for matching water features, +15 for tree coverage
        within 20 percentage points, +10 for matching sports facilities.
        Candidates scoring at least 50 qualify.

        Args:
            green_space: The reference space; it is excluded from the result.
            limit: Maximum number of spaces to return.

        Returns:
            Qualifying spaces ordered from most to least similar. Spaces with
            equal scores keep their order from the underlying data.
        """
        all_spaces = await self._get_mock_green_spaces()

        scored_spaces = []
        for space in all_spaces:
            if space.id == green_space.id:
                continue

            # Simple similarity based on type and features
            similarity_score = 0

            if space.type == green_space.type:
                similarity_score += 30

            if space.neighborhood == green_space.neighborhood:
                similarity_score += 20

            if space.environmental.water_features == green_space.environmental.water_features:
                similarity_score += 15

            if abs(space.environmental.tree_coverage_percent
                   - green_space.environmental.tree_coverage_percent) < 20:
                similarity_score += 15

            if space.recreation.sports_facilities == green_space.recreation.sports_facilities:
                similarity_score += 10

            if similarity_score >= 50:  # Threshold for similarity
                scored_spaces.append((similarity_score, space))

        # Sort by similarity (highest first) and return top results. The sort
        # is stable, so ties retain their original relative order.
        scored_spaces.sort(key=lambda scored: scored[0], reverse=True)
        return [space for _, space in scored_spaces[:limit]]

    async def get_neighborhood_stats(self) -> Dict[str, Any]:
        """Get statistics for Berlin neighborhoods (hard-coded mock values)."""
        return {
            "neighborhoods": [
                {
                    "name": "mitte",
                    "display_name": "Mitte",
                    "green_space_count": 15,
                    "avg_personality_scores": {
                        "little_adventurers": 75,
                        "date_night": 80,
                        "squad_goals": 70,
                        "zen_masters": 65
                    }
                },
                {
                    "name": "kreuzberg",
                    "display_name": "Kreuzberg",
                    "green_space_count": 12,
                    "avg_personality_scores": {
                        "little_adventurers": 70,
                        "date_night": 75,
                        "squad_goals": 85,
                        "zen_masters": 60
                    }
                },
                {
                    "name": "prenzlauer_berg",
                    "display_name": "Prenzlauer Berg",
                    "green_space_count": 18,
                    "avg_personality_scores": {
                        "little_adventurers": 90,
                        "date_night": 70,
                        "squad_goals": 75,
                        "zen_masters": 70
                    }
                }
            ]
        }

    async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
        """Get current conditions at a location.

        The values are mock data and do not depend on ``lat``/``lng``; only
        ``last_updated`` varies, being set to the current local time.
        """
        # Mock current conditions
        return {
            "weather": {
                "temperature": 22,
                "condition": "partly_cloudy",
                "humidity": 65,
                "wind_speed": 10
            },
            "crowd_level": "moderate",
            "air_quality": "good",
            "noise_level": 2,
            "last_updated": datetime.now().isoformat()
        }

    async def refresh_all_data(self) -> Dict[str, str]:
        """Refresh all cached data.

        Clears ``self.cache``, records the refresh time in
        ``self.last_refresh`` and returns a status report including that
        timestamp in ISO format.
        """
        self.cache.clear()
        self.last_refresh = datetime.now()

        return {
            "green_spaces": "refreshed",
            "amenities": "refreshed",
            "neighborhoods": "refreshed",
            "timestamp": self.last_refresh.isoformat()
        }

    def summarize_amenities(self, amenities: List[Amenity]) -> Dict[str, int]:
        """Summarize amenities by type.

        Returns a mapping from each amenity type value to the number of
        amenities of that type.
        """
        summary = {}
        for amenity in amenities:
            amenity_type = amenity.type.value
            summary[amenity_type] = summary.get(amenity_type, 0) + 1
        return summary

    def _load_toilets(self) -> List[Dict]:
        """Load toilets data from JSON file.

        Reads ``<data_dir>/processed/toilets.json`` on first call and caches
        the list stored under its ``"toilets"`` key. If the file does not
        exist, a warning is logged and an empty list is cached.
        """
        if self._toilets_cache is None:
            toilets_file = self.data_dir / "processed" / "toilets.json"
            if toilets_file.exists():
                with open(toilets_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self._toilets_cache = data.get("toilets", [])
            else:
                logger.warning(
                    "Warning: toilets.json not found. Run process_toilet_csv.py first."
                )
                self._toilets_cache = []
        return self._toilets_cache

    async def get_toilets_near_point(
        self,
        lat: float,
        lng: float,
        radius: int = 500
    ) -> List[Dict]:
        """Get toilets near a point for picnic scoring.

        Entries whose ``lat``/``lng`` are missing, non-numeric, non-finite or
        outside the valid coordinate range are skipped.

        Returns:
            Copies of the toilet records within ``radius`` meters, each with
            an added ``distance_meters`` key, sorted nearest first.
        """
        toilets = self._load_toilets()
        nearby_toilets = []

        for toilet in toilets:
            # Skip toilets with missing coordinates
            raw_lat = toilet.get('lat')
            raw_lng = toilet.get('lng')
            if raw_lat is None or raw_lng is None:
                continue

            # Skip coordinates that cannot be converted to numbers
            try:
                toilet_lat = float(raw_lat)
                toilet_lng = float(raw_lng)
            except (ValueError, TypeError, OverflowError):
                continue

            # Skip NaN or infinite values
            if not (math.isfinite(toilet_lat) and math.isfinite(toilet_lng)):
                continue

            # Skip values outside the valid coordinate range
            if abs(toilet_lat) > 90 or abs(toilet_lng) > 180:
                continue

            distance = geodesic((lat, lng), (toilet_lat, toilet_lng)).meters
            if distance <= radius:
                # Copy so the cached record is not modified.
                toilet_copy = toilet.copy()
                toilet_copy['distance_meters'] = int(distance)
                nearby_toilets.append(toilet_copy)

        return sorted(nearby_toilets, key=lambda x: x['distance_meters'])

    async def _enhance_green_space_with_real_trees(
        self,
        green_space: GreenSpace,
        actual_lat: float,
        actual_lng: float
    ) -> GreenSpace:
        """Enhance green space environmental features with real tree data.

        Queries the street tree service within 300 m of the given point and
        raises tree coverage, shade quality and wildlife diversity to the
        tree-derived values where those are higher, capping each at 100.

        Args:
            green_space: The space to enhance; it is not modified.
            actual_lat: Latitude of the actual query location.
            actual_lng: Longitude of the actual query location.

        Returns:
            A copy of ``green_space`` with updated environmental features,
            coordinates set to the query location, and
            ``"real_street_trees"`` listed in ``data_sources``. If anything
            fails, the error is logged and the original space is returned.
        """
        try:
            # Get real tree data for the actual location (not just the park center)
            tree_response = await self.street_tree_service.get_trees_near_location(
                actual_lat, actual_lng, radius_m=300
            )

            # Calculate enhanced environmental features using real tree data
            tree_coverage = max(
                green_space.environmental.tree_coverage_percent,
                int(tree_response.shade_analysis.estimated_shade_coverage)
            )
            shade_quality = max(
                green_space.environmental.shade_quality,
                tree_response.shade_analysis.shade_quality_score
            )
            wildlife_diversity = max(
                green_space.environmental.wildlife_diversity_score,
                tree_response.metrics.species_diversity_score
            )

            # Create enhanced environmental features
            enhanced_environmental = EnvironmentalFeatures(
                tree_coverage_percent=min(100, tree_coverage),
                shade_quality=min(100, shade_quality),
                noise_level=green_space.environmental.noise_level,
                wildlife_diversity_score=min(100, wildlife_diversity),
                water_features=green_space.environmental.water_features,
                natural_surface_percent=green_space.environmental.natural_surface_percent
            )

            # Build a fresh data_sources list: model_copy() is shallow by
            # default, so appending to the copy's list would also change the
            # list held by the original green_space.
            data_sources = list(green_space.data_sources)
            if "real_street_trees" not in data_sources:
                data_sources.append("real_street_trees")

            # Create enhanced green space with real tree data
            return green_space.model_copy(update={
                "environmental": enhanced_environmental,
                # Use actual query location
                "coordinates": Coordinates(lat=actual_lat, lng=actual_lng),
                "data_sources": data_sources
            })

        except Exception as e:
            # Best-effort enhancement: log and return original space if it fails
            logger.exception("Error enhancing green space with real tree data: %s", e)
            return green_space

    async def _get_mock_green_spaces(self) -> List[GreenSpace]:
        """Get mock green spaces data for development.

        Builds a new list of three ``GreenSpace`` objects on every call, each
        with ``last_updated`` set to the current local time.
        """
        # This would be replaced with real data fetching in production
        return [
            GreenSpace(
                id="tiergarten_1",
                name="Tiergarten",
                description="Berlin's most famous park in the heart of the city",
                type=GreenSpaceType.PARK,
                coordinates=Coordinates(lat=52.5145, lng=13.3501),
                neighborhood="Mitte",
                address="Tiergarten, 10557 Berlin",
                area_sqm=210000,
                perimeter_m=5800,
                environmental=EnvironmentalFeatures(
                    tree_coverage_percent=85,
                    shade_quality=90,
                    noise_level=NoiseLevel.MODERATE,
                    wildlife_diversity_score=80,
                    water_features=True,
                    natural_surface_percent=95
                ),
                accessibility=AccessibilityFeatures(
                    wheelchair_accessible=True,
                    public_transport_score=5,
                    cycling_infrastructure=True,
                    parking_availability=3,
                    lighting_quality=4
                ),
                recreation=RecreationFeatures(
                    playground_quality=70,
                    sports_facilities=True,
                    running_paths=True,
                    cycling_paths=True,
                    dog_friendly=True,
                    bbq_allowed=False
                ),
                nearby_amenities=[],
                last_updated=datetime.now(),
                data_sources=["berlin_open_data", "osm"],
                confidence_score=95
            ),
            GreenSpace(
                id="volkspark_friedrichshain",
                name="Volkspark Friedrichshain",
                description="Historic park with fairy tale fountain and sports facilities",
                type=GreenSpaceType.PARK,
                coordinates=Coordinates(lat=52.5263, lng=13.4317),
                neighborhood="Friedrichshain",
                address="Friedrichshain, 10249 Berlin",
                area_sqm=49000,
                perimeter_m=2800,
                environmental=EnvironmentalFeatures(
                    tree_coverage_percent=70,
                    shade_quality=75,
                    noise_level=NoiseLevel.QUIET,
                    wildlife_diversity_score=65,
                    water_features=True,
                    natural_surface_percent=80
                ),
                accessibility=AccessibilityFeatures(
                    wheelchair_accessible=True,
                    public_transport_score=4,
                    cycling_infrastructure=True,
                    parking_availability=2,
                    lighting_quality=3
                ),
                recreation=RecreationFeatures(
                    playground_quality=85,
                    sports_facilities=True,
                    running_paths=True,
                    cycling_paths=True,
                    dog_friendly=True,
                    bbq_allowed=True
                ),
                nearby_amenities=[],
                last_updated=datetime.now(),
                data_sources=["berlin_open_data", "osm"],
                confidence_score=90
            ),
            GreenSpace(
                id="tempelhofer_feld",
                name="Tempelhofer Feld",
                description="Former airport turned into unique urban park",
                type=GreenSpaceType.PARK,
                coordinates=Coordinates(lat=52.4732, lng=13.4015),
                neighborhood="Tempelhof",
                address="Tempelhofer Damm, 12101 Berlin",
                area_sqm=300000,
                perimeter_m=6200,
                environmental=EnvironmentalFeatures(
                    tree_coverage_percent=15,
                    shade_quality=20,
                    noise_level=NoiseLevel.MODERATE,
                    wildlife_diversity_score=40,
                    water_features=False,
                    natural_surface_percent=60
                ),
                accessibility=AccessibilityFeatures(
                    wheelchair_accessible=True,
                    public_transport_score=4,
                    cycling_infrastructure=True,
                    parking_availability=4,
                    lighting_quality=2
                ),
                recreation=RecreationFeatures(
                    playground_quality=30,
                    sports_facilities=False,
                    running_paths=True,
                    cycling_paths=True,
                    dog_friendly=True,
                    bbq_allowed=True
                ),
                nearby_amenities=[],
                last_updated=datetime.now(),
                data_sources=["berlin_open_data", "osm"],
                confidence_score=85
            )
        ]