# app/services/berlin_data_service.py
from typing import List, Optional, Tuple, Dict, Any
import asyncio
import json
from datetime import datetime
from pathlib import Path

from geopy.distance import geodesic

from app.models.green_space import (
    GreenSpace, Coordinates, Amenity, AmenityType, GreenSpaceType,
    EnvironmentalFeatures, AccessibilityFeatures, RecreationFeatures,
    NoiseLevel, LocationScore
)
from app.services.street_tree_service import StreetTreeService


class BerlinDataService:
    """Service for accessing Berlin open data and external APIs.

    Green spaces and toilets are read lazily from JSON files below
    ``self.data_dir / "processed"`` and kept in per-instance caches.
    Amenities and current conditions are mock data.
    """

    def __init__(self):
        self.cache = {}
        self.last_refresh = None
        # Lazily populated by _load_toilets() / _load_green_spaces().
        self._toilets_cache = None
        self._green_spaces_cache = None
        self._street_trees_index = None
        self.data_dir = Path("app/data")
        self.street_tree_service = StreetTreeService()

    async def search_green_spaces(
        self,
        location: Optional[Tuple[float, float]] = None,
        radius: int = 2000,
        neighborhood: Optional[str] = None,
        filters: Optional[Dict[str, Any]] = None
    ) -> List[GreenSpace]:
        """Search for green spaces based on criteria.

        Args:
            location: Optional ``(lat, lng)`` centre; when given, spaces whose
                centre is farther than ``radius`` metres away are dropped.
            radius: Maximum geodesic distance in metres from ``location``.
            neighborhood: Optional neighborhood name, matched
                case-insensitively; a space matches when either name
                contains the other.
            filters: Optional mapping; the keys read are ``min_size``,
                ``has_water``, ``has_playground`` and ``max_noise_level``.

        Returns:
            The spaces that pass every supplied criterion, in data order.
        """
        # This is a mock implementation - in a real app, this would query
        # Berlin's open data.
        spaces = await self._get_mock_green_spaces()
        neighborhood_lower = neighborhood.lower() if neighborhood else None

        filtered_spaces = []
        for space in spaces:
            # Location filter.
            if location:
                distance = geodesic(
                    location,
                    (space.coordinates.lat, space.coordinates.lng)
                ).meters
                if distance > radius:
                    continue

            # Neighborhood filter with flexible matching: substring in either
            # direction (covers exact matches and compound names).
            if neighborhood_lower:
                space_neighborhood_lower = space.neighborhood.lower()
                if (neighborhood_lower not in space_neighborhood_lower
                        and space_neighborhood_lower not in neighborhood_lower):
                    continue

            # Feature filters.
            if filters:
                if (filters.get("min_size") and space.area_sqm
                        and space.area_sqm < filters["min_size"]):
                    continue
                if filters.get("has_water") and not space.environmental.water_features:
                    continue
                if filters.get("has_playground") and space.recreation.playground_quality == 0:
                    continue
                if (filters.get("max_noise_level")
                        and space.environmental.noise_level.value > filters["max_noise_level"]):
                    continue

            filtered_spaces.append(space)

        return filtered_spaces

    async def get_green_space_by_id(self, space_id: str) -> Optional[GreenSpace]:
        """Get a specific green space by ID, or ``None`` if no ID matches."""
        spaces = await self._get_mock_green_spaces()
        for space in spaces:
            if space.id == space_id:
                return space
        return None

    async def get_green_space_at_location(self, lat: float, lng: float) -> Optional[GreenSpace]:
        """Check if a location is within a green space and enhance it with real tree data.

        Returns the first space (in data order) whose centre lies within
        500 m of the point, enhanced via
        ``_enhance_green_space_with_real_trees``; ``None`` when no space
        qualifies.
        """
        spaces = await self._get_mock_green_spaces()
        for space in spaces:
            # Simple distance check - in reality you'd use proper polygon
            # containment.
            distance = geodesic(
                (lat, lng),
                (space.coordinates.lat, space.coordinates.lng)
            ).meters
            if distance < 500:  # Within 500m of center (larger radius for better coverage)
                return await self._enhance_green_space_with_real_trees(space, lat, lng)
        return None

    async def get_green_spaces_within_radius(
        self,
        lat: float,
        lng: float,
        radius: int
    ) -> List[GreenSpace]:
        """Get all green spaces whose centre is within ``radius`` metres of the point."""
        spaces = await self._get_mock_green_spaces()
        origin = (lat, lng)
        return [
            space for space in spaces
            if geodesic(
                origin, (space.coordinates.lat, space.coordinates.lng)
            ).meters <= radius
        ]

    async def get_amenities_near_point(
        self,
        lat: float,
        lng: float,
        radius: int = 500,
        amenity_types: Optional[List[str]] = None
    ) -> List[Amenity]:
        """Get amenities near a specific point.

        The amenities are mock entries placed at fixed offsets from the
        query point. They are filtered by ``amenity_types`` (compared with
        each amenity's ``type.value``) when given, then by geodesic distance;
        ``distance_meters`` of each returned amenity is set to the computed
        distance truncated to an int.
        """
        # Mock amenities data
        mock_amenities = [
            Amenity(
                id="toilet_1",
                name="Public Toilet",
                type=AmenityType.TOILET,
                coordinates=Coordinates(lat=lat + 0.001, lng=lng + 0.001),
                distance_meters=100,
                rating=3.5
            ),
            Amenity(
                id="cafe_1",
                name="Park Café",
                type=AmenityType.CAFE,
                coordinates=Coordinates(lat=lat + 0.002, lng=lng - 0.001),
                distance_meters=200,
                rating=4.2,
                opening_hours="8:00-18:00"
            ),
            Amenity(
                id="playground_1",
                name="Children's Playground",
                type=AmenityType.PLAYGROUND,
                coordinates=Coordinates(lat=lat - 0.001, lng=lng + 0.002),
                distance_meters=150,
                rating=4.0
            )
        ]

        # Filter by types if specified
        if amenity_types:
            mock_amenities = [
                amenity for amenity in mock_amenities
                if amenity.type.value in amenity_types
            ]

        # Filter by radius
        filtered_amenities = []
        for amenity in mock_amenities:
            distance = geodesic(
                (lat, lng),
                (amenity.coordinates.lat, amenity.coordinates.lng)
            ).meters
            if distance <= radius:
                amenity.distance_meters = int(distance)
                filtered_amenities.append(amenity)

        return filtered_amenities

    async def calculate_distance(
        self,
        lat1: float,
        lng1: float,
        lat2: float,
        lng2: float
    ) -> int:
        """Calculate distance between two points in meters (truncated to int)."""
        return int(geodesic((lat1, lng1), (lat2, lng2)).meters)

    @staticmethod
    def _similarity_score(space: GreenSpace, reference: GreenSpace) -> int:
        """Score how alike two green spaces are (0-90) from type and features."""
        score = 0
        if space.type == reference.type:
            score += 30
        if space.neighborhood == reference.neighborhood:
            score += 20
        if space.environmental.water_features == reference.environmental.water_features:
            score += 15
        if abs(space.environmental.tree_coverage_percent
               - reference.environmental.tree_coverage_percent) < 20:
            score += 15
        if space.recreation.sports_facilities == reference.recreation.sports_facilities:
            score += 10
        return score

    async def find_similar_green_spaces(
        self,
        green_space: GreenSpace,
        limit: int = 5
    ) -> List[GreenSpace]:
        """Find green spaces similar to the given one.

        Every other space is scored with ``_similarity_score``; those scoring
        at least 50 are returned, most similar first, capped at ``limit``.
        Spaces with equal scores keep their data order.
        """
        all_spaces = await self._get_mock_green_spaces()

        scored = []
        for space in all_spaces:
            if space.id == green_space.id:
                continue
            similarity_score = self._similarity_score(space, green_space)
            if similarity_score >= 50:  # Threshold for similarity
                scored.append((similarity_score, space))

        # Sort by similarity (the sort is stable, so ties keep data order)
        # and return top results.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [space for _, space in scored[:limit]]

    async def get_neighborhood_stats(self) -> Dict[str, Any]:
        """Get statistics for Berlin neighborhoods.

        Returns:
            ``{"neighborhoods": [...]}`` with one entry per neighborhood
            (slug ``name``, ``display_name``, ``green_space_count`` and
            ``avg_personality_scores``), sorted by green space count,
            most spaces first.
        """
        green_spaces = await self._get_mock_green_spaces()

        # Group green spaces per neighborhood (dict keeps first-seen order).
        spaces_by_neighborhood: Dict[str, List[GreenSpace]] = {}
        for space in green_spaces:
            spaces_by_neighborhood.setdefault(space.neighborhood, []).append(space)

        neighborhoods = []
        for neighborhood, spaces in spaces_by_neighborhood.items():
            # Every group holds at least one space, so count is never zero.
            count = len(spaces)

            # Aggregate the actual features of the neighborhood's spaces.
            avg_tree_coverage = sum(
                s.environmental.tree_coverage_percent for s in spaces
            ) / count
            avg_playground = sum(
                s.recreation.playground_quality for s in spaces
            ) / count
            water_ratio = sum(
                1 for s in spaces if s.environmental.water_features
            ) / count
            sports_ratio = sum(
                1 for s in spaces if s.recreation.sports_facilities
            ) / count

            # Personality scores derived from the features, capped at 100.
            little_adventurers = min(100, int(avg_playground * 0.8 + sports_ratio * 30 + 40))
            date_night = min(100, int(avg_tree_coverage * 0.6 + water_ratio * 25 + 45))
            squad_goals = min(100, int(sports_ratio * 40 + avg_tree_coverage * 0.4 + 35))
            zen_masters = min(100, int(avg_tree_coverage * 0.7 + water_ratio * 20 + 30))

            neighborhoods.append({
                "name": neighborhood.lower().replace(' ', '_').replace('-', '_'),
                "display_name": neighborhood,
                "green_space_count": count,
                "avg_personality_scores": {
                    "little_adventurers": little_adventurers,
                    "date_night": date_night,
                    "squad_goals": squad_goals,
                    "zen_masters": zen_masters
                }
            })

        # Sort by green space count (most spaces first)
        neighborhoods.sort(key=lambda x: x["green_space_count"], reverse=True)

        return {"neighborhoods": neighborhoods}

    async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
        """Get current conditions at a location.

        Mock implementation: the values are fixed and do not depend on
        ``lat``/``lng``; only ``last_updated`` varies.
        """
        # Mock current conditions
        return {
            "weather": {
                "temperature": 22,
                "condition": "partly_cloudy",
                "humidity": 65,
                "wind_speed": 10
            },
            "crowd_level": "moderate",
            "air_quality": "good",
            "noise_level": 2,
            "last_updated": datetime.now().isoformat()
        }

    async def refresh_all_data(self) -> Dict[str, str]:
        """Refresh all cached data.

        Clears ``self.cache`` and drops the lazily loaded toilet and green
        space data so the next access re-reads the JSON files, then records
        the refresh time.
        """
        self.cache.clear()
        # Without resetting these, the file-backed loaders would keep serving
        # the data read on first access.
        self._toilets_cache = None
        self._green_spaces_cache = None
        self.last_refresh = datetime.now()

        return {
            "green_spaces": "refreshed",
            "amenities": "refreshed",
            "neighborhoods": "refreshed",
            "timestamp": self.last_refresh.isoformat()
        }

    def summarize_amenities(self, amenities: List[Amenity]) -> Dict[str, int]:
        """Summarize amenities by type: map each ``type.value`` to its count."""
        summary: Dict[str, int] = {}
        for amenity in amenities:
            amenity_type = amenity.type.value
            summary[amenity_type] = summary.get(amenity_type, 0) + 1
        return summary

    def _load_toilets(self) -> List[Dict]:
        """Load toilets data from JSON file.

        Reads ``processed/toilets.json`` under ``self.data_dir`` on first use
        and caches its ``"toilets"`` list; a missing file yields an empty list.
        """
        if self._toilets_cache is None:
            toilets_file = self.data_dir / "processed" / "toilets.json"
            if toilets_file.exists():
                with open(toilets_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self._toilets_cache = data.get("toilets", [])
            else:
                print("Warning: toilets.json not found. Run process_toilet_csv.py first.")
                self._toilets_cache = []
        return self._toilets_cache

    async def get_toilets_near_point(
        self,
        lat: float,
        lng: float,
        radius: int = 500
    ) -> List[Dict]:
        """Get toilets near a point for picnic scoring.

        Entries whose ``lat``/``lng`` are missing, non-numeric, NaN or out of
        range are skipped.

        Returns:
            Shallow copies of the toilet dicts within ``radius`` metres, each
            with an added integer ``distance_meters``, nearest first.
        """
        toilets = self._load_toilets()
        nearby_toilets = []

        for toilet in toilets:
            # Skip toilets with invalid coordinates
            toilet_lat = toilet.get('lat')
            toilet_lng = toilet.get('lng')
            if toilet_lat is None or toilet_lng is None:
                continue

            try:
                toilet_lat = float(toilet_lat)
                toilet_lng = float(toilet_lng)
            except (ValueError, TypeError):
                continue

            # NaN is the only float that differs from itself.
            if toilet_lat != toilet_lat or toilet_lng != toilet_lng:
                continue
            # Invalid coordinate range (this also rejects +/- infinity).
            if abs(toilet_lat) > 90 or abs(toilet_lng) > 180:
                continue

            distance = geodesic((lat, lng), (toilet_lat, toilet_lng)).meters
            if distance <= radius:
                toilet_copy = toilet.copy()
                toilet_copy['distance_meters'] = int(distance)
                nearby_toilets.append(toilet_copy)

        return sorted(nearby_toilets, key=lambda x: x['distance_meters'])

    async def _enhance_green_space_with_real_trees(
        self,
        green_space: GreenSpace,
        actual_lat: float,
        actual_lng: float
    ) -> GreenSpace:
        """Enhance green space environmental features with real tree data.

        Queries the street tree service within 300 m of the actual location
        and, for tree coverage, shade quality and wildlife diversity, keeps
        the higher of the stored and the tree-derived value (capped at 100).

        Returns:
            A copy of ``green_space`` with the enhanced environmental
            features, coordinates set to the query location and
            ``"real_street_trees"`` listed in ``data_sources``. If anything
            fails, the original ``green_space`` is returned unchanged.
        """
        try:
            # Get real tree data for the actual location (not just the park center)
            tree_response = await self.street_tree_service.get_trees_near_location(
                actual_lat, actual_lng, radius_m=300
            )

            environmental = green_space.environmental
            tree_coverage = max(
                environmental.tree_coverage_percent,
                int(tree_response.shade_analysis.estimated_shade_coverage)
            )
            shade_quality = max(
                environmental.shade_quality,
                tree_response.shade_analysis.shade_quality_score
            )
            wildlife_diversity = max(
                environmental.wildlife_diversity_score,
                tree_response.metrics.species_diversity_score
            )

            # Create enhanced environmental features
            enhanced_environmental = EnvironmentalFeatures(
                tree_coverage_percent=min(100, tree_coverage),
                shade_quality=min(100, shade_quality),
                noise_level=environmental.noise_level,
                wildlife_diversity_score=min(100, wildlife_diversity),
                water_features=environmental.water_features,
                natural_surface_percent=environmental.natural_surface_percent
            )

            # model_copy() is shallow, so give the copy its own list instead
            # of appending to the one shared with the original space.
            data_sources = list(green_space.data_sources)
            if "real_street_trees" not in data_sources:
                data_sources.append("real_street_trees")

            return green_space.model_copy(update={
                "environmental": enhanced_environmental,
                "coordinates": Coordinates(lat=actual_lat, lng=actual_lng),  # Use actual query location
                "data_sources": data_sources
            })

        except Exception as e:
            # Best effort: enhancement is optional, so fall back to the
            # original space if it fails.
            print(f"Error enhancing green space with real tree data: {e}")
            return green_space

    def _load_green_spaces(self) -> List[Dict]:
        """Load green spaces data from JSON file.

        Reads ``processed/quick_berlin_green_spaces.json`` under
        ``self.data_dir`` on first use and caches its ``"green_spaces"``
        list; a missing file yields an empty list.
        """
        if self._green_spaces_cache is None:
            green_spaces_file = self.data_dir / "processed" / "quick_berlin_green_spaces.json"
            if green_spaces_file.exists():
                with open(green_spaces_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                self._green_spaces_cache = data.get("green_spaces", [])
            else:
                print("Warning: quick_berlin_green_spaces.json not found.")
                self._green_spaces_cache = []
        return self._green_spaces_cache

    def _convert_json_to_green_space(self, json_data: Dict) -> GreenSpace:
        """Convert JSON data to GreenSpace model.

        Missing keys fall back to the defaults spelled out below; the type
        is always ``GreenSpaceType.PARK`` and ``nearby_amenities`` is empty.
        """
        coordinates = json_data.get("coordinates", {})
        environmental = json_data.get("environmental", {})
        accessibility = json_data.get("accessibility", {})
        recreation = json_data.get("recreation", {})

        return GreenSpace(
            id=json_data.get("id", ""),
            name=json_data.get("name", ""),
            description=json_data.get("description", ""),
            type=GreenSpaceType.PARK,  # Default to PARK, could be enhanced later
            coordinates=Coordinates(
                lat=coordinates.get("lat", 0.0),
                lng=coordinates.get("lng", 0.0)
            ),
            neighborhood=json_data.get("neighborhood", "Berlin"),
            address=f"{json_data.get('name', 'Unknown')}, Berlin",
            area_sqm=json_data.get("area_sqm", 0),
            perimeter_m=json_data.get("perimeter_m", None),
            environmental=EnvironmentalFeatures(
                tree_coverage_percent=environmental.get("tree_coverage_percent", 0),
                shade_quality=environmental.get("shade_quality", 0),
                noise_level=NoiseLevel(environmental.get("noise_level", 1)),
                wildlife_diversity_score=environmental.get("wildlife_diversity_score", 0),
                water_features=environmental.get("water_features", False),
                natural_surface_percent=environmental.get("natural_surface_percent", 0)
            ),
            accessibility=AccessibilityFeatures(
                wheelchair_accessible=accessibility.get("wheelchair_accessible", True),
                public_transport_score=accessibility.get("public_transport_score", 3),
                cycling_infrastructure=accessibility.get("cycling_infrastructure", True),
                parking_availability=accessibility.get("parking_availability", 2),
                lighting_quality=accessibility.get("lighting_quality", 3)
            ),
            recreation=RecreationFeatures(
                playground_quality=recreation.get("playground_quality", 0),
                sports_facilities=recreation.get("sports_facilities", False),
                running_paths=recreation.get("running_paths", True),
                cycling_paths=recreation.get("cycling_paths", True),
                dog_friendly=recreation.get("dog_friendly", True),
                bbq_allowed=recreation.get("bbq_allowed", False)
            ),
            nearby_amenities=[],
            last_updated=datetime.fromisoformat(
                json_data.get("last_updated", datetime.now().isoformat())
            ),
            data_sources=json_data.get("data_sources", []),
            confidence_score=json_data.get("confidence_score", 85)
        )

    async def _get_mock_green_spaces(self) -> List[GreenSpace]:
        """Get green spaces data from JSON file.

        Converts every loaded JSON record into a fresh ``GreenSpace``;
        records that fail to convert are reported and skipped.
        """
        json_data = self._load_green_spaces()

        green_spaces = []
        for space_data in json_data:
            try:
                green_space = self._convert_json_to_green_space(space_data)
            except Exception as e:
                # Best effort: one malformed record must not hide the rest.
                print(f"Error converting green space {space_data.get('id', 'unknown')}: {e}")
                continue
            green_spaces.append(green_space)

        return green_spaces