import asyncio
import json
import math
from collections import Counter
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import List, Optional, Tuple, Dict, Any, Iterable

import aiofiles
from geopy.distance import geodesic
from rtree import index

from app.models.street_tree import (
    StreetTree,
    TreeDensityMetrics,
    TreeShadeAnalysis,
    TreesSearchFilters,
    TreesNearLocationResponse,
    TreeGenus,
    TreeHealthStatus
)
from app.models.green_space import Coordinates


class StreetTreeService:
    """Service for accessing and analyzing Berlin street trees data.

    The class is a process-wide singleton: ``__new__`` always hands back the
    same instance and ``__init__`` only initialises its state once. Tree
    records are loaded lazily from ``<data_dir>/processed/street_trees.json``
    and indexed in an R-tree keyed by position in the loaded list.
    """

    _instance = None
    _initialized = False

    # Deliberately small estimate of the length of one degree in metres.
    # Underestimating metres-per-degree overestimates the bounding box, so the
    # R-tree pre-filter never drops a tree that the exact distance check
    # would have accepted.
    _MIN_METERS_PER_DEGREE = 110_574.0

    # Upper-cased German genus name -> genus enum. Keys must be spelled the
    # way ``str.upper()`` produces them: "ß" upper-cases to "SS", so the
    # hawthorn key is "WEISSDORN" (a "WEIßDORN" key could never match).
    _GENUS_MAPPING = {
        "AHORN": TreeGenus.AHORN,
        "LINDE": TreeGenus.LINDE,
        "KASTANIE": TreeGenus.KASTANIE,
        "ROSSKASTANIE": TreeGenus.ROSSKASTANIE,
        "EICHE": TreeGenus.EICHE,
        "PLATANE": TreeGenus.PLATANE,
        "BIRKE": TreeGenus.BIRKE,
        "WEISSDORN": TreeGenus.WEISSDORN,
        "PAPPEL": TreeGenus.PAPPEL,
        "ESCHE": TreeGenus.ESCHE,
    }

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Initialise the caches once; later constructions are no-ops."""
        if not self._initialized:
            self._trees_cache = None
            self._spatial_index = None
            self._tree_id_to_data = {}
            self.data_dir = Path("app/data")
            self.__class__._initialized = True

    async def _load_trees(self) -> List[Dict]:
        """Load street trees data from JSON file and build spatial index.

        The file is read only on the first call; afterwards the cached list
        is returned. A missing file yields an empty list (and a warning on
        stdout) rather than an exception.

        Returns:
            The list of raw tree dictionaries (possibly empty).
        """
        if self._trees_cache is None:
            trees_file = self.data_dir / "processed" / "street_trees.json"

            if trees_file.exists():
                print("🔄 Loading trees data and building spatial index...")
                async with aiofiles.open(trees_file, 'r', encoding='utf-8') as f:
                    content = await f.read()
                    data = json.loads(content)
                self._trees_cache = data.get("street_trees", [])
                await self._build_spatial_index()
                print(f"✅ Loaded {len(self._trees_cache)} trees with spatial index")
            else:
                print("Warning: street_trees.json not found. Run process_street_trees.py first.")
                self._trees_cache = []

        return self._trees_cache

    async def _build_spatial_index(self):
        """Build R-tree spatial index for fast location queries.

        Does nothing when an index already exists or no trees are loaded.
        Records without both ``lat`` and ``lng`` are left out of the index.
        """
        if self._spatial_index is None and self._trees_cache:
            print("🔨 Building spatial index...")
            self._spatial_index = index.Index()
            self._tree_id_to_data = {}

            for i, tree_data in enumerate(self._trees_cache):
                lat = tree_data.get('lat')
                lng = tree_data.get('lng')

                if lat is not None and lng is not None:
                    # R-tree expects (minx, miny, maxx, maxy); a point is a
                    # degenerate box with identical corners.
                    bbox = (lng, lat, lng, lat)
                    self._spatial_index.insert(i, bbox)
                    self._tree_id_to_data[i] = tree_data

            print(f"✅ Spatial index built for {len(self._tree_id_to_data)} trees")

    def _create_tree_from_dict(self, tree_data: Dict) -> StreetTree:
        """Convert tree dictionary to StreetTree model.

        Args:
            tree_data: One raw record as stored in the trees JSON file.

        Returns:
            A ``StreetTree`` with the genus mapped to ``TreeGenus`` and a
            health status estimated from the tree's age.
        """
        # Normalise to the spelling used by the mapping keys. upper() already
        # turns "ß" into "SS"; the capital sharp s (U+1E9E) is folded as well.
        genus_german = (
            (tree_data.get('genus_german') or '').upper().replace('\u1e9e', 'SS')
        )
        genus_category = self._GENUS_MAPPING.get(genus_german, TreeGenus.OTHER)

        # Health is only a heuristic derived from age; a missing, zero or
        # negative age leaves the status at UNKNOWN.
        health_status = TreeHealthStatus.UNKNOWN
        age = tree_data.get('age')
        if age:
            if age > 80:
                health_status = TreeHealthStatus.FAIR
            elif age > 50:
                health_status = TreeHealthStatus.GOOD
            elif age > 0:
                health_status = TreeHealthStatus.EXCELLENT

        return StreetTree(
            id=tree_data.get('id', ''),
            object_id=tree_data.get('object_id'),
            tree_id=tree_data.get('tree_id'),
            location_number=tree_data.get('location_number'),
            identifier=tree_data.get('identifier'),
            object_name=tree_data.get('object_name'),
            species_german=tree_data.get('species_german'),
            species_botanical=tree_data.get('species_botanical'),
            genus_german=tree_data.get('genus_german'),
            genus_botanical=tree_data.get('genus_botanical'),
            genus_category=genus_category,
            coordinates=Coordinates(
                lat=tree_data.get('lat', 0.0),
                lng=tree_data.get('lng', 0.0)
            ),
            district=tree_data.get('district'),
            owner=tree_data.get('owner'),
            category=tree_data.get('category'),
            street=tree_data.get('street'),
            house_number=tree_data.get('house_number'),
            address_addition=tree_data.get('address_addition'),
            planting_year=tree_data.get('planting_year'),
            age=tree_data.get('age'),
            crown_diameter_m=tree_data.get('crown_diameter_m'),
            trunk_circumference_cm=tree_data.get('trunk_circumference_cm'),
            height_m=tree_data.get('height_m'),
            health_status=health_status,
            last_updated=datetime.now()
        )

    # A static method so the cache is keyed on the four coordinates only and
    # does not hold a reference to the instance. Existing
    # ``self._distance_cache(...)`` call sites keep working unchanged.
    @staticmethod
    @lru_cache(maxsize=1000)
    def _distance_cache(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
        """Return the geodesic distance in metres between two points (cached)."""
        return geodesic((lat1, lng1), (lat2, lng2)).meters

    @classmethod
    def _bounding_box(
        cls,
        lat: float,
        lng: float,
        radius_m: float
    ) -> Tuple[float, float, float, float]:
        """Return a (minx, miny, maxx, maxy) box that encloses the search circle.

        A degree of longitude shrinks with cos(latitude), so the longitude
        half-width is scaled accordingly; using the latitude half-width for
        both axes would cut off trees to the east and west. The box is kept
        slightly too large on purpose because candidates are filtered by
        exact distance afterwards.
        """
        lat_deg = radius_m / cls._MIN_METERS_PER_DEGREE

        # Use the box edge closest to a pole: that is where a degree of
        # longitude is shortest, which gives the widest (safest) box.
        extreme_lat = min(90.0, abs(lat) + lat_deg)
        cos_lat = math.cos(math.radians(extreme_lat))
        if cos_lat <= 1e-12:
            lng_deg = 180.0
        else:
            lng_deg = min(180.0, lat_deg / cos_lat)

        return (lng - lng_deg, lat - lat_deg, lng + lng_deg, lat + lat_deg)

    async def get_trees_near_location(
        self,
        lat: float,
        lng: float,
        radius_m: int = 500,
        limit: Optional[int] = None
    ) -> TreesNearLocationResponse:
        """Get street trees within a radius of a location using spatial index.

        Args:
            lat: Latitude of the query point in degrees.
            lng: Longitude of the query point in degrees.
            radius_m: Search radius in metres.
            limit: Maximum number of trees to return. ``None`` or a
                non-positive value means no limit. When a limit applies, the
                *nearest* trees are kept.

        Returns:
            The matching trees sorted by ascending distance, together with
            density metrics and shade analysis computed over those trees.
        """
        start_time = datetime.now()
        await self._load_trees()

        if self._spatial_index is None:
            # Fallback to linear search if the index is unavailable
            return await self._get_trees_linear_search(lat, lng, radius_m, limit)

        bbox = self._bounding_box(lat, lng, radius_m)
        candidates = (
            self._tree_id_to_data.get(tree_id)
            for tree_id in self._spatial_index.intersection(bbox)
        )
        return self._build_nearby_response(
            lat, lng, radius_m, limit, candidates, start_time
        )

    def _build_nearby_response(
        self,
        lat: float,
        lng: float,
        radius_m: int,
        limit: Optional[int],
        candidates: Iterable[Optional[Dict]],
        start_time: datetime
    ) -> TreesNearLocationResponse:
        """Filter candidates by exact distance and assemble the response.

        Shared by the indexed and the linear search so both agree on
        ordering, limit handling and metrics.
        """
        in_range = []
        for tree_data in candidates:
            if not tree_data:
                continue

            tree_lat = tree_data.get('lat')
            tree_lng = tree_data.get('lng')
            if tree_lat is None or tree_lng is None:
                continue

            distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
            if distance <= radius_m:
                in_range.append((distance, tree_data))

        # Sort before truncating so that a limit keeps the nearest trees
        # rather than whichever ones happened to be visited first.
        in_range.sort(key=lambda item: item[0])
        if limit is not None and limit > 0:
            in_range = in_range[:limit]

        # Models are only built for the trees that are actually returned.
        distances = [distance for distance, _ in in_range]
        nearby_trees = [
            self._create_tree_from_dict(tree_data) for _, tree_data in in_range
        ]

        metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
        shade_analysis = self._analyze_shade_coverage(
            lat, lng, nearby_trees, distances
        )

        query_time = (datetime.now() - start_time).total_seconds() * 1000

        return TreesNearLocationResponse(
            location=Coordinates(lat=lat, lng=lng),
            radius_m=radius_m,
            trees=nearby_trees,
            metrics=metrics,
            shade_analysis=shade_analysis,
            total_found=len(nearby_trees),
            query_time_ms=int(query_time)
        )

    def _calculate_tree_density_metrics(
        self,
        trees: List[StreetTree],
        radius_m: int
    ) -> TreeDensityMetrics:
        """Calculate tree density and coverage metrics.

        Args:
            trees: Trees found inside the search circle.
            radius_m: Radius of the search circle in metres.

        Returns:
            Aggregated metrics; a default ``TreeDensityMetrics()`` when
            ``trees`` is empty.
        """
        if not trees:
            return TreeDensityMetrics()

        circle_area_m2 = math.pi * radius_m * radius_m
        area_hectares = circle_area_m2 / 10000  # 1 ha = 10,000 m²

        # Averages ignore trees for which the value is missing
        ages = [t.age for t in trees if t.age is not None]
        heights = [t.height_m for t in trees if t.height_m is not None]
        crowns = [t.crown_diameter_m for t in trees if t.crown_diameter_m is not None]

        avg_age = sum(ages) / len(ages) if ages else None
        avg_height = sum(heights) / len(heights) if heights else None
        avg_crown = sum(crowns) / len(crowns) if crowns else None

        # Count mature vs young trees
        mature_trees = sum(1 for t in trees if t.age and t.age > 20)
        young_trees = sum(1 for t in trees if t.age and t.age < 10)

        # Shade coverage (rough estimate): summed crown discs relative to the
        # search circle, capped at 100 %. A zero-radius query has no area to
        # relate to, so it reports 0 instead of dividing by zero.
        shade_coverage = 0.0
        if crowns and circle_area_m2 > 0:
            total_crown_area = sum(math.pi * (d / 2) ** 2 for d in crowns if d > 0)
            shade_coverage = min(100.0, (total_crown_area / circle_area_m2) * 100)

        # Three most frequent species; ties keep first-seen order
        species_count = Counter(
            t.species_german for t in trees if t.species_german
        )
        dominant_species_names = [name for name, _ in species_count.most_common(3)]

        # Species diversity (simple calculation): 10 points per distinct
        # species, capped at 100
        diversity_score = min(100, len(species_count) * 10)

        return TreeDensityMetrics(
            total_trees=len(trees),
            trees_per_hectare=len(trees) / area_hectares if area_hectares > 0 else 0,
            average_tree_age=avg_age,
            average_height=avg_height,
            average_crown_diameter=avg_crown,
            shade_coverage_percent=shade_coverage,
            mature_trees_count=mature_trees,
            young_trees_count=young_trees,
            dominant_species=dominant_species_names,
            species_diversity_score=diversity_score
        )

    def _analyze_shade_coverage(
        self,
        lat: float,
        lng: float,
        trees: List[StreetTree],
        distances: Optional[List[float]] = None
    ) -> TreeShadeAnalysis:
        """Analyze shade coverage for picnic spot evaluation.

        Args:
            lat: Latitude of the evaluated spot.
            lng: Longitude of the evaluated spot.
            trees: Trees around the spot.
            distances: Optional distances in metres, parallel to ``trees``.
                When omitted (or of a different length) they are computed
                here.

        Returns:
            Counts of trees within 50 m / 100 m and the derived heuristic
            shade scores.
        """
        if distances is None or len(distances) != len(trees):
            distances = [
                self._distance_cache(lat, lng, t.coordinates.lat, t.coordinates.lng)
                for t in trees
            ]

        trees_50m = 0
        trees_100m = 0
        large_trees = []

        for tree, distance in zip(trees, distances):
            if distance <= 50:
                trees_50m += 1
            if distance <= 100:
                trees_100m += 1

            # Consider large trees (good crown diameter or height)
            # NOTE(review): reconstructed from a whitespace-collapsed source;
            # confirm this check was not meant to apply only within 100 m.
            if ((tree.crown_diameter_m and tree.crown_diameter_m > 8) or
                    (tree.height_m and tree.height_m > 15) or
                    (tree.age and tree.age > 30)):
                large_trees.append(tree)

        # Estimate shade coverage: 15 points per tree within 50 m, capped
        shade_coverage = 0
        if trees_50m > 0:
            shade_coverage = min(100, trees_50m * 15)  # Rough estimate

        # Shade quality based on tree density
        shade_quality = 0
        if trees_50m > 3:
            shade_quality = 80
        elif trees_50m > 1:
            shade_quality = 60
        elif trees_100m > 5:
            shade_quality = 40
        elif trees_100m > 2:
            shade_quality = 20

        # Best shade times (simplified)
        best_times = []
        if shade_quality > 60:
            best_times = ["10:00-12:00", "14:00-16:00"]
        elif shade_quality > 30:
            best_times = ["11:00-13:00"]

        return TreeShadeAnalysis(
            has_nearby_trees=len(trees) > 0,
            trees_within_50m=trees_50m,
            trees_within_100m=trees_100m,
            estimated_shade_coverage=shade_coverage,
            shade_quality_score=shade_quality,
            best_shade_times=best_times,
            nearby_large_trees=large_trees[:5],  # Limit to 5 for response size
            canopy_density=len(large_trees) / max(1, len(trees)) if trees else 0
        )

    async def _get_trees_linear_search(
        self,
        lat: float,
        lng: float,
        radius_m: int = 500,
        limit: Optional[int] = None
    ) -> TreesNearLocationResponse:
        """Fallback linear search method.

        Scans every loaded tree instead of using the spatial index; ordering
        and limit handling are the same as in ``get_trees_near_location``.
        """
        start_time = datetime.now()
        trees_data = await self._load_trees()
        return self._build_nearby_response(
            lat, lng, radius_m, limit, trees_data, start_time
        )

    async def search_trees(self, filters: TreesSearchFilters) -> List[StreetTree]:
        """Search trees with filters.

        Args:
            filters: Optional criteria. The location filter is applied only
                when centre latitude, centre longitude and a non-zero radius
                are all given; every other criterion is skipped when falsy.

        Returns:
            All trees that satisfy every active criterion, in file order.
        """
        trees_data = await self._load_trees()
        filtered_trees = []

        # Coordinates are compared against None so that a centre on the
        # equator or prime meridian (0.0) still activates the filter.
        use_location = (
            filters.center_lat is not None
            and filters.center_lng is not None
            and bool(filters.within_radius_m)
        )

        for tree_data in trees_data:
            # Apply location filter first if specified
            if use_location:
                tree_lat = tree_data.get('lat')
                tree_lng = tree_data.get('lng')
                if tree_lat is None or tree_lng is None:
                    continue

                distance = self._distance_cache(
                    filters.center_lat, filters.center_lng, tree_lat, tree_lng
                )
                if distance > filters.within_radius_m:
                    continue

            # Apply other filters; trees lacking the filtered value are
            # excluded whenever that filter is active
            if filters.species and tree_data.get('species_german') not in filters.species:
                continue
            if filters.district and tree_data.get('district') != filters.district:
                continue
            if filters.min_age and (
                    not tree_data.get('age') or tree_data['age'] < filters.min_age):
                continue
            if filters.max_age and (
                    not tree_data.get('age') or tree_data['age'] > filters.max_age):
                continue
            if filters.min_height and (
                    not tree_data.get('height_m')
                    or tree_data['height_m'] < filters.min_height):
                continue
            if filters.max_height and (
                    not tree_data.get('height_m')
                    or tree_data['height_m'] > filters.max_height):
                continue

            filtered_trees.append(self._create_tree_from_dict(tree_data))

        return filtered_trees

    async def get_tree_stats(self) -> Dict[str, Any]:
        """Get overall statistics about Berlin street trees.

        Returns:
            A dictionary with totals, per-district counts, an age histogram
            and the ten most frequent species, or ``{"error": ...}`` when no
            tree data is loaded.
        """
        trees_data = await self._load_trees()

        if not trees_data:
            return {"error": "No tree data available"}

        district_counts = Counter()
        species_counts = Counter()
        age_distribution = {"0-10": 0, "11-20": 0, "21-50": 0, "51+": 0, "unknown": 0}

        for tree in trees_data:
            # District stats
            district = tree.get('district')
            if district:
                district_counts[district] += 1

            # Species stats
            species = tree.get('species_german')
            if species:
                species_counts[species] += 1

            # Age distribution
            age = tree.get('age')
            if age is None:
                age_distribution["unknown"] += 1
            elif age <= 10:
                age_distribution["0-10"] += 1
            elif age <= 20:
                age_distribution["11-20"] += 1
            elif age <= 50:
                age_distribution["21-50"] += 1
            else:
                age_distribution["51+"] += 1

        return {
            "total_trees": len(trees_data),
            "districts": len(district_counts),
            "unique_species": len(species_counts),
            "district_counts": dict(district_counts),
            "age_distribution": age_distribution,
            # Top 10 species, most frequent first
            "top_species": dict(species_counts.most_common(10)),
            "last_updated": datetime.now().isoformat()
        }