Add real green space data and neighborhood filtering
This commit is contained in:
parent
c14f5ead38
commit
49e3d8c29d
File diff suppressed because it is too large
Load Diff
|
@ -185,3 +185,142 @@ async def get_current_conditions(
|
|||
return conditions
|
||||
except Exception as e:
|
||||
raise HTTPException(status_code=500, detail=f"Failed to get conditions: {str(e)}")
|
||||
|
||||
@router.get("/all")
async def get_all_green_spaces(
    personality: Optional[PersonalityType] = Query(None, description="Personality type for scoring"),
    min_score: int = Query(0, ge=0, le=100, description="Minimum personality score (only applies if personality is provided)"),
    limit: int = Query(50, ge=1, le=200, description="Maximum results"),
):
    """
    Get all available green spaces in Berlin.

    Optionally score them for a specific personality type.
    Perfect for frontend dropdowns or full dataset access.
    """
    try:
        spaces = await berlin_data.search_green_spaces()

        if personality:
            # Score every space for the requested personality and keep only
            # those at or above the minimum score.
            matching = []
            for candidate in spaces:
                personality_score = await green_space_service.scoring_engine.score_green_space(
                    candidate, personality.value
                )
                if personality_score.score >= min_score:
                    candidate.current_personality_score = personality_score
                    matching.append(candidate)

            # Highest-scoring spaces first.
            matching.sort(
                key=lambda s: s.current_personality_score.score if s.current_personality_score else 0,
                reverse=True,
            )
            spaces = matching

        # Apply limit
        page = spaces[:limit]

        return {
            "green_spaces": page,
            "total_available": len(spaces),
            "returned_count": len(page),
            "personality": personality.value if personality else None,
            "min_score_applied": min_score if personality else None,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get green spaces: {str(e)}")
|
||||
|
||||
@router.get("/recommendations/{personality}")
async def get_personality_recommendations(
    personality: PersonalityType,
    limit: int = Query(20, ge=1, le=50, description="Number of recommendations"),
    neighborhood: Optional[str] = Query(None, description="Preferred neighborhood"),
    min_score: int = Query(70, ge=50, le=100, description="Minimum personality score"),
):
    """
    Get personalized green space recommendations.

    Returns the best green spaces for a specific personality type,
    with explanations of why each space is recommended.
    """
    try:
        candidates = await berlin_data.search_green_spaces(neighborhood=neighborhood)

        recommendations = []
        for space in candidates:
            personality_score = await green_space_service.scoring_engine.score_green_space(
                space, personality.value
            )
            # Skip spaces below the requested score threshold.
            if personality_score.score < min_score:
                continue

            space.current_personality_score = personality_score

            # Collect the standout features of this space.
            highlights = []
            if space.environmental.tree_coverage_percent > 70:
                highlights.append("Excellent tree coverage")
            if space.environmental.water_features:
                highlights.append("Water features")
            if space.recreation.playground_quality > 60:
                highlights.append("Good playground facilities")
            if space.recreation.sports_facilities:
                highlights.append("Sports facilities")
            if space.environmental.noise_level.value <= 2:
                highlights.append("Peaceful atmosphere")

            recommendations.append({
                "green_space": space,
                "score": personality_score.score,
                "explanation": personality_score.explanation,
                "best_features": highlights[:3],  # Top 3 features
                "visit_recommendation": _get_visit_recommendation(space, personality.value),
            })

        # Best matches first.
        recommendations.sort(key=lambda rec: rec["score"], reverse=True)

        return {
            "recommendations": recommendations[:limit],
            "personality": personality.value,
            "total_matches": len(recommendations),
            "search_filters": {
                "neighborhood": neighborhood,
                "min_score": min_score,
            },
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to get recommendations: {str(e)}")
|
||||
|
||||
def _get_visit_recommendation(space, personality: str) -> str:
|
||||
"""Get a personalized visit recommendation"""
|
||||
if personality == "little_adventurers":
|
||||
if space.recreation.playground_quality > 60:
|
||||
return "Perfect for family adventures with great playground facilities"
|
||||
return "Great for exploring with kids"
|
||||
elif personality == "date_night":
|
||||
if space.environmental.noise_level.value <= 2:
|
||||
return "Romantic and peaceful setting for couples"
|
||||
return "Nice atmosphere for a romantic stroll"
|
||||
elif personality == "zen_masters":
|
||||
if space.environmental.tree_coverage_percent > 70:
|
||||
return "Ideal for peaceful meditation under the trees"
|
||||
return "Perfect for quiet contemplation"
|
||||
elif personality == "active_lifestyle":
|
||||
if space.recreation.sports_facilities:
|
||||
return "Great for workouts and active recreation"
|
||||
return "Perfect for running and outdoor activities"
|
||||
elif personality == "wildlife_lover":
|
||||
if space.environmental.wildlife_diversity_score > 70:
|
||||
return "Excellent biodiversity for nature observation"
|
||||
return "Good spot for wildlife watching"
|
||||
else:
|
||||
return "Highly recommended for your personality type"
|
||||
|
|
|
@ -20,6 +20,7 @@ class BerlinDataService:
|
|||
self.cache = {}
|
||||
self.last_refresh = None
|
||||
self._toilets_cache = None
|
||||
self._green_spaces_cache = None
|
||||
self._street_trees_index = None
|
||||
self.data_dir = Path("app/data")
|
||||
self.street_tree_service = StreetTreeService()
|
||||
|
@ -46,9 +47,16 @@ class BerlinDataService:
|
|||
if distance > radius:
|
||||
continue
|
||||
|
||||
# Apply neighborhood filter
|
||||
if neighborhood and space.neighborhood.lower() != neighborhood.lower():
|
||||
continue
|
||||
# Apply neighborhood filter with flexible matching
|
||||
if neighborhood:
|
||||
neighborhood_lower = neighborhood.lower()
|
||||
space_neighborhood_lower = space.neighborhood.lower()
|
||||
|
||||
# Check for exact match or partial match (useful for compound neighborhood names)
|
||||
if (neighborhood_lower != space_neighborhood_lower and
|
||||
neighborhood_lower not in space_neighborhood_lower and
|
||||
space_neighborhood_lower not in neighborhood_lower):
|
||||
continue
|
||||
|
||||
# Apply other filters
|
||||
if filters:
|
||||
|
@ -214,43 +222,60 @@ class BerlinDataService:
|
|||
|
||||
async def get_neighborhood_stats(self) -> Dict[str, Any]:
    """Get statistics for Berlin neighborhoods.

    Groups the loaded green spaces by neighborhood and derives per-
    neighborhood personality scores from the aggregated space features,
    sorted by number of green spaces (descending).
    """
    # Get all green spaces to calculate real neighborhood stats
    green_spaces = await self._get_mock_green_spaces()

    # Group green spaces by neighborhood.
    grouped: Dict[str, list] = {}
    for space in green_spaces:
        grouped.setdefault(space.neighborhood, []).append(space)

    # Generate neighborhood stats
    neighborhoods = []
    for name, spaces in grouped.items():
        count = len(spaces)

        # Aggregate the concrete features of this neighborhood's spaces.
        total_tree_coverage = sum(s.environmental.tree_coverage_percent for s in spaces)
        total_playgrounds = sum(s.recreation.playground_quality for s in spaces)
        total_water_features = sum(1 for s in spaces if s.environmental.water_features)
        total_sports = sum(1 for s in spaces if s.recreation.sports_facilities)

        avg_tree_coverage = total_tree_coverage / count if count > 0 else 0
        avg_playground = total_playgrounds / count if count > 0 else 0
        water_ratio = total_water_features / count if count > 0 else 0
        sports_ratio = total_sports / count if count > 0 else 0

        # Derive personality scores from the aggregated features
        # (heuristic weights, capped at 100).
        little_adventurers = min(100, int(avg_playground * 0.8 + sports_ratio * 30 + 40))
        date_night = min(100, int(avg_tree_coverage * 0.6 + water_ratio * 25 + 45))
        squad_goals = min(100, int(sports_ratio * 40 + avg_tree_coverage * 0.4 + 35))
        zen_masters = min(100, int(avg_tree_coverage * 0.7 + water_ratio * 20 + 30))

        neighborhoods.append({
            "name": name.lower().replace(' ', '_').replace('-', '_'),
            "display_name": name,
            "green_space_count": count,
            "avg_personality_scores": {
                "little_adventurers": little_adventurers,
                "date_night": date_night,
                "squad_goals": squad_goals,
                "zen_masters": zen_masters,
            },
        })

    # Sort by green space count (most spaces first)
    neighborhoods.sort(key=lambda x: x["green_space_count"], reverse=True)

    return {"neighborhoods": neighborhoods}
|
||||
|
||||
async def get_current_conditions(self, lat: float, lng: float) -> Dict[str, Any]:
|
||||
"""Get current conditions at a location."""
|
||||
|
@ -394,122 +419,76 @@ class BerlinDataService:
|
|||
# Return original space if enhancement fails
|
||||
return green_space
|
||||
|
||||
def _load_green_spaces(self) -> List[Dict]:
    """Load green spaces data from JSON file.

    Reads app/data/processed/quick_berlin_green_spaces.json once and
    memoizes the result in self._green_spaces_cache; returns an empty
    list (with a warning) when the file is missing.
    """
    if self._green_spaces_cache is not None:
        return self._green_spaces_cache

    source = self.data_dir / "processed" / "quick_berlin_green_spaces.json"
    if source.exists():
        with open(source, 'r', encoding='utf-8') as fh:
            payload = json.load(fh)
        self._green_spaces_cache = payload.get("green_spaces", [])
    else:
        print("Warning: quick_berlin_green_spaces.json not found.")
        self._green_spaces_cache = []

    return self._green_spaces_cache
|
||||
|
||||
def _convert_json_to_green_space(self, json_data: Dict) -> GreenSpace:
    """Convert a raw JSON dict into a GreenSpace model, with safe defaults."""
    from datetime import datetime

    # Hoist the nested sections once instead of re-fetching them per field.
    coords = json_data.get("coordinates", {})
    env = json_data.get("environmental", {})
    acc = json_data.get("accessibility", {})
    rec = json_data.get("recreation", {})

    return GreenSpace(
        id=json_data.get("id", ""),
        name=json_data.get("name", ""),
        description=json_data.get("description", ""),
        type=GreenSpaceType.PARK,  # Default to PARK, could be enhanced later
        coordinates=Coordinates(
            lat=coords.get("lat", 0.0),
            lng=coords.get("lng", 0.0),
        ),
        neighborhood=json_data.get("neighborhood", "Berlin"),
        address=f"{json_data.get('name', 'Unknown')}, Berlin",
        area_sqm=json_data.get("area_sqm", 0),
        perimeter_m=json_data.get("perimeter_m", None),
        environmental=EnvironmentalFeatures(
            tree_coverage_percent=env.get("tree_coverage_percent", 0),
            shade_quality=env.get("shade_quality", 0),
            noise_level=NoiseLevel(env.get("noise_level", 1)),
            wildlife_diversity_score=env.get("wildlife_diversity_score", 0),
            water_features=env.get("water_features", False),
            natural_surface_percent=env.get("natural_surface_percent", 0),
        ),
        accessibility=AccessibilityFeatures(
            wheelchair_accessible=acc.get("wheelchair_accessible", True),
            public_transport_score=acc.get("public_transport_score", 3),
            cycling_infrastructure=acc.get("cycling_infrastructure", True),
            parking_availability=acc.get("parking_availability", 2),
            lighting_quality=acc.get("lighting_quality", 3),
        ),
        recreation=RecreationFeatures(
            playground_quality=rec.get("playground_quality", 0),
            sports_facilities=rec.get("sports_facilities", False),
            running_paths=rec.get("running_paths", True),
            cycling_paths=rec.get("cycling_paths", True),
            dog_friendly=rec.get("dog_friendly", True),
            bbq_allowed=rec.get("bbq_allowed", False),
        ),
        nearby_amenities=[],
        last_updated=datetime.fromisoformat(json_data.get("last_updated", datetime.now().isoformat())),
        data_sources=json_data.get("data_sources", []),
        confidence_score=json_data.get("confidence_score", 85),
    )
|
||||
|
||||
async def _get_mock_green_spaces(self) -> List[GreenSpace]:
    """Get green spaces data from JSON file.

    NOTE(review): despite the legacy name, this now loads real data via
    _load_green_spaces(); entries that fail model conversion are logged
    and skipped rather than aborting the whole load.
    """
    spaces: List[GreenSpace] = []

    for entry in self._load_green_spaces():
        try:
            spaces.append(self._convert_json_to_green_space(entry))
        except Exception as e:
            print(f"Error converting green space {entry.get('id', 'unknown')}: {e}")
            continue

    return spaces
|
||||
|
|
|
@ -4,6 +4,10 @@ from pathlib import Path
|
|||
from typing import List, Optional, Tuple, Dict, Any
|
||||
from datetime import datetime
|
||||
from geopy.distance import geodesic
|
||||
from rtree import index
|
||||
import asyncio
|
||||
import aiofiles
|
||||
from functools import lru_cache
|
||||
|
||||
from app.models.street_tree import (
|
||||
StreetTree, TreeDensityMetrics, TreeShadeAnalysis, TreesSearchFilters,
|
||||
|
@ -14,24 +18,58 @@ from app.models.green_space import Coordinates
|
|||
class StreetTreeService:
|
||||
"""Service for accessing and analyzing Berlin street trees data."""
|
||||
|
||||
def __init__(self):
|
||||
self._trees_cache = None
|
||||
self._trees_index = None
|
||||
self.data_dir = Path("app/data")
|
||||
_instance = None
|
||||
_initialized = False
|
||||
|
||||
def _load_trees(self) -> List[Dict]:
|
||||
"""Load street trees data from JSON file."""
|
||||
def __new__(cls):
    """Create the shared singleton instance on first call, then reuse it."""
    if cls._instance is None:
        cls._instance = super().__new__(cls)
    return cls._instance
|
||||
|
||||
def __init__(self):
    """Initialize caches on first construction only (singleton __init__ is
    re-invoked on every StreetTreeService() call, so guard with a flag)."""
    if self._initialized:
        return
    self._trees_cache = None        # raw tree dicts from JSON
    self._spatial_index = None      # R-tree over tree coordinates
    self._tree_id_to_data = {}      # R-tree id -> raw tree dict
    self.data_dir = Path("app/data")
    self.__class__._initialized = True
|
||||
|
||||
async def _load_trees(self) -> List[Dict]:
    """Load street trees data from JSON file and build spatial index.

    Memoized: the file is read (asynchronously) only on the first call;
    missing data degrades to an empty list with a warning.
    """
    if self._trees_cache is not None:
        return self._trees_cache

    trees_file = self.data_dir / "processed" / "street_trees.json"
    if not trees_file.exists():
        print("Warning: street_trees.json not found. Run process_street_trees.py first.")
        self._trees_cache = []
        return self._trees_cache

    print("🔄 Loading trees data and building spatial index...")
    async with aiofiles.open(trees_file, 'r', encoding='utf-8') as f:
        raw = await f.read()
    data = json.loads(raw)
    self._trees_cache = data.get("street_trees", [])
    await self._build_spatial_index()
    print(f"✅ Loaded {len(self._trees_cache)} trees with spatial index")

    return self._trees_cache
|
||||
|
||||
async def _build_spatial_index(self):
    """Build R-tree spatial index for fast location queries.

    No-op when the index already exists or no trees are loaded. Trees
    without coordinates are silently excluded from the index.
    """
    if self._spatial_index is not None or not self._trees_cache:
        return

    print("🔨 Building spatial index...")
    self._spatial_index = index.Index()
    self._tree_id_to_data = {}

    for tree_id, tree_data in enumerate(self._trees_cache):
        lat = tree_data.get('lat')
        lng = tree_data.get('lng')
        if lat is None or lng is None:
            continue
        # R-tree expects (minx, miny, maxx, maxy); a point is a degenerate box.
        self._spatial_index.insert(tree_id, (lng, lat, lng, lat))
        self._tree_id_to_data[tree_id] = tree_data

    print(f"✅ Spatial index built for {len(self._tree_id_to_data)} trees")
|
||||
|
||||
def _create_tree_from_dict(self, tree_data: Dict) -> StreetTree:
|
||||
"""Convert tree dictionary to StreetTree model."""
|
||||
|
||||
|
@ -94,6 +132,11 @@ class StreetTreeService:
|
|||
last_updated=datetime.now()
|
||||
)
|
||||
|
||||
@staticmethod
@lru_cache(maxsize=1000)
def _distance_cache(lat1: float, lng1: float, lat2: float, lng2: float) -> float:
    """Return the geodesic distance in meters between two points, memoized.

    Declared as a staticmethod so the lru_cache key does not include
    `self`: caching a bound instance method keys every entry on the
    instance and pins it in the cache for the cache's lifetime (ruff
    B019). Call sites using `self._distance_cache(...)` keep working.
    """
    return geodesic((lat1, lng1), (lat2, lng2)).meters
|
||||
|
||||
async def get_trees_near_location(
|
||||
self,
|
||||
lat: float,
|
||||
|
@ -101,31 +144,48 @@ class StreetTreeService:
|
|||
radius_m: int = 500,
|
||||
limit: Optional[int] = None
|
||||
) -> TreesNearLocationResponse:
|
||||
"""Get street trees within a radius of a location."""
|
||||
"""Get street trees within a radius of a location using spatial index."""
|
||||
start_time = datetime.now()
|
||||
|
||||
trees_data = self._load_trees()
|
||||
await self._load_trees()
|
||||
nearby_trees = []
|
||||
|
||||
for tree_data in trees_data:
|
||||
if self._spatial_index is None:
|
||||
# Fallback to linear search if index failed
|
||||
return await self._get_trees_linear_search(lat, lng, radius_m, limit)
|
||||
|
||||
# Convert radius to approximate bounding box for R-tree query
|
||||
# Rough approximation: 1 degree ≈ 111km
|
||||
radius_deg = radius_m / 111000
|
||||
bbox = (lng - radius_deg, lat - radius_deg, lng + radius_deg, lat + radius_deg)
|
||||
|
||||
# Query spatial index for candidates
|
||||
candidate_ids = list(self._spatial_index.intersection(bbox))
|
||||
|
||||
# Filter candidates by exact distance
|
||||
tree_distances = []
|
||||
for tree_id in candidate_ids:
|
||||
tree_data = self._tree_id_to_data.get(tree_id)
|
||||
if not tree_data:
|
||||
continue
|
||||
|
||||
tree_lat = tree_data.get('lat')
|
||||
tree_lng = tree_data.get('lng')
|
||||
|
||||
if tree_lat is None or tree_lng is None:
|
||||
continue
|
||||
|
||||
distance = geodesic((lat, lng), (tree_lat, tree_lng)).meters
|
||||
distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
|
||||
if distance <= radius_m:
|
||||
tree = self._create_tree_from_dict(tree_data)
|
||||
nearby_trees.append(tree)
|
||||
tree_distances.append((tree, distance))
|
||||
|
||||
if limit and len(nearby_trees) >= limit:
|
||||
if limit and len(tree_distances) >= limit:
|
||||
break
|
||||
|
||||
# Sort by distance
|
||||
nearby_trees.sort(
|
||||
key=lambda t: geodesic((lat, lng), (t.coordinates.lat, t.coordinates.lng)).meters
|
||||
)
|
||||
tree_distances.sort(key=lambda x: x[1])
|
||||
nearby_trees = [tree for tree, _ in tree_distances]
|
||||
|
||||
# Calculate metrics
|
||||
metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
|
||||
|
@ -212,7 +272,7 @@ class StreetTreeService:
|
|||
large_trees = []
|
||||
|
||||
for tree in trees:
|
||||
distance = geodesic((lat, lng), (tree.coordinates.lat, tree.coordinates.lng)).meters
|
||||
distance = self._distance_cache(lat, lng, tree.coordinates.lat, tree.coordinates.lng)
|
||||
|
||||
if distance <= 50:
|
||||
trees_50m += 1
|
||||
|
@ -259,9 +319,58 @@ class StreetTreeService:
|
|||
canopy_density=len(large_trees) / max(1, len(trees)) if trees else 0
|
||||
)
|
||||
|
||||
async def _get_trees_linear_search(
    self,
    lat: float,
    lng: float,
    radius_m: int = 500,
    limit: Optional[int] = None
) -> TreesNearLocationResponse:
    """Fallback linear search method (used when the spatial index is
    unavailable): scans every tree, keeps those within radius_m, sorted
    by distance, and reports density/shade metrics plus query time."""
    start_time = datetime.now()

    # (tree, distance) pairs inside the radius.
    matches = []
    for tree_data in await self._load_trees():
        tree_lat = tree_data.get('lat')
        tree_lng = tree_data.get('lng')
        if tree_lat is None or tree_lng is None:
            continue

        distance = self._distance_cache(lat, lng, tree_lat, tree_lng)
        if distance > radius_m:
            continue

        matches.append((self._create_tree_from_dict(tree_data), distance))
        if limit and len(matches) >= limit:
            break

    # Nearest trees first; distances were recorded during the scan.
    matches.sort(key=lambda pair: pair[1])
    nearby_trees = [tree for tree, _ in matches]

    # Calculate metrics
    metrics = self._calculate_tree_density_metrics(nearby_trees, radius_m)
    shade_analysis = self._analyze_shade_coverage(lat, lng, nearby_trees)

    query_time = (datetime.now() - start_time).total_seconds() * 1000

    return TreesNearLocationResponse(
        location=Coordinates(lat=lat, lng=lng),
        radius_m=radius_m,
        trees=nearby_trees,
        metrics=metrics,
        shade_analysis=shade_analysis,
        total_found=len(nearby_trees),
        query_time_ms=int(query_time)
    )
|
||||
|
||||
async def search_trees(self, filters: TreesSearchFilters) -> List[StreetTree]:
|
||||
"""Search trees with filters."""
|
||||
trees_data = self._load_trees()
|
||||
trees_data = await self._load_trees()
|
||||
filtered_trees = []
|
||||
|
||||
for tree_data in trees_data:
|
||||
|
@ -272,10 +381,10 @@ class StreetTreeService:
|
|||
if tree_lat is None or tree_lng is None:
|
||||
continue
|
||||
|
||||
distance = geodesic(
|
||||
(filters.center_lat, filters.center_lng),
|
||||
(tree_lat, tree_lng)
|
||||
).meters
|
||||
distance = self._distance_cache(
|
||||
filters.center_lat, filters.center_lng,
|
||||
tree_lat, tree_lng
|
||||
)
|
||||
if distance > filters.within_radius_m:
|
||||
continue
|
||||
|
||||
|
@ -305,7 +414,7 @@ class StreetTreeService:
|
|||
|
||||
async def get_tree_stats(self) -> Dict[str, Any]:
|
||||
"""Get overall statistics about Berlin street trees."""
|
||||
trees_data = self._load_trees()
|
||||
trees_data = await self._load_trees()
|
||||
|
||||
if not trees_data:
|
||||
return {"error": "No tree data available"}
|
||||
|
|
|
@ -35,6 +35,8 @@ dependencies = [
|
|||
"redis>=5.0.0",
|
||||
"aiofiles>=23.2.0",
|
||||
"openpyxl>=3.1.5",
|
||||
"tqdm>=4.67.1",
|
||||
"rtree>=1.4.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
|
|
@ -0,0 +1,467 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Enhanced Berlin green space processor using existing tree and toilet services.
|
||||
Downloads OSM green space boundaries and enhances them with real data using existing services.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import zipfile
|
||||
import requests
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional
|
||||
import sys
|
||||
|
||||
# Add the app directory to Python path to import services
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
class RealDataGreenSpaceProcessor:
|
||||
def __init__(self, data_dir: str = "app/data"):
    """Set up data directories and the services used for enhancement."""
    self.data_dir = Path(data_dir)
    self.raw_dir = self.data_dir / "geo-raw"
    self.processed_dir = self.data_dir / "processed"

    # Create directories
    for directory in (self.raw_dir, self.processed_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # Initialize existing services
    self.tree_service = StreetTreeService()
    self.berlin_data = BerlinDataService()
|
||||
|
||||
def download_berlin_districts(self):
    """Download Berlin district boundaries.

    Skips the download when the GeoJSON is already on disk; returns the
    path to the local file. Re-raises any download/write failure after
    logging it.
    """
    json_file = self.raw_dir / "bezirksgrenzen.geojson"

    if json_file.exists():
        print(f"Berlin district data already exists: {json_file}")
        return json_file

    link = "https://tsb-opendata.s3.eu-central-1.amazonaws.com/bezirksgrenzen/bezirksgrenzen.geojson"
    print(f"Downloading Berlin district data from {link}")

    try:
        response = requests.get(link, timeout=30)
        response.raise_for_status()
        json_file.write_bytes(response.content)
        print(f"Downloaded to {json_file}")
        return json_file
    except Exception as e:
        print(f"Error downloading districts: {e}")
        raise
|
||||
|
||||
def download_osm_data(self):
    """Download Berlin OpenStreetMap data.

    Downloads the Geofabrik shapefile archive (unless already present)
    and extracts it; both steps are skipped when the required shapefiles
    already exist. Returns the extraction directory.
    """
    zip_file = self.raw_dir / "berlin_shapes.zip"
    shp_dir = self.raw_dir / "berlin_shapes"

    # Check if already extracted
    required_files = ["gis_osm_landuse_a_free_1.shp", "gis_osm_natural_a_free_1.shp", "gis_osm_leisure_a_free_1.shp"]
    if all((shp_dir / name).exists() for name in required_files):
        print(f"Berlin OSM data already exists: {shp_dir}")
        return shp_dir

    if not zip_file.exists():
        link = "https://download.geofabrik.de/europe/germany/berlin-latest-free.shp.zip"
        print(f"Downloading Berlin OSM data from {link} (this may take several minutes...)")
        try:
            response = requests.get(link, stream=True, timeout=300)  # 5 min timeout
            response.raise_for_status()
            # Stream to disk in chunks to keep memory bounded.
            with open(zip_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            print(f"Download completed: {zip_file}")
        except Exception as e:
            print(f"Error downloading OSM data: {e}")
            raise

    print(f"Extracting Berlin OSM data to {shp_dir}")
    try:
        with zipfile.ZipFile(zip_file, 'r') as zip_ref:
            zip_ref.extractall(shp_dir)
        print(f"Extracted to {shp_dir}")
    except Exception as e:
        print(f"Error extracting OSM data: {e}")
        raise

    return shp_dir
|
||||
|
||||
def load_osm_green_spaces(self):
    """Load OSM green-space polygons clipped to Berlin.

    Downloads district boundaries and the OSM shapefile bundle if
    needed, filters the landuse/natural/leisure layers to green
    categories, clips to the district boundaries, drops areas smaller
    than 1000 sqm, and returns a GeoDataFrame with district names and
    centroid coordinates attached.

    Raises:
        ValueError: if no green-space features survive filtering.
    """
    print("Loading OSM green space boundaries...")

    # Download required data
    districts_file = self.download_berlin_districts()
    shp_dir = self.download_osm_data()

    # Load Berlin districts for clipping
    districts = gpd.read_file(districts_file)

    # Green-space categories we want, per OSM layer
    green_categories = {
        'landuse': ['forest', 'grass', 'meadow', 'recreation_ground', 'village_green', 'allotments'],
        'natural': ['forest', 'grass', 'meadow', 'scrub', 'heath', 'wood'],
        'leisure': ['park', 'garden', 'nature_reserve', 'playground', 'pitch', 'common', 'golf_course']
    }

    all_green_spaces = []

    # Process each category layer
    for category, subcategories in green_categories.items():
        shapefile = shp_dir / f"gis_osm_{category}_a_free_1.shp"

        if not shapefile.exists():
            print(f"Warning: {shapefile} not found, skipping")
            continue

        print(f"Processing {category} data...")
        try:
            gdf = gpd.read_file(shapefile)

            # Filter to relevant subcategories
            gdf_filtered = gdf[gdf['fclass'].isin(subcategories)].copy()

            if len(gdf_filtered) == 0:
                print(f"No {category} features found in subcategories")
                continue

            # Clip to Berlin boundaries
            gdf_clipped = gpd.clip(gdf_filtered, districts)

            # BUG FIX: .area on a geographic CRS (WGS84 lat/lon) returns
            # square *degrees* (~1e-7 for a typical park), so comparing
            # against 1000 "sqm" silently discarded every feature.
            # Measure area in a metric CRS instead (ETRS89 / UTM zone 33N
            # covers Berlin).
            gdf_clipped['area_sqm'] = gdf_clipped.geometry.to_crs(epsg=25833).area
            gdf_clipped = gdf_clipped[gdf_clipped['area_sqm'] >= 1000]

            if len(gdf_clipped) > 0:
                all_green_spaces.append(gdf_clipped)
                print(f"Found {len(gdf_clipped)} {category} features")

        except Exception as e:
            print(f"Error processing {category}: {e}")
            continue

    if not all_green_spaces:
        raise ValueError("No green space data found")

    # Combine all green spaces
    green_spaces = gpd.GeoDataFrame(pd.concat(all_green_spaces, ignore_index=True))

    # Add district information.
    # NOTE(review): assumes the district GeoJSON exposes a 'Bezirk'
    # column — verify against the downloaded file's actual schema.
    green_spaces = gpd.sjoin(green_spaces, districts[['Bezirk', 'geometry']], how='left')

    # Centroids for point lookups (computed on the geographic CRS;
    # approximate, but adequate for nearest-feature queries).
    green_spaces['centroid'] = green_spaces.geometry.centroid
    green_spaces['centroid_lat'] = green_spaces.centroid.y
    green_spaces['centroid_lng'] = green_spaces.centroid.x

    print(f"Total green spaces found: {len(green_spaces)}")
    return green_spaces
|
||||
|
||||
async def enhance_green_space_with_real_data(self, row):
    """Enhance a single green space with real tree and toilet data.

    Args:
        row: A GeoDataFrame row (pandas Series) providing at least
            'centroid_lat', 'centroid_lng', 'area_sqm' and a geometry;
            'fclass', 'name' and 'Bezirk' are read with fallbacks.
            ``row.name`` is the DataFrame index label, used in the ID.

    Returns:
        A dict describing the enhanced green space, or None when any
        step fails (the error is printed and swallowed so one bad row
        does not abort the whole batch).
    """
    try:
        lat = row['centroid_lat']
        lng = row['centroid_lng']
        area_sqm = int(row['area_sqm'])

        # Use existing tree service to get real tree data.
        # Query radius scales with the space's size (sqrt(area) is
        # roughly the side length), capped at 400 m.
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=min(400, int((area_sqm ** 0.5) * 1.5))  # Adaptive radius
        )

        # Use existing toilet service to get real toilet data (800 m radius)
        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 800)

        # Calculate toilet accessibility score
        toilet_score = self._score_toilet_accessibility(nearby_toilets)

        # Map OSM type to our enum
        space_type = self._map_osm_to_space_type(row.get('fclass', ''))

        # Generate ID from the feature class and the row's index label
        space_id = f"real_{row.get('fclass', 'unknown')}_{row.name}"

        # Create enhanced green space using real data
        enhanced_space = {
            "id": space_id,
            "name": row.get('name') or f"{row.get('fclass', 'Green Space').title()} in {row.get('Bezirk', 'Berlin')}",
            "description": f"Real Berlin {row.get('fclass', 'green space')} enhanced with tree and toilet data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": row.get('Bezirk', 'Unknown'),
            "area_sqm": area_sqm,
            # NOTE(review): geometry.length on a geographic CRS is in
            # degrees, not meters — "perimeter_m" is likely mislabeled
            # unless the geometry was reprojected upstream; confirm CRS.
            "perimeter_m": int(row.geometry.length) if hasattr(row.geometry, 'length') else 0,

            # Environmental features using real tree data
            "environmental": {
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(row.get('fclass', ''), row.get('Bezirk', '')),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                # Heuristic: infer water from the feature class or name only
                "water_features": 'water' in str(row.get('fclass', '')).lower() or 'river' in str(row.get('name', '')).lower(),
                "natural_surface_percent": self._estimate_natural_surface(row.get('fclass', ''))
            },

            # Real tree metrics from existing service
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species
            },

            # Real toilet accessibility from existing service
            # (assumes nearby_toilets is sorted nearest-first — TODO confirm)
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Standard accessibility features (static defaults, not measured)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": 3,  # Could be enhanced with real transit data
                "cycling_infrastructure": area_sqm > 5000,
                "parking_availability": 2,
                "lighting_quality": 2
            },

            # Recreation features based on OSM data and size
            "recreation": {
                "playground_quality": self._estimate_playground_quality(row.get('fclass', ''), tree_response.metrics.total_trees),
                "sports_facilities": 'pitch' in str(row.get('fclass', '')).lower() or 'sport' in str(row.get('name', '')).lower(),
                "running_paths": area_sqm > 8000,
                "cycling_paths": area_sqm > 15000,
                "dog_friendly": True,
                "bbq_allowed": row.get('fclass') in ['park', 'recreation_ground'] and area_sqm > 5000
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["openstreetmap", "berlin_tree_cadastre", "berlin_toilets"],
            "confidence_score": 95
        }

        return enhanced_space

    except Exception as e:
        # Best-effort: log and skip this row rather than failing the batch.
        print(f"Error enhancing green space {row.name}: {e}")
        return None
|
||||
|
||||
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||
"""Score toilet accessibility using existing toilet data."""
|
||||
if not nearby_toilets:
|
||||
return 20
|
||||
|
||||
nearest_distance = nearby_toilets[0]['distance_meters']
|
||||
|
||||
# Distance-based scoring
|
||||
if nearest_distance <= 200:
|
||||
score = 100
|
||||
elif nearest_distance <= 400:
|
||||
score = 80
|
||||
elif nearest_distance <= 600:
|
||||
score = 60
|
||||
else:
|
||||
score = 40
|
||||
|
||||
# Bonuses for quality
|
||||
free_toilets = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||
accessible_toilets = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||
|
||||
score += min(20, free_toilets * 5 + accessible_toilets * 3)
|
||||
|
||||
return min(100, score)
|
||||
|
||||
def _map_osm_to_space_type(self, fclass: str) -> str:
|
||||
"""Map OSM feature class to green space types."""
|
||||
mapping = {
|
||||
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN',
|
||||
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||
'wood': 'FOREST', 'heath': 'HEATH', 'pitch': 'SPORTS_AREA',
|
||||
'golf_course': 'SPORTS_AREA', 'common': 'PARK', 'village_green': 'GRASS',
|
||||
'allotments': 'GARDEN'
|
||||
}
|
||||
return mapping.get(fclass, 'PARK')
|
||||
|
||||
def _estimate_noise_level(self, fclass: str, district: str) -> int:
|
||||
"""Estimate noise level (1=very quiet, 5=very noisy)."""
|
||||
base_noise = {
|
||||
'forest': 1, 'nature_reserve': 1, 'wood': 1,
|
||||
'meadow': 2, 'grass': 2, 'heath': 2,
|
||||
'park': 2, 'garden': 2, 'common': 2,
|
||||
'recreation_ground': 3, 'playground': 3, 'pitch': 3,
|
||||
'golf_course': 2, 'allotments': 2
|
||||
}
|
||||
|
||||
# Central districts are noisier
|
||||
central_districts = ['Mitte', 'Kreuzberg', 'Friedrichshain']
|
||||
district_modifier = 1 if district in central_districts else 0
|
||||
|
||||
return min(5, base_noise.get(fclass, 2) + district_modifier)
|
||||
|
||||
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||
"""Estimate percentage of natural surface."""
|
||||
surface_map = {
|
||||
'forest': 95, 'nature_reserve': 95, 'wood': 95,
|
||||
'meadow': 95, 'grass': 90, 'heath': 90,
|
||||
'park': 75, 'garden': 65, 'common': 80,
|
||||
'recreation_ground': 60, 'playground': 40, 'pitch': 20,
|
||||
'golf_course': 70, 'allotments': 85
|
||||
}
|
||||
return surface_map.get(fclass, 70)
|
||||
|
||||
def _estimate_playground_quality(self, fclass: str, tree_count: int) -> int:
|
||||
"""Estimate playground quality score."""
|
||||
base_scores = {
|
||||
'playground': 85,
|
||||
'park': 65,
|
||||
'recreation_ground': 70,
|
||||
'garden': 40,
|
||||
'common': 50
|
||||
}
|
||||
|
||||
base = base_scores.get(fclass, 25)
|
||||
|
||||
# Trees improve playground appeal for families
|
||||
tree_bonus = min(15, tree_count // 5) # +3 per 5 trees, max 15
|
||||
|
||||
return min(100, base + tree_bonus)
|
||||
|
||||
async def process_all_green_spaces(self):
    """Run the full enhancement pipeline over every OSM green space.

    Loads boundaries, enhances each space with tree/toilet data in
    batches of 50 (with a short pause between batches), and returns
    the list of successfully enhanced space dicts.
    """
    print("Starting enhanced green space processing with real data...")

    # Load OSM green space boundaries
    osm_green_spaces = self.load_osm_green_spaces()

    enhanced_green_spaces = []
    total = len(osm_green_spaces)

    print(f"Enhancing {total} green spaces with real tree and toilet data...")

    # Process in batches to avoid overwhelming the backing services.
    batch_size = 50
    done = 0

    for start in range(0, total, batch_size):
        chunk = osm_green_spaces.iloc[start:start + batch_size]

        for _, row in chunk.iterrows():
            enhanced = await self.enhance_green_space_with_real_data(row)
            if enhanced:
                enhanced_green_spaces.append(enhanced)

            done += 1
            if done % 25 == 0:
                print(f"Processed {done}/{total} green spaces...")

        # Brief pause between batches.
        await asyncio.sleep(0.1)

    print(f"Successfully enhanced {len(enhanced_green_spaces)} green spaces with real data")
    return enhanced_green_spaces
|
||||
|
||||
def save_enhanced_data(self, enhanced_green_spaces: List[Dict]):
    """Write enhanced green spaces plus summary stats to a JSON file.

    Args:
        enhanced_green_spaces: space dicts produced by the enhancement
            pipeline; may be empty.

    Returns:
        Path to the written JSON file under ``self.processed_dir``.
    """
    output_file = self.processed_dir / "real_berlin_green_spaces.json"

    # Summary statistics
    total_count = len(enhanced_green_spaces)
    spaces_with_trees = len([gs for gs in enhanced_green_spaces if gs["tree_data"]["total_trees"] > 0])
    spaces_with_toilets = len([gs for gs in enhanced_green_spaces if gs["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(gs["tree_data"]["total_trees"] for gs in enhanced_green_spaces)
    avg_species_per_space = (
        sum(gs["tree_data"]["species_count"] for gs in enhanced_green_spaces) / total_count
        if total_count else 0
    )
    # BUG FIX: the summary prints below used to recompute these
    # percentages without the empty-list guard, raising
    # ZeroDivisionError for an empty input. Compute once, guarded,
    # and reuse everywhere.
    tree_pct = round((spaces_with_trees / total_count) * 100, 1) if total_count else 0
    toilet_pct = round((spaces_with_toilets / total_count) * 100, 1) if total_count else 0

    data = {
        "green_spaces": enhanced_green_spaces,
        "total_count": total_count,
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "openstreetmap_boundaries",
            "berlin_tree_cadastre_via_service",
            "berlin_toilet_locations_via_service",
            "berlin_districts"
        ],
        "processing_info": {
            "script_version": "1.0",
            "coordinate_system": "WGS84",
            "uses_existing_services": True,
            "tree_analysis_via": "StreetTreeService",
            "toilet_analysis_via": "BerlinDataService"
        },
        "summary_stats": {
            "spaces_with_trees": spaces_with_trees,
            "spaces_with_nearby_toilets": spaces_with_toilets,
            "total_trees_in_all_spaces": total_trees,
            "average_species_per_space": round(avg_species_per_space, 1),
            "coverage_percentage": {
                "with_tree_data": tree_pct,
                "with_toilet_data": toilet_pct
            }
        }
    }

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"✅ Saved {total_count} enhanced green spaces to {output_file}")
    print(f"📊 Summary:")
    print(f"   - {spaces_with_trees} spaces have tree data ({tree_pct}%)")
    print(f"   - {spaces_with_toilets} spaces have nearby toilets ({toilet_pct}%)")
    print(f"   - {total_trees} total trees analyzed")
    print(f"   - {avg_species_per_space:.1f} average species per space")

    return output_file
|
||||
|
||||
|
||||
async def main():
    """Entry point: run the processor and persist its results."""
    processor = RealDataGreenSpaceProcessor()

    try:
        # Build the dataset via the existing services, then write it out.
        enhanced = await processor.process_all_green_spaces()
        output_file = processor.save_enhanced_data(enhanced)

        print(f"\n🎉 Successfully created real data enhanced Berlin green spaces!")
        print(f"📁 Output: {output_file}")

    except KeyboardInterrupt:
        print("\n⚠️ Processing interrupted by user")
    except Exception as e:
        print(f"❌ Error processing data: {e}")
        raise
|
||||
|
||||
|
||||
# Script entry point: kick off the async processing pipeline.
if __name__ == "__main__":
    asyncio.run(main())
|
|
@ -0,0 +1,613 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Filtered OSM processor for significant Berlin green spaces.
|
||||
Processes only meaningful green spaces (>1000 sqm) with real tree and toilet data.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import math
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
|
||||
# Add the app directory to Python path
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
class FilteredOSMProcessor:
|
||||
def __init__(self, data_dir: str = "app/data"):
    """Set up data paths, backing services, and filter thresholds.

    Args:
        data_dir: Root data directory containing the ``osm-raw`` and
            ``processed`` subdirectories.
    """
    self.data_dir = Path(data_dir)
    self.osm_raw_dir = self.data_dir / "osm-raw"
    self.processed_dir = self.data_dir / "processed"

    # Services supplying real tree and toilet data.
    self.tree_service = StreetTreeService()
    self.berlin_data = BerlinDataService()

    # Rough Berlin bounding box, used to drop out-of-town nodes.
    self.berlin_bbox = {
        'min_lat': 52.3370,
        'max_lat': 52.6755,
        'min_lon': 13.0882,
        'max_lon': 13.7611,
    }

    # Filtering criteria: only sizeable spaces, capped result count.
    self.min_area_sqm = 1000  # Minimum area to be considered significant
    self.max_spaces = 800  # Maximum number of spaces to process
|
||||
|
||||
def parse_and_filter_osm_data(self) -> List[Dict]:
    """Parse the raw OSM XML and keep only significant green spaces.

    Reads ``berlin_green_spaces.osm``, converts each ``way`` into a
    green-space dict via ``_process_osm_way``, keeps spaces of at
    least ``self.min_area_sqm``, stops once ``self.max_spaces`` are
    collected, and returns them sorted by area (largest first).
    Returns [] when the file is missing or parsing fails.
    """
    osm_file = self.osm_raw_dir / "berlin_green_spaces.osm"

    if not osm_file.exists():
        print(f"❌ OSM file not found: {osm_file}")
        print("Please run the download first or ensure the file exists.")
        return []

    print(f"📂 Parsing OSM data from {osm_file}")

    try:
        tree = ET.parse(osm_file)
        root = tree.getroot()
        ways = root.findall('.//way')

        print(f"📊 Found {len(ways)} total ways in OSM file")
        print(f"🔍 Filtering for significant green spaces (≥{self.min_area_sqm} sqm)...")

        filtered_spaces = []
        processed_count = 0

        for way in ways:
            processed_count += 1

            if processed_count % 5000 == 0:
                print(f"   Processed {processed_count}/{len(ways)} ways... Found {len(filtered_spaces)} significant spaces")

            try:
                space_data = self._process_osm_way(way, root)
                if space_data and space_data['area_sqm'] >= self.min_area_sqm:
                    filtered_spaces.append(space_data)

                    # Stop if we have enough spaces
                    if len(filtered_spaces) >= self.max_spaces:
                        print(f"✅ Reached target of {self.max_spaces} significant spaces")
                        break

            except Exception:
                # One malformed way must not abort the whole parse.
                continue

        # Sort by area (largest first) to prioritize important spaces
        filtered_spaces.sort(key=lambda x: x['area_sqm'], reverse=True)

        print(f"🎯 Filtered to {len(filtered_spaces)} significant green spaces")
        # BUG FIX: this print indexed [0]/[-1] unconditionally, raising
        # IndexError for an empty result (which the outer except then
        # misreported as a parse failure).
        if filtered_spaces:
            print(f"📏 Area range: {filtered_spaces[-1]['area_sqm']:,} - {filtered_spaces[0]['area_sqm']:,} sqm")

        return filtered_spaces

    except Exception as e:
        print(f"❌ Error parsing OSM file: {e}")
        return []
|
||||
|
||||
def _process_osm_way(self, way, root) -> Optional[Dict]:
|
||||
"""Process a single OSM way into green space format."""
|
||||
# Get tags
|
||||
tags = {}
|
||||
for tag in way.findall('tag'):
|
||||
tags[tag.get('k')] = tag.get('v')
|
||||
|
||||
# Check if it's a significant green space
|
||||
green_space_type = self._get_green_space_type(tags)
|
||||
if not green_space_type:
|
||||
return None
|
||||
|
||||
# Skip certain types that are usually small or not parks
|
||||
skip_types = ['grave_yard', 'cemetery', 'allotments']
|
||||
if green_space_type in skip_types:
|
||||
return None
|
||||
|
||||
# Get node references
|
||||
nd_refs = [nd.get('ref') for nd in way.findall('nd')]
|
||||
if len(nd_refs) < 3: # Need at least 3 points for an area
|
||||
return None
|
||||
|
||||
# Find node coordinates
|
||||
coordinates = []
|
||||
for nd_ref in nd_refs:
|
||||
node = root.find(f".//node[@id='{nd_ref}']")
|
||||
if node is not None:
|
||||
lat = float(node.get('lat'))
|
||||
lon = float(node.get('lon'))
|
||||
|
||||
# Check if within Berlin bounds
|
||||
if (self.berlin_bbox['min_lat'] <= lat <= self.berlin_bbox['max_lat'] and
|
||||
self.berlin_bbox['min_lon'] <= lon <= self.berlin_bbox['max_lon']):
|
||||
coordinates.append((lat, lon))
|
||||
|
||||
if len(coordinates) < 3:
|
||||
return None
|
||||
|
||||
# Calculate centroid and area
|
||||
centroid_lat, centroid_lon = self._calculate_centroid(coordinates)
|
||||
area_sqm = self._calculate_area(coordinates)
|
||||
|
||||
# Skip if too small
|
||||
if area_sqm < self.min_area_sqm:
|
||||
return None
|
||||
|
||||
# Get name
|
||||
name = tags.get('name')
|
||||
if not name:
|
||||
name = f"{green_space_type.title()} near {centroid_lat:.3f}, {centroid_lon:.3f}"
|
||||
|
||||
# Estimate district
|
||||
district = self._estimate_district(centroid_lat, centroid_lon)
|
||||
|
||||
return {
|
||||
'id': f"osm_way_{way.get('id')}",
|
||||
'name': name,
|
||||
'fclass': green_space_type,
|
||||
'lat': centroid_lat,
|
||||
'lng': centroid_lon,
|
||||
'area_sqm': int(area_sqm),
|
||||
'district': district,
|
||||
'osm_tags': tags,
|
||||
'osm_id': way.get('id'),
|
||||
'has_name': bool(tags.get('name')) # Track if it has a real name
|
||||
}
|
||||
|
||||
def _get_green_space_type(self, tags: Dict) -> Optional[str]:
|
||||
"""Determine if tags represent a significant green space."""
|
||||
# Prioritize leisure tags (usually parks)
|
||||
leisure = tags.get('leisure', '')
|
||||
if leisure in ['park', 'garden', 'nature_reserve', 'recreation_ground', 'playground', 'common']:
|
||||
return leisure
|
||||
|
||||
# Check landuse tags
|
||||
landuse = tags.get('landuse', '')
|
||||
if landuse in ['forest', 'grass', 'meadow', 'recreation_ground', 'village_green']:
|
||||
return landuse
|
||||
|
||||
# Check natural tags (forests, etc.)
|
||||
natural = tags.get('natural', '')
|
||||
if natural in ['forest', 'wood', 'heath']:
|
||||
return natural
|
||||
|
||||
return None
|
||||
|
||||
def _calculate_centroid(self, coordinates: List[Tuple[float, float]]) -> Tuple[float, float]:
|
||||
"""Calculate centroid of polygon."""
|
||||
lat_sum = sum(coord[0] for coord in coordinates)
|
||||
lon_sum = sum(coord[1] for coord in coordinates)
|
||||
count = len(coordinates)
|
||||
return lat_sum / count, lon_sum / count
|
||||
|
||||
def _calculate_area(self, coordinates: List[Tuple[float, float]]) -> float:
|
||||
"""Calculate area using shoelace formula (approximate for Berlin)."""
|
||||
if len(coordinates) < 3:
|
||||
return 0
|
||||
|
||||
# Convert to approximate meters for Berlin latitude
|
||||
lat_to_m = 111000 # meters per degree latitude
|
||||
lon_to_m = 111000 * math.cos(math.radians(52.5)) # adjust for Berlin
|
||||
|
||||
# Convert to meters
|
||||
coords_m = [(lat * lat_to_m, lon * lon_to_m) for lat, lon in coordinates]
|
||||
|
||||
# Shoelace formula
|
||||
area = 0
|
||||
n = len(coords_m)
|
||||
|
||||
for i in range(n):
|
||||
j = (i + 1) % n
|
||||
area += coords_m[i][0] * coords_m[j][1]
|
||||
area -= coords_m[j][0] * coords_m[i][1]
|
||||
|
||||
return abs(area) / 2
|
||||
|
||||
def _estimate_district(self, lat: float, lng: float) -> str:
|
||||
"""Estimate Berlin district from coordinates."""
|
||||
# Simplified district boundaries
|
||||
if lat > 52.55:
|
||||
return "Pankow" if lng < 13.45 else "Lichtenberg"
|
||||
elif lat > 52.52:
|
||||
if lng < 13.25:
|
||||
return "Charlottenburg-Wilmersdorf"
|
||||
elif lng < 13.42:
|
||||
return "Mitte"
|
||||
else:
|
||||
return "Friedrichshain-Kreuzberg"
|
||||
elif lat > 52.45:
|
||||
if lng < 13.25:
|
||||
return "Steglitz-Zehlendorf"
|
||||
elif lng < 13.42:
|
||||
return "Tempelhof-Schöneberg"
|
||||
else:
|
||||
return "Neukölln"
|
||||
else:
|
||||
return "Treptow-Köpenick"
|
||||
|
||||
async def enhance_green_space_with_real_data(self, space_data: Dict):
    """Enhance a parsed OSM green space with real tree and toilet data.

    Args:
        space_data: dict produced by ``_process_osm_way`` with 'lat',
            'lng', 'area_sqm', and optional 'fclass', 'district',
            'name', 'osm_tags', 'osm_id', 'has_name' keys.

    Returns:
        A dict in the app's green-space shape, or None when any step
        fails (the error is printed and swallowed so one bad space
        does not abort the run).
    """
    try:
        lat = space_data['lat']
        lng = space_data['lng']
        area_sqm = space_data['area_sqm']

        # Adaptive radius based on space size: sqrt(area) approximates
        # the side length; clamp to 150–400 m.
        radius = min(400, max(150, int((area_sqm ** 0.5) * 0.8)))

        # Get real data using existing services
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=radius
        )

        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 600)

        # Calculate scores
        toilet_score = self._score_toilet_accessibility(nearby_toilets)
        space_type = self._map_to_space_type(space_data.get('fclass', ''))

        enhanced_space = {
            "id": space_data['id'],
            "name": space_data['name'],
            "description": f"Significant Berlin {space_data.get('fclass', 'green space')} from OSM data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": space_data.get('district', 'Unknown'),
            "area_sqm": area_sqm,
            # Rough estimate: perimeter of a square with this area
            "perimeter_m": int(4 * (area_sqm ** 0.5)),

            # Environmental features from real tree data
            "environmental": {
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(space_data),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                "water_features": self._detect_water_features(space_data),
                "natural_surface_percent": self._estimate_natural_surface(space_data.get('fclass', ''))
            },

            # Real tree metrics from your existing service
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species[:3]  # Top 3
            },

            # Real toilet accessibility from your existing service
            # (assumes nearby_toilets is sorted nearest-first — TODO confirm)
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Accessibility features (heuristic, not measured)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": self._estimate_transport_score(space_data.get('district', '')),
                "cycling_infrastructure": area_sqm > 5000,
                "parking_availability": 3 if area_sqm > 50000 else 2,
                "lighting_quality": 3 if 'mitte' in space_data.get('district', '').lower() else 2
            },

            # Recreation features (heuristics over OSM tags and size)
            "recreation": {
                "playground_quality": self._estimate_playground_quality(space_data),
                "sports_facilities": self._estimate_sports_facilities(space_data),
                "running_paths": area_sqm > 8000,
                "cycling_paths": area_sqm > 15000,
                "dog_friendly": True,
                "bbq_allowed": self._allows_bbq(space_data)
            },

            # OSM metadata
            "osm_metadata": {
                "osm_id": space_data.get('osm_id'),
                "has_official_name": space_data.get('has_name', False),
                "tags": space_data.get('osm_tags', {}),
                "source": "filtered_osm_extract"
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["filtered_osm_extract", "berlin_tree_cadastre", "berlin_toilets"],
            # Named spaces are considered more trustworthy
            "confidence_score": 95 if space_data.get('has_name') else 85
        }

        return enhanced_space

    except Exception as e:
        # Best-effort: log and skip this space rather than failing the run.
        print(f"❌ Error enhancing {space_data['name']}: {e}")
        return None
|
||||
|
||||
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||
if not nearby_toilets:
|
||||
return 25
|
||||
|
||||
nearest = nearby_toilets[0]['distance_meters']
|
||||
if nearest <= 200:
|
||||
score = 95
|
||||
elif nearest <= 400:
|
||||
score = 80
|
||||
elif nearest <= 600:
|
||||
score = 65
|
||||
else:
|
||||
score = 45
|
||||
|
||||
# Quality bonuses
|
||||
free = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||
accessible = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||
score += min(10, free * 5 + accessible * 3)
|
||||
|
||||
return min(100, score)
|
||||
|
||||
def _map_to_space_type(self, fclass: str) -> str:
|
||||
mapping = {
|
||||
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN', 'wood': 'FOREST',
|
||||
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||
'common': 'PARK', 'village_green': 'GRASS', 'heath': 'HEATH'
|
||||
}
|
||||
return mapping.get(fclass, 'PARK')
|
||||
|
||||
def _detect_water_features(self, space_data: Dict) -> bool:
|
||||
name = space_data.get('name', '').lower()
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
water_keywords = ['see', 'teich', 'pond', 'lake', 'bach', 'spree', 'wasser', 'fluss']
|
||||
return (any(keyword in name for keyword in water_keywords) or
|
||||
'water' in str(tags.values()).lower())
|
||||
|
||||
def _estimate_noise_level(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
district = space_data.get('district', '')
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
base = {'forest': 1, 'wood': 1, 'nature_reserve': 1, 'heath': 1,
|
||||
'meadow': 2, 'grass': 2, 'park': 2, 'garden': 2,
|
||||
'playground': 3, 'recreation_ground': 3}.get(fclass, 2)
|
||||
|
||||
# Central districts are noisier
|
||||
if any(busy in district.lower() for busy in ['mitte', 'kreuzberg', 'friedrichshain']):
|
||||
base += 1
|
||||
|
||||
# Larger spaces are usually quieter inside
|
||||
if area > 50000:
|
||||
base = max(1, base - 1)
|
||||
|
||||
return min(5, base)
|
||||
|
||||
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||
return {'forest': 95, 'wood': 95, 'nature_reserve': 95, 'heath': 90,
|
||||
'meadow': 95, 'grass': 90, 'park': 80, 'garden': 70,
|
||||
'playground': 45, 'recreation_ground': 75}.get(fclass, 75)
|
||||
|
||||
def _estimate_transport_score(self, district: str) -> int:
|
||||
district_lower = district.lower()
|
||||
if 'mitte' in district_lower:
|
||||
return 5
|
||||
elif any(name in district_lower for name in ['charlottenburg', 'kreuzberg', 'friedrichshain', 'pankow']):
|
||||
return 4
|
||||
else:
|
||||
return 3
|
||||
|
||||
def _estimate_playground_quality(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
if fclass == 'playground':
|
||||
return 85
|
||||
elif 'playground' in str(tags.values()).lower():
|
||||
return 75
|
||||
elif fclass == 'park':
|
||||
# Larger parks more likely to have good playgrounds
|
||||
return 60 if area > 10000 else 45
|
||||
else:
|
||||
return 30
|
||||
|
||||
def _estimate_sports_facilities(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
name = space_data.get('name', '').lower()
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
# Explicit indicators
|
||||
if (fclass == 'recreation_ground' or
|
||||
'sport' in str(tags.values()).lower() or
|
||||
any(term in name for term in ['sport', 'football', 'tennis', 'recreation'])):
|
||||
return True
|
||||
|
||||
# Large parks often have sports facilities
|
||||
return fclass == 'park' and area > 20000
|
||||
|
||||
def _allows_bbq(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
area = space_data.get('area_sqm', 0)
|
||||
|
||||
# Check explicit BBQ tags
|
||||
bbq_tag = tags.get('bbq', '').lower()
|
||||
if bbq_tag == 'yes':
|
||||
return True
|
||||
elif bbq_tag == 'no':
|
||||
return False
|
||||
|
||||
# Default based on type and size
|
||||
return fclass in ['park', 'recreation_ground'] and area > 5000
|
||||
|
||||
async def process_filtered_green_spaces(self):
    """Main processing pipeline for filtered green spaces.

    Parses the local OSM extract, keeps only spaces above the configured
    size threshold (self.min_area_sqm), then enhances each survivor with
    real Berlin tree and toilet data via the instance's services.

    Returns:
        list[dict]: enhanced green-space records; empty list when nothing
        passed the filter.
    """
    print("🌳 Processing Significant Berlin Green Spaces")
    print("=" * 55)
    print(f"• Filtering for spaces ≥ {self.min_area_sqm:,} sqm")
    print(f"• Processing up to {self.max_spaces} significant spaces")
    print(f"• Enhancing with real Berlin tree + toilet data")
    print("=" * 55)

    # Step 1: Parse and filter OSM data
    filtered_spaces = self.parse_and_filter_osm_data()

    if not filtered_spaces:
        print("❌ No significant green spaces found")
        return []

    print(f"\n🔧 Enhancing {len(filtered_spaces)} significant spaces with real data...")

    # Step 2: Enhance with real data
    enhanced_spaces = []

    for i, space_data in enumerate(filtered_spaces, 1):
        # Show area in hectares for readability in the progress line.
        area_ha = space_data['area_sqm'] / 10000
        print(f"[{i:3d}/{len(filtered_spaces)}] {space_data['name'][:40]:40} ({area_ha:.1f} ha)")

        # Enhancement returns None on failure; only successes are kept.
        result = await self.enhance_green_space_with_real_data(space_data)
        if result:
            enhanced_spaces.append(result)
            trees = result["tree_data"]["total_trees"]
            toilets = result["toilet_accessibility"]["nearby_toilets_count"]
            print(f" ✅ {trees:3d} trees, {toilets} toilets")
        else:
            print(f" ❌ Enhancement failed")

        # Progress update every 50 spaces
        if i % 50 == 0:
            print(f"\n 📊 Progress: {len(enhanced_spaces)}/{i} enhanced successfully")

        # Small delay to be nice to services
        await asyncio.sleep(0.1)

    print(f"\n🎉 Successfully enhanced {len(enhanced_spaces)} significant green spaces!")
    return enhanced_spaces
|
||||
|
||||
def save_enhanced_data(self, enhanced_spaces: List[Dict]):
    """Save the filtered and enhanced dataset.

    Writes one JSON file combining the enhanced spaces with summary
    statistics (coverage rates, size categories, district breakdown)
    and prints a human-readable report.

    Args:
        enhanced_spaces: non-empty list of enhanced green-space dicts.
            NOTE(review): coverage-rate percentages divide by
            len(enhanced_spaces) — an empty list raises ZeroDivisionError.
            The caller guards against this; confirm before reusing.

    Returns:
        Path of the written JSON file.
    """
    output_file = self.processed_dir / "significant_berlin_green_spaces.json"

    # Calculate comprehensive statistics
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)
    total_area = sum(s["area_sqm"] for s in enhanced_spaces)

    # Named vs unnamed spaces
    named_spaces = len([s for s in enhanced_spaces if s["osm_metadata"]["has_official_name"]])

    # Area distribution
    large_spaces = len([s for s in enhanced_spaces if s["area_sqm"] > 50000])  # > 5 hectares
    medium_spaces = len([s for s in enhanced_spaces if 10000 <= s["area_sqm"] <= 50000])  # 1-5 hectares
    small_spaces = len([s for s in enhanced_spaces if s["area_sqm"] < 10000])  # < 1 hectare

    # District breakdown: group full space records by their neighborhood.
    by_district = {}
    for space in enhanced_spaces:
        district = space['neighborhood']
        if district not in by_district:
            by_district[district] = []
        by_district[district].append(space)

    # Full serialisable payload: data + provenance + summary stats.
    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "filtered_osm_extract_significant_spaces_only",
            "berlin_tree_cadastre_via_street_tree_service",
            "berlin_toilet_locations_via_berlin_data_service"
        ],
        "processing_info": {
            "filtering_criteria": {
                "minimum_area_sqm": self.min_area_sqm,
                "maximum_spaces_processed": self.max_spaces,
                "includes_only_significant_spaces": True
            },
            "enhancement_method": "real_berlin_tree_and_toilet_data",
            "coordinate_system": "WGS84"
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "total_area_hectares": round(total_area / 10000, 1),
            "coverage_rates": {
                "tree_data": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%",
                "toilet_data": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%"
            },
            "space_categories": {
                "named_spaces": named_spaces,
                "unnamed_spaces": len(enhanced_spaces) - named_spaces,
                "large_spaces_over_5ha": large_spaces,
                "medium_spaces_1_5ha": medium_spaces,
                "smaller_spaces_under_1ha": small_spaces
            }
        },
        "district_breakdown": {
            district: len(spaces) for district, spaces in by_district.items()
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n📁 Comprehensive dataset saved: {output_file}")
    print(f"\n📊 Final Statistics:")
    print(f" 🌳 {len(enhanced_spaces)} significant green spaces")
    print(f" 📛 {named_spaces} with official names, {len(enhanced_spaces) - named_spaces} discovered areas")
    print(f" 🌲 {with_trees} spaces with tree data ({round((with_trees/len(enhanced_spaces))*100)}%)")
    print(f" 🚻 {with_toilets} spaces with toilet data ({round((with_toilets/len(enhanced_spaces))*100)}%)")
    print(f" 🌿 {total_trees:,} total trees analyzed")
    print(f" 📏 {round(total_area/10000, 1)} hectares total area")

    # Districts listed busiest-first.
    print(f"\n🏙️ District Distribution:")
    for district, spaces in sorted(by_district.items(), key=lambda x: len(x[1]), reverse=True):
        print(f" • {district}: {len(spaces)} spaces")

    print(f"\n📈 Size Categories:")
    print(f" • Large (>5 ha): {large_spaces} spaces")
    print(f" • Medium (1-5 ha): {medium_spaces} spaces")
    print(f" • Smaller (<1 ha): {small_spaces} spaces")

    print(f"\n✨ This dataset provides comprehensive coverage of Berlin's")
    print(f" significant green spaces with real tree and toilet data!")

    return output_file
|
||||
|
||||
|
||||
async def main():
    """Run the filtered-OSM pipeline end to end and persist the results."""
    processor = FilteredOSMProcessor()

    try:
        spaces = await processor.process_filtered_green_spaces()

        # Nothing survived the pipeline — report and bail out.
        if not spaces:
            print("❌ No spaces were successfully processed.")
            return

        processor.save_enhanced_data(spaces)
        print(f"\n🎯 SUCCESS! Ready to use in your API for accurate personality scoring!")
    except KeyboardInterrupt:
        print("\n⚠️ Process interrupted by user")
    except Exception as e:
        print(f"❌ Error: {e}")
        raise
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: drive the async pipeline to completion.
    asyncio.run(main())
|
|
@ -0,0 +1,613 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Process Berlin green spaces from local OSM data file.
|
||||
Downloads Berlin OSM extract once, then processes locally without API dependencies.
|
||||
"""
|
||||
|
||||
import json
|
||||
import requests
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
import sys
|
||||
import gzip
|
||||
import math
|
||||
|
||||
# Add the app directory to Python path to import services
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
class LocalOSMProcessor:
    """Builds an enhanced Berlin green-space dataset from a local OSM extract,
    combining it with the project's tree and toilet data services."""

    def __init__(self, data_dir: str = "app/data"):
        """Set up data directories, backing services, and the Berlin bounding box.

        Args:
            data_dir: root data directory; raw downloads go to ``osm-raw/``,
                results to ``processed/``.
        """
        self.data_dir = Path(data_dir)
        self.raw_dir = self.data_dir / "osm-raw"
        self.processed_dir = self.data_dir / "processed"

        # Create directories (idempotent — safe to re-run).
        self.raw_dir.mkdir(parents=True, exist_ok=True)
        self.processed_dir.mkdir(parents=True, exist_ok=True)

        # Initialize existing services
        self.tree_service = StreetTreeService()
        self.berlin_data = BerlinDataService()

        # Berlin bounding box for filtering (WGS84 degrees).
        self.berlin_bbox = {
            'min_lat': 52.3370, 'max_lat': 52.6755,
            'min_lon': 13.0882, 'max_lon': 13.7611
        }
|
||||
|
||||
def download_berlin_osm_extract(self):
    """Download Berlin OSM extract from Geofabrik.

    Tries the PBF URL first, then the bz2 fallback. Streams to disk with a
    simple progress indicator.

    Returns:
        Path to the local OSM file (cached or freshly downloaded).

    Raises:
        Exception: when no source could be downloaded.

    NOTE(review): the cache check only looks for ``berlin-latest.osm.pbf``;
    a previously downloaded ``.bz2`` fallback is not detected and would be
    re-downloaded on the next run — confirm whether that is intended.
    """
    osm_file = self.raw_dir / "berlin-latest.osm.pbf"

    if osm_file.exists():
        print(f"✅ OSM file already exists: {osm_file}")
        return osm_file

    # Try PBF format first (smaller), fallback to XML
    urls = [
        "https://download.geofabrik.de/europe/germany/berlin-latest.osm.pbf",
        "https://download.geofabrik.de/europe/germany/berlin-latest.osm.bz2"
    ]

    for url in urls:
        try:
            print(f"Downloading Berlin OSM data from {url}")
            print("This is a one-time download (~50MB)...")

            # stream=True avoids loading the whole archive into memory.
            response = requests.get(url, stream=True, timeout=300)
            response.raise_for_status()

            filename = url.split('/')[-1]
            local_file = self.raw_dir / filename

            # Download with progress
            total_size = int(response.headers.get('content-length', 0))
            downloaded = 0

            with open(local_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if total_size > 0:
                            percent = (downloaded / total_size) * 100
                            print(f"\rDownload progress: {percent:.1f}%", end="")

            print(f"\n✅ Downloaded: {local_file}")
            return local_file

        except Exception as e:
            # Fall through to the next mirror/format on any failure.
            print(f"❌ Failed to download {url}: {e}")
            continue

    raise Exception("Could not download OSM data from any source")
|
||||
|
||||
def download_simple_osm_extract(self):
    """Download simpler XML format if PBF tools not available.

    Performs a one-time Overpass API export of all green-space ways inside
    the Berlin bounding box and caches the result on disk.

    Returns:
        Path to the cached/downloaded ``.osm`` XML file.

    Raises:
        requests.RequestException (or other network errors): when the
        Overpass request fails; the error is printed and re-raised.
    """
    osm_file = self.raw_dir / "berlin_green_spaces.osm"

    if osm_file.exists():
        print(f"✅ OSM file already exists: {osm_file}")
        return osm_file

    # Use Overpass API to get a one-time export of green spaces
    print("Downloading Berlin green spaces extract...")

    overpass_url = "http://overpass-api.de/api/interpreter"

    # Query for all green spaces in Berlin (one-time download).
    # Three way-selectors: leisure, landuse, and natural tag families,
    # each restricted to the Berlin bounding box.
    query = f"""
    [out:xml][timeout:120];
    (
    way["leisure"~"^(park|garden|nature_reserve|recreation_ground|playground|common)$"]
    ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
    way["landuse"~"^(forest|grass|meadow|recreation_ground|village_green|allotments)$"]
    ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
    way["natural"~"^(forest|grass|meadow|scrub|heath|wood)$"]
    ({self.berlin_bbox['min_lat']},{self.berlin_bbox['min_lon']},{self.berlin_bbox['max_lat']},{self.berlin_bbox['max_lon']});
    );
    out geom meta;
    """

    try:
        response = requests.post(overpass_url, data=query, timeout=180)
        response.raise_for_status()

        with open(osm_file, 'w', encoding='utf-8') as f:
            f.write(response.text)

        print(f"✅ Downloaded green spaces extract: {osm_file}")
        return osm_file

    except Exception as e:
        print(f"❌ Failed to download OSM extract: {e}")
        raise
|
||||
|
||||
def parse_osm_xml(self, osm_file: Path) -> List[Dict]:
    """Parse OSM XML file to extract green spaces.

    Args:
        osm_file: path to a plain or gzip-compressed OSM XML file.

    Returns:
        List of green-space dicts produced by ``_process_osm_way``;
        empty list on parse failure.
    """
    print(f"Parsing OSM data from {osm_file}...")

    green_spaces = []

    try:
        # Handle different file formats (transparent gzip support).
        if osm_file.suffix == '.gz':
            with gzip.open(osm_file, 'rt', encoding='utf-8') as f:
                tree = ET.parse(f)
        else:
            tree = ET.parse(osm_file)

        root = tree.getroot()

        # Parse ways (areas)
        ways = root.findall('.//way')
        print(f"Found {len(ways)} ways in OSM data")

        for way in ways:
            try:
                processed_space = self._process_osm_way(way, root)
                if processed_space:
                    green_spaces.append(processed_space)
            except Exception as e:
                # Malformed ways are silently skipped so one bad element
                # cannot abort the whole parse; `e` intentionally unused.
                continue

        print(f"✅ Extracted {len(green_spaces)} green spaces from OSM data")
        return green_spaces

    except Exception as e:
        print(f"❌ Error parsing OSM file: {e}")
        return []
|
||||
|
||||
def _process_osm_way(self, way, root) -> Optional[Dict]:
    """Process a single OSM way into green space format.

    Args:
        way: an OSM ``<way>`` XML element.
        root: the document root, used to resolve node references.

    Returns:
        A green-space dict, or None when the way is not a green space,
        has too few in-bounds vertices, or is smaller than 500 m².
    """
    # Get tags
    tags = {}
    for tag in way.findall('tag'):
        tags[tag.get('k')] = tag.get('v')

    # Check if it's a green space
    green_space_type = self._get_green_space_type(tags)
    if not green_space_type:
        return None

    # Get node references
    nd_refs = [nd.get('ref') for nd in way.findall('nd')]
    if len(nd_refs) < 3:  # Need at least 3 points for an area
        return None

    # Find node coordinates.
    # NOTE(review): root.find with an XPath per node reference scans the
    # whole document each time — O(ways x nodes) overall. A node-id index
    # built once in the caller would make this linear; confirm before
    # running on full-city extracts.
    coordinates = []
    for nd_ref in nd_refs:
        node = root.find(f".//node[@id='{nd_ref}']")
        if node is not None:
            lat = float(node.get('lat'))
            lon = float(node.get('lon'))

            # Check if within Berlin bounds
            if (self.berlin_bbox['min_lat'] <= lat <= self.berlin_bbox['max_lat'] and
                self.berlin_bbox['min_lon'] <= lon <= self.berlin_bbox['max_lon']):
                coordinates.append((lat, lon))

    if len(coordinates) < 3:
        return None

    # Calculate centroid and area
    centroid_lat, centroid_lon = self._calculate_centroid(coordinates)
    area_sqm = self._calculate_area(coordinates)

    # Skip very small areas
    if area_sqm < 500:
        return None

    # Get name (synthesise one from type + centroid when OSM has none).
    name = tags.get('name', f"{green_space_type.title()} near {centroid_lat:.3f}, {centroid_lon:.3f}")

    # Estimate district
    district = self._estimate_district(centroid_lat, centroid_lon)

    return {
        'id': f"osm_way_{way.get('id')}",
        'name': name,
        'fclass': green_space_type,
        'lat': centroid_lat,
        'lng': centroid_lon,
        'area_sqm': int(area_sqm),
        'district': district,
        'osm_tags': tags,
        'osm_id': way.get('id')
    }
|
||||
|
||||
def _get_green_space_type(self, tags: Dict) -> Optional[str]:
|
||||
"""Determine if tags represent a green space and what type."""
|
||||
# Check leisure tags
|
||||
leisure = tags.get('leisure', '')
|
||||
if leisure in ['park', 'garden', 'nature_reserve', 'recreation_ground',
|
||||
'playground', 'common', 'golf_course']:
|
||||
return leisure
|
||||
|
||||
# Check landuse tags
|
||||
landuse = tags.get('landuse', '')
|
||||
if landuse in ['forest', 'grass', 'meadow', 'recreation_ground',
|
||||
'village_green', 'allotments']:
|
||||
return landuse
|
||||
|
||||
# Check natural tags
|
||||
natural = tags.get('natural', '')
|
||||
if natural in ['forest', 'grass', 'meadow', 'scrub', 'heath', 'wood']:
|
||||
return natural
|
||||
|
||||
return None
|
||||
|
||||
def _calculate_centroid(self, coordinates: List[Tuple[float, float]]) -> Tuple[float, float]:
|
||||
"""Calculate centroid of polygon."""
|
||||
lat_sum = sum(coord[0] for coord in coordinates)
|
||||
lon_sum = sum(coord[1] for coord in coordinates)
|
||||
count = len(coordinates)
|
||||
|
||||
return lat_sum / count, lon_sum / count
|
||||
|
||||
def _calculate_area(self, coordinates: List[Tuple[float, float]]) -> float:
|
||||
"""Calculate area of polygon using shoelace formula."""
|
||||
if len(coordinates) < 3:
|
||||
return 0
|
||||
|
||||
# Convert to approximate meters for Berlin
|
||||
lat_to_m = 111000 # meters per degree latitude
|
||||
lon_to_m = 111000 * math.cos(math.radians(52.5)) # adjust for Berlin latitude
|
||||
|
||||
# Convert coordinates to meters
|
||||
coords_m = [(lat * lat_to_m, lon * lon_to_m) for lat, lon in coordinates]
|
||||
|
||||
# Shoelace formula
|
||||
area = 0
|
||||
n = len(coords_m)
|
||||
|
||||
for i in range(n):
|
||||
j = (i + 1) % n
|
||||
area += coords_m[i][0] * coords_m[j][1]
|
||||
area -= coords_m[j][0] * coords_m[i][1]
|
||||
|
||||
return abs(area) / 2
|
||||
|
||||
def _estimate_district(self, lat: float, lng: float) -> str:
|
||||
"""Rough district estimation from coordinates."""
|
||||
# Very rough Berlin district boundaries
|
||||
if lat > 52.55:
|
||||
return "Pankow" if lng < 13.45 else "Lichtenberg"
|
||||
elif lat > 52.52:
|
||||
if lng < 13.25:
|
||||
return "Charlottenburg-Wilmersdorf"
|
||||
elif lng < 13.42:
|
||||
return "Mitte"
|
||||
else:
|
||||
return "Friedrichshain-Kreuzberg"
|
||||
elif lat > 52.45:
|
||||
if lng < 13.25:
|
||||
return "Steglitz-Zehlendorf"
|
||||
elif lng < 13.42:
|
||||
return "Tempelhof-Schöneberg"
|
||||
else:
|
||||
return "Neukölln"
|
||||
else:
|
||||
return "Treptow-Köpenick"
|
||||
|
||||
async def enhance_green_space_with_real_data(self, space_data: Dict):
    """Enhance green space with real tree and toilet data.

    Queries the street-tree and Berlin-data services around the space's
    centroid and assembles the full enhanced record expected by the API.

    Args:
        space_data: dict from ``_process_osm_way`` with at least
            'lat', 'lng', 'area_sqm', 'id', 'name' keys.

    Returns:
        The enhanced green-space dict, or None on any failure (the error
        is printed, never raised).
    """
    try:
        lat = space_data['lat']
        lng = space_data['lng']
        area_sqm = space_data['area_sqm']

        print(f"Enhancing {space_data['name']} ({space_data['district']})...")

        # Adaptive radius: scales with sqrt(area), clamped to 100-350 m.
        radius = min(350, max(100, int((area_sqm ** 0.5) * 0.7)))

        # Get real data using existing services
        tree_response = await self.tree_service.get_trees_near_location(
            lat, lng, radius_m=radius
        )

        nearby_toilets = await self.berlin_data.get_toilets_near_point(lat, lng, 600)

        # Calculate scores
        toilet_score = self._score_toilet_accessibility(nearby_toilets)
        space_type = self._map_to_space_type(space_data.get('fclass', ''))

        enhanced_space = {
            "id": space_data['id'],
            "name": space_data['name'],
            "description": f"Berlin {space_data.get('fclass', 'green space')} from local OSM data",
            "type": space_type,
            "coordinates": {
                "lat": float(lat),
                "lng": float(lng)
            },
            "neighborhood": space_data.get('district', 'Unknown'),
            "area_sqm": area_sqm,
            # Perimeter approximated as if the space were a square.
            "perimeter_m": int(4 * (area_sqm ** 0.5)),

            # Environmental features from real tree data
            "environmental": {
                # Floor of 5% so sparse data never reports zero coverage.
                "tree_coverage_percent": max(5, int(tree_response.shade_analysis.estimated_shade_coverage)),
                "shade_quality": tree_response.shade_analysis.shade_quality_score,
                "noise_level": self._estimate_noise_level(space_data),
                "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                "water_features": self._detect_water_features(space_data),
                "natural_surface_percent": self._estimate_natural_surface(space_data.get('fclass', ''))
            },

            # Real tree metrics
            "tree_data": {
                "total_trees": tree_response.metrics.total_trees,
                "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                "species_count": len(tree_response.metrics.dominant_species),
                "species_diversity_score": tree_response.metrics.species_diversity_score,
                "mature_trees_count": tree_response.metrics.mature_trees_count,
                "young_trees_count": tree_response.metrics.young_trees_count,
                "average_tree_age": tree_response.metrics.average_tree_age,
                "average_height": tree_response.metrics.average_height,
                "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                "dominant_species": tree_response.metrics.dominant_species[:3]
            },

            # Real toilet accessibility
            # Assumes nearby_toilets is sorted by distance — TODO confirm
            # against BerlinDataService.get_toilets_near_point.
            "toilet_accessibility": {
                "nearby_toilets_count": len(nearby_toilets),
                "accessibility_score": toilet_score,
                "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
            },

            # Standard features (heuristics, not measured data)
            "accessibility": {
                "wheelchair_accessible": True,
                "public_transport_score": self._estimate_transport_score(space_data.get('district', '')),
                "cycling_infrastructure": area_sqm > 4000,
                "parking_availability": 2 if area_sqm > 20000 else 1,
                "lighting_quality": 3 if 'mitte' in space_data.get('district', '').lower() else 2
            },

            "recreation": {
                "playground_quality": self._estimate_playground_quality(space_data),
                "sports_facilities": self._estimate_sports_facilities(space_data),
                "running_paths": area_sqm > 6000,
                "cycling_paths": area_sqm > 12000,
                "dog_friendly": True,
                "bbq_allowed": self._allows_bbq(space_data)
            },

            # OSM metadata
            "osm_metadata": {
                "osm_id": space_data.get('osm_id'),
                "tags": space_data.get('osm_tags', {}),
                "source": "local_osm_extract"
            },

            "last_updated": datetime.now().isoformat(),
            "data_sources": ["local_osm_extract", "berlin_tree_cadastre", "berlin_toilets"],
            "confidence_score": 92
        }

        trees = tree_response.metrics.total_trees
        toilets = len(nearby_toilets)
        print(f"✅ {space_data['name']}: {trees} trees, {toilets} toilets")

        return enhanced_space

    except Exception as e:
        # Best-effort enrichment: report and signal failure to the caller.
        print(f"❌ Error enhancing {space_data['name']}: {e}")
        return None
|
||||
|
||||
def _score_toilet_accessibility(self, nearby_toilets: List[Dict]) -> int:
|
||||
if not nearby_toilets:
|
||||
return 25
|
||||
|
||||
nearest = nearby_toilets[0]['distance_meters']
|
||||
if nearest <= 200:
|
||||
score = 90
|
||||
elif nearest <= 400:
|
||||
score = 70
|
||||
else:
|
||||
score = 50
|
||||
|
||||
# Quality bonuses
|
||||
free = len([t for t in nearby_toilets if t.get('is_free', False)])
|
||||
accessible = len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
|
||||
score += min(10, free * 5 + accessible * 3)
|
||||
|
||||
return min(100, score)
|
||||
|
||||
def _map_to_space_type(self, fclass: str) -> str:
|
||||
mapping = {
|
||||
'park': 'PARK', 'forest': 'FOREST', 'garden': 'GARDEN', 'wood': 'FOREST',
|
||||
'nature_reserve': 'NATURE_RESERVE', 'playground': 'PLAYGROUND',
|
||||
'meadow': 'MEADOW', 'grass': 'GRASS', 'recreation_ground': 'PARK',
|
||||
'common': 'PARK', 'village_green': 'GRASS', 'allotments': 'GARDEN'
|
||||
}
|
||||
return mapping.get(fclass, 'PARK')
|
||||
|
||||
def _detect_water_features(self, space_data: Dict) -> bool:
|
||||
name = space_data.get('name', '').lower()
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
water_keywords = ['see', 'teich', 'pond', 'lake', 'bach', 'spree', 'wasser']
|
||||
return any(keyword in name for keyword in water_keywords) or 'water' in tags.values()
|
||||
|
||||
def _estimate_noise_level(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
district = space_data.get('district', '')
|
||||
|
||||
base = {'forest': 1, 'wood': 1, 'nature_reserve': 1, 'meadow': 2,
|
||||
'park': 2, 'garden': 2, 'playground': 3}.get(fclass, 2)
|
||||
|
||||
if any(busy in district.lower() for busy in ['mitte', 'kreuzberg', 'friedrichshain']):
|
||||
base += 1
|
||||
|
||||
return min(5, base)
|
||||
|
||||
def _estimate_natural_surface(self, fclass: str) -> int:
|
||||
return {'forest': 95, 'wood': 95, 'nature_reserve': 90, 'meadow': 95,
|
||||
'grass': 85, 'park': 75, 'garden': 65, 'playground': 40}.get(fclass, 70)
|
||||
|
||||
def _estimate_transport_score(self, district: str) -> int:
|
||||
district_lower = district.lower()
|
||||
if 'mitte' in district_lower:
|
||||
return 5
|
||||
elif any(name in district_lower for name in ['charlottenburg', 'kreuzberg', 'friedrichshain']):
|
||||
return 4
|
||||
else:
|
||||
return 3
|
||||
|
||||
def _estimate_playground_quality(self, space_data: Dict) -> int:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
if fclass == 'playground':
|
||||
return 80
|
||||
elif 'playground' in tags.values():
|
||||
return 75
|
||||
elif fclass == 'park':
|
||||
return 55
|
||||
else:
|
||||
return 30
|
||||
|
||||
def _estimate_sports_facilities(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
tags = space_data.get('osm_tags', {})
|
||||
name = space_data.get('name', '').lower()
|
||||
|
||||
return (fclass == 'recreation_ground' or
|
||||
'sport' in str(tags.values()).lower() or
|
||||
any(term in name for term in ['sport', 'football', 'tennis']))
|
||||
|
||||
def _allows_bbq(self, space_data: Dict) -> bool:
|
||||
fclass = space_data.get('fclass', '')
|
||||
area = space_data.get('area_sqm', 0)
|
||||
tags = space_data.get('osm_tags', {})
|
||||
|
||||
# Check explicit BBQ tags
|
||||
if tags.get('bbq') == 'yes':
|
||||
return True
|
||||
elif tags.get('bbq') == 'no':
|
||||
return False
|
||||
|
||||
# Default based on type and size
|
||||
return fclass in ['park', 'recreation_ground'] and area > 5000
|
||||
|
||||
async def process_all_green_spaces(self):
    """Main processing pipeline.

    Downloads (once) and parses the OSM green-space extract, then enhances
    every space with real Berlin tree and toilet data.

    Returns:
        list[dict]: enhanced green-space records; empty list when the
        download or parse step fails.
    """
    print("🌳 Processing Berlin green spaces from local OSM data...")

    # Step 1: Get OSM data
    try:
        osm_file = self.download_simple_osm_extract()  # More reliable than PBF
    except Exception as e:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit, defeating the Ctrl-C handling in main().
        print(f"❌ Could not download OSM data: {e}")
        return []

    # Step 2: Parse green spaces
    green_spaces = self.parse_osm_xml(osm_file)

    if not green_spaces:
        print("❌ No green spaces found in OSM data")
        return []

    print(f"📊 Found {len(green_spaces)} green spaces to enhance")

    # Step 3: Enhance with real data
    enhanced_spaces = []

    for i, space_data in enumerate(green_spaces, 1):
        print(f"[{i}/{len(green_spaces)}]", end=" ")

        # Failed enhancements return None and are simply skipped.
        result = await self.enhance_green_space_with_real_data(space_data)
        if result:
            enhanced_spaces.append(result)

        # Periodic progress report.
        if i % 20 == 0:
            print(f"\n Progress: {len(enhanced_spaces)} enhanced so far...")

        # Small delay to avoid hammering the backing services.
        await asyncio.sleep(0.1)

    print(f"\n✅ Enhanced {len(enhanced_spaces)} spaces with real data!")
    return enhanced_spaces
|
||||
|
||||
def save_enhanced_data(self, enhanced_spaces: List[Dict]):
    """Save the final dataset.

    Serialises the enhanced spaces plus summary statistics into one JSON
    file under the processed-data directory.

    Args:
        enhanced_spaces: non-empty list of enhanced green-space dicts.
            NOTE(review): the coverage percentages divide by
            len(enhanced_spaces); an empty list raises ZeroDivisionError.
            The caller in main() guards this — confirm before reusing.

    Returns:
        Path of the written JSON file.
    """
    output_file = self.processed_dir / "osm_berlin_green_spaces_enhanced.json"

    # Calculate statistics
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)

    # Full payload: data + provenance + summary stats.
    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": [
            "local_osm_extract_processed_offline",
            "berlin_tree_cadastre",
            "berlin_toilets"
        ],
        "processing_info": {
            "method": "local_osm_processing_no_api_dependency",
            "includes_all_osm_green_spaces": True,
            "enhanced_with_real_berlin_data": True
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "tree_coverage": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%",
            "toilet_coverage": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%"
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Saved comprehensive dataset: {output_file}")
    print(f"📊 {len(enhanced_spaces)} total green spaces")
    print(f"🌲 {with_trees} with tree data, 🚻 {with_toilets} with toilet data")
    print(f"🌿 {total_trees} total trees analyzed")
    print(f"\n✨ Ready to replace mock data in your API!")

    return output_file
|
||||
|
||||
|
||||
async def main():
    """Entry point: run the full local-OSM processing pipeline."""
    processor = LocalOSMProcessor()

    # Intro banner describing what the run will do.
    banner = (
        "🚀 Berlin Green Spaces: Local OSM Processing",
        "=" * 50,
        "• Downloads OSM data once (no API dependency)",
        "• Processes locally for all green spaces",
        "• Enhances with real Berlin tree + toilet data",
        "=" * 50,
    )

    try:
        for line in banner:
            print(line)

        spaces = await processor.process_all_green_spaces()
        if spaces:
            processor.save_enhanced_data(spaces)
    except KeyboardInterrupt:
        print("\n⚠️ Interrupted")
    except Exception as e:
        print(f"❌ Error: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: drive the async pipeline to completion.
    asyncio.run(main())
|
|
@ -0,0 +1,558 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Quick Berlin green spaces processor.
|
||||
Pre-filters OSM data efficiently, then processes only the best candidates.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import re
|
||||
import math
|
||||
# from tqdm.asyncio import tqdm # Not available, remove tqdm dependency
|
||||
from xml.etree.ElementTree import iterparse
|
||||
|
||||
# Add the app directory to Python path
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
def calculate_polygon_area_sqm(coords):
    """Approximate the area (m²) of a lat/lng polygon via the shoelace formula.

    Uses a planar approximation scaled to the polygon's own centroid latitude,
    which is adequate for park-sized areas around Berlin. Degenerate or
    implausible results are clamped to sane defaults.

    Args:
        coords: sequence of (lat, lng) vertex pairs.

    Returns:
        int: area in whole square metres; 5000 for invalid (<3 vertices) or
        degenerate (<100 m²) polygons, capped at 500000 for implausibly
        large (>10 km²) ones.
    """
    if len(coords) < 3:
        return 5000  # Default for invalid polygons

    # Use simple planar approximation for small areas: metres per degree
    # at the polygon's centroid. (FIX: replaced a hand-rolled nested
    # to_radians() helper with stdlib math.radians.)
    lat_center = sum(lat for lat, lng in coords) / len(coords)
    lng_center = sum(lng for lat, lng in coords) / len(coords)

    meters_per_lat = 111320  # roughly constant worldwide
    meters_per_lng = 111320 * math.cos(math.radians(lat_center))  # shrinks with latitude

    # Vertices in metres relative to the centroid (keeps magnitudes small).
    meter_coords = []
    for lat, lng in coords:
        x = (lng - lng_center) * meters_per_lng
        y = (lat - lat_center) * meters_per_lat
        meter_coords.append((x, y))

    # Shoelace formula
    area = 0
    n = len(meter_coords)
    for i in range(n):
        j = (i + 1) % n
        area += meter_coords[i][0] * meter_coords[j][1]
        area -= meter_coords[j][0] * meter_coords[i][1]

    area = abs(area) / 2

    # Reasonable bounds check
    if area < 100:  # Too small
        return 5000
    elif area > 10000000:  # Too large (10 km²)
        return 500000  # Cap at reasonable park size

    return int(area)
|
||||
|
||||
|
||||
def calculate_search_radius(area_sqm):
    """Pick a tree-search radius (metres) appropriate for the park's size.

    Small pocket parks need only a tight radius, while large parks such as
    Treptower Park need a wide one to capture their full tree stock.
    """
    # (exclusive upper area bound in m², radius in m), checked in ascending order.
    size_tiers = (
        (10000, 150),   # < 1 hectare
        (50000, 300),   # < 5 hectares
        (200000, 500),  # < 20 hectares
    )
    for upper_bound, radius in size_tiers:
        if area_sqm < upper_bound:
            return radius
    return 800  # Large parks like Treptower Park
|
||||
|
||||
|
||||
def calculate_enhanced_shade_quality(tree_response, area_sqm):
    """Derive a 0-100 shade quality score from real tree measurements.

    Sums five weighted factors taken from the tree service response: shade
    coverage, mature-tree count, tree density, average height, and average
    crown diameter. ``area_sqm`` is accepted for interface compatibility but
    is not used in the calculation.
    """
    def points_for(value, tiers):
        # tiers: (threshold, points) pairs, highest threshold first; the
        # first threshold the value reaches wins, otherwise 0 points.
        for threshold, points in tiers:
            if value >= threshold:
                return points
        return 0

    metrics = tree_response.metrics
    shade_analysis = tree_response.shade_analysis

    score = 0

    # Factor 1: actual shade coverage (crown-area based)
    score += points_for(metrics.shade_coverage_percent or 0,
                        ((60, 40), (40, 30), (20, 20), (10, 10)))

    # Factor 2: large mature trees (they cast the best shade)
    score += points_for(len(shade_analysis.nearby_large_trees or []),
                        ((10, 25), (5, 20), (3, 15), (1, 10)))

    # Factor 3: tree density per hectare
    score += points_for(metrics.trees_per_hectare or 0,
                        ((50, 20), (30, 15), (20, 10), (10, 5)))

    # Factor 4: average tree height (taller = better shade)
    score += points_for(metrics.average_height or 0,
                        ((20, 10), (15, 8), (10, 5), (5, 3)))

    # Factor 5: crown diameter quality
    score += points_for(metrics.average_crown_diameter or 0,
                        ((12, 5), (8, 3), (5, 1)))

    return min(100, score)
|
||||
|
||||
|
||||
def detect_water_features(candidate):
    """Heuristically decide whether a green-space candidate has water.

    Combines three signals: OSM tag values on water-related keys, German or
    English water words in the feature name, and fountain indicators. Any
    single hit counts as a water feature.
    """
    tags = candidate.get('tags', {})
    name = candidate.get('name', '').lower()

    # Signal 1: OSM tag values that denote open water on the relevant keys.
    water_values = {'water', 'lake', 'pond', 'reservoir', 'river', 'stream'}
    tagged_as_water = any(
        tags.get(key, '').lower() in water_values
        for key in ('water', 'waterway', 'natural')
    )

    # Signal 2: German/English water words appearing in the name.
    named_after_water = any(
        word in name
        for word in ('see', 'teich', 'weiher', 'water', 'lake', 'pond',
                     'fluss', 'river', 'bach', 'creek')
    )

    # Signal 3: fountains (Brunnen) count as water features too.
    has_fountain = any(
        word in name for word in ('brunnen', 'fountain', 'springbrunnen')
    )

    return tagged_as_water or named_after_water or has_fountain
|
||||
|
||||
|
||||
def estimate_berlin_district(lat: float, lng: float) -> str:
    """Estimate which Berlin district contains a coordinate.

    Works from coarse latitude bands (north to south), each with longitude
    cut-offs read west to east. A ``None`` bound acts as a catch-all, so the
    function always returns a district name.
    """
    # (exclusive lower latitude bound, ((exclusive upper lng bound, district), ...))
    latitude_bands = (
        # Northern districts
        (52.55, ((13.25, "Reinickendorf"),
                 (13.45, "Pankow"),
                 (None, "Lichtenberg"))),
        # Central-north districts
        (52.52, ((13.20, "Spandau"),
                 (13.30, "Charlottenburg-Wilmersdorf"),
                 (13.42, "Mitte"),
                 (13.48, "Friedrichshain-Kreuzberg"),
                 (None, "Lichtenberg"))),
        # Central districts
        (52.48, ((13.20, "Spandau"),
                 (13.30, "Charlottenburg-Wilmersdorf"),
                 (13.35, "Tempelhof-Schöneberg"),
                 (13.42, "Mitte"),
                 (13.48, "Friedrichshain-Kreuzberg"),
                 (None, "Lichtenberg"))),
        # Southern-central districts
        (52.45, ((13.20, "Steglitz-Zehlendorf"),
                 (13.35, "Tempelhof-Schöneberg"),
                 (13.45, "Neukölln"),
                 (13.55, "Treptow-Köpenick"),
                 (None, "Marzahn-Hellersdorf"))),
        # Southern districts (catch-all band)
        (None, ((13.35, "Steglitz-Zehlendorf"),
                (None, "Treptow-Köpenick"))),
    )

    for lat_floor, lng_cutoffs in latitude_bands:
        if lat_floor is not None and lat <= lat_floor:
            continue
        for lng_ceiling, district in lng_cutoffs:
            if lng_ceiling is None or lng < lng_ceiling:
                return district
|
||||
|
||||
|
||||
def get_specific_neighborhood(district: str, lat: float, lng: float) -> str:
    """Resolve a finer-grained neighborhood inside a district, if known.

    Each district maps to an ordered sequence of bounding boxes
    ``(min_lat, max_lat, min_lng, max_lng)``; the first box containing the
    point wins. Falls back to the district name when no box matches or the
    district has no neighborhood table.
    """
    # Order matters: some boxes overlap (e.g. Moabit/Tiergarten), and the
    # earlier entry takes precedence, matching the original lookup order.
    boxes_by_district = {
        "Mitte": (
            ((52.540, 52.560, 13.33, 13.38), "Wedding"),
            ((52.515, 52.530, 13.33, 13.38), "Moabit"),
            ((52.510, 52.520, 13.35, 13.38), "Tiergarten"),
            ((52.525, 52.545, 13.40, 13.43), "Prenzlauer Berg"),
        ),
        "Charlottenburg-Wilmersdorf": (
            ((52.485, 52.505, 13.30, 13.33), "Wilmersdorf"),
            ((52.505, 52.525, 13.25, 13.33), "Charlottenburg"),
        ),
        "Friedrichshain-Kreuzberg": (
            ((52.490, 52.510, 13.38, 13.42), "Kreuzberg"),
            ((52.510, 52.525, 13.42, 13.48), "Friedrichshain"),
        ),
        "Tempelhof-Schöneberg": (
            ((52.480, 52.500, 13.33, 13.37), "Schöneberg"),
            ((52.460, 52.480, 13.37, 13.42), "Tempelhof"),
        ),
        "Steglitz-Zehlendorf": (
            ((52.430, 52.450, 13.23, 13.30), "Zehlendorf"),
            ((52.450, 52.470, 13.30, 13.35), "Steglitz"),
        ),
        "Treptow-Köpenick": (
            ((52.430, 52.460, 13.55, 13.65), "Köpenick"),
            ((52.480, 52.500, 13.45, 13.50), "Treptow"),
        ),
    }

    for (min_lat, max_lat, min_lng, max_lng), neighborhood in boxes_by_district.get(district, ()):
        if min_lat <= lat <= max_lat and min_lng <= lng <= max_lng:
            return neighborhood

    return district
|
||||
|
||||
|
||||
async def quick_process():
    """Quick processing of significant Berlin green spaces.

    Pipeline: single-pass streaming scan of the raw OSM XML for promising
    ways (capped at 100 candidates), then batched async enrichment of each
    candidate with real tree and toilet data, finally writing the combined
    dataset plus summary stats to a JSON file.
    """
    print("🚀 Quick Berlin Green Spaces Processor")
    print("=" * 45)

    # Initialize services
    tree_service = StreetTreeService()
    berlin_data = BerlinDataService()

    # Pre-load and index trees once to avoid repeated indexing
    # NOTE(review): calls a private method of StreetTreeService — consider a
    # public warm-up API on the service instead.
    print("🔄 Pre-loading tree data and building spatial index...")
    await tree_service._load_trees()

    osm_file = Path("app/data/osm-raw/berlin_green_spaces.osm")

    if not osm_file.exists():
        print("❌ OSM file not found. Please ensure data is downloaded.")
        return

    print("🔍 Quick filtering for named parks and significant areas...")
    print(f"📁 OSM file size: {osm_file.stat().st_size / (1024*1024):.1f} MB")

    # Quick scan for good candidates
    candidates = []

    try:
        processed = 0

        print("🔍 Single-pass XML parsing - ways with embedded coordinates...")

        # Single pass: parse ways with embedded coordinates.
        # State machine over iterparse events: while inside a <way>, collect
        # its <tag> key/values and <nd> coordinates; evaluate the way when its
        # end event arrives.
        ways_processed = 0
        current_way_tags = {}
        current_way_coordinates = []
        in_way = False

        for event, elem in iterparse(osm_file, events=('start', 'end')):
            if event == 'start':
                if elem.tag == 'way':
                    in_way = True
                    current_way_tags = {}
                    current_way_coordinates = []
                    ways_processed += 1
                    if ways_processed % 1000 == 0:
                        print(f"Processed {ways_processed} ways, found {len(candidates)} candidates so far...")
                elif in_way and elem.tag == 'tag':
                    k = elem.get('k')
                    v = elem.get('v')
                    if k and v:
                        current_way_tags[k] = v
                elif in_way and elem.tag == 'nd':
                    # Extract coordinates directly from nd element
                    # NOTE(review): standard OSM <nd> elements carry only a
                    # node ref; this expects lat/lon embedded in the file —
                    # confirm the download/preprocessing step produces them.
                    lat = elem.get('lat')
                    lon = elem.get('lon')
                    if lat and lon:
                        current_way_coordinates.append((float(lat), float(lon)))
                continue

            # From here on we only see 'end' events.
            if elem.tag == 'way' and in_way:
                in_way = False
                tags = current_way_tags
                coordinates = current_way_coordinates

                # Quick filters for promising spaces - be more lenient
                has_name = 'name' in tags
                is_park = (tags.get('leisure') in ['park', 'garden', 'nature_reserve'] or
                           tags.get('landuse') in ['forest', 'grass', 'recreation_ground'])

                # Also accept common green space tags
                has_green_tags = any(key in tags for key in ['leisure', 'landuse', 'natural', 'amenity'])

                if not (has_name or is_park or has_green_tags):
                    elem.clear()  # Free memory
                    continue

                # Use embedded coordinates directly
                if not coordinates:
                    elem.clear()  # Free memory
                    continue

                # Get center coordinate and all coordinates for area calculation
                lat, lng = coordinates[0] if len(coordinates) == 1 else (
                    sum(lat for lat, lng in coordinates) / len(coordinates),
                    sum(lng for lat, lng in coordinates) / len(coordinates)
                )

                # Basic Berlin bounds check
                if not (52.3 <= lat <= 52.7 and 13.0 <= lng <= 13.8):
                    elem.clear()  # Free memory
                    continue

                name = tags.get('name', f"Unnamed {tags.get('leisure', tags.get('landuse', 'area'))}")
                space_type = tags.get('leisure') or tags.get('landuse') or 'park'

                candidate = {
                    'id': f"quick_{elem.get('id')}",
                    'name': name,
                    'type': space_type,
                    'lat': lat,
                    'lng': lng,
                    'has_name': has_name,
                    'tags': tags,
                    'coordinates': coordinates  # Store all coordinates for area calculation
                }

                candidates.append(candidate)
                processed += 1

                # Limit for quick processing
                if len(candidates) >= 100:
                    elem.clear()  # Free memory
                    break

                elem.clear()  # Free memory
            else:
                elem.clear()  # Free memory

        print(f"✅ Found {len(candidates)} promising green spaces")

    except Exception as e:
        print(f"❌ Error in quick filtering: {e}")
        return

    if not candidates:
        print("No candidates found")
        return

    # Sort by having names (better quality)
    candidates.sort(key=lambda x: x['has_name'], reverse=True)

    print(f"\n🔧 Enhancing top {len(candidates)} spaces with real data...")

    # Process candidates in parallel with batching
    batch_size = 10  # Process 10 candidates at a time
    enhanced_spaces = []

    async def process_candidate(candidate):
        """Process a single candidate with tree and toilet data.

        Returns (enhanced_space_dict, tree_count, toilet_count), or
        (None, 0, 0) when enrichment fails — failures are logged, not raised.
        """
        try:
            # Calculate actual area from OSM polygon coordinates
            area_sqm = calculate_polygon_area_sqm(candidate.get('coordinates', []))
            search_radius = calculate_search_radius(area_sqm)

            # Get real tree data and toilet data concurrently with dynamic radius
            tree_task = tree_service.get_trees_near_location(
                candidate['lat'], candidate['lng'], radius_m=search_radius
            )
            toilet_task = berlin_data.get_toilets_near_point(
                candidate['lat'], candidate['lng'], 500
            )

            print(f"🔍 Getting data for {candidate['name'][:30]}... (area: {area_sqm/10000:.1f}ha, radius: {search_radius}m)")
            tree_response, nearby_toilets = await asyncio.gather(tree_task, toilet_task)

            # Create enhanced space
            enhanced_space = {
                "id": candidate['id'],
                "name": candidate['name'],
                "description": f"Berlin {candidate['type']} discovered via quick OSM processing",
                "type": "PARK",  # Simplified for now
                "coordinates": {
                    "lat": candidate['lat'],
                    "lng": candidate['lng']
                },
                "neighborhood": get_specific_neighborhood(estimate_berlin_district(candidate['lat'], candidate['lng']), candidate['lat'], candidate['lng']),
                "area_sqm": area_sqm,  # Real calculated area

                # Environmental features from real tree data
                "environmental": {
                    "tree_coverage_percent": max(5, int(tree_response.metrics.shade_coverage_percent)),  # Use actual crown area calculation
                    "shade_quality": calculate_enhanced_shade_quality(tree_response, area_sqm),
                    "noise_level": 2,  # Default
                    "wildlife_diversity_score": tree_response.metrics.species_diversity_score,
                    "water_features": detect_water_features(candidate),
                    "natural_surface_percent": 80
                },

                # Real tree data
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "trees_per_hectare": tree_response.metrics.trees_per_hectare,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "species_diversity_score": tree_response.metrics.species_diversity_score,
                    "mature_trees_count": tree_response.metrics.mature_trees_count,
                    "young_trees_count": tree_response.metrics.young_trees_count,
                    "average_tree_age": tree_response.metrics.average_tree_age,
                    "average_height": tree_response.metrics.average_height,
                    "average_crown_diameter": tree_response.metrics.average_crown_diameter,
                    "shade_coverage_percent": tree_response.metrics.shade_coverage_percent,
                    "dominant_species": tree_response.metrics.dominant_species[:3]
                },

                # Real toilet data
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "accessibility_score": 80 if nearby_toilets else 30,
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None,
                    "free_toilets_count": len([t for t in nearby_toilets if t.get('is_free', False)]),
                    "accessible_toilets_count": len([t for t in nearby_toilets if t.get('wheelchair_accessible', False)])
                },

                # Standard features
                # NOTE(review): accessibility and recreation values below are
                # heuristic defaults, not sourced from real data.
                "accessibility": {
                    "wheelchair_accessible": True,
                    "public_transport_score": 3,
                    "cycling_infrastructure": True,
                    "parking_availability": 2,
                    "lighting_quality": 3
                },

                "recreation": {
                    "playground_quality": 60 if candidate['type'] == 'park' else 30,
                    "sports_facilities": candidate['type'] == 'recreation_ground',
                    "running_paths": True,
                    "cycling_paths": True,
                    "dog_friendly": True,
                    "bbq_allowed": candidate['type'] in ['park', 'recreation_ground']
                },

                "osm_metadata": {
                    "has_official_name": candidate['has_name'],
                    "tags": candidate['tags'],
                    "source": "quick_osm_processing"
                },

                "last_updated": datetime.now().isoformat(),
                "data_sources": ["quick_osm_scan", "berlin_tree_cadastre", "berlin_toilets"],
                "confidence_score": 90 if candidate['has_name'] else 75
            }

            return enhanced_space, tree_response.metrics.total_trees, len(nearby_toilets)

        except Exception as e:
            print(f"❌ Error processing {candidate['name']}: {e}")
            return None, 0, 0

    # Process candidates in batches with progress bar
    for i in range(0, len(candidates), batch_size):
        batch = candidates[i:i + batch_size]
        print(f"Processing batch {i//batch_size + 1}/{(len(candidates) + batch_size - 1)//batch_size}")

        # Process batch concurrently with progress bar
        tasks = [process_candidate(candidate) for candidate in batch]
        results = await asyncio.gather(*tasks)

        # Collect results
        for result, trees, toilets in results:
            if result:
                enhanced_spaces.append(result)
                print(f"✅ {result['name'][:40]:40} - {trees:3d} trees, {toilets} toilets")

        # Small delay between batches to be respectful to APIs
        if i + batch_size < len(candidates):
            await asyncio.sleep(0.5)

    # Save results
    output_file = Path("app/data/processed/quick_berlin_green_spaces.json")

    # Coverage statistics over the enriched spaces.
    with_trees = len([s for s in enhanced_spaces if s["tree_data"]["total_trees"] > 0])
    with_toilets = len([s for s in enhanced_spaces if s["toilet_accessibility"]["nearby_toilets_count"] > 0])
    total_trees = sum(s["tree_data"]["total_trees"] for s in enhanced_spaces)

    data = {
        "green_spaces": enhanced_spaces,
        "total_count": len(enhanced_spaces),
        "last_updated": datetime.now().isoformat(),
        "data_sources": ["quick_osm_processing", "berlin_tree_cadastre", "berlin_toilets"],
        "processing_info": {
            "method": "quick_scan_for_named_and_significant_spaces",
            "prioritizes_named_spaces": True,
            "enhanced_with_real_berlin_data": True
        },
        "summary_stats": {
            "total_spaces": len(enhanced_spaces),
            "spaces_with_tree_data": with_trees,
            "spaces_with_toilet_data": with_toilets,
            "total_trees_analyzed": total_trees,
            "tree_coverage": f"{round((with_trees/len(enhanced_spaces))*100, 1)}%" if enhanced_spaces else "0%",
            "toilet_coverage": f"{round((with_toilets/len(enhanced_spaces))*100, 1)}%" if enhanced_spaces else "0%"
        }
    }

    # ensure_ascii=False keeps German umlauts readable in the output JSON.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"\n🎉 Quick processing complete!")
    print(f"📁 Saved: {output_file}")
    print(f"📊 {len(enhanced_spaces)} spaces enhanced")
    print(f"🌲 {with_trees} with tree data, 🚻 {with_toilets} with toilet data")
    print(f"🌿 {total_trees} total trees analyzed")
    print(f"\n✨ Ready to use! This gives you real Berlin green spaces")
    print(f"   with actual tree and toilet data for personality scoring!")
|
||||
|
||||
|
||||
# Script entry point: run the full quick-processing pipeline on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(quick_process())
|
|
@ -0,0 +1,169 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test OSM processing with a small sample to verify it works.
|
||||
"""
|
||||
|
||||
import json
|
||||
import asyncio
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import sys
|
||||
import math
|
||||
|
||||
# Add the app directory to Python path
|
||||
sys.path.append(str(Path(__file__).parent.parent))
|
||||
|
||||
from app.services.street_tree_service import StreetTreeService
|
||||
from app.services.berlin_data_service import BerlinDataService
|
||||
|
||||
|
||||
async def test_processing():
    """Test the processing with a small sample.

    Parses the raw OSM file with a (non-streaming) full ElementTree parse,
    picks the first 5 green-space ways, enriches them with real tree and
    toilet data, and writes the results to a JSON file as a smoke test.
    Returns True on completion.
    """
    print("🧪 Testing OSM processing with sample data...")

    # Initialize services
    tree_service = StreetTreeService()
    berlin_data = BerlinDataService()

    # Parse OSM file and get first 5 green spaces as test
    osm_file = Path("app/data/osm-raw/berlin_green_spaces.osm")

    if not osm_file.exists():
        print("❌ OSM file not found")
        return

    # Full in-memory parse — acceptable for a test, unlike the streaming
    # iterparse used by the production processor.
    tree = ET.parse(osm_file)
    root = tree.getroot()
    ways = root.findall('.//way')

    print(f"📊 Found {len(ways)} total ways in OSM file")

    # Process first 5 green spaces as test
    sample_spaces = []
    processed_count = 0

    for way in ways:
        if processed_count >= 5:
            break

        # Get tags
        tags = {}
        for tag in way.findall('tag'):
            tags[tag.get('k')] = tag.get('v')

        # Check if it's a green space (leisure takes precedence over
        # landuse, which takes precedence over natural).
        green_space_type = None
        leisure = tags.get('leisure', '')
        landuse = tags.get('landuse', '')
        natural = tags.get('natural', '')

        if leisure in ['park', 'garden', 'nature_reserve']:
            green_space_type = leisure
        elif landuse in ['forest', 'grass', 'park']:
            green_space_type = landuse
        elif natural in ['forest', 'wood']:
            green_space_type = natural

        if not green_space_type:
            continue

        # Get coordinates from first and last node to estimate center
        nd_refs = [nd.get('ref') for nd in way.findall('nd')]
        if len(nd_refs) < 3:
            continue

        # Find first node coordinates
        # NOTE(review): O(document) XPath scan per way — fine for 5 samples,
        # too slow for a full run.
        first_node = root.find(f".//node[@id='{nd_refs[0]}']")
        if first_node is None:
            continue

        lat = float(first_node.get('lat'))
        lng = float(first_node.get('lon'))

        # Simple space data
        space_data = {
            'id': f"test_{way.get('id')}",
            'name': tags.get('name', f"Test {green_space_type} {processed_count + 1}"),
            'fclass': green_space_type,
            'lat': lat,
            'lng': lng,
            'area_sqm': 5000,  # Default for test
            'district': 'Test District'
        }

        sample_spaces.append(space_data)
        processed_count += 1

    print(f"🌳 Testing with {len(sample_spaces)} sample green spaces...")

    # Test enhancement with real data
    enhanced_spaces = []

    for i, space_data in enumerate(sample_spaces, 1):
        print(f"\n[{i}/{len(sample_spaces)}] Testing {space_data['name']}...")

        try:
            # Get real tree data
            tree_response = await tree_service.get_trees_near_location(
                space_data['lat'], space_data['lng'], radius_m=200
            )

            # Get real toilet data
            nearby_toilets = await berlin_data.get_toilets_near_point(
                space_data['lat'], space_data['lng'], 500
            )

            # Create enhanced data
            enhanced_space = {
                "id": space_data['id'],
                "name": space_data['name'],
                "type": "PARK",
                "coordinates": {
                    "lat": space_data['lat'],
                    "lng": space_data['lng']
                },
                "tree_data": {
                    "total_trees": tree_response.metrics.total_trees,
                    "species_count": len(tree_response.metrics.dominant_species),
                    "dominant_species": tree_response.metrics.dominant_species
                },
                "toilet_accessibility": {
                    "nearby_toilets_count": len(nearby_toilets),
                    "nearest_distance_m": nearby_toilets[0]['distance_meters'] if nearby_toilets else None
                }
            }

            enhanced_spaces.append(enhanced_space)

            trees = tree_response.metrics.total_trees
            toilets = len(nearby_toilets)
            print(f"✅ Success: {trees} trees, {toilets} toilets nearby")

        except Exception as e:
            # Best-effort: log the failure and continue with remaining samples.
            print(f"❌ Error: {e}")

    # Save test results
    output_file = Path("app/data/processed/test_green_spaces.json")

    test_data = {
        "test_results": enhanced_spaces,
        "total_tested": len(enhanced_spaces),
        "osm_ways_available": len(ways),
        "processing_successful": True,
        "timestamp": datetime.now().isoformat()
    }

    with open(output_file, 'w') as f:
        json.dump(test_data, f, indent=2)

    print(f"\n🎉 Test completed successfully!")
    print(f"📁 Test results saved: {output_file}")
    print(f"📊 Enhanced {len(enhanced_spaces)} sample spaces")
    print(f"💡 Ready to process all {len(ways)} green spaces!")

    return True
|
||||
|
||||
|
||||
# Script entry point: run the sample smoke test on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(test_processing())
|
Loading…
Reference in New Issue