"""
Backfill geocoding for existing listings.

This script geocodes listings that have an address (or, failing that, a
location) but no stored latitude, and writes the resulting coordinates back
to the ``listings`` collection.
"""
import asyncio
from typing import Optional

from app.database import connect_db, get_db
from app.ai.tools.listing_tool import geocode_address
from bson import ObjectId  # noqa: F401  (no longer used here; import kept on purpose)

# Pause between geocoding requests, in seconds, to respect Nominatim rate limits.
GEOCODE_DELAY_SECONDS = 1.1


def _build_search_query(address, location) -> Optional[str]:
    """Return the geocoding query for a listing, or None if nothing is usable.

    The address is preferred; when both are present they are combined as
    ``"<address>, <location>"``. Empty/None values are treated as absent.
    """
    if address and location:
        return f"{address}, {location}"
    if address:
        return address
    if location:
        return location
    return None


async def backfill_geocoding() -> None:
    """Geocode every listing that lacks a latitude and store the coordinates.

    Connects to the database, iterates over listings that have an ``address``
    or ``location`` but a null/missing ``latitude``, calls
    ``geocode_address`` for each one, and ``$set``s ``latitude`` and
    ``longitude`` on success. When a listing has only a ``location``, that
    value is also stored as its ``address``. Progress and a final summary are
    printed to stdout.
    """
    # Initialize database connection
    await connect_db()
    db = await get_db()

    # Find listings that have an address/location but no latitude
    query = {
        "$or": [
            # Has address but no coordinates
            {"address": {"$ne": None}, "latitude": None},
            {"address": {"$ne": None}, "latitude": {"$exists": False}},
            # Has location but no coordinates (for city-level geocoding)
            {"location": {"$ne": None}, "latitude": None},
            {"location": {"$ne": None}, "latitude": {"$exists": False}},
        ]
    }

    cursor = db.listings.find(query)

    print("=== Backfilling Geocoding for Existing Listings ===\n")

    updated_count = 0
    failed_count = 0

    async for doc in cursor:
        # `or` (rather than a .get default) also covers a title stored as
        # null, which would otherwise fail on the slice below.
        title = str(doc.get("title") or "No title")[:40]
        location = doc.get("location")
        address = doc.get("address")

        # Build search query - prefer address if available
        search_query = _build_search_query(address, location)
        if search_query is None:
            print(f"⏭️ Skipping {title} - no location or address")
            continue

        print(f"🔍 Geocoding: {title}")
        print(f" Query: {search_query}")

        # Call geocode function
        geo_result = await geocode_address(search_query, location)
        lat = geo_result.get("latitude")
        lon = geo_result.get("longitude")

        # Require real coordinates: a "success" without them must not be
        # written back as nulls or counted as an update.
        if geo_result.get("success") and lat is not None and lon is not None:
            update_data = {
                "latitude": lat,
                "longitude": lon,
            }

            # If we didn't have an address, store the location as address
            if not address and location:
                update_data["address"] = location

            # Filter on the stored _id as-is; converting it through
            # str()/ObjectId() fails for ids that are not ObjectIds.
            result = await db.listings.update_one(
                {"_id": doc["_id"]},
                {"$set": update_data},
            )

            if result.modified_count > 0:
                print(f" ✅ Updated: lat={lat}, lon={lon}")
                updated_count += 1
            else:
                print(f" ⚠️ No change made")
        else:
            print(f" ❌ Geocoding failed: {geo_result.get('error', 'Unknown error')}")
            failed_count += 1

        # Small delay to respect Nominatim rate limits
        await asyncio.sleep(GEOCODE_DELAY_SECONDS)

    print("\n=== Summary ===")
    print(f"✅ Updated: {updated_count} listings")
    print(f"❌ Failed: {failed_count} listings")


if __name__ == "__main__":
    asyncio.run(backfill_geocoding())