Spaces:
Running
Running
| """ | |
| Backfill geocoding for existing listings | |
| This script geocodes listings that have an address or a location but no coordinates | |
| """ | |
| import asyncio | |
| from app.database import connect_db, get_db | |
| from app.ai.tools.listing_tool import geocode_address | |
| from bson import ObjectId | |
async def backfill_geocoding():
    """Geocode stored listings that lack coordinates and save the result.

    Connects to the database, selects every listing whose ``address`` or
    ``location`` is non-null while ``latitude`` is null or missing, geocodes
    it via ``geocode_address`` and writes ``latitude``/``longitude`` back to
    the listing. When a listing only had a ``location``, that value is also
    stored as its ``address``. Progress lines and a final summary are printed
    to stdout.

    Returns:
        None.
    """
    # Initialize database connection
    await connect_db()
    db = await get_db()

    # Find listings that have an address/location but no latitude
    query = {
        "$or": [
            # Has address but no coordinates
            {"address": {"$ne": None}, "latitude": None},
            {"address": {"$ne": None}, "latitude": {"$exists": False}},
            # Has location but no coordinates (for city-level geocoding)
            {"location": {"$ne": None}, "latitude": None},
            {"location": {"$ne": None}, "latitude": {"$exists": False}},
        ]
    }
    cursor = db.listings.find(query)

    print("=== Backfilling Geocoding for Existing Listings ===\n")
    updated_count = 0
    failed_count = 0

    async for doc in cursor:
        # The .get() default only covers a *missing* key; a title stored as
        # None (or any non-string) would otherwise make the slice raise.
        raw_title = doc.get("title", "No title")
        title = ("No title" if raw_title is None else str(raw_title))[:40]
        location = doc.get("location")
        address = doc.get("address")

        # Build search query - prefer address if available
        if address and location:
            search_query = f"{address}, {location}"
        elif address:
            search_query = address
        elif location:
            search_query = location
        else:
            # Both values are empty (e.g. ""): nothing to geocode.
            print(f"βοΈ Skipping {title} - no location or address")
            continue

        print(f"π Geocoding: {title}")
        print(f" Query: {search_query}")

        # Call geocode function
        geo_result = await geocode_address(search_query, location)

        if geo_result.get("success"):
            lat = geo_result.get("latitude")
            lon = geo_result.get("longitude")

            # Update the listing in the database
            update_data = {
                "latitude": lat,
                "longitude": lon,
            }
            # If we didn't have an address, store the location as address
            if not address and location:
                update_data["address"] = location

            # Filter on the _id exactly as it was read. Round-tripping it
            # through str()/ObjectId() fails for ids that are not ObjectIds.
            result = await db.listings.update_one(
                {"_id": doc["_id"]},
                {"$set": update_data},
            )
            if result.modified_count > 0:
                print(f" β Updated: lat={lat}, lon={lon}")
                updated_count += 1
            else:
                print(" β οΈ No change made")
        else:
            print(f" β Geocoding failed: {geo_result.get('error', 'Unknown error')}")
            failed_count += 1

        # Small delay to respect Nominatim rate limits
        await asyncio.sleep(1.1)

    print("\n=== Summary ===")
    print(f"β Updated: {updated_count} listings")
    print(f"β Failed: {failed_count} listings")
# Entry point: run the backfill only when this file is executed as a script.
if __name__ == "__main__":
    backfill_job = backfill_geocoding()
    asyncio.run(backfill_job)