# AIDA / backfill_geocoding.py
# Repository page header: destinyebuka's picture | fyp | 5a6c225
"""
Backfill geocoding for existing listings.

This script geocodes listings that have an address or a location but no coordinates.
"""
import asyncio
from app.database import connect_db, get_db
from app.ai.tools.listing_tool import geocode_address
from bson import ObjectId
async def backfill_geocoding():
    """Geocode listings that lack coordinates and write the result back.

    Connects to the database, selects every listing that has an ``address``
    or a ``location`` but no ``latitude``, geocodes it via
    ``geocode_address`` and stores ``latitude``/``longitude`` on the
    document. When a listing has a ``location`` but no ``address``, the
    location is also stored as its ``address``. Progress lines and a final
    summary are printed to stdout.

    A failure on one listing (a failed lookup, a result without coordinates,
    or an exception raised by the geocoder) is reported and counted, and the
    run continues with the next listing.

    Returns:
        None.
    """
    # Initialize database connection
    await connect_db()
    db = await get_db()

    # Find listings that have a location but no latitude/longitude
    query = {
        "$or": [
            # Has address but no coordinates
            {"address": {"$ne": None}, "latitude": None},
            {"address": {"$ne": None}, "latitude": {"$exists": False}},
            # Has location but no coordinates (for city-level geocoding)
            {"location": {"$ne": None}, "latitude": None},
            {"location": {"$ne": None}, "latitude": {"$exists": False}},
        ]
    }
    cursor = db.listings.find(query)

    print("=== Backfilling Geocoding for Existing Listings ===\n")

    updated_count = 0
    failed_count = 0

    async for doc in cursor:
        # `or` instead of a .get() default: a title stored as None (or "")
        # must not reach the slice, which would raise TypeError on None.
        title = str(doc.get("title") or "No title")[:40]
        location = doc.get("location")
        address = doc.get("address")

        # Build search query - prefer address if available
        if address and location:
            search_query = f"{address}, {location}"
        elif address:
            search_query = address
        elif location:
            search_query = location
        else:
            # Empty-string values pass the `$ne: None` filter but are unusable.
            print(f"⏭️ Skipping {title} - no location or address")
            continue

        print(f"🔍 Geocoding: {title}")
        print(f" Query: {search_query}")

        # Call geocode function. An exception for one listing is turned into
        # an ordinary failure result so it does not abort the whole backfill.
        try:
            geo_result = await geocode_address(search_query, location)
        except Exception as exc:
            geo_result = {"success": False, "error": str(exc)}

        lat = geo_result.get("latitude")
        lon = geo_result.get("longitude")

        if not geo_result.get("success"):
            print(f" ❌ Geocoding failed: {geo_result.get('error', 'Unknown error')}")
            failed_count += 1
        elif lat is None or lon is None:
            # Writing None coordinates would leave the listing matching the
            # query forever while reporting it as updated.
            print(" ❌ Geocoding failed: result has no coordinates")
            failed_count += 1
        else:
            # Update the listing in the database
            update_data = {
                "latitude": lat,
                "longitude": lon,
            }
            # If we didn't have an address, store the location as address
            if not address and location:
                update_data["address"] = location

            # Filter on the stored _id as-is; converting it to str and back
            # to ObjectId fails for documents whose _id is not an ObjectId.
            result = await db.listings.update_one(
                {"_id": doc["_id"]},
                {"$set": update_data},
            )
            if result.modified_count > 0:
                print(f" ✅ Updated: lat={lat}, lon={lon}")
                updated_count += 1
            else:
                print(" ⚠️ No change made")

        # Small delay to respect Nominatim rate limits
        await asyncio.sleep(1.1)

    print("\n=== Summary ===")
    print(f"✅ Updated: {updated_count} listings")
    print(f"❌ Failed: {failed_count} listings")
if __name__ == "__main__":
    # Script entry point: run the backfill coroutine to completion.
    asyncio.run(backfill_geocoding())