Spaces:
Running
Running
File size: 3,137 Bytes
5a6c225 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
"""
Backfill geocoding for existing listings.

This script geocodes listings that have an address or a location but no coordinates.
"""
import asyncio
from app.database import connect_db, get_db
from app.ai.tools.listing_tool import geocode_address
from bson import ObjectId
async def backfill_geocoding():
    """Geocode stored listings that have an address or location but no latitude.

    Connects to the database, selects every listing whose ``latitude`` is
    null or missing while ``address`` or ``location`` is set, and passes a
    search string built from those fields to ``geocode_address``. When the
    returned mapping reports ``success``, its ``latitude`` and ``longitude``
    are written back to the listing; if the listing had no address, its
    ``location`` is stored as the address as well. Progress lines and a final
    summary of updated/failed counts are printed to stdout.
    """
    # Initialize database connection
    await connect_db()
    db = await get_db()

    # An equality match on None selects documents where the field is null
    # *or* absent, so a single "latitude" clause covers both cases.
    query = {
        "latitude": None,
        "$or": [
            {"address": {"$ne": None}},
            {"location": {"$ne": None}},
        ],
    }
    cursor = db.listings.find(query)

    print("=== Backfilling Geocoding for Existing Listings ===\n")
    updated_count = 0
    failed_count = 0

    async for doc in cursor:
        # The title may be stored as an explicit null, so fall back before
        # slicing instead of relying on the .get() default.
        title = str(doc.get("title") or "No title")[:40]
        location = doc.get("location")
        address = doc.get("address")

        # Build search query - prefer address if available
        if address and location:
            search_query = f"{address}, {location}"
        elif address:
            search_query = address
        elif location:
            search_query = location
        else:
            # Both fields are empty (e.g. ""), so there is nothing to geocode.
            print(f"⏭️ Skipping {title} - no location or address")
            continue

        print(f"📍 Geocoding: {title}")
        print(f" Query: {search_query}")

        geo_result = await geocode_address(search_query, location)
        if geo_result.get("success"):
            lat = geo_result.get("latitude")
            lon = geo_result.get("longitude")

            update_data = {
                "latitude": lat,
                "longitude": lon,
            }
            # If we didn't have an address, store the location as address
            if not address and location:
                update_data["address"] = location

            # Filter on the stored _id as-is; converting it to str and back
            # to ObjectId would fail for ids that are not ObjectIds.
            result = await db.listings.update_one(
                {"_id": doc["_id"]},
                {"$set": update_data},
            )
            if result.modified_count > 0:
                print(f" ✅ Updated: lat={lat}, lon={lon}")
                updated_count += 1
            else:
                print(" ⚠️ No change made")
        else:
            print(f" ❌ Geocoding failed: {geo_result.get('error', 'Unknown error')}")
            failed_count += 1

        # Small delay to respect Nominatim rate limits
        await asyncio.sleep(1.1)

    print("\n=== Summary ===")
    print(f"✅ Updated: {updated_count} listings")
    print(f"❌ Failed: {failed_count} listings")
if __name__ == "__main__":
    # Script entry point: create the backfill coroutine and run it to
    # completion on a fresh event loop. Nothing runs on plain import.
    backfill_coro = backfill_geocoding()
    asyncio.run(backfill_coro)
|