File size: 3,137 Bytes
5a6c225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
Backfill geocoding for existing listings.

This script geocodes listings that have an address or a location but no
latitude/longitude coordinates, and writes the coordinates back to the database.
"""
import asyncio
from app.database import connect_db, get_db
from app.ai.tools.listing_tool import geocode_address
from bson import ObjectId

async def backfill_geocoding():
    """Geocode existing listings that lack coordinates and store the result.

    Connects to the database, selects every listing that has an ``address``
    or a ``location`` but no ``latitude``, geocodes each one through
    ``geocode_address`` and writes ``latitude``/``longitude`` back onto the
    listing. When a listing has only a ``location``, that value is also
    stored as its ``address``. Progress and a final summary are printed to
    stdout.

    A failure on one listing (geocoder error, exception, or a result without
    coordinates) is counted and reported; it does not stop the run.

    Returns:
        None.
    """
    # Pause between geocoder calls to respect Nominatim rate limits.
    rate_limit_delay = 1.1

    # Initialize database connection
    await connect_db()
    db = await get_db()

    # Find listings that have an address/location but no latitude.
    query = {
        "$or": [
            # Has address but no coordinates
            {"address": {"$ne": None}, "latitude": None},
            {"address": {"$ne": None}, "latitude": {"$exists": False}},
            # Has location but no coordinates (for city-level geocoding)
            {"location": {"$ne": None}, "latitude": None},
            {"location": {"$ne": None}, "latitude": {"$exists": False}},
        ]
    }

    # Load the matches up front. The loop below sleeps after every geocoder
    # call, and a server-side cursor that sits idle between batches for that
    # long can expire before the next batch is requested.
    listings = [doc async for doc in db.listings.find(query)]

    print("=== Backfilling Geocoding for Existing Listings ===\n")

    updated_count = 0
    failed_count = 0

    for doc in listings:
        # `title` may be stored as None, so fall back before slicing.
        title = str(doc.get("title") or "No title")[:40]
        location = doc.get("location")
        address = doc.get("address")

        # Build search query - prefer address if available
        if address and location:
            search_query = f"{address}, {location}"
        elif address:
            search_query = address
        elif location:
            search_query = location
        else:
            print(f"⏭️  Skipping {title} - no location or address")
            continue

        print(f"🔍 Geocoding: {title}")
        print(f"   Query: {search_query}")

        # One bad listing must not abort the whole backfill, so an exception
        # from the geocoder is turned into an ordinary failed result.
        try:
            geo_result = await geocode_address(search_query, location) or {}
        except Exception as exc:
            geo_result = {"success": False, "error": str(exc)}

        lat = geo_result.get("latitude")
        lon = geo_result.get("longitude")

        # Require real coordinates: writing None would leave the listing
        # matching the query above on the next run.
        if geo_result.get("success") and lat is not None and lon is not None:
            update_data = {
                "latitude": lat,
                "longitude": lon,
            }

            # If we didn't have an address, store the location as address
            if not address and location:
                update_data["address"] = location

            # Filter on the stored _id as-is instead of round-tripping it
            # through str() and ObjectId().
            result = await db.listings.update_one(
                {"_id": doc["_id"]},
                {"$set": update_data},
            )

            if result.modified_count > 0:
                print(f"   ✅ Updated: lat={lat}, lon={lon}")
                updated_count += 1
            else:
                print("   ⚠️ No change made")
        else:
            print(f"   ❌ Geocoding failed: {geo_result.get('error', 'Unknown error')}")
            failed_count += 1

        # Small delay to respect Nominatim rate limits
        await asyncio.sleep(rate_limit_delay)

    print("\n=== Summary ===")
    print(f"✅ Updated: {updated_count} listings")
    print(f"❌ Failed: {failed_count} listings")

# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(backfill_geocoding())