import os
import re
import time
import io
import base64
import sqlite3
import requests
from pathlib import Path
from hashlib import md5
from PIL import Image

DB_NAME = "recommender.db"
HEADERS = {
    'User-Agent': 'CulturalHeritageBot/1.0 (contact@example.org)',
    'Accept': 'application/sparql-results+json'
}

CITIES = {
    "tokyo": "Q1490",
    "berlin": "Q64",
    "rio": "Q8678",
    "rome": "Q220"
}

def init_db():
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS city (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT UNIQUE NOT NULL
    )""")
    
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS category (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT UNIQUE NOT NULL
    )""")
    
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS place (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        city_id INTEGER,
        name TEXT NOT NULL,
        description TEXT,
        image_base64 TEXT,
        FOREIGN KEY (city_id) REFERENCES city(id)
    )""")
    
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS place_category (
        place_id INTEGER,
        category_id INTEGER,
        PRIMARY KEY (place_id, category_id),
        FOREIGN KEY (place_id) REFERENCES place(id),
        FOREIGN KEY (category_id) REFERENCES category(id)
    )""")
    
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS place_link (
        source_id INTEGER,
        target_id INTEGER,
        PRIMARY KEY (source_id, target_id),
        FOREIGN KEY (source_id) REFERENCES place(id),
        FOREIGN KEY (target_id) REFERENCES place(id)
    )""")

    cursor.execute("""
    CREATE TABLE IF NOT EXISTS similar_places (
        main_place_id INTEGER,
        similar_place_id INTEGER,
        sim_score REAL,
        strategy TEXT, 
        PRIMARY KEY (main_place_id, similar_place_id, strategy)
    )""")
    
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS pagerank (
        place_id INTEGER PRIMARY KEY,
        score REAL,
        FOREIGN KEY (place_id) REFERENCES place(id)
    )""")
    
    conn.commit()
    conn.close()

def get_city_item_ids(city_qid):
    url = 'https://query.wikidata.org/sparql'
    query = f"""
    SELECT DISTINCT ?item WHERE {{
      ?item wdt:P131+ wd:{city_qid} .
      VALUES ?targetClass {{ wd:Q33506 wd:Q4989906 wd:Q179700 wd:Q570116 }}
      ?item wdt:P31/wdt:P279* ?targetClass .
      ?item wdt:P18 ?image . 
    }}
    LIMIT 80
    """
    
    # Retry mechanism for SPARQL endpoint timeouts
    max_retries = 3
    for attempt in range(max_retries):
        try:
            res = requests.post(url, data={'query': query}, headers=HEADERS, timeout=20)
            res.raise_for_status()
            return [b['item']['value'].split('/')[-1] for b in res.json()['results']['bindings']]
        except (requests.exceptions.RequestException, KeyError) as e:
            print(f"Attempt {attempt + 1}/{max_retries} failed for QID {city_qid}: {e}")
            if attempt < max_retries - 1:
                time.sleep(5)  # Wait 5 seconds before trying again
            else:
                print(f"Max retries reached. Skipping SPARQL extraction for QID {city_qid}.")
                return []

def fetch_and_store_city(city_name, qid):
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    
    cursor.execute("INSERT OR IGNORE INTO city (name) VALUES (?)", (city_name.title(),))
    cursor.execute("SELECT id FROM city WHERE name = ?", (city_name.title(),))
    city_id = cursor.fetchone()[0]
    
    qids = get_city_item_ids(qid)
    count = 0
    
    for idx, qid in enumerate(qids):
        if count >= 55: 
            break
        try:
            time.sleep(0.05)
            res = requests.get(f'https://www.wikidata.org/wiki/Special:EntityData/{qid}.json', headers=HEADERS, timeout=10)
            entity = res.json()['entities'][qid]
            
            name = entity.get('labels', {}).get('en', {}).get('value')
            if not name: continue
                
            claims = entity.get('claims', {})
            if 'P18' not in claims: continue
                
            img_filename = claims['P18'][0]['mainsnak']['datavalue']['value'].replace(' ', '_')
            digest = md5(img_filename.encode('utf-8')).hexdigest()
            img_url = f"https://upload.wikimedia.org/wikipedia/commons/{digest[0]}/{digest[0:2]}/{img_filename}"
            
            img_res = requests.get(img_url, headers=HEADERS, timeout=10)
            if img_res.status_code != 200: continue
            
            # Downscale Image Section
            img = Image.open(io.BytesIO(img_res.content))
            img.thumbnail((400, 400))  # Scales down while maintaining aspect ratio smoothly
            
            buffer = io.BytesIO()
            img.convert("RGB").save(buffer, format="JPEG", quality=85)
            b64_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
            
            desc = entity.get('descriptions', {}).get('en', {}).get('value', "A beautiful place of historical interest.")
            
            cursor.execute("INSERT INTO place (city_id, name, description, image_base64) VALUES (?, ?, ?, ?)",
                           (city_id, name, desc, b64_str))
            place_id = cursor.lastrowid
            
            if 'P31' in claims:
                for stmt in claims['P31']:
                    try:
                        cat_qid = stmt['mainsnak']['datavalue']['value']['id']
                        cursor.execute("INSERT OR IGNORE INTO category (name) VALUES (?)", (cat_qid,))
                        cursor.execute("SELECT id FROM category WHERE name = ?", (cat_qid,))
                        cat_id = cursor.fetchone()[0]
                        cursor.execute("INSERT OR IGNORE INTO place_category (place_id, category_id) VALUES (?, ?)", (place_id, cat_id))
                    except: continue
            
            count += 1
            print(f"[{city_name.title()}] Ingested {count}: {name}")
        except Exception:
            continue
            
    conn.commit()
    conn.close()

def inject_mock_structural_links():
    conn = sqlite3.connect(DB_NAME)
    cursor = conn.cursor()
    cursor.execute("SELECT id, city_id FROM place")
    places = cursor.fetchall()
    
    for src_id, c_id in places:
        cursor.execute("SELECT id FROM place WHERE city_id = ? AND id != ?", (c_id, src_id))
        siblings = [r[0] for r in cursor.fetchall()]
        import random
        for target in random.sample(siblings, min(len(siblings), 4)):
            cursor.execute("INSERT OR IGNORE INTO place_link (source_id, target_id) VALUES (?, ?)", (src_id, target))
            
    conn.commit()
    conn.close()

if __name__ == "__main__":
    print("Initializing Database structure...")
    init_db()
    for name, qid in CITIES.items():
        print(f"Starting data pipeline extraction for: {name}")
        fetch_and_store_city(name, qid)
    print("Interlinking structural dataset paths...")
    inject_mock_structural_links()
    print("Database data harvesting cycle complete.")