# untitled://extract_products.py
import json
from bs4 import BeautifulSoup

# load your HTML document
with open("comicasa.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

products = []
for section in soup.select("div.list"):
    title_tag = section.select_one(".list__title")
    section_name = title_tag.get_text(strip=True) if title_tag else ""
    for row in section.select("[data-test-id='product-row-content']"):
        name_tag = row.select_one("[data-test-id='product-row-name__highlighter']")
        
        # Extraer el atributo "text" del span interno para el nombre
        inner_span = name_tag.find("span") if name_tag else None
        name = inner_span.get("text", "") if inner_span else ""
        
        # Extraer el precio desde el elemento específico de precio
        price_tag = row.select_one("[data-test-id='product-price-effective']")
        price = price_tag.get_text(strip=True) if price_tag else ""
        # Limpieza del precio: eliminar símbolo de euro y reemplazar coma por punto
        price = price.replace("€", "").strip()
        price = price.replace(",", ".")
        
        # Extraer descripción larga
        desc_tag = row.select_one("[data-test-id='product-row-description__highlighter']")
        if desc_tag:
            inner_desc_span = desc_tag.find("span")
            descripcionlarga = inner_desc_span.get("text", "") if inner_desc_span else ""
        else:
            descripcionlarga = ""
            
        img = row.select_one("img.product-row__picture")
        image_url = img["src"] if img and img.has_attr("src") else None

        products.append({
            "section": section_name,
            "name": name,
            "price": price,
            "image": image_url,
            "descripcionlarga": descripcionlarga
        })

# output as series of INSERT statements
inserts = []
for p in products:
    sec = p["section"].replace("'", "''")
    desc = p["name"].replace("'", "''")
    price = p["price"].replace("'", "''")
    img = (p["image"] or "").replace("'", "''")
    descrip_larga = p["descripcionlarga"].replace("'", "''")
    inserts.append(
        f"insert into articulos(seccion,descripcion,vrventapublico,urlfotoarticulo,descripcionlarga) "
        f"values('{sec}','{desc}','{price}','{img}','{descrip_larga}')"
    )
# incluir borrado previo de datos
prefix = "delete from articulos;\ndelete from secciones;\n"
# instrucciones de inserción de artículos
body = ";\n".join(inserts) + ";"
# agregar al final las sentencias para secciones
suffix = (
    "\ninsert into secciones(seccion) select DISTINCT(seccion) from articulos;\n"
    "UPDATE articulos\n"
    "JOIN secciones ON articulos.seccion = secciones.seccion\n"
    "SET articulos.idseccion = secciones.id;\n"
    "update secciones set saleentiendavirtual=1;\n"
    "select * from secciones;"
)
sql_output = prefix + body + suffix
with open("sentencias.txt", "w", encoding="utf-8") as f:
    f.write(sql_output)