Files
tcg_web_scraper/product_scraping/product_scraper.py

992 lines
45 KiB
Python

import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet
import requests
import re
import time
import random
from playwright.sync_api import sync_playwright, Browser, Page
from playwright.async_api import async_playwright
import asyncio
from aioconsole import ainput
from collections import defaultdict
from datetime import datetime, timedelta
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import subprocess
import os
# Hours to sleep between full scrape cycles (consumed by Cost_Fetcher.run_continuous).
CYCLE_INTERVAL_HOURS = 4
class Email_Notifier:
    """Sends HTML notification emails over SMTP.

    Port 465 is treated as implicit SSL; any other port is assumed to
    support STARTTLS.
    """

    def __init__(self, sender_email, sender_password, recipient_email, smtp_host='smtp.gmail.com', smtp_port=587):
        self.sender_email = sender_email
        self.sender_password = sender_password
        self.recipient_email = recipient_email
        self.smtp_host = smtp_host
        self.smtp_port = smtp_port

    def send_email(self, subject, body_html):
        """Build and send one HTML email; return True on success, False on any error."""
        message = MIMEMultipart('alternative')
        message['From'] = self.sender_email
        message['To'] = self.recipient_email
        message['Subject'] = subject
        message.attach(MIMEText(body_html, 'html'))
        try:
            if self.smtp_port == 465:
                # Implicit-SSL transport.
                with smtplib.SMTP_SSL(self.smtp_host, self.smtp_port) as connection:
                    connection.login(self.sender_email, self.sender_password)
                    connection.send_message(message)
            else:
                # Plain connection upgraded via STARTTLS.
                with smtplib.SMTP(self.smtp_host, self.smtp_port) as connection:
                    connection.starttls()
                    connection.login(self.sender_email, self.sender_password)
                    connection.send_message(message)
        except Exception as e:
            print(f"Error sending email: {e}")
            return False
        print(f"Email sent: {subject}")
        return True
class Profitability_Monitor:
    """Reads profit/margin figures from the workbook and detects crossings of
    the zero-profit threshold between two snapshots."""

    MTG_SET_WORKSHEET_NAME = 'MTG Set'
    MTG_SET_DATA_START_ROW = 3  # Row 1 is a merged group header, row 2 has column names
    # MTG Set sheet column indices (1-based)
    COL_MTG_SET_NAME = 2
    COL_PLAY_PROFIT = 45
    COL_PLAY_MARGIN = 46
    COL_COLLECTOR_PROFIT = 53
    COL_COLLECTOR_MARGIN = 54
    COL_PLAY_SINGLES_PROFIT = 59
    COL_PLAY_SINGLES_MARGIN = 60
    COL_COLLECTOR_SINGLES_PROFIT = 73
    COL_COLLECTOR_SINGLES_MARGIN = 74
    # Product sheet column indices (1-based) for precons
    COL_PRODUCT_NAME = 2
    COL_PRODUCT_IS_PRECON = 7
    COL_PRODUCT_MIN_COST = 8
    COL_PRODUCT_PROFIT = 10
    # One entry per profit metric tracked on the MTG Set sheet.
    PROFIT_CHECKS = [
        {'profit_col': COL_PLAY_PROFIT, 'margin_col': COL_PLAY_MARGIN, 'action_buy': 'Buy Play Booster', 'action_no_buy': 'DO NOT Buy Play Booster'},
        {'profit_col': COL_COLLECTOR_PROFIT, 'margin_col': COL_COLLECTOR_MARGIN, 'action_buy': 'Buy Collector Booster', 'action_no_buy': 'DO NOT Buy Collector Booster'},
        {'profit_col': COL_PLAY_SINGLES_PROFIT, 'margin_col': COL_PLAY_SINGLES_MARGIN, 'action_buy': 'Split Play Booster', 'action_no_buy': 'DO NOT Split Play Booster'},
        {'profit_col': COL_COLLECTOR_SINGLES_PROFIT, 'margin_col': COL_COLLECTOR_SINGLES_MARGIN, 'action_buy': 'Split Collector Booster', 'action_no_buy': 'DO NOT Split Collector Booster'},
    ]

    @staticmethod
    def _profit_entry(profit, margin):
        # An entry counts as profitable only when the profit cell holds a positive number.
        return {
            'profit': profit,
            'margin': margin,
            'is_profitable': isinstance(profit, (int, float)) and profit > 0,
        }

    def read_states(self, workbook_path):
        """Load the workbook with data_only=True to read formula-calculated profit values."""
        workbook = load_workbook(workbook_path, data_only=True)
        set_sheet = workbook[self.MTG_SET_WORKSHEET_NAME]
        mtg_set_states = {}
        for row_index in range(self.MTG_SET_DATA_START_ROW, set_sheet.max_row + 1):
            set_name = set_sheet.cell(row_index, self.COL_MTG_SET_NAME).value
            if not set_name:
                continue
            # One profit/margin entry per tracked metric, keyed by profit column.
            mtg_set_states[set_name] = {
                check['profit_col']: self._profit_entry(
                    set_sheet.cell(row_index, check['profit_col']).value,
                    set_sheet.cell(row_index, check['margin_col']).value,
                )
                for check in self.PROFIT_CHECKS
            }
        precon_states = {}
        product_sheet = workbook['Product']
        for row_index in range(2, product_sheet.max_row + 1):
            if not product_sheet.cell(row_index, self.COL_PRODUCT_IS_PRECON).value:
                continue
            product_name = product_sheet.cell(row_index, self.COL_PRODUCT_NAME).value
            if not product_name:
                continue
            profit = product_sheet.cell(row_index, self.COL_PRODUCT_PROFIT).value
            min_cost = product_sheet.cell(row_index, self.COL_PRODUCT_MIN_COST).value
            # Margin is derivable only when both figures are numeric and cost is non-zero.
            margin = None
            if isinstance(profit, (int, float)) and isinstance(min_cost, (int, float)) and min_cost != 0:
                margin = profit / min_cost
            precon_states[product_name] = self._profit_entry(profit, margin)
        workbook.close()
        return {'mtg_set': mtg_set_states, 'precon': precon_states}

    @staticmethod
    def _crossing_action(old_entry, new_entry, action_buy, action_no_buy):
        # Return the action string when profitability flipped, else None.
        # A missing baseline (old state None) never raises an alert.
        old_profitable = old_entry.get('is_profitable', None)
        new_profitable = new_entry.get('is_profitable', False)
        if old_profitable is None or old_profitable == new_profitable:
            return None
        return action_buy if new_profitable else action_no_buy

    @staticmethod
    def _format_margin(margin):
        # Render a ratio as a percentage string, or "N/A" when not numeric.
        if isinstance(margin, (int, float)):
            return f"{margin * 100:.1f}%"
        return "N/A"

    def find_changes(self, old_states, new_states):
        """Compare old and new profit states; return list of alert dicts for any crossings of the 0 threshold."""
        alerts = []
        old_sets = old_states.get('mtg_set', {})
        for check in self.PROFIT_CHECKS:
            column = check['profit_col']
            for set_name, set_data in new_states['mtg_set'].items():
                new_entry = set_data.get(column, {})
                old_entry = old_sets.get(set_name, {}).get(column, {})
                action = self._crossing_action(old_entry, new_entry, check['action_buy'], check['action_no_buy'])
                if action is not None:
                    alerts.append({'name': set_name, 'action': action, 'margin': self._format_margin(new_entry.get('margin'))})
        old_precons = old_states.get('precon', {})
        for product_name, new_entry in new_states['precon'].items():
            action = self._crossing_action(old_precons.get(product_name, {}), new_entry, 'Buy Precon', 'DO NOT Buy Precon')
            if action is not None:
                alerts.append({'name': product_name, 'action': action, 'margin': self._format_margin(new_entry.get('margin'))})
        return alerts

    def format_email_html(self, alerts):
        """Render the alert list as a simple HTML table for the notification email."""
        rows = ''.join(
            f"<tr><td style='padding:6px 12px'>{alert['name']}</td>"
            f"<td style='padding:6px 12px'>{alert['action']}</td>"
            f"<td style='padding:6px 12px;text-align:right'>{alert['margin']}</td></tr>"
            for alert in alerts
        )
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        return (
            "<html><body>"
            "<h2 style='font-family:sans-serif'>TCG Profitability Alert</h2>"
            "<table border='1' cellpadding='0' cellspacing='0' style='border-collapse:collapse;font-family:sans-serif'>"
            "<tr style='background:#ddd'>"
            "<th style='padding:6px 12px'>MTG Set</th>"
            "<th style='padding:6px 12px'>Action</th>"
            "<th style='padding:6px 12px'>Margin</th>"
            "</tr>"
            f"{rows}"
            "</table>"
            f"<p style='font-family:sans-serif;color:#666'><small>Generated {timestamp}</small></p>"
            "</body></html>"
        )
class Product_Scraper:
    """Playwright-based scraper that extracts a product's cost text and stock
    status from a retailer's product page.

    One instance is bound to a single retailer domain. The parse_cost_*
    helpers convert each retailer's price text into a float.
    """

    domain: str      # retailer this scraper instance is bound to
    page: "Page"     # current Playwright page; replaced on every scrape call

    def __init__(self, domain):
        print("Setting up browser automation")
        self.domain = domain

    @staticmethod
    def parse_cost(cost_text):
        """Parse a GBP price string (e.g. '£12.99') into a float.

        Strips everything except digits and commas (so '£12.99' -> '1299'),
        then divides by 100. Returns None for empty input or when a surviving
        comma makes float() fail (e.g. thousands separators).
        """
        if not cost_text:
            return None
        cost_clean = re.sub(r'[^\d,]', '', cost_text)
        try:
            return float(cost_clean) / 100
        except ValueError:
            return None

    @classmethod
    def parse_cost_from_pennies(cls, cost_text):
        """Like parse_cost, but for values quoted in pennies: divides by a further 100."""
        if not cost_text:
            return None
        cost_clean = cls.parse_cost(cost_text=cost_text)
        if cost_clean is not None:
            cost_clean = cost_clean / 100
        return cost_clean

    @classmethod
    def parse_cost_chaoscards(cls, cost_text):
        """Chaos Cards prices use the standard GBP format."""
        return cls.parse_cost(cost_text=cost_text)

    @classmethod
    def parse_cost_cardmarket(cls, cost_text):
        """Convert '141,30 €' format to float in EUR"""
        if not cost_text:
            return None
        cost_clean = re.sub(r'[^\d,]', '', cost_text)
        # European decimal comma -> dot.
        cost_clean = cost_clean.replace(',', '.')
        try:
            return float(cost_clean)
        except ValueError:
            return None

    @classmethod
    def parse_cost_gameslore(cls, cost_text):
        """Games Lore prices use the standard GBP format."""
        return cls.parse_cost(cost_text=cost_text)

    @classmethod
    def parse_cost_magicmadhouse(cls, cost_text):
        """Magic Madhouse prices use the standard GBP format."""
        return cls.parse_cost(cost_text=cost_text)

    @classmethod
    def parse_cost_newrealitiesgaming(cls, cost_text):
        """New Realities Gaming prices use the standard GBP format."""
        return cls.parse_cost(cost_text=cost_text)

    async def scrape_cost_and_active_playwright(self, browser: "Browser", url, page_load_element_selector, cost_selector, active_selector, invalid_active_statuses):
        """Open `url` in a fresh page and read cost text and an in-stock flag.

        Returns (cost_text, active). cost_text is the raw text of
        `cost_selector` (or None). When `active_selector` is None the product
        is active whenever a cost was found; otherwise the first matching
        element's stripped text must not appear in `invalid_active_statuses`
        (no matching element at all counts as active). Returns (None, None)
        on any page-level error. The page is always closed.
        """
        print(f" Loading page...")
        self.page = await browser.new_page()
        cost = None
        active = None
        try:
            await self.page.goto(url=url, wait_until="domcontentloaded", timeout=30000)
            # Long randomized delay: lets JS-rendered content settle and keeps
            # request pacing irregular.
            await asyncio.sleep(random.uniform(20, 25))
            # NOTE(review): locator() is lazy and this result is unused — a
            # wait_for() on the page-load element was probably intended.
            # Kept as-is to preserve behavior; confirm before changing.
            element = self.page.locator(selector=page_load_element_selector)
            page_title = await self.page.title()
            print(f" Page title: {page_title}")
            element = self.page.locator(selector=cost_selector)
            text = await element.text_content()
            print(f" Text: '{text}'")
            cost = text
            active = None
            if active_selector is None:
                active = (cost is not None)
            else:
                try:
                    elements = await self.page.query_selector_all(selector=active_selector)
                    print(f'# active elements: {len(elements)}')
                    if len(elements) == 0:
                        # No stock-status element on the page -> assume available.
                        active = True
                    else:
                        text = await elements[0].text_content()
                        text = text.strip()
                        print(f" Text: '{text}'")
                        active = (invalid_active_statuses is None or text not in invalid_active_statuses)
                except Exception as e:
                    print(f" Selector failed: {e}")
            if cost is None or active is None:
                print(f" ✗ No cost found")
            # await ainput("Press Enter to continue to next URL...")
            print(f"Cost: {cost}, Active: {active}")
        except Exception as e:
            print(f" Error: {e}")
            # await ainput("Press Enter to continue to next URL...")
            return None, None
        finally:
            await self.page.close()
        return cost, active

    async def scrape_cost_and_active_playwright_cardmarket(self, browser, url, eur_to_gbp_rate):
        """Scrape the cheapest Cardmarket offer; convert EUR -> GBP and add a
        banded shipping estimate (2/8/20 by EUR price bracket)."""
        page_load_element_selector = "body > main.container > div.page-title-container"
        cost_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer > div.price-container > div > div:nth-child(1) > span:nth-child(1)'
        cost_text, active = await self.scrape_cost_and_active_playwright(
            browser=browser
            , url=url
            , page_load_element_selector=page_load_element_selector
            , cost_selector=cost_selector
            , active_selector=None
            , invalid_active_statuses=[]
        )
        cost = Product_Scraper.parse_cost_cardmarket(cost_text=cost_text)
        if cost is not None:
            # Banded shipping estimate, keyed off the EUR item price.
            if cost < 10:
                item_shipping_cost_in = 2
            elif cost < 100:
                item_shipping_cost_in = 8
            else:
                item_shipping_cost_in = 20
            cost = cost * eur_to_gbp_rate + item_shipping_cost_in
        active = (cost is not None)
        return cost, active

    async def scrape_cost_and_active_playwright_chaoscards(self, browser, url):
        """Scrape cost and stock status from a Chaos Cards product page."""
        cost_selector = '.price_inc > span:nth-child(2)'
        active_selector = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li.prod_det_stock > div:nth-child(1) > div:nth-child(2)'
        cost_text, active = await self.scrape_cost_and_active_playwright(
            browser=browser
            , url=url
            , page_load_element_selector=cost_selector
            , cost_selector=cost_selector
            , active_selector=active_selector
            , invalid_active_statuses=["Out of stock", "Coming soon"]
        )
        cost = Product_Scraper.parse_cost_chaoscards(cost_text=cost_text)
        return cost, active

    async def scrape_cost_and_active_playwright_gameslore(self, browser, url):
        """Scrape cost and stock status from a Games Lore product page."""
        cost_selector = 'div.columns > div.column.main > div.product-info-main > div.product-info-price > div.price-box > span.special-price > span.price-container > span.price-wrapper > span.price'
        active_selector = '.stock > span:nth-child(1)'
        cost_text, active = await self.scrape_cost_and_active_playwright(
            browser=browser
            , url=url
            , page_load_element_selector=cost_selector
            , cost_selector=cost_selector
            , active_selector=active_selector
            , invalid_active_statuses=["OUT OF STOCK"]
        )
        cost = Product_Scraper.parse_cost_gameslore(cost_text=cost_text)
        return cost, active

    async def scrape_cost_and_active_playwright_magicmadhouse(self, browser, url):
        """Scrape cost and stock status from a Magic Madhouse product page."""
        page_load_element_selector = '.productView-title'
        cost_selector = 'div.body > div.container > div > div.productView > section.productView-details > div.productView-options > form > div.productView-options-selections > div.productView-product > div.productView-info > div.price-rating > div.productView-price > div.price-section.actual-price > span.price'
        active_selector = '.alertBox.alertBox--error'
        cost_text, active = await self.scrape_cost_and_active_playwright(
            browser=browser
            , url=url
            , page_load_element_selector=page_load_element_selector
            , cost_selector=cost_selector
            , active_selector=active_selector
            , invalid_active_statuses=[]
        )
        cost = Product_Scraper.parse_cost_magicmadhouse(cost_text=cost_text)
        return cost, active

    async def scrape_cost_and_active_playwright_newrealitiesgaming(self, browser, url):
        """Scrape cost and stock status from a New Realities Gaming product page."""
        button_selector = 'div.display-desktop.add-to-cart-button__wrapper div.w-wrapper form button'
        page_load_element_selector = button_selector
        cost_selector = f'{button_selector} span:nth-child(2)'
        active_selector = f'{button_selector} span:nth-child(1)'
        cost_text, active = await self.scrape_cost_and_active_playwright(
            browser=browser
            , url=url
            , page_load_element_selector=page_load_element_selector
            , cost_selector=cost_selector
            , active_selector=active_selector
            , invalid_active_statuses=['Out of stock']
        )
        # Consistency: use this domain's own parser (identical behavior — both
        # delegate to parse_cost; previously called parse_cost_magicmadhouse).
        cost = Product_Scraper.parse_cost_newrealitiesgaming(cost_text=cost_text)
        return cost, active

    async def scrape_prices_and_quantities_playwright_cardmarket(self, browser: "Browser", url, eur_to_gbp_rate):
        """Collect (price, quantity) pairs from every visible Cardmarket offer.

        Prices are converted EUR -> GBP with `eur_to_gbp_rate`. Returns a
        possibly-empty list of {'price': float, 'quantity': float} dicts; the
        page is always closed.
        """
        offer_container_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer'
        price_selector = 'div.price-container > div > div:nth-child(1) > span:nth-child(1)'
        quantity_selector = 'div.amount-container > span:nth-child(1)'
        print(f" Loading page...")
        self.page = await browser.new_page()
        try:
            await self.page.goto(url=url, wait_until="domcontentloaded", timeout=30000)
            await asyncio.sleep(random.uniform(20, 25))
            page_title = await self.page.title()
            print(f" Page title: {page_title}")
            price_quantity_pairs = []
            try:
                offer_containers = await self.page.query_selector_all(offer_container_selector)
                print(f" Offer container selector: Found {len(offer_containers)} elements")
                for offer_container in offer_containers:
                    price_element = await offer_container.query_selector(price_selector)
                    price_text = await price_element.text_content()
                    # Fixed: the original checked `'' in price_text`, which is
                    # always true. A valid Cardmarket price (docstring format
                    # '141,30 €') must contain the euro sign and a digit.
                    if '€' in price_text and re.search(r'\d', price_text):
                        print(f" ✓ Found price: {price_text}")
                    else:
                        price_text = None
                    quantity_element = await offer_container.query_selector(quantity_selector)
                    quantity_text = await quantity_element.text_content()
                    if price_text is None or quantity_text is None:
                        continue
                    price_quantity_pairs.append({
                        'price': Product_Scraper.parse_cost_cardmarket(cost_text=price_text) * eur_to_gbp_rate
                        , 'quantity': Product_Scraper.parse_cost_cardmarket(cost_text=quantity_text)
                    })
            except Exception as e:
                print(f" Price selector failed: {e}")
                # await ainput("Press enter to continue to next URL...")
                return []
        finally:
            await self.page.close()
        return price_quantity_pairs
class TCG_Sole_Trader_Workbook_Container:
    """Wraps the 'TCG Sole Trader Copy.xlsx' workbook: locates the Sourcing and
    Product table headers, exposes sourcing rows as a DataFrame, and writes
    scraped costs/prices back to the Sourcing sheet."""

    # Column names of the DataFrame returned by get_sourcing_entries()
    NAME_COLUMN_ACTIVE: str = 'Active'
    NAME_COLUMN_INDEX_ROW: str = 'Index Row'
    NAME_COLUMN_LINK: str = 'Link'
    NAME_COLUMN_PRODUCT_ID: str = 'Product Id'
    NAME_COLUMN_PRODUCT_IS_BOOSTER: str = 'Product Is Booster'
    NAME_COLUMN_PRODUCT_IS_BOOSTER_BOX: str = 'Product Is Booster Box'
    NAME_COLUMN_PRODUCT_IS_PRECON: str = 'Product Is Precon'
    NAME_COLUMN_SOURCE_NAME: str = 'Source Name'
    NAME_COLUMN_UNIT_COST: str = 'Cost'
    NAME_COLUMN_UNIT_PRICE: str = 'Price'
    PRODUCT_WORKSHEET_NAME = 'Product'
    SOURCING_WORKSHEET_NAME = 'Sourcing'
    WORKBOOK_NAME = 'TCG Sole Trader Copy.xlsx'

    # 1-based sheet coordinates discovered in __init__ (None until located)
    index_column_active_sourcing: int
    index_column_is_booster_product: int
    index_column_is_booster_box_product: int
    index_column_is_precon_product: int
    index_column_link_sourcing: int
    index_column_name_sourcing: int
    index_column_product_id_product: int
    index_column_product_id_sourcing: int
    index_column_unit_cost_sourcing: int
    index_column_unit_price_sourcing: int
    index_row_header_product: int
    index_row_header_sourcing: int
    product_sheet: "Worksheet"
    sourcing_sheet: "Worksheet"
    workbook: "Workbook"

    def __init__(self):
        """Open the workbook and locate all required header rows/columns.

        On any failure this prints an error and returns early, leaving the
        not-yet-located coordinates as None.
        """
        print("Loading workbook...")
        # Pre-set every discovered coordinate to None so that a missing header
        # is reported by the final validity check below instead of raising
        # AttributeError on an unset attribute.
        self.index_row_header_sourcing = None
        self.index_row_header_product = None
        self.index_column_name_sourcing = None
        self.index_column_link_sourcing = None
        self.index_column_unit_cost_sourcing = None
        self.index_column_unit_price_sourcing = None
        self.index_column_active_sourcing = None
        self.index_column_product_id_sourcing = None
        self.index_column_product_id_product = None
        self.index_column_is_booster_product = None
        self.index_column_is_booster_box_product = None
        self.index_column_is_precon_product = None
        self.workbook = load_workbook(self.WORKBOOK_NAME)
        if self.SOURCING_WORKSHEET_NAME not in self.workbook.sheetnames:
            print(f"Error: Sheet '{self.SOURCING_WORKSHEET_NAME}' not found")
            return
        if self.PRODUCT_WORKSHEET_NAME not in self.workbook.sheetnames:
            print(f"Error: Sheet '{self.PRODUCT_WORKSHEET_NAME}' not found")
            return
        self.sourcing_sheet = self.workbook[self.SOURCING_WORKSHEET_NAME]
        self.product_sheet = self.workbook[self.PRODUCT_WORKSHEET_NAME]
        # Locate the Sourcing header row: a 'tbl_Sourcing' marker in column 1
        # or a 'Source Name' header in column 3.
        sourcing_table_found = False
        for row in range(1, self.sourcing_sheet.max_row + 1):
            if self.sourcing_sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(self.sourcing_sheet.cell(row, 3).value):
                self.index_row_header_sourcing = row
                sourcing_table_found = True
                break
        if not sourcing_table_found or not self.index_row_header_sourcing:
            # Fallback: scan only the first rows for the 'Source Name' header.
            for row in range(1, min(20, self.sourcing_sheet.max_row + 1)):
                if 'Source Name' in str(self.sourcing_sheet.cell(row, 3).value):
                    self.index_row_header_sourcing = row
                    sourcing_table_found = True
                    break
        if not sourcing_table_found:
            print("Error: Could not find table 'tbl_Sourcing'")
            return
        # Locate the Product header row: a 'tbl_Product' marker or a
        # 'Product Id' header in column 1.
        product_table_found = False
        for row in range(1, self.product_sheet.max_row + 1):
            if self.product_sheet.cell(row, 1).value == 'tbl_Product' or 'Product Id' in str(self.product_sheet.cell(row, 1).value):
                self.index_row_header_product = row
                product_table_found = True
                break
        if not product_table_found:
            print("Error: Could not find table 'tbl_Product'")
            return
        # Map Sourcing header names to column indices.
        for index_column in range(1, self.sourcing_sheet.max_column + 1):
            header = str(self.sourcing_sheet.cell(self.index_row_header_sourcing, index_column).value).strip()
            if 'Source Name' == header:
                self.index_column_name_sourcing = index_column
            elif 'Source Link' == header:
                self.index_column_link_sourcing = index_column
            elif 'Source Unit Cost' == header:
                self.index_column_unit_cost_sourcing = index_column
            elif 'Sale Price' == header:
                self.index_column_unit_price_sourcing = index_column
            elif 'Active' == header:
                self.index_column_active_sourcing = index_column
            elif 'Product Id' == header:
                self.index_column_product_id_sourcing = index_column
        # Map Product header names to column indices.
        for index_column in range(1, self.product_sheet.max_column + 1):
            header = str(self.product_sheet.cell(self.index_row_header_product, index_column).value).strip()
            if 'Is Booster Box' == header:
                self.index_column_is_booster_box_product = index_column
            elif 'Is Booster' == header:
                self.index_column_is_booster_product = index_column
            elif 'Is Precon' == header:
                self.index_column_is_precon_product = index_column
            elif 'Product Id' == header:
                self.index_column_product_id_product = index_column
        print(f"Sourcing max row: {self.sourcing_sheet.max_row}")
        print(f"Sourcing header row: {self.index_row_header_sourcing}")
        print(f"Sourcing header 1: {self.sourcing_sheet.cell(self.index_row_header_sourcing, 1).value}")
        print(f"Sourcing Columns - Name: {self.index_column_name_sourcing}, Link: {self.index_column_link_sourcing}, Unit Cost: {self.index_column_unit_cost_sourcing}, Sale price: {self.index_column_unit_price_sourcing}, Active: {self.index_column_active_sourcing}, Product Id: {self.index_column_product_id_sourcing}")
        print(f"Product max row: {self.product_sheet.max_row}")
        print(f"Product header row: {self.index_row_header_product}")
        # Fixed: this log line was mislabelled "Sourcing header 1" (copy-paste).
        print(f"Product header 1: {self.product_sheet.cell(self.index_row_header_product, 1).value}")
        print(f"Product Columns - Id: {self.index_column_product_id_product}, Is Booster: {self.index_column_is_booster_product}, Is Booster Box: {self.index_column_is_booster_box_product}, Is Precon: {self.index_column_is_precon_product}")
        # Any column still None (or 0) means a header was not found.
        if not all([
            self.index_column_name_sourcing
            , self.index_column_link_sourcing
            , self.index_column_unit_cost_sourcing
            , self.index_column_unit_price_sourcing
            , self.index_column_product_id_sourcing
            , self.index_column_active_sourcing
            , self.index_column_product_id_product
            , self.index_column_is_booster_product
            , self.index_column_is_booster_box_product
            , self.index_column_is_precon_product
        ]):
            print("Error: Could not find required columns")
            return

    @classmethod
    def create_product_source_df(cls):
        """Return an empty DataFrame with the product-source column layout."""
        return pd.DataFrame(columns=[
            cls.NAME_COLUMN_INDEX_ROW
            , cls.NAME_COLUMN_PRODUCT_ID
            , cls.NAME_COLUMN_SOURCE_NAME
            , cls.NAME_COLUMN_LINK
            , cls.NAME_COLUMN_PRODUCT_IS_BOOSTER
            , cls.NAME_COLUMN_UNIT_COST
            , cls.NAME_COLUMN_UNIT_PRICE
            , cls.NAME_COLUMN_ACTIVE
        ])

    def get_sourcing_entries(self):
        """Return a DataFrame with one row per sourcing entry, sorted by source
        name. Rows missing a name or link are skipped; cost/price/active are
        left as None for the scraper to fill in."""
        product_sources = self.create_product_source_df()
        try:
            # Build the Product-Id -> Is-Booster lookup once (first matching
            # row wins, as the old per-entry scan did) instead of rescanning
            # the whole Product sheet for every sourcing row.
            booster_by_product_id = {}
            for product_row in range(self.index_row_header_product + 1, self.product_sheet.max_row + 1):
                product_id = self.product_sheet.cell(product_row, self.index_column_product_id_product).value
                if product_id not in booster_by_product_id:
                    is_booster_text = str(self.product_sheet.cell(product_row, self.index_column_is_booster_product).value).upper()
                    booster_by_product_id[product_id] = (is_booster_text == "TRUE")
            for index_row in range(self.index_row_header_sourcing + 1, self.sourcing_sheet.max_row + 1):
                source_name = self.sourcing_sheet.cell(index_row, self.index_column_name_sourcing).value
                source_link = self.sourcing_sheet.cell(index_row, self.index_column_link_sourcing).value
                source_product_id = self.sourcing_sheet.cell(index_row, self.index_column_product_id_sourcing).value
                if not source_name or not source_link:
                    continue
                print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
                product_is_booster = booster_by_product_id.get(source_product_id, False)
                print(f"product is booster: {product_is_booster}")
                product_sources.loc[len(product_sources)] = [
                    index_row
                    , source_product_id
                    , source_name
                    , source_link
                    , product_is_booster
                    , None  # cost
                    , None  # price
                    , None  # active
                ]
        except Exception as e:
            print(f"Error: {e}")
        # Fixed: sort_values() returns a new DataFrame; the original discarded
        # the result and returned the rows unsorted.
        product_sources = product_sources.sort_values(self.NAME_COLUMN_SOURCE_NAME)
        return product_sources

    def clear_row_sourcing_sheet(self, index_row):
        """Blank a sourcing row's cost and mark it inactive (pre-scrape reset)."""
        self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = None
        self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "FALSE"

    def update_row_sourcing_sheet(self, index_row, unit_cost=None, unit_price=None, active=None):
        """Write scraped values into a sourcing row; None arguments leave the cell untouched."""
        if unit_cost is not None:
            self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = unit_cost
        if unit_price is not None:
            self.sourcing_sheet.cell(index_row, self.index_column_unit_price_sourcing).value = unit_price
        if active is not None:
            # The sheet stores booleans as the strings "TRUE"/"FALSE".
            self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "TRUE" if active else "FALSE"

    def save_workbook(self):
        """Persist the workbook back to its original file path."""
        print(f"\n{'='*60}")
        print(f"Saving workbook...")
        self.workbook.save(self.WORKBOOK_NAME)
class Cost_Fetcher:
    """Top-level orchestrator for one or more scrape cycles.

    Groups sourcing rows by retailer domain, scrapes each URL with per-domain
    rate limiting (one concurrent browser per domain), persists results to the
    workbook after every item, then recalculates the workbook with LibreOffice
    and emails any profitability threshold crossings.
    """

    # Dictionary keys used for result and domain-detail records
    ACCESSED_LAST_ON_FLAG: str = 'Accessed Last On'
    ACTIVE_FLAG: str = 'Active'
    COST_FLAG: str = 'Cost'
    DATA_FLAG: str = 'Data'
    ERROR_FLAG: str = 'Error'
    INDEX_DOMAIN_FLAG: str = 'Index Domain'
    INDEX_ROW_FLAG: str = 'Index Row'
    # Retailer names — must match the 'Source Name' values in the Sourcing sheet
    NAME_DOMAIN_CARD_MARKET: str = 'Card Market'
    NAME_DOMAIN_CHAOS_CARDS: str = 'Chaos Cards'
    NAME_DOMAIN_GAMES_LORE: str = 'Games Lore'
    NAME_DOMAIN_MAGIC_MADHOUSE: str = 'Magic Madhouse'
    NAME_DOMAIN_NEW_REALITIES_GAMING: str = 'New Realities Gaming'
    NAME_FLAG: str = 'Name'
    PRICE_FLAG: str = 'Price'
    SUCCESS_FLAG: str = 'Success'
    URL_FLAG: str = 'Url'
    domain_names: list[str]
    eur_to_gbp_rate: float
    product_scrapers: list[Product_Scraper]
    product_sources: pd.DataFrame
    workbook_container: TCG_Sole_Trader_Workbook_Container

    def __init__(self, email_notifier=None):
        """Set up scrapers and state. email_notifier may be None, in which case
        boot emails and profitability alerts are skipped in fetch_all()."""
        self.email_notifier = email_notifier
        self.profitability_monitor = Profitability_Monitor()
        # Serialises workbook clear/write/save operations across the
        # concurrently-running per-domain tasks.
        self.workbook_save_lock = asyncio.Lock()
        # Enabled retailers; Card Market and New Realities Gaming are
        # currently switched off (commented out).
        self.domain_names = [
            # self.NAME_DOMAIN_CARD_MARKET
            self.NAME_DOMAIN_CHAOS_CARDS
            , self.NAME_DOMAIN_GAMES_LORE
            , self.NAME_DOMAIN_MAGIC_MADHOUSE
            # , self.NAME_DOMAIN_NEW_REALITIES_GAMING
        ]
        self.domain_details = {
            self.NAME_DOMAIN_CHAOS_CARDS: {
                self.NAME_FLAG: self.NAME_DOMAIN_CHAOS_CARDS
                , self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_CHAOS_CARDS)
                , self.ACCESSED_LAST_ON_FLAG: 0
            }
            , self.NAME_DOMAIN_GAMES_LORE: {
                self.NAME_FLAG: self.NAME_DOMAIN_GAMES_LORE
                , self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_GAMES_LORE)
                , self.ACCESSED_LAST_ON_FLAG: 0
            }
            , self.NAME_DOMAIN_MAGIC_MADHOUSE: {
                self.NAME_FLAG: self.NAME_DOMAIN_MAGIC_MADHOUSE
                , self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_MAGIC_MADHOUSE)
                , self.ACCESSED_LAST_ON_FLAG: 0
            }
        }
        # The two string literals below are disabled domain_details entries
        # kept as reference for re-enabling Card Market / New Realities Gaming.
        """
        self.NAME_DOMAIN_CARD_MARKET: {
            self.NAME_FLAG: self.NAME_DOMAIN_CARD_MARKET
            , self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_CARD_MARKET)
            , self.ACCESSED_LAST_ON_FLAG: 0
        }
        """
        """
        , self.NAME_DOMAIN_NEW_REALITIES_GAMING: {
            self.NAME_FLAG: self.NAME_DOMAIN_NEW_REALITIES_GAMING
            , self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_NEW_REALITIES_GAMING)
            , self.ACCESSED_LAST_ON_FLAG: 0
        }
        """
        # One scraper instance per enabled domain, parallel to domain_names.
        product_scrapers = []
        for index_domain in range(len(self.domain_names)):
            domain = self.domain_names[index_domain]
            product_scraper = Product_Scraper(domain)
            product_scrapers.append(product_scraper)
        self.product_scrapers = product_scrapers
        # Loaded fresh at the start of every fetch_all() cycle.
        self.workbook_container = None
        # Fallback FX rate; refreshed from the network in get_eur_to_gbp_rate().
        self.eur_to_gbp_rate = 0.85

    def get_index_domain_from_name(self, domain_name):
        """Return the position of domain_name in domain_names; raise ValueError if absent."""
        for index_domain in range(len(self.domain_names)):
            if (self.domain_names[index_domain] == domain_name):
                return index_domain
        raise ValueError(f'Domain does not exist: {domain_name}')

    def get_eur_to_gbp_rate(self):
        """Refresh self.eur_to_gbp_rate from exchangerate-api.com; keep 0.85 on failure."""
        try:
            response = requests.get('https://api.exchangerate-api.com/v4/latest/EUR', timeout=10)
            data = response.json()
            self.eur_to_gbp_rate = data['rates']['GBP']
        except Exception as e:
            print(f"Error fetching exchange rate: {e}")
            print("Using fallback rate: 0.85")
            self.eur_to_gbp_rate = 0.85

    async def fetch_all(self):
        """Run one complete cycle: boot email, reload workbook + FX rate,
        snapshot profitability, scrape all URLs (one browser per domain,
        concurrently), then recalculate formulas and email any alerts."""
        try:
            if self.email_notifier:
                sent = self.email_notifier.send_email(
                    subject=f"TCG Profitability Scanner Boot - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
                    , body_html="<html><body><h2>Booted</h2></body></html>"
                )
                if sent:
                    print("Sent boot test email")
                else:
                    print("Error sending boot test email")
            # Reload workbook and exchange rate fresh each cycle
            self.get_eur_to_gbp_rate()
            self.workbook_container = TCG_Sole_Trader_Workbook_Container()
            self.product_sources = self.workbook_container.get_sourcing_entries()
            workbook_path = os.path.abspath(TCG_Sole_Trader_Workbook_Container.WORKBOOK_NAME)
            # Snapshot profitability before any scraping
            print("Reading current profitability states...")
            old_profit_states = self.profitability_monitor.read_states(workbook_path)
            # Group product sources by domain
            domain_groups = {domain: [] for domain in self.domain_names}
            for _, product_source in self.product_sources.iterrows():
                source_name = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_SOURCE_NAME]
                # Rows whose source name is not an enabled domain are silently skipped.
                if source_name in domain_groups:
                    domain_groups[source_name].append(product_source)
            # Create one browser per domain and process all URLs; saves workbook after each item
            processed_count = 0
            updated_count = 0
            async with async_playwright() as p:
                domain_tasks = []
                for domain_name in self.domain_names:
                    if domain_groups[domain_name]:
                        # NOTE(review): headless=False opens visible browser
                        # windows — presumably deliberate; confirm before
                        # switching to headless.
                        browser = await p.chromium.launch(headless=False)
                        task = self.process_domain_urls(browser, domain_name, domain_groups[domain_name])
                        domain_tasks.append(task)
                # Domains run concurrently; each domain's URLs run sequentially.
                all_domain_results = await asyncio.gather(*domain_tasks)
                for domain_results in all_domain_results:
                    for result in domain_results:
                        processed_count += 1
                        if result[self.ACTIVE_FLAG]:
                            updated_count += 1
            print(f"\nComplete! Processed: {processed_count} entries, Updated: {updated_count} costs")
            # Recalculate spreadsheet formulas and check for profitability changes
            if self.email_notifier:
                recalculated = self.recalculate_workbook(workbook_path)
                if recalculated:
                    new_profit_states = self.profitability_monitor.read_states(workbook_path)
                    alerts = self.profitability_monitor.find_changes(old_profit_states, new_profit_states)
                    if alerts:
                        html = self.profitability_monitor.format_email_html(alerts)
                        self.email_notifier.send_email(
                            subject=f"TCG Profitability Alert - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
                            , body_html=html
                        )
                        print(f"Sent {len(alerts)} profitability alert(s).")
                    else:
                        print("No profitability changes detected.")
        except Exception as e:
            import traceback
            print(f"Error in fetch_all: {e}")
            traceback.print_exc()

    async def process_domain_urls(self, browser, domain_name, product_sources):
        """Process all URLs for a single domain sequentially with rate limiting.
        Saves the workbook immediately after each item is cleared (before scrape)
        and again after each result is written (after scrape). The browser is
        always closed, even on error."""
        results = []
        last_access_time = 0
        try:
            for product_source in product_sources:
                # Rate limiting: wait between requests to the same domain
                time_since_last = time.time() - last_access_time
                if time_since_last < 45:
                    # At least 45s between hits, plus up to 5s of jitter.
                    wait_time = 45 - time_since_last + random.uniform(0, 5)
                    print(f" [{domain_name}] Waiting {wait_time:.1f}s before next request...")
                    await asyncio.sleep(wait_time)
                index_row = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_INDEX_ROW]
                # Clear stale data and persist before scraping
                async with self.workbook_save_lock:
                    self.workbook_container.clear_row_sourcing_sheet(index_row)
                    self.workbook_container.save_workbook()
                result = await self.fetch_single_with_browser(browser, domain_name, product_source)
                # Write fresh data and persist immediately
                async with self.workbook_save_lock:
                    # Only active results are written; inactive rows keep the
                    # cleared state from above.
                    if result[self.ACTIVE_FLAG]:
                        self.workbook_container.update_row_sourcing_sheet(
                            index_row=result[self.INDEX_ROW_FLAG]
                            , unit_cost=result[self.COST_FLAG]
                            , unit_price=result[self.PRICE_FLAG]
                            , active=result[self.ACTIVE_FLAG]
                        )
                    self.workbook_container.save_workbook()
                results.append(result)
                last_access_time = time.time()
        finally:
            await browser.close()
        return results

    async def fetch_single_with_browser(self, browser, domain_name, product_source):
        """Fetch a single URL using the provided browser and return a result
        dict (index row, cost, price, active); cost/price/active are None on
        failure or for unrecognised domains."""
        index_row = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_INDEX_ROW]
        source_link = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_LINK]
        Cost_Fetcher.log_processing_new_row(
            index_row=index_row
            , source_link=source_link
        )
        index_domain = self.get_index_domain_from_name(domain_name)
        cost = None
        price = None
        active = None
        try:
            did_attempt = False
            # The string literal below is the disabled Card Market branch,
            # kept as reference (note the trailing 'el' of its former 'elif').
            """
            if domain_name == self.NAME_DOMAIN_CARD_MARKET:
            if product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_PRODUCT_IS_BOOSTER]:
            price_quantity_pairs = await self.product_scrapers[index_domain].scrape_prices_and_quantities_playwright_cardmarket(
            browser = browser
            , url = source_link
            , eur_to_gbp_rate = self.eur_to_gbp_rate
            )
            price = self.get_sale_price_from_price_quantity_pairs(price_quantity_pairs = price_quantity_pairs)
            cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_cardmarket(
            browser = browser
            , url = source_link
            , eur_to_gbp_rate = self.eur_to_gbp_rate
            )
            el"""
            if domain_name == self.NAME_DOMAIN_CHAOS_CARDS:
                did_attempt = True
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_chaoscards(
                    browser=browser
                    , url=source_link
                )
            elif domain_name == self.NAME_DOMAIN_GAMES_LORE:
                did_attempt = True
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_gameslore(
                    browser=browser
                    , url=source_link
                )
            elif domain_name == self.NAME_DOMAIN_MAGIC_MADHOUSE:
                did_attempt = True
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_magicmadhouse(
                    browser=browser
                    , url=source_link
                )
            # The string literal below is a disabled, unverified branch.
            """ unverified
            elif domain_name == self.NAME_DOMAIN_NEW_REALITIES_GAMING:
            cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_newrealitiesgaming(
            browser = browser
            , url = source_link
            )
            """
            # An attempt that produced neither cost/price nor a definite
            # active status is logged as a miss.
            if (
                did_attempt
                and (
                    (
                        cost is None
                        and price is None
                    )
                    or active is None
                )
            ):
                print(f" Error: Could not find cost on page")
        except Exception as e:
            print(f" Error processing {source_link}: {e}")
        return self.make_result_data_json(
            index_row=index_row
            , cost=cost
            , price=price
            , active=active
        )

    @classmethod
    def make_result_data_json(cls, index_row, cost=None, price=None, active=None):
        """Package one scrape outcome as a flat result dict keyed by the *_FLAG constants."""
        return {
            cls.INDEX_ROW_FLAG: index_row
            , cls.COST_FLAG: cost
            , cls.PRICE_FLAG: price
            , cls.ACTIVE_FLAG: active
        }

    def get_sale_price_from_price_quantity_pairs(self, price_quantity_pairs):
        """Pick a representative sale price from Cardmarket offer pairs: the
        first offer with quantity >= 8, else fall back to the max-quantity
        offer (or any priced offer when all quantities are <= 2).

        NOTE(review): scrape_prices_and_quantities_playwright_cardmarket
        already multiplies each pair's price by eur_to_gbp_rate, and this
        method multiplies by the rate again — an apparent double conversion.
        The Cardmarket path is currently disabled (see the string-literal
        block in fetch_single_with_browser); confirm which conversion to keep
        before re-enabling.
        """
        if not price_quantity_pairs:
            return None
        max_quantity = 0
        price = None
        # First pass: look for quantity >= 8
        for price_quantity_pair in price_quantity_pairs:
            eur_price = price_quantity_pair['price']
            quantity = price_quantity_pair['quantity']
            print(f" Found price: €{eur_price}")
            print(f" Found quantity: {quantity}")
            max_quantity = max(max_quantity, quantity)
            if quantity >= 8 and eur_price:
                price = eur_price * self.eur_to_gbp_rate
                print(f" Converted: €{eur_price:.2f} → £{price:.2f}")
                return price
        # Second pass: use max quantity if no quantity >= 8
        print("Offer with quantity >= 8 not found")
        for price_quantity_pair in price_quantity_pairs:
            eur_price = price_quantity_pair['price']
            quantity = price_quantity_pair['quantity']
            if (max_quantity <= 2 or quantity == max_quantity) and eur_price:
                price = eur_price * self.eur_to_gbp_rate
                print(f" Converted: €{eur_price:.2f} → £{price:.2f}")
                return price
        return price

    def recalculate_workbook(self, workbook_path):
        """Run LibreOffice headless to recalculate all formula cells after saving new data.
        Returns True if recalculation succeeded, False otherwise. Skips (and
        returns False) when a LibreOffice lock file shows the workbook is open."""
        workbook_dir = os.path.dirname(workbook_path)
        workbook_name = os.path.basename(workbook_path)
        # LibreOffice creates '.~lock.<name>#' while a document is open.
        lock_file = os.path.join(workbook_dir, f'.~lock.{workbook_name}#')
        if os.path.exists(lock_file):
            print(f"Warning: '{workbook_name}' is open in LibreOffice — skipping recalculation to avoid conflict.")
            return False
        print("Recalculating workbook formulas with LibreOffice headless...")
        try:
            # Converting xlsx -> xlsx in place forces a formula recalculation.
            result = subprocess.run(
                ['libreoffice', '--headless', '--norestore', '--convert-to', 'xlsx', '--outdir', workbook_dir, workbook_path]
                , capture_output=True, text=True, timeout=120
            )
            if result.returncode == 0:
                print("Recalculation complete.")
                return True
            print(f"Recalculation failed (exit {result.returncode}): {result.stderr.strip()}")
        except subprocess.TimeoutExpired:
            print("LibreOffice recalculation timed out.")
        except Exception as e:
            print(f"Error during recalculation: {e}")
        return False

    async def run_continuous(self):
        """Run fetch_all in an infinite loop, sleeping CYCLE_INTERVAL_HOURS between cycles."""
        while True:
            print(f"\n{'='*60}")
            print(f"Cycle started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
            try:
                await self.fetch_all()
            except Exception as e:
                import traceback
                print(f"Unhandled cycle error: {e}")
                traceback.print_exc()
            next_run = datetime.now() + timedelta(hours=CYCLE_INTERVAL_HOURS)
            print(f"Next cycle: {next_run.strftime('%Y-%m-%d %H:%M:%S')} (in {CYCLE_INTERVAL_HOURS}h)")
            await asyncio.sleep(CYCLE_INTERVAL_HOURS * 3600)

    @staticmethod
    def log_processing_new_row(index_row, source_link):
        """Print a banner marking the start of work on one sourcing row."""
        print(f"\n{'='*60}")
        print(f"Processing row {index_row}: {source_link}")
        print(f"{'='*60}")
async def main():
    """Entry point: configure optional email notifications from environment
    variables, then run the cost fetcher forever.

    Notifications require EMAIL_SENDER, EMAIL_PASSWORD, and EMAIL_RECIPIENT;
    SMTP_HOST and SMTP_PORT are optional overrides.
    """
    sender_email = os.environ.get('EMAIL_SENDER')
    sender_password = os.environ.get('EMAIL_PASSWORD')
    recipient_email = os.environ.get('EMAIL_RECIPIENT')
    smtp_host = os.environ.get('SMTP_HOST', 'smtp.gmail.com')
    smtp_port = int(os.environ.get('SMTP_PORT', '587'))
    email_notifier = None
    if sender_email and sender_password and recipient_email:
        email_notifier = Email_Notifier(
            sender_email=sender_email
            , sender_password=sender_password
            , recipient_email=recipient_email
            , smtp_host=smtp_host
            , smtp_port=smtp_port
        )
        # Fixed: the original log line ran sender and recipient together with
        # no separator between the two addresses.
        print(f"Email notifications enabled: {sender_email} -> {recipient_email}")
    else:
        print(
            "Email notifications disabled.\n"
            "Set EMAIL_SENDER, EMAIL_PASSWORD, and EMAIL_RECIPIENT environment variables to enable.\n"
            "For Gmail, use an App Password (https://myaccount.google.com/apppasswords)."
        )
    cost_fetcher = Cost_Fetcher(email_notifier=email_notifier)
    await cost_fetcher.run_continuous()


if __name__ == "__main__":
    asyncio.run(main())