# trading_card_games/python/mtg_booster_expected_value_fetcher.py

import pandas as pd
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import re
import time

def setup_driver(headless=True):
    """Create a Chrome WebDriver configured for scraping.

    Args:
        headless: When truthy, the '--headless' flag is passed to Chrome.

    Returns:
        The Chrome WebDriver instance, or None if it could not be created
        (the error is printed rather than raised).
    """
    flags = ['--headless'] if headless else []
    flags += [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ]

    opts = Options()
    for flag in flags:
        opts.add_argument(flag)

    try:
        return webdriver.Chrome(options=opts)
    except Exception as exc:
        print(f"Error setting up Chrome driver: {exc}")
        print("Make sure Chrome and chromedriver are installed")
    return None

def parse_price_value(text):
    """Extract a numeric value from a price string such as '$5.50' or '€5,50'.

    Both US-style ('$1,234.56') and European-style ('€1.234,56') grouping
    are understood: when both separators occur, whichever appears last is
    taken as the decimal mark and the other is dropped as a thousands
    separator. A separator that occurs more than once on its own
    ('1,234,567') is treated as a thousands separator. A single lone comma
    is ambiguous and is treated as a decimal mark ('5,50' -> 5.5).

    Args:
        text: Price string to parse; may be None or empty.

    Returns:
        The parsed float, or None when ``text`` is empty or contains no
        parseable number.
    """
    if not text:
        return None

    # Keep only digits, separators and the minus sign.
    cleaned = re.sub(r'[^\d,.\-]', '', text)

    last_comma = cleaned.rfind(',')
    last_period = cleaned.rfind('.')

    if last_comma != -1 and last_period != -1:
        if last_comma > last_period:
            # European style: periods group thousands, comma is the decimal.
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            # US style: commas group thousands, period is the decimal.
            cleaned = cleaned.replace(',', '')
    elif cleaned.count(',') > 1:
        # Repeated commas can only be thousands separators.
        cleaned = cleaned.replace(',', '')
    elif cleaned.count('.') > 1:
        # Repeated periods can only be thousands separators.
        cleaned = cleaned.replace('.', '')
    else:
        # At most one separator: a lone comma is read as the decimal mark.
        cleaned = cleaned.replace(',', '.')

    try:
        return float(cleaned)
    except ValueError:
        return None

def scrape_mtg_stocks_values(driver, url, timeout=10):
    """Scrape expected value and market value from an MTG Stocks page.

    Loads ``url`` in ``driver``, waits for the rows of the expected-value
    table to appear, and reads the values from the first row whose product
    name exactly matches one of the recognised booster names.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to load.
        timeout: Maximum number of seconds to wait for the table rows.

    Returns:
        A dict with keys 'expected_value' and 'market_value' (floats, or
        None when missing or unparseable) and 'found' (True only when a
        matching row was read). Errors while loading or reading the page
        are printed and reported as ``found=False`` rather than raised.
    """
    # Local import keeps this function self-contained; selenium is already
    # a dependency of this module.
    from selenium.common.exceptions import (
        NoSuchElementException,
        StaleElementReferenceException,
        TimeoutException,
    )

    # Valid booster types to match (exact, case-sensitive)
    valid_booster_types = {
        'Play Booster',
        'Set Booster',
        'Booster',
        'Play Booster Pack',
        'Set Booster Pack',
        'Booster Pack',
    }

    row_selector = 'mtg-sets-expected-value > mtg-product-tree > .table-responsive > table > tbody:nth-child(2) > tr'
    name_selector = 'td:nth-child(1) > div.d-flex.align-items-center:nth-child(1) > a:nth-child(2)'

    try:
        print("  Loading page...")
        driver.get(url)

        # Wait until at least one table row is present instead of sleeping
        # for a fixed time: returns sooner on fast loads and tolerates slow
        # ones up to ``timeout`` seconds.
        try:
            rows = WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, row_selector))
            )
        except TimeoutException:
            rows = []

        print(f"  Found {len(rows)} rows in table")

        for row in rows:
            try:
                # Get the booster type from first column
                booster_type = row.find_element(By.CSS_SELECTOR, name_selector).text.strip()

                print(f"    Checking row: '{booster_type}'")

                if booster_type not in valid_booster_types:
                    continue

                print(f"    ✓ Match found: '{booster_type}'")

                # Expected value is in the 3rd column, market value in the 5th
                expected_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(3)').text.strip()
                market_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(5)').text.strip()
            except (NoSuchElementException, StaleElementReferenceException):
                # Row doesn't match the expected structure, try the next one.
                # Any other failure falls through to the outer handler.
                continue

            print(f"    Expected Value: '{expected_value_text}'")
            print(f"    Market Value: '{market_value_text}'")

            return {
                'expected_value': parse_price_value(expected_value_text),
                'market_value': parse_price_value(market_value_text),
                'found': True
            }

        print("  ✗ No matching booster type found")

    except Exception as e:
        print(f"  Error: {e}")

    return {
        'expected_value': None,
        'market_value': None,
        'found': False
    }

def _find_header_row(sheet, marker):
    """Return the first row (searching from row 2) with a cell containing ``marker``, or None."""
    # At least 49 rows and 9 columns are scanned even on a small sheet.
    for row in range(2, max(50, sheet.max_row + 1)):
        for col in range(1, max(10, sheet.max_column + 1)):
            if marker in str(sheet.cell(row, col).value):
                return row
    return None


def _find_columns(sheet, header_row):
    """Return (ev_link, expected_value, market_value) column indices; None for any not found."""
    ev_link_col = None
    expected_value_col = None
    market_value_col = None

    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if 'EV MTG Stocks Link' in header:
            ev_link_col = col
        elif 'Play Booster Expected Market Value' in header:
            expected_value_col = col
        elif 'Play Boost Sealed Market Value' in header:
            market_value_col = col

    return ev_link_col, expected_value_col, market_value_col


def _is_end_of_table(sheet, row):
    """Return True when at least 5 of the first (up to 9) columns of ``row`` are empty."""
    empty_count = sum(
        1
        for check_col in range(1, min(10, sheet.max_column + 1))
        if not sheet.cell(row, check_col).value
    )
    return empty_count >= 5


def main():
    """Update the 'MTG Set' sheet with values scraped from each row's link.

    Opens the workbook, locates the table whose header contains
    'EV MTG Stocks Link', scrapes every linked page, writes the expected
    and market values into their columns (blanking them when no matching
    booster row was found), and saves the workbook in place. Problems are
    reported with print() and end the run early; nothing is returned.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'MTG Set'

    print("Loading workbook...")
    wb = load_workbook(workbook_name)

    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return

    sheet = wb[sheet_name]

    # Search for table header
    print("max sheet column: ", str(sheet.max_column))

    header_row = _find_header_row(sheet, 'EV MTG Stocks Link')
    if header_row is None:
        print("Error: Could not find 'EV MTG Stocks Link' column")
        return

    start_row = header_row + 1
    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")

    # Find column indices
    ev_link_col, expected_value_col, market_value_col = _find_columns(sheet, header_row)

    print(f"Columns - EV Link: {ev_link_col}, Expected Value: {expected_value_col}, Market Value: {market_value_col}")

    if not all([ev_link_col, expected_value_col, market_value_col]):
        print("Error: Could not find all required columns")
        print(f"  EV MTG Stocks Link: {'Found' if ev_link_col else 'NOT FOUND'}")
        print(f"  Play Booster Expected Market Value: {'Found' if expected_value_col else 'NOT FOUND'}")
        print(f"  Play Boost Sealed Market Value: {'Found' if market_value_col else 'NOT FOUND'}")
        return

    # Setup Selenium driver
    print("Setting up browser automation...")
    driver = setup_driver(headless=False)  # Set to False to see browser
    if not driver:
        return

    try:
        processed_count = 0
        updated_count = 0
        cleared_count = 0

        for row in range(start_row, sheet.max_row + 1):
            ev_link = sheet.cell(row, ev_link_col).value

            if not ev_link:
                # A mostly-empty row marks the end of the table; a row that
                # merely lacks a link is skipped.
                if _is_end_of_table(sheet, row):
                    break
                continue

            processed_count += 1
            print(f"\n{'='*80}")
            print(f"Processing row {row}: {ev_link}")
            print(f"{'='*80}")

            # Scrape values
            result = scrape_mtg_stocks_values(driver, ev_link)

            if result['found']:
                # Update cells with found values
                sheet.cell(row, expected_value_col).value = result['expected_value']
                sheet.cell(row, market_value_col).value = result['market_value']
                updated_count += 1
                print(f"  ✓ Updated - Expected: {result['expected_value']}, Market: {result['market_value']}")
            else:
                # Clear with None rather than '' so the cells are truly
                # blank instead of holding an empty text string.
                sheet.cell(row, expected_value_col).value = None
                sheet.cell(row, market_value_col).value = None
                cleared_count += 1
                print("  ✗ Cleared values - no matching booster type found")

            # Small delay between requests
            time.sleep(2)

        # Save workbook
        print(f"\n{'='*80}")
        print("Saving workbook...")
        wb.save(workbook_name)

        print("\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Updated: {updated_count} entries")
        print(f"Cleared: {cleared_count} entries (no matching data)")

    finally:
        # Always close the browser, even if processing fails part-way.
        driver.quit()

# Run the workbook update only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()