"""Update booster values in an Excel sheet from MTG Stocks pages.

For every row of the 'MTG Set' sheet that has a link in the
'EV MTG Stocks Link' column, the linked page is opened in Chrome, the first
table row whose booster type is one of ``VALID_BOOSTER_TYPES`` is read, and
its expected value / market value are written back to the workbook.
"""

import re
import time

import pandas as pd  # noqa: F401  (unused here; kept from the original file)
from openpyxl import load_workbook
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Booster names (text of the link in the first table column) that count as a
# match. Compared by exact equality after stripping whitespace.
VALID_BOOSTER_TYPES = frozenset({
    'Play Booster',
    'Set Booster',
    'Booster',
    'Play Booster Pack',
    'Set Booster Pack',
    'Booster Pack',
})

# CSS selector for the data rows of the expected-value table.
ROW_SELECTOR = (
    'mtg-sets-expected-value > mtg-product-tree > .table-responsive '
    '> table > tbody:nth-child(2) > tr'
)

# CSS selector (relative to a row) for the link holding the booster name.
BOOSTER_NAME_SELECTOR = (
    'td:nth-child(1) > div.d-flex.align-items-center:nth-child(1) '
    '> a:nth-child(2)'
)

# Header texts used to locate the table and its columns in the sheet.
# NOTE(review): 'Play Boost Sealed Market Value' is matched verbatim against
# the workbook header; confirm the spelling in the sheet before "fixing" it.
EV_LINK_HEADER = 'EV MTG Stocks Link'
EXPECTED_VALUE_HEADER = 'Play Booster Expected Market Value'
MARKET_VALUE_HEADER = 'Play Boost Sealed Market Value'


def setup_driver(headless=True):
    """Create a Chrome WebDriver.

    Args:
        headless: When true, Chrome is started with ``--headless``.

    Returns:
        The driver, or ``None`` when Chrome could not be started (the error
        is printed).
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument('--headless')
    for argument in (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ):
        chrome_options.add_argument(argument)

    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        # Deliberately broad: any start-up failure is reported to the user
        # and signalled to the caller as ``None`` instead of a traceback.
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None


def parse_price_value(text):
    """Extract a numeric value from a price string.

    Handles plain prices ('$5.50'), decimal commas ('€5,50') and grouped
    thousands in either convention ('$1,234.56', '1.234,56').

    A lone comma is treated as a decimal mark, so '1,234' parses as 1.234.

    Args:
        text: The price text; falsy input yields ``None``.

    Returns:
        The value as ``float``, or ``None`` when no number can be parsed.
    """
    if not text:
        return None

    # Keep only digits, separators and the minus sign.
    cleaned = re.sub(r'[^\d,.\-]', '', text)

    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        # Both separators present: the right-most one is the decimal mark,
        # the other one is thousands grouping.
        if last_dot > last_comma:
            cleaned = cleaned.replace(',', '')
        else:
            cleaned = cleaned.replace('.', '').replace(',', '.')
    elif cleaned.count(',') > 1:
        # Several commas and no period can only be grouping: '1,234,567'.
        cleaned = cleaned.replace(',', '')
    elif cleaned.count('.') > 1:
        # Several periods and no comma can only be grouping: '1.234.567'.
        cleaned = cleaned.replace('.', '')
    else:
        cleaned = cleaned.replace(',', '.')

    try:
        return float(cleaned)
    except ValueError:
        return None


def _scrape_result(expected_value=None, market_value=None, found=False,
                   error=None):
    """Build the result dict returned by ``scrape_mtg_stocks_values``."""
    return {
        'expected_value': expected_value,
        'market_value': market_value,
        'found': found,
        'error': error,
    }


def scrape_mtg_stocks_values(driver, url, timeout=15):
    """Scrape expected value and market value from an MTG Stocks page.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to load.
        timeout: Maximum number of seconds to wait for the table rows.

    Returns:
        A dict with the keys:

        * ``'found'`` - ``True`` when a row with a valid booster type was read.
        * ``'expected_value'`` / ``'market_value'`` - parsed floats from the
          3rd and 5th column of that row, or ``None``.
        * ``'error'`` - ``None`` normally; an error description when loading
          or reading the page raised, so callers can tell a failure apart
          from a page that simply has no matching booster.
    """
    try:
        print(" Loading page...")
        driver.get(url)

        # Wait until the table rows exist instead of sleeping a fixed time.
        # A timeout is treated as "no rows", i.e. nothing to match.
        try:
            rows = WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located(
                    (By.CSS_SELECTOR, ROW_SELECTOR)
                )
            )
        except TimeoutException:
            rows = []

        print(f" Found {len(rows)} rows in table")

        for row in rows:
            try:
                booster_type = row.find_element(
                    By.CSS_SELECTOR, BOOSTER_NAME_SELECTOR
                ).text.strip()
            except WebDriverException:
                # Row does not have the expected structure; try the next one.
                continue

            print(f" Checking row: '{booster_type}'")
            if booster_type not in VALID_BOOSTER_TYPES:
                continue

            print(f" ✓ Match found: '{booster_type}'")
            try:
                expected_value_text = row.find_element(
                    By.CSS_SELECTOR, 'td:nth-child(3)'
                ).text.strip()
                market_value_text = row.find_element(
                    By.CSS_SELECTOR, 'td:nth-child(5)'
                ).text.strip()
            except WebDriverException:
                # Value cells missing in this row; try the next one.
                continue

            print(f" Expected Value: '{expected_value_text}'")
            print(f" Market Value: '{market_value_text}'")

            return _scrape_result(
                expected_value=parse_price_value(expected_value_text),
                market_value=parse_price_value(market_value_text),
                found=True,
            )

        print(" ✗ No matching booster type found")
        return _scrape_result()

    except Exception as e:
        # Boundary handler: one bad page must not abort the whole run.
        print(f" Error: {e}")
        return _scrape_result(error=str(e) or type(e).__name__)


def _find_header_row(sheet, marker):
    """Return the first row (from row 2 on) with a cell containing *marker*.

    NOTE(review): the search starts at row 2, as in the original code, so a
    header located on row 1 is not detected; confirm this is intended.
    """
    for row in range(2, sheet.max_row + 1):
        for col in range(1, sheet.max_column + 1):
            if marker in str(sheet.cell(row, col).value):
                return row
    return None


def _locate_columns(sheet, header_row):
    """Return (ev_link_col, expected_value_col, market_value_col).

    Each entry is the 1-based column index whose header contains the
    corresponding text, or ``None`` when no header matches. When several
    headers match, the right-most one wins.
    """
    ev_link_col = None
    expected_value_col = None
    market_value_col = None

    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if EV_LINK_HEADER in header:
            ev_link_col = col
        elif EXPECTED_VALUE_HEADER in header:
            expected_value_col = col
        elif MARKET_VALUE_HEADER in header:
            market_value_col = col

    return ev_link_col, expected_value_col, market_value_col


def _is_past_table_end(sheet, row):
    """Return True when at least 5 of the first (up to) 9 cells are empty."""
    empty_count = sum(
        1
        for check_col in range(1, min(10, sheet.max_column + 1))
        if not sheet.cell(row, check_col).value
    )
    return empty_count >= 5


def main():
    """Refresh the booster value columns of the workbook and save it."""
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'MTG Set'

    print("Loading workbook...")
    try:
        wb = load_workbook(workbook_name)
    except FileNotFoundError:
        print(f"Error: Workbook '{workbook_name}' not found")
        return

    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return

    sheet = wb[sheet_name]

    # Find table boundaries and columns
    print("max sheet column: ", str(sheet.max_column))
    header_row = _find_header_row(sheet, EV_LINK_HEADER)
    if header_row is None:
        print("Error: Could not find 'EV MTG Stocks Link' column")
        return
    start_row = header_row + 1

    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")

    ev_link_col, expected_value_col, market_value_col = _locate_columns(
        sheet, header_row
    )
    print(f"Columns - EV Link: {ev_link_col}, Expected Value: {expected_value_col}, Market Value: {market_value_col}")

    if not all([ev_link_col, expected_value_col, market_value_col]):
        print("Error: Could not find all required columns")
        print(f" EV MTG Stocks Link: {'Found' if ev_link_col else 'NOT FOUND'}")
        print(f" Play Booster Expected Market Value: {'Found' if expected_value_col else 'NOT FOUND'}")
        print(f" Play Boost Sealed Market Value: {'Found' if market_value_col else 'NOT FOUND'}")
        return

    # Setup Selenium driver
    print("Setting up browser automation...")
    driver = setup_driver(headless=False)  # Set to False to see browser
    if not driver:
        return

    try:
        processed_count = 0
        updated_count = 0
        cleared_count = 0
        failed_count = 0

        for row in range(start_row, sheet.max_row + 1):
            ev_link = sheet.cell(row, ev_link_col).value

            if not ev_link:
                # A mostly empty row marks the end of the table; a row that
                # merely lacks a link is skipped.
                if _is_past_table_end(sheet, row):
                    break
                continue

            processed_count += 1
            print(f"\n{'='*80}")
            print(f"Processing row {row}: {ev_link}")
            print(f"{'='*80}")

            result = scrape_mtg_stocks_values(driver, ev_link)
            expected_cell = sheet.cell(row, expected_value_col)
            market_cell = sheet.cell(row, market_value_col)

            if result['found']:
                expected_cell.value = result['expected_value']
                market_cell.value = result['market_value']
                updated_count += 1
                print(f" ✓ Updated - Expected: {result['expected_value']}, Market: {result['market_value']}")
            elif result.get('error'):
                # The page could not be read: keep whatever the sheet already
                # holds rather than wiping it because of a transient failure.
                failed_count += 1
                print(" ! Scrape failed - existing values left unchanged")
            else:
                # Page loaded but lists no matching booster type. None (not
                # '') leaves the cells genuinely empty.
                expected_cell.value = None
                market_cell.value = None
                cleared_count += 1
                print(" ✗ Cleared values - no matching booster type found")

            # Small delay between requests
            time.sleep(2)

        # Save workbook
        print(f"\n{'='*80}")
        print("Saving workbook...")
        wb.save(workbook_name)

        print("\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Updated: {updated_count} entries")
        print(f"Cleared: {cleared_count} entries (no matching data)")
        print(f"Failed: {failed_count} entries (scrape error, values kept)")

    finally:
        driver.quit()


if __name__ == "__main__":
    main()