Files
trading_card_games/python/mtg_booster_expected_value_fetcher.py
2026-01-28 22:05:35 +00:00

248 lines
9.0 KiB
Python

import pandas as pd
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import re
import time
def setup_driver(headless=True):
    """Create a Chrome WebDriver configured for scraping.

    Args:
        headless: When true, ``--headless`` is passed to Chrome ahead of the
            other command-line switches.

    Returns:
        The started ``webdriver.Chrome`` instance, or ``None`` if Chrome could
        not be started (the error is printed rather than raised).
    """
    # Switches applied to every session, in the order they are handed to Chrome.
    browser_flags = [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ]
    if headless:
        browser_flags.insert(0, '--headless')

    chrome_options = Options()
    for flag in browser_flags:
        chrome_options.add_argument(flag)

    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to do.
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def parse_price_value(text):
    """Extract a numeric value from a price string.

    Currency symbols and any other characters except digits, ``,``, ``.`` and
    ``-`` are discarded. Separators are then interpreted as follows:

    * both ``,`` and ``.`` present: the rightmost one is the decimal mark and
      the other is a thousands separator (``'$1,234.56'``, ``'€1.234,56'``);
    * the same separator repeated: all occurrences are thousands separators
      (``'1,234,567'``);
    * a single ``,``: treated as the decimal mark (``'€5,50'``).

    Args:
        text: Price text such as ``'$5.50'`` or ``'€5,50'``; may be empty or
            ``None``.

    Returns:
        The parsed ``float``, or ``None`` if ``text`` is empty or does not
        contain a parseable number.
    """
    if not text:
        return None

    # Strip currency symbols, whitespace and any other non-numeric characters.
    cleaned = re.sub(r'[^\d,.\-]', '', text)

    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        # Mixed separators: whichever comes last is the decimal mark.
        if last_comma > last_dot:
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            cleaned = cleaned.replace(',', '')
    elif cleaned.count(',') > 1:
        # Several commas and no dot can only be digit grouping.
        cleaned = cleaned.replace(',', '')
    elif cleaned.count('.') > 1:
        # Several dots and no comma can only be digit grouping.
        cleaned = cleaned.replace('.', '')
    else:
        # A lone comma is a decimal comma (e.g. '5,50').
        cleaned = cleaned.replace(',', '.')

    try:
        return float(cleaned)
    except ValueError:
        return None
def scrape_mtg_stocks_values(driver, url, timeout=15):
    """Scrape the expected value and market value of a booster from MTG Stocks.

    Loads ``url``, waits until rows of the expected-value table are present
    (or ``timeout`` seconds elapse), and returns the values from the first row
    whose product name exactly matches one of the recognised booster names.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the page to load.
        timeout: Maximum number of seconds to wait for table rows to appear.

    Returns:
        A dict with keys ``'expected_value'`` and ``'market_value'`` (parsed
        floats, or ``None`` when missing or unparseable) and ``'found'``
        (``True`` only when a matching row was located). Any error raised while
        loading or reading the page is printed and reported as ``found=False``.
    """
    # Imported here so this function does not depend on edits to the
    # module-level import block.
    from selenium.common.exceptions import TimeoutException, WebDriverException

    # Product names (first column) that identify the row we want.
    valid_booster_types = frozenset({
        'Play Booster',
        'Set Booster',
        'Booster',
        'Play Booster Pack',
        'Set Booster Pack',
        'Booster Pack',
    })
    row_selector = 'mtg-sets-expected-value > mtg-product-tree > .table-responsive > table > tbody:nth-child(2) > tr'
    name_selector = 'td:nth-child(1) > div.d-flex.align-items-center:nth-child(1) > a:nth-child(2)'
    not_found = {
        'expected_value': None,
        'market_value': None,
        'found': False
    }

    try:
        print(f" Loading page...")
        driver.get(url)

        # Wait for the table to render instead of sleeping a fixed time: a
        # slow page would otherwise look like "no data" to the caller.
        try:
            WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, row_selector))
            )
        except TimeoutException:
            # No rows appeared in time; fall through and report zero rows.
            pass

        rows = driver.find_elements(By.CSS_SELECTOR, row_selector)
        print(f" Found {len(rows)} rows in table")

        for row in rows:
            try:
                booster_type = row.find_element(By.CSS_SELECTOR, name_selector).text.strip()
                print(f" Checking row: '{booster_type}'")
                if booster_type not in valid_booster_types:
                    continue

                print(f" ✓ Match found: '{booster_type}'")
                # Expected value is read from the 3rd column, market value
                # from the 5th.
                expected_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(3)').text.strip()
                market_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(5)').text.strip()
                print(f" Expected Value: '{expected_value_text}'")
                print(f" Market Value: '{market_value_text}'")

                return {
                    'expected_value': parse_price_value(expected_value_text),
                    'market_value': parse_price_value(market_value_text),
                    'found': True
                }
            except WebDriverException:
                # Row does not have the expected structure (or went stale);
                # move on to the next one.
                continue

        print(f" ✗ No matching booster type found")
        return dict(not_found)
    except Exception as e:
        # Top-level boundary for one page: report and signal "not found" so a
        # single bad URL does not abort the whole run.
        print(f" Error: {e}")
        return dict(not_found)
def _find_header_row(sheet, marker='EV MTG Stocks Link'):
    """Return the first row (searching from row 2) with ``marker`` in a cell, or None."""
    for row in range(2, max(50, sheet.max_row + 1)):
        for col in range(1, max(10, sheet.max_column + 1)):
            if marker in str(sheet.cell(row, col).value):
                return row
    return None


def _find_columns(sheet, header_row):
    """Return (ev_link_col, expected_value_col, market_value_col) found in ``header_row``."""
    ev_link_col = None
    expected_value_col = None
    market_value_col = None
    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if 'EV MTG Stocks Link' in header:
            ev_link_col = col
        elif 'Play Booster Expected Market Value' in header:
            expected_value_col = col
        elif 'Play Boost Sealed Market Value' in header:
            market_value_col = col
    return ev_link_col, expected_value_col, market_value_col


def _is_end_of_table(sheet, row):
    """Return True when at least 5 of the first columns of ``row`` are empty."""
    empty_count = sum(
        1
        for check_col in range(1, min(10, sheet.max_column + 1))
        if not sheet.cell(row, check_col).value
    )
    return empty_count >= 5


def main(workbook_name='TCG Sole Trader Copy.xlsx', sheet_name='MTG Set'):
    """Refresh booster expected/market values in the workbook from the linked pages.

    Locates the table whose header contains 'EV MTG Stocks Link', then for
    every data row with a link scrapes the page and writes the expected value
    and market value into their columns. Rows whose page yields no matching
    booster have both cells emptied. The workbook is saved in place once all
    rows have been processed.

    Args:
        workbook_name: Path of the ``.xlsx`` file to load and overwrite.
        sheet_name: Name of the worksheet holding the table.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return
    sheet = wb[sheet_name]

    # Locate the table header.
    print("max sheet column: ", str(sheet.max_column))
    header_row = _find_header_row(sheet)
    if header_row is None:
        print("Error: Could not find 'EV MTG Stocks Link' column")
        return
    start_row = header_row + 1
    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")

    # Resolve the column indices used for reading links and writing values.
    ev_link_col, expected_value_col, market_value_col = _find_columns(sheet, header_row)
    print(f"Columns - EV Link: {ev_link_col}, Expected Value: {expected_value_col}, Market Value: {market_value_col}")
    if not all([ev_link_col, expected_value_col, market_value_col]):
        print("Error: Could not find all required columns")
        print(f" EV MTG Stocks Link: {'Found' if ev_link_col else 'NOT FOUND'}")
        print(f" Play Booster Expected Market Value: {'Found' if expected_value_col else 'NOT FOUND'}")
        print(f" Play Boost Sealed Market Value: {'Found' if market_value_col else 'NOT FOUND'}")
        return

    print("Setting up browser automation...")
    driver = setup_driver(headless=False)  # Set to False to see browser
    if not driver:
        return

    try:
        processed_count = 0
        updated_count = 0
        cleared_count = 0

        for row in range(start_row, sheet.max_row + 1):
            ev_link = sheet.cell(row, ev_link_col).value
            if not ev_link:
                # A mostly-empty row marks the end of the table; a row that
                # merely lacks a link is skipped.
                if _is_end_of_table(sheet, row):
                    break
                continue

            processed_count += 1
            print(f"\n{'='*80}")
            print(f"Processing row {row}: {ev_link}")
            print(f"{'='*80}")

            result = scrape_mtg_stocks_values(driver, ev_link)
            if result['found']:
                sheet.cell(row, expected_value_col).value = result['expected_value']
                sheet.cell(row, market_value_col).value = result['market_value']
                updated_count += 1
                print(f" ✓ Updated - Expected: {result['expected_value']}, Market: {result['market_value']}")
            else:
                # Use None (not '') so the cells are genuinely empty rather
                # than text cells holding an empty string.
                sheet.cell(row, expected_value_col).value = None
                sheet.cell(row, market_value_col).value = None
                cleared_count += 1
                print(f" ✗ Cleared values - no matching booster type found")

            # Small delay between requests
            time.sleep(2)

        print(f"\n{'='*80}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Updated: {updated_count} entries")
        print(f"Cleared: {cleared_count} entries (no matching data)")
    finally:
        # Always release the browser, even if scraping or saving failed.
        driver.quit()
# Script entry point: run the updater only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()