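"""Refresh MTG booster values in an Excel workbook from MTG Stocks pages.

For every row of the 'MTG Set' sheet that has an 'EV MTG Stocks Link', the
linked page is opened in Chrome via Selenium and the booster's expected value
and market value are written back into the row.
"""
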
import re
import time

import pandas as pd
from openpyxl import load_workbook
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def setup_driver(headless=True):
    """Create a Chrome WebDriver configured for scraping.

    Args:
        headless: When truthy, '--headless' is passed to Chrome as the first
            argument so no browser window is shown.

    Returns:
        The started ``webdriver.Chrome`` instance, or None if Chrome could not
        be launched (the failure is reported on stdout).
    """
    # '--headless' (when requested) must stay ahead of the fixed flags so the
    # arguments reach Chrome in the same order as before.
    flags = ['--headless'] if headless else []
    flags += [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ]

    opts = Options()
    for flag in flags:
        opts.add_argument(flag)

    try:
        return webdriver.Chrome(options=opts)
    except Exception as err:
        print(f"Error setting up Chrome driver: {err}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def parse_price_value(text):
    """Extract the numeric value from a price string such as '$5.50' or '€5,50'.

    Every character other than digits, ',', '.' and '-' is discarded, then the
    separators are interpreted as follows:

    * both ',' and '.' present: whichever occurs last is the decimal mark and
      the other is a grouping separator ('$1,234.56' and '1.234,56 €' both
      give 1234.56);
    * one kind of separator occurring more than once: grouping separators
      ('1,234,567' gives 1234567.0);
    * otherwise a lone ',' is a decimal comma ('€5,50' gives 5.5).

    Args:
        text: Price text. Non-string values are converted with ``str()``.

    Returns:
        The parsed float, or None when ``text`` is falsy or no number can be
        read from it.
    """
    if not text:
        return None

    # Remove currency symbols and any other non-numeric decoration.
    cleaned = re.sub(r'[^\d,.\-]', '', str(text))

    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')

    if last_comma != -1 and last_dot != -1:
        # Mixed separators: the right-most one is the decimal mark.
        if last_comma > last_dot:
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            cleaned = cleaned.replace(',', '')
    elif cleaned.count(',') > 1:
        # A decimal mark can only appear once, so these are grouping commas.
        cleaned = cleaned.replace(',', '')
    elif cleaned.count('.') > 1:
        cleaned = cleaned.replace('.', '')
    else:
        # At most one separator in total: treat a comma as a decimal comma.
        cleaned = cleaned.replace(',', '.')

    try:
        return float(cleaned)
    except ValueError:
        return None
def scrape_mtg_stocks_values(driver, url, timeout=10):
    """Scrape the booster expected value and market value from an MTG Stocks page.

    Loads ``url``, waits for the expected-value table rows to be present and
    returns the values of the first row whose product name is one of the
    recognised booster types.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to open.
        timeout: Maximum number of seconds to wait for table rows to appear.

    Returns:
        A dict with the keys 'expected_value' and 'market_value' (the results
        of ``parse_price_value`` on columns 3 and 5 of the matching row) and
        'found'. 'found' is False, with both values None, when no row matches,
        when no rows appear within ``timeout``, or when any error occurs.
    """
    # Product names that count as the booster row we are looking for.
    valid_booster_types = frozenset((
        'Play Booster',
        'Set Booster',
        'Booster',
        'Play Booster Pack',
        'Set Booster Pack',
        'Booster Pack',
    ))
    row_selector = 'mtg-sets-expected-value > mtg-product-tree > .table-responsive > table > tbody:nth-child(2) > tr'
    name_selector = 'td:nth-child(1) > div.d-flex.align-items-center:nth-child(1) > a:nth-child(2)'

    try:
        print(f" Loading page...")
        driver.get(url)

        # Wait explicitly for the rows instead of sleeping a fixed time: a
        # fixed pause either wastes time or reads the page before the table
        # exists, and an empty read is reported as found=False.
        # NOTE(review): assumes all rows are rendered together once the first
        # one is present - confirm against the live page.
        try:
            rows = WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located((By.CSS_SELECTOR, row_selector))
            )
        except TimeoutException:
            rows = []

        print(f" Found {len(rows)} rows in table")

        for row in rows:
            try:
                # Product name lives in the link inside the first column.
                booster_type = row.find_element(By.CSS_SELECTOR, name_selector).text.strip()
                print(f" Checking row: '{booster_type}'")

                if booster_type not in valid_booster_types:
                    continue

                print(f" ✓ Match found: '{booster_type}'")

                # Expected value is the 3rd column, market value the 5th.
                expected_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(3)').text.strip()
                market_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(5)').text.strip()

                print(f" Expected Value: '{expected_value_text}'")
                print(f" Market Value: '{market_value_text}'")

                return {
                    'expected_value': parse_price_value(expected_value_text),
                    'market_value': parse_price_value(market_value_text),
                    'found': True,
                }
            except WebDriverException:
                # Row lacks the expected structure (or could not be read);
                # move on to the next row.
                continue

        print(f" ✗ No matching booster type found")
        return {
            'expected_value': None,
            'market_value': None,
            'found': False,
        }

    except Exception as e:
        # Best-effort boundary: any failure is reported as "not found".
        print(f" Error: {e}")
        return {
            'expected_value': None,
            'market_value': None,
            'found': False,
        }
def _find_header_row(sheet):
    """Return the first row (scanning from row 2) holding the link header.

    A row qualifies when any of its cells contains the text
    'EV MTG Stocks Link'. Returns None when no such row exists.
    """
    # NOTE(review): the max(...) bounds probe at least rows 2-49 and columns
    # 1-9 even when the sheet's used range is smaller. Left unchanged so the
    # set of cells that gets touched stays the same.
    for row in range(2, max(50, sheet.max_row + 1)):
        for col in range(1, max(10, sheet.max_column + 1)):
            if 'EV MTG Stocks Link' in str(sheet.cell(row, col).value):
                return row
    return None


def _find_value_columns(sheet, header_row):
    """Return (ev_link_col, expected_value_col, market_value_col) for the header row.

    Columns are matched by header text; an entry is None when its header is
    missing. If a header text occurs in several columns the right-most wins.
    """
    ev_link_col = None
    expected_value_col = None
    market_value_col = None

    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if 'EV MTG Stocks Link' in header:
            ev_link_col = col
        elif 'Play Booster Expected Market Value' in header:
            expected_value_col = col
        elif 'Play Boost Sealed Market Value' in header:
            market_value_col = col

    return ev_link_col, expected_value_col, market_value_col


def _is_end_of_table(sheet, row):
    """Return True when at least five of the row's leading cells (up to nine) are empty."""
    empty_count = sum(
        1
        for check_col in range(1, min(10, sheet.max_column + 1))
        if not sheet.cell(row, check_col).value
    )
    return empty_count >= 5


def main():
    """Update the 'MTG Set' sheet of the workbook with scraped booster values.

    Locates the table by its 'EV MTG Stocks Link' header, scrapes every linked
    page, writes the expected and market values into the row (or empties both
    cells when the page yields no matching booster), and saves the workbook in
    place once all rows are processed. Problems are reported on stdout and end
    the run early; nothing is returned.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'MTG Set'

    print("Loading workbook...")
    try:
        wb = load_workbook(workbook_name)
    except FileNotFoundError:
        # Report a missing file like the other error paths instead of a traceback.
        print(f"Error: Workbook '{workbook_name}' not found")
        return

    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return

    sheet = wb[sheet_name]

    print("max sheet column: ", str(sheet.max_column))

    # Locate the table via its header row; data starts on the next row.
    header_row = _find_header_row(sheet)
    if header_row is None:
        print("Error: Could not find 'EV MTG Stocks Link' column")
        return
    start_row = header_row + 1

    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")

    ev_link_col, expected_value_col, market_value_col = _find_value_columns(sheet, header_row)

    print(f"Columns - EV Link: {ev_link_col}, Expected Value: {expected_value_col}, Market Value: {market_value_col}")

    if not all([ev_link_col, expected_value_col, market_value_col]):
        print("Error: Could not find all required columns")
        print(f" EV MTG Stocks Link: {'Found' if ev_link_col else 'NOT FOUND'}")
        print(f" Play Booster Expected Market Value: {'Found' if expected_value_col else 'NOT FOUND'}")
        print(f" Play Boost Sealed Market Value: {'Found' if market_value_col else 'NOT FOUND'}")
        return

    print("Setting up browser automation...")
    driver = setup_driver(headless=False)  # headless=False keeps the browser window visible
    if not driver:
        return

    separator = '=' * 80
    try:
        processed_count = 0
        updated_count = 0
        cleared_count = 0

        for row in range(start_row, sheet.max_row + 1):
            ev_link = sheet.cell(row, ev_link_col).value

            if not ev_link:
                # A mostly empty row marks the end of the table; a row that
                # merely lacks a link is skipped.
                if _is_end_of_table(sheet, row):
                    break
                continue

            processed_count += 1
            print(f"\n{separator}")
            print(f"Processing row {row}: {ev_link}")
            print(f"{separator}")

            result = scrape_mtg_stocks_values(driver, ev_link)

            if result['found']:
                sheet.cell(row, expected_value_col).value = result['expected_value']
                sheet.cell(row, market_value_col).value = result['market_value']
                updated_count += 1
                print(f" ✓ Updated - Expected: {result['expected_value']}, Market: {result['market_value']}")
            else:
                # No matching booster type: blank out both value cells.
                # NOTE(review): this writes an empty string rather than None;
                # confirm which one the sheet's formulas expect.
                sheet.cell(row, expected_value_col).value = ''
                sheet.cell(row, market_value_col).value = ''
                cleared_count += 1
                print(f" ✗ Cleared values - no matching booster type found")

            # Small delay between requests.
            time.sleep(2)

        print(f"\n{separator}")
        print(f"Saving workbook...")
        wb.save(workbook_name)

        print(f"\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Updated: {updated_count} entries")
        print(f"Cleared: {cleared_count} entries (no matching data)")

    finally:
        # Always shut the browser down, even if scraping or saving failed.
        driver.quit()
# Run the workbook update only when this file is executed as a script.
if __name__ == "__main__":
    main()