Initial commit.

This commit is contained in:
2026-01-09 20:53:52 +00:00
parent 32502972f8
commit 08aaf33e70
19 changed files with 3308 additions and 0 deletions

View File

@@ -0,0 +1 @@
,teddy,lord-T-1024,09.01.2026 15:38,file:///home/teddy/.config/libreoffice/4;

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,593 @@
import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet
import requests
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
import random
# import undetected_chromedriver as uc
from undetected_chromedriver import Chrome
# Scraper that reads supplier links from the sourcing worksheet, scrapes each
# supplier page with Selenium and writes cost / active status back into the
# workbook (see scrape_all_costs).
class Cost_Fetcher_Base:
# Worksheet names and workbook file opened by load_tcg_sole_trader_workbook.
PRODUCT_WORKSHEET_NAME = 'Product'
SOURCING_WORKSHEET_NAME = 'Sourcing'
WORKBOOK_NAME = 'TCG Sole Trader Copy.xlsx'
# Browser session created by setup_driver.
driver: Chrome # webdriver.Chrome
# EUR -> GBP conversion rate stored by get_eur_to_gbp_rate (0.85 fallback).
eur_to_gbp_rate: float
# Column positions (counted from 1) located by load_tcg_sole_trader_workbook
# by matching header text on each sheet's header row.
index_column_active_sourcing: int
index_column_is_booster_product: int
index_column_is_booster_box_product: int
index_column_is_precon_product: int
index_column_link_sourcing: int
index_column_name_sourcing: int
index_column_product_id_product: int
index_column_product_id_sourcing: int
index_column_unit_cost_sourcing: int
# Header row of each table; data rows start on the following row.
index_row_header_product: int
index_row_header_sourcing: int
# Worksheets taken from `workbook` by load_tcg_sole_trader_workbook.
product_sheet: Worksheet
sourcing_sheet: Worksheet
# Explicit wait bound to `driver` with a 15 second timeout (setup_driver).
wait: WebDriverWait
# Workbook loaded from WORKBOOK_NAME and saved back by scrape_all_costs.
workbook: Workbook
@staticmethod
def parse_cost(cost_text):
if not cost_text:
return None
cost_clean = re.sub(r'[^\d,]', '', cost_text)
try:
return float(cost_clean) / 100
except ValueError:
return None
@classmethod
def parse_cost_from_pennies(cls, cost_text):
if not cost_text:
return None
cost_clean = cls.parse_cost(cost_text = cost_text)
if cost_clean is not None:
cost_clean = cost_clean / 100
return cost_clean
@classmethod
def parse_cost_chaoscards(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_cardmarket(cls, cost_text):
# return cls.parse_cost(cost_text = cost_text)
"""Convert '141,30 €' format to float in EUR"""
if not cost_text:
return None
cost_clean = re.sub(r'[^\d,]', '', cost_text)
cost_clean = cost_clean.replace(',', '.')
try:
return float(cost_clean)
except ValueError:
return None
@classmethod
def parse_cost_gameslore(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_magicmadhouse(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
def get_eur_to_gbp_rate(self):
    """Store the current EUR to GBP rate on self.eur_to_gbp_rate.

    The rate is read from the exchangerate-api.com JSON response; if the
    request or the lookup fails for any reason, the fixed fallback 0.85 is
    stored instead and the error is printed.
    """
    fallback_rate = 0.85
    try:
        payload = requests.get('https://api.exchangerate-api.com/v4/latest/EUR', timeout=10).json()
        rate = payload['rates']['GBP']
    except Exception as e:
        print(f"Error fetching exchange rate: {e}")
        print("Using fallback rate: 0.85")
        rate = fallback_rate
    self.eur_to_gbp_rate = rate
def setup_driver(self, version_main=133):
    """Start an undetected-chromedriver Chrome session and its explicit wait.

    Args:
        version_main: Chrome major version handed to the Chrome
            constructor. Defaults to 133, the value previously hard-coded.

    On success self.driver holds the browser and self.wait a WebDriverWait
    with a 15 second timeout. On failure the error is printed and both
    attributes are set to None, so callers can test `if not self.driver`.
    """
    print("Starting driver")
    try:
        self.driver = Chrome(version_main=version_main)
    except Exception as e:
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        # Leave well-defined attributes behind; previously self.driver stayed
        # unset and the WebDriverWait line below raised AttributeError.
        self.driver = None
        self.wait = None
        return
    self.wait = WebDriverWait(self.driver, 15)
def scrape_cost_and_active_selenium(self, url, page_load_element_selector, cost_selector, active_selector, invalid_active_statuses):
"""Load `url` and return (cost_text, active) read via CSS selectors.

cost_text is the raw text of the first element matching `cost_selector`,
or None when the selector keeps failing. active is True/False, or None
when the active lookup itself raised. Any other failure is printed and
(None, None) is returned.

NOTE: whenever cost or active is None, or on error, this blocks on
input() until Enter is pressed.
"""
try:
print(f" Loading page...")
# time.sleep(random.uniform(6, 10))
# Load the page and wait for the marker element to be present and
# clickable; on any failure the whole load is attempted once more.
try:
self.driver.get(url)
element = self.wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR, page_load_element_selector))
)
element = self.wait.until(
EC.element_to_be_clickable((By.CSS_SELECTOR, page_load_element_selector))
)
except Exception as e:
self.driver.get(url)
element = self.wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR, page_load_element_selector))
)
element = self.wait.until(
EC.element_to_be_clickable((By.CSS_SELECTOR, page_load_element_selector))
)
# Re-find the marker element and read its text until that stops raising
# StaleElementReferenceException (up to max_attempts tries, 1s apart).
max_attempts = 10
for attempt in range(max_attempts):
try:
element = None
element = self.driver.find_element(By.CSS_SELECTOR, page_load_element_selector)
text = element.text
print(f"✓ Element loaded successfully on attempt {attempt + 1}")
# return True
break
except StaleElementReferenceException:
print(f"Stale element on attempt {attempt + 1}, retrying...")
if attempt < max_attempts - 1:
time.sleep(1)
else:
raise ValueError("StaleElementReferenceException")
print(f" Page title: {self.driver.title}")
# Poll for the cost element, sleeping 2-4 seconds after each failure and
# giving up once the attempt counter exceeds 10.
cost = None
element = None
counter = 0
while cost is None:
counter += 1
try:
element = self.driver.find_element(By.CSS_SELECTOR, cost_selector)
text = element.text
print(f" Text: '{text}'")
cost = text
except Exception as e:
print(f" Selector failed: {e}")
cost = None
time.sleep(random.uniform(2, 4))
if counter > 10:
print("10 cost selector fails")
break
# Without an active selector, "active" simply means a cost was found.
active = None
if active_selector is None: # or invalid_active_statuses is None or invalid_active_statuses == []:
active = (cost is not None)
else:
try:
elements = None
elements = self.driver.find_elements(By.CSS_SELECTOR, active_selector)
# No status element at all counts as active.
if len(elements) == 0:
active = True
else:
text = elements[0].text
print(f" Text: '{text}'")
# NOTE(review): with an empty status list this is always True, so a
# matched element never marks the item inactive - confirm intent.
active = (invalid_active_statuses is None or text not in invalid_active_statuses)
except Exception as e:
print(f" Selector failed: {e}")
if cost is None or active is None:
print(f" ✗ No cost found")
print(f"Cost: {cost}, Active: {active}")
input("Press Enter to continue to next URL...")
return cost, active
except Exception as e:
print(f" Error: {e}")
input("Press Enter to continue to next URL...")
return None, None
def scrape_cost_and_active_selenium_cardmarket(self, url):
page_load_element_selector = "body > main.container > div.page-title-container"
cost_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer > div.price-container > div > div:nth-child(1) > span:nth-child(1)'
cost_text, active = self.scrape_cost_and_active_selenium(
url = url
, page_load_element_selector = page_load_element_selector
, cost_selector = cost_selector
, active_selector = None
, invalid_active_statuses = []
)
cost = Cost_Fetcher_Base.parse_cost_cardmarket(cost_text)
if cost is not None:
item_shipping_cost_in = 0
if cost < 10:
item_shipping_cost_in = 2
elif cost < 100:
item_shipping_cost_in = 8
else:
item_shipping_cost_in = 20
cost = cost * self.eur_to_gbp_rate + item_shipping_cost_in
active = (cost is not None)
return cost, active
def scrape_cost_and_active_selenium_chaoscards(self, url):
# page_load_element_selector = '#prod_title'
cost_selector = '.price_inc > span:nth-child(2)'
active_selector = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li > div:nth-child(1) > div:nth-child(2)'
cost_text, active = self.scrape_cost_and_active_selenium(
url = url
, page_load_element_selector = cost_selector # page_load_element_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ["Out of stock", "Coming soon"]
)
cost = Cost_Fetcher_Base.parse_cost_chaoscards(cost_text)
return cost, active
def scrape_cost_and_active_selenium_gameslore(self, url):
# page_load_element_selector = '.page-title'
cost_selector = 'div.columns > div.column.main > div.product-info-main > div.product-info-price > div.price-box > span.special-price > span.price-container > span.price-wrapper > span.price'
active_selector = '.stock > span:nth-child(1)'
cost_text, active = self.scrape_cost_and_active_selenium(
url = url
, page_load_element_selector = cost_selector # page_load_element_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ["OUT OF STOCK"]
)
cost = Cost_Fetcher_Base.parse_cost_gameslore(cost_text)
return cost, active
def scrape_cost_and_active_selenium_magicmadhouse(self, url):
page_load_element_selector = '.productView-title'
cost_selector = 'div.body > div.container > div > div.productView > section.productView-details > div.productView-options > form > div.productView-options-selections > div.productView-product > div.productView-info > div.price-rating > div.productView-price > div.price-section.actual-price > span.price'
active_selector = '.alertBox.alertBox--error'
cost_text, active = self.scrape_cost_and_active_selenium(
url = url
, page_load_element_selector = page_load_element_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = []
)
cost = Cost_Fetcher_Base.parse_cost_magicmadhouse(cost_text)
return cost, active
def scrape_prices_and_quantities_selenium_cardmarket(self, url):
    """Scrape price/quantity pairs from the offers on a Card Market page.

    Returns:
        A list of {'price': float, 'quantity': int} dicts (price in EUR),
        one per offer whose price and quantity could both be parsed. An
        offer that cannot be read is skipped. Returns [] when the page
        cannot be loaded or scraped at all.

    NOTE(review): the offer selector ends its row part with `:nth-child(1)`,
    so it presumably matches only the first offer row - confirm against the
    live page if several offers are expected.
    """
    offer_container_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer'
    price_selector = 'div.price-container > div > div:nth-child(1) > span:nth-child(1)'
    quantity_selector = 'div.amount-container > span:nth-child(1)'
    offer_locator = (By.CSS_SELECTOR, offer_container_selector)
    # Defined before any work starts so every exit path has a list to return.
    price_quantity_pairs = []
    try:
        print(f" Loading page...")
        # Load the page and wait for the offers; retry the whole load once.
        for load_attempt in range(2):
            try:
                self.driver.get(url)
                self.wait.until(EC.presence_of_element_located(offer_locator))
                self.wait.until(EC.element_to_be_clickable(offer_locator))
                break
            except Exception:
                if load_attempt == 1:
                    raise
        # Wait until the offer container can be read without going stale.
        max_attempts = 10
        for attempt in range(max_attempts):
            try:
                self.driver.find_element(*offer_locator).text
                print(f"✓ Element loaded successfully on attempt {attempt + 1}")
                break
            except StaleElementReferenceException:
                print(f"Stale element on attempt {attempt + 1}, retrying...")
                if attempt == max_attempts - 1:
                    raise ValueError("StaleElementReferenceException")
                time.sleep(1)
        print(f" Page title: {self.driver.title}")
        offer_containers = self.driver.find_elements(*offer_locator)
        print(f" Offer container selector: Found {len(offer_containers)} elements")
        for offer_container in offer_containers:
            try:
                price_text = offer_container.find_element(By.CSS_SELECTOR, price_selector).text
                quantity_text = offer_container.find_element(By.CSS_SELECTOR, quantity_selector).text
            except Exception as e:
                # One unreadable offer should not discard the others.
                print(f" Price selector failed: {e}")
                continue
            # parse_cost_cardmarket takes its text positionally as cost_text;
            # the old price_text=/quantity_text= keywords raised TypeError.
            price = Cost_Fetcher_Base.parse_cost_cardmarket(price_text)
            quantity_digits = re.sub(r'\D', '', quantity_text or '')
            if price is None or not quantity_digits:
                continue
            print(f" ✓ Found price: {price_text}")
            price_quantity_pairs.append({
                'price': price
                , 'quantity': int(quantity_digits)
            })
    except Exception as e:
        print(f" Price selector failed: {e}")
        return []
    return price_quantity_pairs
def load_tcg_sole_trader_workbook(self):
    """Open the workbook and locate the sourcing/product tables and columns.

    Sets self.workbook, self.sourcing_sheet, self.product_sheet, the two
    index_row_header_* attributes and one index_column_* attribute per
    required column. Column indices count from 1; an index of None means
    the header was not found.

    Prints an error and returns early (always returning None) when a
    sheet, a table header row or a required column is missing.
    """
    print("Loading workbook...")
    self.workbook = load_workbook(Cost_Fetcher_Base.WORKBOOK_NAME)
    for sheet_name in (Cost_Fetcher_Base.SOURCING_WORKSHEET_NAME, Cost_Fetcher_Base.PRODUCT_WORKSHEET_NAME):
        if sheet_name not in self.workbook.sheetnames:
            print(f"Error: Sheet '{sheet_name}' not found")
            return
    self.sourcing_sheet = self.workbook[Cost_Fetcher_Base.SOURCING_WORKSHEET_NAME]
    self.product_sheet = self.workbook[Cost_Fetcher_Base.PRODUCT_WORKSHEET_NAME]

    # Header row of the sourcing table: a 'tbl_Sourcing' marker in column 1
    # or a 'Source Name' header in column 3.
    self.index_row_header_sourcing = None
    for row in range(1, self.sourcing_sheet.max_row + 1):
        if self.sourcing_sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(self.sourcing_sheet.cell(row, 3).value):
            self.index_row_header_sourcing = row
            break
    if self.index_row_header_sourcing is None:
        print("Error: Could not find table 'tbl_Sourcing'")
        return

    # Header row of the product table: 'tbl_Product' or 'Product Id' in column 1.
    self.index_row_header_product = None
    for row in range(1, self.product_sheet.max_row + 1):
        if self.product_sheet.cell(row, 1).value == 'tbl_Product' or 'Product Id' in str(self.product_sheet.cell(row, 1).value):
            self.index_row_header_product = row
            break
    if self.index_row_header_product is None:
        print("Error: Could not find table 'tbl_Product'")
        return

    # Exact header text -> attribute that receives the column index.
    sourcing_columns = {
        'Source Name': 'index_column_name_sourcing',
        'Source Link': 'index_column_link_sourcing',
        'Source Unit Cost': 'index_column_unit_cost_sourcing',
        'Active': 'index_column_active_sourcing',
        'Product Id': 'index_column_product_id_sourcing',
    }
    product_columns = {
        'Is Booster Box': 'index_column_is_booster_box_product',
        'Is Booster': 'index_column_is_booster_product',
        'Is Precon': 'index_column_is_precon_product',
        'Product Id': 'index_column_product_id_product',
    }
    # Start every index at None so a missing column reaches the
    # "required columns" check below instead of raising AttributeError.
    for attribute in (*sourcing_columns.values(), *product_columns.values()):
        setattr(self, attribute, None)
    for index_column in range(1, self.sourcing_sheet.max_column + 1):
        header = str(self.sourcing_sheet.cell(self.index_row_header_sourcing, index_column).value).strip()
        attribute = sourcing_columns.get(header)
        if attribute is not None:
            setattr(self, attribute, index_column)
    for index_column in range(1, self.product_sheet.max_column + 1):
        header = str(self.product_sheet.cell(self.index_row_header_product, index_column).value).strip()
        attribute = product_columns.get(header)
        if attribute is not None:
            setattr(self, attribute, index_column)

    print(f"Sourcing max row: {self.sourcing_sheet.max_row}")
    print(f"Sourcing header row: {self.index_row_header_sourcing}")
    print(f"Sourcing header 1: {self.sourcing_sheet.cell(self.index_row_header_sourcing, 1).value}")
    print(f"Sourcing Columns - Name: {self.index_column_name_sourcing}, Link: {self.index_column_link_sourcing}, Unit Cost: {self.index_column_unit_cost_sourcing}, Active: {self.index_column_active_sourcing}, Product Id: {self.index_column_product_id_sourcing}")
    print(f"Product max row: {self.product_sheet.max_row}")
    print(f"Product header row: {self.index_row_header_product}")
    # Label corrected: this line reports the product sheet, not sourcing.
    print(f"Product header 1: {self.product_sheet.cell(self.index_row_header_product, 1).value}")
    print(f"Product Columns - Id: {self.index_column_product_id_product}, Is Booster: {self.index_column_is_booster_product}, Is Booster Box: {self.index_column_is_booster_box_product}, Is Precon: {self.index_column_is_precon_product}")
    if not all([
        self.index_column_name_sourcing
        , self.index_column_link_sourcing
        , self.index_column_unit_cost_sourcing
        , self.index_column_product_id_sourcing
        , self.index_column_active_sourcing
        , self.index_column_product_id_product
        , self.index_column_is_booster_product
        , self.index_column_is_booster_box_product
        , self.index_column_is_precon_product
    ]):
        print("Error: Could not find required columns")
        return
def scrape_all_costs(self):
"""Scrape every sourcing row, write the results into the sheet and save.

Walks the sourcing rows below the header, scrapes the supplier named on
each row, writes unit cost and active flag into the row, then saves the
workbook and prints a summary.

NOTE(review): the single broad `except` at the end only prints the error;
the save call sits inside the same `try`, so an exception appears to skip
saving - confirm.
"""
try:
processed_count = 0
updated_count = 0
# Timestamps (time.time()) of the last visit per supplier, used below to
# space out requests to the same site.
cardmarket_accessed_last_on = 0
chaoscards_accessed_last_on = 0
gameslore_accessed_last_on = 0
magicmadhouse_accessed_last_on = 0
# Flags recording whether the browser was restarted since the last visit
# to Chaos Cards / Games Lore.
did_restart_since_last_chaos_cards_visit = True
did_restart_since_last_games_lore_visit = True
for index_row in range(self.index_row_header_sourcing + 1, self.sourcing_sheet.max_row + 1):
# print(f"index_row: {index_row}")
# (debug print of the row's first 19 cell values removed from this comment)
source_name = self.sourcing_sheet.cell(index_row, self.index_column_name_sourcing).value
source_link = self.sourcing_sheet.cell(index_row, self.index_column_link_sourcing).value
source_product_id = self.sourcing_sheet.cell(index_row, self.index_column_product_id_sourcing).value
# Rows without a supplier name or link are skipped.
if not source_name or not source_link: # or not str(source_link).strip():
continue
print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
# Find the matching product row by Product Id and read its "Is Booster" flag.
product_is_booster = False
for product_row in range(self.index_row_header_product + 1, self.product_sheet.max_row + 1):
product_id = self.product_sheet.cell(product_row, self.index_column_product_id_product).value
# print(f"found product: id {product_id}")
if product_id == source_product_id:
product_is_booster_text = str(self.product_sheet.cell(product_row, self.index_column_is_booster_product).value).upper()
# print(f"product is booster: {product_is_booster_text}, type: {str(type(product_is_booster_text))}")
product_is_booster = (product_is_booster_text == "TRUE")
break
print(f"product is booster: {product_is_booster}")
# Restart the browser before revisiting Chaos Cards or Games Lore if that
# site was already visited since the last restart.
if (
(
source_name == "Chaos Cards"
and not did_restart_since_last_chaos_cards_visit
)
or (
source_name == "Games Lore"
and not did_restart_since_last_games_lore_visit
)
):
self.stop_driver()
self.setup_driver()
if not self.driver:
return
did_restart_since_last_chaos_cards_visit = True
did_restart_since_last_games_lore_visit = True
# Only the four known suppliers are processed; the row's cost/active cells
# are reset before scraping.
if source_name in ["Card Market", "Chaos Cards", "Games Lore", "Magic Madhouse"]:
self.clear_row_sourcing_sheet(index_row = index_row)
processed_count += 1
Cost_Fetcher_Base.log_processing_new_row(
index_row = index_row
, source_link = source_link
)
cost = None
active = None
if source_name == "Card Market":
# Wait until a random 10-20 second gap since the last Card Market visit
# has elapsed.
while (time.time() - cardmarket_accessed_last_on < random.uniform(10, 20)):
time.sleep(random.uniform(3, 5))
if product_is_booster:
# Boosters: use the price of the first offer with quantity >= 8, written
# as converted GBP.
price_quantity_pairs = self.scrape_prices_and_quantities_selenium_cardmarket(url = source_link)
if price_quantity_pairs:
self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "TRUE"
max_quantity = 0
updated_row_price = False
for price_quantity_pair in price_quantity_pairs:
eur_price = price_quantity_pair['price']
quantity = price_quantity_pair['quantity']
print(f" Found price: €{eur_price}")
print(f" Found quantity: {quantity}")
max_quantity = max(max_quantity, quantity)
if quantity >= 8:
if eur_price:
gbp_price = eur_price * self.eur_to_gbp_rate
print(f" Converted: €{eur_price:.2f} → £{gbp_price:.2f}")
self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = gbp_price
updated_count += 1
updated_row_price = True
print(f"output row: {index_row}, value: {self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value}")
break
else:
print(f" Error: Could not parse price")
# Fallback: no offer had quantity >= 8, so take the first offer whose
# quantity equals the largest seen (or any offer when that maximum is <= 2).
if not updated_row_price:
print("Offer with quantity >= 8 not found")
for price_quantity_pair in price_quantity_pairs:
eur_price = price_quantity_pair['price']
quantity = price_quantity_pair['quantity']
print(f" Found price: €{eur_price}")
print(f" Found quantity: {quantity}")
if max_quantity <= 2 or quantity == max_quantity:
if eur_price:
gbp_price = eur_price * self.eur_to_gbp_rate
print(f" Converted: €{eur_price:.2f} → £{gbp_price:.2f}")
self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = gbp_price
updated_count += 1
updated_row_price = True
print(f"output row: {index_row}, value: {self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value}")
break
else:
print(f" Error: Could not parse price")
else:
cost, active = self.scrape_cost_and_active_selenium_cardmarket(url = source_link)
cardmarket_accessed_last_on = time.time()
elif source_name == "Chaos Cards":
while (time.time() - chaoscards_accessed_last_on < random.uniform(20, 30)):
time.sleep(random.uniform(3, 5))
cost, active = self.scrape_cost_and_active_selenium_chaoscards(url = source_link)
chaoscards_accessed_last_on = time.time()
did_restart_since_last_chaos_cards_visit = False
elif source_name == "Games Lore":
while (time.time() - gameslore_accessed_last_on < random.uniform(10, 20)):
time.sleep(random.uniform(3, 5))
cost, active = self.scrape_cost_and_active_selenium_gameslore(url = source_link)
gameslore_accessed_last_on = time.time()
did_restart_since_last_games_lore_visit = False
elif source_name == "Magic Madhouse":
while (time.time() - magicmadhouse_accessed_last_on < random.uniform(10, 20)):
time.sleep(random.uniform(3, 5))
cost, active = self.scrape_cost_and_active_selenium_magicmadhouse(url = source_link)
magicmadhouse_accessed_last_on = time.time()
# Write the scraped cost/active pair; the Card Market booster path above
# writes its cells directly and leaves cost/active as None.
if (cost is not None and active is not None):
print(f" Found cost: {cost}, active: {active}")
self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = cost
self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "TRUE" if active else "FALSE"
updated_count += 1
else:
print(f" Error: Could not find cost on page")
# Save workbook
print(f"\n{'='*60}")
print(f"Saving workbook...")
self.workbook.save(Cost_Fetcher_Base.WORKBOOK_NAME)
print(f"\nComplete!")
print(f"Processed: {processed_count} entries")
print(f"Updated: {updated_count} costs")
except Exception as e:
print(f"Error: {e}")
def clear_row_sourcing_sheet(self, index_row):
self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = None
self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "FALSE"
@staticmethod
def log_processing_new_row(index_row, source_link):
print(f"\n{'='*60}")
print(f"Processing row {index_row}: {source_link}")
print(f"{'='*60}")
def __init__(self):
    """Start the browser, then load the workbook and the EUR to GBP rate.

    The workbook and the exchange rate are only loaded when setup_driver
    left a usable (truthy) self.driver behind.
    """
    print("Setting up browser automation (browser will not be visible)...")
    self.setup_driver()
    if self.driver:
        self.load_tcg_sole_trader_workbook()
        self.get_eur_to_gbp_rate()
def stop_driver(self):
self.driver.quit()
def main():
    """Run one full scrape and always shut the browser down afterwards."""
    cost_fetcher = Cost_Fetcher_Base()
    try:
        cost_fetcher.scrape_all_costs()
    finally:
        # Close Chrome even when the run is interrupted (e.g. Ctrl+C), so no
        # browser process is left behind.
        cost_fetcher.stop_driver()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,267 @@
import pandas as pd
from openpyxl import load_workbook
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
import random
# Flat amount added to every converted GBP price before main() writes it to
# the sourcing sheet.
ITEM_SHIPPING_COST_IN = 8
def get_eur_to_gbp_rate():
    """Return the current EUR to GBP rate, or 0.85 when the lookup fails."""
    rates_url = 'https://api.exchangerate-api.com/v4/latest/EUR'
    try:
        payload = requests.get(rates_url, timeout=10).json()
        return payload['rates']['GBP']
    except Exception as e:
        print(f"Error fetching exchange rate: {e}")
        print("Using fallback rate: 0.85")
    return 0.85
def parse_cardmarket_price(price_text):
    """Convert a price such as '141,30 €' to a float in EUR.

    Keeps only digits and commas, reads the comma as the decimal mark and
    returns None for empty or unparseable text.
    """
    if price_text:
        decimal_text = re.sub(r'[^\d,]', '', price_text).replace(',', '.')
        try:
            return float(decimal_text)
        except ValueError:
            pass
    return None
def setup_driver():
    """Create a Chrome WebDriver with a visible window; return None on failure."""
    chrome_options = Options()
    # '--headless' is deliberately left out so the browser window is shown.
    launch_arguments = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    )
    for argument in launch_arguments:
        chrome_options.add_argument(argument)
    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def scrape_cardmarket_price_selenium(driver, url):
    """Load a Card Market page and return the first offer's price text.

    After loading `url` the function sleeps a random 10-20 seconds, then
    checks up to three elements matching the price selector and returns the
    text of the first one that contains a digit. Returns None when nothing
    matches or any step raises.
    """
    price_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer > div.price-container > div > div:nth-child(1) > span:nth-child(1)'
    try:
        print(f" Loading page...")
        driver.get(url)
        # Fixed pause instead of an explicit wait for the page to render.
        time.sleep(random.uniform(10, 20))
        print(f" Page title: {driver.title}")
        try:
            candidates = driver.find_elements(By.CSS_SELECTOR, price_selector)
            print(f" Selector: Found {len(candidates)} elements")
            for candidate in candidates[:3]:
                candidate_text = candidate.text
                print(f" Text: '{candidate_text}'")
                if not ('' in candidate_text and re.search(r'\d', candidate_text)):
                    continue
                print(f" ✓ Found price with selector: {candidate_text}")
                return candidate_text
        except Exception as e:
            print(f" Selector failed: {e}")
        print(f" ✗ No price found")
    except Exception as e:
        print(f" Error: {e}")
    return None
def main():
"""Update Card Market prices for booster-box / precon rows in the workbook.

Loads the workbook, locates the sourcing and product tables and their
columns, scrapes the Card Market price for each qualifying sourcing row,
writes the converted GBP price plus ITEM_SHIPPING_COST_IN into the row,
then saves the workbook. The browser is always quit at the end.
"""
workbook_name = 'TCG Sole Trader Copy.xlsx'
sourcing_sheet_name = 'Sourcing'
product_sheet_name = 'Product'
print("Loading workbook...")
wb = load_workbook(workbook_name)
if sourcing_sheet_name not in wb.sheetnames:
print(f"Error: Sheet '{sourcing_sheet_name}' not found")
return
if product_sheet_name not in wb.sheetnames:
print(f"Error: Sheet '{product_sheet_name}' not found")
return
sourcing_sheet = wb[sourcing_sheet_name]
product_sheet = wb[product_sheet_name]
# Locate the sourcing table header: 'tbl_Sourcing' in column 1 or a
# 'Source Name' header in column 3.
sourcing_table_found = False
start_row = None
for row in range(1, sourcing_sheet.max_row + 1):
if sourcing_sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(sourcing_sheet.cell(row, 3).value):
start_row = row + 1
sourcing_table_found = True
break
if not sourcing_table_found or not start_row:
for row in range(1, min(20, sourcing_sheet.max_row + 1)):
if 'Source Name' in str(sourcing_sheet.cell(row, 3).value):
start_row = row + 1
sourcing_table_found = True
break
# NOTE(review): start_row is reset here and then set from the product sheet,
# so the sourcing start row found above is discarded. header_row and the
# sourcing loop below therefore use the product table's position, which is
# only right if both tables have their header on the same row - confirm.
start_row = None
product_table_found = False
for row in range(1, product_sheet.max_row + 1):
if product_sheet.cell(row, 1).value == 'tbl_Product' or 'Product Id' in str(product_sheet.cell(row, 1).value):
start_row = row + 1
product_table_found = True
break
if not sourcing_table_found:
print("Error: Could not find table 'tbl_Sourcing'")
return
if not product_table_found:
print("Error: Could not find table 'tbl_Product'")
return
# Find column indices
header_row = start_row - 1
source_name_col = None
source_link_col = None
source_unit_price_col = None
source_is_available_col = None
source_product_id_col = None
product_id_col = None
product_is_booster_box_col = None
product_is_precon_col = None
# Headers are matched by substring; the same header_row is used for both sheets.
for col in range(1, sourcing_sheet.max_column + 1):
header = str(sourcing_sheet.cell(header_row, col).value).strip()
if 'Source Name' in header:
source_name_col = col
elif 'Source Link' in header:
source_link_col = col
elif 'Source Unit Cost' in header:
source_unit_price_col = col
elif 'Active' in header:
source_is_available_col = col
elif 'Product Id' in header:
source_product_id_col = col
for col in range(1, product_sheet.max_column + 1):
header = str(product_sheet.cell(header_row, col).value).strip()
if 'Is Booster Box' in header:
product_is_booster_box_col = col
elif 'Is Precon' in header:
product_is_precon_col = col
elif 'Product Id' in header:
product_id_col = col
print(f"Starting from row {start_row}")
print(f"Sourcing Columns - Source Name: {source_name_col}, Source Link: {source_link_col}, Source Unit Cost: {source_unit_price_col}, Active: {source_is_available_col}, Product Id: {source_product_id_col}")
print(f"Product Columns - Id: {product_id_col}, Is Booster: {product_is_booster_box_col}, Is Precon: {product_is_precon_col}")
if not all([source_name_col, source_link_col, source_unit_price_col, source_is_available_col, source_product_id_col, product_id_col, product_is_booster_box_col, product_is_precon_col]):
print("Error: Could not find required columns")
return
# Get EUR to GBP rate
eur_to_gbp = get_eur_to_gbp_rate()
print(f"Using EUR to GBP rate: {eur_to_gbp}")
# Setup Selenium driver
print("Setting up browser automation (browser will be visible)...")
driver = setup_driver()
if not driver:
return
try:
processed_count = 0
updated_count = 0
for row in range(start_row, sourcing_sheet.max_row + 1):
source_name = sourcing_sheet.cell(row, source_name_col).value
source_link = sourcing_sheet.cell(row, source_link_col).value
source_product_id = sourcing_sheet.cell(row, source_product_id_col).value
# The first row with neither a name nor a link ends the scan.
if not source_name and not source_link:
break
print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
# Look up the product row by Product Id to read its booster-box / precon flags.
product_is_booster_box = False
product_is_precon = False
for product_row in range(start_row, product_sheet.max_row + 1):
product_id = product_sheet.cell(product_row, product_id_col).value
# print(f"found product: id {product_id}")
if product_id == source_product_id:
product_is_booster_box_text = str(product_sheet.cell(product_row, product_is_booster_box_col).value).upper()
product_is_booster_box = (product_is_booster_box_text == "TRUE")
product_is_precon_text = str(product_sheet.cell(product_row, product_is_precon_col).value).upper()
product_is_precon = (product_is_precon_text == "TRUE")
break
print(f"product is booster box: {product_is_booster_box}")
# Check conditions
if (
(product_is_booster_box or product_is_precon)
and source_name == "Card Market"
and source_link
and str(source_link).strip()
):
# Reset the row first so a failed scrape leaves it blank and inactive.
sourcing_sheet.cell(row, source_unit_price_col).value = None
sourcing_sheet.cell(row, source_is_available_col).value = "FALSE"
processed_count += 1
print(f"\n{'='*60}")
print(f"Processing row {row}: {source_link}")
print(f"{'='*60}")
# Scrape price
price_text = scrape_cardmarket_price_selenium(driver, source_link)
if price_text:
print(f" Found price: {price_text}")
# Parse and convert
eur_price = parse_cardmarket_price(price_text)
if eur_price:
gbp_price = eur_price * eur_to_gbp
print(f" Converted: €{eur_price:.2f} → £{gbp_price:.2f}")
# Update cell
sourcing_sheet.cell(row, source_unit_price_col).value = gbp_price + ITEM_SHIPPING_COST_IN
sourcing_sheet.cell(row, source_is_available_col).value = "TRUE"
updated_count += 1
else:
print(f" Error: Could not parse price")
else:
print(f" Error: Could not find price on page")
# Save workbook
print(f"\n{'='*60}")
print(f"Saving workbook...")
wb.save(workbook_name)
print(f"\nComplete!")
print(f"Processed: {processed_count} Card Market entries")
print(f"Updated: {updated_count} prices")
finally:
driver.quit()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,312 @@
import pandas as pd
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import re
import time
import random
def setup_driver(headless=True):
    """Create a Chrome WebDriver, headless unless `headless` is false.

    Returns the driver, or None (after printing the error) when Chrome
    could not be started.
    """
    chrome_options = Options()
    launch_arguments = ['--headless'] if headless else []
    launch_arguments += [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ]
    for argument in launch_arguments:
        chrome_options.add_argument(argument)
    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def parse_price_value(text):
    """Extract a numeric value from a price string like '$5.50' or '€5,50'.

    Both '.' and ',' are accepted as the decimal mark. When the two occur
    together (e.g. '$1,234.56' or '1.234,56 €') the right-most one is the
    decimal mark and the other is dropped as a thousands separator. A
    separator that occurs more than once on its own (e.g. '1,234,567') is
    also treated as a thousands separator. A single lone comma keeps its
    historical meaning of decimal mark.

    Args:
        text: Raw price text; may be ``None`` or empty.

    Returns:
        The parsed value as ``float``, or ``None`` when *text* is empty or
        does not contain a parseable number.
    """
    if not text:
        return None
    # Strip currency symbols and whitespace; keep digits, separators, sign.
    cleaned = re.sub(r'[^\d,.\-]', '', text)
    last_dot = cleaned.rfind('.')
    last_comma = cleaned.rfind(',')
    if last_dot != -1 and last_comma != -1:
        # Mixed separators: whichever comes last is the decimal mark.
        if last_dot > last_comma:
            cleaned = cleaned.replace(',', '')
        else:
            cleaned = cleaned.replace('.', '').replace(',', '.')
    elif cleaned.count(',') > 1:
        cleaned = cleaned.replace(',', '')
    elif cleaned.count('.') > 1:
        cleaned = cleaned.replace('.', '')
    else:
        cleaned = cleaned.replace(',', '.')
    try:
        return float(cleaned)
    except ValueError:
        return None
def scrape_mtg_stocks_values(driver, url):
    """Scrape expected value and market value from an MTG Stocks set page.

    The first table row whose product name is a play-type booster and the
    first row whose name is a collector-type booster are read; scanning
    stops as soon as both have been found.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the MTG Stocks expected-value page.

    Returns:
        A dict with keys ``play_expected_value``, ``play_market_value``,
        ``collector_expected_value``, ``collector_market_value`` (floats or
        ``None``) and ``found_play`` / ``found_collector`` (bools). The same
        dict shape is returned when loading or parsing fails; values that
        were not obtained stay ``None`` / ``False``.
    """
    # Built before the try block so the error path can always return it,
    # even when driver.get() itself fails.
    result = {
        'play_expected_value': None,
        'play_market_value': None,
        'collector_expected_value': None,
        'collector_market_value': None,
        'found_play': False,
        'found_collector': False,
    }
    # Product names accepted for each booster kind.
    valid_play_booster_types = (
        'Play Booster Pack',
        'Set Booster Pack',
        'Booster Pack',
        'Play Booster',
        'Set Booster',
        'Booster',
    )
    valid_collector_booster_types = (
        'Collector Booster Pack',
        'Collector Booster',
    )
    row_selector = 'mtg-sets-expected-value > mtg-product-tree > .table-responsive > table > tbody:nth-child(2) > tr'
    name_selector = 'td:nth-child(1) > div.d-flex.align-items-center:nth-child(1) > a:nth-child(2)'
    try:
        print(f" Loading page...")
        driver.get(url)
        # Fixed randomised pause instead of an explicit wait for the table.
        time.sleep(random.uniform(10, 20))
        rows = driver.find_elements(By.CSS_SELECTOR, row_selector)
        print(f" Found {len(rows)} rows in table")
        for row in rows:
            try:
                booster_type = row.find_element(By.CSS_SELECTOR, name_selector).text.strip()
                print(f" Checking row: '{booster_type}'")
                kind = None
                if booster_type in valid_play_booster_types and not result['found_play']:
                    kind = 'play'
                elif booster_type in valid_collector_booster_types and not result['found_collector']:
                    kind = 'collector'
                if kind is not None:
                    print(f" ✓ Match found: '{booster_type}'")
                    # The flag is set before the cells are read, so a row
                    # that matches by name is never retried on a later row.
                    result['found_' + kind] = True
                    # Expected value is the 3rd column, market value the 5th.
                    expected_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(3)').text.strip()
                    market_value_text = row.find_element(By.CSS_SELECTOR, 'td:nth-child(5)').text.strip()
                    print(f" Expected Value: '{expected_value_text}'")
                    print(f" Market Value: '{market_value_text}'")
                    result[kind + '_expected_value'] = parse_price_value(expected_value_text)
                    result[kind + '_market_value'] = parse_price_value(market_value_text)
                if result['found_play'] and result['found_collector']:
                    return result
            except Exception:
                # Row doesn't match the expected structure; try the next one.
                continue
        print(f" ✗ No matching booster type found")
        return result
    except Exception as e:
        print(f" Error: {e}")
        return result
def main():
    """Refresh booster expected/market values in the 'MTG Set' worksheet.

    Loads 'TCG Sole Trader Copy.xlsx', locates the header row containing
    'EV MTG Stocks Link', scrapes every linked page with
    ``scrape_mtg_stocks_values`` and writes the play and collector booster
    values into their columns. Cells are set to '' when no matching booster
    row was found. The workbook is saved once, after all rows were handled.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'MTG Set'
    link_header = 'EV MTG Stocks Link'
    # (result-key prefix, expected-value header, sealed-market-value header)
    booster_kinds = (
        ('play', 'Play Booster Expected Market Value', 'Play Boost Sealed Market Value'),
        ('collector', 'Collector Booster Expected Market Value', 'Collector Boost Sealed Market Value'),
    )

    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return
    sheet = wb[sheet_name]

    # Snapshot the used range once and stay inside it: sheet.cell() creates
    # any cell it is asked for, so probing past max_row/max_column would
    # grow the sheet with empty cells.
    last_row = sheet.max_row
    last_col = sheet.max_column
    print("max sheet column: ", str(last_col))

    # Search for the table header (row 1 is not inspected).
    header_row = None
    for row in range(2, last_row + 1):
        if any(link_header in str(sheet.cell(row, col).value) for col in range(1, last_col + 1)):
            header_row = row
            break
    if header_row is None:
        print("Error: Could not find 'EV MTG Stocks Link' column")
        return
    start_row = header_row + 1
    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")

    # Map each wanted header text to its column; the first wanted text
    # contained in a header cell wins for that cell.
    wanted_headers = [link_header]
    for _, expected_header, market_header in booster_kinds:
        wanted_headers += [expected_header, market_header]
    col_of = dict.fromkeys(wanted_headers)
    for col in range(1, last_col + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        for wanted in wanted_headers:
            if wanted in header:
                col_of[wanted] = col
                break
    ev_link_col = col_of[link_header]
    play_expected_value_col = col_of['Play Booster Expected Market Value']
    play_market_value_col = col_of['Play Boost Sealed Market Value']
    collector_expected_value_col = col_of['Collector Booster Expected Market Value']
    collector_market_value_col = col_of['Collector Boost Sealed Market Value']
    print(f"Columns - EV Link: {ev_link_col}, Play Expected Value: {play_expected_value_col}, Play Market Value: {play_market_value_col}, Collector Expected Value: {collector_expected_value_col}, Collector Market Value: {collector_market_value_col}")
    if not all(col_of.values()):
        print("Error: Could not find all required columns")
        for wanted in wanted_headers:
            print(f" {wanted}: {'Found' if col_of[wanted] else 'NOT FOUND'}")
        return

    # Setup Selenium driver (headless=False keeps the browser visible).
    print("Setting up browser automation...")
    driver = setup_driver(headless=False)
    if not driver:
        return
    try:
        processed_count = 0
        updated_count = 0
        cleared_counts = {'play': 0, 'collector': 0}
        for row in range(start_row, last_row + 1):
            ev_link = sheet.cell(row, ev_link_col).value
            if not ev_link:
                # A row without a link ends the table only when most of
                # its leading columns are empty too.
                empty_count = sum(
                    1
                    for check_col in range(1, min(10, last_col + 1))
                    if not sheet.cell(row, check_col).value
                )
                if empty_count >= 5:
                    break
                continue
            processed_count += 1
            print(f"\n{'='*80}")
            print(f"Processing row {row}: {ev_link}")
            print(f"{'='*80}")
            result = scrape_mtg_stocks_values(driver, ev_link)
            for kind, expected_header, market_header in booster_kinds:
                expected_cell = sheet.cell(row, col_of[expected_header])
                market_cell = sheet.cell(row, col_of[market_header])
                if result['found_' + kind]:
                    expected_value = result[kind + '_expected_value']
                    market_value = result[kind + '_market_value']
                    expected_cell.value = expected_value
                    market_cell.value = market_value
                    updated_count += 1
                    print(f" ✓ Updated - Expected: {expected_value}, Market: {market_value}")
                else:
                    # Clear cells - no matching booster type found.
                    expected_cell.value = ''
                    market_cell.value = ''
                    cleared_counts[kind] += 1
                    print(f" ✗ Cleared values - no matching booster type found")
            # Randomised delay between requests.
            time.sleep(random.uniform(10, 20))
        # Save workbook
        print(f"\n{'='*80}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Updated: {updated_count} entries")
        print(f"Play fields cleared: {cleared_counts['play']} entries (no matching data)")
        print(f"Collector fields cleared: {cleared_counts['collector']} entries (no matching data)")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,328 @@
import pandas as pd
from openpyxl import load_workbook
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
import random
from datetime import datetime
def get_eur_to_gbp_rate():
    """Return the current EUR to GBP rate from exchangerate-api.com.

    Any failure (network error, bad JSON, missing key) is printed and the
    fixed fallback rate 0.85 is returned instead.
    """
    endpoint = 'https://api.exchangerate-api.com/v4/latest/EUR'
    fallback_rate = 0.85
    try:
        payload = requests.get(endpoint, timeout=10).json()
        return payload['rates']['GBP']
    except Exception as exc:
        print(f"Error fetching exchange rate: {exc}")
        print("Using fallback rate: 0.85")
        return fallback_rate
def parse_cardmarket_price(price_text):
    """Turn a Cardmarket price label such as '141,30 €' into a float.

    Only digits and commas are kept (so '.' thousands marks and the
    currency sign are dropped); the comma is then read as the decimal mark.
    Returns ``None`` for empty input or when no number can be parsed.
    """
    if not price_text:
        return None
    kept = ''.join(re.findall(r'[\d,]', price_text))
    try:
        return float(kept.replace(',', '.'))
    except ValueError:
        return None
def parse_cardmarket_quantity(quantity_text):
    """Convert a quantity label to ``float``.

    Returns ``None`` when the text is empty or is not a valid number.
    """
    if quantity_text:
        try:
            return float(quantity_text)
        except ValueError:
            pass
    return None
def setup_driver():
    """Start a Chrome WebDriver with a visible window.

    Returns:
        The started driver, or ``None`` when Chrome could not be launched
        (the error is printed instead of raised).
    """
    # '--headless' is intentionally not passed so the browser stays visible.
    flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    )
    opts = Options()
    for flag in flags:
        opts.add_argument(flag)
    try:
        return webdriver.Chrome(options=opts)
    except Exception as exc:
        print(f"Error setting up Chrome driver: {exc}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def scrape_cardmarket_prices_and_quantities_selenium(driver, url):
    """Collect the offers shown on a Cardmarket product page.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the Cardmarket product page.

    Returns:
        A list of ``{'price': float | None, 'quantity': float}`` dicts, one
        per offer element whose price label contains '€' and a digit and
        whose quantity could be parsed. An empty list is returned when the
        page cannot be loaded or any offer element cannot be read.
    """
    offer_container_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer'
    price_selector = 'div.price-container > div > div:nth-child(1) > span:nth-child(1)'
    quantity_selector = 'div.amount-container > span:nth-child(1)'
    try:
        print(f" Loading page...")
        driver.get(url)
        # Wait for page to load + human reading time
        time.sleep(random.uniform(20, 30))
        print(f" Page title: {driver.title}")
        price_quantity_pairs = []
        try:
            offer_containers = driver.find_elements(By.CSS_SELECTOR, offer_container_selector)
            print(f" Offer container selector: Found {len(offer_containers)} elements")
            for offer_container in offer_containers:
                price_text = offer_container.find_element(By.CSS_SELECTOR, price_selector).text
                # The check used to test for the empty string, which is
                # contained in every string; the euro sign is what the
                # later EUR -> GBP conversion relies on.
                if '€' not in price_text or not re.search(r'\d', price_text):
                    continue
                print(f" ✓ Found price: {price_text}")
                quantity_text = offer_container.find_element(By.CSS_SELECTOR, quantity_selector).text
                quantity = parse_cardmarket_quantity(quantity_text=quantity_text)
                if quantity is None:
                    # Callers compare quantities numerically; an offer
                    # without a usable quantity cannot be ranked.
                    continue
                price_quantity_pairs.append({
                    'price': parse_cardmarket_price(price_text=price_text),
                    'quantity': quantity,
                })
        except Exception as e:
            # One unreadable offer invalidates the whole page result.
            print(f" Price selector failed: {e}")
            return []
        return price_quantity_pairs
    except Exception as e:
        print(f" Error: {e}")
        return []
def _locate_columns(sheet, header_row, wanted_headers):
    """Map each wanted header text to the last column whose header contains it."""
    found = dict.fromkeys(wanted_headers)
    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        # First wanted text contained in the header claims the column.
        for wanted in wanted_headers:
            if wanted in header:
                found[wanted] = col
                break
    return found


def _store_gbp_price(sheet, row, col, eur_price, eur_to_gbp):
    """Write eur_price converted to GBP into (row, col); return True if written."""
    if not eur_price:
        print(f" Error: Could not parse price")
        return False
    gbp_price = eur_price * eur_to_gbp
    print(f" Converted: €{eur_price:.2f} → £{gbp_price:.2f}")
    sheet.cell(row, col).value = gbp_price
    print(f"output row: {row}, value: {sheet.cell(row, col).value}")
    return True


def main():
    """Refresh Cardmarket prices for booster products in the 'Sourcing' sheet.

    For every Sourcing row whose source is "Card Market" and whose product
    is flagged 'Is Booster' in the 'Product' sheet, the price cell is
    cleared, the linked page is scraped and a price converted from EUR to
    GBP is written back. The first offer with quantity >= 8 is preferred;
    otherwise the first offer is used when no offer exceeds quantity 2,
    else the offer with the largest quantity. The workbook is saved once at
    the end.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sourcing_sheet_name = 'Sourcing'
    product_sheet_name = 'Product'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sourcing_sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sourcing_sheet_name}' not found")
        return
    if product_sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{product_sheet_name}' not found")
        return
    sourcing_sheet = wb[sourcing_sheet_name]
    product_sheet = wb[product_sheet_name]

    # Each sheet keeps its own first data row; the header rows of the two
    # tables are not guaranteed to coincide.
    sourcing_start_row = None
    for row in range(1, sourcing_sheet.max_row + 1):
        if sourcing_sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(sourcing_sheet.cell(row, 3).value):
            sourcing_start_row = row + 1
            break
    if sourcing_start_row is None:
        # Fallback: header text in column 2 within the first rows.
        for row in range(1, min(20, sourcing_sheet.max_row + 1)):
            if 'Source Name' in str(sourcing_sheet.cell(row, 2).value):
                sourcing_start_row = row + 1
                break
    product_start_row = None
    for row in range(1, product_sheet.max_row + 1):
        if product_sheet.cell(row, 1).value == 'tbl_Product' or 'Product Id' in str(product_sheet.cell(row, 1).value):
            product_start_row = row + 1
            break
    if sourcing_start_row is None:
        print("Error: Could not find table 'tbl_Sourcing' or 'Source Name' column")
        return
    if product_start_row is None:
        print("Error: Could not find table 'tbl_Product' or 'Product Id' column")
        return

    # NOTE(review): the unit price column is matched on 'Sale Price' here
    # although the log line below labels it 'Source Unit Cost' — confirm.
    sourcing_cols = _locate_columns(
        sourcing_sheet,
        sourcing_start_row - 1,
        ('Source Name', 'Source Link', 'Sale Price', 'Active', 'Product Id'),
    )
    product_cols = _locate_columns(
        product_sheet,
        product_start_row - 1,
        ('Is Booster', 'Product Id'),
    )
    source_name_col = sourcing_cols['Source Name']
    source_link_col = sourcing_cols['Source Link']
    source_unit_price_col = sourcing_cols['Sale Price']
    source_is_available_col = sourcing_cols['Active']
    source_product_id_col = sourcing_cols['Product Id']
    product_id_col = product_cols['Product Id']
    product_is_booster_col = product_cols['Is Booster']
    print(f"Starting from row {sourcing_start_row}")
    print(f"Sourcing Columns - Source Name: {source_name_col}, Source Link: {source_link_col}, Source Unit Cost: {source_unit_price_col}, Active: {source_is_available_col}, Product Id: {source_product_id_col}")
    print(f"Product Columns - Id: {product_id_col}, Is Booster: {product_is_booster_col}")
    if not all([source_name_col, source_link_col, source_unit_price_col, source_is_available_col, source_product_id_col, product_id_col, product_is_booster_col]):
        print("Error: Could not find required columns")
        return

    # Product id -> booster flag, built once; the first row seen for an id
    # wins, matching a top-down scan that stops at the first hit.
    is_booster_by_product_id = {}
    for product_row in range(product_start_row, product_sheet.max_row + 1):
        product_id = product_sheet.cell(product_row, product_id_col).value
        flag_text = str(product_sheet.cell(product_row, product_is_booster_col).value).upper()
        is_booster_by_product_id.setdefault(product_id, flag_text == "TRUE")

    # Get EUR to GBP rate
    eur_to_gbp = get_eur_to_gbp_rate()
    print(f"Using EUR to GBP rate: {eur_to_gbp}")
    # Setup Selenium driver
    print("Setting up browser automation (browser will be visible)...")
    driver = setup_driver()
    if not driver:
        return
    try:
        processed_count = 0
        updated_count = 0
        for row in range(sourcing_start_row, sourcing_sheet.max_row + 1):
            source_name = sourcing_sheet.cell(row, source_name_col).value
            source_link = sourcing_sheet.cell(row, source_link_col).value
            source_product_id = sourcing_sheet.cell(row, source_product_id_col).value
            # Skip incomplete rows.
            if not source_name or not source_link or not source_product_id:
                continue
            print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
            product_is_booster = is_booster_by_product_id.get(source_product_id, False)
            print(f"product is booster: {product_is_booster}")
            if not (product_is_booster and source_name == "Card Market" and str(source_link).strip()):
                continue
            # Clear the old price first so a failed scrape leaves it empty.
            sourcing_sheet.cell(row, source_unit_price_col).value = None
            processed_count += 1
            print(f"\n{'='*60}")
            print(f"Processing row {row}: {source_link}")
            print(f"{'='*60}")
            price_quantity_pairs = scrape_cardmarket_prices_and_quantities_selenium(driver, source_link)
            if not price_quantity_pairs:
                print(f" Error: Could not find price on page")
                continue

            # Pass 1: first offer with at least 8 items in stock.
            max_quantity = 0
            updated_row_price = False
            for price_quantity_pair in price_quantity_pairs:
                eur_price = price_quantity_pair['price']
                quantity = price_quantity_pair['quantity']
                print(f" Found price: €{eur_price}")
                print(f" Found quantity: {quantity}")
                if quantity is None:
                    # Unparseable quantity cannot be compared; ignore offer.
                    continue
                max_quantity = max(max_quantity, quantity)
                if quantity >= 8 and _store_gbp_price(sourcing_sheet, row, source_unit_price_col, eur_price, eur_to_gbp):
                    updated_count += 1
                    updated_row_price = True
                    break
            if updated_row_price:
                continue

            # Pass 2: first offer if stock is tiny everywhere, otherwise
            # the offer holding the largest quantity seen.
            print("Offer with quantity >= 8 not found")
            for price_quantity_pair in price_quantity_pairs:
                eur_price = price_quantity_pair['price']
                quantity = price_quantity_pair['quantity']
                print(f" Found price: €{eur_price}")
                print(f" Found quantity: {quantity}")
                if quantity is None:
                    continue
                if max_quantity <= 2 or quantity == max_quantity:
                    if _store_gbp_price(sourcing_sheet, row, source_unit_price_col, eur_price, eur_to_gbp):
                        updated_count += 1
                        break
        # Save workbook
        print(f"\n{'='*60}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} Card Market entries")
        print(f"Updated: {updated_count} prices")
        print(datetime.now())
    finally:
        driver.quit()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,241 @@
import pandas as pd
from openpyxl import load_workbook
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
import random
# Flat amount added to each scraped price before it is written to the
# unit-cost column (presumably inbound shipping per item in GBP — confirm).
ITEM_SHIPPING_COST_IN = 8
def parse_chaoscards_price(price_text):
    """Convert a price label such as '£141.30' to a float.

    Every non-digit character (currency sign, decimal point, thousands
    comma) is discarded and the remaining digits are read as hundredths,
    so the label is expected to carry exactly two decimal places.

    Args:
        price_text: Raw price text; may be ``None`` or empty.

    Returns:
        The price as ``float``, or ``None`` when *price_text* is empty or
        contains no digits.
    """
    if not price_text:
        return None
    # Commas must go too: a thousands separator ('£1,141.30') would
    # otherwise make float() fail and the price be reported as missing.
    digits = re.sub(r'[^\d]', '', price_text)
    try:
        return float(digits) / 100
    except ValueError:
        return None
def setup_driver():
    """Start a Chrome WebDriver with a visible window.

    Returns:
        The started driver, or ``None`` when Chrome could not be launched
        (the error is printed instead of raised).
    """
    # '--headless' is intentionally not passed so the browser stays visible.
    flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    )
    opts = Options()
    for flag in flags:
        opts.add_argument(flag)
    try:
        return webdriver.Chrome(options=opts)
    except Exception as exc:
        print(f"Error setting up Chrome driver: {exc}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def scrape_chaoscards_price_selenium(driver, url):
    """Read price and stock status from a Chaos Cards product page.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the product page.

    Returns:
        ``(price, active)`` where *price* is the parsed price or ``None``
        and *active* is ``False`` when the stock label reads
        "Out of stock", ``True`` for any other label, or ``None`` when the
        label could not be read. ``(None, None)`` is returned when the page
        itself cannot be loaded. If either value is missing the function
        blocks on ``input()`` until Enter is pressed.
    """
    price_css = '.price_inc > span:nth-child(2)'
    stock_css = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li > div:nth-child(1) > div:nth-child(2)'
    try:
        print(f" Loading page...")
        driver.get(url)
        time.sleep(random.uniform(5, 10))
        print(f" Page title: {driver.title}")

        price = None
        try:
            price_label = driver.find_element(By.CSS_SELECTOR, price_css).text
            print(f" Text: '{price_label}'")
            price = parse_chaoscards_price(price_label)
        except Exception as exc:
            print(f" Selector failed: {exc}")

        active = None
        try:
            stock_label = driver.find_element(By.CSS_SELECTOR, stock_css).text
            print(f" Text: '{stock_label}'")
            active = stock_label != "Out of stock"
        except Exception as exc:
            print(f" Selector failed: {exc}")

        if price is None or active is None:
            print(f" ✗ No price found")
            # Pauses the run so the page can be inspected manually.
            input("Press Enter to continue to next URL...")
        return price, active
    except Exception as exc:
        print(f" Error: {exc}")
        return None, None
def main():
    """Refresh Chaos Cards prices and availability in the 'Sourcing' sheet.

    Every row whose source name is "Chaos Cards" and that has a link is
    reset (price cleared, Active set to "FALSE"), scraped with a freshly
    started browser, and updated with the scraped price plus
    ITEM_SHIPPING_COST_IN and the stock flag. Processing stops at the first
    row that has neither a source name nor a link. The workbook is saved
    once at the end, also when a browser restart fails part-way through.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sourcing_sheet_name = 'Sourcing'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sourcing_sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sourcing_sheet_name}' not found")
        return
    sourcing_sheet = wb[sourcing_sheet_name]

    # The header row is marked by 'tbl_Sourcing' in column 1 or by
    # 'Source Name' appearing in column 3.
    start_row = None
    for row in range(1, sourcing_sheet.max_row + 1):
        if sourcing_sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(sourcing_sheet.cell(row, 3).value):
            start_row = row + 1
            break
    if start_row is None:
        print("Error: Could not find table 'tbl_Sourcing'")
        return

    # Find column indices; the first wanted text contained in a header
    # cell claims that column.
    header_row = start_row - 1
    wanted_headers = ('Source Name', 'Source Link', 'Source Unit Cost', 'Active', 'Product Id')
    col_of = dict.fromkeys(wanted_headers)
    for col in range(1, sourcing_sheet.max_column + 1):
        header = str(sourcing_sheet.cell(header_row, col).value).strip()
        for wanted in wanted_headers:
            if wanted in header:
                col_of[wanted] = col
                break
    source_name_col = col_of['Source Name']
    source_link_col = col_of['Source Link']
    source_unit_price_col = col_of['Source Unit Cost']
    source_is_available_col = col_of['Active']
    source_product_id_col = col_of['Product Id']
    print(f"Starting from row {start_row}")
    print(f"Sourcing Columns - Source Name: {source_name_col}, Source Link: {source_link_col}, Source Unit Cost: {source_unit_price_col}, Active: {source_is_available_col}, Product Id: {source_product_id_col}")
    if not all([source_name_col, source_link_col, source_unit_price_col, source_is_available_col, source_product_id_col]):
        print("Error: Could not find required columns")
        return

    # Setup Selenium driver; fail fast before touching any row.
    print("Setting up browser automation (browser will be visible)...")
    driver = setup_driver()
    if not driver:
        return
    try:
        processed_count = 0
        updated_count = 0
        for row in range(start_row, sourcing_sheet.max_row + 1):
            source_name = sourcing_sheet.cell(row, source_name_col).value
            source_link = sourcing_sheet.cell(row, source_link_col).value
            source_product_id = sourcing_sheet.cell(row, source_product_id_col).value
            if not source_name and not source_link:
                break
            print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
            if source_name != "Chaos Cards" or not source_link or not str(source_link).strip():
                continue
            # Each page is scraped with a fresh browser session; rows that
            # are not scraped no longer trigger a restart.
            driver.quit()
            driver = setup_driver()
            if not driver:
                # Stop scraping but still save what was collected so far.
                break
            sourcing_sheet.cell(row, source_unit_price_col).value = None
            sourcing_sheet.cell(row, source_is_available_col).value = "FALSE"
            processed_count += 1
            print(f"\n{'='*60}")
            print(f"Processing row {row}: {source_link}")
            print(f"{'='*60}")
            # Scrape price
            gbp_price, active = scrape_chaoscards_price_selenium(driver, source_link)
            if gbp_price is None or active is None:
                print(f" Error: Could not find price on page")
                continue
            print(f" Found price: {gbp_price}, active: {active}")
            if gbp_price:
                sourcing_sheet.cell(row, source_unit_price_col).value = gbp_price + ITEM_SHIPPING_COST_IN
                sourcing_sheet.cell(row, source_is_available_col).value = "TRUE" if active else "FALSE"
                updated_count += 1
            else:
                print(f" Error: Could not parse price")
        # Save workbook
        print(f"\n{'='*60}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} Chaos Cards entries")
        print(f"Updated: {updated_count} prices")
    finally:
        # driver is None when a restart inside the loop failed.
        if driver:
            driver.quit()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,235 @@
import pandas as pd
from openpyxl import load_workbook
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
import random
# Flat amount added to each scraped price before it is written to the
# unit-cost column (presumably inbound shipping per item in GBP — confirm).
ITEM_SHIPPING_COST_IN = 8
def parse_gameslore_price(price_text):
    """Convert a price label such as '£141.30' to a float.

    Every non-digit character (currency sign, decimal point, thousands
    comma) is discarded and the remaining digits are read as hundredths,
    so the label is expected to carry exactly two decimal places.

    Args:
        price_text: Raw price text; may be ``None`` or empty.

    Returns:
        The price as ``float``, or ``None`` when *price_text* is empty or
        contains no digits.
    """
    if not price_text:
        return None
    # Commas must go too: a thousands separator ('£1,141.30') would
    # otherwise make float() fail and the price be reported as missing.
    digits = re.sub(r'[^\d]', '', price_text)
    try:
        return float(digits) / 100
    except ValueError:
        return None
def setup_driver():
    """Start a Chrome WebDriver with a visible window.

    Returns:
        The started driver, or ``None`` when Chrome could not be launched
        (the error is printed instead of raised).
    """
    # '--headless' is intentionally not passed so the browser stays visible.
    flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    )
    opts = Options()
    for flag in flags:
        opts.add_argument(flag)
    try:
        return webdriver.Chrome(options=opts)
    except Exception as exc:
        print(f"Error setting up Chrome driver: {exc}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def scrape_gameslore_price_selenium(driver, url):
    """Read price and stock status from a Games Lore product page.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the product page.

    Returns:
        ``(price, active)`` where *price* is the parsed price or ``None``
        and *active* is ``False`` when the stock label reads
        "OUT OF STOCK", ``True`` for any other label, or ``None`` when the
        label could not be read. ``(None, None)`` is returned when the page
        itself cannot be loaded. If either value is missing the function
        blocks on ``input()`` until Enter is pressed.
    """
    price_css = 'div.columns > div.column.main > div.product-info-main > div.product-info-price > div.price-box > span.special-price > span.price-container > span.price-wrapper > span.price'
    stock_css = '.stock > span:nth-child(1)'
    try:
        print(f" Loading page...")
        driver.get(url)
        time.sleep(random.uniform(10, 20))
        print(f" Page title: {driver.title}")

        price = None
        try:
            price_label = driver.find_element(By.CSS_SELECTOR, price_css).text
            print(f" Text: '{price_label}'")
            price = parse_gameslore_price(price_label)
        except Exception as exc:
            print(f" Selector failed: {exc}")

        active = None
        try:
            stock_label = driver.find_element(By.CSS_SELECTOR, stock_css).text
            print(f" ✓ Found stock availability with selector: {stock_label}")
            active = stock_label != "OUT OF STOCK"
        except Exception as exc:
            print(f" Selector failed: {exc}")

        if price is None or active is None:
            print(f" ✗ No price found")
            # Pauses the run so the page can be inspected manually.
            input("Press Enter to continue to next URL...")
        return price, active
    except Exception as exc:
        print(f" Error: {exc}")
        return None, None
def main():
    """Refresh Games Lore prices and availability in the 'Sourcing' sheet.

    Every row whose source name is "Games Lore" and that has a link is
    reset (price cleared, Active set to "FALSE"), scraped, and updated with
    the scraped price plus ITEM_SHIPPING_COST_IN and the stock flag.
    Processing stops at the first row that has neither a source name nor a
    link. The workbook is saved once, after all rows were handled.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sourcing_sheet_name = 'Sourcing'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sourcing_sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sourcing_sheet_name}' not found")
        return
    sourcing_sheet = wb[sourcing_sheet_name]

    # The header row is marked by 'tbl_Sourcing' in column 1 or by
    # 'Source Name' appearing in column 3. (A second search over the same
    # column could never succeed after this one failed, so there is none.)
    start_row = None
    for row in range(1, sourcing_sheet.max_row + 1):
        if sourcing_sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(sourcing_sheet.cell(row, 3).value):
            start_row = row + 1
            break
    if start_row is None:
        print("Error: Could not find table 'tbl_Sourcing'")
        return

    # Find column indices; the first wanted text contained in a header
    # cell claims that column.
    header_row = start_row - 1
    wanted_headers = ('Source Name', 'Source Link', 'Source Unit Cost', 'Active', 'Product Id')
    col_of = dict.fromkeys(wanted_headers)
    for col in range(1, sourcing_sheet.max_column + 1):
        header = str(sourcing_sheet.cell(header_row, col).value).strip()
        for wanted in wanted_headers:
            if wanted in header:
                col_of[wanted] = col
                break
    source_name_col = col_of['Source Name']
    source_link_col = col_of['Source Link']
    source_unit_price_col = col_of['Source Unit Cost']
    source_is_available_col = col_of['Active']
    source_product_id_col = col_of['Product Id']
    print(f"Starting from row {start_row}")
    print(f"Sourcing Columns - Source Name: {source_name_col}, Source Link: {source_link_col}, Source Unit Cost: {source_unit_price_col}, Active: {source_is_available_col}, Product Id: {source_product_id_col}")
    if not all([source_name_col, source_link_col, source_unit_price_col, source_is_available_col, source_product_id_col]):
        print("Error: Could not find required columns")
        return

    # Setup Selenium driver
    print("Setting up browser automation (browser will be visible)...")
    driver = setup_driver()
    if not driver:
        return
    try:
        processed_count = 0
        updated_count = 0
        for row in range(start_row, sourcing_sheet.max_row + 1):
            source_name = sourcing_sheet.cell(row, source_name_col).value
            source_link = sourcing_sheet.cell(row, source_link_col).value
            source_product_id = sourcing_sheet.cell(row, source_product_id_col).value
            if not source_name and not source_link:
                break
            print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
            if source_name != "Games Lore" or not source_link or not str(source_link).strip():
                continue
            # Reset first so a failed scrape leaves the row marked inactive.
            sourcing_sheet.cell(row, source_unit_price_col).value = None
            sourcing_sheet.cell(row, source_is_available_col).value = "FALSE"
            processed_count += 1
            print(f"\n{'='*60}")
            print(f"Processing row {row}: {source_link}")
            print(f"{'='*60}")
            # Scrape price
            gbp_price, active = scrape_gameslore_price_selenium(driver, source_link)
            if gbp_price is None or active is None:
                print(f" Error: Could not find price on page")
                continue
            print(f" Found price: {gbp_price}, active: {active}")
            if gbp_price:
                sourcing_sheet.cell(row, source_unit_price_col).value = gbp_price + ITEM_SHIPPING_COST_IN
                sourcing_sheet.cell(row, source_is_available_col).value = "TRUE" if active else "FALSE"
                updated_count += 1
            else:
                print(f" Error: Could not parse price")
        # Save workbook
        print(f"\n{'='*60}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} Games Lore entries")
        print(f"Updated: {updated_count} prices")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,235 @@
import pandas as pd
from openpyxl import load_workbook
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
import random
# Flat amount added to every scraped price before it is written to the unit-cost column.
# Presumably GBP, matching the `gbp_price` value it is added to in main() — TODO confirm.
ITEM_SHIPPING_COST_IN = 8
def parse_magicmadhouse_price(price_text):
    """Convert a displayed price such as '£141.30' or '141,30 €' to a float.

    Currency symbols and any other non-numeric text are ignored. The last
    ',' or '.' is treated as the decimal separator only when it is followed
    by one or two digits; every other separator is treated as a thousands
    grouping mark. Unlike a blanket "strip separators and divide by 100",
    this keeps whole-number prices ('£5' -> 5.0) and single-decimal prices
    ('£12.5' -> 12.5) correctly scaled.

    Args:
        price_text: Raw text of the price element.

    Returns:
        The price as a float, or None when the text is empty or contains
        no digits.
    """
    if not price_text:
        return None
    # Keep only digits and the two characters that can act as separators;
    # a trailing separator (e.g. a sentence full stop) carries no value.
    cleaned = re.sub(r'[^\d.,]', '', price_text).rstrip('.,')
    if not cleaned:
        return None
    last_sep = max(cleaned.rfind(','), cleaned.rfind('.'))
    fraction_len = len(cleaned) - last_sep - 1
    if last_sep != -1 and fraction_len in (1, 2):
        whole, fraction = cleaned[:last_sep], cleaned[last_sep + 1:]
    else:
        # No separator, or the last one is followed by three or more digits:
        # treat everything as the integer part.
        whole, fraction = cleaned, ''
    whole = re.sub(r'[.,]', '', whole)
    try:
        return float(f"{whole or '0'}.{fraction or '0'}")
    except ValueError:
        return None
def setup_driver():
    """Create a Chrome WebDriver that opens a visible browser window.

    Returns:
        The started driver, or None (after printing the error) when Chrome
        could not be launched.
    """
    launch_flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    )
    opts = Options()
    # '--headless' is intentionally not passed so the browser stays visible.
    for flag in launch_flags:
        opts.add_argument(flag)
    try:
        return webdriver.Chrome(options=opts)
    except Exception as err:
        print(f"Error setting up Chrome driver: {err}")
        print("Make sure Chrome and chromedriver are installed")
    return None
def scrape_magicmadhouse_price_selenium(driver, url):
    """Load a product page and read its price and availability.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Address of the product page.

    Returns:
        A ``(price, active)`` tuple. ``price`` is the value parsed from the
        page's price element, or None when the element is missing or its
        text cannot be parsed. ``active`` is False when the page contains an
        ``.alertBox.alertBox--error`` element (presumably the out-of-stock
        notice — confirm against the site) and True otherwise.
        ``(None, None)`` is returned when loading or querying the page fails.
    """
    try:
        print(" Loading page...")
        driver.get(url)
        # Randomised pause after navigation; presumably to let the page
        # finish rendering and to pace requests — TODO confirm intent.
        time.sleep(random.uniform(10, 20))
        print(f" Page title: {driver.title}")
        price_selector = ' > '.join([
            'div.body',
            'div.container',
            'div',
            'div.productView',
            'section.productView-details',
            'div.productView-options',
            'form',
            'div.productView-options-selections',
            'div.productView-product',
            'div.productView-info',
            'div.price-rating',
            'div.productView-price',
            'div.price-section.actual-price',
            'span.price',
        ])
        price = None
        try:
            element = driver.find_element(By.CSS_SELECTOR, price_selector)
            text = element.text
            print(f" Text: '{text}'")
            price = parse_magicmadhouse_price(text)
        except Exception as e:
            # Best effort: a missing price element is reported, not fatal.
            print(f" Selector failed: {e}")
        active_selector = '.alertBox.alertBox--error'
        # find_elements returns an empty list when nothing matches, so only a
        # genuinely absent alert marks the item active. Any real WebDriver
        # failure propagates to the outer handler instead of being read as
        # "in stock".
        active = not driver.find_elements(By.CSS_SELECTOR, active_selector)
        if price is None:
            print(" ✗ No price found on page")
            # NOTE(review): interactive pause kept from the original; it
            # blocks unattended runs until Enter is pressed.
            input("Press Enter to continue to next URL...")
        return price, active
    except Exception as e:
        print(f" Error: {e}")
        return None, None
def _find_sourcing_start_row(sheet):
    """Return the first data row of the sourcing table, or None if no header row is found."""
    for row in range(1, sheet.max_row + 1):
        if (
            sheet.cell(row, 1).value == 'tbl_Sourcing'
            or 'Source Name' in str(sheet.cell(row, 3).value)
        ):
            return row + 1
    return None


def _find_sourcing_columns(sheet, header_row):
    """Map each required sourcing field to its column index (None when the header is absent)."""
    # Order matters: the first fragment contained in a header claims that column.
    fragments = (
        ('Source Name', 'name'),
        ('Source Link', 'link'),
        ('Source Unit Cost', 'unit_cost'),
        ('Active', 'active'),
        ('Product Id', 'product_id'),
    )
    columns = {key: None for _, key in fragments}
    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value)
        for fragment, key in fragments:
            if fragment in header:
                columns[key] = col
                break
    return columns


def main():
    """Refresh unit cost and availability for every 'Magic Madhouse' sourcing row.

    Opens the 'Sourcing' sheet of 'TCG Sole Trader Copy.xlsx', locates the
    table header and the required columns, then walks the data rows until
    the first row with neither a source name nor a link. For each
    'Magic Madhouse' row with a non-blank link, the unit cost and active
    flag are reset, the page is scraped, and on success the price plus
    ITEM_SHIPPING_COST_IN and the "TRUE"/"FALSE" availability are written.
    The workbook is saved once after the loop; the browser is always closed.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sourcing_sheet_name = 'Sourcing'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sourcing_sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sourcing_sheet_name}' not found")
        return
    sourcing_sheet = wb[sourcing_sheet_name]

    start_row = _find_sourcing_start_row(sourcing_sheet)
    if start_row is None:
        print("Error: Could not find table 'tbl_Sourcing'")
        return

    # The header row sits directly above the first data row.
    columns = _find_sourcing_columns(sourcing_sheet, start_row - 1)
    source_name_col = columns['name']
    source_link_col = columns['link']
    source_unit_price_col = columns['unit_cost']
    source_is_available_col = columns['active']
    source_product_id_col = columns['product_id']
    print(f"Starting from row {start_row}")
    print(f"Sourcing Columns - Source Name: {source_name_col}, Source Link: {source_link_col}, Source Unit Cost: {source_unit_price_col}, Active: {source_is_available_col}, Product Id: {source_product_id_col}")
    if not all(columns.values()):
        print("Error: Could not find required columns")
        return

    # Setup Selenium driver
    print("Setting up browser automation (browser will be visible)...")
    driver = setup_driver()
    if not driver:
        return
    try:
        processed_count = 0
        updated_count = 0
        for row in range(start_row, sourcing_sheet.max_row + 1):
            source_name = sourcing_sheet.cell(row, source_name_col).value
            source_link = sourcing_sheet.cell(row, source_link_col).value
            source_product_id = sourcing_sheet.cell(row, source_product_id_col).value
            # A row with neither name nor link marks the end of the table.
            if not source_name and not source_link:
                break
            print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
            if not (
                source_name == "Magic Madhouse"
                and source_link
                and str(source_link).strip()
            ):
                continue

            # Reset first so a failed scrape leaves the row marked unavailable
            # rather than keeping a stale price.
            sourcing_sheet.cell(row, source_unit_price_col).value = None
            sourcing_sheet.cell(row, source_is_available_col).value = "FALSE"
            processed_count += 1
            print(f"\n{'='*60}")
            print(f"Processing row {row}: {source_link}")
            print(f"{'='*60}")
            # Scrape price
            gbp_price, active = scrape_magicmadhouse_price_selenium(driver, source_link)
            if gbp_price is None or active is None:
                print(" Error: Could not find price on page")
                continue
            print(f" Found price: {gbp_price}, active: {active}")
            if not gbp_price:
                # A price of 0 is treated as a parse failure.
                print(" Error: Could not parse price")
                continue
            sourcing_sheet.cell(row, source_unit_price_col).value = gbp_price + ITEM_SHIPPING_COST_IN
            sourcing_sheet.cell(row, source_is_available_col).value = "TRUE" if active else "FALSE"
            updated_count += 1
        # Save workbook
        print(f"\n{'='*60}")
        print("Saving workbook...")
        wb.save(workbook_name)
        print("\nComplete!")
        print(f"Processed: {processed_count} Magic Madhouse entries")
        print(f"Updated: {updated_count} prices")
    finally:
        driver.quit()
# Run the scraper only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()