Feat: Continuous background product scraping service.

This commit is contained in:
2026-03-09 08:40:44 +00:00
parent 752bb52e02
commit fd9d0cf874
14 changed files with 1810 additions and 1005 deletions

View File

@@ -1,764 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Commander Tracker</title>
<link href="https://fonts.googleapis.com/css2?family=Cinzel:wght@400;600;700&family=Crimson+Text:wght@400;600&display=swap" rel="stylesheet">
<style>
:root {
--bg-primary: #0a0a0f;
--bg-secondary: #1a1520;
--bg-card: #251a2e;
--accent-gold: #d4af37;
--accent-purple: #8b5cf6;
--accent-red: #dc2626;
--text-primary: #e7e5e4;
--text-secondary: #a8a29e;
--border-color: #3f3745;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Crimson Text', serif;
background: var(--bg-primary);
color: var(--text-primary);
min-height: 100vh;
background-image:
radial-gradient(circle at 20% 50%, rgba(139, 92, 246, 0.08) 0%, transparent 50%),
radial-gradient(circle at 80% 50%, rgba(212, 175, 55, 0.06) 0%, transparent 50%),
url("data:image/svg+xml,%3Csvg width='60' height='60' viewBox='0 0 60 60' xmlns='http://www.w3.org/2000/svg'%3E%3Cg fill='none' fill-rule='evenodd'%3E%3Cg fill='%23ffffff' fill-opacity='0.02'%3E%3Cpath d='M36 34v-4h-2v4h-4v2h4v4h2v-4h4v-2h-4zm0-30V0h-2v4h-4v2h4v4h2V6h4V4h-4zM6 34v-4H4v4H0v2h4v4h2v-4h4v-2H6zM6 4V0H4v4H0v2h4v4h2V6h4V4H6z'/%3E%3C/g%3E%3C/g%3E%3C/svg%3E");
overflow-x: hidden;
}
.container {
max-width: 1400px;
margin: 0 auto;
padding: 2rem;
}
header {
text-align: center;
margin-bottom: 3rem;
animation: fadeInDown 0.8s ease-out;
}
h1 {
font-family: 'Cinzel', serif;
font-size: 3.5rem;
font-weight: 700;
background: linear-gradient(135deg, var(--accent-gold), var(--accent-purple));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin-bottom: 0.5rem;
letter-spacing: 0.05em;
text-transform: uppercase;
}
.subtitle {
font-size: 1.2rem;
color: var(--text-secondary);
letter-spacing: 0.15em;
text-transform: uppercase;
}
.setup-section {
background: var(--bg-secondary);
border: 2px solid var(--border-color);
border-radius: 16px;
padding: 2rem;
margin-bottom: 2rem;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
animation: fadeIn 0.8s ease-out 0.2s backwards;
}
.setup-controls {
display: flex;
gap: 1rem;
align-items: center;
justify-content: center;
flex-wrap: wrap;
}
label {
font-family: 'Cinzel', serif;
font-size: 1.1rem;
color: var(--accent-gold);
letter-spacing: 0.05em;
}
input[type="number"] {
background: var(--bg-card);
border: 2px solid var(--border-color);
color: var(--text-primary);
padding: 0.75rem 1rem;
border-radius: 8px;
font-size: 1.2rem;
width: 80px;
text-align: center;
font-family: 'Cinzel', serif;
transition: all 0.3s ease;
}
input[type="number"]:focus {
outline: none;
border-color: var(--accent-purple);
box-shadow: 0 0 0 3px rgba(139, 92, 246, 0.2);
}
input[type="text"] {
background: var(--bg-card);
border: 2px solid var(--border-color);
color: var(--text-primary);
padding: 0.75rem 1rem;
border-radius: 8px;
font-size: 1rem;
width: 100%;
font-family: 'Crimson Text', serif;
transition: all 0.3s ease;
}
input[type="text"]:focus {
outline: none;
border-color: var(--accent-purple);
box-shadow: 0 0 0 3px rgba(139, 92, 246, 0.2);
}
input[type="text"]::placeholder {
color: var(--text-secondary);
opacity: 0.6;
}
.player-name-input-wrapper {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.player-name-label {
font-size: 0.9rem;
color: var(--text-secondary);
letter-spacing: 0.05em;
}
.btn {
background: linear-gradient(135deg, var(--accent-purple), var(--accent-gold));
border: none;
color: var(--bg-primary);
padding: 0.875rem 2rem;
border-radius: 8px;
font-family: 'Cinzel', serif;
font-size: 1.1rem;
font-weight: 600;
cursor: pointer;
letter-spacing: 0.05em;
text-transform: uppercase;
transition: all 0.3s ease;
box-shadow: 0 4px 16px rgba(139, 92, 246, 0.3);
}
.btn:hover {
transform: translateY(-2px);
box-shadow: 0 6px 24px rgba(139, 92, 246, 0.5);
}
.btn:active {
transform: translateY(0);
}
.btn-secondary {
background: var(--bg-card);
color: var(--text-primary);
border: 2px solid var(--border-color);
box-shadow: none;
}
.btn-secondary:hover {
border-color: var(--accent-purple);
box-shadow: 0 4px 16px rgba(139, 92, 246, 0.2);
}
.players-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 2rem;
margin-bottom: 2rem;
}
.player-card {
background: var(--bg-secondary);
border: 2px solid var(--border-color);
border-radius: 16px;
padding: 1.5rem;
position: relative;
overflow: hidden;
transition: all 0.4s ease;
animation: scaleIn 0.5s ease-out backwards;
}
.player-card::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
height: 4px;
background: linear-gradient(90deg, var(--accent-purple), var(--accent-gold));
}
.player-card.eliminated {
opacity: 0.5;
filter: grayscale(0.8);
}
.player-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1.5rem;
flex-wrap: wrap;
gap: 1rem;
}
.player-info {
flex: 1;
min-width: 150px;
}
.player-name {
font-family: 'Cinzel', serif;
font-size: 1.5rem;
font-weight: 600;
color: var(--accent-gold);
letter-spacing: 0.05em;
margin-bottom: 0.5rem;
}
.commander-deaths {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.9rem;
color: var(--text-secondary);
}
.death-counter {
display: flex;
align-items: center;
gap: 0.5rem;
background: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 6px;
padding: 0.25rem 0.5rem;
}
.death-display {
font-family: 'Cinzel', serif;
font-weight: 600;
color: var(--text-primary);
min-width: 20px;
text-align: center;
}
.death-btn {
background: transparent;
border: none;
color: var(--text-secondary);
cursor: pointer;
font-size: 1rem;
width: 20px;
height: 20px;
display: flex;
align-items: center;
justify-content: center;
transition: all 0.2s ease;
}
.death-btn:hover {
color: var(--accent-red);
transform: scale(1.2);
}
.eliminate-btn {
background: var(--accent-red);
color: white;
border: none;
padding: 0.5rem 1rem;
border-radius: 6px;
font-size: 0.9rem;
cursor: pointer;
font-family: 'Cinzel', serif;
transition: all 0.3s ease;
}
.eliminate-btn:hover {
background: #b91c1c;
transform: scale(1.05);
}
.life-total {
text-align: center;
margin-bottom: 2rem;
}
.life-display {
font-family: 'Cinzel', serif;
font-size: 4rem;
font-weight: 700;
color: var(--text-primary);
margin-bottom: 1rem;
text-shadow: 0 2px 8px rgba(0, 0, 0, 0.5);
}
.life-controls {
display: flex;
gap: 1rem;
justify-content: center;
}
.life-btn {
background: var(--bg-card);
border: 2px solid var(--border-color);
color: var(--text-primary);
width: 60px;
height: 60px;
border-radius: 12px;
font-family: 'Cinzel', serif;
font-size: 1.8rem;
font-weight: 600;
cursor: pointer;
transition: all 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
}
.life-btn:hover {
border-color: var(--accent-purple);
background: var(--accent-purple);
color: var(--bg-primary);
transform: scale(1.1);
}
.life-btn:active {
transform: scale(0.95);
}
.commander-damage-section {
border-top: 1px solid var(--border-color);
padding-top: 1.5rem;
}
.section-title {
font-family: 'Cinzel', serif;
font-size: 1.1rem;
color: var(--accent-purple);
margin-bottom: 1rem;
letter-spacing: 0.05em;
text-transform: uppercase;
}
.damage-grid {
display: grid;
gap: 0.75rem;
}
.damage-row {
background: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 8px;
padding: 0.75rem;
display: flex;
justify-content: space-between;
align-items: center;
transition: all 0.3s ease;
}
.damage-row:hover {
border-color: var(--accent-purple);
background: rgba(139, 92, 246, 0.1);
}
.damage-source {
font-size: 1rem;
color: var(--text-secondary);
}
.damage-controls {
display: flex;
gap: 0.5rem;
align-items: center;
}
.damage-display {
font-family: 'Cinzel', serif;
font-size: 1.5rem;
font-weight: 600;
color: var(--text-primary);
min-width: 40px;
text-align: center;
}
.damage-display.lethal {
color: var(--accent-red);
animation: pulse 1s ease-in-out infinite;
}
.damage-btn {
background: var(--bg-primary);
border: 1px solid var(--border-color);
color: var(--text-primary);
width: 36px;
height: 36px;
border-radius: 6px;
font-size: 1.2rem;
cursor: pointer;
transition: all 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
}
.damage-btn:hover {
border-color: var(--accent-purple);
background: var(--accent-purple);
color: var(--bg-primary);
}
@keyframes fadeInDown {
from {
opacity: 0;
transform: translateY(-30px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
@keyframes scaleIn {
from {
opacity: 0;
transform: scale(0.9);
}
to {
opacity: 1;
transform: scale(1);
}
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.6; }
}
.hidden {
display: none;
}
.save-indicator {
position: fixed;
bottom: 2rem;
right: 2rem;
background: var(--bg-secondary);
border: 2px solid var(--accent-gold);
border-radius: 8px;
padding: 0.75rem 1.5rem;
font-family: 'Cinzel', serif;
color: var(--accent-gold);
font-size: 0.9rem;
opacity: 0;
transform: translateY(20px);
transition: all 0.3s ease;
pointer-events: none;
z-index: 1000;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5);
}
.save-indicator.show {
opacity: 1;
transform: translateY(0);
}
@media (max-width: 768px) {
h1 {
font-size: 2.5rem;
}
.players-grid {
grid-template-columns: 1fr;
}
.life-display {
font-size: 3rem;
}
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>Commander</h1>
<div class="subtitle">Life & Damage Tracker</div>
</header>
<div class="setup-section" id="setupSection">
<div class="setup-controls">
<label for="playerCount">Players:</label>
<input type="number" id="playerCount" min="2" max="8" value="4" onchange="updatePlayerNames()">
<label for="startingLife">Starting Life:</label>
<input type="number" id="startingLife" min="1" value="40">
</div>
<div id="playerNamesSection" style="margin-top: 2rem;">
<div class="section-title" style="text-align: center; margin-bottom: 1.5rem;">Player Names</div>
<div id="playerNamesGrid" style="display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; max-width: 800px; margin: 0 auto 1.5rem;"></div>
</div>
<div style="text-align: center;">
<button class="btn" onclick="startGame()">Begin Battle</button>
</div>
</div>
<div class="hidden" id="gameSection">
<div style="text-align: center; margin-bottom: 2rem;">
<button class="btn btn-secondary" onclick="resetGame()">New Game</button>
</div>
<div class="players-grid" id="playersGrid"></div>
</div>
</div>
<div class="save-indicator" id="saveIndicator">Game Saved</div>
<script>
// Shared game state: the player list plus the life total each player starts with.
let gameState = { players: [], startingLife: 40 };

// Handle of the pending debounced-save timer; null until a save is first scheduled.
let saveTimeout = null;

// On page load, restore any saved game first, then build the player-name inputs.
window.onload = () => {
    loadGame();
    updatePlayerNames();
};
/**
 * Auto-save: persist the current game to localStorage under the key
 * 'mtgCommanderGame' and show the save indicator.
 *
 * Nothing is written while the player list is empty. Storage errors (for
 * example quota exceeded or storage disabled) are logged instead of thrown,
 * because this function is invoked from a timer where nothing could catch
 * them; the indicator is only shown when the write actually succeeded.
 */
function saveGame() {
    if (gameState.players.length === 0) {
        return;
    }
    try {
        localStorage.setItem('mtgCommanderGame', JSON.stringify(gameState));
    } catch (e) {
        console.error('Error saving game:', e);
        return;
    }
    showSaveIndicator();
}
/**
 * Show the save indicator element for two seconds.
 *
 * The hide timer is restarted on every call. Without that, the timer started
 * by an earlier save would remove the 'show' class before the most recent
 * save had been visible for its full two seconds.
 */
function showSaveIndicator() {
    const indicator = document.getElementById('saveIndicator');
    indicator.classList.add('show');
    // The timer handle lives on the function object so no extra global is needed.
    clearTimeout(showSaveIndicator.hideTimer);
    showSaveIndicator.hideTimer = setTimeout(() => {
        indicator.classList.remove('show');
    }, 2000);
}
/**
 * Debounced save: (re)schedule saveGame to run 500ms after the latest change,
 * cancelling whichever save was previously pending.
 */
function debouncedSave() {
    const delayMs = 500;
    clearTimeout(saveTimeout);
    saveTimeout = setTimeout(saveGame, delayMs);
}
/**
 * Load a saved game from localStorage (key 'mtgCommanderGame').
 *
 * When a saved game with at least one player exists, the setup section is
 * hidden, the game section is shown and the game is rendered.
 *
 * The parsed data is validated before it replaces gameState, and the previous
 * state is restored if anything fails, so a corrupt or foreign value in
 * storage can never leave gameState in a shape the rest of the page cannot
 * handle (e.g. null, or an object without a players array).
 */
function loadGame() {
    const previousState = gameState;
    const setupSection = document.getElementById('setupSection');
    const gameSection = document.getElementById('gameSection');
    try {
        const saved = localStorage.getItem('mtgCommanderGame');
        if (!saved) {
            return;
        }
        const parsed = JSON.parse(saved);
        if (!parsed || typeof parsed !== 'object' || !Array.isArray(parsed.players)) {
            throw new Error('Saved game has an unexpected format');
        }
        gameState = parsed;
        // Show game section if there's a saved game
        if (gameState.players.length > 0) {
            setupSection.classList.add('hidden');
            gameSection.classList.remove('hidden');
            renderGame();
        }
    } catch (e) {
        console.error('Error loading saved game:', e);
        // Fall back to the state we had before and to the setup screen.
        gameState = previousState;
        setupSection.classList.remove('hidden');
        gameSection.classList.add('hidden');
    }
}
/**
 * Rebuild the player-name inputs so there is one per player.
 *
 * The player count is read from the #playerCount input, parsed as a base-10
 * integer and clamped to the input's own min/max attributes (falling back to
 * 2 and 8). An empty or non-numeric value falls back to the minimum. The
 * clamped value is written back to the input so the field and the generated
 * name inputs always agree.
 *
 * Each generated input gets the id `playerName<i>` (0-based) and is pre-filled
 * with "Player <i+1>".
 */
function updatePlayerNames() {
    const countInput = document.getElementById('playerCount');
    const minPlayers = parseInt(countInput.min, 10) || 2;
    const maxPlayers = parseInt(countInput.max, 10) || 8;

    let playerCount = parseInt(countInput.value, 10);
    if (Number.isNaN(playerCount)) {
        playerCount = minPlayers;
    }
    playerCount = Math.min(maxPlayers, Math.max(minPlayers, playerCount));
    // Assigning .value from script does not fire onchange, so this cannot recurse.
    countInput.value = playerCount;

    const grid = document.getElementById('playerNamesGrid');
    grid.innerHTML = '';
    for (let i = 0; i < playerCount; i++) {
        const wrapper = document.createElement('div');
        wrapper.className = 'player-name-input-wrapper';
        wrapper.innerHTML = `
            <label class="player-name-label" for="playerName${i}">Player ${i + 1}</label>
            <input type="text" id="playerName${i}" placeholder="Enter name..." value="Player ${i + 1}">
        `;
        grid.appendChild(wrapper);
    }
}
/**
 * Start a new game from the values in the setup form.
 *
 * Builds one player object per seat (id, name, life, commanderDamage keyed by
 * the other players' ids, commanderDeaths, eliminated), switches from the
 * setup section to the game section, renders and saves immediately.
 *
 * Input handling:
 * - A player count that is not a number, or is below 2, becomes 2.
 * - A starting life that is not a number, or is below 1, becomes 40 (the
 *   form's default value).
 * - A seat whose name input is missing or blank gets the name "Player <n>".
 */
function startGame() {
    let playerCount = parseInt(document.getElementById('playerCount').value, 10);
    if (Number.isNaN(playerCount) || playerCount < 2) {
        playerCount = 2;
    }
    let startingLife = parseInt(document.getElementById('startingLife').value, 10);
    if (Number.isNaN(startingLife) || startingLife < 1) {
        startingLife = 40;
    }

    gameState.startingLife = startingLife;
    gameState.players = [];
    for (let i = 0; i < playerCount; i++) {
        // The name inputs are generated separately; tolerate a missing one
        // instead of throwing on `.value` of null.
        const nameInput = document.getElementById(`playerName${i}`);
        const typedName = nameInput ? nameInput.value.trim() : '';
        const player = {
            id: i,
            name: typedName || `Player ${i + 1}`,
            life: startingLife,
            commanderDamage: {},
            commanderDeaths: 0,
            eliminated: false
        };
        // Initialize commander damage tracking for every opponent.
        for (let j = 0; j < playerCount; j++) {
            if (i !== j) {
                player.commanderDamage[j] = 0;
            }
        }
        gameState.players.push(player);
    }

    document.getElementById('setupSection').classList.add('hidden');
    document.getElementById('gameSection').classList.remove('hidden');
    renderGame();
    saveGame();
}
/**
 * Escape a value for safe interpolation into HTML text or attribute content.
 * Player names are typed by the user and are also re-read from localStorage,
 * so they must never reach innerHTML unescaped.
 */
function escapeHtml(value) {
    const replacements = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return String(value).replace(/[&<>"']/g, ch => replacements[ch]);
}

/**
 * Rebuild the whole players grid from gameState.
 *
 * One card is created per player, with the commander-death counter, the
 * eliminate/revive toggle, the life total with its -5/-1/+1/+5 buttons and
 * the commander-damage rows. Cards animate in with a 0.1s stagger.
 *
 * Names are HTML-escaped and ids are coerced to numbers before being placed
 * into the inline onclick handlers.
 */
function renderGame() {
    const grid = document.getElementById('playersGrid');
    grid.innerHTML = '';
    gameState.players.forEach((player, index) => {
        const playerId = Number(player.id);
        const card = document.createElement('div');
        card.className = `player-card ${player.eliminated ? 'eliminated' : ''}`;
        card.style.animationDelay = `${index * 0.1}s`;
        card.innerHTML = `
            <div class="player-header">
                <div class="player-info">
                    <div class="player-name">${escapeHtml(player.name)}</div>
                    <div class="commander-deaths">
                        <span>Commander Deaths:</span>
                        <div class="death-counter">
                            <button class="death-btn" onclick="changeCommanderDeaths(${playerId}, -1)">&minus;</button>
                            <span class="death-display">${escapeHtml(player.commanderDeaths || 0)}</span>
                            <button class="death-btn" onclick="changeCommanderDeaths(${playerId}, 1)">+</button>
                        </div>
                    </div>
                </div>
                <button class="eliminate-btn" onclick="toggleEliminate(${playerId})">
                    ${player.eliminated ? 'Revive' : 'Eliminate'}
                </button>
            </div>
            <div class="life-total">
                <div class="life-display">${escapeHtml(player.life)}</div>
                <div class="life-controls">
                    <button class="life-btn" onclick="changeLife(${playerId}, -5)">-5</button>
                    <button class="life-btn" onclick="changeLife(${playerId}, -1)">-1</button>
                    <button class="life-btn" onclick="changeLife(${playerId}, 1)">+1</button>
                    <button class="life-btn" onclick="changeLife(${playerId}, 5)">+5</button>
                </div>
            </div>
            <div class="commander-damage-section">
                <div class="section-title">Commander Damage Taken</div>
                <div class="damage-grid">
                    ${renderCommanderDamage(player)}
                </div>
            </div>
        `;
        grid.appendChild(card);
    });
}

/**
 * Build the HTML for one player's commander-damage rows.
 *
 * @param {object} player - entry of gameState.players; its commanderDamage
 *     object maps a source player's index to the damage taken from them.
 * @returns {string} concatenated HTML for all rows. A total of 21 or more
 *     from a single source gets the 'lethal' class.
 */
function renderCommanderDamage(player) {
    const playerId = Number(player.id);
    return Object.keys(player.commanderDamage)
        .map(sourceKey => {
            const sourceId = Number(sourceKey);
            const source = gameState.players[sourceId];
            // A saved game could reference a seat that no longer exists;
            // show a generic label rather than throwing on source.name.
            const sourceName = source ? source.name : `Player ${sourceId + 1}`;
            const damage = player.commanderDamage[sourceKey];
            const isLethal = damage >= 21;
            return `
                <div class="damage-row">
                    <span class="damage-source">from ${escapeHtml(sourceName)}</span>
                    <div class="damage-controls">
                        <button class="damage-btn" onclick="changeCommanderDamage(${playerId}, ${sourceId}, -1)">&minus;</button>
                        <span class="damage-display ${isLethal ? 'lethal' : ''}">${escapeHtml(damage)}</span>
                        <button class="damage-btn" onclick="changeCommanderDamage(${playerId}, ${sourceId}, 1)">+</button>
                    </div>
                </div>
            `;
        })
        .join('');
}
/**
 * Adjust a player's life total by `amount` (never below 0), then re-render
 * and schedule a save. Eliminated players are left unchanged.
 */
function changeLife(playerId, amount) {
    const target = gameState.players[playerId];
    if (!target.eliminated) {
        const adjusted = target.life + amount;
        target.life = Math.max(0, adjusted);
        renderGame();
        debouncedSave();
    }
}
/**
 * Adjust the commander damage `playerId` has taken from `sourceId` by
 * `amount` (never below 0), then re-render and schedule a save.
 * Eliminated players are left unchanged.
 */
function changeCommanderDamage(playerId, sourceId, amount) {
    const target = gameState.players[playerId];
    if (!target.eliminated) {
        const damageBySource = target.commanderDamage;
        damageBySource[sourceId] = Math.max(0, damageBySource[sourceId] + amount);
        renderGame();
        debouncedSave();
    }
}
/**
 * Adjust a player's commander-death count by `amount` (never below 0), then
 * re-render and schedule a save. A missing count is treated as 0.
 * Eliminated players are left unchanged.
 */
function changeCommanderDeaths(playerId, amount) {
    const target = gameState.players[playerId];
    if (!target.eliminated) {
        const currentDeaths = target.commanderDeaths || 0;
        target.commanderDeaths = Math.max(0, currentDeaths + amount);
        renderGame();
        debouncedSave();
    }
}
/**
 * Flip a player's eliminated flag, then re-render and schedule a save.
 */
function toggleEliminate(playerId) {
    const target = gameState.players[playerId];
    const wasEliminated = target.eliminated;
    target.eliminated = !wasEliminated;
    renderGame();
    debouncedSave();
}
/**
 * After user confirmation, discard the saved game, return to the setup
 * screen, reset gameState to its defaults and rebuild the name inputs.
 */
function resetGame() {
    const confirmed = confirm('Are you sure you want to start a new game? Current game will be lost.');
    if (!confirmed) {
        return;
    }
    localStorage.removeItem('mtgCommanderGame');
    const setupSection = document.getElementById('setupSection');
    setupSection.classList.remove('hidden');
    const gameSection = document.getElementById('gameSection');
    gameSection.classList.add('hidden');
    gameState = { players: [], startingLife: 40 };
    updatePlayerNames();
}
</script>
</body>
</html>

Binary file not shown.

View File

View File

@@ -0,0 +1,69 @@
"""
fix_excel_for_libreoffice.py
----------------------------
Run this once on an xlsx downloaded from Excel Online before opening it in
LibreOffice Calc. It removes the _xlfn. / _xlws. prefixes that Excel Online
injects into formulas — those prefixes make LibreOffice show #NAME? errors.
Usage:
python3 fix_excel_for_libreoffice.py "TCG Sole Trader Copy.xlsx"
The file is rewritten in place; a .bak copy of the original is kept whenever any prefix was removed.
"""
import sys
import os
import re
import zipfile
import shutil
# Matches the function-name prefixes that are stripped from sheet XML.
FORMULA_PREFIXES = re.compile(r'_xlfn\.|_xlws\.')


def fix_xlsx(path: str) -> None:
    """Strip ``_xlfn.`` / ``_xlws.`` prefixes from the sheets of an xlsx file.

    The workbook is rewritten in place: every archive member is copied into a
    temporary ``<path>.tmp`` archive, members named ``xl/worksheets/sheet*.xml``
    have the prefixes removed, and the temporary archive then replaces ``path``.

    A backup ``<path>.bak`` is written first. It is kept when prefixes were
    removed and deleted again when the file was already clean.

    Args:
        path: Path of the xlsx file to patch.

    Raises:
        SystemExit: (exit code 1) if ``path`` does not exist.
        Exception: any error raised while reading or writing the archive is
            re-raised after the temporary file has been removed; ``path``
            itself is untouched in that case.
    """
    if not os.path.exists(path):
        print(f"File not found: {path}")
        sys.exit(1)
    bak_path = path + '.bak'
    tmp_path = path + '.tmp'
    shutil.copy2(path, bak_path)
    print(f"Backup saved → {bak_path}")
    changes = 0
    try:
        with zipfile.ZipFile(path, 'r') as zin, \
                zipfile.ZipFile(tmp_path, 'w', zipfile.ZIP_DEFLATED) as zout:
            for item in zin.infolist():
                data = zin.read(item.filename)
                # Only patch sheet XML — leave everything else byte-for-byte identical
                if item.filename.startswith('xl/worksheets/sheet') and item.filename.endswith('.xml'):
                    text = data.decode('utf-8')
                    new_text, n = FORMULA_PREFIXES.subn('', text)
                    if n:
                        print(f" {item.filename}: removed {n} prefix(es)")
                        changes += n
                        data = new_text.encode('utf-8')
                zout.writestr(item, data)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a half-written archive next to the workbook.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
    if changes:
        print(f"\nDone — {changes} prefix(es) removed. Open {path} in LibreOffice Calc.")
    else:
        print("\nNo _xlfn./_xlws. prefixes found — file was already clean.")
        os.remove(bak_path)
if __name__ == '__main__':
    # With no command-line argument, default to the workbook the scraper uses.
    default_workbook = 'TCG Sole Trader Copy.xlsx'
    target = sys.argv[1] if len(sys.argv) >= 2 else default_workbook
    fix_xlsx(target)

View File

@@ -0,0 +1,213 @@
import pandas as pd
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import re
import time
import random
def setup_driver(headless=True):
    """Create a Chrome WebDriver configured for scraping.

    Args:
        headless: When true, ``--headless`` is passed to Chrome.

    Returns:
        The ``webdriver.Chrome`` instance, or ``None`` if it could not be
        created (the error is printed).
    """
    arguments = ['--headless'] if headless else []
    arguments += [
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ]
    chrome_options = Options()
    for argument in arguments:
        chrome_options.add_argument(argument)
    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def parse_price_value(text):
    """Parse a displayed price such as ``"$1,234.56"`` or ``"141,30 €"``.

    Everything except digits, ``,``, ``.`` and ``-`` is discarded, then the
    separators are interpreted as follows:

    * Both ``,`` and ``.`` present: whichever occurs last is the decimal
      separator, the other one is a thousands separator.
    * Only commas: a single comma that is not followed by exactly three
      digits is a decimal comma (``"141,30"`` -> 141.3); otherwise commas are
      thousands separators (``"1,234"`` -> 1234.0).
    * Only dots, or no separator: passed to ``float`` unchanged.

    Args:
        text: The raw price text; may be ``None`` or empty.

    Returns:
        The price as a float, or ``None`` when ``text`` is empty or cannot be
        interpreted as a number.
    """
    if not text:
        return None
    # Remove currency symbols and anything else that is not part of the number
    cleaned = re.sub(r'[^\d,.\-]', '', text)
    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            # European style: 1.234,56
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            # US/UK style: 1,234.56
            cleaned = cleaned.replace(',', '')
    elif last_comma != -1:
        head, _, tail = cleaned.rpartition(',')
        if cleaned.count(',') == 1 and len(tail) != 3:
            cleaned = head + '.' + tail
        else:
            cleaned = cleaned.replace(',', '')
    try:
        return float(cleaned)
    except ValueError:
        return None
def scrape_mtg_stocks_values(driver, url):
    """Load ``url`` and read the cost and expected-value figures from it.

    The page is opened with ``driver``, followed by a random 5-10 second
    pause, after which the elements with ids ``cost-value`` and ``ev-value``
    are read and parsed with ``parse_price_value``.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to scrape.

    Returns:
        A dict with keys ``'expected_value'`` and ``'market_value'``. A value
        is ``None`` when it could not be read or parsed. Exceptions are caught
        and reported on stdout; whatever was parsed before the failure is
        still returned.
    """
    cost = None
    expected_value = None
    try:
        print(f" Loading page...")
        driver.get(url)
        time.sleep(random.uniform(5, 10))
        cost_text = driver.find_element(By.ID, 'cost-value').text.strip()
        expected_value_text = driver.find_element(By.ID, 'ev-value').text.strip()
        print(f" Cost: '{cost_text}'")
        print(f" Expected Value: '{expected_value_text}'")
        cost = parse_price_value(cost_text)
        expected_value = parse_price_value(expected_value_text)
    except Exception as ex:
        print(f" ✗ Failed:\nEV: {expected_value}\nCost: {cost}")
        # Report why it failed; a bare "Failed" is not actionable.
        print(f" Error: {ex}")
    return {
        'expected_value': expected_value,
        'market_value': cost
    }
def main():
    """Scrape BotBox values for every linked row of the 'MTG Set' sheet.

    Steps:
      1. Open 'TCG Sole Trader Copy.xlsx' and locate the header row by
         searching for a cell containing 'EV Play BotBox Link'.
      2. Resolve the six required columns from that header row.
      3. For each following row that has both a play and a collector link,
         scrape both pages and write the market/expected values (or '' when
         a value could not be read).
      4. Save the workbook and print a summary.

    The workbook is saved even if the row loop is interrupted, so values
    scraped before the interruption are kept. The browser is always closed.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'MTG Set'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return
    sheet = wb[sheet_name]

    # Locate the header row: the first row (from row 2) with the link header.
    table_found = False
    start_row = None
    header_row = None
    for row in range(2, max(50, sheet.max_row + 1)):
        for col in range(1, max(10, sheet.max_column + 1)):
            cell_value = str(sheet.cell(row, col).value)
            if 'EV Play BotBox Link' in cell_value:
                header_row = row
                start_row = row + 1
                table_found = True
                break
        if table_found:
            break
    if not table_found:
        print("Error: Could not find 'EV Play BotBox Link' column")
        return
    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")

    # Resolve the required columns from the header row.
    play_ev_link_col = None
    collector_ev_link_col = None
    play_expected_value_col = None
    play_market_value_col = None
    collector_expected_value_col = None
    collector_market_value_col = None
    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if 'EV Play BotBox Link' in header:
            play_ev_link_col = col
        elif 'EV Collector BotBox Link' in header:
            collector_ev_link_col = col
        elif 'Play Booster Expected Market Value BotBox' in header:
            play_expected_value_col = col
        elif 'Play Booster Sealed Market Value BotBox' in header:
            play_market_value_col = col
        elif 'Collector Booster Expected Market Value BotBox' in header:
            collector_expected_value_col = col
        elif 'Collector Booster Sealed Market Value BotBox' in header:
            collector_market_value_col = col
    print(f"Columns - Play EV Link: {play_ev_link_col}, Collector EV Link: {collector_ev_link_col}, Play Expected Value: {play_expected_value_col}, Play Market Value: {play_market_value_col}, Collector Expected Value: {collector_expected_value_col}, Collector Market Value: {collector_market_value_col}")
    if not all([play_ev_link_col, collector_ev_link_col, play_expected_value_col, play_market_value_col, collector_expected_value_col, collector_market_value_col]):
        print("Error: Could not find all required columns")
        print(f" EV Play BotBox Link: {'Found' if play_ev_link_col else 'NOT FOUND'}")
        print(f" EV Collector BotBox Link: {'Found' if collector_ev_link_col else 'NOT FOUND'}")
        print(f" Play Booster Expected Market Value BotBox: {'Found' if play_expected_value_col else 'NOT FOUND'}")
        print(f" Play Booster Sealed Market Value BotBox: {'Found' if play_market_value_col else 'NOT FOUND'}")
        print(f" Collector Booster Expected Market Value BotBox: {'Found' if collector_expected_value_col else 'NOT FOUND'}")
        print(f" Collector Booster Sealed Market Value BotBox : {'Found' if collector_market_value_col else 'NOT FOUND'}")
        return

    print("Setting up browser automation...")
    driver = setup_driver(headless=False)
    if not driver:
        return
    try:
        processed_count = 0
        play_updated_count = 0
        collector_updated_count = 0
        play_cleared_count = 0
        collector_cleared_count = 0
        try:
            for row in range(start_row, sheet.max_row + 1):
                play_ev_link = sheet.cell(row, play_ev_link_col).value
                collector_ev_link = sheet.cell(row, collector_ev_link_col).value
                # Rows are only processed when both links are present.
                if play_ev_link is None or play_ev_link == '' or collector_ev_link is None or collector_ev_link == '':
                    continue
                processed_count += 1
                print(f"\n{'='*80}")
                print(f"Processing row {row}:\n{play_ev_link}\n{collector_ev_link}")
                print(f"{'='*80}")

                play_result = scrape_mtg_stocks_values(driver, play_ev_link)
                play_cost = play_result['market_value']
                play_ev = play_result['expected_value']
                sheet.cell(row, play_market_value_col).value = play_cost if play_cost is not None else ''
                sheet.cell(row, play_expected_value_col).value = play_ev if play_ev is not None else ''
                if play_cost is not None or play_ev is not None:
                    play_updated_count += 1
                    print(f" ✓ Play updated - Expected: {play_ev}, Market: {play_cost}")
                else:
                    play_cleared_count += 1
                    print(f" ✗ Play cleared values - no matching booster type found")

                collector_result = scrape_mtg_stocks_values(driver, collector_ev_link)
                collector_cost = collector_result['market_value']
                collector_ev = collector_result['expected_value']
                sheet.cell(row, collector_market_value_col).value = collector_cost if collector_cost is not None else ''
                sheet.cell(row, collector_expected_value_col).value = collector_ev if collector_ev is not None else ''
                if collector_cost is not None or collector_ev is not None:
                    collector_updated_count += 1
                    print(f" ✓ Collector updated - Expected: {collector_ev}, Market: {collector_cost}")
                else:
                    collector_cleared_count += 1
                    print(f" ✗ Collector cleared values - no matching booster type found")
        finally:
            # Each row costs two slow page loads; persist what has been
            # scraped so far even when the loop is interrupted.
            print(f"\n{'='*80}")
            print(f"Saving workbook...")
            wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Play updated: {play_updated_count} entries")
        print(f"Collector updated: {collector_updated_count} entries")
        print(f"Play fields cleared: {play_cleared_count} entries (no matching data)")
        print(f"Collector fields cleared: {collector_cleared_count} entries (no matching data)")
    finally:
        driver.quit()


if __name__ == "__main__":
    main()

View File

@@ -171,13 +171,9 @@ def main():
sheet = wb[sheet_name]
# Find table boundaries and columns
table_found = False
start_row = None
header_row = None
# Search for table header
print("max sheet column: ", str(sheet.max_column))
for row in range(2, max(50, sheet.max_row + 1)):
cell_value = str(sheet.cell(row, 1).value)
@@ -210,13 +206,13 @@ def main():
header = str(sheet.cell(header_row, col).value).strip()
if 'EV MTG Stocks Link' in header:
ev_link_col = col
elif 'Play Booster Expected Market Value' in header:
elif 'Play Booster Expected Market Value MTG Stocks' in header:
play_expected_value_col = col
elif 'Play Boost Sealed Market Value' in header:
elif 'Play Booster Sealed Market Value MTG Stocks' in header:
play_market_value_col = col
elif 'Collector Booster Expected Market Value' in header:
elif 'Collector Booster Expected Market Value MTG Stocks' in header:
collector_expected_value_col = col
elif 'Collector Boost Sealed Market Value' in header:
elif 'Collector Booster Sealed Market Value MTG Stocks' in header:
collector_market_value_col = col
print(f"Columns - EV Link: {ev_link_col}, Play Expected Value: {play_expected_value_col}, Play Market Value: {play_market_value_col}, Collector Expected Value: {collector_expected_value_col}, Collector Market Value: {collector_market_value_col}")
@@ -225,19 +221,19 @@ def main():
print("Error: Could not find all required columns")
print(f" EV MTG Stocks Link: {'Found' if ev_link_col else 'NOT FOUND'}")
print(f" Play Booster Expected Market Value: {'Found' if play_expected_value_col else 'NOT FOUND'}")
print(f" Play Boost Sealed Market Value: {'Found' if play_market_value_col else 'NOT FOUND'}")
print(f" Play Booster Sealed Market Value: {'Found' if play_market_value_col else 'NOT FOUND'}")
print(f" Collector Booster Expected Market Value: {'Found' if collector_expected_value_col else 'NOT FOUND'}")
print(f" Collector Boost Sealed Market Value: {'Found' if collector_market_value_col else 'NOT FOUND'}")
print(f" Collector Booster Sealed Market Value: {'Found' if collector_market_value_col else 'NOT FOUND'}")
return
# Setup Selenium driver
print("Setting up browser automation...")
driver = setup_driver(headless=False) # Set to False to see browser
driver = setup_driver(headless=False)
if not driver:
return
print(f'Sheet dimensions: {sheet.max_row} rows x {sheet.max_column} columns')
try:
# Process rows
processed_count = 0
updated_count = 0
play_cleared_count = 0
@@ -245,24 +241,11 @@ def main():
for row in range(start_row, sheet.max_row + 1):
ev_link = sheet.cell(row, ev_link_col).value
# Check if row is empty
if not ev_link:
# Check if we've passed the end of the table
empty_count = 0
for check_col in range(1, min(10, sheet.max_column + 1)):
if not sheet.cell(row, check_col).value:
empty_count += 1
if empty_count >= 5: # If most columns are empty, assume end of table
break
continue
processed_count += 1
print(f"\n{'='*80}")
print(f"Processing row {row}: {ev_link}")
print(f"{'='*80}")
# Scrape values
result = scrape_mtg_stocks_values(driver, ev_link)
if result['found_play']:

View File

@@ -0,0 +1,708 @@
import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet
import requests
import re
import time
import random
from playwright.sync_api import sync_playwright, Browser, Page
from playwright.async_api import async_playwright
import asyncio
from aioconsole import ainput
from collections import defaultdict
from datetime import datetime, timedelta
class Product_Scraper:
domain: str
page: Page
def __init__(self, domain):
    """Remember the retailer domain this scraper instance is used for."""
    self.domain = domain
    print("Setting up browser automation")
@staticmethod
def parse_cost(cost_text):
if not cost_text:
return None
cost_clean = re.sub(r'[^\d,]', '', cost_text)
try:
return float(cost_clean) / 100
except ValueError:
return None
@classmethod
def parse_cost_from_pennies(cls, cost_text):
if not cost_text:
return None
cost_clean = cls.parse_cost(cost_text = cost_text)
if cost_clean is not None:
cost_clean = cost_clean / 100
return cost_clean
@classmethod
def parse_cost_chaoscards(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_cardmarket(cls, cost_text):
"""Convert '141,30 €' format to float in EUR"""
if not cost_text:
return None
cost_clean = re.sub(r'[^\d,]', '', cost_text)
cost_clean = cost_clean.replace(',', '.')
try:
return float(cost_clean)
except ValueError:
return None
@classmethod
def parse_cost_gameslore(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_magicmadhouse(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_newrealitiesgaming(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
async def scrape_cost_and_active_playwright(self, browser: Browser, url, page_load_element_selector, cost_selector, active_selector, invalid_active_statuses):
    """Open ``url`` in a new page and read the raw price text and stock state.

    After navigating, the coroutine sleeps for a random 20-25 seconds before
    reading anything from the page.

    Args:
        browser: Playwright browser used to open the page.
        url: Product page to load.
        page_load_element_selector: Accepted for interface compatibility.
            NOTE(review): the original code built a locator from it but never
            awaited or used it, so nothing waits on this element — confirm
            whether an explicit wait was intended.
        cost_selector: Selector of the element holding the price text.
        active_selector: Selector of the stock-status element, or ``None``.
            With ``None``, the product is active when a price text was read.
            When the selector matches nothing, the product counts as active.
        invalid_active_statuses: Status texts meaning "not available"; the
            product is active when the first matching element's stripped
            text is not in this collection (or the collection is ``None``).

    Returns:
        ``(cost_text, active)``. ``(None, None)`` if navigation or reading
        the price fails. ``active`` is ``None`` if only the status lookup
        failed.
    """
    print(f" Loading page...")
    # Work on a local reference: self.page may be reassigned by another call
    # on the same instance before this one finishes, and the page closed in
    # `finally` must be the one opened here.
    page = await browser.new_page()
    self.page = page
    cost = None
    active = None
    try:
        # Navigation is inside the try so that a failed goto still closes
        # the page instead of leaking it.
        await page.goto(url = url)
        await asyncio.sleep(random.uniform(20, 25))
        page_title = await page.title()
        print(f" Page title: {page_title}")
        element = page.locator(selector = cost_selector)
        text = await element.text_content()
        print(f" Text: '{text}'")
        cost = text
        if active_selector is None:
            active = (cost is not None)
        else:
            try:
                elements = await page.query_selector_all(selector = active_selector)
                print(f'# active elements: {len(elements)}')
                if len(elements) == 0:
                    active = True
                else:
                    text = await elements[0].text_content()
                    text = text.strip()
                    print(f" Text: '{text}'")
                    active = (invalid_active_statuses is None or text not in invalid_active_statuses)
            except Exception as e:
                print(f" Selector failed: {e}")
        if cost is None or active is None:
            print(f" ✗ No cost found")
            # await ainput("Press Enter to continue to next URL...")
        print(f"Cost: {cost}, Active: {active}")
    except Exception as e:
        print(f" Error: {e}")
        # await ainput("Press Enter to continue to next URL...")
        return None, None
    finally:
        await page.close()
    return cost, active
async def scrape_cost_and_active_playwright_cardmarket(self, browser, url, eur_to_gbp_rate):
    """Scrape the first Cardmarket offer price and return it as a landed cost.

    The scraped amount is parsed with ``parse_cost_cardmarket``, multiplied
    by ``eur_to_gbp_rate`` and increased by a flat inbound shipping
    allowance chosen from the unconverted amount (2 below 10, 8 below 100,
    otherwise 20).

    Returns:
        ``(cost, active)`` where ``active`` is True exactly when a cost
        could be parsed; ``(None, False)`` otherwise.
    """
    scrape_kwargs = {
        'browser': browser,
        'url': url,
        'page_load_element_selector': "body > main.container > div.page-title-container",
        'cost_selector': '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer > div.price-container > div > div:nth-child(1) > span:nth-child(1)',
        'active_selector': None,
        'invalid_active_statuses': [],
    }
    # The status returned by the generic scraper is ignored; activity is
    # derived from whether the price parses.
    raw_cost, _ = await self.scrape_cost_and_active_playwright(**scrape_kwargs)
    base_cost = Product_Scraper.parse_cost_cardmarket(cost_text = raw_cost)
    if base_cost is None:
        return None, False
    if base_cost < 10:
        shipping_in = 2
    elif base_cost < 100:
        shipping_in = 8
    else:
        shipping_in = 20
    return base_cost * eur_to_gbp_rate + shipping_in, True
async def scrape_cost_and_active_playwright_chaoscards(self, browser, url):
    """Scrape cost and stock status from a Chaos Cards product page.

    The price selector doubles as the page-load selector. Status texts
    "Out of stock" and "Coming soon" mark the product as inactive.

    Returns:
        ``(cost, active)`` with ``cost`` parsed by ``parse_cost_chaoscards``.
    """
    price_css = '.price_inc > span:nth-child(2)'
    stock_css = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li.prod_det_stock > div:nth-child(1) > div:nth-child(2)'
    scrape_kwargs = {
        'browser': browser,
        'url': url,
        'page_load_element_selector': price_css,
        'cost_selector': price_css,
        'active_selector': stock_css,
        'invalid_active_statuses': ["Out of stock", "Coming soon"],
    }
    raw_cost, is_active = await self.scrape_cost_and_active_playwright(**scrape_kwargs)
    return Product_Scraper.parse_cost_chaoscards(cost_text = raw_cost), is_active
async def scrape_cost_and_active_playwright_gameslore(self, browser, url):
    """Scrape cost and stock status from a Games Lore product page.

    The price selector doubles as the page-load selector. The status text
    "OUT OF STOCK" marks the product as inactive.

    Returns:
        ``(cost, active)`` with ``cost`` parsed by ``parse_cost_gameslore``.
    """
    price_css = 'div.columns > div.column.main > div.product-info-main > div.product-info-price > div.price-box > span.special-price > span.price-container > span.price-wrapper > span.price'
    stock_css = '.stock > span:nth-child(1)'
    scrape_kwargs = {
        'browser': browser,
        'url': url,
        'page_load_element_selector': price_css,
        'cost_selector': price_css,
        'active_selector': stock_css,
        'invalid_active_statuses': ["OUT OF STOCK"],
    }
    raw_cost, is_active = await self.scrape_cost_and_active_playwright(**scrape_kwargs)
    return Product_Scraper.parse_cost_gameslore(cost_text = raw_cost), is_active
async def scrape_cost_and_active_playwright_magicmadhouse(self, browser, url):
    """Scrape cost and stock status from a Magic Madhouse product page.

    Returns:
        ``(cost, active)`` with ``cost`` parsed by ``parse_cost_magicmadhouse``.
    """
    # NOTE(review): the status selector targets an error alert box but the
    # invalid-status list is empty, so the generic scraper reports active
    # whether or not the alert is present — confirm this is intended.
    scrape_kwargs = {
        'browser': browser,
        'url': url,
        'page_load_element_selector': '.productView-title',
        'cost_selector': 'div.body > div.container > div > div.productView > section.productView-details > div.productView-options > form > div.productView-options-selections > div.productView-product > div.productView-info > div.price-rating > div.productView-price > div.price-section.actual-price > span.price',
        'active_selector': '.alertBox.alertBox--error',
        'invalid_active_statuses': [],
    }
    raw_cost, is_active = await self.scrape_cost_and_active_playwright(**scrape_kwargs)
    return Product_Scraper.parse_cost_magicmadhouse(cost_text = raw_cost), is_active
async def scrape_cost_and_active_playwright_newrealitiesgaming(self, browser, url):
    """Scrape cost and stock status from a New Realities Gaming product page.

    Both values are read from the desktop add-to-cart button: its first
    span is used as the status text and its second span as the price. The
    status text 'Out of stock' marks the product as inactive.

    Returns:
        ``(cost, active)`` with ``cost`` parsed by
        ``parse_cost_newrealitiesgaming``.
    """
    button_selector = 'div.display-desktop.add-to-cart-button__wrapper div.w-wrapper form button'
    page_load_element_selector = button_selector
    cost_selector = f'{button_selector} span:nth-child(2)'
    active_selector = f'{button_selector} span:nth-child(1)'
    cost_text, active = await self.scrape_cost_and_active_playwright(
        browser = browser
        , url = url
        , page_load_element_selector = page_load_element_selector
        , cost_selector = cost_selector
        , active_selector = active_selector
        , invalid_active_statuses = ['Out of stock']
    )
    # Use this domain's own parser rather than the Magic Madhouse one, so a
    # future change to either parser cannot silently affect the other site.
    cost = Product_Scraper.parse_cost_newrealitiesgaming(cost_text = cost_text)
    return cost, active
async def scrape_prices_and_quantities_playwright_cardmarket(self, browser: Browser, url, eur_to_gbp_rate):
    """Collect (price, quantity) pairs from the offers on a Cardmarket page.

    Args:
        browser: Playwright browser used to open a fresh page.
        url: Cardmarket product page to load.
        eur_to_gbp_rate: Multiplier applied to every parsed price.

    Returns:
        A list of ``{'price': ..., 'quantity': ...}`` dicts. ``price`` is
        the parsed amount already multiplied by ``eur_to_gbp_rate``;
        ``quantity`` is parsed with the same numeric parser and may be
        None if its text does not parse. Offers whose price or quantity
        element is missing, or whose price does not parse, are skipped.
        An empty list is returned if querying the offers raises.

    Raises:
        Whatever ``goto``/``title`` raise; the page is closed first.
    """
    offer_container_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer'
    price_selector = 'div.price-container > div > div:nth-child(1) > span:nth-child(1)'
    quantity_selector = 'div.amount-container > span:nth-child(1)'
    print(f" Loading page...")
    page = await browser.new_page()
    self.page = page
    price_quantity_pairs = []
    try:
        # Navigation is inside the try so the page is closed even when
        # goto() fails.
        await page.goto(url = url)
        await asyncio.sleep(random.uniform(20, 25))
        page_title = await page.title()
        print(f" Page title: {page_title}")
        try:
            offer_containers = await page.query_selector_all(offer_container_selector)
            print(f" Offer container selector: Found {len(offer_containers)} elements")
            for offer_container in offer_containers:
                price_element = await offer_container.query_selector(price_selector)
                quantity_element = await offer_container.query_selector(quantity_selector)
                if price_element is None or quantity_element is None:
                    # One malformed offer must not discard the others.
                    continue
                price_text = await price_element.text_content()
                quantity_text = await quantity_element.text_content()
                # The original also tested `'' in price_text`, which is
                # always true (possibly a currency symbol lost in an
                # encoding round-trip); only the digit check is effective.
                if price_text is None or not re.search(r'\d', price_text):
                    continue
                print(f" ✓ Found price: {price_text}")
                if quantity_text is None:
                    continue
                parsed_price = Product_Scraper.parse_cost_cardmarket(cost_text = price_text)
                if parsed_price is None:
                    continue
                price_quantity_pairs.append({
                    'price': parsed_price * eur_to_gbp_rate
                    , 'quantity': Product_Scraper.parse_cost_cardmarket(cost_text = quantity_text)
                })
        except Exception as e:
            print(f" Price selector failed: {e}")
            # await ainput("Press enter to continue to next URL...")
            return []
    finally:
        await page.close()
    return price_quantity_pairs
class TCG_Sole_Trader_Workbook_Container:
    """Loads the TCG workbook and exposes its 'Sourcing' and 'Product' sheets.

    On construction the workbook named by ``WORKBOOK_NAME`` is opened, the
    header rows of both sheets are located and the 1-based column indices
    of the required headers are recorded. Problems are reported with a
    printed error and construction stops early; in that case the location
    attributes that were not resolved remain None.
    """
    # Column names of the DataFrame built by create_product_source_df().
    NAME_COLUMN_ACTIVE: str = 'Active'
    NAME_COLUMN_INDEX_ROW: str = 'Index Row'
    NAME_COLUMN_LINK: str = 'Link'
    NAME_COLUMN_PRODUCT_ID: str = 'Product Id'
    NAME_COLUMN_PRODUCT_IS_BOOSTER: str = 'Product Is Booster'
    NAME_COLUMN_PRODUCT_IS_BOOSTER_BOX: str = 'Product Is Booster Box'
    NAME_COLUMN_PRODUCT_IS_PRECON: str = 'Product Is Precon'
    NAME_COLUMN_SOURCE_NAME: str = 'Source Name'
    NAME_COLUMN_UNIT_COST: str = 'Cost'
    NAME_COLUMN_UNIT_PRICE: str = 'Price'
    PRODUCT_WORKSHEET_NAME = 'Product'
    SOURCING_WORKSHEET_NAME = 'Sourcing'
    WORKBOOK_NAME = 'TCG Sole Trader Copy.xlsx'

    index_column_active_sourcing: int
    index_column_is_booster_product: int
    index_column_is_booster_box_product: int
    index_column_is_precon_product: int
    index_column_link_sourcing: int
    index_column_name_sourcing: int
    index_column_product_id_product: int
    index_column_product_id_sourcing: int
    index_column_unit_cost_sourcing: int
    index_column_unit_price_sourcing: int
    index_row_header_product: int
    index_row_header_sourcing: int
    product_sheet: Worksheet
    sourcing_sheet: Worksheet
    workbook: Workbook

    # Header text on the Sourcing sheet -> attribute holding its column index.
    _SOURCING_HEADER_ATTRIBUTES = {
        'Source Name': 'index_column_name_sourcing'
        , 'Source Link': 'index_column_link_sourcing'
        , 'Source Unit Cost': 'index_column_unit_cost_sourcing'
        , 'Sale Price': 'index_column_unit_price_sourcing'
        , 'Active': 'index_column_active_sourcing'
        , 'Product Id': 'index_column_product_id_sourcing'
    }
    # Header text on the Product sheet -> attribute holding its column index.
    _PRODUCT_HEADER_ATTRIBUTES = {
        'Is Booster Box': 'index_column_is_booster_box_product'
        , 'Is Booster': 'index_column_is_booster_product'
        , 'Is Precon': 'index_column_is_precon_product'
        , 'Product Id': 'index_column_product_id_product'
    }

    def __init__(self):
        # The class-level annotations above create no attributes. Default
        # every location to None so the "not found" checks below can run
        # instead of raising AttributeError on a missing header.
        self.product_sheet = None
        self.sourcing_sheet = None
        self.index_row_header_product = None
        self.index_row_header_sourcing = None
        for attribute_name in list(self._SOURCING_HEADER_ATTRIBUTES.values()) + list(self._PRODUCT_HEADER_ATTRIBUTES.values()):
            setattr(self, attribute_name, None)

        print("Loading workbook...")
        self.workbook = load_workbook(self.WORKBOOK_NAME)
        if self.SOURCING_WORKSHEET_NAME not in self.workbook.sheetnames:
            print(f"Error: Sheet '{self.SOURCING_WORKSHEET_NAME}' not found")
            return
        if self.PRODUCT_WORKSHEET_NAME not in self.workbook.sheetnames:
            print(f"Error: Sheet '{self.PRODUCT_WORKSHEET_NAME}' not found")
            return
        self.sourcing_sheet = self.workbook[self.SOURCING_WORKSHEET_NAME]
        self.product_sheet = self.workbook[self.PRODUCT_WORKSHEET_NAME]

        # A single scan suffices: the former second "fallback" scan tested a
        # subset of the same condition and could never find anything new.
        self.index_row_header_sourcing = self._find_header_row(
            sheet = self.sourcing_sheet
            , table_name = 'tbl_Sourcing'
            , header_text = 'Source Name'
            , header_column = 3
        )
        if self.index_row_header_sourcing is None:
            print("Error: Could not find table 'tbl_Sourcing'")
            return
        self.index_row_header_product = self._find_header_row(
            sheet = self.product_sheet
            , table_name = 'tbl_Product'
            , header_text = 'Product Id'
            , header_column = 1
        )
        if self.index_row_header_product is None:
            print("Error: Could not find table 'tbl_Product'")
            return

        self._record_header_columns(self.sourcing_sheet, self.index_row_header_sourcing, self._SOURCING_HEADER_ATTRIBUTES)
        self._record_header_columns(self.product_sheet, self.index_row_header_product, self._PRODUCT_HEADER_ATTRIBUTES)

        print(f"Sourcing max row: {self.sourcing_sheet.max_row}")
        print(f"Sourcing header row: {self.index_row_header_sourcing}")
        print(f"Sourcing header 1: {self.sourcing_sheet.cell(self.index_row_header_sourcing, 1).value}")
        print(f"Sourcing Columns - Name: {self.index_column_name_sourcing}, Link: {self.index_column_link_sourcing}, Unit Cost: {self.index_column_unit_cost_sourcing}, Sale price: {self.index_column_unit_price_sourcing}, Active: {self.index_column_active_sourcing}, Product Id: {self.index_column_product_id_sourcing}")
        print(f"Product max row: {self.product_sheet.max_row}")
        print(f"Product header row: {self.index_row_header_product}")
        print(f"Product header 1: {self.product_sheet.cell(self.index_row_header_product, 1).value}")
        print(f"Product Columns - Id: {self.index_column_product_id_product}, Is Booster: {self.index_column_is_booster_product}, Is Booster Box: {self.index_column_is_booster_box_product}, Is Precon: {self.index_column_is_precon_product}")
        if not all([
            self.index_column_name_sourcing
            , self.index_column_link_sourcing
            , self.index_column_unit_cost_sourcing
            , self.index_column_unit_price_sourcing
            , self.index_column_product_id_sourcing
            , self.index_column_active_sourcing
            , self.index_column_product_id_product
            , self.index_column_is_booster_product
            , self.index_column_is_booster_box_product
            , self.index_column_is_precon_product
        ]):
            print("Error: Could not find required columns")
            return

    @staticmethod
    def _find_header_row(sheet, table_name, header_text, header_column):
        """Return the first row whose column-1 cell equals ``table_name`` or whose ``header_column`` cell contains ``header_text``; None if no row matches."""
        for row in range(1, sheet.max_row + 1):
            if sheet.cell(row, 1).value == table_name or header_text in str(sheet.cell(row, header_column).value):
                return row
        return None

    def _record_header_columns(self, sheet, index_row_header, header_attributes):
        """Store the column index of every recognised header of ``index_row_header`` on ``self``; the last occurrence of a repeated header wins."""
        for index_column in range(1, sheet.max_column + 1):
            header = str(sheet.cell(index_row_header, index_column).value).strip()
            attribute_name = header_attributes.get(header)
            if attribute_name is not None:
                setattr(self, attribute_name, index_column)

    @classmethod
    def create_product_source_df(cls):
        """Return an empty DataFrame with the product-source columns."""
        return pd.DataFrame(columns = [
            cls.NAME_COLUMN_INDEX_ROW
            , cls.NAME_COLUMN_PRODUCT_ID
            , cls.NAME_COLUMN_SOURCE_NAME
            , cls.NAME_COLUMN_LINK
            , cls.NAME_COLUMN_PRODUCT_IS_BOOSTER
            , cls.NAME_COLUMN_UNIT_COST
            , cls.NAME_COLUMN_UNIT_PRICE
            , cls.NAME_COLUMN_ACTIVE
        ])

    def get_sourcing_entries(self):
        """Build a DataFrame of all Sourcing rows that have a name and a link.

        Each row carries the sheet row index, product id, source name, link
        and whether the matching Product row is flagged as a booster (False
        when no Product row matches). Cost, price and active start as None.
        Errors while reading the sheets are printed and the rows gathered
        so far are returned.

        Returns:
            The DataFrame sorted by source name.
        """
        product_sources = self.create_product_source_df()
        try:
            # Index the Product sheet once instead of rescanning it for every
            # sourcing row. Only the first row for a given id is kept, which
            # matches the previous first-match-wins scan.
            booster_by_product_id = {}
            for product_row in range(self.index_row_header_product + 1, self.product_sheet.max_row + 1):
                product_id = self.product_sheet.cell(product_row, self.index_column_product_id_product).value
                if product_id in booster_by_product_id:
                    continue
                product_is_booster_text = str(self.product_sheet.cell(product_row, self.index_column_is_booster_product).value).upper()
                booster_by_product_id[product_id] = (product_is_booster_text == "TRUE")

            for index_row in range(self.index_row_header_sourcing + 1, self.sourcing_sheet.max_row + 1):
                source_name = self.sourcing_sheet.cell(index_row, self.index_column_name_sourcing).value
                source_link = self.sourcing_sheet.cell(index_row, self.index_column_link_sourcing).value
                source_product_id = self.sourcing_sheet.cell(index_row, self.index_column_product_id_sourcing).value
                if not source_name or not source_link:
                    continue
                print(f"found source: {source_name} - product: {source_product_id} - link: {source_link}")
                product_is_booster = booster_by_product_id.get(source_product_id, False)
                print(f"product is booster: {product_is_booster}")
                product_sources.loc[len(product_sources)] = [
                    index_row
                    , source_product_id
                    , source_name
                    , source_link
                    , product_is_booster
                    , None # cost
                    , None # price
                    , None # active
                ]
        except Exception as e:
            print(f"Error: {e}")
        # sort_values returns a new frame; the result must be kept for the
        # sort to have any effect.
        product_sources = product_sources.sort_values(self.NAME_COLUMN_SOURCE_NAME)
        return product_sources

    def clear_row_sourcing_sheet(self, index_row):
        """Blank the unit cost of ``index_row`` and mark the row inactive."""
        self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = None
        self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "FALSE"

    def update_row_sourcing_sheet(self, index_row, unit_cost = None, unit_price = None, active = None):
        """Write whichever of cost, price and active flag is not None to ``index_row``; the flag is stored as the text "TRUE"/"FALSE"."""
        if unit_cost is not None:
            self.sourcing_sheet.cell(index_row, self.index_column_unit_cost_sourcing).value = unit_cost
        if unit_price is not None:
            self.sourcing_sheet.cell(index_row, self.index_column_unit_price_sourcing).value = unit_price
        if active is not None:
            self.sourcing_sheet.cell(index_row, self.index_column_active_sourcing).value = "TRUE" if active else "FALSE"

    def save_workbook(self):
        """Save the workbook back to ``WORKBOOK_NAME``."""
        print(f"\n{'='*60}")
        print(f"Saving workbook...")
        self.workbook.save(self.WORKBOOK_NAME)
class Cost_Fetcher:
    """Scrapes every sourcing link of the workbook and writes the results back.

    Links are grouped by source (domain) name. Each domain gets its own
    browser and is processed sequentially with a pause between requests,
    while the domains themselves run concurrently.
    """
    ACCESSED_LAST_ON_FLAG: str = 'Accessed Last On'
    ACTIVE_FLAG: str = 'Active'
    COST_FLAG: str = 'Cost'
    DATA_FLAG: str = 'Data'
    ERROR_FLAG: str = 'Error'
    INDEX_DOMAIN_FLAG: str = 'Index Domain'
    INDEX_ROW_FLAG: str = 'Index Row'
    NAME_DOMAIN_CARD_MARKET: str = 'Card Market'
    NAME_DOMAIN_CHAOS_CARDS: str = 'Chaos Cards'
    NAME_DOMAIN_GAMES_LORE: str = 'Games Lore'
    NAME_DOMAIN_MAGIC_MADHOUSE: str = 'Magic Madhouse'
    NAME_DOMAIN_NEW_REALITIES_GAMING: str = 'New Realities Gaming'
    NAME_FLAG: str = 'Name'
    PRICE_FLAG: str = 'Price'
    SUCCESS_FLAG: str = 'Success'
    URL_FLAG: str = 'Url'
    # Minimum pause between two requests to the same domain, plus the upper
    # bound of the random jitter added on top of it (both in seconds).
    MIN_SECONDS_BETWEEN_REQUESTS = 45
    MAX_JITTER_SECONDS = 5
    # An offer with at least this many units is preferred for the sale price.
    PREFERRED_MIN_QUANTITY = 8

    domain_names: list[str]
    eur_to_gbp_rate: float
    product_scrapers: list[Product_Scraper]
    product_sources: pd.DataFrame
    workbook_container: TCG_Sole_Trader_Workbook_Container

    def __init__(self):
        self.domain_names = [
            self.NAME_DOMAIN_CARD_MARKET
            , self.NAME_DOMAIN_CHAOS_CARDS
            , self.NAME_DOMAIN_GAMES_LORE
            , self.NAME_DOMAIN_MAGIC_MADHOUSE
            , self.NAME_DOMAIN_NEW_REALITIES_GAMING
        ]
        self.domain_details = {
            domain_name: {
                self.NAME_FLAG: domain_name
                , self.INDEX_DOMAIN_FLAG: index_domain
                , self.ACCESSED_LAST_ON_FLAG: 0
            }
            for index_domain, domain_name in enumerate(self.domain_names)
        }
        # One scraper per domain, in the same order as domain_names.
        self.product_scrapers = [Product_Scraper(domain) for domain in self.domain_names]
        self.workbook_container = TCG_Sole_Trader_Workbook_Container()
        self.get_eur_to_gbp_rate()

    def get_index_domain_from_name(self, domain_name):
        """Return the position of ``domain_name`` in ``domain_names``.

        Raises:
            ValueError: if the domain is unknown.
        """
        for index_domain, known_name in enumerate(self.domain_names):
            if known_name == domain_name:
                return index_domain
        raise ValueError(f'Domain does not exist: {domain_name}')

    def get_eur_to_gbp_rate(self):
        """Fetch the EUR->GBP rate into ``eur_to_gbp_rate``; fall back to 0.85 on any error."""
        try:
            response = requests.get('https://api.exchangerate-api.com/v4/latest/EUR', timeout=10)
            data = response.json()
            self.eur_to_gbp_rate = data['rates']['GBP']
        except Exception as e:
            print(f"Error fetching exchange rate: {e}")
            print("Using fallback rate: 0.85")
            self.eur_to_gbp_rate = 0.85

    async def fetch_all(self):
        """Scrape all sourcing entries and save active results to the workbook.

        Every error is caught and printed; nothing is raised to the caller.
        """
        try:
            processed_count = 0
            updated_count = 0
            self.product_sources = self.workbook_container.get_sourcing_entries()
            # Group product sources by domain
            domain_groups = {domain: [] for domain in self.domain_names}
            for index_product_source, product_source in self.product_sources.iterrows():
                source_name = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_SOURCE_NAME]
                if source_name in domain_groups:
                    domain_groups[source_name].append(product_source)
            active_domains = [name for name in self.domain_names if domain_groups[name]]
            sourced_products = []
            async with async_playwright() as p:
                # Each task launches its own browser, so a failed launch
                # cannot strand browsers that were opened for other domains.
                domain_tasks = [
                    self._launch_and_process_domain(p, domain_name, domain_groups[domain_name])
                    for domain_name in active_domains
                ]
                # return_exceptions keeps one failing domain from discarding
                # the results of the others.
                all_domain_results = await asyncio.gather(*domain_tasks, return_exceptions=True)
            for domain_name, domain_results in zip(active_domains, all_domain_results):
                if isinstance(domain_results, BaseException):
                    print(f"Error processing domain {domain_name}: {domain_results}")
                    continue
                sourced_products.extend(domain_results)
            # Process results
            for sourced_product in sourced_products:
                index_row = sourced_product[self.INDEX_ROW_FLAG]
                unit_cost = sourced_product[self.COST_FLAG]
                unit_price = sourced_product[self.PRICE_FLAG]
                active = sourced_product[self.ACTIVE_FLAG]
                processed_count += 1
                # Inactive rows were already cleared before scraping.
                if not active:
                    continue
                updated_count += 1
                self.workbook_container.update_row_sourcing_sheet(
                    index_row = index_row
                    , unit_cost = unit_cost
                    , unit_price = unit_price
                    , active = active
                )
            self.workbook_container.save_workbook()
            print(f"\nComplete!")
            print(f"Processed: {processed_count} entries")
            print(f"Updated: {updated_count} costs")
        except Exception as e:
            print(f"Error: {e}")

    async def _launch_and_process_domain(self, playwright, domain_name, product_sources):
        """Launch a browser for one domain and hand it to process_domain_urls, which closes it."""
        browser = await playwright.chromium.launch(headless=False)
        return await self.process_domain_urls(browser, domain_name, product_sources)

    async def process_domain_urls(self, browser, domain_name, product_sources):
        """Process all URLs for a single domain sequentially with rate limiting.

        The browser is closed when processing ends, whether or not it
        succeeded. Returns the list of per-row result dicts.
        """
        results = []
        last_access_time = 0
        try:
            for product_source in product_sources:
                # Rate limiting: keep at least MIN_SECONDS_BETWEEN_REQUESTS
                # (plus random jitter) between requests to the same domain.
                time_since_last = time.time() - last_access_time
                if time_since_last < self.MIN_SECONDS_BETWEEN_REQUESTS:
                    wait_time = self.MIN_SECONDS_BETWEEN_REQUESTS - time_since_last + random.uniform(0, self.MAX_JITTER_SECONDS)
                    print(f" [{domain_name}] Waiting {wait_time:.1f}s before next request...")
                    await asyncio.sleep(wait_time)
                # Process the URL
                result = await self.fetch_single_with_browser(browser, domain_name, product_source)
                results.append(result)
                last_access_time = time.time()
        finally:
            await browser.close()
        return results

    async def fetch_single_with_browser(self, browser, domain_name, product_source):
        """Fetch a single URL using the provided browser.

        The row is cleared in the workbook first. Scraping errors are
        printed and leave the affected values as None.

        Returns:
            The dict built by ``make_result_data_json``.
        """
        index_row = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_INDEX_ROW]
        source_link = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_LINK]
        self.workbook_container.clear_row_sourcing_sheet(index_row = index_row)
        Cost_Fetcher.log_processing_new_row(
            index_row = index_row
            , source_link = source_link
        )
        index_domain = self.get_index_domain_from_name(domain_name)
        cost = None
        price = None
        active = None
        try:
            if domain_name == self.NAME_DOMAIN_CARD_MARKET:
                # Boosters additionally get a sale price from the offer list.
                if product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_PRODUCT_IS_BOOSTER]:
                    price_quantity_pairs = await self.product_scrapers[index_domain].scrape_prices_and_quantities_playwright_cardmarket(
                        browser = browser
                        , url = source_link
                        , eur_to_gbp_rate = self.eur_to_gbp_rate
                    )
                    price = self.get_sale_price_from_price_quantity_pairs(price_quantity_pairs = price_quantity_pairs)
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_cardmarket(
                    browser = browser
                    , url = source_link
                    , eur_to_gbp_rate = self.eur_to_gbp_rate
                )
            elif domain_name == self.NAME_DOMAIN_CHAOS_CARDS:
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_chaoscards(
                    browser = browser
                    , url = source_link
                )
            elif domain_name == self.NAME_DOMAIN_GAMES_LORE:
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_gameslore(
                    browser = browser
                    , url = source_link
                )
            elif domain_name == self.NAME_DOMAIN_MAGIC_MADHOUSE:
                cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_magicmadhouse(
                    browser = browser
                    , url = source_link
                )
            # Disabled ("unverified") branch, kept for reference:
            # elif domain_name == self.NAME_DOMAIN_NEW_REALITIES_GAMING:
            #     cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_newrealitiesgaming(
            #         browser = browser
            #         , url = source_link
            #     )
            if ((cost is None and price is None) or active is None):
                print(f" Error: Could not find cost on page")
        except Exception as e:
            print(f" Error processing {source_link}: {e}")
        return self.make_result_data_json(
            index_row = index_row
            , cost = cost
            , price = price
            , active = active
        )

    @classmethod
    def make_result_data_json(cls, index_row, cost = None, price = None, active = None):
        """Bundle one row's scrape outcome into a dict keyed by the *_FLAG names."""
        return {
            cls.INDEX_ROW_FLAG: index_row
            , cls.COST_FLAG: cost
            , cls.PRICE_FLAG: price
            , cls.ACTIVE_FLAG: active
        }

    def get_sale_price_from_price_quantity_pairs(self, price_quantity_pairs):
        """Pick a sale price from scraped offers.

        The ``price`` of each pair is used as-is. The Cardmarket scraper has
        already multiplied it by the exchange rate, so converting again
        here would apply the rate twice.

        Selection: the first offer with a quantity of at least
        ``PREFERRED_MIN_QUANTITY``; otherwise the first offer when the
        largest quantity is 2 or less, else the first offer holding the
        largest quantity. Offers with a missing or zero price, or a missing
        quantity, are ignored.

        Returns:
            The chosen price, or None when no offer qualifies.
        """
        if not price_quantity_pairs:
            return None
        usable_pairs = []
        for price_quantity_pair in price_quantity_pairs:
            offer_price = price_quantity_pair['price']
            quantity = price_quantity_pair['quantity']
            print(f" Found price: £{offer_price}")
            print(f" Found quantity: {quantity}")
            # A quantity that failed to parse cannot be compared; skip it
            # instead of raising TypeError.
            if quantity is None or not offer_price:
                continue
            usable_pairs.append((offer_price, quantity))
        # First pass: look for a well-stocked offer
        for offer_price, quantity in usable_pairs:
            if quantity >= self.PREFERRED_MIN_QUANTITY:
                print(f" Selected: £{offer_price:.2f}")
                return offer_price
        # Second pass: fall back to the largest quantity available
        print("Offer with quantity >= 8 not found")
        max_quantity = max((quantity for _, quantity in usable_pairs), default = 0)
        for offer_price, quantity in usable_pairs:
            if max_quantity <= 2 or quantity == max_quantity:
                print(f" Selected: £{offer_price:.2f}")
                return offer_price
        return None

    @staticmethod
    def log_processing_new_row(index_row, source_link):
        """Print a banner announcing the row and link about to be processed."""
        print(f"\n{'='*60}")
        print(f"Processing row {index_row}: {source_link}")
        print(f"{'='*60}")
async def main():
    """Create a Cost_Fetcher and run a single fetch pass."""
    await Cost_Fetcher().fetch_all()


if __name__ == "__main__":
    # Script entry point: drive the coroutine on a fresh event loop.
    asyncio.run(main())

View File

@@ -2,28 +2,177 @@ import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet
import requests
"""
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
"""
import re
import time
import random
from playwright.sync_api import sync_playwright, Browser, Page
from playwright.async_api import async_playwright
# import playwright
# import undetected_chromedriver as uc
# from undetected_chromedriver import Chrome
import asyncio
from aioconsole import ainput
from collections import defaultdict
from datetime import datetime, timedelta
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import subprocess
import os
# Interval in hours; presumably the pause between background scraping cycles.
# NOTE(review): its usage is outside this view — confirm where it is consumed.
CYCLE_INTERVAL_HOURS = 4
class Email_Notifier:
    """Sends HTML e-mails from one fixed sender to one fixed recipient over SMTP."""

    def __init__(self, sender_email, sender_password, recipient_email, smtp_host='smtp.gmail.com', smtp_port=587):
        self.sender_email = sender_email
        self.sender_password = sender_password
        self.recipient_email = recipient_email
        self.smtp_host = smtp_host
        self.smtp_port = smtp_port

    def send_email(self, subject, body_html):
        """Send ``body_html`` as an HTML message with the given subject.

        Port 465 connects with ``SMTP_SSL``; any other port connects with
        plain ``SMTP`` and then issues STARTTLS before logging in.

        Returns:
            True when the message was handed to the server, False when any
            exception occurred (the error is printed).
        """
        message = MIMEMultipart('alternative')
        message['From'] = self.sender_email
        message['To'] = self.recipient_email
        message['Subject'] = subject
        message.attach(MIMEText(body_html, 'html'))

        implicit_tls = (self.smtp_port == 465)
        try:
            connection_factory = smtplib.SMTP_SSL if implicit_tls else smtplib.SMTP
            with connection_factory(self.smtp_host, self.smtp_port) as server:
                if not implicit_tls:
                    server.starttls()
                server.login(self.sender_email, self.sender_password)
                server.send_message(message)
        except Exception as e:
            print(f"Error sending email: {e}")
            return False
        print(f"Email sent: {subject}")
        return True
class Profitability_Monitor:
    """Reads profit figures from the workbook and reports crossings of zero.

    ``read_states`` snapshots profitability per MTG set and per precon
    product, ``find_changes`` compares two snapshots, and
    ``format_email_html`` renders the resulting alerts as an HTML table.
    """
    MTG_SET_WORKSHEET_NAME = 'MTG Set'
    MTG_SET_DATA_START_ROW = 3 # Row 1 is a merged group header, row 2 has column names
    PRODUCT_WORKSHEET_NAME = 'Product'
    PRODUCT_DATA_START_ROW = 2
    # MTG Set sheet column indices (1-based)
    COL_MTG_SET_NAME = 2
    COL_PLAY_PROFIT = 45
    COL_PLAY_MARGIN = 46
    COL_COLLECTOR_PROFIT = 53
    COL_COLLECTOR_MARGIN = 54
    COL_PLAY_SINGLES_PROFIT = 59
    COL_PLAY_SINGLES_MARGIN = 60
    COL_COLLECTOR_SINGLES_PROFIT = 73
    COL_COLLECTOR_SINGLES_MARGIN = 74
    # Product sheet column indices (1-based) for precons
    COL_PRODUCT_NAME = 2
    COL_PRODUCT_IS_PRECON = 7
    COL_PRODUCT_MIN_COST = 8
    COL_PRODUCT_PROFIT = 10
    # One entry per monitored profit column of the MTG Set sheet, with the
    # action text to report when it becomes profitable / unprofitable.
    PROFIT_CHECKS = [
        {'profit_col': COL_PLAY_PROFIT, 'margin_col': COL_PLAY_MARGIN, 'action_buy': 'Buy Play Booster', 'action_no_buy': 'DO NOT Buy Play Booster'},
        {'profit_col': COL_COLLECTOR_PROFIT, 'margin_col': COL_COLLECTOR_MARGIN, 'action_buy': 'Buy Collector Booster', 'action_no_buy': 'DO NOT Buy Collector Booster'},
        {'profit_col': COL_PLAY_SINGLES_PROFIT, 'margin_col': COL_PLAY_SINGLES_MARGIN, 'action_buy': 'Split Play Booster', 'action_no_buy': 'DO NOT Split Play Booster'},
        {'profit_col': COL_COLLECTOR_SINGLES_PROFIT, 'margin_col': COL_COLLECTOR_SINGLES_MARGIN, 'action_buy': 'Split Collector Booster','action_no_buy': 'DO NOT Split Collector Booster'},
    ]

    @staticmethod
    def _is_number(value):
        """True for int/float cell values (the only ones treated as numeric)."""
        return isinstance(value, (int, float))

    @staticmethod
    def _is_flag_set(value):
        """Interpret a flag cell; textual 'FALSE', '0', 'NO' and '' count as unset."""
        if isinstance(value, str):
            # A non-empty string such as "FALSE" is truthy in Python, so it
            # has to be checked by content rather than by truthiness.
            return value.strip().upper() not in ('', 'FALSE', '0', 'NO')
        return bool(value)

    @staticmethod
    def _format_margin(margin):
        """Render a fractional margin as a percentage with one decimal, or 'N/A'."""
        return f"{margin * 100:.1f}%" if isinstance(margin, (int, float)) else "N/A"

    def read_states(self, workbook_path):
        """Load the workbook with data_only=True to read formula-calculated profit values.

        Returns:
            ``{'mtg_set': {set_name: {profit_col: state}}, 'precon': {name: state}}``
            where each ``state`` holds ``profit``, ``margin`` and
            ``is_profitable`` (profit is a number greater than 0). For
            precons the margin is profit divided by minimum cost, or None
            when either is not numeric or the cost is 0.

        Raises:
            KeyError: if a required worksheet is missing (the workbook is
                closed first).
        """
        wb = load_workbook(workbook_path, data_only=True)
        try:
            mtg_set_states = {}
            precon_states = {}
            mtg_sheet = wb[self.MTG_SET_WORKSHEET_NAME]
            for row in range(self.MTG_SET_DATA_START_ROW, mtg_sheet.max_row + 1):
                set_name = mtg_sheet.cell(row, self.COL_MTG_SET_NAME).value
                if not set_name:
                    continue
                set_data = {}
                for check in self.PROFIT_CHECKS:
                    profit = mtg_sheet.cell(row, check['profit_col']).value
                    margin = mtg_sheet.cell(row, check['margin_col']).value
                    set_data[check['profit_col']] = {
                        'profit': profit,
                        'margin': margin,
                        'is_profitable': self._is_number(profit) and profit > 0,
                    }
                mtg_set_states[set_name] = set_data
            prod_sheet = wb[self.PRODUCT_WORKSHEET_NAME]
            for row in range(self.PRODUCT_DATA_START_ROW, prod_sheet.max_row + 1):
                is_precon = prod_sheet.cell(row, self.COL_PRODUCT_IS_PRECON).value
                if not self._is_flag_set(is_precon):
                    continue
                name = prod_sheet.cell(row, self.COL_PRODUCT_NAME).value
                profit = prod_sheet.cell(row, self.COL_PRODUCT_PROFIT).value
                min_cost = prod_sheet.cell(row, self.COL_PRODUCT_MIN_COST).value
                if not name:
                    continue
                margin = (profit / min_cost) if (self._is_number(profit) and self._is_number(min_cost) and min_cost != 0) else None
                precon_states[name] = {
                    'profit': profit,
                    'margin': margin,
                    'is_profitable': self._is_number(profit) and profit > 0,
                }
        finally:
            # Release the file handle even when a sheet is missing or a cell
            # read fails.
            wb.close()
        return {'mtg_set': mtg_set_states, 'precon': precon_states}

    def find_changes(self, old_states, new_states):
        """Compare old and new profit states; return list of alert dicts for any crossings of the 0 threshold.

        Entries with no previous state are never reported. Each alert has
        the keys ``name``, ``action`` and ``margin`` (formatted text).
        """
        alerts = []
        for check in self.PROFIT_CHECKS:
            col = check['profit_col']
            for set_name, new_set_data in new_states['mtg_set'].items():
                new_entry = new_set_data.get(col, {})
                old_entry = old_states.get('mtg_set', {}).get(set_name, {}).get(col, {})
                old_profitable = old_entry.get('is_profitable', None)
                new_profitable = new_entry.get('is_profitable', False)
                if old_profitable is None or old_profitable == new_profitable:
                    continue
                action = check['action_buy'] if new_profitable else check['action_no_buy']
                alerts.append({'name': set_name, 'action': action, 'margin': self._format_margin(new_entry.get('margin'))})
        for product_name, new_entry in new_states['precon'].items():
            old_entry = old_states.get('precon', {}).get(product_name, {})
            old_profitable = old_entry.get('is_profitable', None)
            new_profitable = new_entry.get('is_profitable', False)
            if old_profitable is None or old_profitable == new_profitable:
                continue
            action = 'Buy Precon' if new_profitable else 'DO NOT Buy Precon'
            alerts.append({'name': product_name, 'action': action, 'margin': self._format_margin(new_entry.get('margin'))})
        return alerts

    def format_email_html(self, alerts):
        """Render alerts as an HTML document with one table row per alert and a generation timestamp."""
        import html
        # Names come from spreadsheet cells and may contain '&' or '<';
        # escape them so they display literally.
        rows = ''.join(
            f"<tr><td style='padding:6px 12px'>{html.escape(str(a['name']))}</td>"
            f"<td style='padding:6px 12px'>{html.escape(str(a['action']))}</td>"
            f"<td style='padding:6px 12px;text-align:right'>{html.escape(str(a['margin']))}</td></tr>"
            for a in alerts
        )
        return (
            "<html><body>"
            "<h2 style='font-family:sans-serif'>TCG Profitability Alert</h2>"
            "<table border='1' cellpadding='0' cellspacing='0' style='border-collapse:collapse;font-family:sans-serif'>"
            "<tr style='background:#ddd'>"
            "<th style='padding:6px 12px'>MTG Set</th>"
            "<th style='padding:6px 12px'>Action</th>"
            "<th style='padding:6px 12px'>Margin</th>"
            "</tr>"
            f"{rows}"
            "</table>"
            f"<p style='font-family:sans-serif;color:#666'><small>Generated {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</small></p>"
            "</body></html>"
        )
class Product_Scraper:
domain: str
@@ -50,12 +199,12 @@ class Product_Scraper:
if cost_clean is not None:
cost_clean = cost_clean / 100
return cost_clean
@classmethod
def parse_cost_chaoscards(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_cardmarket(cls, cost_text):
# return cls.parse_cost(cost_text = cost_text)
"""Convert '141,30 €' format to float in EUR"""
if not cost_text:
return None
@@ -71,15 +220,18 @@ class Product_Scraper:
@classmethod
def parse_cost_magicmadhouse(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_newrealitiesgaming(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
async def scrape_cost_and_active_playwright(self, browser: Browser, url, page_load_element_selector, cost_selector, active_selector, invalid_active_statuses, min_delay = 0):
async def scrape_cost_and_active_playwright(self, browser: Browser, url, page_load_element_selector, cost_selector, active_selector, invalid_active_statuses):
print(f" Loading page...")
self.page = await browser.new_page()
await self.page.goto(url = url)
cost = None
active = None
try:
# Automatically waits up to 30s by default
await self.page.goto(url=url, wait_until="domcontentloaded", timeout=30000)
await asyncio.sleep(random.uniform(20, 25))
element = self.page.locator(selector = page_load_element_selector)
page_title = await self.page.title()
print(f" Page title: {page_title}")
@@ -94,11 +246,13 @@ class Product_Scraper:
active = (cost is not None)
else:
try:
elements = await self.page.query_selector_all(selector = cost_selector)
elements = await self.page.query_selector_all(selector = active_selector)
print(f'# active elements: {len(elements)}')
if len(elements) == 0:
active = True
else:
text = await elements[0].text_content()
text = text.strip()
print(f" Text: '{text}'")
active = (invalid_active_statuses is None or text not in invalid_active_statuses)
except Exception as e:
@@ -106,14 +260,15 @@ class Product_Scraper:
if cost is None or active is None:
print(f" ✗ No cost found")
print(f"Cost: {cost}, Active: {active}")
await ainput("Press Enter to continue to next URL...")
# await ainput("Press Enter to continue to next URL...")
print(f"Cost: {cost}, Active: {active}")
except Exception as e:
print(f" Error: {e}")
await ainput("Press Enter to continue to next URL...")
# await ainput("Press Enter to continue to next URL...")
return None, None
finally:
await self.page.close()
return cost, active
async def scrape_cost_and_active_playwright_cardmarket(self, browser, url, eur_to_gbp_rate):
@@ -126,9 +281,8 @@ class Product_Scraper:
, cost_selector = cost_selector
, active_selector = None
, invalid_active_statuses = []
, min_delay = 15
)
cost = Product_Scraper.parse_cost_cardmarket(cost_text)
cost = Product_Scraper.parse_cost_cardmarket(cost_text = cost_text)
if cost is not None:
item_shipping_cost_in = 0
if cost < 10:
@@ -142,34 +296,31 @@ class Product_Scraper:
return cost, active
async def scrape_cost_and_active_playwright_chaoscards(self, browser, url):
# page_load_element_selector = '#prod_title'
cost_selector = '.price_inc > span:nth-child(2)'
active_selector = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li > div:nth-child(1) > div:nth-child(2)'
active_selector = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li.prod_det_stock > div:nth-child(1) > div:nth-child(2)'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = cost_selector # page_load_element_selector
, page_load_element_selector = cost_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ["Out of stock", "Coming soon"]
, min_delay = 15
)
cost = Product_Scraper.parse_cost_chaoscards(cost_text)
cost = Product_Scraper.parse_cost_chaoscards(cost_text = cost_text)
return cost, active
async def scrape_cost_and_active_playwright_gameslore(self, browser, url):
# page_load_element_selector = '.page-title'
cost_selector = 'div.columns > div.column.main > div.product-info-main > div.product-info-price > div.price-box > span.special-price > span.price-container > span.price-wrapper > span.price'
active_selector = '.stock > span:nth-child(1)'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = cost_selector # page_load_element_selector
, page_load_element_selector = cost_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ["OUT OF STOCK"]
)
cost = Product_Scraper.parse_cost_gameslore(cost_text)
cost = Product_Scraper.parse_cost_gameslore(cost_text = cost_text)
return cost, active
async def scrape_cost_and_active_playwright_magicmadhouse(self, browser, url):
@@ -184,7 +335,23 @@ class Product_Scraper:
, active_selector = active_selector
, invalid_active_statuses = []
)
cost = Product_Scraper.parse_cost_magicmadhouse(cost_text)
cost = Product_Scraper.parse_cost_magicmadhouse(cost_text = cost_text)
return cost, active
async def scrape_cost_and_active_playwright_newrealitiesgaming(self, browser, url):
    """Scrape cost and stock state from a New Realities Gaming product page.

    The desktop add-to-cart button is used as the page-load marker; its
    second <span> is read as the price text and its first <span> as the
    stock-status text. A status of 'Out of stock' marks the product inactive.

    Returns:
        (cost, active): cost as produced by parse_cost_magicmadhouse from the
        scraped price text, and the active flag reported by
        scrape_cost_and_active_playwright.
    """
    add_to_cart_button = 'div.display-desktop.add-to-cart-button__wrapper div.w-wrapper form button'
    scraped = await self.scrape_cost_and_active_playwright(
        browser = browser
        , url = url
        , page_load_element_selector = add_to_cart_button
        , cost_selector = f'{add_to_cart_button} span:nth-child(2)'
        , active_selector = f'{add_to_cart_button} span:nth-child(1)'
        , invalid_active_statuses = ['Out of stock']
    )
    raw_cost, active = scraped
    return Product_Scraper.parse_cost_magicmadhouse(cost_text = raw_cost), active
async def scrape_prices_and_quantities_playwright_cardmarket(self, browser: Browser, url, eur_to_gbp_rate):
@@ -194,13 +361,12 @@ class Product_Scraper:
print(f" Loading page...")
self.page = await browser.new_page()
await self.page.goto(url = url)
try:
# Automatically waits up to 30s by default
await self.page.goto(url=url, wait_until="domcontentloaded", timeout=30000)
await asyncio.sleep(random.uniform(20, 25))
page_title = await self.page.title()
print(f" Page title: {page_title}")
price_quantity_pairs = []
try:
offer_containers = await self.page.query_selector_all(offer_container_selector)
@@ -219,14 +385,15 @@ class Product_Scraper:
if price_text is None or quantity_text is None:
continue
price_quantity_pairs.append({
'price': Product_Scraper.parse_cost_cardmarket(price_text = price_text) * eur_to_gbp_rate
, 'quantity': Product_Scraper.parse_cost_cardmarket(quantity_text = quantity_text)
'price': Product_Scraper.parse_cost_cardmarket(cost_text = price_text) * eur_to_gbp_rate
, 'quantity': Product_Scraper.parse_cost_cardmarket(cost_text = quantity_text)
})
except Exception as e:
print(f" Price selector failed: {e}")
await ainput("Press enter to continue to next URL...")
# await ainput("Press enter to continue to next URL...")
return []
finally:
await self.page.close()
return price_quantity_pairs
@@ -370,7 +537,6 @@ class TCG_Sole_Trader_Workbook_Container:
def get_sourcing_entries(self):
product_sources = self.create_product_source_df()
try:
# products = []
for index_row in range(self.index_row_header_sourcing + 1, self.sourcing_sheet.max_row + 1):
source_name = self.sourcing_sheet.cell(index_row, self.index_column_name_sourcing).value
source_link = self.sourcing_sheet.cell(index_row, self.index_column_link_sourcing).value
@@ -381,15 +547,12 @@ class TCG_Sole_Trader_Workbook_Container:
product_is_booster = False
for product_row in range(self.index_row_header_product + 1, self.product_sheet.max_row + 1):
product_id = self.product_sheet.cell(product_row, self.index_column_product_id_product).value
# print(f"found product: id {product_id}")
if product_id == source_product_id:
product_is_booster_text = str(self.product_sheet.cell(product_row, self.index_column_is_booster_product).value).upper()
# print(f"product is booster: {product_is_booster_text}, type: {str(type(product_is_booster_text))}")
product_is_booster = (product_is_booster_text == "TRUE")
break
print(f"product is booster: {product_is_booster}")
# products.append((index_row, source_product_id, source_name, source_link, product_is_booster))
product_sources.loc[len(product_sources)] = [
index_row
, source_product_id
@@ -435,32 +598,31 @@ class Cost_Fetcher:
NAME_DOMAIN_CHAOS_CARDS: str = 'Chaos Cards'
NAME_DOMAIN_GAMES_LORE: str = 'Games Lore'
NAME_DOMAIN_MAGIC_MADHOUSE: str = 'Magic Madhouse'
NAME_DOMAIN_NEW_REALITIES_GAMING: str = 'New Realities Gaming'
NAME_FLAG: str = 'Name'
PRICE_FLAG: str = 'Price'
SUCCESS_FLAG: str = 'Success'
URL_FLAG: str = 'Url'
active_row_indices = list[int]
domain_names: list[str]
eur_to_gbp_rate: float
product_scrapers: list[Product_Scraper]
product_sources: pd.DataFrame
workbook_container: TCG_Sole_Trader_Workbook_Container
def __init__(self):
def __init__(self, email_notifier=None):
self.email_notifier = email_notifier
self.profitability_monitor = Profitability_Monitor()
self.workbook_save_lock = asyncio.Lock()
self.domain_names = [
self.NAME_DOMAIN_CARD_MARKET
, self.NAME_DOMAIN_CHAOS_CARDS
# self.NAME_DOMAIN_CARD_MARKET
self.NAME_DOMAIN_CHAOS_CARDS
, self.NAME_DOMAIN_GAMES_LORE
, self.NAME_DOMAIN_MAGIC_MADHOUSE
# , self.NAME_DOMAIN_NEW_REALITIES_GAMING
]
self.domain_details = {
self.NAME_DOMAIN_CARD_MARKET: {
self.NAME_FLAG: self.NAME_DOMAIN_CARD_MARKET
, self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_CARD_MARKET)
, self.ACCESSED_LAST_ON_FLAG: 0
}
, self.NAME_DOMAIN_CHAOS_CARDS: {
self.NAME_DOMAIN_CHAOS_CARDS: {
self.NAME_FLAG: self.NAME_DOMAIN_CHAOS_CARDS
, self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_CHAOS_CARDS)
, self.ACCESSED_LAST_ON_FLAG: 0
@@ -476,19 +638,28 @@ class Cost_Fetcher:
, self.ACCESSED_LAST_ON_FLAG: 0
}
}
"""
self.NAME_DOMAIN_CARD_MARKET: {
self.NAME_FLAG: self.NAME_DOMAIN_CARD_MARKET
, self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_CARD_MARKET)
, self.ACCESSED_LAST_ON_FLAG: 0
}
"""
"""
, self.NAME_DOMAIN_NEW_REALITIES_GAMING: {
self.NAME_FLAG: self.NAME_DOMAIN_NEW_REALITIES_GAMING
, self.INDEX_DOMAIN_FLAG: self.get_index_domain_from_name(self.NAME_DOMAIN_NEW_REALITIES_GAMING)
, self.ACCESSED_LAST_ON_FLAG: 0
}
"""
product_scrapers = []
active_row_indices = []
for index_domain in range(len(self.domain_names)):
domain = self.domain_names[index_domain]
product_scraper = Product_Scraper(domain)
product_scrapers.append(product_scraper)
active_row_indices.append(None)
self.product_scrapers = product_scrapers
self.active_row_indices = active_row_indices
self.workbook_container = TCG_Sole_Trader_Workbook_Container()
self.get_eur_to_gbp_rate()
self.domain_locks = defaultdict(asyncio.Lock)
self.workbook_container = None
self.eur_to_gbp_rate = 0.85
def get_index_domain_from_name(self, domain_name):
for index_domain in range(len(self.domain_names)):
@@ -508,134 +679,198 @@ class Cost_Fetcher:
async def fetch_all(self):
try:
if self.email_notifier:
sent = self.email_notifier.send_email(
subject=f"TCG Profitability Scanner Boot - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
, body_html="<html><body><h2>Booted</h2></body></html>"
)
if sent:
print("Sent boot test email")
else:
print("Error sending boot test email")
# Reload workbook and exchange rate fresh each cycle
self.get_eur_to_gbp_rate()
self.workbook_container = TCG_Sole_Trader_Workbook_Container()
self.product_sources = self.workbook_container.get_sourcing_entries()
workbook_path = os.path.abspath(TCG_Sole_Trader_Workbook_Container.WORKBOOK_NAME)
# Snapshot profitability before any scraping
print("Reading current profitability states...")
old_profit_states = self.profitability_monitor.read_states(workbook_path)
# Group product sources by domain
domain_groups = {domain: [] for domain in self.domain_names}
for _, product_source in self.product_sources.iterrows():
source_name = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_SOURCE_NAME]
if source_name in domain_groups:
domain_groups[source_name].append(product_source)
# Create one browser per domain and process all URLs; saves workbook after each item
processed_count = 0
updated_count = 0
self.product_sources = self.workbook_container.get_sourcing_entries()
# sourced_products = await self.scrape_with_browser_pool()
# for sourced_product in sourced_products:
# for product_source in self.product_sources:
# sourced_product = await self.fetch_single(product_source = product_source)
# for index_product_source, product_source in self.product_sources.iterrows():
# sourced_product = await self.fetch_single(product_source = product_source)
# Create tasks for parallel execution
tasks = []
for index_product_source, product_source in self.product_sources.iterrows():
task = self.fetch_single(product_source = product_source)
tasks.append(task)
async with async_playwright() as p:
domain_tasks = []
for domain_name in self.domain_names:
if domain_groups[domain_name]:
browser = await p.chromium.launch(headless=False)
task = self.process_domain_urls(browser, domain_name, domain_groups[domain_name])
domain_tasks.append(task)
# Execute all tasks in parallel
sourced_products = await asyncio.gather(*tasks)
all_domain_results = await asyncio.gather(*domain_tasks)
# Process results
for sourced_product in sourced_products:
index_row = sourced_product[self.workbook_container.NAME_COLUMN_INDEX_ROW]
unit_cost = sourced_product[self.workbook_container.NAME_COLUMN_UNIT_COST]
unit_price = sourced_product[self.workbook_container.index_column_unit_price_sourcing]
active = sourced_product[self.workbook_container.index_column_active_sourcing]
processed_count += 1
if not active:
continue
updated_count += 1
self.workbook_container.update_row_sourcing_sheet(
index_row = index_row
, unit_cost = unit_cost
, unit_price = unit_price
, active = active
)
self.workbook_container.save_workbook()
print(f"\nComplete!")
print(f"Processed: {processed_count} entries")
print(f"Updated: {updated_count} costs")
for domain_results in all_domain_results:
for result in domain_results:
processed_count += 1
if result[self.ACTIVE_FLAG]:
updated_count += 1
print(f"\nComplete! Processed: {processed_count} entries, Updated: {updated_count} costs")
# Recalculate spreadsheet formulas and check for profitability changes
if self.email_notifier:
recalculated = self.recalculate_workbook(workbook_path)
if recalculated:
new_profit_states = self.profitability_monitor.read_states(workbook_path)
alerts = self.profitability_monitor.find_changes(old_profit_states, new_profit_states)
if alerts:
html = self.profitability_monitor.format_email_html(alerts)
self.email_notifier.send_email(
subject=f"TCG Profitability Alert - {datetime.now().strftime('%Y-%m-%d %H:%M')}"
, body_html=html
)
print(f"Sent {len(alerts)} profitability alert(s).")
else:
print("No profitability changes detected.")
except Exception as e:
print(f"Error: {e}")
"""
async def scrape_with_browser_pool(self):
count_domains = len(self.domain_names)
async with async_playwright() as p:
browsers = [await p.chromium.launch(headless = False) for _ in range(count_domains)]
try:
tasks = []
# for i, url in enumerate(urls):
for index_product_source in range(len(self.product_sources)):
product_source = self.product_sources.loc[index_product_source]
browser = browsers[index_product_source % count_domains]
tasks.append(self.fetch_single(browser, product_source))
return await asyncio.gather(*tasks)
finally:
for browser in browsers:
await browser.close()
"""
async def fetch_single(self, product_source): # browser,
async with async_playwright() as p:
browser = await p.chromium.launch(headless = False)
print(f'Product source: {product_source}')
index_row = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_INDEX_ROW]
source_name = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_SOURCE_NAME]
source_link = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_LINK]
index_domain = None
try:
index_domain = self.get_index_domain_from_name(source_name)
except:
await browser.close()
return self.make_result_data_json(index_row = index_row)
domain_details = self.domain_details[source_name]
self.workbook_container.clear_row_sourcing_sheet(index_row = index_row)
Cost_Fetcher.log_processing_new_row(
index_row = index_row
, source_link = source_link
)
cost = None
price = None
active = None
if source_name == self.NAME_DOMAIN_CARD_MARKET:
while (self.active_row_indices[index_domain] is None or time.time() - domain_details[self.ACCESSED_LAST_ON_FLAG] < random.uniform(30, 40)):
await asyncio.sleep(random.uniform(3, 5))
self.active_row_indices[index_domain] = index_row
if product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_PRODUCT_IS_BOOSTER]:
price_quantity_pairs = await self.product_scrapers[index_domain].scrape_prices_and_quantities_playwright_cardmarket(browser = browser, url = source_link, eur_to_gbp_rate = self.eur_to_gbp_rate)
price = self.get_sale_price_from_price_quantity_pairs(price_quantity_pairs = price_quantity_pairs)
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_cardmarket(browser = browser, url = source_link, eur_to_gbp_rate = self.eur_to_gbp_rate)
elif source_name == self.NAME_DOMAIN_CHAOS_CARDS:
while (self.active_row_indices[index_domain] is None or time.time() - self.domain_details[source_name][self.ACCESSED_LAST_ON_FLAG] < random.uniform(30, 40)):
await asyncio.sleep(random.uniform(3, 5))
self.active_row_indices[index_domain] = index_row
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_chaoscards(browser = browser, url = source_link)
elif source_name == self.NAME_DOMAIN_GAMES_LORE:
while (self.active_row_indices[index_domain] is None or time.time() - self.domain_details[source_name][self.ACCESSED_LAST_ON_FLAG] < random.uniform(10, 20)):
await asyncio.sleep(random.uniform(3, 5))
self.active_row_indices[index_domain] = index_row
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_gameslore(browser = browser, url = source_link)
elif source_name == self.NAME_DOMAIN_MAGIC_MADHOUSE:
while (self.active_row_indices[index_domain] is None or time.time() - self.domain_details[source_name][self.ACCESSED_LAST_ON_FLAG] < random.uniform(10, 20)):
await asyncio.sleep(random.uniform(3, 5))
self.active_row_indices[index_domain] = index_row
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_magicmadhouse(browser = browser, url = source_link)
self.domain_details[source_name][self.ACCESSED_LAST_ON_FLAG] = time.time()
self.active_row_indices[index_domain] = None
if ((cost is None and price is None) or active is None):
print(f" Error: Could not find cost on page")
import traceback
print(f"Error in fetch_all: {e}")
traceback.print_exc()
async def process_domain_urls(self, browser, domain_name, product_sources):
"""Process all URLs for a single domain sequentially with rate limiting.
Saves the workbook immediately after each item is cleared (before scrape)
and again after each result is written (after scrape)."""
results = []
last_access_time = 0
try:
for product_source in product_sources:
# Rate limiting: wait between requests to the same domain
time_since_last = time.time() - last_access_time
if time_since_last < 45:
wait_time = 45 - time_since_last + random.uniform(0, 5)
print(f" [{domain_name}] Waiting {wait_time:.1f}s before next request...")
await asyncio.sleep(wait_time)
index_row = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_INDEX_ROW]
# Clear stale data and persist before scraping
async with self.workbook_save_lock:
self.workbook_container.clear_row_sourcing_sheet(index_row)
self.workbook_container.save_workbook()
result = await self.fetch_single_with_browser(browser, domain_name, product_source)
# Write fresh data and persist immediately
async with self.workbook_save_lock:
if result[self.ACTIVE_FLAG]:
self.workbook_container.update_row_sourcing_sheet(
index_row=result[self.INDEX_ROW_FLAG]
, unit_cost=result[self.COST_FLAG]
, unit_price=result[self.PRICE_FLAG]
, active=result[self.ACTIVE_FLAG]
)
self.workbook_container.save_workbook()
results.append(result)
last_access_time = time.time()
finally:
await browser.close()
return self.make_result_data_json(
index_row = index_row
, cost = cost
, price = price
, active = active
)
@classmethod
def make_result(cls, url, success, data, error):
return {
cls.URL_FLAG: url
, cls.SUCCESS_FLAG: success
, cls.DATA_FLAG: data
, cls.ERROR_FLAG: error
}
return results
async def fetch_single_with_browser(self, browser, domain_name, product_source):
"""Fetch cost/active data for one sourcing row using an already-launched browser.

Args:
browser: browser instance handed through to the domain-specific scraper.
domain_name: one of the NAME_DOMAIN_* constants; selects which scraper method runs.
product_source: row-like object indexed by the workbook container's column-name constants.

Returns:
The dict built by make_result_data_json for this row. cost, price and
active stay None when the domain is not handled or when scraping raised.
"""
index_row = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_INDEX_ROW]
source_link = product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_LINK]
Cost_Fetcher.log_processing_new_row(
index_row = index_row
, source_link = source_link
)
index_domain = self.get_index_domain_from_name(domain_name)
cost = None
# price is only assigned inside the disabled Card Market branch below, so it
# remains None for every domain that is currently enabled.
price = None
active = None
try:
# Tracks whether any scraper actually ran, so the "could not find cost"
# message is not printed for domains that have no enabled branch.
did_attempt = False
# The Card Market branch is disabled by wrapping it in a string literal.
# The literal deliberately ends with "el": deleting both triple-quote
# markers turns the following `if` back into `elif`.
"""
if domain_name == self.NAME_DOMAIN_CARD_MARKET:
if product_source[TCG_Sole_Trader_Workbook_Container.NAME_COLUMN_PRODUCT_IS_BOOSTER]:
price_quantity_pairs = await self.product_scrapers[index_domain].scrape_prices_and_quantities_playwright_cardmarket(
browser = browser
, url = source_link
, eur_to_gbp_rate = self.eur_to_gbp_rate
)
price = self.get_sale_price_from_price_quantity_pairs(price_quantity_pairs = price_quantity_pairs)
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_cardmarket(
browser = browser
, url = source_link
, eur_to_gbp_rate = self.eur_to_gbp_rate
)
el"""
if domain_name == self.NAME_DOMAIN_CHAOS_CARDS:
did_attempt = True
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_chaoscards(
browser = browser
, url = source_link
)
elif domain_name == self.NAME_DOMAIN_GAMES_LORE:
did_attempt = True
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_gameslore(
browser = browser
, url = source_link
)
elif domain_name == self.NAME_DOMAIN_MAGIC_MADHOUSE:
did_attempt = True
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_magicmadhouse(
browser = browser
, url = source_link
)
# The New Realities Gaming branch is likewise disabled via a string literal
# (labelled "unverified"). NOTE(review): it does not set did_attempt, so
# re-enabling it as-is would skip the missing-cost message for that domain.
""" unverified
elif domain_name == self.NAME_DOMAIN_NEW_REALITIES_GAMING:
cost, active = await self.product_scrapers[index_domain].scrape_cost_and_active_playwright_newrealitiesgaming(
browser = browser
, url = source_link
)
"""
# Report a failed scrape only when a scraper ran and produced neither a
# cost nor a price, or could not determine the active state.
if (
did_attempt
and (
(
cost is None
and price is None
)
or active is None
)
):
print(f" Error: Could not find cost on page")
except Exception as e:
# Best-effort: a failure on one URL is logged and reported as an empty result.
print(f" Error processing {source_link}: {e}")
return self.make_result_data_json(
index_row = index_row
, cost = cost
, price = price
, active = active
)
@classmethod
def make_result_data_json(cls, index_row, cost = None, price = None, active = None):
return {
@@ -644,44 +879,82 @@ class Cost_Fetcher:
, cls.PRICE_FLAG: price
, cls.ACTIVE_FLAG: active
}
def get_sale_price_from_price_quantity_pairs(self, price_quantity_pairs):
if not price_quantity_pairs: return None, False
if not price_quantity_pairs:
return None
max_quantity = 0
updated_row_price = False
price = None
# First pass: look for quantity >= 8
for price_quantity_pair in price_quantity_pairs:
eur_price = price_quantity_pair['price']
quantity = price_quantity_pair['quantity']
print(f" Found price: €{eur_price}")
print(f" Found quantity: {quantity}")
max_quantity = max(max_quantity, quantity)
if quantity >= 8:
if eur_price:
price = eur_price * self.eur_to_gbp_rate
print(f" Converted: €{eur_price:.2f} → £{price:.2f}")
# self.workbook_container.sourcing_sheet.cell(index_row, self.workbook_container.index_column_unit_price_sourcing).value = gbp_price
updated_row_price = True
# print(f"output row: {index_row}, value: {self.workbook_container.sourcing_sheet.cell(index_row, self.workbook_container.index_column_unit_cost_sourcing).value}")
break
else:
print(f" Error: Could not parse price")
if not updated_row_price:
print("Offer with quantity >= 8 not found")
for price_quantity_pair in price_quantity_pairs:
eur_price = price_quantity_pair['price']
quantity = price_quantity_pair['quantity']
print(f" Found price: €{eur_price}")
print(f" Found quantity: {quantity}")
if max_quantity <= 2 or quantity == max_quantity:
if eur_price:
price = eur_price * self.eur_to_gbp_rate
print(f" Converted: €{eur_price:.2f} → £{price:.2f}")
# self.workbook_container.sourcing_sheet.cell(index_row, self.workbook_container.index_column_unit_cost_sourcing).value = gbp_price
updated_row_price = True
# print(f"output row: {index_row}, value: {self.workbook_container.sourcing_sheet.cell(index_row, self.workbook_container.index_column_unit_cost_sourcing).value}")
break
else:
print(f" Error: Could not parse price")
return price, True
if quantity >= 8 and eur_price:
price = eur_price * self.eur_to_gbp_rate
print(f" Converted: €{eur_price:.2f} → £{price:.2f}")
return price
# Second pass: use max quantity if no quantity >= 8
print("Offer with quantity >= 8 not found")
for price_quantity_pair in price_quantity_pairs:
eur_price = price_quantity_pair['price']
quantity = price_quantity_pair['quantity']
if (max_quantity <= 2 or quantity == max_quantity) and eur_price:
price = eur_price * self.eur_to_gbp_rate
print(f" Converted: €{eur_price:.2f} → £{price:.2f}")
return price
return price
def recalculate_workbook(self, workbook_path):
    """Re-save the workbook through headless LibreOffice so formula cells are recalculated.

    The workbook is converted to xlsx with its own directory as the output
    directory. Nothing is run when a LibreOffice lock file for the workbook
    exists next to it.

    Args:
        workbook_path: path to the workbook file.

    Returns:
        True when LibreOffice exits with status 0; False when the lock file
        is present, the process fails, times out (120 s) or cannot be run.
    """
    directory, filename = os.path.split(workbook_path)
    lock_path = os.path.join(directory, f'.~lock.{filename}#')
    if os.path.exists(lock_path):
        print(f"Warning: '{filename}' is open in LibreOffice — skipping recalculation to avoid conflict.")
        return False

    print("Recalculating workbook formulas with LibreOffice headless...")
    command = [
        'libreoffice', '--headless', '--norestore'
        , '--convert-to', 'xlsx'
        , '--outdir', directory
        , workbook_path
    ]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=120)
        succeeded = completed.returncode == 0
        if succeeded:
            print("Recalculation complete.")
        else:
            print(f"Recalculation failed (exit {completed.returncode}): {completed.stderr.strip()}")
        return succeeded
    except subprocess.TimeoutExpired:
        print("LibreOffice recalculation timed out.")
    except Exception as e:
        print(f"Error during recalculation: {e}")
    return False
async def run_continuous(self):
    """Repeat fetch_all forever, pausing CYCLE_INTERVAL_HOURS hours after every cycle.

    An exception escaping a cycle is printed together with its traceback and
    does not stop the loop; the next cycle is still scheduled.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    while True:
        print(f"\n{'='*60}")
        cycle_started_at = datetime.now()
        print(f"Cycle started: {cycle_started_at.strftime(timestamp_format)}")
        try:
            await self.fetch_all()
        except Exception as e:
            import traceback
            print(f"Unhandled cycle error: {e}")
            traceback.print_exc()
        # The next-run time is measured from the end of the cycle, not its start.
        next_run = datetime.now() + timedelta(hours=CYCLE_INTERVAL_HOURS)
        print(f"Next cycle: {next_run.strftime(timestamp_format)} (in {CYCLE_INTERVAL_HOURS}h)")
        await asyncio.sleep(CYCLE_INTERVAL_HOURS * 3600)
@staticmethod
def log_processing_new_row(index_row, source_link):
print(f"\n{'='*60}")
@@ -689,8 +962,31 @@ class Cost_Fetcher:
print(f"{'='*60}")
async def main():
cost_fetcher = Cost_Fetcher()
await cost_fetcher.fetch_all()
sender_email = os.environ.get('EMAIL_SENDER')
sender_password = os.environ.get('EMAIL_PASSWORD')
recipient_email = os.environ.get('EMAIL_RECIPIENT')
smtp_host = os.environ.get('SMTP_HOST', 'smtp.gmail.com')
smtp_port = int(os.environ.get('SMTP_PORT', '587'))
email_notifier = None
if sender_email and sender_password and recipient_email:
email_notifier = Email_Notifier(
sender_email=sender_email
, sender_password=sender_password
, recipient_email=recipient_email
, smtp_host=smtp_host
, smtp_port=smtp_port
)
print(f"Email notifications enabled: {sender_email}{recipient_email}")
else:
print(
"Email notifications disabled.\n"
"Set EMAIL_SENDER, EMAIL_PASSWORD, and EMAIL_RECIPIENT environment variables to enable.\n"
"For Gmail, use an App Password (https://myaccount.google.com/apppasswords)."
)
cost_fetcher = Cost_Fetcher(email_notifier=email_notifier)
await cost_fetcher.run_continuous()
if __name__ == "__main__":
asyncio.run(main())

View File

View File

@@ -0,0 +1,237 @@
"""
Project: Shuffle & Skirmish Market Scraper
Author: Edward Middleton-Smith
Shuffle & Skirmish
Technology: Business Objects
Feature: Product Scraper Class
"""
import pandas as pd
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.worksheet import Worksheet
import requests
import re
import time
import random
from playwright.sync_api import sync_playwright, Browser, Page
from playwright.async_api import async_playwright
import asyncio
from aioconsole import ainput
from collections import defaultdict
from datetime import datetime, timedelta
class Product_Scraper:
domain: str
page: Page
def __init__(self, domain):
print("Setting up browser automation")
self.domain = domain
@staticmethod
def parse_cost(cost_text):
if not cost_text:
return None
cost_clean = re.sub(r'[^\d,]', '', cost_text)
try:
return float(cost_clean) / 100
except ValueError:
return None
@classmethod
def parse_cost_chaoscards(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_cardmarket(cls, cost_text):
"""Convert '141,30 €' format to float in EUR"""
if not cost_text:
return None
cost_clean = re.sub(r'[^\d,]', '', cost_text)
cost_clean = cost_clean.replace(',', '.')
try:
return float(cost_clean)
except ValueError:
return None
@classmethod
def parse_cost_gameslore(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_magicmadhouse(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
@classmethod
def parse_cost_newrealitiesgaming(cls, cost_text):
return cls.parse_cost(cost_text = cost_text)
async def scrape_cost_and_active_playwright(self, browser: Browser, url, page_load_element_selector, cost_selector, active_selector, invalid_active_statuses):
print(f" Loading page...")
self.page = await browser.new_page()
await self.page.goto(url = url)
await asyncio.sleep(random.uniform(20, 25))
cost = None
active = None
try:
element = self.page.locator(selector = page_load_element_selector)
page_title = await self.page.title()
print(f" Page title: {page_title}")
element = self.page.locator(selector = cost_selector)
text = await element.text_content()
print(f" Text: '{text}'")
cost = text
active = None
if active_selector is None:
active = (cost is not None)
else:
try:
elements = await self.page.query_selector_all(selector = active_selector)
print(f'# active elements: {len(elements)}')
if len(elements) == 0:
active = True
else:
text = await elements[0].text_content()
text = text.strip()
print(f" Text: '{text}'")
active = (invalid_active_statuses is None or text not in invalid_active_statuses)
except Exception as e:
print(f" Selector failed: {e}")
if cost is None or active is None:
print(f" ✗ No cost found")
# await ainput("Press Enter to continue to next URL...")
print(f"Cost: {cost}, Active: {active}")
except Exception as e:
print(f" Error: {e}")
# await ainput("Press Enter to continue to next URL...")
return None, None
finally:
await self.page.close()
return cost, active
async def scrape_cost_and_active_playwright_cardmarket(self, browser, url, eur_to_gbp_rate):
page_load_element_selector = "body > main.container > div.page-title-container"
cost_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer > div.price-container > div > div:nth-child(1) > span:nth-child(1)'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = page_load_element_selector
, cost_selector = cost_selector
, active_selector = None
, invalid_active_statuses = []
)
cost = Product_Scraper.parse_cost_cardmarket(cost_text = cost_text)
if cost is not None:
item_shipping_cost_in = 0
if cost < 10:
item_shipping_cost_in = 2
elif cost < 100:
item_shipping_cost_in = 8
else:
item_shipping_cost_in = 20
cost = cost * eur_to_gbp_rate + item_shipping_cost_in
active = (cost is not None)
return cost, active
async def scrape_cost_and_active_playwright_chaoscards(self, browser, url):
cost_selector = '.price_inc > span:nth-child(2)'
active_selector = '.product__right > form > ul.prod_det_fields.left.product-section.product-section--stock > li.prod_det_stock > div:nth-child(1) > div:nth-child(2)'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = cost_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ["Out of stock", "Coming soon"]
)
cost = Product_Scraper.parse_cost_chaoscards(cost_text = cost_text)
return cost, active
async def scrape_cost_and_active_playwright_gameslore(self, browser, url):
cost_selector = 'div.columns > div.column.main > div.product-info-main > div.product-info-price > div.price-box > span.special-price > span.price-container > span.price-wrapper > span.price'
active_selector = '.stock > span:nth-child(1)'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = cost_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ["OUT OF STOCK"]
)
cost = Product_Scraper.parse_cost_gameslore(cost_text = cost_text)
return cost, active
async def scrape_cost_and_active_playwright_magicmadhouse(self, browser, url):
page_load_element_selector = '.productView-title'
cost_selector = 'div.body > div.container > div > div.productView > section.productView-details > div.productView-options > form > div.productView-options-selections > div.productView-product > div.productView-info > div.price-rating > div.productView-price > div.price-section.actual-price > span.price'
active_selector = '.alertBox.alertBox--error'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = page_load_element_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = []
)
cost = Product_Scraper.parse_cost_magicmadhouse(cost_text = cost_text)
return cost, active
async def scrape_cost_and_active_playwright_newrealitiesgaming(self, browser, url):
button_selector = 'div.display-desktop.add-to-cart-button__wrapper div.w-wrapper form button'
page_load_element_selector = button_selector
cost_selector = f'{button_selector} span:nth-child(2)'
active_selector = f'{button_selector} span:nth-child(1)'
cost_text, active = await self.scrape_cost_and_active_playwright(
browser = browser
, url = url
, page_load_element_selector = page_load_element_selector
, cost_selector = cost_selector
, active_selector = active_selector
, invalid_active_statuses = ['Out of stock']
)
cost = Product_Scraper.parse_cost_magicmadhouse(cost_text = cost_text)
return cost, active
async def scrape_prices_and_quantities_playwright_cardmarket(self, browser: Browser, url, eur_to_gbp_rate):
    """Collect price/quantity pairs from the offers on a Cardmarket page.

    Opens a new page, navigates to ``url``, waits a randomised 20-25 seconds,
    then reads the price and quantity text from every element matching the
    offer-container selector.

    Args:
        browser: Playwright browser used to open a new page.
        url: Cardmarket product URL to load.
        eur_to_gbp_rate: Multiplier applied to each parsed price.

    Returns:
        A list of ``{'price': ..., 'quantity': ...}`` dicts, one per offer
        whose price and quantity could both be read. Returns ``[]`` if an
        exception is raised while extracting offers.

    Raises:
        Whatever ``goto`` / ``title`` raise; the page is closed first.
    """
    offer_container_selector = '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer'
    price_selector = 'div.price-container > div > div:nth-child(1) > span:nth-child(1)'
    quantity_selector = 'div.amount-container > span:nth-child(1)'
    print(f" Loading page...")
    # Keep a local handle so the page closed in `finally` is always the one
    # opened here, even if self.page is reassigned elsewhere meanwhile.
    page = await browser.new_page()
    self.page = page
    try:
        # Navigation lives inside the try so a failed/timed-out goto no
        # longer leaks the freshly opened page.
        await page.goto(url = url)
        await asyncio.sleep(random.uniform(20, 25))
        page_title = await page.title()
        print(f" Page title: {page_title}")
        price_quantity_pairs = []
        try:
            offer_containers = await page.query_selector_all(offer_container_selector)
            print(f" Offer container selector: Found {len(offer_containers)} elements")
            for offer_container in offer_containers:
                price_element = await offer_container.query_selector(price_selector)
                quantity_element = await offer_container.query_selector(quantity_selector)
                # query_selector yields None when nothing matches; skip this
                # offer instead of raising and discarding the pairs already
                # collected.
                if price_element is None or quantity_element is None:
                    print(" Skipping offer: price or quantity element not found")
                    continue
                price_text = await price_element.text_content()
                quantity_text = await quantity_element.text_content()
                # A usable price must contain at least one digit. The
                # previous additional `'' in price_text` test was always
                # true (possibly a lost currency symbol — TODO confirm).
                if price_text is None or quantity_text is None or not re.search(r'\d', price_text):
                    continue
                print(f" ✓ Found price: {price_text}")
                price_quantity_pairs.append({
                    'price': Product_Scraper.parse_cost_cardmarket(cost_text = price_text) * eur_to_gbp_rate
                    , 'quantity': Product_Scraper.parse_cost_cardmarket(cost_text = quantity_text)
                })
        except Exception as e:
            # Best-effort: any extraction failure yields an empty result.
            print(f" Price selector failed: {e}")
            # await ainput("Press enter to continue to next URL...")
            return []
        return price_quantity_pairs
    finally:
        await page.close()

View File

@@ -0,0 +1,33 @@
[Unit]
Description=TCG Sole Trader Price Scraper
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=teddy
WorkingDirectory=/media/teddy/3_6GB-SSD-Storage/Documents/Programming/python/Yu-Gi-Oh_Card_Fetcher/product_scraping
ExecStart=/media/teddy/3_6GB-SSD-Storage/Documents/Programming/python/Yu-Gi-Oh_Card_Fetcher/env_api/bin/python3 product_scraper.py
# Email credentials — fill these in before installing.
# Keep only the placeholder password in version control; set the real value
# in the installed copy under /etc/systemd/system/ (or a root-only drop-in).
Environment=EMAIL_SENDER=bot@partsltd.co.uk
Environment=EMAIL_PASSWORD=CHANGE_ME
Environment=EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk
# ,teddy@partsltd.co.uk,edward.middletonsmith@gmail.com
# Optional overrides (defaults shown):
Environment=SMTP_HOST=mail.partsltd.co.uk
Environment=SMTP_PORT=465
# Python block-buffers stdout when it is not a terminal; disable buffering so
# print() output reaches the journal immediately.
Environment=PYTHONUNBUFFERED=1
# Restart automatically if it crashes, but not if you stop it intentionally
Restart=on-failure
RestartSec=30
# Give the browser enough time to shut down cleanly on stop
TimeoutStopSec=30
# Send stdout/stderr to the journal (view with: journalctl -fu tcg-scraper)
StandardOutput=journal
StandardError=journal
[Install]
WantedBy=multi-user.target

View File

@@ -13,4 +13,34 @@ xlsxwriter
# selenium
# undetected_chromedriver
playwright
aioconsole
aioconsole

# ---------------------------------------------------------------------------
# Usage notes (kept as comments so pip can still parse this file).
#
# Run manually (substitute your own credentials):
#   EMAIL_SENDER=bot@partsltd.co.uk \
#   EMAIL_PASSWORD='your-password' \
#   EMAIL_RECIPIENT=teddy@shuffleandskirmish.co.uk \
#   python3 product_scraper.py
#
# One-time setup (edit credentials first):
#   1. Fill in your email details in the service file
#        nano product_scraping/tcg-scraper.service
#   2. Copy to systemd and enable
#        sudo cp product_scraping/tcg-scraper.service /etc/systemd/system/
#        sudo systemctl daemon-reload
#        sudo systemctl enable tcg-scraper    # auto-start on boot
#        sudo systemctl start tcg-scraper
#
# Day-to-day controls:
#   sudo systemctl stop tcg-scraper           # stop immediately
#   sudo systemctl start tcg-scraper          # start again
#   sudo systemctl restart tcg-scraper        # restart (e.g. after editing the .py)
#   sudo systemctl status tcg-scraper         # is it running? last exit code?
#   journalctl -fu tcg-scraper                # live log tail (Ctrl+C to exit)
#   journalctl -u tcg-scraper --since today   # today's log
# ---------------------------------------------------------------------------