Initial commit.

This commit is contained in:
2026-01-28 22:05:35 +00:00
parent c4a0928edd
commit d72d9c705b
45 changed files with 3517 additions and 0 deletions

View File

@@ -0,0 +1,316 @@
import pandas as pd
import psycopg2
from psycopg2 import sql
from datetime import datetime
import numpy as np
import sys
# Database connection configuration.
# SECURITY NOTE(review): credentials are hardcoded in source and committed to
# version control; prefer reading them from environment variables or a config
# file kept outside the repository.
DB_CONFIG = {
    'dbname': 'tcg',
    'user': 'postgres',
    'password': 'lick_pubes',
    'host': 'localhost',
    'port': '5432'
}
# Version history configuration
DEFAULT_USER_ID = 3  # user_id recorded as creator/updater on inserted rows
DEFAULT_CHANGE_SET_ID = 1  # change-set id stamped on inserted rows
def clean_numeric(value):
    """Parse *value* into a float, tolerating currency formatting.

    Handles NaN/empty input (returns None), and strings containing currency
    symbols ('£', '$'), thousands separators (',') and surrounding
    whitespace. Returns None when the value cannot be parsed.
    """
    if pd.isna(value) or value == '':
        return None
    if isinstance(value, str):
        # Remove currency symbols, commas, and spaces
        value = value.replace('£', '').replace('$', '').replace(',', '').strip()
        if not value:
            return None
    try:
        return float(value)
    except (TypeError, ValueError):
        # Narrowed from a bare except: only unparseable content is treated
        # as missing; programming errors propagate.
        return None
def clean_boolean(value):
    """Coerce *value* to a bool; NaN/missing counts as False."""
    if pd.isna(value):
        return False
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Accept the common affirmative spellings, case-insensitively.
        affirmatives = {'yes', 'true', '1', 'y'}
        return value.lower() in affirmatives
    return bool(value)
def clean_datetime(value):
    """Convert various date representations to a pandas Timestamp.

    Returns None for NaN/empty input or anything pandas cannot parse.
    Format is inferred by pandas (month-first for ambiguous strings).
    """
    if pd.isna(value) or value == '':
        return None
    try:
        return pd.to_datetime(value)  # , dayfirst=True)
    except (ValueError, TypeError, OverflowError):
        # Narrowed from a bare except: unparseable dates become None,
        # everything else propagates.
        return None
def clean_string(value, max_length=None):
    """Return *value* as a stripped string, truncated to *max_length*.

    NaN and the empty string map to None; all other values are stringified.
    """
    if pd.isna(value) or value == '':
        return None
    text = str(value).strip()
    if max_length and len(text) > max_length:
        text = text[:max_length]
    return text
def process_spreadsheet(file_path, connection):
    """Import MTG card rows from an Excel workbook into tcg.public.TCG_MTG_Card.

    Reads the 'MTG Card Dictionary' sheet of *file_path*, cleans each column
    with the clean_* helpers, and inserts one database row per spreadsheet
    row. Commits every 500 rows and once at the end. A failing row is rolled
    back, counted as skipped, and processing continues.

    NOTE: a per-row rollback also discards any uncommitted rows since the
    last periodic commit, so inserted_count can overstate what persisted.
    """
    # Read the Excel file
    df = pd.read_excel(file_path, sheet_name='MTG Card Dictionary')

    cursor = connection.cursor()
    inserted_count = 0
    skipped_count = 0

    for index, row in df.iterrows():
        try:
            # FIX: the audit timestamps previously bound the literal string
            # 'NOW()', which is not valid timestamp input when sent as a
            # query parameter; use a real Python datetime instead.
            now = datetime.now()
            # Prepare data for insertion
            data = {
                'all_parts': clean_string(row['all_parts']),
                'arena_id': clean_numeric(row['arena_id']),
                'artist': clean_string(row['artist']),
                'artist_ids': clean_string(row['artist_ids'], 250),
                'booster': clean_boolean(row['booster']),
                'border_color': clean_string(row['border_color']),
                'card_back_id': clean_string(row['card_back_id'], 250),
                'card_faces': clean_string(row['card_faces']),
                'cardmarket_id': clean_numeric(row['cardmarket_id']),
                'cmc': clean_numeric(row['cmc']),
                'collector_number': clean_string(row['collector_number']),
                'color_identity': clean_string(row['color_identity']),
                'colors': clean_string(row['colors']),
                'content_warning': clean_boolean(row['content_warning']),
                'digital': clean_boolean(row['digital']),
                'edhrec_rank': clean_numeric(row['edhrec_rank']),
                'finishes': clean_string(row['finishes']),
                'flavor_name': clean_string(row['flavor_name']),
                'flavor_text': clean_string(row['flavor_text']),
                'foil': clean_boolean(row['foil']),
                'frame': clean_string(row['frame']),
                'full_art': clean_boolean(row['full_art']),
                'game_changer': clean_boolean(row['game_changer']),
                'games': clean_string(row['games']),
                'hand_modifier': clean_string(row['hand_modifier']),
                'highres_image': clean_boolean(row['highres_image']),
                'id': clean_string(row['id'], 100),
                'image_status': clean_string(row['image_status']),
                'image_uri_art_crop': clean_string(row['image_uri_art_crop']),
                'image_uri_border_crop': clean_string(row['image_uri_border_crop']),
                'image_uri_large': clean_string(row['image_uri_large']),
                'image_uri_normal': clean_string(row['image_uri_normal']),
                'image_uri_png': clean_string(row['image_uri_png']),
                'image_uri_small': clean_string(row['image_uri_small']),
                'keywords': clean_string(row['keywords']),
                'lang': clean_string(row['lang']),
                'layout': clean_string(row['layout']),
                'legal_alchemy': clean_boolean(row['legal_alchemy']),
                'legal_brawl': clean_boolean(row['legal_brawl']),
                'legal_commander': clean_boolean(row['legal_commander']),
                'legal_duel': clean_boolean(row['legal_duel']),
                'legal_future': clean_boolean(row['legal_future']),
                'legal_gladiator': clean_boolean(row['legal_gladiator']),
                'legal_historic': clean_boolean(row['legal_historic']),
                'legal_legacy': clean_boolean(row['legal_legacy']),
                'legal_modern': clean_boolean(row['legal_modern']),
                'legal_oathbreaker': clean_boolean(row['legal_oathbreaker']),
                'legal_oldschool': clean_boolean(row['legal_oldschool']),
                'legal_pauper': clean_boolean(row['legal_pauper']),
                'legal_paupercommander': clean_boolean(row['legal_paupercommander']),
                'legal_penny': clean_boolean(row['legal_penny']),
                'legal_pioneer': clean_boolean(row['legal_pioneer']),
                'legal_predh': clean_boolean(row['legal_predh']),
                'legal_premodern': clean_boolean(row['legal_premodern']),
                'legal_standard': clean_boolean(row['legal_standard']),
                'legal_standardbrawl': clean_boolean(row['legal_standardbrawl']),
                'legal_timeless': clean_boolean(row['legal_timeless']),
                'legal_vintage': clean_boolean(row['legal_vintage']),
                'life_modifier': clean_string(row['life_modifier']),
                'loyalty': clean_string(row['loyalty']),
                'mana_cost': clean_string(row['mana_cost']),
                'mtgo_id': clean_numeric(row['mtgo_id']),
                'multiverse_ids': clean_string(row['multiverse_ids'], 250),
                'name': clean_string(row['name']),
                'nonfoil': clean_boolean(row['nonfoil']),
                'oracle_id': clean_string(row['oracle_id'], 100),
                'oracle_text': clean_string(row['oracle_text']),
                'oversized': clean_boolean(row['oversized']),
                'penny_rank': clean_numeric(row['penny_rank']),
                'power': clean_string(row['power']),
                'preview_date': clean_datetime(row['preview_date']),
                'preview_source': clean_string(row['preview_source']),
                'preview_source_uri': clean_string(row['preview_source_uri']),
                'price_eur': clean_numeric(row['price_eur']),
                'price_eur_foil': clean_numeric(row['price_eur_foil']),
                'price_tix': clean_numeric(row['price_tix']),
                'price_usd': clean_numeric(row['price_usd']),
                'price_usd_etched': clean_numeric(row['price_usd_etched']),
                'price_usd_foil': clean_numeric(row['price_usd_foil']),
                'printed_name': clean_string(row['printed_name']),
                'printed_text': clean_string(row['printed_text']),
                'printed_type_line': clean_string(row['printed_type_line']),
                'prints_search_uri': clean_string(row['prints_search_uri']),
                'produced_mana': clean_string(row['produced_mana']),
                'promo': clean_boolean(row['promo']),
                'purchase_cardhoarder': clean_string(row['purchase_cardhoarder']),
                'purchase_cardmarket': clean_string(row['purchase_cardmarket']),
                'purchase_tcgplayer': clean_string(row['purchase_tcgplayer']),
                'rarity': clean_string(row['rarity']),
                'released_at': clean_datetime(row['released_at']),
                'reprint': clean_boolean(row['reprint']),
                'reserved': clean_boolean(row['reserved']),
                'rulings_uri': clean_string(row['rulings_uri']),
                'scryfall_set_uri': clean_string(row['scryfall_set_uri']),
                'scryfall_uri': clean_string(row['scryfall_uri']),
                'security_stamp': clean_string(row['security_stamp']),
                'set': clean_string(row['set']),
                'set_id': clean_string(row['set_id'], 100),
                'set_name': clean_string(row['set_name']),
                'set_search_uri': clean_string(row['set_search_uri']),
                'set_type': clean_string(row['set_type']),
                'set_uri': clean_string(row['set_uri']),
                'story_spotlight': clean_boolean(row['story_spotlight']),
                'tcgplayer_id': clean_numeric(row['tcgplayer_id']),
                'textless': clean_boolean(row['textless']),
                'toughness': clean_string(row['toughness']),
                'type_line': clean_string(row['type_line']),
                'uri': clean_string(row['uri']),
                'uri_edhrec': clean_string(row['uri_edhrec']),
                'uri_gatherer': clean_string(row['uri_gatherer']),
                'uri_tcgplayer_infinite_articles': clean_string(row['uri_tcgplayer_infinite_articles']),
                'uri_tcgplayer_infinite_decks': clean_string(row['uri_tcgplayer_infinite_decks']),
                'variation': clean_boolean(row['variation']),
                'active': True,
                'created_on': now,
                'created_by_user_id': DEFAULT_USER_ID,
                'updated_last_on': now,
                'updated_last_by_user_id': DEFAULT_USER_ID,
                'change_set_id': DEFAULT_CHANGE_SET_ID
            }

            # Build the insert query (values bound as named parameters so
            # psycopg2 handles quoting/escaping).
            insert_query = """
                INSERT INTO tcg.public.TCG_MTG_Card (
                    all_parts, arena_id, artist, artist_ids, booster, border_color, card_back_id,
                    card_faces, cardmarket_id, cmc, collector_number, color_identity, colors,
                    content_warning, digital, edhrec_rank, finishes, flavor_name, flavor_text,
                    foil, frame, full_art, game_changer, games, hand_modifier, highres_image,
                    id, image_status, image_uri_art_crop, image_uri_border_crop, image_uri_large,
                    image_uri_normal, image_uri_png, image_uri_small, keywords, lang, layout,
                    legal_alchemy, legal_brawl, legal_commander, legal_duel, legal_future,
                    legal_gladiator, legal_historic, legal_legacy, legal_modern, legal_oathbreaker,
                    legal_oldschool, legal_pauper, legal_paupercommander, legal_penny, legal_pioneer,
                    legal_predh, legal_premodern, legal_standard, legal_standardbrawl, legal_timeless,
                    legal_vintage, life_modifier, loyalty, mana_cost, mtgo_id, multiverse_ids,
                    name, nonfoil, oracle_id, oracle_text, oversized, penny_rank, power,
                    preview_date, preview_source, preview_source_uri, price_eur, price_eur_foil,
                    price_tix, price_usd, price_usd_etched, price_usd_foil, printed_name,
                    printed_text, printed_type_line, prints_search_uri, produced_mana, promo,
                    purchase_cardhoarder, purchase_cardmarket, purchase_tcgplayer, rarity,
                    released_at, reprint, reserved, rulings_uri, scryfall_set_uri, scryfall_uri,
                    security_stamp, set, set_id, set_name, set_search_uri, set_type, set_uri,
                    story_spotlight, tcgplayer_id, textless, toughness, type_line, uri,
                    uri_edhrec, uri_gatherer, uri_tcgplayer_infinite_articles,
                    uri_tcgplayer_infinite_decks, variation, active, created_on, created_by_user_id,
                    updated_last_on, updated_last_by_user_id, change_set_id
                ) VALUES (
                    %(all_parts)s, %(arena_id)s, %(artist)s, %(artist_ids)s, %(booster)s, %(border_color)s, %(card_back_id)s,
                    %(card_faces)s, %(cardmarket_id)s, %(cmc)s, %(collector_number)s, %(color_identity)s, %(colors)s,
                    %(content_warning)s, %(digital)s, %(edhrec_rank)s, %(finishes)s, %(flavor_name)s, %(flavor_text)s,
                    %(foil)s, %(frame)s, %(full_art)s, %(game_changer)s, %(games)s, %(hand_modifier)s, %(highres_image)s,
                    %(id)s, %(image_status)s, %(image_uri_art_crop)s, %(image_uri_border_crop)s, %(image_uri_large)s,
                    %(image_uri_normal)s, %(image_uri_png)s, %(image_uri_small)s, %(keywords)s, %(lang)s, %(layout)s,
                    %(legal_alchemy)s, %(legal_brawl)s, %(legal_commander)s, %(legal_duel)s, %(legal_future)s,
                    %(legal_gladiator)s, %(legal_historic)s, %(legal_legacy)s, %(legal_modern)s, %(legal_oathbreaker)s,
                    %(legal_oldschool)s, %(legal_pauper)s, %(legal_paupercommander)s, %(legal_penny)s, %(legal_pioneer)s,
                    %(legal_predh)s, %(legal_premodern)s, %(legal_standard)s, %(legal_standardbrawl)s, %(legal_timeless)s,
                    %(legal_vintage)s, %(life_modifier)s, %(loyalty)s, %(mana_cost)s, %(mtgo_id)s, %(multiverse_ids)s,
                    %(name)s, %(nonfoil)s, %(oracle_id)s, %(oracle_text)s, %(oversized)s, %(penny_rank)s, %(power)s,
                    %(preview_date)s, %(preview_source)s, %(preview_source_uri)s, %(price_eur)s, %(price_eur_foil)s,
                    %(price_tix)s, %(price_usd)s, %(price_usd_etched)s, %(price_usd_foil)s, %(printed_name)s,
                    %(printed_text)s, %(printed_type_line)s, %(prints_search_uri)s, %(produced_mana)s, %(promo)s,
                    %(purchase_cardhoarder)s, %(purchase_cardmarket)s, %(purchase_tcgplayer)s, %(rarity)s,
                    %(released_at)s, %(reprint)s, %(reserved)s, %(rulings_uri)s, %(scryfall_set_uri)s, %(scryfall_uri)s,
                    %(security_stamp)s, %(set)s, %(set_id)s, %(set_name)s, %(set_search_uri)s, %(set_type)s, %(set_uri)s,
                    %(story_spotlight)s, %(tcgplayer_id)s, %(textless)s, %(toughness)s, %(type_line)s, %(uri)s,
                    %(uri_edhrec)s, %(uri_gatherer)s, %(uri_tcgplayer_infinite_articles)s,
                    %(uri_tcgplayer_infinite_decks)s, %(variation)s, %(active)s, %(created_on)s, %(created_by_user_id)s,
                    %(updated_last_on)s, %(updated_last_by_user_id)s, %(change_set_id)s
                )
            """

            cursor.execute(insert_query, data)
            inserted_count += 1

            # Periodic progress report and commit.
            if (index + 1) % 500 == 0:
                print(f"Processed {index + 1} rows...")
                try:
                    connection.commit()
                except psycopg2.Error as e:
                    print(f'error: {e}')
                    connection.rollback()
        except Exception as e:
            # Spreadsheet rows are 1-based with a header row, hence index + 2.
            print(f"Error processing row {index + 2}: {str(e)}")
            skipped_count += 1
            connection.rollback()
            continue

    # Final commit
    try:
        connection.commit()
    except psycopg2.Error as e:
        print(f'error: {e}')
        connection.rollback()

    print(f"\nImport completed:")
    print(f"- Records inserted: {inserted_count}")
    print(f"- Records skipped: {skipped_count}")
    cursor.close()
def main():
    """Connect to the database and run the card-dictionary import."""
    file_path = '/home/teddy/Downloads/Trading Cards.xlsx'

    connection = None
    try:
        print("Connecting to database...")
        connection = psycopg2.connect(**DB_CONFIG)
        print("Connected successfully!")

        # Process the spreadsheet
        print(f"\nProcessing {file_path}...")
        process_spreadsheet(file_path, connection)
    except Exception as e:
        print(f"Error: {str(e)}")
        sys.exit(1)
    finally:
        # Close the connection whether the import succeeded or not.
        if connection:
            connection.close()
            print("\nDatabase connection closed.")
if __name__ == "__main__":
    # Configuration notes:
    # 1. Update DB_CONFIG with your actual database credentials
    # 2. Set DEFAULT_USER_ID to your user_id in the system
    # 3. Ensure TCG_MTG_Card table has records with matching Key IDs
    # 4. Ensure TCG_MTG_Finish and TCG_Condition tables have the referenced IDs
    main()

View File

@@ -0,0 +1,235 @@
import pandas as pd
import psycopg2
from psycopg2 import sql
from datetime import datetime
import numpy as np
import sys
# Database connection configuration.
# SECURITY NOTE(review): credentials are hardcoded in source and committed to
# version control; prefer reading them from environment variables or a config
# file kept outside the repository.
DB_CONFIG = {
    'dbname': 'tcg',
    'user': 'postgres',
    'password': 'lick_pubes',
    'host': 'localhost',
    'port': '5432'
}
# Version history configuration
DEFAULT_USER_ID = 3  # user_id recorded as creator/updater on inserted rows
DEFAULT_CHANGE_SET_ID = 1  # change-set id stamped on inserted rows
def clean_numeric(value):
    """Parse *value* into a float, tolerating currency formatting.

    Handles NaN/empty input (returns None), and strings containing currency
    symbols ('£', '$'), thousands separators (',') and surrounding
    whitespace. Returns None when the value cannot be parsed.
    """
    if pd.isna(value) or value == '':
        return None
    if isinstance(value, str):
        # Remove currency symbols, commas, and spaces
        value = value.replace('£', '').replace('$', '').replace(',', '').strip()
        if not value:
            return None
    try:
        return float(value)
    except (TypeError, ValueError):
        # Narrowed from a bare except: only unparseable content is treated
        # as missing; programming errors propagate.
        return None
def clean_boolean(value):
    """Coerce *value* to a bool; NaN/missing counts as False."""
    if pd.isna(value):
        return False
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        # Only the common affirmative spellings count as True.
        return value.lower() in {'yes', 'true', '1', 'y'}
    return bool(value)
def clean_datetime(value):
    """Convert various date representations to a pandas Timestamp.

    Parses with dayfirst=True (e.g. '02/01/2024' is 2 January 2024).
    Returns None for NaN/empty input or anything pandas cannot parse.
    """
    if pd.isna(value) or value == '':
        return None
    try:
        return pd.to_datetime(value, dayfirst=True)
    except (ValueError, TypeError, OverflowError):
        # Narrowed from a bare except: unparseable dates become None,
        # everything else propagates.
        return None
def clean_string(value, max_length=None):
    """Return *value* as a stripped string (None for NaN/empty input).

    When *max_length* is given and non-zero, the result is truncated.
    """
    if pd.isna(value) or value == '':
        return None
    cleaned = str(value).strip()
    if max_length and len(cleaned) > max_length:
        return cleaned[:max_length]
    return cleaned
"""
def get_or_create_card_id(cursor, key_id):
"" "Get card_id from TCG_MTG_Card table based on key_id, or create if needed"" "
# First check if card exists
cursor.execute("" "
SELECT card_id FROM tcg.public.TCG_MTG_Card
WHERE key_id = %s AND active = TRUE
"" ", (key_id,))
result = cursor.fetchone()
if result:
return result[0]
else:
# You might need to adjust this based on your card creation logic
print(f"Warning: No card found for key_id {key_id}. You may need to create the card record first.")
return None
"""
def process_spreadsheet(file_path, connection, minimum_row_number = 1):
    """Process the spreadsheet and insert records into database.

    Reads the 'MTG Cards' sheet of *file_path* and inserts one row per card
    into tcg.public.TCG_MTG_Inventory_Temp. Rows before *minimum_row_number*
    are skipped (resume support), as are 'Double-Faced Substitute Card'
    placeholder rows. Commits every 100 rows and once at the end; a failing
    row is rolled back, counted as skipped, and processing continues.
    Note: int(...) on a NaN id raises, which deliberately routes incomplete
    rows to the skip path below.
    """
    # Read the Excel file
    df = pd.read_excel(file_path, sheet_name='MTG Cards')
    pd.set_option('display.max_columns', None)
    print('dataframe')
    print(df)
    cursor = connection.cursor()
    inserted_count = 0
    skipped_count = 0
    for index, row in df.iterrows():
        # Resume support: skip rows already imported on a previous run.
        if (index + 1 < minimum_row_number):
            continue
        # Placeholder cards are not tracked in inventory.
        if (clean_string(row['Name']) == 'Double-Faced Substitute Card'):
            continue
        try:
            # print(index, row)
            # Prepare data for insertion
            data = {
                'finish_id': int(row['Finish Id'])  # if pd.notna(row['Finish Id']) else 1, # Default to 1 if missing
                , 'condition_id': int(row['Condition Id'])  # if pd.notna(row['Condition Id']) else 1, # Default to 1 if missing
                , 'sleeve_colour_name': clean_string(row['Sleeve'], 250)
                , 'location_name': clean_string(row['Location'], 250)
                , 'acquired_from': clean_string(row['Acquired From'], 500)
                , 'acquired_on': clean_datetime(row['Acquired On'])
                , 'cost_gbp': clean_numeric(row['Cost'])  # or 0.00, # Default to 0 if missing
                , 'sale_price_gbp': clean_numeric(row['Sale Price'])
                , 'is_sold': clean_boolean(row['Sold?'])
                , 'is_destroyed': clean_boolean(row['Destroyed?'])
                , 'notes': clean_string(row['Notes'])
                , 'alterations_customisations': clean_string(row['Alterations / Customisations'], 250)
                , 'grading_company_name': clean_string(row['Grading Company'], 250)
                , 'grading_score': clean_string(row['Grading Score'], 250)
                , 'subgrades': clean_string(row['Subgrades'], 250)
                , 'misprint_errors': clean_string(row['Misprint Errors'], 250)
                , 'miscut_errors': clean_string(row['Miscut Status'], 250)
                , 'playability': clean_string(row['Playability'], 250)
                , 'owner_user_id': DEFAULT_USER_ID  # Using configured user ID
                , 'ownership_status_name': clean_string(row['Ownership Status'], 250)
                , 'trading_status_name': clean_string(row['Trading Status'], 250)
                , 'loaned_to_user_id': None  # You'll need to map "Loaned To" to user_id if needed
                , 'loan_start_on': clean_datetime(row['Loan Start Date'])
                , 'loan_end_on': clean_datetime(row['Loan End Date'])
                , 'provenance': clean_string(row['Provenance - e.g. special history, tournaments used in'])
                , 'signed_by_names': clean_string(row['Signed By'])
                , 'signature_condition_name': clean_string(row['Signature Condition'], 250)
                , 'active': True
                , 'created_by_user_id': DEFAULT_USER_ID
                , 'updated_last_by_user_id': DEFAULT_USER_ID
                , 'change_set_id': DEFAULT_CHANGE_SET_ID
                , 'set_code': clean_string(row['Set Code'])
                , 'collector_number': clean_string(row['Collector Number'])
                , 'display_order': int(row['ID'])
                , 'name': clean_string(row['Name'])
                , 'is_token': clean_boolean(row['Is Token?'])
                , 'token_rear_side_name': clean_string(row['Token Rear Side Card Name'])
                , 'token_rear_side_set_code': clean_string(row['Token Rear Side Card Set'])
                , 'token_rear_side_collector_number': clean_string(row['Token Rear Side Card Collector Number'])
            }
            # Build the insert query (values bound as named parameters).
            insert_query = """
                INSERT INTO tcg.public.TCG_MTG_Inventory_Temp (
                    finish_id, condition_id, sleeve_colour_name, location_name,
                    acquired_from, acquired_on, cost_gbp, sale_price_gbp, is_sold,
                    is_destroyed, notes, alterations_customisations, grading_company_name,
                    grading_score, subgrades, misprint_errors, miscut_errors, playability,
                    owner_user_id, ownership_status_name, trading_status_name, loaned_to_user_id,
                    loan_start_on, loan_end_on, provenance, signed_by_names,
                    signature_condition_name, active, created_by_user_id,
                    updated_last_by_user_id, change_set_id,
                    set_code, collector_number, display_order, name,
                    is_token, token_rear_side_name, token_rear_side_set_code, token_rear_side_collector_number
                ) VALUES (
                    %(finish_id)s, %(condition_id)s, %(sleeve_colour_name)s, %(location_name)s,
                    %(acquired_from)s, %(acquired_on)s, %(cost_gbp)s, %(sale_price_gbp)s, %(is_sold)s,
                    %(is_destroyed)s, %(notes)s, %(alterations_customisations)s, %(grading_company_name)s,
                    %(grading_score)s, %(subgrades)s, %(misprint_errors)s, %(miscut_errors)s, %(playability)s,
                    %(owner_user_id)s, %(ownership_status_name)s, %(trading_status_name)s, %(loaned_to_user_id)s,
                    %(loan_start_on)s, %(loan_end_on)s, %(provenance)s, %(signed_by_names)s,
                    %(signature_condition_name)s, %(active)s, %(created_by_user_id)s,
                    %(updated_last_by_user_id)s, %(change_set_id)s,
                    %(set_code)s, %(collector_number)s, %(display_order)s, %(name)s,
                    %(is_token)s, %(token_rear_side_name)s, %(token_rear_side_set_code)s, %(token_rear_side_collector_number)s
                )
            """
            cursor.execute(insert_query, data)
            inserted_count += 1
            # Periodic progress report and commit.
            if (index + 1) % 100 == 0:
                print(f"Processed {index + 1} rows...")
                try:
                    connection.commit()
                except psycopg2.Error as e:
                    print(f'error: {e}')
                    connection.rollback()
        except Exception as e:
            # Spreadsheet rows are 1-based with a header row, hence index + 2.
            print(f"Error processing row {index + 2}: {str(e)}")
            skipped_count += 1
            connection.rollback()
            continue
    # Final commit
    try:
        connection.commit()
    except psycopg2.Error as e:
        print(f'error: {e}')
        connection.rollback()
    print(f"\nImport completed:")
    print(f"- Records inserted: {inserted_count}")
    print(f"- Records skipped: {skipped_count}")
    cursor.close()
def main():
    """Connect to the database and run the inventory import."""
    # file_path = '/home/teddy/Documents/Lifestyle/Trading Cards/Trading Cards upload.xlsx'
    file_path = '/media/teddy/3_6GB-SSD-Storage/Documents/Lifestyle/Trading Cards/Trading Cards upload.xlsx'
    minimum_row_number = 394  # ROW NUMBER - 1, or Inventory ID

    connection = None
    try:
        print("Connecting to database...")
        connection = psycopg2.connect(**DB_CONFIG)
        print("Connected successfully!")

        # Process the spreadsheet
        print(f"\nProcessing {file_path}...")
        process_spreadsheet(file_path, connection, minimum_row_number)
    except Exception as e:
        print(f"Error: {str(e)}")
        sys.exit(1)
    finally:
        # Close the connection whether the import succeeded or not.
        if connection:
            connection.close()
            print("\nDatabase connection closed.")
if __name__ == "__main__":
    # Configuration notes:
    # 1. Update DB_CONFIG with your actual database credentials
    # 2. Set DEFAULT_USER_ID to your user_id in the system
    # 3. Ensure TCG_MTG_Card table has records with matching Key IDs
    # 4. Ensure TCG_MTG_Finish and TCG_Condition tables have the referenced IDs
    main()

View File

@@ -0,0 +1,234 @@
import pandas as pd
from openpyxl import load_workbook
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import re
import time
# Flat per-item shipping cost (GBP) added to every scraped unit price.
ITEM_SHIPPING_COST_IN = 8
def get_eur_to_gbp_rate():
    """Fetch the current EUR→GBP conversion rate.

    Falls back to a hardcoded 0.85 on any network, HTTP or payload error.
    """
    try:
        response = requests.get('https://api.exchangerate-api.com/v4/latest/EUR', timeout=10)
        # FIX: treat HTTP errors (4xx/5xx) the same as network failures
        # instead of trying to parse an error body as JSON.
        response.raise_for_status()
        data = response.json()
        return data['rates']['GBP']
    except Exception as e:
        print(f"Error fetching exchange rate: {e}")
        print("Using fallback rate: 0.85")
        return 0.85
def parse_cardmarket_price(price_text):
    """Convert a Cardmarket price string like '141,30 €' to a float in EUR."""
    if not price_text:
        return None
    # Keep only digits and the comma decimal separator, then switch the
    # comma to a dot so float() accepts it.
    digits_only = re.sub(r'[^\d,]', '', price_text)
    normalized = digits_only.replace(',', '.')
    try:
        return float(normalized)
    except ValueError:
        return None
def setup_driver():
    """Create a visible (non-headless) Chrome WebDriver, or None on failure."""
    chrome_options = Options()
    # Headless mode deliberately disabled so the operator can watch the browser.
    # chrome_options.add_argument('--headless')
    for argument in (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ):
        chrome_options.add_argument(argument)
    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def scrape_cardmarket_price_selenium(driver, url):
    """Scrape the first listed offer price from a Cardmarket product page.

    Returns the raw price text (e.g. '141,30 €') or None when no price is
    found or the page fails to load. Pauses on input() when nothing is
    found so the operator can inspect the visible browser window.
    """
    try:
        print(f"  Loading page...")
        driver.get(url)
        # Crude wait: the offer table is rendered client-side.
        time.sleep(3)
        print(f"  Page title: {driver.title}")
        # Try multiple selector strategies (only the first is active today).
        selectors = [
            # Original selector
            '#table > div:nth-child(1) > div.table-body > .row.article-row.g-0:nth-child(1) > div.col-offer > div.price-container > div > div:nth-child(1) > span:nth-child(1)',
        ]
        for i, selector in enumerate(selectors):
            try:
                elements = driver.find_elements(By.CSS_SELECTOR, selector)
                print(f"  Selector {i+1}: Found {len(elements)} elements")
                for elem in elements[:3]:  # Check first 3
                    text = elem.text
                    print(f"    Text: '{text}'")
                    # FIX: the previous check was "'' in text", which is always
                    # true (the euro sign was lost); require the euro symbol
                    # plus at least one digit before accepting the element.
                    if '€' in text and re.search(r'\d', text):
                        print(f"  ✓ Found price with selector {i+1}: {text}")
                        return text
            except Exception as e:
                print(f"  Selector {i+1} failed: {e}")
        print(f"  ✗ No price found")
        input("Press Enter to continue to next URL...")
        return None
    except Exception as e:
        print(f"  Error: {e}")
        return None
def main():
    """Fill the 'Sourcing' sheet's unit-cost column with scraped Cardmarket prices.

    Locates the sourcing table in the workbook, scrapes each 'Card Market'
    row's link for a EUR price, converts it to GBP, adds the flat shipping
    cost and writes the result back, then saves the workbook in place.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'Sourcing'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return
    sheet = wb[sheet_name]
    # Find table boundaries: look for the named table marker in column 1
    # or a 'Source Name' header in column 3.
    table_found = False
    start_row = None
    for row in range(1, sheet.max_row + 1):
        if sheet.cell(row, 1).value == 'tbl_Sourcing' or 'Source Name' in str(sheet.cell(row, 3).value):
            start_row = row + 1
            table_found = True
            break
    if not table_found or not start_row:
        # Fallback: re-scan just the first rows for the header.
        for row in range(1, min(20, sheet.max_row + 1)):
            if 'Source Name' in str(sheet.cell(row, 3).value):
                start_row = row + 1
                table_found = True
                break
    if not table_found:
        print("Error: Could not find table 'tbl_Sourcing' or 'Source Name' column")
        return
    # Find column indices by matching header text in the header row.
    header_row = start_row - 1
    source_name_col = None
    source_link_col = None
    source_unit_cost_col = None
    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if 'Source Name' in header:
            source_name_col = col
        elif 'Source Link' in header:
            source_link_col = col
        elif 'Source Unit Cost' in header:
            source_unit_cost_col = col
    if not source_name_col:
        # Assume the conventional position when the header was not matched.
        source_name_col = 3
    print(f"Starting from row {start_row}")
    print(f"Columns - Source Name: {source_name_col}, Source Link: {source_link_col}, Source Unit Cost: {source_unit_cost_col}")
    if not all([source_link_col, source_unit_cost_col]):
        print("Error: Could not find required columns")
        return
    # Get EUR to GBP rate (fetched once for the whole run).
    eur_to_gbp = get_eur_to_gbp_rate()
    print(f"Using EUR to GBP rate: {eur_to_gbp}")
    # Setup Selenium driver
    print("Setting up browser automation (browser will be visible)...")
    driver = setup_driver()
    if not driver:
        return
    try:
        # Process rows
        processed_count = 0
        updated_count = 0
        for row in range(start_row, sheet.max_row + 1):
            source_name = sheet.cell(row, source_name_col).value
            source_link = sheet.cell(row, source_link_col).value
            # Check if row is empty (treated as end of table)
            if not source_name and not source_link:
                break
            # Only Card Market rows with a non-blank link are scraped.
            if source_name == "Card Market" and source_link and str(source_link).strip():
                processed_count += 1
                print(f"\n{'='*60}")
                print(f"Processing row {row}: {source_link}")
                print(f"{'='*60}")
                # Scrape price
                price_text = scrape_cardmarket_price_selenium(driver, source_link)
                if price_text:
                    print(f"  Found price: {price_text}")
                    # Parse and convert EUR -> GBP
                    eur_price = parse_cardmarket_price(price_text)
                    if eur_price:
                        gbp_price = eur_price * eur_to_gbp
                        print(f"  Converted: €{eur_price:.2f} → £{gbp_price:.2f}")
                        # Update cell: converted price plus flat shipping cost.
                        sheet.cell(row, source_unit_cost_col).value = gbp_price + ITEM_SHIPPING_COST_IN
                        updated_count += 1
                    else:
                        print(f"  Error: Could not parse price")
                else:
                    print(f"  Error: Could not find price on page")
        # Save workbook (overwrites the input file)
        print(f"\n{'='*60}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} Card Market entries")
        print(f"Updated: {updated_count} prices")
    finally:
        # Always shut the browser down, even on error.
        driver.quit()

View File

@@ -0,0 +1,248 @@
import pandas as pd
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import re
import time
def setup_driver(headless=True):
    """Create a Chrome WebDriver (headless by default), or None on failure."""
    chrome_options = Options()
    if headless:
        chrome_options.add_argument('--headless')
    for argument in (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-blink-features=AutomationControlled',
        '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
        '--window-size=1920,1080',
    ):
        chrome_options.add_argument(argument)
    try:
        return webdriver.Chrome(options=chrome_options)
    except Exception as e:
        print(f"Error setting up Chrome driver: {e}")
        print("Make sure Chrome and chromedriver are installed")
        return None
def parse_price_value(text):
    """Extract the numeric value from a price string.

    Handles '$5.50', '€5,50' and — unlike the previous version, which
    returned None — mixed-separator prices like '$1,234.56' or '€1.234,56'.
    Returns None for empty or unparseable input.
    """
    if not text:
        return None
    # Keep digits, separators and a possible sign.
    cleaned = re.sub(r'[^\d,.\-]', '', text)
    if not cleaned:
        return None
    last_comma = cleaned.rfind(',')
    last_dot = cleaned.rfind('.')
    if last_comma != -1 and last_dot != -1:
        # Both separators present: the right-most one is the decimal point,
        # the other is a thousands separator.
        if last_comma > last_dot:
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            cleaned = cleaned.replace(',', '')
    else:
        # Single separator: preserve the original behaviour of treating a
        # lone comma as a decimal point (European format).
        cleaned = cleaned.replace(',', '.')
    try:
        return float(cleaned)
    except ValueError:
        return None
def scrape_mtg_stocks_values(driver, url):
    """Scrape expected value and market value from MTG Stocks.

    Loads *url*, walks the expected-value product table and returns a dict
    with 'expected_value' and 'market_value' (floats or None) plus 'found'
    (bool) taken from the first row whose product name matches one of the
    known booster types. Returns found=False when nothing matches or the
    page fails to load.
    """
    try:
        print(f"  Loading page...")
        driver.get(url)
        # Wait for table to load (rendered client-side)
        time.sleep(3)
        # Valid booster types to match
        valid_booster_types = [
            'Play Booster',
            'Set Booster',
            'Booster',
            'Play Booster Pack',
            'Set Booster Pack',
            'Booster Pack'
        ]
        # Find all rows in the table
        row_selector = 'mtg-sets-expected-value > mtg-product-tree > .table-responsive > table > tbody:nth-child(2) > tr'
        rows = driver.find_elements(By.CSS_SELECTOR, row_selector)
        print(f"  Found {len(rows)} rows in table")
        for row in rows:
            try:
                # Get the booster type from first column
                booster_type_elem = row.find_element(By.CSS_SELECTOR, 'td:nth-child(1) > div.d-flex.align-items-center:nth-child(1) > a:nth-child(2)')
                booster_type = booster_type_elem.text.strip()
                print(f"    Checking row: '{booster_type}'")
                # Check if this matches our valid types
                if booster_type in valid_booster_types:
                    print(f"    ✓ Match found: '{booster_type}'")
                    # Get expected value (3rd column)
                    expected_value_elem = row.find_element(By.CSS_SELECTOR, 'td:nth-child(3)')
                    expected_value_text = expected_value_elem.text.strip()
                    # Get market value (5th column)
                    market_value_elem = row.find_element(By.CSS_SELECTOR, 'td:nth-child(5)')
                    market_value_text = market_value_elem.text.strip()
                    print(f"    Expected Value: '{expected_value_text}'")
                    print(f"    Market Value: '{market_value_text}'")
                    # Parse values
                    expected_value = parse_price_value(expected_value_text)
                    market_value = parse_price_value(market_value_text)
                    return {
                        'expected_value': expected_value,
                        'market_value': market_value,
                        'found': True
                    }
            except Exception as e:
                # Row doesn't match structure, continue to next
                continue
        print(f"  ✗ No matching booster type found")
        return {
            'expected_value': None,
            'market_value': None,
            'found': False
        }
    except Exception as e:
        print(f"  Error: {e}")
        return {
            'expected_value': None,
            'market_value': None,
            'found': False
        }
def main():
    """Fill the 'MTG Set' sheet's EV columns from each row's MTG Stocks link.

    Locates the table by its 'EV MTG Stocks Link' header, scrapes each
    linked page for booster expected/market values, writes them back (or
    clears the cells when no booster row matched), then saves the workbook.
    """
    workbook_name = 'TCG Sole Trader Copy.xlsx'
    sheet_name = 'MTG Set'
    print("Loading workbook...")
    wb = load_workbook(workbook_name)
    if sheet_name not in wb.sheetnames:
        print(f"Error: Sheet '{sheet_name}' not found")
        return
    sheet = wb[sheet_name]
    # Find table boundaries and columns
    table_found = False
    start_row = None
    header_row = None
    # Search for table header
    print("max sheet column: ", str(sheet.max_column))
    for row in range(2, max(50, sheet.max_row + 1)):
        cell_value = str(sheet.cell(row, 1).value)  # NOTE(review): dead assignment, overwritten below
        # Check multiple columns for table indicators
        for col in range(1, max(10, sheet.max_column + 1)):
            cell_value = str(sheet.cell(row, col).value)
            if 'EV MTG Stocks Link' in cell_value:
                header_row = row
                start_row = row + 1
                table_found = True
                break
        if table_found:
            break
    if not table_found:
        print("Error: Could not find 'EV MTG Stocks Link' column")
        return
    print(f"Found table header at row {header_row}")
    print(f"Starting from row {start_row}")
    # Find column indices by matching header text.
    ev_link_col = None
    expected_value_col = None
    market_value_col = None
    for col in range(1, sheet.max_column + 1):
        header = str(sheet.cell(header_row, col).value).strip()
        if 'EV MTG Stocks Link' in header:
            ev_link_col = col
        elif 'Play Booster Expected Market Value' in header:
            expected_value_col = col
        elif 'Play Boost Sealed Market Value' in header:
            market_value_col = col
    print(f"Columns - EV Link: {ev_link_col}, Expected Value: {expected_value_col}, Market Value: {market_value_col}")
    if not all([ev_link_col, expected_value_col, market_value_col]):
        print("Error: Could not find all required columns")
        print(f"  EV MTG Stocks Link: {'Found' if ev_link_col else 'NOT FOUND'}")
        print(f"  Play Booster Expected Market Value: {'Found' if expected_value_col else 'NOT FOUND'}")
        print(f"  Play Boost Sealed Market Value: {'Found' if market_value_col else 'NOT FOUND'}")
        return
    # Setup Selenium driver
    print("Setting up browser automation...")
    driver = setup_driver(headless=False)  # Set to False to see browser
    if not driver:
        return
    try:
        # Process rows
        processed_count = 0
        updated_count = 0
        cleared_count = 0
        for row in range(start_row, sheet.max_row + 1):
            ev_link = sheet.cell(row, ev_link_col).value
            # Check if row is empty
            if not ev_link:
                # Check if we've passed the end of the table
                empty_count = 0
                for check_col in range(1, min(10, sheet.max_column + 1)):
                    if not sheet.cell(row, check_col).value:
                        empty_count += 1
                if empty_count >= 5:  # If most columns are empty, assume end of table
                    break
                continue
            processed_count += 1
            print(f"\n{'='*80}")
            print(f"Processing row {row}: {ev_link}")
            print(f"{'='*80}")
            # Scrape values
            result = scrape_mtg_stocks_values(driver, ev_link)
            if result['found']:
                # Update cells with found values
                sheet.cell(row, expected_value_col).value = result['expected_value']
                sheet.cell(row, market_value_col).value = result['market_value']
                updated_count += 1
                print(f"  ✓ Updated - Expected: {result['expected_value']}, Market: {result['market_value']}")
            else:
                # Clear cells - no matching booster type found
                sheet.cell(row, expected_value_col).value = ''
                sheet.cell(row, market_value_col).value = ''
                cleared_count += 1
                print(f"  ✗ Cleared values - no matching booster type found")
            # Small delay between requests to be polite to the site.
            time.sleep(2)
        # Save workbook (overwrites the input file)
        print(f"\n{'='*80}")
        print(f"Saving workbook...")
        wb.save(workbook_name)
        print(f"\nComplete!")
        print(f"Processed: {processed_count} entries")
        print(f"Updated: {updated_count} entries")
        print(f"Cleared: {cleared_count} entries (no matching data)")
    finally:
        # Always shut the browser down, even on error.
        driver.quit()

11
python/requirements.txt Normal file
View File

@@ -0,0 +1,11 @@
# MTG Card Import
pandas
psycopg2
numpy
openpyxl
# MTG Booster Box Price CardMarket
requests
beautifulsoup4
selenium