Initial commit.
This commit is contained in:
351
mtg_card_fetcher.py
Normal file
351
mtg_card_fetcher.py
Normal file
@@ -0,0 +1,351 @@
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.utils import get_column_letter
|
||||
from openpyxl.styles import Font
|
||||
from datetime import datetime
|
||||
|
||||
# Scalar card attributes that are copied into the flattened row unchanged.
_SIMPLE_FIELDS = (
    'id', 'oracle_id', 'name', 'lang', 'released_at', 'uri', 'scryfall_uri',
    'layout', 'highres_image', 'image_status', 'mana_cost', 'cmc', 'type_line',
    'oracle_text', 'power', 'toughness', 'loyalty', 'life_modifier', 'hand_modifier',
    'reserved', 'foil', 'nonfoil', 'oversized', 'promo', 'reprint', 'variation',
    'set_id', 'set', 'set_name', 'set_type', 'set_uri', 'set_search_uri',
    'scryfall_set_uri', 'rulings_uri', 'prints_search_uri', 'collector_number',
    'digital', 'rarity', 'card_back_id', 'artist', 'border_color', 'frame',
    'full_art', 'textless', 'booster', 'story_spotlight', 'edhrec_rank',
    'penny_rank', 'flavor_text', 'watermark', 'printed_name', 'printed_type_line',
    'printed_text', 'security_stamp', 'preview_text', 'content_warning',
    'flavor_name', 'game_changer',
)

# Fields expected to hold lists; each becomes one comma-separated cell.
_ARRAY_FIELDS = (
    'multiverse_ids', 'colors', 'color_identity', 'keywords', 'produced_mana',
    'games', 'finishes', 'artist_ids', 'all_parts', 'card_faces', 'related_cards',
)

# Platform / marketplace identifiers, copied unchanged.
_ID_FIELDS = ('mtgo_id', 'arena_id', 'tcgplayer_id', 'cardmarket_id')

# Nested dicts that are expanded into one column per key: (card key, column prefix).
_PREFIXED_DICT_FIELDS = (
    ('image_uris', 'image_uri_'),
    ('legalities', 'legal_'),
    ('prices', 'price_'),
    ('related_uris', 'uri_'),
    ('purchase_uris', 'purchase_'),
)

# Keys of the nested 'preview' dict and the column each one is written to.
_PREVIEW_COLUMNS = (
    ('source', 'preview_source'),
    ('source_uri', 'preview_source_uri'),
    ('previewed_at', 'preview_date'),
)


def _list_item_text(item):
    """Render one list element as text; nested dicts/lists become JSON."""
    if isinstance(item, (dict, list)):
        # JSON (not Python repr) keeps nested objects machine-readable in the cell.
        return json.dumps(item, ensure_ascii=False)
    return str(item)


def flatten_card_data(card):
    """
    Flatten a single card's data structure into a dictionary suitable for Excel.

    Only known fields are copied; anything else in ``card`` is ignored.

    - Simple and ID fields are copied as-is when present.
    - List fields become a single ", "-joined string. Scalar items use
      ``str()``; nested dict/list items (e.g. entries of ``card_faces``)
      are serialised as JSON.
    - A list field that unexpectedly holds a dict is serialised as JSON so
      the value can still be stored in a cell; other non-list values are
      copied unchanged.
    - ``image_uris``, ``legalities``, ``prices``, ``related_uris`` and
      ``purchase_uris`` are expanded to one prefixed column per key.
    - ``preview`` is expanded to ``preview_source``, ``preview_source_uri``
      and ``preview_date``.

    Args:
        card: Mapping holding one card's raw data.

    Returns:
        A new flat dict mapping column name to cell value.
    """
    flat_card = {}

    for field in _SIMPLE_FIELDS:
        if field in card:
            flat_card[field] = card[field]

    for field in _ARRAY_FIELDS:
        if field not in card:
            continue
        value = card[field]
        if isinstance(value, list):
            flat_card[field] = ', '.join(_list_item_text(item) for item in value)
        elif isinstance(value, dict):
            flat_card[field] = json.dumps(value, ensure_ascii=False)
        else:
            flat_card[field] = value

    for field in _ID_FIELDS:
        if field in card:
            flat_card[field] = card[field]

    for source_key, prefix in _PREFIXED_DICT_FIELDS:
        nested = card.get(source_key)
        if isinstance(nested, dict):
            for key, value in nested.items():
                flat_card[f'{prefix}{key}'] = value

    preview = card.get('preview')
    if isinstance(preview, dict):
        for key, column in _PREVIEW_COLUMNS:
            if key in preview:
                flat_card[column] = preview[key]

    return flat_card
|
||||
|
||||
def detect_json_format(input_file):
    """
    Detect if the file is NDJSON or JSON array format.

    The decision is based on the first significant character of the file:
    leading whitespace (of any length) and a leading byte-order mark are
    skipped. The content is not otherwise parsed or validated.

    Args:
        input_file: Path of the UTF-8 text file to inspect.

    Returns:
        'array' if the first significant character is '[', otherwise
        'ndjson' (this includes empty and whitespace-only files).
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        # Read in chunks so the amount of leading whitespace does not matter
        # and the whole file is never loaded just for detection.
        while True:
            chunk = f.read(4096)
            if not chunk:
                return 'ndjson'
            significant = chunk.lstrip().lstrip('\ufeff').lstrip()
            if significant:
                return 'array' if significant[0] == '[' else 'ndjson'
|
||||
|
||||
def write_to_excel(cards_data, fieldnames, output_file):
    """
    Write the card data to an Excel file with formatting.

    The workbook gets a single sheet titled "Magic Cards" with a bold header
    row, one row per card, column widths derived from the first 100 rows
    (header included, capped at 50), a frozen header row and an auto-filter
    over the used range.

    Args:
        cards_data: Sized sequence of flat dicts, one per card. Fields missing
            from a card are written as an empty string.
        fieldnames: Column names, in output order.
        output_file: Destination path handed to ``Workbook.save``.
    """
    max_cell_chars = 32767  # Excel's per-cell character limit
    width_sample_rows = 100  # rows (header included) used to size columns
    max_column_width = 50

    fieldnames = list(fieldnames)
    # Longest rendered value seen per column within the sampled rows.
    longest = [0] * len(fieldnames)

    wb = Workbook()
    ws = wb.active
    ws.title = "Magic Cards"

    # Write headers with bold formatting
    header_font = Font(bold=True)
    for col, field in enumerate(fieldnames, 1):
        cell = ws.cell(row=1, column=col, value=field)
        cell.font = header_font
        if field:
            longest[col - 1] = len(str(field))

    # Write data
    print("Writing to Excel file...")
    for card_count, card_data in enumerate(cards_data, 1):
        row_num = card_count + 1  # row 1 is the header
        for col, field in enumerate(fieldnames, 1):
            value = card_data.get(field, '')
            if isinstance(value, str) and len(value) > max_cell_chars:
                value = value[:max_cell_chars - 3] + "..."
            ws.cell(row=row_num, column=col, value=value)

            # Track widths while writing instead of re-walking every column
            # of the finished sheet afterwards.
            if row_num <= width_sample_rows and value:
                longest[col - 1] = max(longest[col - 1], len(str(value)))

        if card_count % 10000 == 0:
            print(f" Written {card_count:,} cards...")

    # Auto-adjust column widths (limited to prevent excessive widths)
    print("Adjusting column widths...")
    for col, length in enumerate(longest, 1):
        column_letter = get_column_letter(col)
        ws.column_dimensions[column_letter].width = min(length + 2, max_column_width)

    # Freeze the header row
    ws.freeze_panes = 'A2'

    # Enable filters
    ws.auto_filter.ref = ws.dimensions

    print("Saving Excel file...")
    wb.save(output_file)
    print(f"Saved {len(cards_data):,} cards to {output_file}")
|
||||
|
||||
def process_scryfall_array(input_file, output_file):
    """
    Convert a Scryfall JSON-array export into an Excel workbook.

    The whole file is parsed in one go, every entry is flattened with
    ``flatten_card_data``, and the rows are written with ``write_to_excel``
    using the sorted union of all flattened keys as columns.

    Exits the process with status 1 if the file does not fit in memory or
    is not valid JSON.
    """
    print(f"Processing {input_file} (JSON array format)...")
    print("Loading and parsing JSON data (this may take a minute for large files)...")

    try:
        with open(input_file, 'r', encoding='utf-8') as handle:
            cards = json.load(handle)

        print(f"Loaded {len(cards):,} cards")

        # Flatten every card while collecting the union of column names.
        print("Analyzing card structure...")
        rows = []
        seen_fields = set()

        for count, raw_card in enumerate(cards, 1):
            row = flatten_card_data(raw_card)
            seen_fields.update(row)
            rows.append(row)
            if count % 10000 == 0:
                print(f" Analyzed {count:,} cards...")

        print(f"Found {len(seen_fields)} unique fields")
        columns = sorted(seen_fields)

        # Write to Excel
        write_to_excel(rows, columns, output_file)
        print(f"\nComplete! You can now open {output_file} in LibreOffice Calc or Excel")

    except MemoryError:
        print("File too large for memory. The file might be too big to process at once.")
        print("Consider using a streaming JSON parser or processing in chunks.")
        sys.exit(1)
    except json.JSONDecodeError as err:
        print(f"Error parsing JSON: {err}")
        print("The file might be corrupted or not in valid JSON format.")
        sys.exit(1)
|
||||
|
||||
def process_scryfall_ndjson(input_file, output_file):
    """
    Process a Scryfall NDJSON file and convert to Excel.

    The file is read once, line by line. Blank lines are ignored; lines that
    are not valid JSON, or whose JSON value is not an object, are counted
    and skipped. Every remaining card is flattened with
    ``flatten_card_data`` and the rows are written with ``write_to_excel``
    using the sorted union of all flattened keys as columns.

    Args:
        input_file: Path of the UTF-8 NDJSON file (one card object per line).
        output_file: Path of the .xlsx file to create.
    """
    print(f"Processing {input_file} (NDJSON format)...")

    # All rows are kept in memory for the workbook anyway, so the column set
    # is collected in the same pass instead of parsing the file twice.
    print("Reading and analyzing card data...")
    all_fields = set()
    processed_cards = []
    errors = 0

    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            try:
                card = json.loads(line)
            except json.JSONDecodeError:
                errors += 1
                continue

            # A valid JSON scalar or array is not a card; skip it explicitly
            # rather than letting the flattening step fail on it.
            if not isinstance(card, dict):
                errors += 1
                continue

            flat_card = flatten_card_data(card)
            all_fields.update(flat_card.keys())
            processed_cards.append(flat_card)

            if len(processed_cards) % 10000 == 0:
                print(f" Read {len(processed_cards):,} cards...")

    print(f"Found {len(all_fields)} unique fields across {len(processed_cards):,} cards")
    if errors > 0:
        print(f" (Skipped {errors} malformed lines)")

    fieldnames = sorted(all_fields)

    # Write to Excel
    write_to_excel(processed_cards, fieldnames, output_file)
    print(f"\nComplete! You can now open {output_file} in LibreOffice Calc or Excel")
|
||||
|
||||
def main():
    """
    Command-line entry point: convert one card JSON file to a timestamped .xlsx.

    Usage: python3 mtg_card_fetcher.py <input_json_file>

    The output is written to ``mtg_cards_<YYYYmmdd_HHMMSS>.xlsx`` in the
    current directory. The input format (JSON array or NDJSON) is detected
    with ``detect_json_format`` and the matching processor is run.

    Exits with status 1 on bad arguments, a missing or non-file input, an
    interrupt, or a processing error; exits with status 0 if the user
    declines to overwrite an existing output file.
    """
    if len(sys.argv) != 2:
        print("Usage: python3 mtg_card_fetcher.py <input_json_file>")
        print("Example: python3 mtg_card_fetcher.py mtg-default-cards-20251018212333.json")
        sys.exit(1)

    input_file = sys.argv[1]
    input_path = Path(input_file)

    # Validate the input before doing anything else. A directory "exists"
    # too, so check for a regular file explicitly.
    if not input_path.exists():
        print(f"Error: Input file '{input_file}' not found!")
        sys.exit(1)
    if not input_path.is_file():
        print(f"Error: Input path '{input_file}' is not a file!")
        sys.exit(1)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = f'mtg_cards_{timestamp}.xlsx'

    # Check file size
    file_size = input_path.stat().st_size / (1024 * 1024)  # Size in MB
    print(f"Input file size: {file_size:.1f} MB")

    # Detect format
    print("Detecting file format...")
    format_type = detect_json_format(input_file)
    print(f"Detected format: {format_type.upper()}")

    # The name is timestamped to the second, so a clash is only possible when
    # the script is run twice within the same second.
    if Path(output_file).exists():
        response = input(f"Warning: Output file '{output_file}' already exists. Overwrite? (y/n): ")
        if response.lower() != 'y':
            print("Cancelled.")
            sys.exit(0)

    try:
        if format_type == 'array':
            process_scryfall_array(input_file, output_file)
        else:
            process_scryfall_ndjson(input_file, output_file)
    except KeyboardInterrupt:
        print("\n\nProcess interrupted by user.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback and exit non-zero.
        print(f"Error processing file: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user