"""Convert a Scryfall bulk-data card dump (JSON array or NDJSON) to .xlsx.

Each card object is flattened into a single row: scalar attributes are
copied as-is, list attributes become comma-separated strings, and nested
dictionaries (images, legalities, prices, ...) are expanded into prefixed
columns. The resulting workbook has a bold, frozen, filterable header row.
"""

import json
import sys
import traceback
from collections.abc import Mapping
from datetime import datetime
from pathlib import Path

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font
    from openpyxl.utils import get_column_letter
except ImportError:
    # Deferred on purpose: main() reports the missing dependency together
    # with install instructions instead of failing at import time.
    Workbook = Font = get_column_letter = None


# Excel rejects cell text longer than this many characters.
EXCEL_MAX_CELL_CHARS = 32767
# Column auto-sizing looks at the header plus the first rows only.
WIDTH_SAMPLE_ROWS = 100
# Upper bound for an auto-sized column width, in characters.
MAX_COLUMN_WIDTH = 50
# How often (in cards) progress is reported.
PROGRESS_INTERVAL = 10000

# Scalar card attributes copied to the output unchanged.
SIMPLE_FIELDS = (
    'id', 'oracle_id', 'name', 'lang', 'released_at', 'uri', 'scryfall_uri',
    'layout', 'highres_image', 'image_status', 'mana_cost', 'cmc',
    'type_line', 'oracle_text', 'power', 'toughness', 'loyalty',
    'life_modifier', 'hand_modifier', 'reserved', 'foil', 'nonfoil',
    'oversized', 'promo', 'reprint', 'variation', 'set_id', 'set',
    'set_name', 'set_type', 'set_uri', 'set_search_uri', 'scryfall_set_uri',
    'rulings_uri', 'prints_search_uri', 'collector_number', 'digital',
    'rarity', 'card_back_id', 'artist', 'border_color', 'frame', 'full_art',
    'textless', 'booster', 'story_spotlight', 'edhrec_rank', 'penny_rank',
    'flavor_text', 'watermark', 'printed_name', 'printed_type_line',
    'printed_text', 'security_stamp', 'preview_text', 'content_warning',
    'flavor_name', 'game_changer',
)

# List-valued attributes, joined into one comma-separated string.
ARRAY_FIELDS = (
    'multiverse_ids', 'colors', 'color_identity', 'keywords',
    'produced_mana', 'games', 'finishes', 'artist_ids', 'all_parts',
    'card_faces', 'related_cards',
)

# Marketplace / client identifiers, copied unchanged when present.
ID_FIELDS = ('mtgo_id', 'arena_id', 'tcgplayer_id', 'cardmarket_id')

# Nested dictionaries and the column prefix their keys are expanded under.
NESTED_FIELD_PREFIXES = (
    ('image_uris', 'image_uri_'),
    ('legalities', 'legal_'),
    ('prices', 'price_'),
    ('related_uris', 'uri_'),
    ('purchase_uris', 'purchase_'),
)

# Keys of the nested 'preview' dictionary and their output column names.
PREVIEW_FIELDS = (
    ('source', 'preview_source'),
    ('source_uri', 'preview_source_uri'),
    ('previewed_at', 'preview_date'),
)


def flatten_card_data(card):
    """Flatten one card object into a single-level dictionary.

    Args:
        card: Mapping holding one card as parsed from JSON.

    Returns:
        A dict with one entry per output column. Only attributes present
        on the card are included; unknown attributes are ignored.

    Raises:
        TypeError: If ``card`` is not a mapping.
    """
    if not isinstance(card, Mapping):
        raise TypeError(
            f"card must be a mapping, got {type(card).__name__}"
        )

    flat_card = {}

    for field in SIMPLE_FIELDS:
        if field in card:
            flat_card[field] = card[field]

    for field in ARRAY_FIELDS:
        if field in card:
            value = card[field]
            if isinstance(value, list):
                # Items may be nested objects; str() keeps them readable.
                value = ', '.join(str(item) for item in value)
            flat_card[field] = value

    for field in ID_FIELDS:
        if field in card:
            flat_card[field] = card[field]

    for field, prefix in NESTED_FIELD_PREFIXES:
        nested = card.get(field)
        if isinstance(nested, dict):
            for key, value in nested.items():
                flat_card[f'{prefix}{key}'] = value

    preview = card.get('preview')
    if isinstance(preview, dict):
        for key, column in PREVIEW_FIELDS:
            if key in preview:
                flat_card[column] = preview[key]

    return flat_card


def detect_json_format(input_file):
    """Detect whether a file holds a JSON array or NDJSON.

    The decision is made on the first non-whitespace character: ``[``
    means a JSON array, anything else (including an empty file) is
    treated as NDJSON. A leading UTF-8 byte-order mark is ignored.

    Args:
        input_file: Path of the file to inspect.

    Returns:
        ``'array'`` or ``'ndjson'``.
    """
    # utf-8-sig drops a BOM if present and otherwise behaves like utf-8.
    with open(input_file, 'r', encoding='utf-8-sig') as f:
        # Read in bounded chunks so a huge single-line file is never
        # pulled into memory just to look at its first character.
        while True:
            chunk = f.read(4096)
            if not chunk:
                return 'ndjson'
            head = chunk.lstrip()
            if head:
                return 'array' if head[0] == '[' else 'ndjson'


def _to_cell_value(value):
    """Coerce a flattened value into something a worksheet cell accepts."""
    if isinstance(value, (dict, list, tuple, set)):
        # Containers cannot be stored in a cell; keep their text form.
        value = str(value)
    if isinstance(value, str) and len(value) > EXCEL_MAX_CELL_CHARS:
        value = value[:EXCEL_MAX_CELL_CHARS - 3] + "..."
    return value


def write_to_excel(cards_data, fieldnames, output_file):
    """Write flattened cards to a formatted Excel workbook.

    Args:
        cards_data: List of flattened card dicts, one per row.
        fieldnames: Column names, in output order.
        output_file: Destination path of the .xlsx file.

    Raises:
        ImportError: If openpyxl is not installed.
    """
    if Workbook is None:
        raise ImportError(
            "openpyxl is required for Excel output (pip install openpyxl)"
        )

    wb = Workbook()
    ws = wb.active
    ws.title = "Magic Cards"

    # Header row in bold.
    header_font = Font(bold=True)
    for col, field in enumerate(fieldnames, 1):
        ws.cell(row=1, column=col, value=field).font = header_font

    print("Writing to Excel file...")
    for card_num, card_data in enumerate(cards_data, 1):
        row_num = card_num + 1  # row 1 holds the header
        for col, field in enumerate(fieldnames, 1):
            value = _to_cell_value(card_data.get(field, ''))
            ws.cell(row=row_num, column=col, value=value)
        if card_num % PROGRESS_INTERVAL == 0:
            print(f"  Written {card_num:,} cards...")

    # Size columns from the header and the first data rows. Working from
    # the input data avoids materialising every worksheet cell again.
    print("Adjusting column widths...")
    sample = cards_data[:WIDTH_SAMPLE_ROWS - 1]
    for col, field in enumerate(fieldnames, 1):
        longest = len(str(field))
        for card_data in sample:
            value = card_data.get(field)
            if value is not None:
                longest = max(longest, len(str(_to_cell_value(value))))
        width = min(longest + 2, MAX_COLUMN_WIDTH)
        ws.column_dimensions[get_column_letter(col)].width = width

    # Keep the header visible while scrolling and make it filterable.
    ws.freeze_panes = 'A2'
    ws.auto_filter.ref = ws.dimensions

    print("Saving Excel file...")
    wb.save(output_file)
    print(f"Saved {len(cards_data):,} cards to {output_file}")


def process_scryfall_array(input_file, output_file):
    """Convert a Scryfall JSON array file to Excel.

    The whole file is loaded into memory. On a JSON syntax error or when
    memory runs out, a message is printed and the process exits with
    status 1.

    Args:
        input_file: Path of the JSON array file.
        output_file: Destination path of the .xlsx file.
    """
    print(f"Processing {input_file} (JSON array format)...")
    print("Loading and parsing JSON data (this may take a minute for large files)...")

    try:
        with open(input_file, 'r', encoding='utf-8-sig') as f:
            data = json.load(f)

        print(f"Loaded {len(data):,} cards")

        print("Analyzing card structure...")
        all_fields = set()
        processed_cards = []
        skipped = 0

        for count, card in enumerate(data, 1):
            if not isinstance(card, Mapping):
                # Array entries that are not objects cannot be cards.
                skipped += 1
                continue
            flat_card = flatten_card_data(card)
            all_fields.update(flat_card)
            processed_cards.append(flat_card)
            if count % PROGRESS_INTERVAL == 0:
                print(f"  Analyzed {count:,} cards...")

        print(f"Found {len(all_fields)} unique fields")
        if skipped:
            print(f"  (Skipped {skipped} entries that are not card objects)")

        write_to_excel(processed_cards, sorted(all_fields), output_file)
        print(f"\nComplete! You can now open {output_file} in LibreOffice Calc or Excel")

    except MemoryError:
        print("File too large for memory. The file might be too big to process at once.")
        print("Consider using a streaming JSON parser or processing in chunks.")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        print("The file might be corrupted or not in valid JSON format.")
        sys.exit(1)


def _read_ndjson_cards(input_file):
    """Read and flatten every card of an NDJSON file in one pass.

    Returns:
        A ``(cards, fields, skipped)`` tuple: the flattened cards, the
        set of all column names seen, and the number of non-blank lines
        skipped because they were not a valid JSON object.
    """
    cards = []
    fields = set()
    skipped = 0

    with open(input_file, 'r', encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                card = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if not isinstance(card, Mapping):
                # Valid JSON, but not an object, so not a card.
                skipped += 1
                continue

            flat_card = flatten_card_data(card)
            fields.update(flat_card)
            cards.append(flat_card)
            if len(cards) % PROGRESS_INTERVAL == 0:
                print(f"  Read {len(cards):,} cards...")

    return cards, fields, skipped


def process_scryfall_ndjson(input_file, output_file):
    """Convert a Scryfall NDJSON file (one card object per line) to Excel.

    Blank lines are ignored; lines that are not a valid JSON object are
    counted and skipped.

    Args:
        input_file: Path of the NDJSON file.
        output_file: Destination path of the .xlsx file.
    """
    print(f"Processing {input_file} (NDJSON format)...")
    print("Reading and analyzing card data...")

    processed_cards, all_fields, skipped = _read_ndjson_cards(input_file)

    print(f"Found {len(all_fields)} unique fields across {len(processed_cards):,} cards")
    if skipped > 0:
        print(f"  (Skipped {skipped} malformed lines)")

    write_to_excel(processed_cards, sorted(all_fields), output_file)
    print(f"\nComplete! You can now open {output_file} in LibreOffice Calc or Excel")


def main():
    """Parse command-line arguments and run the conversion.

    Usage: ``mtg_card_fetcher.py <input_json_file> [output_xlsx_file]``.
    Without an output path, a timestamped ``mtg_cards_*.xlsx`` file is
    written to the current directory. Exits with status 1 on any error.
    """
    if len(sys.argv) not in (2, 3):
        print("Usage: python3 mtg_card_fetcher.py <input_json_file> [output_xlsx_file]")
        print("Example: python3 mtg_card_fetcher.py mtg-default-cards-20251018212333.json")
        sys.exit(1)

    input_file = sys.argv[1]
    if len(sys.argv) == 3:
        output_file = sys.argv[2]
    else:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = f'mtg_cards_{timestamp}.xlsx'

    # Fail early, before any prompt, when Excel output is impossible.
    if Workbook is None:
        print("\nError: openpyxl library is required for Excel output.")
        print("Please install it using: pip install openpyxl")
        sys.exit(1)

    if not Path(input_file).is_file():
        print(f"Error: Input file '{input_file}' not found!")
        sys.exit(1)

    if not output_file.endswith('.xlsx'):
        print("Warning: Output file should have .xlsx extension")
        response = input("Continue anyway? (y/n): ")
        if response.lower() != 'y':
            sys.exit(0)

    file_size = Path(input_file).stat().st_size / (1024 * 1024)  # MB
    print(f"Input file size: {file_size:.1f} MB")

    print("Detecting file format...")
    format_type = detect_json_format(input_file)
    print(f"Detected format: {format_type.upper()}")

    if Path(output_file).exists():
        response = input(f"Warning: Output file '{output_file}' already exists. Overwrite? (y/n): ")
        if response.lower() != 'y':
            print("Cancelled.")
            sys.exit(0)

    try:
        if format_type == 'array':
            process_scryfall_array(input_file, output_file)
        else:
            process_scryfall_ndjson(input_file, output_file)
    except KeyboardInterrupt:
        print("\n\nProcess interrupted by user.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback.
        print(f"Error processing file: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()