Initial commit.
This commit is contained in:
293
mtg_card_fetcher copy.py
Normal file
293
mtg_card_fetcher copy.py
Normal file
@@ -0,0 +1,293 @@
|
||||
import json
|
||||
import csv
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import openpyxl
|
||||
from openpyxl import Workbook
|
||||
|
||||
def flatten_card_data(card):
    """
    Flatten one Scryfall card object into a single-level dict for CSV output.

    Scalar fields are copied through unchanged, list fields become
    comma-separated strings, and nested dicts (image URIs, legalities,
    prices, related/purchase URIs, preview info) are expanded into
    prefixed columns.
    """
    simple_fields = (
        'id', 'oracle_id', 'name', 'lang', 'released_at', 'uri', 'scryfall_uri',
        'layout', 'highres_image', 'image_status', 'mana_cost', 'cmc', 'type_line',
        'oracle_text', 'power', 'toughness', 'loyalty', 'life_modifier', 'hand_modifier',
        'reserved', 'foil', 'nonfoil', 'oversized', 'promo', 'reprint', 'variation',
        'set_id', 'set', 'set_name', 'set_type', 'set_uri', 'set_search_uri',
        'scryfall_set_uri', 'rulings_uri', 'prints_search_uri', 'collector_number',
        'digital', 'rarity', 'card_back_id', 'artist', 'border_color', 'frame',
        'full_art', 'textless', 'booster', 'story_spotlight', 'edhrec_rank',
        'penny_rank', 'flavor_text', 'watermark', 'printed_name', 'printed_type_line',
        'printed_text', 'security_stamp', 'preview_text', 'content_warning',
        'flavor_name', 'game_changer',
    )
    array_fields = (
        'multiverse_ids', 'colors', 'color_identity', 'keywords', 'produced_mana',
        'games', 'finishes', 'artist_ids', 'all_parts', 'card_faces', 'related_cards',
    )
    # Platform-specific identifiers, present only for some printings.
    id_fields = ('mtgo_id', 'arena_id', 'tcgplayer_id', 'cardmarket_id')
    # (output prefix, source key) pairs for nested dicts that get expanded.
    nested_prefixes = (
        ('image_uri', 'image_uris'),
        ('legal', 'legalities'),
        ('price', 'prices'),
        ('uri', 'related_uris'),
        ('purchase', 'purchase_uris'),
    )

    # Scalars: copy only the keys actually present on this card.
    flat = {key: card[key] for key in simple_fields if key in card}

    # Lists are collapsed to "a, b, c"; any non-list value passes through.
    for key in array_fields:
        if key in card:
            value = card[key]
            if isinstance(value, list):
                flat[key] = ', '.join(str(item) for item in value)
            else:
                flat[key] = value

    for key in id_fields:
        if key in card:
            flat[key] = card[key]

    # Expand each nested dict into prefixed columns, e.g. image_uri_small.
    for prefix, source in nested_prefixes:
        if isinstance(card.get(source), dict):
            for sub_key, sub_value in card[source].items():
                flat[f'{prefix}_{sub_key}'] = sub_value

    # Preview metadata uses explicit column names ('previewed_at' -> 'preview_date').
    preview = card.get('preview')
    if isinstance(preview, dict):
        if 'source' in preview:
            flat['preview_source'] = preview['source']
        if 'source_uri' in preview:
            flat['preview_source_uri'] = preview['source_uri']
        if 'previewed_at' in preview:
            flat['preview_date'] = preview['previewed_at']

    return flat
|
||||
def detect_json_format(input_file):
    """
    Return 'array' if *input_file* looks like a JSON array, else 'ndjson'.

    Only the first ~100 characters are inspected, so very large files are
    classified without being read in full.
    """
    with open(input_file, 'r', encoding='utf-8') as handle:
        head = handle.read(100).strip()
        if head.startswith('['):
            return 'array'
        if head.startswith('{'):
            return 'ndjson'
        # Ambiguous start: fall back to examining the first full line.
        handle.seek(0)
        opening_line = handle.readline().strip()
        return 'array' if opening_line.startswith('[') else 'ndjson'
||||
def process_scryfall_array(input_file, output_file):
    """
    Convert a Scryfall bulk-data JSON *array* file to CSV.

    Loads the whole array into memory, then makes one pass to collect the
    union of flattened field names (for a complete, stable CSV header) and
    a second pass to write the rows.

    Exits the process with status 1 on MemoryError or invalid JSON.
    """
    print(f"Processing {input_file} (JSON array format)...")
    print("Loading and parsing JSON data (this may take a minute for large files)...")

    # NOTE: a previous version unconditionally did `import ijson` here but
    # never used it, so the function failed with ImportError on machines
    # without that third-party package even though the plain-json path below
    # is self-sufficient.  The unused import has been removed; ijson is only
    # suggested to the user in the MemoryError fallback message.
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        print(f"Loaded {len(data):,} cards")

        # First pass: union of all flattened keys so every row shares one header.
        print("Analyzing card structure...")
        all_fields = set()
        for i, card in enumerate(data):
            all_fields.update(flatten_card_data(card).keys())
            if (i + 1) % 10000 == 0:
                print(f" Analyzed {i + 1:,} cards...")

        print(f"Found {len(all_fields)} unique fields")
        fieldnames = sorted(all_fields)

        # Second pass: stream the rows out through DictWriter.
        print("Writing CSV file...")
        with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

            for i, card in enumerate(data):
                writer.writerow(flatten_card_data(card))
                if (i + 1) % 10000 == 0:
                    print(f" Written {i + 1:,} cards...")

        print(f"\nComplete! Written {len(data):,} cards to {output_file}")

    except MemoryError:
        print("File too large for memory. Please install ijson: pip install ijson")
        print("Then run the script again.")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        print("The file might be corrupted or not in valid JSON format.")
        sys.exit(1)
||||
def process_scryfall_ndjson(input_file, output_file):
    """
    Convert a Scryfall NDJSON (newline-delimited JSON) file to CSV.

    Two streaming passes over the input keep memory usage flat:
    pass 1 collects the union of flattened field names for the CSV header,
    pass 2 writes one row per card.  Malformed lines are counted and
    skipped rather than aborting the conversion.
    """
    print(f"Processing {input_file} (NDJSON format)...")

    # First pass: collect every flattened field name so the header is complete.
    print("First pass: Analyzing card structure...")
    all_fields = set()
    cards_processed = 0
    errors = 0

    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            try:
                card = json.loads(line)
            except json.JSONDecodeError:
                errors += 1
                continue

            all_fields.update(flatten_card_data(card).keys())
            cards_processed += 1

            if cards_processed % 10000 == 0:
                print(f" Analyzed {cards_processed:,} cards...")

    print(f"Found {len(all_fields)} unique fields across {cards_processed:,} cards")
    if errors > 0:
        print(f" (Skipped {errors} malformed lines)")

    # Sort fields for a deterministic, consistent column order.
    fieldnames = sorted(all_fields)

    # Second pass: write CSV rows.
    print("\nSecond pass: Writing CSV...")
    cards_written = 0
    errors = 0

    with open(input_file, 'r', encoding='utf-8') as f, \
         open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for line in f:
            line = line.strip()
            if not line:
                continue

            # Best-effort: skip any line that fails to parse or flatten.
            # (The previous `except (json.JSONDecodeError, Exception)` was
            # redundant -- Exception already covers JSONDecodeError.)
            try:
                card = json.loads(line)
                writer.writerow(flatten_card_data(card))
                cards_written += 1
            except Exception:
                errors += 1
                continue

            if cards_written % 10000 == 0:
                print(f" Written {cards_written:,} cards...")

    print(f"\nComplete! Written {cards_written:,} cards to {output_file}")
    if errors > 0:
        print(f"Skipped {errors} problematic lines")
||||
def main():
    """
    Entry point: validate CLI arguments, detect the input format, and run
    the matching converter.

    Usage: python3 mtg_card_fetcher.py <input_json_file>

    The output CSV name is generated from the current timestamp; exits with
    status 1 on bad arguments, missing input, or a processing error.
    """
    # (A dead, commented-out block for an older two-argument CLI was removed
    # here; it was a bare triple-quoted string left in the function body.)
    if len(sys.argv) != 2:
        print("Usage: python3 mtg_card_fetcher.py <input_json_file>")
        print("Example: python3 mtg_card_fetcher.py mtg-default-cards-20251018212333.json")
        sys.exit(1)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

    input_file = sys.argv[1]
    # Output name is derived from the timestamp rather than taken from argv.
    output_file = f'mtg_cards_{timestamp}.csv'

    # Validate input file exists
    if not Path(input_file).exists():
        print(f"Error: Input file '{input_file}' not found!")
        sys.exit(1)

    # Report file size (MB) so the user knows what to expect.
    file_size = Path(input_file).stat().st_size / (1024 * 1024)
    print(f"Input file size: {file_size:.1f} MB")

    # Detect format
    print("Detecting file format...")
    format_type = detect_json_format(input_file)
    print(f"Detected format: {format_type.upper()}")

    # Ask before clobbering an existing output file.
    if Path(output_file).exists():
        response = input(f"Warning: Output file '{output_file}' already exists. Overwrite? (y/n): ")
        if response.lower() != 'y':
            print("Cancelled.")
            sys.exit(0)

    try:
        if format_type == 'array':
            process_scryfall_array(input_file, output_file)
        else:
            process_scryfall_ndjson(input_file, output_file)
    except KeyboardInterrupt:
        print("\n\nProcess interrupted by user.")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report, show the traceback, and exit non-zero.
        print(f"Error processing file: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
||||
# Run the converter only when executed as a script, not on import.
if __name__ == "__main__":
    main()
||||
Reference in New Issue
Block a user