import json import os import pandas as pd from datetime import datetime import glob def flatten_list(items): """Convert list to comma-separated string""" if items is None: return "" if isinstance(items, list): return ", ".join(str(item) for item in items) return str(items) def extract_ability_info(abilities): """Extract ability information as comma-separated values""" if not abilities: return "", "", "" names = [] texts = [] types = [] for ability in abilities: names.append(ability.get('name', '')) texts.append(ability.get('text', '')) types.append(ability.get('type', '')) return flatten_list(names), flatten_list(texts), flatten_list(types) def extract_attack_info(attacks): """Extract attack information as comma-separated values""" if not attacks: return "", "", "", "", "" names = [] costs = [] damages = [] texts = [] converted_costs = [] for attack in attacks: names.append(attack.get('name', '')) costs.append(flatten_list(attack.get('cost', []))) damages.append(attack.get('damage', '')) texts.append(attack.get('text', '')) converted_costs.append(str(attack.get('convertedEnergyCost', ''))) return (flatten_list(names), " | ".join(costs), # Use | to separate different attacks' costs flatten_list(damages), flatten_list(texts), flatten_list(converted_costs)) def extract_weakness_resistance(items): """Extract weakness or resistance information""" if not items: return "", "" types = [] values = [] for item in items: types.append(item.get('type', '')) values.append(item.get('value', '')) return flatten_list(types), flatten_list(values) def extract_prices(price_dict, prefix): """Extract price information from nested price dictionaries""" if not price_dict: return {} result = {} for price_type, prices in price_dict.items(): if isinstance(prices, dict): for metric, value in prices.items(): key = f"{prefix}_{price_type}_{metric}" result[key] = value else: # Handle direct price values key = f"{prefix}_{price_type}" result[key] = prices return result def process_card(card): """Process a single card and return a flattened dictionary""" row = { 'id': card.get('id', ''), 'name': card.get('name', ''), 'supertype': card.get('supertype', ''), 'subtypes': flatten_list(card.get('subtypes', [])), 'level': card.get('level', ''), 'hp': card.get('hp', ''), 'types': flatten_list(card.get('types', [])), 'evolvesFrom': card.get('evolvesFrom', ''), 'evolvesTo': flatten_list(card.get('evolvesTo', [])), 'rules': flatten_list(card.get('rules', [])), 'number': card.get('number', ''), 'artist': card.get('artist', ''), 'rarity': card.get('rarity', ''), 'flavorText': card.get('flavorText', ''), 'nationalPokedexNumbers': flatten_list(card.get('nationalPokedexNumbers', [])), 'regulationMark': card.get('regulationMark', ''), 'retreatCost': flatten_list(card.get('retreatCost', [])), 'convertedRetreatCost': card.get('convertedRetreatCost', ''), } # Ancient Trait ancient_trait = card.get('ancientTrait', {}) if ancient_trait: row['ancientTrait_name'] = ancient_trait.get('name', '') row['ancientTrait_text'] = ancient_trait.get('text', '') else: row['ancientTrait_name'] = '' row['ancientTrait_text'] = '' # Abilities abilities = card.get('abilities', []) row['ability_names'], row['ability_texts'], row['ability_types'] = extract_ability_info(abilities) # Attacks attacks = card.get('attacks', []) row['attack_names'], row['attack_costs'], row['attack_damages'], row['attack_texts'], row['attack_convertedCosts'] = extract_attack_info(attacks) # Weaknesses weaknesses = card.get('weaknesses', []) row['weakness_types'], row['weakness_values'] = extract_weakness_resistance(weaknesses) # Resistances resistances = card.get('resistances', []) row['resistance_types'], row['resistance_values'] = extract_weakness_resistance(resistances) # Set information set_info = card.get('set', {}) if set_info: row['set_id'] = set_info.get('id', '') row['set_name'] = set_info.get('name', '') row['set_series'] = set_info.get('series', '') row['set_printedTotal'] = set_info.get('printedTotal', '') row['set_total'] = set_info.get('total', '') row['set_ptcgoCode'] = set_info.get('ptcgoCode', '') row['set_releaseDate'] = set_info.get('releaseDate', '') # Legalities legalities = card.get('legalities', {}) row['legal_standard'] = legalities.get('standard', '') row['legal_expanded'] = legalities.get('expanded', '') row['legal_unlimited'] = legalities.get('unlimited', '') # Images images = card.get('images', {}) row['image_small'] = images.get('small', '') row['image_large'] = images.get('large', '') # TCGPlayer prices tcgplayer = card.get('tcgplayer', {}) if tcgplayer: row['tcgplayer_url'] = tcgplayer.get('url', '') row['tcgplayer_updatedAt'] = tcgplayer.get('updatedAt', '') # Extract all price types prices = tcgplayer.get('prices', {}) tcg_prices = extract_prices(prices, 'tcgplayer') row.update(tcg_prices) # Cardmarket prices cardmarket = card.get('cardmarket', {}) if cardmarket: row['cardmarket_url'] = cardmarket.get('url', '') row['cardmarket_updatedAt'] = cardmarket.get('updatedAt', '') # Extract all price types prices = cardmarket.get('prices', {}) cm_prices = extract_prices(prices, 'cardmarket') row.update(cm_prices) return row def main(): # Directory containing the JSON files data_dir = './pkm_data/cards/en' # Get all JSON files json_files = glob.glob(os.path.join(data_dir, '*.json')) if not json_files: print(f"No JSON files found in {data_dir}") return all_cards = [] # Process each JSON file for json_file in json_files: print(f"Processing {os.path.basename(json_file)}...") try: with open(json_file, 'r', encoding='utf-8') as f: # Handle newline-delimited JSON for line in f: line = line.strip() if line: try: # Try to parse as a single card card = json.loads(line) if isinstance(card, dict): all_cards.append(process_card(card)) elif isinstance(card, list): # If it's a list of cards for c in card: all_cards.append(process_card(c)) except json.JSONDecodeError: # Try parsing the entire file as one JSON array f.seek(0) data = json.load(f) if isinstance(data, list): for card in data: all_cards.append(process_card(card)) break except Exception as e: print(f"Error processing {json_file}: {str(e)}") continue # Create DataFrame df = pd.DataFrame(all_cards) # Sort by set and number """ df['number_int'] = df['number'].str.extract('(\d+)').astype(float, errors='ignore') df = df.sort_values(['set_name', 'number_int', 'number'], na_position='last') df = df.drop('number_int', axis=1) """ # Save to Excel timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') output_file = f'pkm_cards_{timestamp}.xlsx' # Create Excel writer with xlsxwriter engine with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer: df.to_excel(writer, sheet_name='Pokemon Cards', index=False) # Get the workbook and worksheet workbook = writer.book worksheet = writer.sheets['Pokemon Cards'] # Add some formatting header_format = workbook.add_format({ 'bold': True, 'text_wrap': True, 'valign': 'top', 'fg_color': '#D7E4BD', 'border': 1 }) # Write headers with formatting for col_num, value in enumerate(df.columns.values): worksheet.write(0, col_num, value, header_format) # Auto-adjust column widths for i, col in enumerate(df.columns): # Find maximum length in column max_len = df[col].astype(str).str.len().max() max_len = max(max_len, len(col)) + 2 # Cap column width at 50 max_len = min(max_len, 50) worksheet.set_column(i, i, max_len) print(f"\nSuccessfully created {output_file}") print(f"Total cards processed: {len(df)}") print(f"\nColumns in the spreadsheet:") for col in df.columns: print(f" - {col}") if __name__ == "__main__": main()