From 8a40bc6ca4fd1ce660018677e4661825d9197ce3 Mon Sep 17 00:00:00 2001 From: dnviti Date: Tue, 16 Dec 2025 15:23:59 +0100 Subject: [PATCH] feat: Enhance CSV parser to dynamically map quantity, name, finish, and ID columns from headers for robust custom imports. --- docs/development/CENTRAL.md | 1 + ...2025-12-16-152253_csv_import_robustness.md | 21 +++ src/client/src/services/CardParserService.ts | 120 +++++++++++++----- 3 files changed, 107 insertions(+), 35 deletions(-) create mode 100644 docs/development/devlog/2025-12-16-152253_csv_import_robustness.md diff --git a/docs/development/CENTRAL.md b/docs/development/CENTRAL.md index 997c8ec..1cf23a1 100644 --- a/docs/development/CENTRAL.md +++ b/docs/development/CENTRAL.md @@ -8,3 +8,4 @@ ## Recent Completions - [Game Battlefield & Manual Mode](./devlog/2025-12-14-234500_game_battlefield_plan.md): Completed. - [Helm Chart Config](./devlog/2025-12-14-214500_helm_config.md): Completed. +- [CSV Import Robustness](./devlog/2025-12-16-152253_csv_import_robustness.md): Completed. Enhanced CSV parser to dynamically map columns from headers, supporting custom user imports. diff --git a/docs/development/devlog/2025-12-16-152253_csv_import_robustness.md b/docs/development/devlog/2025-12-16-152253_csv_import_robustness.md new file mode 100644 index 0000000..6e4e93d --- /dev/null +++ b/docs/development/devlog/2025-12-16-152253_csv_import_robustness.md @@ -0,0 +1,21 @@ + +# CSV Import Robustness Update + +## Background +The user provided a specific CSV format associated with typical automated imports. The requirement was to extract relevant information (Quantity, Name, Finish, Scryfall ID) while ignoring other fields (such as Condition, Date Added, etc.). + +## Changes +- Refactored `src/client/src/services/CardParserService.ts` to implement dynamic header parsing. +- The `parse` method now: + - Detects if the first line is a CSV header containing "Quantity" and "Name". + - Maps columns to indices based on the header. + - Specifically looks for `Quantity`, `Name`, `Finish`, and `Scryfall ID` (checking common variations like 'scryfall_id', 'id', 'uuid'). + - Uses strictly mapped columns if a header is detected, ensuring other fields are ignored as requested. + - Falls back gracefully to previous generic parsing logic if no matching header is found, preserving backward compatibility with Arena/MTGO exports and simple lists. + +## Verification +- Verified manually via a test script that the provided CSV content parses correctly into the `CardIdentifier` memory structure. +- The extraction correctly identifies Quantity, Name, Finish (Normal/Foil), and Scryfall UUID. + +## Next Steps +- Ensure the frontend `CubeManager` works seamlessly with this update (no changes needed there as it uses the service). diff --git a/src/client/src/services/CardParserService.ts b/src/client/src/services/CardParserService.ts index 5c8a096..daa9017 100644 --- a/src/client/src/services/CardParserService.ts +++ b/src/client/src/services/CardParserService.ts @@ -11,53 +11,105 @@ export class CardParserService { const rawCardList: CardIdentifier[] = []; const uuidRegex = /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i; + let colMap = { qty: 0, name: 1, finish: 2, id: -1, found: false }; + + // Check header to determine column indices dynamically + if (lines.length > 0) { + const headerLine = lines[0].toLowerCase(); + // Heuristic: if it has Quantity and Name, it's likely our CSV + if (headerLine.includes('quantity') && headerLine.includes('name')) { + const headers = this.parseCsvLine(lines[0]).map(h => h.toLowerCase().trim()); + const qtyIndex = headers.indexOf('quantity'); + const nameIndex = headers.indexOf('name'); + + if (qtyIndex !== -1 && nameIndex !== -1) { + colMap.qty = qtyIndex; + colMap.name = nameIndex; + colMap.finish = headers.indexOf('finish'); + // Find ID column: could be 'scryfall id', 'scryfall_id', 'id' + colMap.id = headers.findIndex(h => h === 'scryfall id' || h === 'scryfall_id' || h === 'id' || h === 'uuid'); + colMap.found = true; + + // Remove header row + lines.shift(); + } + } + } + lines.forEach(line => { - // Skip header + // Skip generic header repetition if it occurs if (line.toLowerCase().startsWith('quantity') && line.toLowerCase().includes('name')) return; + // Try parsing as CSV line first if we detected a header or if it looks like CSV + const parts = this.parseCsvLine(line); + + // If we have a detected map, use it strict(er) + if (colMap.found && parts.length > Math.max(colMap.qty, colMap.name)) { + const qty = parseInt(parts[colMap.qty]); + if (!isNaN(qty)) { + const name = parts[colMap.name]; + let finish: 'foil' | 'normal' | undefined = undefined; + + if (colMap.finish !== -1 && parts[colMap.finish]) { + const finishRaw = parts[colMap.finish].toLowerCase(); + finish = (finishRaw === 'foil' || finishRaw === 'etched') ? 'foil' : (finishRaw === 'normal' ? 'normal' : undefined); + } else if (!colMap.found) { + // Legacy fallback for default indices if header wasn't found but we are in this block (shouldn't happen with colMap.found=true logic) + const finishRaw = parts[2]?.toLowerCase(); + finish = (finishRaw === 'foil' || finishRaw === 'etched') ? 'foil' : (finishRaw === 'normal' ? 'normal' : undefined); + } + + let idValue: string | null = null; + + // If we have an ID column, look there + if (colMap.id !== -1 && parts[colMap.id]) { + const match = parts[colMap.id].match(uuidRegex); + if (match) idValue = match[0]; + } + + // If not found in column (or no column), check if there's a UUID anywhere in the line? + // The user said "ignore other fields". So strictly adhering to columns is better. + // BUT, to be safe for mixed usages (e.g. if ID is missing in col but present elsewhere? Unlikely). + // Let's stick to the mapped column if available. + + // If we didn't find an ID in the specific column, but we have a generic UUID in the line? + // The original logic did `parts.find`. + // If `colMap.found` is true, we should trust it. + + if (idValue) { + rawCardList.push({ type: 'id', value: idValue, quantity: qty, finish }); + return; + } else if (name) { + rawCardList.push({ type: 'name', value: name, quantity: qty, finish }); + return; + } + } + } + + // --- Fallback / Original Logic for non-header formats or failed parsings --- + const idMatch = line.match(uuidRegex); if (idMatch) { - // Extract quantity if present before ID, otherwise default to 1 - // Simple check: Look for "Nx ID" or "N, ID" pattern? - // The previous/standard logic usually treats ID lines as 1x unless specified. - // Let's try to find a quantity at the start if it exists differently from UUID. - // But usually UUID lines are direct from export. - - // But our CSV template puts ID at the end. - // If UUID is present anywhere in the line, we might trust it over the name. - // Let's stick to the previous logic: if UUID is found, use it. - // BUT, we should try to parse the whole CSV line if possible to get Finish and Quantity. - - // Let's parse with CSV logic first. - const parts = this.parseCsvLine(line); + // It has a UUID, try to extract generic CSV info if possible if (parts.length >= 2) { const qty = parseInt(parts[0]); - // If valid CSV structure if (!isNaN(qty)) { - // const name = parts[1]; // We can keep name for reference, but we use ID if present + // Assuming default 0=Qty, 2=Finish if no header map found const finishRaw = parts[2]?.toLowerCase(); const finish = (finishRaw === 'foil' || finishRaw === 'etched') ? 'foil' : (finishRaw === 'normal' ? 'normal' : undefined); - // If the last part has UUID, use it. - const uuidPart = parts.find(p => uuidRegex.test(p)); - if (uuidPart) { - const uuid = uuidPart.match(uuidRegex)![0]; - rawCardList.push({ type: 'id', value: uuid, quantity: qty, finish }); - return; - } + // Use the regex match found + rawCardList.push({ type: 'id', value: idMatch[0], quantity: qty, finish }); + return; } } - - // Fallback ID logic - rawCardList.push({ type: 'id', value: idMatch[0], quantity: 1 }); // Default simple UUID match + // Just ID flow + rawCardList.push({ type: 'id', value: idMatch[0], quantity: 1 }); return; } - // Not an ID match, try parsing as name - const parts = this.parseCsvLine(line); - + // Name-based generic parsing (Arena/MTGO or simple CSV without ID) if (parts.length >= 2 && !isNaN(parseInt(parts[0]))) { - // It looks like result of our CSV: Quantity, Name, Finish, ... const quantity = parseInt(parts[0]); const name = parts[1]; const finishRaw = parts[2]?.toLowerCase(); @@ -69,18 +121,16 @@ export class CardParserService { } } - // Fallback to simple Arena/MTGO text format: "4 Lightning Bolt" + // "4 Lightning Bolt" format const cleanLine = line.replace(/['"]/g, ''); const simpleMatch = cleanLine.match(/^(\d+)[xX\s]+(.+)$/); if (simpleMatch) { let name = simpleMatch[2].trim(); - // cleanup - name = name.replace(/\s*[\(\[].*?[\)\]]/g, ''); // remove set codes - name = name.replace(/\s+\d+$/, ''); // remove collector number + name = name.replace(/\s*[\(\[].*?[\)\]]/g, ''); + name = name.replace(/\s+\d+$/, ''); rawCardList.push({ type: 'name', value: name, quantity: parseInt(simpleMatch[1]) }); } else { - // Maybe just "Lightning Bolt" (1x) let name = cleanLine.trim(); if (name) { rawCardList.push({ type: 'name', value: name, quantity: 1 });