@@ -295,42 +295,33 @@ async function generateSqlQuery(apiKey: string, schemaInfo: string, question: st
295
295
- For total averages:
296
296
* WRONG: AVG of segment averages
297
297
* RIGHT: SUM of all values / COUNT of all parents
298
- Example pattern for segmented averages:
299
- WITH parent_totals AS (
300
- -- First aggregate ALL metrics to parent level
301
- SELECT
302
- parent_id,
303
- SUM(amount) as total_amount
304
- FROM details
305
- GROUP BY parent_id
306
- ),
307
- segments AS (
308
- -- Then segment using NTILE or other method
309
- SELECT
310
- parent_id,
311
- total_amount,
312
- NTILE(10) OVER (ORDER BY total_amount) as segment
313
- FROM parent_totals
314
- ),
315
- segment_metrics AS (
316
- -- Calculate segment metrics using SUM and COUNT
317
- SELECT
318
- 'Segment ' || segment as segment_name,
319
- COUNT(*) as parent_count,
320
- SUM(total_amount) as segment_total,
321
- SUM(total_amount) / COUNT(*) as segment_average
322
- FROM segments
323
- GROUP BY segment
324
-
325
- UNION ALL
326
-
327
- SELECT
328
- 'Total' as segment_name,
329
- COUNT(*) as parent_count,
330
- SUM(total_amount) as total_amount,
331
- SUM(total_amount) / COUNT(*) as overall_average
332
- FROM parent_totals
333
- )
298
+ - For consistent segmentation:
299
+ * Step 1: Determine parent-level attributes
300
+ WITH parent_attrs AS (
301
+ SELECT parent_id,
302
+ MAX(CASE WHEN attribute > 0 THEN 1 ELSE 0 END) as has_attr
303
+ FROM details
304
+ GROUP BY parent_id
305
+ ),
306
+ * Step 2: Calculate parent-level totals
307
+ parent_totals AS (
308
+ SELECT p.parent_id,
309
+ p.has_attr,
310
+ SUM(d.amount) as total_amount
311
+ FROM parent_attrs p
312
+ JOIN details d ON d.parent_id = p.parent_id
313
+ GROUP BY p.parent_id, p.has_attr
314
+ ),
315
+ * Step 3: Create segments
316
+ segments AS (
317
+ SELECT
318
+ CASE WHEN has_attr = 1 THEN 'With' ELSE 'Without' END as segment,
319
+ COUNT(*) as parent_count,
320
+ SUM(total_amount) as segment_total,
321
+ SUM(total_amount) / COUNT(*) as segment_average
322
+ FROM parent_totals
323
+ GROUP BY has_attr
324
+ )
334
325
335
326
8. Query Optimization:
336
327
- Keep queries as simple as possible while meeting requirements
@@ -601,6 +592,47 @@ function formatQueryResponse(sqlQuery: string): string {
601
592
* - Verify segment counts sum to total
602
593
* - Check for proper handling of outliers
603
594
*
595
+ * 13. "Inconsistent Segmentation Results"
596
+ * Problem: Same query produces different segment results
597
+ * Solution:
598
+ * - Use strict three-step segmentation pattern:
599
+ * 1. Determine parent-level attributes first (separate CTE)
600
+ * 2. Calculate parent-level totals using these attributes
601
+ * 3. Segment using the parent-level attributes
602
+ * - Never mix segmentation and aggregation in the same step
603
+ * Example fix:
604
+ * Instead of:
605
+ * SELECT
606
+ * CASE WHEN d.attribute > 0 THEN 'With' ELSE 'Without' END,
607
+ * COUNT(DISTINCT d.parent_id),
608
+ * SUM(d.amount)
609
+ * FROM details d
610
+ * GROUP BY CASE WHEN d.attribute > 0 THEN 'With' ELSE 'Without' END
611
+ * Use:
612
+ * WITH parent_attrs AS (
613
+ * SELECT parent_id, MAX(CASE WHEN attribute > 0 THEN 1 ELSE 0 END) as has_attr
614
+ * FROM details
615
+ * GROUP BY parent_id
616
+ * ),
617
+ * parent_totals AS (
618
+ * SELECT p.parent_id, p.has_attr, SUM(d.amount) as total
619
+ * FROM parent_attrs p
620
+ * JOIN details d ON d.parent_id = p.parent_id
621
+ * GROUP BY p.parent_id, p.has_attr
622
+ * )
623
+ * SELECT
624
+ * CASE WHEN has_attr = 1 THEN 'With' ELSE 'Without' END,
625
+ * COUNT(*),
626
+ * SUM(total)
627
+ * FROM parent_totals
628
+ * GROUP BY has_attr
629
+ * Testing:
630
+ * - Run query multiple times to verify consistent results
631
+ * - Compare segment counts with direct parent counts
632
+ * - Verify no parent appears in multiple segments
633
+ * - Test with parents having mixed attribute values
634
+ * - Check that parent-level metrics match when calculated different ways
635
+ *
604
636
* IMPLEMENTATION REQUIREMENTS:
605
637
* 1. Schema Awareness
606
638
* - All queries must be built using actual schema information
0 commit comments