@@ -2542,6 +2542,94 @@ def intersect(self, other: "DataFrame") -> "DataFrame":
2542
2542
builder = self ._builder .intersect (other ._builder )
2543
2543
return DataFrame (builder )
2544
2544
2545
@DataframePublicAPI
def intersect_all(self, other: "DataFrame") -> "DataFrame":
    """Intersects this DataFrame with another one, keeping duplicate rows.

    A row that occurs several times in both inputs is repeated in the
    result, as shown in the example below.

    Example:
        >>> import daft
        >>> df1 = daft.from_pydict({"a": [1, 2, 2], "b": [4, 6, 6]})
        >>> df2 = daft.from_pydict({"a": [1, 1, 2, 2], "b": [4, 4, 6, 6]})
        >>> df1.intersect_all(df2).collect()
        ╭───────┬───────╮
        │ a     ┆ b     │
        │ ---   ┆ ---   │
        │ Int64 ┆ Int64 │
        ╞═══════╪═══════╡
        │ 1     ┆ 4     │
        ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ 2     ┆ 6     │
        ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
        │ 2     ┆ 6     │
        ╰───────┴───────╯
        <BLANKLINE>
        (Showing first 3 of 3 rows)

    Args:
        other (DataFrame): the DataFrame whose rows are intersected with this one

    Returns:
        DataFrame: a new DataFrame with the rows common to both inputs, duplicates included
    """
    # The set operation itself is performed by the underlying builders; this
    # method only wraps the resulting builder in a fresh DataFrame.
    # NOTE(review): the doctest above relies on a fixed row order in the
    # output - confirm that order is deterministic, or sort before collecting.
    return DataFrame(self._builder.intersect_all(other._builder))
2576
+
2577
@DataframePublicAPI
def except_distinct(self, other: "DataFrame") -> "DataFrame":
    """Computes the set difference between this DataFrame and another one.

    In the example below, the only row of ``df1`` that has no match in
    ``df2`` is the one that ends up in the result.

    Example:
        >>> import daft
        >>> df1 = daft.from_pydict({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> df2 = daft.from_pydict({"a": [1, 2, 3], "b": [4, 8, 6]})
        >>> df1.except_distinct(df2).collect()
        ╭───────┬───────╮
        │ a     ┆ b     │
        │ ---   ┆ ---   │
        │ Int64 ┆ Int64 │
        ╞═══════╪═══════╡
        │ 2     ┆ 5     │
        ╰───────┴───────╯
        <BLANKLINE>
        (Showing first 1 of 1 rows)

    Args:
        other (DataFrame): the DataFrame whose rows are removed from this one

    Returns:
        DataFrame: a new DataFrame with the set difference of the two inputs
    """
    # The set operation itself is performed by the underlying builders; this
    # method only wraps the resulting builder in a fresh DataFrame.
    return DataFrame(self._builder.except_distinct(other._builder))
2604
+
2605
@DataframePublicAPI
def except_all(self, other: "DataFrame") -> "DataFrame":
    """Computes the set difference between this DataFrame and another one, taking duplicates into account.

    In the example below, ``df1`` holds the row ``(1, 4)`` twice and ``df2``
    holds it once, so one copy of that row remains in the result.

    Example:
        >>> import daft
        >>> df1 = daft.from_pydict({"a": [1, 1, 2, 2], "b": [4, 4, 6, 6]})
        >>> df2 = daft.from_pydict({"a": [1, 2, 2], "b": [4, 6, 6]})
        >>> df1.except_all(df2).collect()
        ╭───────┬───────╮
        │ a     ┆ b     │
        │ ---   ┆ ---   │
        │ Int64 ┆ Int64 │
        ╞═══════╪═══════╡
        │ 1     ┆ 4     │
        ╰───────┴───────╯
        <BLANKLINE>
        (Showing first 1 of 1 rows)

    Args:
        other (DataFrame): the DataFrame whose rows are removed from this one

    Returns:
        DataFrame: a new DataFrame with the set difference of the two inputs, duplicates considered
    """
    # The set operation itself is performed by the underlying builders; this
    # method only wraps the resulting builder in a fresh DataFrame.
    return DataFrame(self._builder.except_all(other._builder))
2632
+
2545
2633
def _materialize_results (self ) -> None :
2546
2634
"""Materializes the results of for this DataFrame and hold a pointer to the results."""
2547
2635
context = get_context ()
0 commit comments