@@ -372,10 +372,109 @@ In pandas, you can use :meth:`~pandas.concat` in conjunction with
372372
373373 pd.concat([df1, df2]).drop_duplicates()
374374
375+ pandas equivalents for some SQL analytic and aggregate functions
376+ ----------------------------------------------------------------
377+
378+ Top N rows with offset
379+ ~~~~~~~~~~~~~~~~~~~~~~
380+
381+ .. code-block:: sql
382+
383+     -- MySQL
384+     SELECT * FROM tips
385+     ORDER BY tip DESC
386+     LIMIT 10 OFFSET 5;
387+
388+ .. ipython:: python
389+
390+     tips.nlargest(10 + 5, columns='tip').tail(10)
391+
392+ Top N rows per group
393+ ~~~~~~~~~~~~~~~~~~~~
394+
395+ .. code-block:: sql
396+
397+     -- Oracle's ROW_NUMBER() analytic function
398+     SELECT * FROM (
399+       SELECT
400+         t.*,
401+         ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn
402+       FROM tips t
403+     )
404+     WHERE rn < 3
405+     ORDER BY day, rn;
406+
407+
408+ .. ipython:: python
409+
410+     (tips.assign(rn=tips.sort_values(['total_bill'], ascending=False)
411+                         .groupby(['day'])
412+                         .cumcount() + 1)
413+          .query('rn < 3')
414+          .sort_values(['day', 'rn'])
415+     )
416+
417+ The same result using the ``rank(method='first')`` function:
418+
419+ .. ipython:: python
420+
421+     (tips.assign(rnk=tips.groupby(['day'])['total_bill']
422+                          .rank(method='first', ascending=False))
423+          .query('rnk < 3')
424+          .sort_values(['day', 'rnk'])
425+     )
426+
427+ .. code-block:: sql
428+
429+     -- Oracle's RANK() analytic function
430+     SELECT * FROM (
431+       SELECT
432+         t.*,
433+         RANK() OVER(PARTITION BY sex ORDER BY tip) AS rnk
434+       FROM tips t
435+       WHERE tip < 2
436+     )
437+     WHERE rnk < 3
438+     ORDER BY sex, rnk;
439+
440+ Let's find the tips with rank < 3 within each gender group, considering only tips < 2.
441+ Notice that with ``rank(method='min')`` tied values share a rank:
442+ ``rnk_min`` is the same for rows with the same ``tip``
443+ (as with Oracle's ``RANK()`` function).
444+
445+ .. ipython:: python
446+
447+     (tips[tips['tip'] < 2]
448+         .assign(rnk_min=tips.groupby(['sex'])['tip']
449+                             .rank(method='min'))
450+         .query('rnk_min < 3')
451+         .sort_values(['sex', 'rnk_min'])
452+     )
453+
375454
376455 UPDATE
377456------
378457
458+ .. code-block:: sql
459+
460+     UPDATE tips
461+     SET tip = tip*2
462+     WHERE tip < 2;
463+
464+ .. ipython:: python
465+
466+     tips.loc[tips['tip'] < 2, 'tip'] *= 2
379467
380468 DELETE
381469------
470+
471+ .. code-block:: sql
472+
473+     DELETE FROM tips
474+     WHERE tip > 9;
475+
476+ In pandas we select the rows that should remain instead of deleting them:
477+
478+ .. ipython:: python
479+
480+     tips = tips.loc[tips['tip'] <= 9]
0 commit comments