Skip to content

Commit 29b865e

Browse files
committed
depr(selectors): deprecate c and r selectors in favor of cols and index
1 parent 1cf5439 commit 29b865e

File tree

14 files changed

+187
-112
lines changed

docs/_freeze/posts/ibis-to-file/index/execute-results/html.json

+4-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/_freeze/posts/selectors/index/execute-results/html.json

+5-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/_quarto.yml

+3-2
Original file line number | Diff line number | Diff line change
@@ -572,14 +572,15 @@ quartodoc:
572572
- matches
573573
- any_of
574574
- all_of
575-
- c
575+
- cols
576576
- across
577577
- if_any
578578
- if_all
579-
- r
579+
- index
580580
- first
581581
- last
582582
- all
583+
- none
583584

584585
- title: Type System
585586
desc: "Data types and schemas"

docs/how-to/visualization/matplotlib.qmd

+1-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ grouped = t.group_by("species").aggregate(count=ibis._.count())
2424
grouped = grouped.mutate(row_number=ibis.row_number().over()).select(
2525
"row_number",
2626
(
27-
~s.c("row_number") & s.all()
27+
~s.cols("row_number") & s.all()
2828
), # see https://github.com/ibis-project/ibis/issues/6803
2929
)
3030
grouped

docs/posts/ibis-to-file/index.qmd

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ import ibis.selectors as s
3838
3939
expr = (
4040
t.group_by("species")
41-
.mutate(s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std()))
41+
.mutate(s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std()))
4242
)
4343
expr
4444
```

docs/posts/selectors/index.qmd

+9-8
Original file line number | Diff line number | Diff line change
@@ -49,10 +49,11 @@ sense.
4949
We can exclude `year` from the normalization using another selector:
5050

5151
```{python}
52-
t.mutate(s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std()))
52+
t.mutate(s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std()))
5353
```
5454

55-
`c` is short for "column" and the `~` means "negate". Combining those we get "not the year column"!
55+
`cols` selects one or more columns, and the `~` means "negate". Combining those
56+
we get "every column except for 'year'"!
5657

5758
Pretty neat right?
5859

@@ -65,7 +66,7 @@ With selectors, all you need to do is slap a `.group_by("species")` onto `t`:
6566

6667
```{python}
6768
t.group_by("species").mutate(
68-
s.across(s.numeric() & ~s.c("year"), (_ - _.mean()) / _.std())
69+
s.across(s.numeric() & ~s.cols("year"), (_ - _.mean()) / _.std())
6970
)
7071
```
7172

@@ -81,7 +82,7 @@ Grouped min/max normalization? Easy:
8182

8283
```{python}
8384
t.group_by("species").mutate(
84-
s.across(s.numeric() & ~s.c("year"), (_ - _.min()) / (_.max() - _.min()))
85+
s.across(s.numeric() & ~s.cols("year"), (_ - _.min()) / (_.max() - _.min()))
8586
)
8687
```
8788

@@ -107,7 +108,7 @@ What if I want to compute multiple things? Heck yeah!
107108
```{python}
108109
t.group_by("sex").mutate(
109110
s.across(
110-
s.numeric() & ~s.c("year"),
111+
s.numeric() & ~s.cols("year"),
111112
dict(centered=_ - _.mean(), zscore=(_ - _.mean()) / _.std()),
112113
)
113114
).select("sex", s.endswith(("_centered", "_zscore")))
@@ -144,14 +145,14 @@ t.select(s.startswith("bill")).mutate(
144145
We've seen lots of mutate use, but selectors also work with `.agg`:
145146

146147
```{python}
147-
t.group_by("year").agg(s.across(s.numeric() & ~s.c("year"), _.mean())).order_by("year")
148+
t.group_by("year").agg(s.across(s.numeric() & ~s.cols("year"), _.mean())).order_by("year")
148149
```
149150

150151
Naturally, selectors work in grouping keys too, for even more convenience:
151152

152153
```{python}
153-
t.group_by(~s.numeric() | s.c("year")).mutate(
154-
s.across(s.numeric() & ~s.c("year"), dict(centered=_ - _.mean(), std=_.std()))
154+
t.group_by(~s.numeric() | s.cols("year")).mutate(
155+
s.across(s.numeric() & ~s.cols("year"), dict(centered=_ - _.mean(), std=_.std()))
155156
).select("species", s.endswith(("_centered", "_std")))
156157
```
157158

ibis/backends/tests/test_generic.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1345,7 +1345,7 @@ def test_memtable_column_naming_mismatch(con, monkeypatch, df, columns):
13451345
def test_pivot_longer(backend):
13461346
diamonds = backend.diamonds
13471347
df = diamonds.execute()
1348-
res = diamonds.pivot_longer(s.c("x", "y", "z"), names_to="pos", values_to="xyz")
1348+
res = diamonds.pivot_longer(s.cols("x", "y", "z"), names_to="pos", values_to="xyz")
13491349
assert res.schema().names == (
13501350
"carat",
13511351
"cut",
@@ -2469,7 +2469,7 @@ def test_union_generates_predictable_aliases(con):
24692469
assert len(df) == 2
24702470

24712471

2472-
@pytest.mark.parametrize("id_cols", [s.none(), [], s.c()])
2472+
@pytest.mark.parametrize("id_cols", [s.none(), [], s.cols()])
24732473
def test_pivot_wider_empty_id_columns(con, backend, id_cols, monkeypatch):
24742474
monkeypatch.setattr(ibis.options, "default_backend", con)
24752475
data = pd.DataFrame(

ibis/backends/tests/tpc/ds/test_queries.py

+17-13
Original file line number | Diff line number | Diff line change
@@ -1341,7 +1341,7 @@ def test_24(store_sales, store_returns, store, item, customer, customer_address)
13411341
.group_by(_.c_last_name, _.c_first_name, _.s_store_name)
13421342
.having(_.netpaid.sum() > ssales.netpaid.mean().as_scalar() * 0.05)
13431343
.agg(paid=_.netpaid.sum())
1344-
.order_by(~s.c("paid"))
1344+
.order_by(~s.cols("paid"))
13451345
)
13461346

13471347

@@ -1497,17 +1497,17 @@ def test_28(store_sales):
14971497
def test_29(store_sales, store_returns, catalog_sales, date_dim, store, item):
14981498
d1 = (
14991499
date_dim.filter(_.d_moy == 9, _.d_year == 1999)
1500-
.drop(~s.c("d_date_sk"))
1500+
.drop(~s.cols("d_date_sk"))
15011501
.rename(d1_date_sk="d_date_sk")
15021502
)
15031503
d2 = (
15041504
date_dim.filter(_.d_moy.between(9, 9 + 3), _.d_year == 1999)
1505-
.drop(~s.c("d_date_sk"))
1505+
.drop(~s.cols("d_date_sk"))
15061506
.rename(d2_date_sk="d_date_sk")
15071507
)
15081508
d3 = (
15091509
date_dim.filter(_.d_year.isin((1999, 1999 + 1, 1999 + 2)))
1510-
.drop(~s.c("d_date_sk"))
1510+
.drop(~s.cols("d_date_sk"))
15111511
.rename(d3_date_sk="d_date_sk")
15121512
)
15131513
return (
@@ -1864,7 +1864,7 @@ def test_35(
18641864
.relocate("cd_dep_employed_count", before="cnt2")
18651865
.relocate("cd_dep_college_count", before="cnt3")
18661866
.order_by(
1867-
s.across(s.startswith("cd_") | s.c("ca_state"), _.asc(nulls_first=True))
1867+
s.across(s.startswith("cd_") | s.cols("ca_state"), _.asc(nulls_first=True))
18681868
)
18691869
.limit(100)
18701870
)
@@ -1894,7 +1894,7 @@ def test_36(store_sales, date_dim, item, store):
18941894
g_category=lit(0),
18951895
g_class=lit(0),
18961896
)
1897-
.relocate(s.c("i_category", "i_class"), after="gross_margin")
1897+
.relocate(s.cols("i_category", "i_class"), after="gross_margin")
18981898
)
18991899
return (
19001900
results.select(
@@ -2035,7 +2035,9 @@ def test_39(inventory, item, warehouse, date_dim):
20352035
)
20362036
.order_by(
20372037
s.across(
2038-
s.c("wsk1", "isk1", "dmoy1", "mean1", "cov1", "d_moy", "mean", "cov"),
2038+
s.cols(
2039+
"wsk1", "isk1", "dmoy1", "mean1", "cov1", "d_moy", "mean", "cov"
2040+
),
20392041
_.asc(nulls_first=True),
20402042
)
20412043
)
@@ -2169,7 +2171,7 @@ def test_42(date_dim, store_sales, item):
21692171
.join(item.filter(_.i_manager_id == 1), [("ss_item_sk", "i_item_sk")])
21702172
.group_by(_.d_year, _.i_category_id, _.i_category)
21712173
.agg(total_sales=_.ss_ext_sales_price.sum())
2172-
.order_by(_.total_sales.desc(), ~s.c("total_sales"))
2174+
.order_by(_.total_sales.desc(), ~s.cols("total_sales"))
21732175
.limit(100)
21742176
)
21752177

@@ -2268,7 +2270,7 @@ def test_45(web_sales, customer, customer_address, date_dim, item):
22682270
)
22692271
.group_by(_.ca_zip, _.ca_city)
22702272
.agg(total_web_sales=_.ws_sales_price.sum())
2271-
.order_by(~s.c("total_web_sales"))
2273+
.order_by(~s.cols("total_web_sales"))
22722274
.limit(100)
22732275
)
22742276

@@ -2318,7 +2320,7 @@ def test_46(
23182320
_.amt,
23192321
_.profit,
23202322
)
2321-
.order_by(s.across(~s.c("amt", "profit"), _.asc(nulls_first=True)))
2323+
.order_by(s.across(~s.cols("amt", "profit"), _.asc(nulls_first=True)))
23222324
.limit(100)
23232325
)
23242326

@@ -2346,7 +2348,7 @@ def test_47(item, store_sales, date_dim, store):
23462348
.mutate(
23472349
avg_monthly_sales=_.sum_sales.mean().over(
23482350
# TODO: add support for selectors in window over specification
2349-
# group_by=~s.c("sum_sales", "d_moy")
2351+
# group_by=~s.cols("sum_sales", "d_moy")
23502352
group_by=(
23512353
_.i_category,
23522354
_.i_brand,
@@ -2966,7 +2968,9 @@ def test_57(item, catalog_sales, date_dim, call_center):
29662968
)
29672969
> 0.1,
29682970
)
2969-
.order_by((_.sum_sales - _.avg_monthly_sales).asc(nulls_first=True), s.r[1:10])
2971+
.order_by(
2972+
(_.sum_sales - _.avg_monthly_sales).asc(nulls_first=True), s.index[1:10]
2973+
)
29702974
.limit(100)
29712975
)
29722976

@@ -4885,7 +4889,7 @@ def test_89(item, store_sales, date_dim, store):
48854889
.order_by(
48864890
_.sum_sales - _.avg_monthly_sales,
48874891
_.s_store_name,
4888-
s.r[:9] & ~s.c("s_store_name"),
4892+
s.index[:9] & ~s.cols("s_store_name"),
48894893
)
48904894
).limit(100)
48914895

ibis/expr/types/relations.py

+9-17
Original file line number | Diff line number | Diff line change
@@ -1881,7 +1881,7 @@ def mutate(self, *exprs: Sequence[ir.Expr] | None, **mutations: ir.Value) -> Tab
18811881
18821882
Mutate across multiple columns
18831883
1884-
>>> t.mutate(s.across(s.numeric() & ~s.c("year"), _ - _.mean())).head()
1884+
>>> t.mutate(s.across(s.numeric() & ~s.cols("year"), _ - _.mean())).head()
18851885
┏━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━┓
18861886
┃ species ┃ year ┃ bill_length_mm ┃
18871887
┡━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━┩
@@ -2051,7 +2051,7 @@ def select(
20512051
Projection with a selector
20522052
20532053
>>> import ibis.selectors as s
2054-
>>> t.select(s.numeric() & ~s.c("year")).head()
2054+
>>> t.select(s.numeric() & ~s.cols("year")).head()
20552055
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
20562056
┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃
20572057
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
@@ -2067,7 +2067,7 @@ def select(
20672067
Projection + aggregation across multiple columns
20682068
20692069
>>> from ibis import _
2070-
>>> t.select(s.across(s.numeric() & ~s.c("year"), _.mean())).head()
2070+
>>> t.select(s.across(s.numeric() & ~s.cols("year"), _.mean())).head()
20712071
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
20722072
┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃
20732073
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
@@ -2161,7 +2161,7 @@ def rename(
21612161
>>> import ibis
21622162
>>> import ibis.selectors as s
21632163
>>> ibis.options.interactive = True
2164-
>>> first3 = s.r[:3] # first 3 columns
2164+
>>> first3 = s.index[:3] # first 3 columns
21652165
>>> t = ibis.examples.penguins_raw_raw.fetch().select(first3)
21662166
>>> t
21672167
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
@@ -3597,7 +3597,7 @@ def pivot_longer(
35973597
Here we convert column names not matching the selector for the `religion` column
35983598
and convert those names into values
35993599
3600-
>>> relig_income.pivot_longer(~s.c("religion"), names_to="income", values_to="count")
3600+
>>> relig_income.pivot_longer(~s.cols("religion"), names_to="income", values_to="count")
36013601
┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━┓
36023602
┃ religion ┃ income ┃ count ┃
36033603
┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━┩
@@ -3718,7 +3718,7 @@ def pivot_longer(
37183718
>>> len(who.columns)
37193719
60
37203720
>>> who.pivot_longer(
3721-
... s.r["new_sp_m014":"newrel_f65"],
3721+
... s.index["new_sp_m014":"newrel_f65"],
37223722
... names_to=["diagnosis", "gender", "age"],
37233723
... names_pattern="new_?(.*)_(.)(.*)",
37243724
... values_to="count",
@@ -3749,7 +3749,7 @@ def pivot_longer(
37493749
Let's recode gender and age to numeric values using a mapping
37503750
37513751
>>> who.pivot_longer(
3752-
... s.r["new_sp_m014":"newrel_f65"],
3752+
... s.index["new_sp_m014":"newrel_f65"],
37533753
... names_to=["diagnosis", "gender", "age"],
37543754
... names_pattern="new_?(.*)_(.)(.*)",
37553755
... names_transform=dict(
@@ -3784,7 +3784,7 @@ def pivot_longer(
37843784
The number of match groups in `names_pattern` must match the length of `names_to`
37853785
37863786
>>> who.pivot_longer( # quartodoc: +EXPECTED_FAILURE
3787-
... s.r["new_sp_m014":"newrel_f65"],
3787+
... s.index["new_sp_m014":"newrel_f65"],
37883788
... names_to=["diagnosis", "gender", "age"],
37893789
... names_pattern="new_?(.*)_.(.*)",
37903790
... )
@@ -3795,7 +3795,7 @@ def pivot_longer(
37953795
`names_transform` must be a mapping or callable
37963796
37973797
>>> who.pivot_longer(
3798-
... s.r["new_sp_m014":"newrel_f65"], names_transform="upper"
3798+
... s.index["new_sp_m014":"newrel_f65"], names_transform="upper"
37993799
... ) # quartodoc: +EXPECTED_FAILURE
38003800
Traceback (most recent call last):
38013801
...
@@ -4429,14 +4429,6 @@ def relocate(
44294429
├────────┼────────┼────────┼───────┼───────┼───────┤
44304430
│ a │ a │ a │ 1 │ 1 │ 1 │
44314431
└────────┴────────┴────────┴───────┴───────┴───────┘
4432-
>>> t.relocate(s.any_of(s.c(*"ae")))
4433-
┏━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━┓
4434-
┃ a ┃ e ┃ b ┃ c ┃ d ┃ f ┃
4435-
┡━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━┩
4436-
│ int64 │ string │ int64 │ int64 │ string │ string │
4437-
├───────┼────────┼───────┼───────┼────────┼────────┤
4438-
│ 1 │ a │ 1 │ 1 │ a │ a │
4439-
└───────┴────────┴───────┴───────┴────────┴────────┘
44404432
44414433
When multiple columns are selected with `before` or `after`, those
44424434
selected columns are moved before and after the `selectors` input

0 commit comments

Comments
 (0)