Skip to content

Commit 1fe5140

Browse files
committed
Merge branch 'master' of github.com:hadley/dplyr
# Conflicts: # NEWS.md
2 parents 3d254d6 + c583127 commit 1fe5140

File tree

7 files changed

+98
-53
lines changed

7 files changed

+98
-53
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -9,3 +9,4 @@ inst/doc
99
.httr-oauth
1010
*.Rds
1111
vignettes/*.R
12+
.DS_Store

NEWS.md

+7-3
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,10 @@
1313

1414
* `print.tbl_df()` is considerably faster if you have very wide data frames.
1515
It will now also only list the first 100 additional variables not already
16-
on screen - control this with the new `n_extra` parameter to `print()`
16+
on screen - control this with the new `n_extra` parameter to `print()`
1717
(#1161).
1818

19-
* `data_frame()` and `as_data_frame()` now check that you don't have any
19+
* `data_frame()` and `as_data_frame()` now check that you don't have any
2020
`POSIXlt` columns, and tell you to use `POSIXct` if you do (#813).
2121

2222
* `chain()`, `chain_q()` and `%.%` have been removed
@@ -63,7 +63,11 @@
6363

6464
* grouped `mutate` promotes up results that consist of all NA in one group (#1463).
6565

66-
* joins avoid repetitions of column names (#1460).
66+
* joins avoid repetitions of column names (#1460).
67+
68+
* `min` and `max` handle empty sets (#1481).
69+
70+
* grouped and rowwise `mutate` disambiguate `NA` and `NaN` (#1448).
6771

6872
# dplyr 0.4.3
6973

inst/include/dplyr/Result/max.h

+23-21
Original file line number | Diff line number | Diff line change
@@ -2,79 +2,81 @@
22
#define dplyr_Result_Max_H
33

44
namespace dplyr {
5-
5+
66
template <int RTYPE, bool NA_RM>
77
class Max : public Processor<RTYPE, Max<RTYPE,NA_RM> > {
88
public:
99
typedef Processor<RTYPE, Max<RTYPE,NA_RM> > Base ;
1010
typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
11-
11+
1212
Max(SEXP x, bool is_summary_ = false) :
13-
Base(x),
14-
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
13+
Base(x),
14+
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
1515
is_summary(is_summary_) {}
1616
~Max(){}
17-
17+
1818
STORAGE process_chunk( const SlicingIndex& indices ){
19+
if( indices.size() == 0) return R_NegInf ;
1920
if( is_summary ) return data_ptr[indices.group()] ;
2021
int n = indices.size() ;
21-
22+
2223
// find the first non NA value
2324
STORAGE res = data_ptr[ indices[0] ] ;
2425
int i=1 ;
2526
while( i<n && Rcpp::Vector<RTYPE>::is_na(res) ){
26-
res = data_ptr[ indices[i++] ] ;
27+
res = data_ptr[ indices[i++] ] ;
2728
}
28-
29+
2930
// we enter this loop if we did not scan the full vector
3031
if( i < n ) for( ; i<n; i++){
3132
STORAGE current = data_ptr[indices[i]] ;
3233
if( !Rcpp::Vector<RTYPE>::is_na(current) && internal::is_smaller<RTYPE>( res, current ) ) res = current ;
3334
}
3435
return res ;
3536
}
36-
37+
3738
private:
38-
STORAGE* data_ptr ;
39+
STORAGE* data_ptr ;
3940
bool is_summary ;
4041
} ;
41-
42+
4243
// quit early version for NA_RM = false
4344
template <int RTYPE>
4445
class Max<RTYPE,false> : public Processor<RTYPE, Max<RTYPE,false> > {
4546
public:
4647
typedef Processor<RTYPE, Max<RTYPE,false> > Base ;
4748
typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
48-
49+
4950
Max(SEXP x, bool is_summary_ = false) :
50-
Base(x),
51-
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
51+
Base(x),
52+
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
5253
is_summary(is_summary_) {}
5354
~Max(){}
54-
55+
5556
STORAGE process_chunk( const SlicingIndex& indices ){
57+
if( indices.size() == 0) return R_NegInf ;
5658
if( is_summary ) return data_ptr[indices.group()] ;
57-
59+
5860
int n = indices.size() ;
59-
61+
6062
// find the first non NA value
6163
STORAGE res = data_ptr[ indices[0] ] ;
6264
if( Rcpp::Vector<RTYPE>::is_na(res) ) return res;
63-
65+
6466
for( int i=1; i<n; i++){
6567
STORAGE current = data_ptr[indices[i]] ;
6668
if( Rcpp::Vector<RTYPE>::is_na(current) ) return current ;
6769
if( internal::is_smaller<RTYPE>( res, current ) ) res = current ;
6870
}
6971
return res ;
7072
}
71-
73+
7274
private:
7375
STORAGE* data_ptr ;
7476
bool is_summary ;
7577
} ;
76-
77-
78+
79+
7880
}
7981

8082
#endif

inst/include/dplyr/Result/min.h

+29-27
Original file line number | Diff line number | Diff line change
@@ -2,82 +2,84 @@
22
#define dplyr_Result_Min_H
33

44
namespace dplyr {
5-
5+
66
template <int RTYPE, bool NA_RM>
77
class Min : public Processor<RTYPE, Min<RTYPE,NA_RM> > {
88
public:
99
typedef Processor<RTYPE, Min<RTYPE,NA_RM> > Base ;
1010
typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
11-
12-
Min(SEXP x, bool is_summary_ = false) :
13-
Base(x),
14-
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
15-
is_summary(is_summary_)
11+
12+
Min(SEXP x, bool is_summary_ = false) :
13+
Base(x),
14+
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
15+
is_summary(is_summary_)
1616
{}
1717
~Min(){}
18-
18+
1919
STORAGE process_chunk( const SlicingIndex& indices ){
20+
if( indices.size() == 0) return R_PosInf ;
2021
if( is_summary ) return data_ptr[ indices.group() ] ;
21-
22+
2223
int n = indices.size() ;
2324
// find the first non NA value
2425
STORAGE res = data_ptr[ indices[0] ] ;
2526
int i=1 ;
2627
while( i<n && Rcpp::Vector<RTYPE>::is_na(res) ){
27-
res = data_ptr[ indices[i++] ] ;
28+
res = data_ptr[ indices[i++] ] ;
2829
}
29-
30+
3031
// we enter this loop if we did not scan the full vector
3132
if( i < n ) for( ; i<n; i++){
3233
STORAGE current = data_ptr[indices[i]] ;
3334
if( !Rcpp::Vector<RTYPE>::is_na(current) && internal::is_smaller<RTYPE>( current, res ) ) res = current ;
3435
}
35-
36+
3637
return res ;
3738
}
38-
39+
3940
private:
4041
STORAGE* data_ptr ;
4142
bool is_summary ;
4243
} ;
43-
44+
4445
// quit early version for NA_RM = false
4546
template <int RTYPE>
4647
class Min<RTYPE,false> : public Processor<RTYPE, Min<RTYPE,false> > {
4748
public:
48-
typedef Processor<RTYPE, Min<RTYPE,false> > Base ;
49+
typedef Processor<RTYPE, Min<RTYPE,false> > Base ;
4950
typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE ;
50-
51-
Min(SEXP x, bool is_summary_ = false) :
52-
Base(x),
53-
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
54-
is_summary(is_summary_)
51+
52+
Min(SEXP x, bool is_summary_ = false) :
53+
Base(x),
54+
data_ptr( Rcpp::internal::r_vector_start<RTYPE>(x) ),
55+
is_summary(is_summary_)
5556
{}
5657
~Min(){}
57-
58+
5859
STORAGE process_chunk( const SlicingIndex& indices ){
60+
if( indices.size() == 0) return R_PosInf ;
5961
if( is_summary ) return data_ptr[ indices.group() ] ;
60-
62+
6163
int n = indices.size() ;
62-
64+
6365
// find the first non NA value
6466
STORAGE res = data_ptr[ indices[0] ] ;
6567
if( Rcpp::Vector<RTYPE>::is_na(res) ) return res;
66-
68+
6769
for( int i=1; i<n; i++){
6870
STORAGE current = data_ptr[indices[i]] ;
6971
if( Rcpp::Vector<RTYPE>::is_na(current) ) return current ;
7072
if( internal::is_smaller<RTYPE>( current, res ) ) res = current ;
7173
}
7274
return res ;
7375
}
74-
76+
7577
private:
76-
STORAGE* data_ptr ;
78+
STORAGE* data_ptr ;
7779
bool is_summary ;
7880
} ;
79-
80-
81+
82+
8183
}
8284

8385
#endif

inst/include/tools/all_na.h

+7-2
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,16 @@
33

44
template <int RTYPE>
55
inline bool all_na_impl( const Vector<RTYPE>& x ){
6-
return all( is_na(x) ).is_true() ;
6+
return all( is_na(x) ).is_true() ;
7+
}
8+
9+
template <>
10+
inline bool all_na_impl<REALSXP>( const NumericVector& x ){
11+
return all( is_na(x) & !is_nan(x) ).is_true() ;
712
}
813

914
inline bool all_na( SEXP x ){
10-
RCPP_RETURN_VECTOR( all_na_impl, x ) ;
15+
RCPP_RETURN_VECTOR( all_na_impl, x ) ;
1116
}
1217

1318
#endif

tests/testthat/test-mutate.r

+24
Original file line number | Diff line number | Diff line change
@@ -469,3 +469,27 @@ test_that("mutate handles results from one group with all NA values (#1463) ", {
469469
expect_true( is.na(res$z[2]) )
470470
expect_is( res$z, "numeric")
471471
})
472+
473+
test_that("rowwise mutate handles the NA special case (#1448)", {
474+
res <- data.frame(k = c(-1, 1, 1)) %>%
475+
rowwise() %>%
476+
mutate(l = ifelse(k > 0, 1, NA))
477+
expect_is(res$l, "numeric")
478+
expect_true( is.na(res$l[1]) )
479+
expect_true( !anyNA(res$l[-1]) )
480+
481+
res <- data.frame(k = rnorm(10)) %>%
482+
rowwise() %>%
483+
mutate(l = ifelse(k > 0, 1L, NA_integer_))
484+
expect_true( all(is.na(res$l[res$k <= 0]) ) )
485+
expect_true( !any(is.na(res$l[res$k > 0]) ) )
486+
})
487+
488+
test_that("mutate disambiguates NA and NaN (#1448)", {
489+
Pass <- data.frame(P2 = c(0,3,2), F2 = c(0,2,0), id = 1:3)
490+
res <- Pass %>% group_by(id) %>% mutate(pass2 = P2/(P2 + F2))
491+
expect_true( is.nan(res$pass2[1]) )
492+
493+
res <- Pass %>% rowwise %>% mutate(pass2 = P2/(P2 + F2))
494+
expect_true( is.nan(res$pass2[1]) )
495+
})

tests/testthat/test-summarise.r

+7
Original file line number | Diff line number | Diff line change
@@ -532,3 +532,10 @@ test_that("hybrid max works when not used on columns (#1369)", {
532532
expect_equal( summarise(df, z = max(y))$z, 10 )
533533
expect_equal( summarise(df, z = max(10))$z, 10 )
534534
})
535+
536+
test_that( "min and max handle empty sets in summarise (#1481)", {
537+
df <- data_frame(A=numeric())
538+
res <- df %>% summarise(Min=min(A, na.rm=T), Max = max(A, na.rm=TRUE))
539+
expect_equal( res$Min, Inf )
540+
expect_equal( res$Max, -Inf )
541+
})

0 commit comments

Comments
 (0)