This repository has been archived by the owner on May 5, 2019. It is now read-only.

Enhance joining and grouping #17

Merged: 33 commits, merged on Mar 6, 2017
Changes from 1 commit

Commits (33):
dd68a65  merge alyst groupby to DataTables (cjprybol, Feb 20, 2017)
a5fd472  passing tests (cjprybol, Feb 20, 2017)
9424201  Merge branch 'master' into cjp/alyst-groupby (cjprybol, Feb 20, 2017)
a652768  revert seemingly unrelated changes (cjprybol, Feb 20, 2017)
0cdf755  revert unnecessary changes for variable name and spacing (cjprybol, Feb 21, 2017)
d292dd3  fix indentation issue (cjprybol, Feb 21, 2017)
53774f5  add nonunique() (cjprybol, Feb 22, 2017)
2adc883  commit join.jl merge for Alyst to debug (cjprybol, Feb 22, 2017)
d52c791  make the easy changes requested during review (cjprybol, Feb 22, 2017)
5e9664a  add docstrings to row permutation functions (cjprybol, Feb 23, 2017)
e1b4d0e  clarify error message (cjprybol, Feb 23, 2017)
74c36d1  remove unused function (cjprybol, Feb 23, 2017)
de09a5c  update function to use isequal(a::Nullable, b::Nullable) from base (cjprybol, Feb 23, 2017)
160be5c  frame -> table (cjprybol, Feb 23, 2017)
7f28a14  update merge based on helpful diff (cjprybol, Feb 23, 2017)
61bf607  pass all tests that don't use Categorical (cjprybol, Feb 23, 2017)
6147d0c  added back commented out functions (cjprybol, Feb 23, 2017)
bab097f  minor cleanup (cjprybol, Feb 23, 2017)
cdac010  Merge branch 'master' into cjp/alyst-groupby (cjprybol, Feb 23, 2017)
8cf4a67  more changes suggested during review (cjprybol, Feb 23, 2017)
199f96b  use explicit vcat, indentation, parentheses (cjprybol, Feb 23, 2017)
f3b06a3  more indentation (cjprybol, Feb 23, 2017)
8308879  fix test/join.jl errors using `resize!` in (cjprybol, Feb 24, 2017)
1c842dc  passing all tests! (cjprybol, Feb 25, 2017)
637b8cf  update CategoricalArrays version (cjprybol, Feb 27, 2017)
49d6328  incorporate edits suggested during review (cjprybol, Mar 1, 2017)
b6c1f98  more fixes (cjprybol, Mar 3, 2017)
839c558  added tests and trimmed unnecessary functions (cjprybol, Mar 4, 2017)
46aaae2  update new function name (cjprybol, Mar 4, 2017)
01b3ce8  revert code deletions and address most comments (cjprybol, Mar 6, 2017)
7b9b8e2  revert bad edit, function is untested (cjprybol, Mar 6, 2017)
7fe0389  revert some changes, clean up tests (cjprybol, Mar 6, 2017)
cf0486a  change docstring to comment (cjprybol, Mar 6, 2017)
21 changes: 10 additions & 11 deletions src/datatablerow/datatablerow.jl
@@ -37,18 +37,17 @@ Base.convert(::Type{Array}, r::DataTableRow) = convert(Array, r.dt[r.row,:])

Base.collect(r::DataTableRow) = Tuple{Symbol, Any}[x for x in r]

# the equal elements of nullable and normal arrays would have the same hashes
const NULL_MAGIC = 0xBADDEED # what to hash if the element is null

# hash column element
Base.@propagate_inbounds hash_colel(v::AbstractArray, i, h::UInt = zero(UInt)) = hash(v[i], h)
Base.@propagate_inbounds hash_colel{T<:Nullable}(v::AbstractArray{T}, i, h::UInt = zero(UInt)) =
isnull(v[i]) ? hash(Base.nullablehash_seed, h) : hash(get(v[i]), h)
Member:
Also use unsafe_get here.
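A sketch of the suggestion (unsafe_get from Julia 0.6 Base, or Compat on 0.5): unsafe_get skips the null check that get performs, which is safe immediately after an explicit isnull test.

```julia
h = zero(UInt)
x = Nullable(42)
# get(x) re-checks isnull and can throw; unsafe_get(x) just reads the value,
# which is safe on this branch because isnull(x) was already tested:
isnull(x) ? hash(Base.nullablehash_seed, h) : hash(unsafe_get(x), h)
```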

Base.@propagate_inbounds hash_colel{T}(v::NullableArray{T}, i, h::UInt = zero(UInt)) =
- isnull(v, i) ? hash(NULL_MAGIC, h) : hash(get(v[i]), h)
+ isnull(v, i) ? hash(Base.nullablehash_seed, h) : hash(v.values[i], h)
Base.@propagate_inbounds hash_colel{T}(v::AbstractCategoricalArray{T}, i, h::UInt = zero(UInt)) =
hash(CategoricalArrays.index(v.pool)[v.refs[i]], h)
Member:
Are you sure this is really more efficient than the default hash method for CategoricalValue?

Contributor (alyst, Feb 22, 2017):
I guess it's not as efficient. But in these functions the constraint is to make hashes invariant to the hashed value representation: whether it's nullable or not, and whether it's stored "as is" or in a categorical array. Otherwise joins would not work (we could require that joins only use columns of identical types, but that would put too much overhead on the user side). So we have to check whether the default hash functions have this property (Nullable's, AFAIR, does not).

Member (nalimilan, Feb 22, 2017):
It would be surprising if it were significantly slower, since the code is very similar. Though since we need the special method for NullableCategoricalArray to avoid the cost of creating a Nullable just to unwrap it, I guess it doesn't matter too much what we do here.
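A minimal sketch of the invariant described above (array constructors assumed from NullableArrays.jl and CategoricalArrays.jl; hash_colel as defined in this file):

```julia
using NullableArrays, CategoricalArrays

# The same logical values stored three different ways:
v_plain    = [1, 2, 3]
v_nullable = NullableArray([1, 2, 3])
v_categ    = CategoricalArray([1, 2, 3])

# Hash-based grouping and joining across these column types relies on,
# for every non-null index i:
#   hash_colel(v_plain, i) == hash_colel(v_nullable, i) == hash_colel(v_categ, i)
# hence these methods unwrap values before hashing instead of relying on the
# default hash of Nullable or CategoricalValue.
```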

- Base.@propagate_inbounds function hash_colel{T}(v::AbstractCategoricalArray{T}, i, h::UInt = zero(UInt))
+ Base.@propagate_inbounds function hash_colel{T}(v::AbstractNullableCategoricalArray{T}, i, h::UInt = zero(UInt))
Contributor Author:
Need to add a test to distinguish this from the function above, per the now out-of-date comment.

AbstractCategoricalArray should be AbstractNullableCategoricalArray AFAICT. That also means a test is missing to catch this.

ref = v.refs[i]
- ref == 0 ? hash(NULL_MAGIC, h) : hash(CategoricalArrays.index(v.pool)[ref], h)
+ ref == 0 ? hash(Base.nullablehash_seed, h) : hash(CategoricalArrays.index(v.pool)[ref], h)
end

# hash of DataTable rows based on its values
@@ -79,7 +78,7 @@ function @compat(Base.:(==))(r1::DataTableRow, r2::DataTableRow)
end
return eq
else
r1.row == r2.row && return Nullable(true)
eq = Nullable(true)
@inbounds for col in columns(r1.dt)
eq_col = convert(Nullable{Bool}, col[r1.row] == col[r2.row])
@@ -104,13 +103,13 @@ function isequal_colel{T}(col::Union{NullableArray{T},
end

isequal_colel(a::Any, b::Any) = isequal(a, b)
Member:
These two-argument definitions are not needed AFAICT. The only place where they are called could use isequal directly instead.

Contributor Author:
Tests fail after making the changes. They all seem necessary to get around Nullable comparisons:

julia> using DataTables

julia> DataTables.isequal_colel(Nullable(1), 1)
true

julia> isequal(Nullable(1), 1)
false

Member:
Ah, right, I forgot about the need to unwrap nullables when comparing with non-nullables. That behavior is quite annoying, but well...

- isequal_colel(a::Nullable, b::Any) = !isnull(a) && isequal(get(a), b)
+ isequal_colel(a::Nullable, b::Any) = !isnull(a) & isequal(unsafe_get(a), b)
isequal_colel(a::Any, b::Nullable) = isequal_colel(b, a)
- isequal_colel(a::Nullable, b::Nullable) = isnull(a)==isnull(b) && (isnull(a) || isequal(get(a), get(b)))
+ isequal_colel(a::Nullable, b::Nullable) = isnull(a)==isnull(b) && (isnull(a) || isequal(a, b))
Contributor:
I suppose here it could be just:

isequal_colel(a::Nullable, b::Nullable) = isequal(a, b)


# comparison of DataTable rows
function isequal_row(dt::AbstractDataTable, r1::Int, r2::Int)
- (r1 == r2) && return true # same raw
+ (r1 == r2) && return true # same row
@inbounds for col in columns(dt)
isequal_colel(col, r1, r2) || return false
end
@@ -120,7 +119,7 @@ end
function isequal_row(dt1::AbstractDataTable, r1::Int, dt2::AbstractDataTable, r2::Int)
(dt1 === dt2) && return isequal_row(dt1, r1, r2)
(ncol(dt1) == ncol(dt2)) ||
- throw(ArgumentError("Rows of the data frames that have different number of columns cannot be compared ($(ncol(dt1)) and $(ncol(dt2)))"))
+ throw(ArgumentError("Rows of the data tables that have different number of columns cannot be compared ($(ncol(dt1)) and $(ncol(dt2)))"))
Member:
"of data tables"
Also, inside the parentheses, probably more explicit to say "got X and Y columns".

@inbounds for (col1, col2) in zip(columns(dt1), columns(dt2))
isequal_colel(col1[r1], col2[r2]) || return false
end
26 changes: 13 additions & 13 deletions src/datatablerow/utils.jl
@@ -25,11 +25,11 @@ function hashrows_col!(h::Vector{UInt}, v::AbstractVector)
h
end

- function hashrows_col!{T}(h::Vector{UInt}, v::AbstractVector{T})
+ function hashrows_col!{T<:Nullable}(h::Vector{UInt}, v::AbstractVector{T})
@inbounds for i in eachindex(h)
Member:
Can't you call hash_colel instead of rewriting the hashing logic? That would allow merging all the similar methods.

Contributor:
AFAIR, the hashrows_col!() methods were supposed to provide optimizations of the hashing for specific data types (e.g. a pre-built hash for CategoricalArray). But probably most of the current methods could be replaced by the default implementation that uses hash_colel().

Member:
Yes, hash_colel can still be specialized, but hashrows_col! doesn't need to be, AFAICT.
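A sketch of the merge the reviewer suggests: one generic hashrows_col! that delegates per-element hashing to hash_colel, so only hash_colel needs type-specific methods (a sketch of the suggestion, not the code the PR settled on):

```julia
# Generic column hasher: mixes each element's hash into the running
# per-row hash h[i]; all type-specific logic stays in hash_colel.
function hashrows_col!(h::Vector{UInt}, v::AbstractVector)
    @inbounds for i in eachindex(h)
        h[i] = hash_colel(v, i, h[i])
    end
    h
end
```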

h[i] = isnull(v[i]) ?
- hash(NULL_MAGIC, h[i]) :
- hash(v[i], h[i])
+ hash(Base.nullablehash_seed, h[i]) :
Member:
Should be h[i] + nullablehash_seed, no need to call hash in that case (like in base/nullable.jl).

Member:
Also applies above.

Contributor Author:
I got hash errors after making these changes (and other failures probably downstream of this):

Test Failed
  Expression: dt_rowhashes == [hash(dr) for dr = eachrow(dt)]
   Evaluated: UInt64[0x1bdefb2976bd94c3,0xdd9fa2a42135ac50,0x1e6098864a87ed7d,0x1bdefb2976bd94c3,0xdd9fa2a42135ac50,0x0e42569badde05fc] == UInt64[0x1bdefb2976bd94c3,0xbb2ac60630b5eb56,0x1e6098864a87ed7d,0x1bdefb2976bd94c3,0xbb2ac60630b5eb56,0xd4ac22e6c0fc8065]

I think you're referencing this line, but hash(Base.nullablehash_seed, h[i]) doesn't call that function, since the first argument is not a Nullable but Base.nullablehash_seed.

Member:
I didn't mean to imply that that function was called, just that this would be consistent with it. I don't think changing this should generate any failures, since the hash functions are completely under our control. Have you made the same change in all places where hash is involved in the PR?

+ hash(unsafe_get(v[i]), h[i])
end
h
end
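For context on the thread above, a sketch of the two null-hashing conventions under discussion: the hash(Base.nullablehash_seed, h) call used in this diff versus the h + nullablehash_seed addition that Base's hash(::Nullable, ::UInt) uses (as of Julia 0.5). They produce different values, which is why changing one call site without the others breaks the row-hash tests:

```julia
h = zero(UInt)
a = hash(Base.nullablehash_seed, h)   # convention used in this diff
b = h + Base.nullablehash_seed        # convention used by Base for null Nullables
a == b                                # false: the two conventions disagree
```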
@@ -48,7 +48,7 @@ function hashrows_col!{T}(h::Vector{UInt}, v::AbstractNullableCategoricalVector{
# TODO is it possible to optimize by hashing the pool values once?
@inbounds for (i, ref) in enumerate(v.refs)
h[i] = ref == 0 ?
- hash(NULL_MAGIC, h[i]) :
+ hash(Base.nullablehash_seed, h[i]) :
Member:
Same here.

hash(CategoricalArrays.index(v.pool)[ref], h[i])
end
h
@@ -72,7 +72,7 @@ end
# the indices of the first row in a group
# Optional group vector is set to the group indices of each row
function row_group_slots(dt::AbstractDataTable,
groups::Union{Vector{Int}, Void} = nothing)
@assert groups === nothing || length(groups) == nrow(dt)
rhashes = hashrows(dt)
sz = Base._tablesz(length(rhashes))
Member:
Can you add a comment explaining why we need this?

Contributor (alyst, Feb 22, 2017):
This function is based on the Dict hash table from Base. AFAIR, this line should reduce the number of reallocations, since we know the final hash size.

Member:
OK. So it would be good to explain that it's based on the Base code. Isn't there any generic data structure that we could use instead of custom code? Would it make sense to have this in e.g. DataStructures.jl?

Member:
@alyst Could you explain what the general strategy is here and how it differs from the current one? Can it be as fast as the current one when grouping on CategoricalArray columns? AFAICT hash-based lookups are necessarily quite a bit slower than integer-based indexing.

While the benchmark improvements compared with master are great, they don't seem to match the performance I obtained in #12 for the most efficient case. Of course this PR greatly improves performance for other cases, which are quite common, but ideally we would get the best possible performance for each case.

See also my question above.

Contributor:
@nalimilan I'm not quite sure I understand what you mean by the general strategy; I partly explained it above. Here we use hashes to group identical rows. AFAIR, at the time I wrote the original PR, DataFrames used a temporary array of row ids, indexed like this:

row_id = index(a[i]) + N(a)*(index(b[i]) + N(b)*(index(c[i]) + ...))

where a, b, c are columns, i is the row index, index() gives the unique integer identifier for a column value, and N() is the number of unique elements in a column. As you can see, the size of that array had to be at least N(a)*N(b)*N(c)*..., which led to memory overflow quite easily (or at least to very inefficient memory usage). Hashing solves this issue.
For single-column joins over categorical arrays one could potentially write an alternative version, although it would still need to use the element value hashes, so that joins between categorical and non-categorical columns work properly.
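A small worked illustration of the blow-up described above (all numbers hypothetical):

```julia
# One row's per-column integer codes, and the number of unique levels per column:
index_a, index_b, index_c = 3, 7, 2
N_a, N_b, N_c = 1000, 1000, 1000

# Mixed-radix row id as in the old cartesian scheme:
row_id = index_a + N_a*(index_b + N_b*index_c)   # 2_007_003

# The id space is N_a*N_b*N_c = 10^9 possible ids regardless of how many
# distinct rows actually occur, so the lookup array explodes; hashing keeps
# the table proportional to the number of rows instead.
```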

Member:
Yes, the old code overflowed quite easily, but Pandas (from which it was inspired) has a solution that compresses the integer codes to avoid that. I'm not saying it's necessarily the best solution, but they must have considered this question quite carefully. So before adopting a different approach, I'd like to be sure it can be about as fast as Pandas. I guess the only way to find out is to benchmark it.

Contributor:
@nalimilan Benchmarking is a nice idea anyway, because a proper benchmark should be based on big-data-derived datasets (millions of rows, arrays with >1000 categories, multi-column joins using different data types, etc.). There's a long-term benefit in having it. I was testing PR850 using somewhat realistic datasets. Unfortunately, ATM I don't have the test system or the time to help you with the benchmark.
Re Pandas or master vs the PR approach: IMO the main performance determinant is memory allocation and memory access patterns. Hashing should not be significantly slower than Pandas indexing, with the exception of grouping/joining by one categorical column, as we discussed above.

Member:
Could you also explain why we can't simply use a Dict here?

Contributor:
One of my intermediate implementations used Dict{DataFrameRow, ...}, but the current one has more efficient memory access patterns when generating the hashes (by column instead of by row). And it doesn't allocate memory for Dict values (the values would be the sets of row indices of each group), because this information can be retrieved efficiently from RowGroupDict (rperm, starts, etc., which are just vectors of integers).

Member:
OK, thanks, it's good to have this written down somewhere.
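A sketch of the retrieval just described, using the RowGroupDict field names from this PR (rperm, starts, stops):

```julia
# rperm permutes row indices so each group's rows are contiguous;
# starts[gix]:stops[gix] is group gix's span inside rperm. A group's rows
# therefore come back as a view, with no per-group allocation:
rows_of_group(rperm, starts, stops, gix) = view(rperm, starts[gix]:stops[gix])
```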

Member:
Can you just add a comment mentioning that this code is inspired by the dict code from Base?
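To make the Base.Dict-inspired strategy discussed in this thread concrete, a minimal self-contained sketch (not the PR's actual row_group_slots: isequal_rows stands in for the row comparison, and the real code additionally bounds the probe count):

```julia
# Group rows by hash using an open-addressing table with linear probing,
# sized to a power of two like Base.Dict.
function group_ids(rhashes::Vector{UInt}, isequal_rows)
    sz = Base._tablesz(length(rhashes))   # power-of-two table size, as in the PR
    szm1 = sz - 1
    gslots = zeros(Int, sz)               # slot -> index of a group's first row (0 = empty)
    groups = Vector{Int}(length(rhashes))
    ngroups = 0
    @inbounds for i in eachindex(rhashes)
        slotix = Int(rhashes[i] & szm1) + 1        # initial slot from the row hash
        while true
            g_row = gslots[slotix]
            if g_row == 0                          # empty slot: row starts a new group
                gslots[slotix] = i
                ngroups += 1
                groups[i] = ngroups
                break
            elseif rhashes[i] == rhashes[g_row] && isequal_rows(i, g_row)
                groups[i] = groups[g_row]          # equal row found: reuse its group
                break
            end
            slotix = slotix & szm1 + 1             # linear probing: try the next slot
        end
    end
    return ngroups, groups
end
```

With rhashes produced column-wise (as hashrows does) and isequal_rows comparing two rows of the table, this yields per-row group ids analogous to what row_group_slots returns.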

@@ -106,7 +106,7 @@ function row_group_slots(dt::AbstractDataTable,
end
slotix = slotix & szm1 + 1 # check the next slot
probe += 1
- probe < sz || error("Cannot find free row slot")
+ @assert probe < sz
end
if groups !== nothing
groups[i] = gix
Expand All @@ -115,15 +115,15 @@ function row_group_slots(dt::AbstractDataTable,
return ngroups, rhashes, gslots
end

- # Builds RowGroupDict for a given dataframe.
+ # Builds RowGroupDict for a given datatable.
# Partly uses the code of Wes McKinney's groupsort_indexer in pandas (file: src/groupby.pyx).
Member:
Should update the name of the file while we remember it.

function group_rows(dt::AbstractDataTable)
groups = Vector{Int}(nrow(dt))
ngroups, rhashes, gslots = row_group_slots(dt, groups)

# count elements in each group
stops = zeros(Int, ngroups)
- for g_ix in groups
+ @inbounds for g_ix in groups
stops[g_ix] += 1
end

@@ -170,20 +170,20 @@ function findrow(gd::RowGroupDict, dt::DataTable, row::Int)
return 0 # not found
end

- # Finds indices of rows in 'gd' that match given row by content.
- # returns empty set if no row matches
+ # Find indices of rows in 'gd' that match given row by content.
+ # return empty set if no row matches
function Base.get(gd::RowGroupDict, dt::DataTable, row::Int)
Member:
This function and the next one don't seem to match the signature and behavior of the corresponding Base methods. Better give them a separate name or adapt their signature so that they take a single key/index.

g_row = findrow(gd, dt, row)
- (g_row == 0) && return Compat.view(gd.rperm, 0:-1)
+ (g_row == 0) && return view(gd.rperm, 0:-1)
gix = gd.groups[g_row]
- return Compat.view(gd.rperm, gd.starts[gix]:gd.stops[gix])
+ return view(gd.rperm, gd.starts[gix]:gd.stops[gix])
end

function Base.getindex(gd::RowGroupDict, dtr::DataTableRow)
g_row = findrow(gd, dtr.dt, dtr.row)
(g_row == 0) && throw(KeyError(dtr))
gix = gd.groups[g_row]
- return Compat.view(gd.rperm, gd.starts[gix]:gd.stops[gix])
+ return view(gd.rperm, gd.starts[gix]:gd.stops[gix])
end

# Check if there is matching row in gd
4 changes: 2 additions & 2 deletions src/groupeddatatable/grouping.jl
@@ -122,9 +122,9 @@ function groupby{T}(dt::AbstractDataTable, cols::Vector{T}; sort::Bool = false)
dt_groups = group_rows(sdt)
# sort the groups
if sort
- group_perm = sortperm(sub(sdt, dt_groups.rperm[dt_groups.starts]))
+ group_perm = sortperm(view(sdt, dt_groups.rperm[dt_groups.starts]))
permute!(dt_groups.starts, group_perm)
- permute!(dt_groups.stops, group_perm)
+ Base.permute!!(dt_groups.stops, group_perm)
end
GroupedDataTable(dt, cols, dt_groups.rperm,
dt_groups.starts, dt_groups.stops)
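On the last two changed lines, presumably: permute! leaves the permutation vector intact (it works on an internal copy), while Base.permute!! is allowed to scramble the permutation it is given and so avoids that copy. That makes it safe only on the final use of group_perm, which would be why the diff keeps permute! for starts and switches only the stops call. A small sketch of the difference (Julia 0.5-era Base semantics):

```julia
p = [2, 3, 1]
v = [10, 20, 30]
permute!(v, p)          # v == [20, 30, 10]; p is still [2, 3, 1]

w = [10, 20, 30]
Base.permute!!(w, p)    # w == [20, 30, 10]; p's contents are no longer meaningful
```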