Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds default order for Daru::DataFrame (issue #130) #329

Merged
merged 4 commits into from
Apr 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ AllCops:
Include:
- 'lib/**/*'
Exclude:
- 'daru.gemspec'
- 'Rakefile'
- 'Gemfile'
- 'Guardfile'
- '**/*.erb'
- 'spec/*'
- 'spec/**/*'
Expand Down
2 changes: 1 addition & 1 deletion lib/daru/accessors/gsl_wrapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def compact
::GSL::Vector.alloc(@data.to_a - [Float::NAN])
end

[:mean, :min, :max, :prod, :sum].each do |method|
%i[mean min max prod sum].each do |method|
define_method(method) do
compact.send(method.to_sym) rescue nil
end
Expand Down
2 changes: 1 addition & 1 deletion lib/daru/category.rb
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def coding_scheme= scheme
@coding_scheme = scheme
end

CODING_SCHEMES = [:dummy, :deviation, :helmert, :simple].freeze
CODING_SCHEMES = %i[dummy deviation helmert simple].freeze

# Contrast code the vector according to the coding scheme set.
# @note To set the coding scheme use #coding_scheme=
Expand Down
8 changes: 4 additions & 4 deletions lib/daru/core/query.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ def initialize barry
end

def & other
BoolArray.new @barry.zip(other.barry).map { |b, o| b && o }
BoolArray.new(@barry.zip(other.barry).map { |b, o| b && o })
end

alias :and :&

def | other
BoolArray.new @barry.zip(other.barry).map { |b, o| b || o }
BoolArray.new(@barry.zip(other.barry).map { |b, o| b || o })
end

alias :or :|
Expand All @@ -39,11 +39,11 @@ def inspect

class << self
def apply_scalar_operator operator, data, other
BoolArray.new data.map { |d| !!d.send(operator, other) if d.respond_to?(operator) }
BoolArray.new(data.map { |d| !!d.send(operator, other) if d.respond_to?(operator) })
end

def apply_vector_operator operator, vector, other
BoolArray.new vector.zip(other).map { |d, o| !!d.send(operator, o) }
BoolArray.new(vector.zip(other).map { |d, o| !!d.send(operator, o) })
end

def df_where data_frame, bool_array
Expand Down
21 changes: 17 additions & 4 deletions lib/daru/dataframe.rb
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,17 @@ def from_vector_rows source, opts
# # b 7 2
# # c 8 3
# # d 9 4
#
# df = Daru::DataFrame.new([[1,2,3,4],[6,7,8,9]], name: :bat_man)
#
# # =>
# # #<Daru::DataFrame: bat_man (4x2)>
# # 0 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given your example, shouldn't [6,7,8,9] be the 1st vector?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing this out - fixed! 😄

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't you use actual executed code for the example? It's better if you do that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't used the actual executed code in the initial commit (I had copy-pasted the previous rdoc example - my bad). I've added the output I get in the irb console while initializing the DataFrame given in the rdoc example, in the subsequent commit.

# # 0 1 6
# # 1 2 7
# # 2 3 8
# # 3 4 9

def initialize source, opts={} # rubocop:disable Metrics/MethodLength
vectors, index = opts[:order], opts[:index] # FIXME: just keyword args after Ruby 2.1
@data = []
Expand Down Expand Up @@ -918,7 +929,7 @@ def keep_vector_if

# creates a new vector with the data of a given field which the block returns true
def filter_vector vec, &block
Daru::Vector.new each_row.select(&block).map { |row| row[vec] }
Daru::Vector.new(each_row.select(&block).map { |row| row[vec] })
end

# Iterates over each row and retains it in a new DataFrame if the block returns
Expand Down Expand Up @@ -1036,7 +1047,7 @@ def missing_values_rows missing_values=[nil]
alias :vector_missing_values :missing_values_rows

def has_missing_data?
!!@data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
@data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
end
alias :flawed? :has_missing_data?
deprecate :has_missing_data?, :include_values?, 2016, 10
Expand Down Expand Up @@ -1382,7 +1393,7 @@ def vectors= new_index
# df.rename_vectors :a => :alpha, :c => :gamma
# df.vectors.to_a #=> [:alpha, :b, :gamma]
def rename_vectors name_map
existing_targets = name_map.select { |k,v| k != v }.values & vectors.to_a
existing_targets = name_map.reject { |k,v| k == v }.values & vectors.to_a
delete_vectors(*existing_targets)

new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v }
Expand Down Expand Up @@ -2055,7 +2066,7 @@ def dispatch_to_axis_pl(axis, method, *args, &block)
end
end

AXES = [:row, :vector].freeze
AXES = %i[row vector].freeze

def extract_axis names, default=:vector
if AXES.include?(names.last)
Expand Down Expand Up @@ -2278,8 +2289,10 @@ def initialize_from_array source, vectors, index, opts

case source.first
when Array
vectors ||= (0..source.size-1).to_a
initialize_from_array_of_arrays source, vectors, index, opts
when Vector
vectors ||= (0..source.size-1).to_a
initialize_from_array_of_vectors source, vectors, index, opts
when Hash
initialize_from_array_of_hashes source, vectors, index, opts
Expand Down
2 changes: 1 addition & 1 deletion lib/daru/date_time/index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def self._load data
# @return [Array<Integer>] Array containing minutes of each index.
# @!method sec
# @return [Array<Integer>] Array containing seconds of each index.
[:year, :month, :day, :hour, :min, :sec].each do |meth|
%i[year month day hour min sec].each do |meth|
define_method(meth) do
each_with_object([]) do |d, arr|
arr << d.send(meth)
Expand Down
6 changes: 3 additions & 3 deletions lib/daru/index/index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def subset *indexes
Daru::Index.new indexes
else
# Assume 'indexes' contain positions not indexes
Daru::Index.new indexes.map { |k| key k }
Daru::Index.new(indexes.map { |k| key k })
end
end

Expand Down Expand Up @@ -290,11 +290,11 @@ def by_range rng

def by_multi_key *key
if include? key[0]
Daru::Index.new key.map { |k| k }
Daru::Index.new(key.map { |k| k })
else
# Assume the user is specifying values for index not keys
# Return index object having keys corresponding to values provided
Daru::Index.new key.map { |k| key k }
Daru::Index.new(key.map { |k| key k })
end
end

Expand Down
2 changes: 1 addition & 1 deletion lib/daru/io/io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def load filename

private

DARU_OPT_KEYS = [:clone, :order, :index, :name].freeze
DARU_OPT_KEYS = %i[clone order index name].freeze

def from_csv_prepare_opts opts
opts[:col_sep] ||= ','
Expand Down
10 changes: 5 additions & 5 deletions lib/daru/maths/statistics/dataframe.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module DataFrame
# Calculate the minimum value of each numeric vector
# @!method product
# Compute the product of each numeric vector
[:mean, :variance_sample, :range, :median, :mode, :std, :sum, :count, :min, :product].each do |meth|
%i[mean variance_sample range median mode std sum count min product].each do |meth|
define_method(meth) do
compute_stats meth
end
Expand Down Expand Up @@ -70,9 +70,9 @@ def max opts={}
# @!method rolling_variance
# Calculate moving variance
# @param [Integer] n (10) Loopback length. Default to 10.
[
:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
:rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
%i[
cumsum standardize acf ema rolling_mean rolling_median rolling_max
rolling_min rolling_count rolling_std rolling_variance rolling_sum
].each do |meth|
define_method(meth) do |*args|
apply_method_to_numerics meth, *args
Expand All @@ -88,7 +88,7 @@ def max opts={}
# be applied to numeric vectors. Default is [:count, :mean, :std, :max,
# :min]. Methods will be applied in the specified order.
def describe methods=nil
methods ||= [:count, :mean, :std, :min, :max]
methods ||= %i[count mean std min max]

description_hash = {}
numeric_vectors.each do |vec|
Expand Down
4 changes: 2 additions & 2 deletions lib/daru/maths/statistics/vector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def mode
# be applied to vectors. Default is [:count, :mean, :std, :max,
# :min]. Methods will be applied in the specified order.
def describe methods=nil
methods ||= [:count, :mean, :std, :min, :max]
methods ||= %i[count mean std min max]
description = methods.map { |m| send(m) }
Daru::Vector.new(description, index: methods, name: :statistics)
end
Expand Down Expand Up @@ -571,7 +571,7 @@ def rolling function, n=10
# @!method rolling_variance
# Calculate rolling variance
# @param [Integer] n (10) Loopback length
[:count, :mean, :median, :max, :min, :sum, :std, :variance].each do |meth|
%i[count mean median max min sum std variance].each do |meth|
define_method("rolling_#{meth}".to_sym) do |n=10|
rolling(meth, n)
end
Expand Down
4 changes: 2 additions & 2 deletions lib/daru/plotting/nyaplot/dataframe.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def plot_without_category opts

diagram =
case
when !([:scatter, :bar, :line, :histogram] & types).empty?
when !(%i[scatter bar line histogram] & types).empty?
plot_regular_diagrams plot, opts
when types.include?(:box)
plot_box_diagram plot
Expand Down Expand Up @@ -102,7 +102,7 @@ def apply_variant_to_diagrams diagrams, category_opts, type
end
end

SHAPES = %w(circle triangle-up diamond square triangle-down cross).freeze
SHAPES = %w[circle triangle-up diamond square triangle-down cross].freeze
def get_shape type
validate_type type, :scatter
SHAPES.cycle
Expand Down
4 changes: 2 additions & 2 deletions lib/daru/vector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ def category?
# Get index of element
def index_of element
case dtype
when :array then @index.key @data.index { |x| x.eql? element }
when :array then @index.key(@data.index { |x| x.eql? element })
else @index.key @data.index(element)
end
end
Expand Down Expand Up @@ -669,7 +669,7 @@ def recode! dt=nil, &block
def delete_if
return to_enum(:delete_if) unless block_given?

keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
keep_e, keep_i = each_with_index.reject { |n, _i| yield(n) }.transpose

@data = cast_vector_to @dtype, keep_e
@index = Daru::Index.new(keep_i)
Expand Down
8 changes: 8 additions & 0 deletions spec/dataframe_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,14 @@
expect(df[:a]) .to eq(Daru::Vector.new([1,2,3,4,5]))
end

it "allows creation of dataframe with a default order" do
arr_of_arrs_df = Daru::DataFrame.new([[1,2,3], [4,5,6], [7,8,9]])
arr_of_vectors_df = Daru::DataFrame.new([Daru::Vector.new([1,2,3]), Daru::Vector.new([4,5,6]), Daru::Vector.new([7,8,9])])

expect(arr_of_arrs_df.vectors.to_a).to eq([0,1,2])
expect(arr_of_vectors_df.vectors.to_a).to eq([0,1,2])
end

it "raises error for incomplete DataFrame index" do
expect {
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
Expand Down