From f1f02593bd523d01d20167ad6966af0bca28c04f Mon Sep 17 00:00:00 2001 From: Anmol Chopra Date: Wed, 19 Sep 2018 20:10:48 +0530 Subject: [PATCH] Introduce GC'able cache for Roo::Excelx::Extractor#doc (#456) Memory Profiling script: ``` MemoryProfiler.report{Roo::Excelx.new('test/files/Bibelbund.xlsx').tap{|x|(2..x.last_row).each{|i| x.row(i)}}}.pretty_print ``` Benchmark Script: ``` puts Benchmark.measure{ Roo::Excelx.new('test/files/Bibelbund.xlsx').tap{|x|(2..x.last_row).each{|i| x.row(i)}} } ``` Result on Master: ``` Total allocated: 37131810 bytes (517026 objects) Total retained: 5562913 bytes (103010 objects) allocated memory by gem ----------------------------------- 19288066 roo/lib 11049821 nokogiri-1.8.4 6792403 rubyzip-1.2.2 1304 tmpdir 216 other retained memory by gem ----------------------------------- 5560934 roo/lib 782 rubyzip-1.2.2 725 nokogiri-1.8.4 296 tmpdir 176 other 0.720000 0.020000 0.740000 ( 0.733750) ``` Result after patch: ``` Total allocated: 34561842 bytes (504998 objects) Total retained: 5563553 bytes (103026 objects) allocated memory by gem ----------------------------------- 19254338 roo/lib 8513101 nokogiri-1.8.4 6792403 rubyzip-1.2.2 1304 tmpdir 320 weakref 216 other 160 ref-2.0.0 retained memory by gem ----------------------------------- 5561094 roo/lib 782 rubyzip-1.2.2 725 nokogiri-1.8.4 320 weakref 296 tmpdir 176 other 160 ref-2.0.0 0.610000 0.010000 0.620000 ( 0.618642) ``` --- lib/roo/attr_reader_helper.rb | 18 --------------- lib/roo/excelx/cell/base.rb | 4 ++-- lib/roo/excelx/extractor.rb | 9 ++++++-- lib/roo/helpers/default_attr_reader.rb | 20 ++++++++++++++++ lib/roo/helpers/weak_instance_cache.rb | 32 ++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 22 deletions(-) delete mode 100644 lib/roo/attr_reader_helper.rb create mode 100644 lib/roo/helpers/default_attr_reader.rb create mode 100644 lib/roo/helpers/weak_instance_cache.rb diff --git a/lib/roo/attr_reader_helper.rb b/lib/roo/attr_reader_helper.rb deleted file mode 100644 index a07c963f..00000000 --- a/lib/roo/attr_reader_helper.rb +++ /dev/null @@ -1,18 +0,0 @@ -# frozen_string_literal: true - -module Roo - module AttrReaderHelper - def attr_reader_with_default(attr_hash) - attr_hash.each do |attr_name, default_value| - instance_variable = :"@#{attr_name}" - define_method attr_name do - if instance_variable_defined? instance_variable - instance_variable_get instance_variable - else - default_value - end - end - end - end - end -end diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb index fb2ff779..9e2cc7ee 100644 --- a/lib/roo/excelx/cell/base.rb +++ b/lib/roo/excelx/cell/base.rb @@ -1,12 +1,12 @@ # frozen_string_literal: true -require "roo/attr_reader_helper" +require "roo/helpers/default_attr_reader" module Roo class Excelx class Cell class Base - extend Roo::AttrReaderHelper + extend Roo::Helpers::DefaultAttrReader attr_reader :cell_type, :cell_value, :value # FIXME: I think style should be deprecated. Having a style attribute diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb index 0b3006a1..b87a84ed 100755 --- a/lib/roo/excelx/extractor.rb +++ b/lib/roo/excelx/extractor.rb @@ -1,8 +1,11 @@ # frozen_string_literal: true +require "roo/helpers/weak_instance_cache" + module Roo class Excelx class Extractor + include Roo::Helpers::WeakInstanceCache COMMON_STRINGS = { t: "t", @@ -21,9 +24,11 @@ def initialize(path, options = {}) private def doc - raise FileNotFound, "#{@path} file not found" unless doc_exists? + instance_cache(:@doc) do + raise FileNotFound, "#{@path} file not found" unless doc_exists? - ::Roo::Utils.load_xml(@path).remove_namespaces! + ::Roo::Utils.load_xml(@path).remove_namespaces! + end end def doc_exists? diff --git a/lib/roo/helpers/default_attr_reader.rb b/lib/roo/helpers/default_attr_reader.rb new file mode 100644 index 00000000..a02ba84c --- /dev/null +++ b/lib/roo/helpers/default_attr_reader.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Roo + module Helpers + module DefaultAttrReader + def attr_reader_with_default(attr_hash) + attr_hash.each do |attr_name, default_value| + instance_variable = :"@#{attr_name}" + define_method attr_name do + if instance_variable_defined? instance_variable + instance_variable_get instance_variable + else + default_value + end + end + end + end + end + end +end diff --git a/lib/roo/helpers/weak_instance_cache.rb b/lib/roo/helpers/weak_instance_cache.rb new file mode 100644 index 00000000..c50c7c58 --- /dev/null +++ b/lib/roo/helpers/weak_instance_cache.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +require "weakref" + +module Roo + module Helpers + module WeakInstanceCache + private + + def instance_cache(key) + object = nil + + if (ref = instance_variable_get(key)) && ref.weakref_alive? + begin + object = ref.__getobj__ + rescue => e + unless (defined?(::WeakRef::RefError) && e.is_a?(::WeakRef::RefError)) || (defined?(RefError) && e.is_a?(RefError)) + raise e + end + end + end + + unless object + object = yield + instance_variable_set(key, WeakRef.new(object)) + end + + object + end + end + end +end