From c939c0121332210369be913c3fd3f0ec13fa2a87 Mon Sep 17 00:00:00 2001 From: chopraanmol1 Date: Tue, 18 Sep 2018 18:14:04 +0530 Subject: [PATCH] Introduce GC'able cache for Roo::Excelx::Extractor#doc Memory Profiling script: ``` MemoryProfiler.report{Roo::Excelx.new('test/files/Bibelbund.xlsx').tap{|x|(2..x.last_row).each{|i| x.row(i)}}}.pretty_print ``` Benchmark Script: ``` puts Benchmark.measure{ Roo::Excelx.new('test/files/Bibelbund.xlsx').tap{|x|(2..x.last_row).each{|i| x.row(i)}} } ``` Result on Master: ``` Total allocated: 37131810 bytes (517026 objects) Total retained: 5562913 bytes (103010 objects) allocated memory by gem ----------------------------------- 19288066 roo/lib 11049821 nokogiri-1.8.4 6792403 rubyzip-1.2.2 1304 tmpdir 216 other retained memory by gem ----------------------------------- 5560934 roo/lib 782 rubyzip-1.2.2 725 nokogiri-1.8.4 296 tmpdir 176 other 0.720000 0.020000 0.740000 ( 0.733750) ``` Result after patch: ``` Total allocated: 34561842 bytes (504998 objects) Total retained: 5563553 bytes (103026 objects) allocated memory by gem ----------------------------------- 19254338 roo/lib 8513101 nokogiri-1.8.4 6792403 rubyzip-1.2.2 1304 tmpdir 320 weakref 216 other 160 ref-2.0.0 retained memory by gem ----------------------------------- 5561094 roo/lib 782 rubyzip-1.2.2 725 nokogiri-1.8.4 320 weakref 296 tmpdir 176 other 160 ref-2.0.0 0.610000 0.010000 0.620000 ( 0.618642) ``` Note: Ruby does have a native implementation of WeakRef. 
But I've chosen to go with the 'ref' gem for the following reason: https://github.com/ruby-concurrency/ref#problems-with-weakref --- lib/roo/excelx/extractor.rb | 24 +++++++++++++++++++++--- roo.gemspec | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb index 0b3006a1..adc83f39 100755 --- a/lib/roo/excelx/extractor.rb +++ b/lib/roo/excelx/extractor.rb @@ -1,9 +1,10 @@ # frozen_string_literal: true +require "ref" + module Roo class Excelx class Extractor - COMMON_STRINGS = { t: "t", r: "r", @@ -21,14 +22,31 @@ def initialize(path, options = {}) private def doc - raise FileNotFound, "#{@path} file not found" unless doc_exists? + instance_cache(:@doc) do + raise FileNotFound, "#{@path} file not found" unless doc_exists? - ::Roo::Utils.load_xml(@path).remove_namespaces! + ::Roo::Utils.load_xml(@path).remove_namespaces! + end end def doc_exists? @path && File.exist?(@path) end + + def instance_cache(key) + object = nil + + if ref = instance_variable_get(key) + object = ref.object + end + + unless object + object = yield + instance_variable_set(key, Ref::WeakReference.new(object)) + end + + object + end end end end diff --git a/roo.gemspec b/roo.gemspec index 62dbb82a..75fe4fda 100644 --- a/roo.gemspec +++ b/roo.gemspec @@ -18,6 +18,7 @@ Gem::Specification.new do |spec| spec.require_paths = ['lib'] spec.add_dependency 'nokogiri', '~> 1' + spec.add_dependency 'ref', '~> 2' spec.add_dependency 'rubyzip', '>= 1.2.1', '< 2.0.0' spec.add_development_dependency 'rake', '~> 10.1'