Skip to content

Commit

Permalink
Merge pull request #2 from nvdk/feature/bached-insert
Browse files Browse the repository at this point in the history
support large files
  • Loading branch information
erikap authored Nov 5, 2018
2 parents 82667da + e52d29a commit a760c87
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion web.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'net/http'
require 'securerandom'

# RDF vocabulary rooted at the mu.semte.ch migrations namespace.
MU_MIGRATIONS = RDF::Vocabulary.new('http://mu.semte.ch/vocabularies/migrations/')

Expand Down Expand Up @@ -55,7 +56,7 @@ def execute!
elsif filename.end_with? ".ttl"
log.debug "Importing the migration file"
data = RDF::Graph.load(self.location, format: :ttl)
sparql_client.insert_data(data, graph: graph)
batch_insert(data, graph: graph)
else
log.warn "Unsupported file format #{filename}"
end
Expand All @@ -73,6 +74,23 @@ def execute!
# One-line summary of this migration: its location followed by a marker
# telling whether it has been executed.
def to_s
  status = executed? ? "[DONE]" : "[NOT EXECUTED]"
  "#{self.location} #{status}"
end

private
# Inserts +data+ into +graph+ in slices of at most +batch_size+ triples.
#
# Slices are first written to a uniquely named temporary graph. Only after
# every slice has been accepted is the temporary graph added to +graph+ with a
# single ADD operation. The temporary graph is dropped afterwards, whether or
# not the load succeeded.
#
# @param data [#size, #each_slice] the triples to insert
# @param graph [#to_s] IRI of the destination graph
# @param batch_size [Integer] maximum number of triples per insert request
# @raise [StandardError] re-raises any error hit while inserting or adding
def batch_insert(data, graph:, batch_size: 3000)
  triple_count = data.size
  log.info("dataset of #{triple_count} triples will be inserted in batches of #{batch_size} triples")
  # Nothing to load: no slice would be inserted, so the temporary graph would
  # never be created, and a non-SILENT ADD may fail on a missing source graph.
  return if triple_count.zero?

  temp_graph = "http://migrations.mu.semte.ch/#{SecureRandom.uuid}"
  begin
    data.each_slice(batch_size) do |slice|
      sparql_client.insert_data(slice, graph: temp_graph)
    end
    update("ADD <#{temp_graph}> TO <#{graph}>")
  rescue StandardError
    log.error("error batch loading triples, batch_size #{batch_size}")
    raise # bare raise re-raises the current exception unchanged
  ensure
    # Always clean up; SILENT keeps the drop from failing when the temporary
    # graph was never created.
    update("DROP SILENT GRAPH <#{temp_graph}>")
  end
end
end

def execute_migrations
Expand Down

0 comments on commit a760c87

Please sign in to comment.