Skip to content
This repository has been archived by the owner on Jun 10, 2020. It is now read-only.

harvested wac files keep wav file as original, closes #35 #49

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/baw-workers/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ def run(opts)
mp3splt: BawWorkers::Settings.audio_tools.mp3splt_executable,
sox: BawWorkers::Settings.audio_tools.sox_executable,
wavpack: BawWorkers::Settings.audio_tools.wavpack_executable,
shntool: BawWorkers::Settings.audio_tools.shntool_executable
shntool: BawWorkers::Settings.audio_tools.shntool_executable,
wav2png: BawWorkers::Settings.audio_tools.wav2png_executable,
wac2wav: BawWorkers::Settings.audio_tools.wac2wav_executable
})

BawWorkers::Config.spectrogram_helper = BawAudioTools::Spectrogram.from_executables(
Expand Down
17 changes: 17 additions & 0 deletions lib/baw-workers/file_info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ module BawWorkers
# Helpers to get info from files.
class FileInfo

# Create a FileInfo instance.
# @param [BawAudioTools::AudioBase] audio_base
def initialize(audio_base)
@audio = audio_base
end
Expand Down Expand Up @@ -183,5 +185,20 @@ def file_name_datetime(file_name, utc_offset = nil)
result
end

# Convert a .wac file to a .wav file in the same directory.
# @param [String] file_path
# @return [String] converted file path
def convert_wac_to_wav(file_path)
ext = File.extname(file_path)
fail BawAudioTools::Exceptions::AudioToolError, 'Can only convert from .wac' if ext != '.wac'

source = File.expand_path(file_path)
target = File.join(File.dirname(source), File.basename(source, '.wac') + '.wav')

# only try to create the wav file if it doesn't already exist
@audio.modify(source, target) unless File.size?(target)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cofiem - why this unless condition? Couldn't this lead to non-idempotent behaviour on repeated runs?

I guess you're assuming an identical .wav file may have already been uploaded and you don't want to run wac2wav if that is the case... in which case, that check should be explicit.

Adding to that, the output name of the .wav file should be unique so duplicate runs don't mistake converted wav files as original wav files.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: This concept of converting a file for storage won't be a once off - see #50 and #36.

target
end

end
end
29 changes: 24 additions & 5 deletions lib/baw-workers/harvest/gather_files.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,16 @@ def file(path, top_dir, dir_settings = {})

dir_settings = get_folder_settings(File.join(File.dirname(path), @config_file_name)) if dir_settings.blank?

basic_info, advanced_info = file_info(path, dir_settings[:utc_offset])
basic_info, advanced_info, new_path = file_info(path, dir_settings[:utc_offset])

if basic_info.blank? || advanced_info.blank?
@logger.warn(@class_name) { "Not enough information for #{path}." }
@logger.warn(@class_name) { "Not enough information for #{new_path}." }
{}
else
@logger.debug(@class_name) { "Complete information found for #{path}." }
@logger.debug(@class_name) { "Complete information found for #{new_path}." }
result = {}
result = result.merge(basic_info).merge(dir_settings).merge(advanced_info)
result[:file_rel_path] = Pathname.new(path).relative_path_from(Pathname.new(top_dir)).to_s
result[:file_rel_path] = Pathname.new(new_path).relative_path_from(Pathname.new(top_dir)).to_s
result
end
end
Expand All @@ -168,6 +168,25 @@ def file(path, top_dir, dir_settings = {})
def file_info(file, utc_offset)
basic_info, advanced_info = nil

if File.extname(file) == '.wac'
begin
wac_file = file
@logger.warn(@class_name) {
"Converting from .wac file #{wac_file}"
}
wav_file = @file_info_helper.convert_wac_to_wav(wac_file)
@logger.warn(@class_name) {
"Converted to .wav file #{wav_file} from .wac file #{wac_file}"
}
file = wav_file
rescue StandardError => e
@logger.error(@class_name) {
"Could not convert .wac file #{file} using utc offset '#{utc_offset}': #{format_error(e)}"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming the utc_offset part of the exception has some relevance?

}
raise
end
end

begin
basic_info = @file_info_helper.basic(file)
advanced_info = @file_info_helper.advanced(file, utc_offset)
Expand All @@ -186,7 +205,7 @@ def file_info(file, utc_offset)
}
end

[basic_info, advanced_info]
[basic_info, advanced_info, file]
end

# Get all files in a directory.
Expand Down
18 changes: 18 additions & 0 deletions lib/baw-workers/harvest/single_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,24 @@ def rename_harvested_file(file_path, storage_target_paths)
@logger.info(@class_name) {
"Source file #{file_path} was successfully renamed to #{renamed_source_file}."
}

# if this is a wav file, and a .wac file exists, rename the .wac file as well
file_ext = File.extname(file_path)
wac_file = file_path[0..-4]+'wac'
if file_ext == '.wav' && File.exists?(wac_file)
renamed_wac_file = wac_file + '.completed'
File.rename(wac_file, renamed_wac_file)
if File.exists?(renamed_wac_file)
@logger.info(@class_name) {
"Source .wac file #{wac_file} was successfully renamed to #{renamed_wac_file}."
}
else
@logger.warn(@class_name) {
"Source .wac file #{wac_file} was not renamed."
}
end
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this block itself is fine, but the implication is disturbing: if we harvest files in WAac then we'll use triplicate storage until harvester_to_do is cleaned up. Given #50 & #36 this pattern will become more frequent as well.

I think we should keep source file in harvest_to_do, and delete the converted file out of harvester_to_do after it is copied to original_audio. Thoughts?


else
@logger.warn(@class_name) {
"Source file #{file_path} was not renamed."
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/baw-workers/harvest/gather_files_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@
item[:file_rel_path] == 'one/two/six/prefix_20140101_235959+10.flac' }).to_not be_nil

expect(results.find { |item| !item.include?(:metadata) &&
item[:file_rel_path] == 'one/two/seven/prefix_20140101_235959+10.wac' }).to_not be_nil
item[:file_rel_path] == 'one/two/seven/prefix_20140101_235959+10.wav' }).to_not be_nil
end

it 'should error on read-only directory' do
Expand Down
28 changes: 15 additions & 13 deletions spec/lib/baw-workers/harvest/single_file_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,10 @@
password = 'password'
auth_token = 'auth token this is'

file_hash = 'SHA256::c6d561c91664a92a1598bda3b79734d5ee29266ad0411ffaea1188f29d5b6439'
file_hash = 'SHA256::b06458c96ac2de75d77b83e1b3acb3e64b84a4bfe671c794153855bf7b5e96fd'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to the changes below as well: what information is stored in the database?

It appears the hash, original file name, and relative path all refer to the wav file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely do not change the meaning of existing fields. Do this properly. Otherwise it's a quick way to shoot yourself in the foot.

First of all, why do you care what format the file used to be?

If you must store info about pre-conversion, what info do you want to store?
Add new fields to audio_recording to store this information. Or, maybe, store it in the existing machine readable notes field.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well it's less changing what the fields mean and more clarifying what they mean in a new scenario. Does original file name refer to the source file or the converted file? That needs to be defined.

Why do we care? The only reason we have both of those fields (name and path (in notes)) is so we can map back to the original files in other storage mediums (E.g. Availae or a portable hdd). Storing the name of the converted file breaks that.

recorded_date = '2014-10-12T18:14:55.000+10:00'
uuid = 'fb4af424-04c1-4739-96e3-23f8dc719665'
original_format = 'wac'
original_format = 'wav'

request_login_body = get_api_security_request(email, password)
response_login_body = get_api_security_response(user_name, auth_token)
Expand All @@ -196,16 +196,16 @@
uploader_id: 30,
recorded_date: recorded_date,
site_id: 20,
duration_seconds: 6.577,
duration_seconds: 6.576,
sample_rate_hertz: 22050,
channels: 2,
bit_rate_bps: 16,
media_type: 'audio/x-waac',
data_length_bytes: 394644,
bit_rate_bps: 705600,
media_type: 'audio/wav',
data_length_bytes: 580082,
file_hash: file_hash,
original_file_name: 'test_20141012_181455.wac',
original_file_name: 'test_20141012_181455.wav',
notes: {
relative_path: 'test_20141012_181455.wac',
relative_path: 'test_20141012_181455.wav',
sensor_type: 'SM2',
information: [
'stripped left channel due to bad mic',
Expand All @@ -221,15 +221,15 @@
data: {
recorded_date: '2014-10-12T08:14:55Z',
site_id: 20,
duration_seconds: 6.577,
duration_seconds: 6.576,
sample_rate_hertz: 22050,
channels: 2,
bit_rate_bps: 16,
media_type: 'audio/x-waac',
data_length_bytes: 394644,
bit_rate_bps: 705600,
media_type: 'audio/wav',
data_length_bytes: 580082,
file_hash: file_hash,
status: 'new',
original_file_name: 'test_20141012_181455.wac',
original_file_name: 'test_20141012_181455.wav',

created_at: '2014-10-13T05:21:13Z',
id: 177,
Expand Down Expand Up @@ -289,7 +289,9 @@

# ensure source file is renamed to *.completed
expect(File.exists?(dest_audio_file)).to be_falsey
expect(File.exists?(dest_audio_file[0..-4]+'wav')).to be_falsey
expect(File.exists?(dest_audio_file+'.completed')).to be_truthy
expect(File.exists?(dest_audio_file[0..-4]+'wav.completed')).to be_truthy

# clean up
FileUtils.rm_rf(sub_folder)
Expand Down