-
Notifications
You must be signed in to change notification settings - Fork 0
harvested wac files keep wav file as original, closes #35 #49
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,16 +147,16 @@ def file(path, top_dir, dir_settings = {}) | |
|
||
dir_settings = get_folder_settings(File.join(File.dirname(path), @config_file_name)) if dir_settings.blank? | ||
|
||
basic_info, advanced_info = file_info(path, dir_settings[:utc_offset]) | ||
basic_info, advanced_info, new_path = file_info(path, dir_settings[:utc_offset]) | ||
|
||
if basic_info.blank? || advanced_info.blank? | ||
@logger.warn(@class_name) { "Not enough information for #{path}." } | ||
@logger.warn(@class_name) { "Not enough information for #{new_path}." } | ||
{} | ||
else | ||
@logger.debug(@class_name) { "Complete information found for #{path}." } | ||
@logger.debug(@class_name) { "Complete information found for #{new_path}." } | ||
result = {} | ||
result = result.merge(basic_info).merge(dir_settings).merge(advanced_info) | ||
result[:file_rel_path] = Pathname.new(path).relative_path_from(Pathname.new(top_dir)).to_s | ||
result[:file_rel_path] = Pathname.new(new_path).relative_path_from(Pathname.new(top_dir)).to_s | ||
result | ||
end | ||
end | ||
|
@@ -168,6 +168,25 @@ def file(path, top_dir, dir_settings = {}) | |
def file_info(file, utc_offset) | ||
basic_info, advanced_info = nil | ||
|
||
if File.extname(file) == '.wac' | ||
begin | ||
wac_file = file | ||
@logger.warn(@class_name) { | ||
"Converting from .wac file #{wac_file}" | ||
} | ||
wav_file = @file_info_helper.convert_wac_to_wav(wac_file) | ||
@logger.warn(@class_name) { | ||
"Converted to .wav file #{wav_file} from .wac file #{wac_file}" | ||
} | ||
file = wav_file | ||
rescue StandardError => e | ||
@logger.error(@class_name) { | ||
"Could not convert .wac file #{file} using utc offset '#{utc_offset}': #{format_error(e)}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm assuming the |
||
} | ||
raise | ||
end | ||
end | ||
|
||
begin | ||
basic_info = @file_info_helper.basic(file) | ||
advanced_info = @file_info_helper.advanced(file, utc_offset) | ||
|
@@ -186,7 +205,7 @@ def file_info(file, utc_offset) | |
} | ||
end | ||
|
||
[basic_info, advanced_info] | ||
[basic_info, advanced_info, file] | ||
end | ||
|
||
# Get all files in a directory. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -292,6 +292,24 @@ def rename_harvested_file(file_path, storage_target_paths) | |
@logger.info(@class_name) { | ||
"Source file #{file_path} was successfully renamed to #{renamed_source_file}." | ||
} | ||
|
||
# if this is a wav file, and a .wac file exists, rename the .wac file as well | ||
file_ext = File.extname(file_path) | ||
wac_file = file_path[0..-4]+'wac' | ||
if file_ext == '.wav' && File.exists?(wac_file) | ||
renamed_wac_file = wac_file + '.completed' | ||
File.rename(wac_file, renamed_wac_file) | ||
if File.exists?(renamed_wac_file) | ||
@logger.info(@class_name) { | ||
"Source .wac file #{wac_file} was successfully renamed to #{renamed_wac_file}." | ||
} | ||
else | ||
@logger.warn(@class_name) { | ||
"Source .wac file #{wac_file} was not renamed." | ||
} | ||
end | ||
end | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this block itself is fine, but the implication is disturbing: if we harvest files in WAac then we'll use triplicate storage until I think we should keep source file in |
||
|
||
else | ||
@logger.warn(@class_name) { | ||
"Source file #{file_path} was not renamed." | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,10 +183,10 @@ | |
password = 'password' | ||
auth_token = 'auth token this is' | ||
|
||
file_hash = 'SHA256::c6d561c91664a92a1598bda3b79734d5ee29266ad0411ffaea1188f29d5b6439' | ||
file_hash = 'SHA256::b06458c96ac2de75d77b83e1b3acb3e64b84a4bfe671c794153855bf7b5e96fd' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Related to the changes below as well: what information is stored in the database? It appears the hash, original file name, and relative path all refer to the wav file.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Definitely do not change the meaning of existing fields. Do this properly. Otherwise it's a quick way to shoot yourself in the foot. First of all, why do you care what format the file used to be? If you must store info about pre-conversion, what info do you want to store? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well it's less changing what the fields mean and more clarifying what they mean in a new scenario. Does original file name refer to the source file or the converted file? That needs to be defined. Why do we care? The only reason we have both of those fields (name and path (in notes)) is so we can map back to the original files in other storage mediums (E.g. Availae or a portable hdd). Storing the name of the converted file breaks that. |
||
recorded_date = '2014-10-12T18:14:55.000+10:00' | ||
uuid = 'fb4af424-04c1-4739-96e3-23f8dc719665' | ||
original_format = 'wac' | ||
original_format = 'wav' | ||
|
||
request_login_body = get_api_security_request(email, password) | ||
response_login_body = get_api_security_response(user_name, auth_token) | ||
|
@@ -196,16 +196,16 @@ | |
uploader_id: 30, | ||
recorded_date: recorded_date, | ||
site_id: 20, | ||
duration_seconds: 6.577, | ||
duration_seconds: 6.576, | ||
sample_rate_hertz: 22050, | ||
channels: 2, | ||
bit_rate_bps: 16, | ||
media_type: 'audio/x-waac', | ||
data_length_bytes: 394644, | ||
bit_rate_bps: 705600, | ||
media_type: 'audio/wav', | ||
data_length_bytes: 580082, | ||
file_hash: file_hash, | ||
original_file_name: 'test_20141012_181455.wac', | ||
original_file_name: 'test_20141012_181455.wav', | ||
notes: { | ||
relative_path: 'test_20141012_181455.wac', | ||
relative_path: 'test_20141012_181455.wav', | ||
sensor_type: 'SM2', | ||
information: [ | ||
'stripped left channel due to bad mic', | ||
|
@@ -221,15 +221,15 @@ | |
data: { | ||
recorded_date: '2014-10-12T08:14:55Z', | ||
site_id: 20, | ||
duration_seconds: 6.577, | ||
duration_seconds: 6.576, | ||
sample_rate_hertz: 22050, | ||
channels: 2, | ||
bit_rate_bps: 16, | ||
media_type: 'audio/x-waac', | ||
data_length_bytes: 394644, | ||
bit_rate_bps: 705600, | ||
media_type: 'audio/wav', | ||
data_length_bytes: 580082, | ||
file_hash: file_hash, | ||
status: 'new', | ||
original_file_name: 'test_20141012_181455.wac', | ||
original_file_name: 'test_20141012_181455.wav', | ||
|
||
created_at: '2014-10-13T05:21:13Z', | ||
id: 177, | ||
|
@@ -289,7 +289,9 @@ | |
|
||
# ensure source file is renamed to *.completed | ||
expect(File.exists?(dest_audio_file)).to be_falsey | ||
expect(File.exists?(dest_audio_file[0..-4]+'wav')).to be_falsey | ||
expect(File.exists?(dest_audio_file+'.completed')).to be_truthy | ||
expect(File.exists?(dest_audio_file[0..-4]+'wav.completed')).to be_truthy | ||
|
||
# clean up | ||
FileUtils.rm_rf(sub_folder) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cofiem - why this
unless
condition? Couldn't this lead to non-idempotent behaviour on repeated runs?I guess you're assuming an identical .wav file may have already been uploaded and you don't want to run wac2wav if that is the case... in which case, that check should be explicit.
Adding to that, the output name of the .wav file should be unique so duplicate runs don't mistake converted wav files as original wav files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FYI: This concept of converting a file for storage won't be a once off - see #50 and #36.