From 69fd2fa4436363f30afdb7ada6645eb3cd624e33 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Thu, 8 Jan 2015 12:22:06 -0800 Subject: [PATCH 01/25] added basic levenshtein measure and loop waterfall --- bin/mediatidy-media | 3 +++ lib/media.coffee | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/bin/mediatidy-media b/bin/mediatidy-media index 5319648..ee61974 100755 --- a/bin/mediatidy-media +++ b/bin/mediatidy-media @@ -37,6 +37,9 @@ program.command('update') (callback) -> media.deleteOthers -> callback() + (callback) -> + media.deleteDupes -> + callback() ], (err, results) -> throw err if err console.log 'Media update complete.' diff --git a/lib/media.coffee b/lib/media.coffee index 42c6f9e..42e9320 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -5,6 +5,7 @@ async = require 'async' colors = require 'colors' prompt = require 'prompt' Database = require './db' +levenshtein = require 'fast-levenshtein' class Media extends Database @@ -136,6 +137,40 @@ class Media extends Database @promptUserBulkDelete files, promptMessage, -> callback() + levenshtein: (array, callback) -> + possibleDupes = [] + arrayLength = array.length + + ldiggity = (iteration, diggityCallback) => + dupe = {} + i = iteration + 1 + while i < arrayLength + levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => + if distance <= 4 + console.log distance, array[iteration], array[i] + i++ + if i is arrayLength + ldiggity(iteration + 1) + if arrayLength is iteration + 1 + console.log 'donedone...' + callback() + if arrayLength > 0 + ldiggity(0) + else + console.log 'No files in database to check...' + callback() + + deleteDupes: (callback) -> + console.log '==> '.cyan.bold + 'delete duplicate lower quality video files' + # get all files with tag 'CORRUPT' + @dbBulkFileGetTag '\'HEALTHY\'', (files) => + @levenshtein files, (dupes) => + + + + # console.log files + callback() + deleteOthers: (callback) -> console.log '==> '.cyan.bold + 'delete files which are not video types' # get all files with tag 'OTHER' From 7461feade0137f0644ad1499fa85c506eea650de Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 11:43:03 -0800 Subject: [PATCH 02/25] filename matching based on levenshtein --- lib/media.coffee | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index 42e9320..a676036 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -141,19 +141,33 @@ class Media extends Database possibleDupes = [] arrayLength = array.length - ldiggity = (iteration, diggityCallback) => - dupe = {} + ldiggity = (iteration) => + dupe = [] + + # waterfall loop looking for duplicate matches based on filename i = iteration + 1 while i < arrayLength + # levenshtein algorithm to find fuzzy matches levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => - if distance <= 4 - console.log distance, array[iteration], array[i] - i++ - if i is arrayLength - ldiggity(iteration + 1) - if arrayLength is iteration + 1 - console.log 'donedone...' - callback() + # if a match occurs push to temp array + if distance is 0 and array[i].dupe is undefined + array[i].dupe = 1 + dupe.push array[i] + + # if we reached the last loop of loops callback! + if i is arrayLength - 1 and iteration is arrayLength - 2 + callback(possibleDupes) + + # if we reached the end of the while loop, push dupe array and + # continue to execute function + else if i is arrayLength - 1 + if dupe.length > 0 + array[iteration].dupe = 1 + dupe.push array[iteration] + possibleDupes.push dupe + ldiggity(iteration + 1) + i++ + if arrayLength > 0 ldiggity(0) else @@ -165,11 +179,12 @@ class Media extends Database # get all files with tag 'CORRUPT' @dbBulkFileGetTag '\'HEALTHY\'', (files) => @levenshtein files, (dupes) => + console.log dupes # console.log files - callback() + callback() deleteOthers: (callback) -> console.log '==> '.cyan.bold + 'delete files which are not video types' From 4c82327166f9779e5c7215f58014b1928bf8f126 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 12:31:05 -0800 Subject: [PATCH 03/25] added print debug --- lib/media.coffee | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/media.coffee b/lib/media.coffee index a676036..c1e08d2 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -147,6 +147,7 @@ class Media extends Database # waterfall loop looking for duplicate matches based on filename i = iteration + 1 while i < arrayLength + console.log arrayLength, iteration, i, array[i].path # levenshtein algorithm to find fuzzy matches levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => # if a match occurs push to temp array @@ -157,7 +158,7 @@ class Media extends Database # if we reached the last loop of loops callback! if i is arrayLength - 1 and iteration is arrayLength - 2 callback(possibleDupes) - + # if we reached the end of the while loop, push dupe array and # continue to execute function else if i is arrayLength - 1 From 629ec14c2300a3b6a65d97fad77457cc5499cb83 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 13:02:01 -0800 Subject: [PATCH 04/25] added levenshtein modules to package --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index b64e618..68ff914 100755 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "coffee-script": "^1.8.0", "colors": "^0.6.2", "commander": "git://github.com/zhiyelee/commander.js.git#buginfo", + "fast-levenshtein": "^1.0.6", "fs-extra": "^0.11.1", "lodash": "^2.4.1", "nconf": "^0.7.1", From bcf4fd7981a68999134314b41de8a1dba4d61b6f Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 13:26:27 -0800 Subject: [PATCH 05/25] added dupe arg for testing --- bin/mediatidy-media | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/bin/mediatidy-media b/bin/mediatidy-media index ee61974..079beda 100755 --- a/bin/mediatidy-media +++ b/bin/mediatidy-media @@ -44,6 +44,20 @@ program.command('update') throw err if err console.log 'Media update complete.' +program.command('dupe') + .description('find dupes') + .action () -> + media = new Media + + # Perform action in series with async + async.series [ + (callback) -> + media.deleteDupes -> + callback() + ], (err, results) -> + throw err if err + console.log 'Dupe complete.' + program.parse process.argv program.help() if program.args.length is 0 From 74fdbfc7a372b6d68889c20a9fc3ef3c7db804a0 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 13:34:19 -0800 Subject: [PATCH 06/25] removed print debug; add . progress --- lib/media.coffee | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/media.coffee b/lib/media.coffee index c1e08d2..43834a7 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -138,6 +138,7 @@ class Media extends Database callback() levenshtein: (array, callback) -> + console.log possibleDupes = [] arrayLength = array.length @@ -147,7 +148,6 @@ class Media extends Database # waterfall loop looking for duplicate matches based on filename i = iteration + 1 while i < arrayLength - console.log arrayLength, iteration, i, array[i].path # levenshtein algorithm to find fuzzy matches levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => # if a match occurs push to temp array @@ -157,6 +157,7 @@ class Media extends Database # if we reached the last loop of loops callback! if i is arrayLength - 1 and iteration is arrayLength - 2 + process.stdout.write(".done\n") callback(possibleDupes) # if we reached the end of the while loop, push dupe array and @@ -166,6 +167,7 @@ class Media extends Database array[iteration].dupe = 1 dupe.push array[iteration] possibleDupes.push dupe + process.stdout.write(".") ldiggity(iteration + 1) i++ From 4ca018037290105d76ca1fa76097d6c359a2bdb4 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 14:22:37 -0800 Subject: [PATCH 07/25] find exact filtered filename dupes --- lib/media.coffee | 82 ++++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index 43834a7..31c5fed 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -137,42 +137,62 @@ class Media extends Database @promptUserBulkDelete files, promptMessage, -> callback() - levenshtein: (array, callback) -> - console.log + findDupes: (array, callback) -> possibleDupes = [] + objectStore = {} arrayLength = array.length - ldiggity = (iteration) => - dupe = [] + superDuper = (iteration) => - # waterfall loop looking for duplicate matches based on filename - i = iteration + 1 - while i < arrayLength - # levenshtein algorithm to find fuzzy matches - levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => - # if a match occurs push to temp array - if distance is 0 and array[i].dupe is undefined - array[i].dupe = 1 - dupe.push array[i] - - # if we reached the last loop of loops callback! - if i is arrayLength - 1 and iteration is arrayLength - 2 - process.stdout.write(".done\n") - callback(possibleDupes) - - # if we reached the end of the while loop, push dupe array and - # continue to execute function - else if i is arrayLength - 1 - if dupe.length > 0 - array[iteration].dupe = 1 - dupe.push array[iteration] - possibleDupes.push dupe - process.stdout.write(".") - ldiggity(iteration + 1) - i++ + console.log 'inside', array[iteration].filtered_filename + objectStore[array[iteration].filtered_filename] = [] unless objectStore.hasOwnProperty(array[iteration].filtered_filename) + objectStore[array[iteration].filtered_filename].push array[iteration] + + if iteration is arrayLength - 1 + console.log 'Processing...' + for key of objectStore + if objectStore.hasOwnProperty(key) + if objectStore[key].length > 1 + possibleDupes.push objectStore[key] + callback possibleDupes + else + superDuper(iteration + 1) + + # for key of objectByString + # if objectByString.hasOwnProperty(key) + # if objectByString[key].length > 1 + # data.fileMatches.push objectByString[key] + # process.stdout.write "." + # process.stdout.write "...done\n" + + # # waterfall loop looking for duplicate matches based on filename + # i = iteration + 1 + # while i < arrayLength + # # levenshtein algorithm to find fuzzy matches + # levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => + # # if a match occurs push to temp array + # if distance is 0 and array[i].dupe is undefined + # array[i].dupe = 1 + # dupe.push array[i] + # + # # if we reached the last loop of loops callback! + # if i is arrayLength - 1 and iteration is arrayLength - 2 + # process.stdout.write(".done\n") + # callback(possibleDupes) + # + # # if we reached the end of the while loop, push dupe array and + # # continue to execute function + # else if i is arrayLength - 1 + # if dupe.length > 0 + # array[iteration].dupe = 1 + # dupe.push array[iteration] + # possibleDupes.push dupe + # process.stdout.write(".") + # ldiggity(iteration + 1) + # i++ if arrayLength > 0 - ldiggity(0) + superDuper(0) else console.log 'No files in database to check...' callback() @@ -181,7 +201,7 @@ class Media extends Database console.log '==> '.cyan.bold + 'delete duplicate lower quality video files' # get all files with tag 'CORRUPT' @dbBulkFileGetTag '\'HEALTHY\'', (files) => - @levenshtein files, (dupes) => + @findDupes files, (dupes) => console.log dupes From 83b06245ee55e3bedb29a1bae7b433973cec126b Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 14:24:59 -0800 Subject: [PATCH 08/25] remove print debugging --- lib/media.coffee | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/media.coffee b/lib/media.coffee index 31c5fed..812db0c 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -144,7 +144,6 @@ class Media extends Database superDuper = (iteration) => - console.log 'inside', array[iteration].filtered_filename objectStore[array[iteration].filtered_filename] = [] unless objectStore.hasOwnProperty(array[iteration].filtered_filename) objectStore[array[iteration].filtered_filename].push array[iteration] From c1461c47349d24a9694f2faab60e8a4d5ff2adf0 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 14:27:06 -0800 Subject: [PATCH 09/25] debug large object --- lib/media.coffee | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index 812db0c..abb0d83 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -149,11 +149,11 @@ class Media extends Database if iteration is arrayLength - 1 console.log 'Processing...' - for key of objectStore - if objectStore.hasOwnProperty(key) - if objectStore[key].length > 1 - possibleDupes.push objectStore[key] - callback possibleDupes + # for key of objectStore + # if objectStore.hasOwnProperty(key) + # if objectStore[key].length > 1 + # possibleDupes.push objectStore[key] + # callback possibleDupes else superDuper(iteration + 1) From 181014ef517a083a47de1dfd99c91e79388232e2 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 14:29:56 -0800 Subject: [PATCH 10/25] testing object loop --- lib/media.coffee | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/media.coffee b/lib/media.coffee index abb0d83..f8549b1 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -149,6 +149,11 @@ class Media extends Database if iteration is arrayLength - 1 console.log 'Processing...' + + keys = [] + for i of objectStore + keys.push i if objectStore.hasOwnProperty(i) + console.log keys # for key of objectStore # if objectStore.hasOwnProperty(key) # if objectStore[key].length > 1 From 5a6aab7fc3fd141d6d663812c44946286b5a7679 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 14:30:28 -0800 Subject: [PATCH 11/25] fix indent --- lib/media.coffee | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index f8549b1..ae38206 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -151,8 +151,8 @@ class Media extends Database console.log 'Processing...' keys = [] - for i of objectStore - keys.push i if objectStore.hasOwnProperty(i) + for i of objectStore + keys.push i if objectStore.hasOwnProperty(i) console.log keys # for key of objectStore # if objectStore.hasOwnProperty(key) From 6d61b561734ed89298653aefdd3d473aa3314da1 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 14:56:56 -0800 Subject: [PATCH 12/25] iterate async with unique array --- lib/media.coffee | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index ae38206..1635e2c 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -137,6 +137,21 @@ class Media extends Database @promptUserBulkDelete files, promptMessage, -> callback() + arrayUnique: (a, callback) -> + seen = {} + out = [] + len = a.length + j = 0 + i = 0 + + while i < len + item = a[i] + if seen[item] isnt 1 + seen[item] = 1 + out[j++] = item + i++ + callback out + findDupes: (array, callback) -> possibleDupes = [] objectStore = {} @@ -150,15 +165,21 @@ class Media extends Database if iteration is arrayLength - 1 console.log 'Processing...' - keys = [] - for i of objectStore - keys.push i if objectStore.hasOwnProperty(i) - console.log keys - # for key of objectStore - # if objectStore.hasOwnProperty(key) - # if objectStore[key].length > 1 - # possibleDupes.push objectStore[key] - # callback possibleDupes + uniqify = [] + asyncObject = (i) => + if objectStore.hasOwnProperty(array[i].filtered_filename) + if uniqify.indexOf(array[i].filtered_filename) is -1 + if objectStore[array[i].filtered_filename].length > 1 + uniqify.push array[i].filtered_filename + possibleDupes.push objectStore[array[i].filtered_filename] + + if i is arrayLength - 1 + console.log 'done!' + callback possibleDupes + else + asyncObject(i + 1) + asyncObject(0) + else superDuper(iteration + 1) From aad61f6bb789b42fb762000be6eb21de716b4117 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 17:54:05 -0800 Subject: [PATCH 13/25] should now handle massive objects with lodash --- lib/media.coffee | 100 +++++++++++------------------------------------ 1 file changed, 23 insertions(+), 77 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index 1635e2c..321557a 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -5,7 +5,7 @@ async = require 'async' colors = require 'colors' prompt = require 'prompt' Database = require './db' -levenshtein = require 'fast-levenshtein' +_ = require 'lodash' class Media extends Database @@ -137,87 +137,33 @@ class Media extends Database @promptUserBulkDelete files, promptMessage, -> callback() - arrayUnique: (a, callback) -> - seen = {} - out = [] - len = a.length - j = 0 - i = 0 - - while i < len - item = a[i] - if seen[item] isnt 1 - seen[item] = 1 - out[j++] = item - i++ - callback out - findDupes: (array, callback) -> - possibleDupes = [] - objectStore = {} arrayLength = array.length - superDuper = (iteration) => - - objectStore[array[iteration].filtered_filename] = [] unless objectStore.hasOwnProperty(array[iteration].filtered_filename) - objectStore[array[iteration].filtered_filename].push array[iteration] - - if iteration is arrayLength - 1 - console.log 'Processing...' - - uniqify = [] - asyncObject = (i) => - if objectStore.hasOwnProperty(array[i].filtered_filename) - if uniqify.indexOf(array[i].filtered_filename) is -1 - if objectStore[array[i].filtered_filename].length > 1 - uniqify.push array[i].filtered_filename - possibleDupes.push objectStore[array[i].filtered_filename] - - if i is arrayLength - 1 - console.log 'done!' - callback possibleDupes - else - asyncObject(i + 1) - asyncObject(0) + if arrayLength > 0 - else - superDuper(iteration + 1) - - # for key of objectByString - # if objectByString.hasOwnProperty(key) - # if objectByString[key].length > 1 - # data.fileMatches.push objectByString[key] - # process.stdout.write "." - # process.stdout.write "...done\n" - - # # waterfall loop looking for duplicate matches based on filename - # i = iteration + 1 - # while i < arrayLength - # # levenshtein algorithm to find fuzzy matches - # levenshtein.getAsync array[iteration].filtered_filename, array[i].filtered_filename, (err, distance) => - # # if a match occurs push to temp array - # if distance is 0 and array[i].dupe is undefined - # array[i].dupe = 1 - # dupe.push array[i] - # - # # if we reached the last loop of loops callback! - # if i is arrayLength - 1 and iteration is arrayLength - 2 - # process.stdout.write(".done\n") - # callback(possibleDupes) - # - # # if we reached the end of the while loop, push dupe array and - # # continue to execute function - # else if i is arrayLength - 1 - # if dupe.length > 0 - # array[iteration].dupe = 1 - # dupe.push array[iteration] - # possibleDupes.push dupe - # process.stdout.write(".") - # ldiggity(iteration + 1) - # i++ + async.waterfall [ + (callback) -> + objectStore = {} + _.forEach array, (file, iteration) => + objectStore[file.filtered_filename] = [] unless objectStore.hasOwnProperty(file.filtered_filename) + objectStore[file.filtered_filename].push file + + if iteration is arrayLength - 1 + callback null, objectStore + (objectStore, callback) -> + possibleDupes = [] + objectLength = _.size(objectStore) + count = 1 + _.forEach objectStore, (fileCollection) => + if fileCollection.length > 1 + possibleDupes.push fileCollection + if count is objectLength - 1 + callback null, possibleDupes + count++ + ], (err, result) -> + callback result - if arrayLength > 0 - superDuper(0) else console.log 'No files in database to check...' callback() From b573e9532414dd63e96062789e2a371da5b3d414 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Fri, 9 Jan 2015 17:54:38 -0800 Subject: [PATCH 14/25] removed unneeded modules --- package.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/package.json b/package.json index 68ff914..3f7d7ec 100755 --- a/package.json +++ b/package.json @@ -20,10 +20,8 @@ "coffee-script": "^1.8.0", "colors": "^0.6.2", "commander": "git://github.com/zhiyelee/commander.js.git#buginfo", - "fast-levenshtein": "^1.0.6", "fs-extra": "^0.11.1", "lodash": "^2.4.1", - "nconf": "^0.7.1", "node-dir": "^0.1.6", "node-ffprobe": "^1.2.2", "prompt": "^0.2.14", From 038dfc52ced260fca3141bc8614a1c5412b1fdcf Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 12:19:44 -0800 Subject: [PATCH 15/25] able to detect and delete exact dupes --- lib/media.coffee | 78 ++++++++++++++++++++++++++++++++++++++++++------ package.json | 1 + 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index 321557a..837786c 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -6,6 +6,7 @@ colors = require 'colors' prompt = require 'prompt' Database = require './db' _ = require 'lodash' +prettyBytes = require 'pretty-bytes' class Media extends Database @@ -168,17 +169,70 @@ class Media extends Database console.log 'No files in database to check...' callback() + promptUserDupeDelete: (array, callback) -> + + _.forEach array, (file, j) => + if j is 0 + console.log "KEEP:".green, file.path, "width:", file.width, "height:", file.height, "size:", prettyBytes(file.size) + else + console.log "DELETE(?):".yellow, file.path, "width:", file.width, "height:", file.height, "size:", prettyBytes(file.size) + + prompt.message = "mediatidy".yellow + prompt.delimiter = ": ".green + prompt.properties = + yesno: + default: 'no' + message: 'Keep highest quality file; delete lower quality duplicates?' + required: true + warning: "Must respond yes or no" + validator: /y[es]*|n[o]?/ + + # Start the prompt + prompt.start() + + # get the simple yes or no property + prompt.get ['yesno'], (err, result) => + if result.yesno.match(/yes/i) + + _.forEach array.slice(1), (file, j) => + fs.unlink file.path, (err) => + throw err if err + console.log "DELETED:".red, file.path + + if array.slice(1).length is j + 1 + @dbBulkFileDelete array.slice(1), -> + console.log 'files deleted and removed from database...' + callback() + else + console.log "No files deleted..." + callback() + + dupeSort: (array, callback) -> + sortedDupes = [] + _.forEach array, (dupes, i) => + + # sort files by size + dupes.sort (a, b) -> + (a.size) - (b.size) + dupes.reverse() + sortedDupes.push dupes + + if array.length - 1 is i + callback sortedDupes + deleteDupes: (callback) -> console.log '==> '.cyan.bold + 'delete duplicate lower quality video files' - # get all files with tag 'CORRUPT' + # get all files with tag 'HEALTHY' @dbBulkFileGetTag '\'HEALTHY\'', (files) => @findDupes files, (dupes) => - console.log dupes - - - - # console.log files - callback() + @dupeSort dupes, (sortedDupes) => + deleteDupes = (iteration) => + @promptUserDupeDelete sortedDupes[iteration], -> + if sortedDupes.length is iteration + 1 + callback() + else + deleteDupes(iteration + 1) + deleteDupes(0) deleteOthers: (callback) -> console.log '==> '.cyan.bold + 'delete files which are not video types' @@ -258,11 +312,17 @@ class Media extends Database # otherwise continue else if probeData["streams"].length > 0 - # filter file name for future matching + # remove file extension filteredFileName = probeData.filename.replace(/\.\w*$/, "") + + # remove white space filteredFileName = filteredFileName.replace(/\s/g, "") + + # remove any non word character filteredFileName = filteredFileName.replace(/\W/g, "") - filteredFileName = filteredFileName.replace(/\d{4}.*$/g, "") + + # filteredFileName = filteredFileName.replace(/\d{4}.*$/g, "") + # make all uppercase filteredFileName = filteredFileName.toUpperCase() # set filename and filtered file name diff --git a/package.json b/package.json index 3f7d7ec..5cca5af 100755 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "lodash": "^2.4.1", "node-dir": "^0.1.6", "node-ffprobe": "^1.2.2", + "pretty-bytes": "^1.0.2", "prompt": "^0.2.14", "sqlite3": "^2.2.7" }, From 75275f622d34343e2d8c3ee4aa6f000dc1e60bdf Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 13:01:10 -0800 Subject: [PATCH 16/25] added progress bar to probing function --- lib/media.coffee | 10 ++++++---- package.json | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index 837786c..6806815 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -7,6 +7,7 @@ prompt = require 'prompt' Database = require './db' _ = require 'lodash' prettyBytes = require 'pretty-bytes' +ProgressBar = require 'progress' class Media extends Database @@ -295,6 +296,9 @@ class Media extends Database # gather information about media files probedFiles = [] arrayLength = array.length + bar = new ProgressBar("probing files [:bar] :percent :etas", + total: arrayLength + ) singleFileProbe = (iteration) => probe array[iteration].path, (err, probeData) => @@ -314,7 +318,7 @@ class Media extends Database # remove file extension filteredFileName = probeData.filename.replace(/\.\w*$/, "") - + # remove white space filteredFileName = filteredFileName.replace(/\s/g, "") @@ -342,17 +346,15 @@ class Media extends Database # push object to array probedFiles.push array[iteration] + bar.tick() streamCallback() if arrayLength is iteration + 1 - process.stdout.write(".done\n") console.log probedFiles.length + ' out of ' + arrayLength + ' files probed...' callback probedFiles else - process.stdout.write('.') singleFileProbe(iteration + 1) if arrayLength > 0 - process.stdout.write('.') singleFileProbe(0) else console.log 'No files in database needed to be probed...' diff --git a/package.json b/package.json index 5cca5af..45805db 100755 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "node-dir": "^0.1.6", "node-ffprobe": "^1.2.2", "pretty-bytes": "^1.0.2", + "progress": "^1.1.8", "prompt": "^0.2.14", "sqlite3": "^2.2.7" }, From f92ed81b350951e90a06b3a8987127903f859adc Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 13:17:42 -0800 Subject: [PATCH 17/25] updated progress message --- lib/media.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/media.coffee b/lib/media.coffee index 6806815..a72c9fe 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -296,7 +296,7 @@ class Media extends Database # gather information about media files probedFiles = [] arrayLength = array.length - bar = new ProgressBar("probing files [:bar] :percent :etas", + bar = new ProgressBar("probing files: Total: :total Current: :current Completion Time: :etas", total: arrayLength ) From 2aa7ae2d73399641308777ef1ca96c80b5f04410 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 13:46:30 -0800 Subject: [PATCH 18/25] fixed existing files removal from db bug --- lib/media.coffee | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/media.coffee b/lib/media.coffee index a72c9fe..93bf04f 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -119,7 +119,7 @@ class Media extends Database if arrayLength is iteration + 1 and missingFiles.length > 0 console.log missingFiles.length + ' out of ' + arrayLength + ' files removed from database...' callback missingFiles - if arrayLength is iteration + 1 and missingFiles.length is 0 + else if arrayLength is iteration + 1 and missingFiles.length is 0 console.log 'No files needed to be removed from database...' callback missingFiles else @@ -296,7 +296,7 @@ class Media extends Database # gather information about media files probedFiles = [] arrayLength = array.length - bar = new ProgressBar("probing files: Total: :total Current: :current Completion Time: :etas", + bar = new ProgressBar("Probing files: Total: :total Current: :current Completion Time: :etas", total: arrayLength ) From 418e6dc46e806c939691e4d1e84167f70cb94a8e Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 13:53:55 -0800 Subject: [PATCH 19/25] updated progress bar --- lib/media.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/media.coffee b/lib/media.coffee index 93bf04f..b913955 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -296,7 +296,7 @@ class Media extends Database # gather information about media files probedFiles = [] arrayLength = array.length - bar = new ProgressBar("Probing files: Total: :total Current: :current Completion Time: :etas", + bar = new ProgressBar("Probing files: :current of :total :percent [:elapseds elapsed, eta :etas]", total: arrayLength ) From c3b5b5ca2b99bfed477b991495957a4c1b22f3f5 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 14:00:02 -0800 Subject: [PATCH 20/25] add searching message to add files --- lib/media.coffee | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/media.coffee b/lib/media.coffee index b913955..9c1d2da 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -43,6 +43,7 @@ class Media extends Database fs.exists basePath.path, (exists) => if exists + console.log basePath.path, 'searching for files...' # get files for given path dir.paths basePath.path, (err, paths) => throw err if err From 8c893115f83ae7a567276083e0e70405d999f050 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 14:04:00 -0800 Subject: [PATCH 21/25] added colon to search logging --- lib/media.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/media.coffee b/lib/media.coffee index 9c1d2da..208b702 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -43,7 +43,7 @@ class Media extends Database fs.exists basePath.path, (exists) => if exists - console.log basePath.path, 'searching for files...' + console.log basePath.path + ':', 'searching for files...' # get files for given path dir.paths basePath.path, (err, paths) => throw err if err From eab6cdc076e601c1dc5f63dce2e032a239ab5c40 Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 16:58:23 -0800 Subject: [PATCH 22/25] fixed files table wipe option --- lib/config.coffee | 2 +- lib/db.coffee | 6 ++++++ lib/media.coffee | 25 +++++++++++++++++-------- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/lib/config.coffee b/lib/config.coffee index ffb9b7e..0b423fd 100755 --- a/lib/config.coffee +++ b/lib/config.coffee @@ -26,7 +26,7 @@ class Config extends Database # get the simple yes or no property prompt.get ['yesno'], (err, result) => if result.yesno.match(/yes/i) - @dbBulkFileDelete => + @dbFileTableDeleteAll => console.log "All files removed from mediatidy..." callback() else diff --git a/lib/db.coffee b/lib/db.coffee index e7e2ac7..6f5ed26 100755 --- a/lib/db.coffee +++ b/lib/db.coffee @@ -79,6 +79,12 @@ class Database db.close -> callback rows + dbFileTableDeleteAll: (callback) -> + db = new sqlite3.Database('data.db') + db.run "DELETE FROM MEDIAFILES", -> + db.close -> + callback() + dbPathAdd: (path, tag, callback) -> db = new sqlite3.Database(@dbFile) diff --git a/lib/media.coffee b/lib/media.coffee index 208b702..d28678e 100755 --- a/lib/media.coffee +++ b/lib/media.coffee @@ -172,6 +172,7 @@ class Media extends Database callback() promptUserDupeDelete: (array, callback) -> + arrayLength = array.length _.forEach array, (file, j) => if j is 0 @@ -196,15 +197,22 @@ class Media extends Database prompt.get ['yesno'], (err, result) => if result.yesno.match(/yes/i) - _.forEach array.slice(1), (file, j) => - fs.unlink file.path, (err) => - throw err if err - console.log "DELETED:".red, file.path + fileDelete = (iteration) => + if iteration is 0 + fileDelete(iteration + 1) + else + fs.unlink array[iteration].path, (err) => + throw err if err + console.log "DELETED:".red, array[iteration].path + + if arrayLength is iteration + 1 + @dbBulkFileDelete array.slice(1), -> + console.log 'files deleted and removed from database...' + callback() + else + fileDelete(iteration + 1) + fileDelete(0) - if array.slice(1).length is j + 1 - @dbBulkFileDelete array.slice(1), -> - console.log 'files deleted and removed from database...' - callback() else console.log "No files deleted..." callback() @@ -228,6 +236,7 @@ class Media extends Database @dbBulkFileGetTag '\'HEALTHY\'', (files) => @findDupes files, (dupes) => @dupeSort dupes, (sortedDupes) => + console.log sortedDupes deleteDupes = (iteration) => @promptUserDupeDelete sortedDupes[iteration], -> if sortedDupes.length is iteration + 1 From 88e40464f56c517dfefca67aa4361ed466e7cc2a Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 17:01:50 -0800 Subject: [PATCH 23/25] fixed database location --- lib/db.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/db.coffee b/lib/db.coffee index 6f5ed26..0230f25 100755 --- a/lib/db.coffee +++ b/lib/db.coffee @@ -80,7 +80,7 @@ class Database callback rows dbFileTableDeleteAll: (callback) -> - db = new sqlite3.Database('data.db') + db = new sqlite3.Database(@dbFile) db.run "DELETE FROM MEDIAFILES", -> db.close -> callback() From 5c65ea7b1ae6ff168413d6b05c4f2bdd2f23ff2b Mon Sep 17 00:00:00 2001 From: Daniel Bohannon Date: Tue, 13 Jan 2015 17:52:05 -0800 Subject: [PATCH 24/25] simplified bins; cleaner prompting --- README.md | 7 ++-- bin/mediatidy | 96 +++++++++++++++++++++++++++++++++++++++++--- bin/mediatidy-config | 64 ----------------------------- bin/mediatidy-media | 63 ----------------------------- lib/config.coffee | 4 +- lib/media.coffee | 52 +++++++++++++----------- 6 files changed, 124 insertions(+), 162 deletions(-) delete mode 100755 bin/mediatidy-config delete mode 100755 bin/mediatidy-media diff --git a/README.md b/README.md index 825322d..71296ec 100755 --- a/README.md +++ b/README.md @@ -11,11 +11,10 @@ Point **mediatidy** at your movie or TV show directory and it will do the follow * Delete all non-video type files * Delete all corrupt/incomplete video files * Delete all sample files +* Process files to find dupes; keep the highest quality of the dupes and delete the rest Coming soon: * Delete files under a specified size -* Process files to find dupes; keep the highest resolution of the dupes and delete the rest -* Process files to find dupes; keep the largest sized file of the dupes (if they are the same resolution) and delete the rest * Delete empty directories