Skip to content

Commit

Permalink
fixes #78, forcing all types to lower case
Browse files Browse the repository at this point in the history
  • Loading branch information
dbashford committed Mar 28, 2016
1 parent 549061b commit e719671
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
6 changes: 4 additions & 2 deletions lib/extract.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ var fs = require( 'fs' )
, totalExtractors = 0
, satisfiedExtractors = 0
, hasInitialized = false

, WHITELIST_PRESERVE_LINEBREAKS = /[^A-Za-z\x80-\xFF 0-9 \u2018\u2019\u201C|\u201D\u2026 \u00C0-\u1FFF \u2C00-\uD7FF \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w\n\r]*/g
, WHITELIST_STRIP_LINEBREAKS = /[^A-Za-z\x80-\xFF 0-9 \u2018\u2019\u201C|\u201D\u2026 \u00C0-\u1FFF \u2C00-\uD7FF \.,\?""!@#\$%\^&\*\(\)-_=\+;:<>\/\\\|\}\{\[\]`~'-\w]*/g
, SINGLE_QUOTES = /[\u2018|\u2019]/g
Expand All @@ -22,6 +22,7 @@ var registerExtractor = function( extractor ) {
if ( extractor.types ) {
extractor.types.forEach( function( type ) {
if ( typeof type === "string" ) {
type = type.toLowerCase();
typeExtractors[type] = extractor.extract;
} else {
if ( type instanceof RegExp ) {
Expand All @@ -35,7 +36,7 @@ var registerExtractor = function( extractor ) {
/**
 * Records the failure message for every string type claimed by an extractor
 * that could not be registered.
 *
 * Keys are stored lower-cased so they line up with the lower-cased type names
 * used when extractors are registered and looked up.
 *
 * @param {Object} extractor - extractor description; its optional `types`
 *   array lists the types it handles. Nothing happens when `types` is missing.
 * @param {*} failedMessage - value stored under each type key
 *   (presumably a human-readable reason -- confirm against callers).
 */
var registerFailedExtractor = function( extractor, failedMessage ) {
  if ( extractor.types ) {
    extractor.types.forEach( function( type ) {
      // Only string types can be normalised and used as keys. Types may also
      // be RegExp objects, which have no toLowerCase() and would throw here.
      if ( typeof type === "string" ) {
        failedExtractorTypes[type.toLowerCase()] = failedMessage;
      }
    });
  }
};
Expand Down Expand Up @@ -100,6 +101,7 @@ var initializeExtractors = function(options) {
};

var findExtractor = function( type, filePath ) {
type = type.toLowerCase();
if ( typeExtractors[type] ) {
return typeExtractors[type];
} else {
Expand Down
11 changes: 11 additions & 0 deletions test/general_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ describe('textract', function() {

});

// Regression test for #78: a MIME type given in mixed case must still
// resolve to the docx extractor.
it('can handle types of varying cases', function(done) {
  var mixedCaseMime = 'appLication/vnd.openXMLformats-Officedocument.WordProcessingml.Document';
  var docxPath = path.join(__dirname, 'files', 'new docx(1).docx');

  fromFileWithMimeAndPath(mixedCaseMime, docxPath, function(err, extracted) {
    expect(err).to.be.null;
    expect(extracted).to.be.a('string');
    // Only the leading 38 characters of the extracted text are checked.
    expect(extracted.substring(0, 38)).to.eql( "This is a test Just so you know: Lorem" );
    done();
  });
});


it('can handle a text file with parens', function(done) {
var filePath = path.join(__dirname, 'files', 'new doc(1).txt');
fromFileWithPath(filePath, function( error, text ) {
Expand Down

0 comments on commit e719671

Please sign in to comment.