Skip to content

Commit

Permalink
Return error on unknown text file encoding (#175)
Browse files Browse the repository at this point in the history
* Implement checking if text file encoding was successfully detected
* Format code
  • Loading branch information
Konijnendijk authored and dbashford committed Mar 10, 2019
1 parent 9ad4aa6 commit c2660fd
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 1 deletion.
11 changes: 10 additions & 1 deletion lib/extractors/text.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
var fs = require( 'fs' )
, iconv = require( 'iconv-lite' )
, jschardet = require( 'jschardet' )
, path = require( 'path' )
;

function extractText( filePath, options, cb ) {
Expand All @@ -11,7 +12,15 @@ function extractText( filePath, options, cb ) {
return;
}
try {
encoding = jschardet.detect( data ).encoding.toLowerCase();
var detectedEncoding = jschardet.detect( data ).encoding;
if ( !detectedEncoding ) {
error = new Error( 'Could not detect encoding for file named [[ ' +
path.basename( filePath ) + ' ]]' );
cb( error, null );
return;
}
encoding = detectedEncoding.toLowerCase();

decoded = iconv.decode( data, encoding );
} catch ( e ) {
cb( e );
Expand Down
10 changes: 10 additions & 0 deletions test/extract_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,16 @@ describe( 'textract', function() {
});
});

// Checks that extracting from the `unknown-encoding.txt` fixture hands the
// callback an error whose message names the file (base name only).
it( 'will error when .txt file encoding cannot be detected', function( done ) {
  var expectedMessage = 'Could not detect encoding for file named [[ unknown-encoding.txt ]]'
    , fixture = path.join( __dirname, 'files', 'unknown-encoding.txt' )
    ;

  // Only the error argument is inspected; any extracted text is ignored.
  function verifyError( err ) {
    expect( err ).to.be.an( 'object' );
    expect( err.message ).to.be.a( 'string' );
    expect( err.message ).to.eql( expectedMessage );
    done();
  }

  fromFileWithPath( fixture, verifyError );
});

it( 'will extract text specifically from a .css file', function( done ) {
var filePath = path.join( __dirname, 'files', 'css.css' );
fromFileWithPath( filePath, function( error, text ) {
Expand Down
Empty file added test/files/unknown-encoding.txt
Empty file.

0 comments on commit c2660fd

Please sign in to comment.