Skip to content

Commit b406dc7

Browse files
author
Julien Balian
committed
Init repo
0 parents  commit b406dc7

23 files changed

+542
-0
lines changed

Diff for: .gitignore

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Projects files
2+
keys.sh
3+
4+
# OS Artifacts
5+
*.DS_Store
6+
7+
# Vagrant Artifacts
8+
.vagrant/*
9+
10+
# NPM Artifacts
11+
node_modules/*
12+
npm-debug.log
13+
libpeerconnection.log
14+
15+
# Docco artifacts
16+
docs/*
17+
18+
# IDE Artifacts
19+
.idea/*
20+
*.iml
21+
*.sublime-*
22+
23+
# Jekyll artifacts
24+
_site/*

Diff for: .jshintrc

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
2+
{
3+
"bitwise" : true,
4+
"curly" : true,
5+
"eqeqeq" : true,
6+
"immed" : true,
7+
"latedef" : true,
8+
"newcap" : true,
9+
"noarg" : true,
10+
"noempty" : true,
11+
"nonew" : true,
12+
"plusplus" : true,
13+
"regexp" : true,
14+
"undef" : true,
15+
"strict" : true,
16+
"trailing" : true,
17+
"unused" : true,
18+
"loopfunc" : true,
19+
20+
"asi" : false,
21+
"boss" : false,
22+
"debug" : false,
23+
"eqnull" : false,
24+
"es5" : false,
25+
"esnext" : false,
26+
"evil" : false,
27+
"expr" : false,
28+
"forin" : false,
29+
"funcscope" : false,
30+
"globalstrict" : false,
31+
"quotmark" : false,
32+
"iterator" : false,
33+
"lastsemic" : false,
34+
"laxbreak" : false,
35+
"laxcomma" : false,
36+
"multistr" : false,
37+
"onecase" : false,
38+
"proto" : false,
39+
"regexdash" : false,
40+
"scripturl" : false,
41+
"smarttabs" : false,
42+
"shadow" : false,
43+
"sub" : false,
44+
"supernew" : false,
45+
"validthis" : false,
46+
47+
"browser" : false,
48+
"couch" : false,
49+
"devel" : false,
50+
"dojo" : false,
51+
"jquery" : false,
52+
"mootools" : false,
53+
"node" : true,
54+
"nonstandard" : false,
55+
"prototypejs" : false,
56+
"rhino" : false,
57+
"wsh" : false,
58+
59+
"nomen" : false,
60+
"onevar" : false,
61+
"passfail" : false,
62+
"white" : false,
63+
64+
"maxerr" : 100,
65+
"predef" : [
66+
"describe",
67+
"it",
68+
"after",
69+
"before",
70+
"beforeEach"
71+
],
72+
"indent" : 2,
73+
"maxstatements" : 20,
74+
"maxcomplexity" : 10
75+
}

Diff for: Berksfile

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
site :opscode
2+
3+
cookbook 'apt', '~> 2.1.0'
4+
cookbook 'java', '~> 1.12.0'
5+
cookbook 'nodejs', '~> 1.3.0'

Diff for: Berksfile.lock

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"sources": {
3+
"apt": {
4+
"locked_version": "2.1.0"
5+
},
6+
"java": {
7+
"locked_version": "1.12.0"
8+
},
9+
"nodejs": {
10+
"locked_version": "1.3.0"
11+
},
12+
"windows": {
13+
"locked_version": "1.10.0"
14+
},
15+
"chef_handler": {
16+
"locked_version": "1.1.4"
17+
},
18+
"yum": {
19+
"locked_version": "2.3.4"
20+
},
21+
"build-essential": {
22+
"locked_version": "1.4.2"
23+
}
24+
}
25+
}

Diff for: README.md

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# pdf2htmlEX 0.10 Cluestr Hydrater
2+
> Visit http://cluestr.com for details about Cluestr.
3+
4+
Cluestr Hydrater for PDF files.
5+
6+
# How to install?
7+
Bring up the development VM with Vagrant (`vagrant up`), then log into it (`vagrant ssh`).
8+
9+
10+
Support: `[email protected]`.

Diff for: Vagrantfile

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# -*- mode: ruby -*-
2+
# vi: set ft=ruby :
3+
4+
# TODO: use our recipe for tika
5+
# Shell script handed to the Vagrant shell provisioner. It installs the build
# dependencies, then builds and installs poppler, fontforge and pdf2htmlEX
# from source inside ./deps.
$script = <<SCRIPT
# script inline
sudo apt-get update;
sudo apt-get install -y autoconf libtool \
libpng12-dev zlibc zlib1g-dev libtiff-dev libungif4-dev libjpeg-dev \
libxml2-dev libuninameslist-dev xorg-dev subversion cvs gettext git \
libpango1.0-dev libcairo2-dev python-dev uuid-dev libreadline-dev \
cmake util-linux pkg-config libpng-dev \
poppler-data git libfontconfig-dev gettext libcairo2-dev \
libtool;

# -p keeps a second provisioning run from failing when deps already exists
mkdir -p deps;cd deps;
wget http://poppler.freedesktop.org/poppler-0.24.2.tar.xz;
tar -xvJf poppler-0.24.2.tar.xz;
cd poppler-0.24.2;
./configure --enable-xpdf-headers;
make;
sudo make install;
cd ..;

# may need a FreeType Install
# and libSpiro

git clone https://github.com/coolwanglu/fontforge.git;
cd fontforge;
./autogen.sh;
./configure;
make;
sudo make install;

cd ..;
# Cloned over https like fontforge above, instead of the git:// protocol
git clone https://github.com/coolwanglu/pdf2htmlEX.git;
cd pdf2htmlEX;
cmake . && make && sudo make install;

# Append to .bashrc rather than overwrite it, and only once. Single quotes keep
# the variable reference literal so it is expanded at login, not while provisioning.
grep -q 'LD_LIBRARY_PATH=/usr/local/lib' /home/vagrant/.bashrc 2>/dev/null || \
echo 'export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH' >> /home/vagrant/.bashrc
SCRIPT
41+
42+
Vagrant.configure("2") do |config|
  # Guest identity and base box
  config.vm.hostname = "cluestrhydraterpdf"
  config.vm.box = "precise64"
  config.vm.box_url = "http://files.vagrantup.com/precise64.box"

  # Port 8000 on the host is forwarded to port 8000 on the guest
  config.vm.network :forwarded_port, host: 8000, guest: 8000

  # Cookbooks are resolved from the Berksfile next to this file
  config.berkshelf.enabled = true
  config.berkshelf.berksfile_path = "./Berksfile"
  config.omnibus.chef_version = '11.6.0'

  # First provisioner: chef-solo with the apt, java and nodejs recipes
  config.vm.provision :chef_solo do |chef|
    chef.run_list = [
      "recipe[apt]",
      "recipe[java]",
      "recipe[nodejs]"
    ]

    chef.json = {
      java: {
        install_flavor: "openjdk",
        jdk_version: "7"
      }
    }
  end

  # Second provisioner: the inline shell script defined in $script
  config.vm.provision :shell, inline: $script
end

Diff for: app.js

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
'use strict';
2+
3+
// Load configuration and initialize server
4+
var restify = require('restify');
5+
var async = require('async');
6+
7+
var configuration = require('./config/configuration.js');
8+
var lib = require('./lib/hydrater-pdf');
9+
10+
// Create the restify server exported by this module
var server = restify.createServer();

// Middleware Goes Here
// Registered in this order: accept parser, query parser, body parser.
[
  restify.acceptParser(server.acceptable),
  restify.queryParser(),
  restify.bodyParser()
].forEach(function(middleware) {
  server.use(middleware);
});

// Task queue attached to the server: lib.helpers.hydrate is the worker and
// configuration.concurrency is passed as the concurrency argument.
server.queue = async.queue(lib.helpers.hydrate, configuration.concurrency);

// Load routes
var registerRoutes = require('./config/routes.js');
registerRoutes(server, lib.handlers);

// Expose the server
module.exports = server;

Diff for: bin/server

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/usr/bin/env node
'use strict';

// # bin/server
// Launch a node app
//
// The interpreter is looked up through /usr/bin/env so the script can be
// executed directly on systems where `env` is not installed under /bin.

// Load configuration
var config = require('../config/configuration.js');
var server = require('../app.js');

// Start the server
// The port and the number of workers both come from the configuration module.
var spawner = require('sspawn')(server, {port: config.port, workers: config.workers});
spawner.start();

Diff for: config/configuration.js

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* @file Defines the hydrater settings.
3+
*/
4+
5+
// node_env can either be "development" or "production"
6+
var node_env = process.env.NODE_ENV || "development";
7+
var default_port = 8000;
8+
9+
var default_pdf_version = "1.4";
10+
var default_pdf_path = "/etc/pdf-" + default_pdf_version + "/pdf-app-" + default_pdf_version + ".jar";
11+
12+
// Number of pdf instance to run simultaneously per cluster
13+
var default_concurrency = 1;
14+
15+
if(node_env === "production") {
16+
default_port = 80;
17+
}
18+
19+
// Exports configuration
20+
module.exports = {
21+
env: node_env,
22+
port: process.env.PORT || default_port,
23+
workers: process.env.WORKERS || 2,
24+
25+
pdf_version: process.env.pdf_VERSION || default_pdf_version,
26+
pdf_path: process.env.pdf_PATH || default_pdf_path,
27+
concurrency: process.env.pdf_CONCURRENCY || default_concurrency
28+
};

Diff for: config/routes.js

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
'use strict';
2+
/**
3+
* @file Defines the routes available on the server
4+
5+
* Defines every exposed HTTP path and its methods (GET / POST / ...).
6+
*/
7+
8+
/**
 * Register the HTTP routes on the server.
 *
 * @param {Object} server Server exposing a `post` method
 * @param {Object} handlers Handlers, of which `hydrater` serves POST /hydrate
 */
function router(server, handlers) {
  server.post('/hydrate', handlers.hydrater);
}

// Routes client requests to handlers
module.exports = router;

Diff for: lib/hydrater-pdf/handlers/hydrater.js

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
'use strict';
2+
3+
/**
4+
* @file Define the pdf hydrater endpoint
5+
*
6+
* Extract generic information from files
7+
*
8+
*/
9+
10+
var restify = require('restify');
11+
12+
13+
/**
14+
* This handler receives a document on a POST request and process the document
15+
* to find the content, the language and the associated meta-datas
16+
*
17+
* @param {Object} req Request object from the client
18+
* @param {Object} res Response we want to return
19+
* @param {Function} next Callback to call once res has been populated.
20+
*/
21+
module.exports = function(req, res, next) {
22+
if(!req.params.file_path) {
23+
return next(new restify.BadMethodError('No file to process'));
24+
} else if(!req.params.callback) {
25+
return next(new restify.BadMethodError('No specified callback'));
26+
}
27+
28+
res.send(204);
29+
next();
30+
31+
// Push to queue
32+
require('../../../app.js').queue.push({
33+
file_path: req.params.file_path,
34+
callback: req.params.callback
35+
});
36+
37+
console.log("Queuing: ", req.params.file_path);
38+
};

0 commit comments

Comments
 (0)