-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Hello octocat!
- Loading branch information
0 parents
commit ef983bb
Showing
6 changed files
with
435 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
DjVuMaker Plugin for Calibre | ||
--- | ||
This plugin adds DJVU output conversion for Postscript documents (*.pdf, .ps). | ||
Files can be converted through a GUI menu and optionally as FileType hook for automatically converting imports of all Postscript documents. | ||
|
||
[Screenshot of GUI Menu & Job log] | ||
|
||
DjVU files are best for rendering large image-based documents (100+ pg. black-and-white-scaned archive-books) on markup-ebook readers with sub-300MiB memory and sub-2GiB storage capacities. | ||
The community-made Kindle readers kindlepdfviewer & koreader support DjVu files and can deliver noticable speed increase over PDF originals of such documents. | ||
Some massive 1000+ page books can only be read unsplit on these devices in DjVu format. | ||
|
||
PDF is still better for vector/markup based "ebooks" so this plugin will not try to convert documents it detects having less than 1 raster image per page. | ||
|
||
Under the Hood | ||
--- | ||
There are a few implementations of DjVU tools in the wild, but the fastest and most robust free one is the DjVuLibre suite and its Ghostscript plugin "GsDjvu". | ||
GsDjvu was witlessly licensed by AT&T with a "free" but GPL-incompatible license which makes pre-compiled packages impossible to publically distribute. | ||
Therefore both packages must be built by the user in a complicated procedure, which the plugin tries to facilitate when installed into Calibre. | ||
|
||
Installation | ||
--- | ||
* Right click the preferences button in calibre, select get new plugins, scroll down the list and choose the DjVuMaker plugin to install | ||
[screenshot] | ||
* Or, download the zip and install it from the shell | ||
```bash | ||
wget github.com/kfix/calibre_plugin_djvumaker/master/zipfile | ||
calibre-customize -b master.zip | ||
``` | ||
* Or++, clone this repo and install from source | ||
```bash | ||
git clone github.com/kfix/calibre-plugin-djvumaker | ||
cd calibre-plugin-djvumaker | ||
./__init__.py | ||
``` | ||
|
||
* [Required] Build the conversion programs | ||
```calibre-debug -R djvumaker install_deps``` | ||
**Only works on OSX for now** | ||
|
||
* [Optional] run a test conversion out-of-GUI against the included PDF. Press CTRL-C to exit. | ||
```calibre-debug -R djvumaker test.pdf``` | ||
|
||
* (Re)start Calibre and start converting your PDF books! | ||
|
||
Q: Why not write a "standard" Conversion Plugin for DjVU? | ||
--- | ||
Calibre's conversion API currently supports two pipelines: | ||
1) markup-based ebooks (book.xfmt > book.OEB > book.yfmt): useless for working on image-based scans. | ||
2) comic books (*.cbz): unusably slow for library books due to its over-reliance on Python for its transform pipeline. | ||
|
||
Only ghostscript+gsdjvu delivers usable conversion times for large scanned books. | ||
Patching Calibre's conversion API to add a 3rd pipeline to support them would be far more involved than this sub-500-line plugin (excluding these explanations :-). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,284 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
__license__ = 'GPL 3' | ||
__copyright__ = '2014, Joey Korkames <github.com/kfix>' | ||
__docformat__ = 'restructuredtext en' | ||
|
||
PLUGINNAME = 'djvumaker' # Name of the plugin | ||
|
||
#run this file directly to self-install the plugins to calibre | ||
if __name__ == '__main__': | ||
import os, sys | ||
os.system("calibre-customize -b %s" % os.path.dirname(os.path.abspath(__file__))) | ||
print 'plugin installed. test & debug with: `calibre-debug -r djvumaker -- test.pdf`' | ||
sys.exit() | ||
|
||
import errno, os, sys, subprocess, traceback | ||
from functools import partial | ||
from calibre.ebooks import ConversionError | ||
from calibre.constants import (isosx, iswindows, islinux, isbsd) | ||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError | ||
from calibre import force_unicode, prints | ||
from calibre.ptempfile import PersistentTemporaryFile | ||
#from calibre.ebooks.pdf.pdftohtml import PDFTOHTML | ||
#from xml.dom.minidom import parse, parseString | ||
|
||
from calibre.customize import FileTypePlugin, InterfaceActionBase | ||
|
||
if iswindows and hasattr(sys, 'frozen'): | ||
subprocess.Popen = partial(subprocess.Popen, creationflags=0x08) # CREATE_NO_WINDOW=0x08 so that no ugly console is popped up | ||
if (islinux or isbsd or isosx) and getattr(sys, 'frozen', False): | ||
pass #shell messes up escaping of spaced filenames to the script #popen = partial(subprocess.Popen, shell=True) | ||
|
||
# -- DJVU conversion utilities wrapper functions -- see http://en.wikisource.org/wiki/User:Doug/DjVu_Files | ||
|
||
def c44(srcdoc, cmdflags=[], log=None): | ||
#part of djvulibre, converts jpegs to djvu | ||
# then combine with djvm -c book.djvu pageN.djvu pageN+1.djvu .. | ||
#files end up being huge | ||
pass | ||
|
||
def cjb2(srcdoc, cmdflags=[], log=None): | ||
#part of djvulibre, converts tiff to djvu | ||
# need to bitone/greyscale the tiff beforehand | ||
# gs -sDEVICE=pdfwrite -sColorConversionStrategy=Gray -dProcessColorModel=DeviceGray -dOverrideICC -f input.pdf -o output.pdf | ||
#osx has Quartz and a little cocoa app can break down a pdf into tiffs: http://lists.apple.com/archives/cocoa-dev/2002/Jun/msg00729.html | ||
# http://scraplab.net/print-production-with-quartz-and-cocoa/ | ||
# then combine with djvm -c book.djvu pageN.djvu pageN+1.djvu .. | ||
pass | ||
|
||
def minidjvu(srcdoc, cmdflags=[], log=None): | ||
#http://minidjvu.sourceforge.net/ | ||
#^foss license, supports raw TIFF images | ||
#https://code.google.com/p/mupdf-converter/source/browse/trunk/MuPDF/MuPDFConverter.cs | ||
pass | ||
|
||
def k2pdfopt(srcdoc, cmdflags=[], log=None): | ||
#brilliant, if quirky, app for reflowing a raster doc to layout suitable on e-readers, | ||
#reads DJVUs but only writes PDFs | ||
pass | ||
|
||
def pdf2djvu(srcdoc, cmdflags=[], log=None): | ||
#https://code.google.com/p/pdf2djvu/ | ||
#pdf2djvu -o output_file input_file | ||
pass | ||
|
||
def mupdf(srcdoc, cmdflags=[], log=None): | ||
#https://github.com/Ernest0x/mupdf | ||
#can dump pdfs into tiffs and vice versa | ||
#mutool extract | ||
pass | ||
|
||
def djvudigital(srcdoc, cmdflags=[], log=None): | ||
#only supports pdf and ps, tricky to get compiled and installed, but is the fastest converter | ||
#win32: http://code.google.com/p/osspack32/downloads/detail?name=ghostscript8.71_gsdjvu1.5_src.7z | ||
#gentoo: emerge ghostscript-gpl --use-djvu | ||
#osx: brew install --build-from-source --with-djvu ghostscript | ||
|
||
if 'CALIBRE_WORKER' in os.environ: | ||
cmdbuf=0 #running as a fork_job, all process output piped to logfile, so don't buffer | ||
else: | ||
cmdbuf=1 #line-buffered | ||
|
||
if log: #divert our streaming output printing to the caller's logger | ||
prints = partial(log.prints, 1) #log.print(INFO, yaddayadda) | ||
else: | ||
#def prints(p): print p | ||
prints = sys.stdout.write | ||
#prints = sys.__stdout__.write #unredirectable original fd | ||
#`pip sarge` makes streaming subprocesses easier than sbp.Popen | ||
|
||
bookname = os.path.splitext(os.path.basename(srcdoc))[0] | ||
with PersistentTemporaryFile(bookname + '.djvu') as djvu: #note, PTF() is from calibre | ||
try: | ||
cmd = ['djvudigital'] + cmdflags + [srcdoc, djvu.name] | ||
prints('%s: subprocess: %s' % (PLUGINNAME, cmd)) | ||
proc = subprocess.Popen(cmd, bufsize=cmdbuf, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) #stderr:csepdjvu, stdout: ghostscript & djvudigital | ||
if cmdbuf > 0: #stream the output | ||
while proc.poll() is None: prints(proc.stdout.readline()) | ||
for line in proc.stdout.read().split('\n'): prints(line) #remainder of post-polled buffer | ||
else: | ||
proc.communicate() | ||
prints('%s: subprocess returned %s' % (PLUGINNAME, proc.returncode)) | ||
except OSError as e: | ||
if e.errno == errno.ENOENT: | ||
prints('%s: $PATH[%s]/djvudigital not available: you may need to install djvu' % (self.name, os.environ['PATH'])) | ||
return False | ||
|
||
if proc.returncode != 0: return False #10 djvudigital shell/usage error | ||
|
||
return djvu.name | ||
|
||
def is_rasterbook_pdfimages(path): | ||
'''identify whether this is a raster doc (ie. a scan) or a digitally authored text+graphic doc. skip conversion if source doc is not mostly raster-image based | ||
ascertain this by checking whether there are as many image objects in the PDF as there are pages +/- 5 (google books and other scanners add pure-text preambles to their pdfs)''' | ||
is_raster = False | ||
#from calibre.ebooks.pdf.pdftohtml import PDFTOHTML | ||
#^^need to get poppler-utils' pdfimages added to calibre makefile | ||
# http://cgit.freedesktop.org/poppler/poppler/tree/utils/pdfimages.cc | ||
try: | ||
pdfimages = subprocess.check_output(["pdfimages", "-f", "2", "-l", "30", "-q", "-list", path]) #use pages 2-30 for the test, don't waste time parsing giant pdfs | ||
pdfimages = [l for l in pdfimages.splitlines()[2:] if l.split()[2] == 'image'] #filter out real rgb images from masks and other weird entities | ||
if len(pdfimages) > 0: #we found actual rasters in the PDF | ||
is_raster = reduce(lambda p,i: p - i < 5, [int(i) for i in pdfimages.pop().split()[:2]]) #get the page# & image# of last obj scanned, cmp them +/- 5 | ||
except OSError as e: | ||
if e.errno == errno.ENOENT: | ||
prints('%s: $PATH[%s]/pdfimages not available: you may need to install poppler-utils' % (self.name, os.environ['PATH'])) | ||
prints(traceback.format_exc()) | ||
raise #ConversionError | ||
except subprocess.CalledProcessError as e: | ||
prints('%s: subprocess failed with return code %d:\n\t%s\n' % (self.name, e.returncode, ' '.join(e.cmd), e.output)) | ||
prints(traceback.format_exc()) | ||
raise #ConversionError | ||
|
||
return is_raster | ||
|
||
# -- Calibre Plugin class -- | ||
|
||
class DJVUmaker(FileTypePlugin, InterfaceActionBase): #multiple inheritance for gui hooks! | ||
name = PLUGINNAME # Name of the plugin | ||
description = 'Convert raster-based document files (Postscript, PDF) to DJVU with GUI button and on-import' | ||
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on | ||
author = 'Joey Korkames' # The author of this plugin | ||
version = (1, 0, 0) # The version number of this plugin | ||
file_types = set(['pdf','ps', 'eps']) # The file types that this plugin will be automatically applied to | ||
on_postimport = True # Run this plugin after books are addded to the database | ||
minimum_calibre_version = (1, 0, 0) #needs the new db api and id bugfix | ||
actual_plugin = 'calibre_plugins.djvumaker.gui:ConvertToDJVUAction' #InterfaceAction plugin location | ||
|
||
def customization_help(self, gui=False): | ||
return 'Enter additional `djvudigital --help` command-flags here:' # os.system('MANPAGER=cat djvudigital --help') | ||
#todo: make custom config widget so we can have attrs for each of the wrappers: djvudigital minidjvu, c44, etc. | ||
#todo: `man2html djvumaker` and gui=True for comprehensive help? | ||
|
||
def cli_main(self, args): | ||
def prints(p): print p | ||
id_or_path = args[1] | ||
|
||
if id_or_path.isdigit(): | ||
'`calibre-debug -r 123 #id(123).pdf` -> tempfile(id(123).djvu)' | ||
self.postimport(id_or_path, fmt) | ||
elif id_or_path == "convert_all": | ||
'`calibre-debug -r djvumaker convert_all`' | ||
prints("Copy-convert all PDFs to DJVU? (press CTRL+C to abort)") | ||
icl_user = raw_input('') | ||
from calibre.library import db | ||
db = db() # initialize calibre library database | ||
for book_id in list(db.all_ids()): | ||
if db.has_format(book_id, 'DJVU', index_is_id=True): | ||
continue | ||
if db.has_format(book_id, 'PDF', index_is_id=True): | ||
db.run_plugins_on_postimport(book_id, 'pdf') | ||
continue | ||
elif id_or_path == "install_deps": | ||
if isosx: | ||
os.system("install_deps_osx.sh") #hmm, need to extract this from the self.plugin_path zipfile | ||
#todo: make more install scripts | ||
elif islinux: raise | ||
elif iswindows: raise | ||
elif isbsd: raise | ||
else: | ||
'`calibre-debug -r djvumaker test.pdf` -> tempfile(test.djvu)' | ||
if is_rasterbook_pdfimages(id_or_path): | ||
djvu = djvudigital(id_or_path) | ||
if djvu: | ||
prints("\n\nopening djvused in subshell, press Ctrl+D to exit and delete '%s'\n\n" % djvu) | ||
#de-munge the tty | ||
sys.stdin = sys.__stdin__ | ||
sys.stdout = sys.__stdout__ | ||
sys.stderr = sys.__stderr__ | ||
os.system("stat '%s'" % djvu) | ||
os.system("djvused -e dump '%s'" % djvu) | ||
os.system("djvused -v '%s'" % djvu) | ||
|
||
# -- calibre filetype plugin mandatory methods -- | ||
|
||
def run(self, path_to_ebook): | ||
return path_to_ebook #noop | ||
|
||
def postimport(self, book_id, book_format, db=None, log=None, fork_job=True): | ||
if log: #divert our printing to the caller's logger | ||
prints = partial(log.prints, 1) #log.print(INFO, yaddayadda) | ||
else: | ||
def prints(p): print p+'\n' | ||
|
||
if sys.__stdin__.isatty(): | ||
fork_job = False #probably being run as `calibredb add`, do all conversions in main loop | ||
rpc_refresh = True #use the calibre RPC to signal a GUI refresh | ||
|
||
if db is None: | ||
from calibre.library import db | ||
db = db() # initialize calibre library database | ||
|
||
if db.has_format(book_id, 'DJVU', index_is_id=True): | ||
prints("%s: already have 'DJVU' format document for book ID #%s" % (PLUGINNAME, book_id)) | ||
return None #don't auto convert, we already have a DJVU for this document | ||
|
||
path_to_ebook = db.format_abspath(book_id, book_format, index_is_id=True) | ||
|
||
if book_format == 'pdf': | ||
if is_rasterbook_pdfimages(path_to_ebook): | ||
pass #should add a 'scanned' or 'djvumaker' tag | ||
else: | ||
#this is a marked-up/vector-based pdf, no advantages to having another copy in DJVU format | ||
prints("%s: %s document from book ID #%s determined to be a markup-based ebook, not converting to DJVU" % (self.name, book_format, book_id)) | ||
return None #no-error in job panel | ||
#todo: test the DPI to determine if a document is from a broad-sheeted book. if so, queue up k2pdfopt to try and chunk the content appropriately to letter size | ||
|
||
prints("%s: scheduling new %s document from book ID #%s for post-import DJVU conversion: %s" % (self.name, book_format, book_id, path_to_ebook)) | ||
|
||
cmdflags = [] | ||
if self.site_customization is not None: cmdflags.extend(self.site_customization.split()) | ||
#`--gsarg=-dFirstPage=1,-dLastPage=1` how to limit page range | ||
#more gsargs: https://leanpub.com/pdfkungfoo | ||
|
||
if fork_job: #useful for not blocking calibre GUI when large PDFs are dropped into the automatic-import-folder | ||
try: | ||
#https://github.com/kovidgoyal/calibre/blob/master/src/calibre/utils/ipc/simple_worker.py #dispatch API for Worker() | ||
#src/calibre/utils/ipc/launch.py #Worker() uses sbp.Popen to run a second Python to a logfile | ||
#note that Calibre bungs the python loader to check the plugin directory when modules with calibre_plugin. prefixed are passed | ||
# https://github.com/kovidgoyal/calibre/blob/master/src/calibre/customize/zipplugin.py#L192 | ||
jobret = fork_job('calibre_plugins.%s' % self.name, 'djvudigital', | ||
args=[path_to_ebook, cmdflags, log], | ||
kwargs={}, | ||
env={'PATH': os.environ['PATH'] + ':/usr/local/bin'}, #djvu and poppler-utils on osx | ||
timeout=600) #todo: determine a resonable timeout= based on filesize or make a heartbeat= check | ||
|
||
except WorkerError as e: | ||
prints('%s: djvudigital background conversion failed: \n%s' % (self.name,force_unicode(e.orig_tb))) | ||
raise #ConversionError | ||
except: | ||
prints(traceback.format_exc()) | ||
raise | ||
|
||
#dump djvudigital output logged in file by the Worker to calibre proc's (gui or console) log/stdout | ||
with open(jobret['stdout_stderr'], 'rb') as f: | ||
raw = f.read().strip() | ||
prints(raw) | ||
|
||
if jobret['result']: | ||
djvu = jobret['result'] | ||
else: | ||
WorkerError("djvu conversion error: %s" % jobret['result']) | ||
#elif hasattr(self, gui): #if we have the calibre gui running, we can give it a threadedjob and not use fork_job | ||
else: #!fork_job & !gui | ||
djvu = djvudigital(path_to_ebook, cmdflags, log) | ||
|
||
if djvu: | ||
db.new_api.add_format(book_id, 'DJVU', djvu, run_hooks=True) | ||
prints("%s: added new 'DJVU' document to book ID #%s" % (PLUGINNAME, book_id)) | ||
|
||
if sys.__stdin__.isatty(): | ||
#update calibre gui Out-Of-Band. Like if we were run as a command-line scripted import | ||
#this resets current gui views/selections, no cleaner way to do it :-( | ||
from calibre.utils.ipc import RC | ||
t = RC(print_error=False) | ||
t.start() | ||
t.join(3) | ||
if t.done: #GUI is running | ||
t.conn.send('refreshdb:') | ||
t.conn.close() | ||
prints("%s: signalled Calibre GUI refresh" % PLUGINNAME) | ||
else: | ||
raise #ConversionError |
Oops, something went wrong.