diff --git a/invenio/modules/encoder/daemon.py b/invenio/modules/encoder/daemon.py
index db526f80bb..a151e52721 100644
--- a/invenio/modules/encoder/daemon.py
+++ b/invenio/modules/encoder/daemon.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of Invenio.
-# Copyright (C) 2011 CERN.
+# Copyright (C) 2011, 2015 CERN.
 #
 # Invenio is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License as
@@ -17,43 +17,44 @@
 # along with Invenio; if not, write to the Free Software Foundation, Inc.,
 # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
-"""Bibencode daemon submodule"""
+"""Bibencode daemon submodule."""
 
 import os
+
 import re
+
 import shutil
 
-from invenio.utils.json import json_decode_file
+
+from invenio.base.globals import cfg
+from invenio.legacy.bibsched.bibtask import task_get_task_param, \
+    task_low_level_submission, task_update_progress, write_message
 from invenio.modules.encoder.utils import generate_timestamp, getval
-from invenio.legacy.bibsched.bibtask import (
-    task_low_level_submission,
-    task_get_task_param,
-    write_message,
-    task_update_progress
-    )
-from invenio.modules.encoder.config import (
-    CFG_BIBENCODE_DAEMON_DIR_NEWJOBS,
-    CFG_BIBENCODE_DAEMON_DIR_OLDJOBS
-    )
+from invenio.utils.json import json_decode_file
+
 
 # Globals used to generate a unique task name
 _TASKID = None
 _TIMESTAMP = generate_timestamp()
 _NUMBER = 0
 
+
 def has_signature(string_to_check):
-    """ Checks if the given string has the signature of a job file
-    """
+    """Check if the given string has the signature of a job file."""
     sig_re = re.compile("^.*\.job$")
     if sig_re.match(string_to_check):
         return True
     else:
         return False
 
+
 def job_to_args(job):
-    """ Maps the key-value pairs of the job file to CLI arguments for a
+    """Map the key-value pairs.
+
+    Map the key-value pairs of the job file to CLI arguments for a
     low-level task submission
-    @param job: job dictionary to process
-    @type job: dictionary
+
+    :param job: job dictionary to process
+    :type job: dictionary
     """
     argument_mapping = {
         'profile': '-p',
@@ -83,40 +84,44 @@ def job_to_args(job):
     ## Set a unique name for the task, this way there can be more than
     ## one bibencode task running at the same time
     task_unique_name = '%(mode)s-%(tid)d-%(ts)s-%(num)d' % {
-                            'mode': job['mode'],
-                            'tid': _TASKID,
-                            'ts': _TIMESTAMP,
-                            'num': _NUMBER
-                            }
+        'mode': job['mode'],
+        'tid': _TASKID,
+        'ts': _TIMESTAMP,
+        'num': _NUMBER
+    }
     args.append('-N')
     args.append(task_unique_name)
     ## Transform the pairs of the job dictionary to CLI arguments
     for key in job:
         if key in argument_mapping:
-            args.append(argument_mapping[key]) # This is the new key
-            args.append(job[key]) # This is the value from the job file
+            args.append(argument_mapping[key])  # This is the new key
+            args.append(job[key])  # This is the value from the job file
     return args
 
+
 def launch_task(args):
-    """ Launches the job as a new bibtask through the low-level submission
+    """Launch the job.
+
+    Launch the job as a new bibtask through the low-level submission
     interface
     """
     return task_low_level_submission('bibencode', 'bibencode:daemon', *args)
 
+
 def process_batch(jobfile_path):
     """ Processes the job if it is a batch job
-    @param jobfile_path: fullpath to the batchjob file
-    @type jobfile_path: string
-    @return: True if the task was successfully launche, False if not
-    @rtype: bool
+    :param jobfile_path: fullpath to the batchjob file
+    :type jobfile_path: string
+    :return: True if the task was successfully launched, False if not
+    :rtype: bool
     """
     args = []
     task_unique_name = '%(mode)s-%(tid)d-%(ts)s-%(num)d' % {
-                            'mode': 'batch',
-                            'tid': _TASKID,
-                            'ts': _TIMESTAMP,
-                            'num': _NUMBER
-                            }
+        'mode': 'batch',
+        'tid': _TASKID,
+        'ts': _TIMESTAMP,
+        'num': _NUMBER
+    }
     args.append('-N')
     args.append(task_unique_name)
     args.append('-m')
@@ -125,14 +130,16 @@ def process_batch(jobfile_path):
     args.append(jobfile_path)
     return launch_task(args)
 
-def watch_directory(new_job_dir=CFG_BIBENCODE_DAEMON_DIR_NEWJOBS,
-                    old_job_dir=CFG_BIBENCODE_DAEMON_DIR_OLDJOBS):
+
+def watch_directory(new_job_dir=None, old_job_dir=None):
     """ Checks a folder job files, parses and executes them
-    @param new_job_dir: path to the directory with new jobs
-    @type new_job_dir: string
-    @param old_job_dir: path to the directory where the old jobs are moved
-    @type old_job_dir: string
+    :param new_job_dir: path to the directory with new jobs
+    :type new_job_dir: string
+    :param old_job_dir: path to the directory where the old jobs are moved
+    :type old_job_dir: string
     """
+    new_job_dir = new_job_dir or cfg['CFG_BIBENCODE_DAEMON_DIR_NEWJOBS']
+    old_job_dir = old_job_dir or cfg['CFG_BIBENCODE_DAEMON_DIR_OLDJOBS']
     global _NUMBER, _TASKID
     write_message('Checking directory %s for new jobs' % new_job_dir)
     task_update_progress('Checking for new jobs')
diff --git a/invenio/modules/encoder/encode.py b/invenio/modules/encoder/encode.py
index 4d637220bf..be7c453a81 100644
--- a/invenio/modules/encoder/encode.py
+++ b/invenio/modules/encoder/encode.py
@@ -19,36 +19,25 @@
 
 """BibEncode encoding submodule"""
 
-from six import iteritems
-
-from invenio.base.globals import cfg
-from invenio.legacy.bibsched.bibtask import (
-    write_message,
-    task_update_progress,
-    )
-from invenio.modules.encoder.config import (
-    CFG_BIBENCODE_FFMPEG_ENCODING_LOG,
-    CFG_BIBENCODE_FFMPEG_PASSLOGFILE_PREFIX,
-    CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT,
-    CFG_BIBENCODE_FFMPEG_ENCODE_TIME
-    )
-from invenio.modules.encoder.utils import (
-    timecode_to_seconds,
-    generate_timestamp,
-    chose,
-    getval,
-    aspect_string_to_float
-    )
-from invenio.modules.encoder.profiles import get_encoding_profile
-from invenio.modules.encoder.metadata import (
-    ffprobe_metadata,
-    mediainfo_metadata
-    )
-import time
 import os
+
 import subprocess
+
+import time
+
 import uuid
 
+from invenio.base.globals import cfg
+from invenio.legacy.bibsched.bibtask import task_update_progress, write_message
+from invenio.modules.encoder.metadata import ffprobe_metadata, \
+    mediainfo_metadata
+from invenio.modules.encoder.profiles import get_encoding_profile
+from invenio.modules.encoder.utils import aspect_string_to_float, chose, \
+    generate_timestamp, getval, timecode_to_seconds
+
+from six import iteritems
+
+
 def _filename_log(output_filename, nofpass=1):
     """ Constructs the filename including path for the encoding err file
     @param output_filename: name of the video file to be created
@@ -60,8 +49,8 @@ def _filename_log(output_filename, nofpass=1):
     """
     fname = os.path.split(output_filename)[1]
     fname = os.path.splitext(fname)[0]
-    return CFG_BIBENCODE_FFMPEG_ENCODING_LOG % (generate_timestamp() +
-            "_" + fname + "_%d" % nofpass)
+    return cfg['CFG_BIBENCODE_FFMPEG_ENCODING_LOG'] % \
+        (generate_timestamp() + "_" + fname + "_%d" % nofpass)
 
 def determine_aspect(input_file):
     """ Checks video metadata to find the display aspect ratio.
@@ -408,8 +397,8 @@ def insert(key, value):
     for key, value in iteritems(metadata):
         if value is not None:
             meta_arg = (
-                CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT % (key, value)
-                )
+                cfg['CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT'] % \
+                (key, value))
             insert("-metadata", meta_arg)
     ## Special argument additions
     if passes == 1:
@@ -469,9 +458,10 @@ def graphical(value):
         ## try to parse the status
         for line in reversed(lines):
-            if CFG_BIBENCODE_FFMPEG_ENCODE_TIME.match(line):
+            if cfg['CFG_BIBENCODE_FFMPEG_ENCODE_TIME'].match(line):
                 time_string = (
-                    CFG_BIBENCODE_FFMPEG_ENCODE_TIME.match(line).groups()
+                    cfg['CFG_BIBENCODE_FFMPEG_ENCODE_TIME'].match(
+                        line).groups()
                     )[0]
                 break
         filehandle.close()
@@ -521,9 +511,9 @@ def graphical(value):
 
     ## Run the encoding
-    pass_log_file = CFG_BIBENCODE_FFMPEG_PASSLOGFILE_PREFIX % (
-        os.path.splitext(os.path.split(input_file)[1])[0],
-        str(uuid.uuid4()))
+    pass_log_file = cfg['CFG_BIBENCODE_FFMPEG_PASSLOGFILE_PREFIX'] % (
+        os.path.splitext(os.path.split(input_file)[1])[0],
+        str(uuid.uuid4()))
     no_error = True
     ## For every encoding pass to do
     for apass in range(0, passes):
@@ -614,3 +604,4 @@ def get_res_for_weird_aspect(width, aspect, avail_res):
             return [str(width) + 'x' + str(height)]
     else:
         return possible_res
+
diff --git a/invenio/modules/encoder/extract.py b/invenio/modules/encoder/extract.py
index d113ba0239..9b28dfd96a 100644
--- a/invenio/modules/encoder/extract.py
+++ b/invenio/modules/encoder/extract.py
@@ -17,35 +17,23 @@
 # along with Invenio; if not, write to the Free Software Foundation, Inc.,
 # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
-"""BibEncode frame extraction module.
-""" +"""BibEncode frame extraction module.""" -__revision__ = "$Id$" +import os -from invenio.modules.encoder.config import ( - CFG_BIBENCODE_FFMPEG_EXTRACT_COMMAND, - ) -from invenio.legacy.bibsched.bibtask import ( - task_update_progress, - write_message - ) -from invenio.modules.encoder.utils import ( - timecode_to_seconds, - seconds_to_timecode, - is_timecode, - is_seconds, - normalize_string, - getval, - chose - ) -from invenio.modules.encoder.metadata import ( - ffprobe_metadata - ) import subprocess -import os + +from invenio.base.globals import cfg +from invenio.legacy.bibsched.bibtask import task_update_progress, write_message +from invenio.modules.encoder.encode import \ + determine_resolution_preserving_aspect +from invenio.modules.encoder.metadata import ffprobe_metadata from invenio.modules.encoder.profiles import get_extract_profile -from invenio.modules.encoder.encode import determine_resolution_preserving_aspect -import re +from invenio.modules.encoder.utils import chose, is_seconds, is_timecode, \ + seconds_to_timecode, timecode_to_seconds + +__revision__ = "$Id$" + # rename size to resolution def extract_frames(input_file, output_file=None, size=None, positions=None, @@ -220,7 +208,7 @@ def extract_frames(input_file, output_file=None, size=None, positions=None, #-------------# ## Build the command for ffmpeg - command = (CFG_BIBENCODE_FFMPEG_EXTRACT_COMMAND % ( + command = (cfg['CFG_BIBENCODE_FFMPEG_EXTRACT_COMMAND'] % ( position, input_file, size, output_filename )).split() ## Start subprocess and poll the output until it finishes @@ -247,3 +235,4 @@ def extract_frames(input_file, output_file=None, size=None, positions=None, ## Everything should be fine if this position is reached message_fnc("Extraction of frames was successful") return 1 + diff --git a/invenio/modules/encoder/metadata.py b/invenio/modules/encoder/metadata.py index 89d42fcc8b..c4938fe898 100644 --- a/invenio/modules/encoder/metadata.py +++ b/invenio/modules/encoder/metadata.py @@ -23,24 +23,23 @@ __revision__ = "$Id$" -import subprocess import re -from six import iteritems +import subprocess +from invenio.base.globals import cfg +from invenio.legacy.bibsched.bibtask import write_message +from invenio.modules.encoder.utils import getval, mediainfo, probe +from invenio.utils.json import json, json_decode_file + from xml.dom import minidom -from invenio.utils.json import json, json_decode_file -from invenio.legacy.bibsched.bibtask import write_message -from invenio.modules.encoder.config import ( - CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT, - CFG_BIBENCODE_FFMPEG_METADATA_SET_COMMAND, - CFG_BIBENCODE_PBCORE_MAPPINGS - ) -from invenio.modules.encoder.utils import probe, getval, mediainfo, seconds_to_timecode +from six import iteritems + # Stores metadata for the process. Many different functions in BibEncode # need access to video metadata regularly. Because we dont pass objects arount # we need to call the functions of this submodule again and again. To not -# call ffprobe and mediainfo all the time, the metadata is stored in this cache. +# call ffprobe and mediainfo all the time, the metadata is stored in this +# cache. 
 
 _FFPROBE_METADATA_CACHE = {}
 _MEDIAINFO_METADATA_CACHE = {}
 
@@ -57,12 +56,15 @@ def write_metadata(input_file, output_file, metadata):
     ## build metadata arguments for ffmpeg
     for key, value in iteritems(metadata):
         if value is not None:
-            meta_args.append(CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT % (key, value))
+            meta_args.append(
+                cfg['CFG_BIBENCODE_FFMPEG_METADATA_ARGUMENT'] % \
+                (key, value))
     else:
         write_message("metadata arg no dict")
         return 0
     ## build the command
-    command = (CFG_BIBENCODE_FFMPEG_METADATA_SET_COMMAND % (input_file, output_file)).split()
+    command = (cfg['CFG_BIBENCODE_FFMPEG_METADATA_SET_COMMAND'] % \
+        (input_file, output_file)).split()
     for meta_arg in meta_args:
         command.insert(-1, '-metadata')
         command.insert(-1, meta_arg)
@@ -317,7 +319,7 @@ def _map_values(mapping, locals_u, meta_dict, probe_dict, stream_number=None):
     probe_dict = ffprobe_metadata(input_file)
 
     # parse the mappings
-    pbcore_mappings = json_decode_file(CFG_BIBENCODE_PBCORE_MAPPINGS)
+    pbcore_mappings = json_decode_file(cfg['CFG_BIBENCODE_PBCORE_MAPPINGS'])
 
     ## INSTANTIATION ##
     # According to the PBcore standard, this strict order MUST be followed
@@ -371,4 +373,3 @@ def _map_values(mapping, locals_u, meta_dict, probe_dict, stream_number=None):
     joined = joined % {"xmlns" : ""}
 
     return joined
-
diff --git a/invenio/modules/encoder/profiles.py b/invenio/modules/encoder/profiles.py
index ec8fe62339..e70317e29e 100644
--- a/invenio/modules/encoder/profiles.py
+++ b/invenio/modules/encoder/profiles.py
@@ -20,17 +20,14 @@
 """ BibEncode profile submodule """
 
 import os
+
 import shutil
 
-from invenio.utils.json import json_decode_file
-from invenio.modules.encoder.config import (
-    CFG_BIBENCODE_PROFILES_ENCODING,
-    CFG_BIBENCODE_PROFILES_EXTRACT,
-    CFG_BIBENCODE_PROFILES_ENCODING_LOCAL,
-    CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL
-    )
-from invenio.modules.encoder.utils import getval
+from invenio.base.globals import cfg
 from invenio.ext.logging import register_exception
+from invenio.modules.encoder.utils import getval
+from invenio.utils.json import json_decode_file
+
 
 #-------------------#
 # Encoding profiles #
@@ -39,10 +36,10 @@ def get_encoding_profiles():
     """ Returns a dictionary representation of the encoding profiles
     """
-    if not os.path.exists(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL):
-        shutil.copy(CFG_BIBENCODE_PROFILES_ENCODING, CFG_BIBENCODE_PROFILES_ENCODING_LOCAL)
-    default_profiles = json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING)
-    local_profiles = json_decode_file(CFG_BIBENCODE_PROFILES_ENCODING_LOCAL)
+    if not os.path.exists(cfg['CFG_BIBENCODE_PROFILES_ENCODING_LOCAL']):
+        shutil.copy(cfg['CFG_BIBENCODE_PROFILES_ENCODING'], cfg['CFG_BIBENCODE_PROFILES_ENCODING_LOCAL'])
+    default_profiles = json_decode_file(cfg['CFG_BIBENCODE_PROFILES_ENCODING'])
+    local_profiles = json_decode_file(cfg['CFG_BIBENCODE_PROFILES_ENCODING_LOCAL'])
     default_profiles.update(local_profiles)
     return default_profiles
 
@@ -84,10 +81,10 @@ def san_bitrate(bitrate):
 
 def get_extract_profiles():
     """ Returns a dictionary representation of the frame extraction profiles
     """
-    if not os.path.exists(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL):
-        shutil.copy(CFG_BIBENCODE_PROFILES_EXTRACT, CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL)
-    default_profiles = json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT)
-    local_profiles = json_decode_file(CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL)
+    if not os.path.exists(cfg['CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL']):
+        shutil.copy(cfg['CFG_BIBENCODE_PROFILES_EXTRACT'], cfg['CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL'])
+    default_profiles = json_decode_file(cfg['CFG_BIBENCODE_PROFILES_EXTRACT'])
+    local_profiles = json_decode_file(cfg['CFG_BIBENCODE_PROFILES_EXTRACT_LOCAL'])
     default_profiles.update(local_profiles)
     return default_profiles
 
@@ -95,4 +92,3 @@
 def get_extract_profile(key):
     """ Returns a dictionary representation of an extrtaction profile by key """
     return get_extract_profiles()[key]
-
diff --git a/invenio/modules/encoder/websubmit.py b/invenio/modules/encoder/websubmit.py
index 4d5a4e2c53..f9724ff80a 100644
--- a/invenio/modules/encoder/websubmit.py
+++ b/invenio/modules/encoder/websubmit.py
@@ -31,14 +31,13 @@
     js/swfobject.js
 """
+
 import os
+
 import pkg_resources
 
 from invenio.base.globals import cfg
-from invenio.modules.encoder.config import (
-    CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR,
-    CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_FNAME
-    )
+
 
 def gcd(a,b):
     """ the euclidean algorithm """
@@ -103,7 +102,7 @@ def websubmit_singlepage(curdir, doctype, uid, access, session_id):
             'indir': indir,
             'doctype': doctype,
             'access': access,
-            'key': CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR,
+            'key': cfg['CFG_BIBENCODE_WEBSUBMIT_ASPECT_SAMPLE_DIR'],
             'uid': uid,
             'session_id': session_id,
             'resume': resume,
diff --git a/invenio/modules/oauthclient/upgrades/oauthclient_2014_10_21_encrypted_token_column.py b/invenio/modules/oauthclient/upgrades/oauthclient_2014_10_21_encrypted_token_column.py
index 7d02541d5a..bdff3d7ab0 100644
--- a/invenio/modules/oauthclient/upgrades/oauthclient_2014_10_21_encrypted_token_column.py
+++ b/invenio/modules/oauthclient/upgrades/oauthclient_2014_10_21_encrypted_token_column.py
@@ -30,10 +30,10 @@ def info():
 
 def do_upgrade():
     """Implement your upgrades here."""
-    from invenio.base.globals import cfg
+    from invenio.config import SECRET_KEY
     from sqlalchemy_utils.types.encrypted import AesEngine
     engine = AesEngine()
-    engine._update_key(cfg['SECRET_KEY'])
+    engine._update_key(SECRET_KEY)
     for row in run_sql(
             "SELECT id_remote_account, token_type, access_token "
             "FROM remoteTOKEN"):
diff --git a/invenio/modules/upgrader/upgrades/invenio_2013_03_20_idxINDEX_synonym_kb.py b/invenio/modules/upgrader/upgrades/invenio_2013_03_20_idxINDEX_synonym_kb.py
index 665c413446..ae7c0bb676 100644
--- a/invenio/modules/upgrader/upgrades/invenio_2013_03_20_idxINDEX_synonym_kb.py
+++ b/invenio/modules/upgrader/upgrades/invenio_2013_03_20_idxINDEX_synonym_kb.py
@@ -37,10 +37,10 @@ def do_upgrade():
     #second step: fill tables
     run_sql("UPDATE idxINDEX SET synonym_kbrs='INDEX-SYNONYM-TITLE,exact' WHERE name IN ('global','title')")
     #third step: check invenio.conf
-    from invenio.base.globals import cfg
-    if cfg['CFG_BIBINDEX_SYNONYM_KBRS']:
-        for index in cfg['CFG_BIBINDEX_SYNONYM_KBRS']:
-            synonym = ",".join(cfg['CFG_BIBINDEX_SYNONYM_KBRS'][index])
+    from invenio.config import CFG_BIBINDEX_SYNONYM_KBRS
+    if CFG_BIBINDEX_SYNONYM_KBRS:
+        for index in CFG_BIBINDEX_SYNONYM_KBRS:
+            synonym = ",".join(CFG_BIBINDEX_SYNONYM_KBRS[index])
             query = "UPDATE idxINDEX SET synonym_kbrs='%s' WHERE name=%s" % (synonym, index)
             run_sql(query)
diff --git a/invenio/modules/upgrader/upgrades/invenio_2013_03_21_idxINDEX_stopwords.py b/invenio/modules/upgrader/upgrades/invenio_2013_03_21_idxINDEX_stopwords.py
index 57a8243744..c6b627231f 100644
--- a/invenio/modules/upgrader/upgrades/invenio_2013_03_21_idxINDEX_stopwords.py
+++ b/invenio/modules/upgrader/upgrades/invenio_2013_03_21_idxINDEX_stopwords.py
@@ -29,16 +29,16 @@ def info():
 
 
 def do_upgrade():
-    # first step: change tables
+    #first step: change tables
     stmt = run_sql('SHOW CREATE TABLE idxINDEX')[0][1]
     if '`remove_stopwords` varchar' not in stmt:
         run_sql("ALTER TABLE idxINDEX ADD COLUMN remove_stopwords varchar(255) NOT NULL default '' AFTER synonym_kbrs")
-    # second step: fill tables
+    #second step: fill tables
     run_sql("UPDATE idxINDEX SET remove_stopwords='No'")
-    # third step: load from invenio.cfg if necessary
-    from invenio.base.globals import cfg
-    if cfg['CFG_BIBINDEX_REMOVE_STOPWORDS']:
-        if cfg['CFG_BIBINDEX_REMOVE_STOPWORDS'] == 1:
+    #third step: load from invenio.cfg if necessary
+    from invenio.config import CFG_BIBINDEX_REMOVE_STOPWORDS
+    if CFG_BIBINDEX_REMOVE_STOPWORDS:
+        if CFG_BIBINDEX_REMOVE_STOPWORDS == 1:
             run_sql("UPDATE idxINDEX SET remove_stopwords='Yes'")
diff --git a/invenio/modules/upgrader/upgrades/invenio_2013_03_25_idxINDEX_html_markup.py b/invenio/modules/upgrader/upgrades/invenio_2013_03_25_idxINDEX_html_markup.py
index 00e3767014..e1c961781c 100644
--- a/invenio/modules/upgrader/upgrades/invenio_2013_03_25_idxINDEX_html_markup.py
+++ b/invenio/modules/upgrader/upgrades/invenio_2013_03_25_idxINDEX_html_markup.py
@@ -29,23 +29,23 @@ def info():
 
 
 def do_upgrade():
-    # first step: change tables
+    #first step: change tables
     stmt = run_sql('SHOW CREATE TABLE idxINDEX')[0][1]
     if '`remove_html_markup` varchar(10)' not in stmt:
         run_sql("ALTER TABLE idxINDEX ADD COLUMN remove_html_markup varchar(10) NOT NULL default '' AFTER remove_stopwords")
     if '`remove_latex_markup` varchar(10)' not in stmt:
         run_sql("ALTER TABLE idxINDEX ADD COLUMN remove_latex_markup varchar(10) NOT NULL default '' AFTER remove_html_markup")
-    # second step: fill tables
+    #second step: fill tables
     run_sql("UPDATE idxINDEX SET remove_html_markup='No'")
     run_sql("UPDATE idxINDEX SET remove_latex_markup='No'")
-    # third step: check invenio.conf and update db if necessary
+    #third step: check invenio.conf and update db if necessary
     try:
-        from invenio.base.globals import cfg
-        if cfg['CFG_BIBINDEX_REMOVE_HTML_MARKUP']:
-            if cfg['CFG_BIBINDEX_REMOVE_HTML_MARKUP'] == 1:
+        from invenio.config import CFG_BIBINDEX_REMOVE_HTML_MARKUP, CFG_BIBINDEX_REMOVE_LATEX_MARKUP
+        if CFG_BIBINDEX_REMOVE_HTML_MARKUP:
+            if CFG_BIBINDEX_REMOVE_HTML_MARKUP == 1:
                 run_sql("UPDATE idxINDEX SET remove_html_markup='Yes'")
-        if cfg['CFG_BIBINDEX_REMOVE_LATEX_MARKUP']:
-            if cfg['CFG_BIBINDEX_REMOVE_LATEX_MARKUP'] == 1:
+        if CFG_BIBINDEX_REMOVE_LATEX_MARKUP:
+            if CFG_BIBINDEX_REMOVE_LATEX_MARKUP == 1:
                 run_sql("UPDATE idxINDEX SET remove_latex_markup='Yes'")
     except:
         pass
diff --git a/invenio/modules/upgrader/upgrades/invenio_2013_03_28_bibindex_bibrank_type_index.py b/invenio/modules/upgrader/upgrades/invenio_2013_03_28_bibindex_bibrank_type_index.py
index eddbe18649..5dddea0b3f 100644
--- a/invenio/modules/upgrader/upgrades/invenio_2013_03_28_bibindex_bibrank_type_index.py
+++ b/invenio/modules/upgrader/upgrades/invenio_2013_03_28_bibindex_bibrank_type_index.py
@@ -17,8 +17,8 @@
 # along with Invenio; if not, write to the Free Software Foundation, Inc.,
 # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
-from invenio.base.globals import cfg
 from invenio.legacy.dbquery import run_sql
+from invenio.config import CFG_DATABASE_NAME
 
 depends_on = ['invenio_release_1_1_0']
 
@@ -37,12 +37,11 @@ def do_upgrade():
 
 
 def estimate():
-    """Estimate running time of upgrade in seconds (optional)."""
-    count_rows = run_sql(
-        "SELECT SUM(TABLE_ROWS) FROM INFORMATION_SCHEMA.TABLES "
-        "WHERE TABLE_SCHEMA = '%s' "
-        "AND (TABLE_NAME like 'idx%%R' or TABLE_NAME like 'rnk%%R')"
-        % (cfg['CFG_DATABASE_NAME'],))[0][0]
+    """ Estimate running time of upgrade in seconds (optional). """
+    count_rows = run_sql("SELECT SUM(TABLE_ROWS) FROM INFORMATION_SCHEMA.TABLES "
+                         "WHERE TABLE_SCHEMA = '%s' "
+                         "AND (TABLE_NAME like 'idx%%R' or TABLE_NAME like 'rnk%%R')"
+                         % (CFG_DATABASE_NAME,))[0][0]
     return count_rows / 1000
 
diff --git a/invenio/modules/upgrader/upgrades/invenio_2013_03_29_idxINDEX_stopwords_update.py b/invenio/modules/upgrader/upgrades/invenio_2013_03_29_idxINDEX_stopwords_update.py
index 437f306755..22dfb21372 100644
--- a/invenio/modules/upgrader/upgrades/invenio_2013_03_29_idxINDEX_stopwords_update.py
+++ b/invenio/modules/upgrader/upgrades/invenio_2013_03_29_idxINDEX_stopwords_update.py
@@ -29,11 +29,11 @@ def info():
 
 
 def do_upgrade():
-    # different stopwords file for every index:
-    # need to update default stopwords path for every index
-    from invenio.base.globals import cfg
-    if cfg['CFG_BIBINDEX_REMOVE_STOPWORDS']:
-        if cfg['CFG_BIBINDEX_REMOVE_STOPWORDS'] == 1:
+    #different stopwords file for every index:
+    #need to update default stopwords path for every index
+    from invenio.config import CFG_BIBINDEX_REMOVE_STOPWORDS
+    if CFG_BIBINDEX_REMOVE_STOPWORDS:
+        if CFG_BIBINDEX_REMOVE_STOPWORDS == 1:
             run_sql("UPDATE idxINDEX SET remove_stopwords='stopwords.kb'")
 
diff --git a/invenio/utils/plotextractor/cli.py b/invenio/utils/plotextractor/cli.py
index 75e57e31ae..6b57a67fa1 100644
--- a/invenio/utils/plotextractor/cli.py
+++ b/invenio/utils/plotextractor/cli.py
@@ -33,9 +33,13 @@
 import sys
 import time
+
 from tempfile import mkstemp
 
-from invenio.base.globals import cfg
+from invenio.config import CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT, \
+    CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT, \
+    CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT, CFG_PLOTEXTRACTOR_DISALLOWED_TEX, \
+    CFG_SITE_URL, CFG_TMPSHAREDDIR
 from invenio.legacy.bibsched.bibtask import task_low_level_submission
 from invenio.utils.shell import Timeout, run_process_with_timeout, \
     run_shell_command
@@ -115,7 +119,7 @@ def main():
     with_docformat = None
     arXiv = None
     clean = False
-    refno_url = cfg['CFG_SITE_URL']
+    refno_url = CFG_SITE_URL
     skip_refno = False
     upload_mode = 'append'
 
@@ -171,14 +175,14 @@ def main():
             sys.exit()
 
     if sdir is None:
-        sdir = cfg['CFG_TMPSHAREDDIR']
+        sdir = CFG_TMPSHAREDDIR
     elif not os.path.isdir(sdir):
         try:
             os.makedirs(sdir)
         except Exception:
             write_message('Error: We can\'t use this sdir. using ' +
                           'CFG_TMPSHAREDDIR')
-            sdir = cfg['CFG_TMPSHAREDDIR']
+            sdir = CFG_TMPSHAREDDIR
 
     if skip_refno:
         refno_url = ""
@@ -230,7 +234,7 @@ def main():
 
     upload_to_site(squash_path, yes_i_know, upload_mode)
 
-def process_single(tarball, sdir=None, xtract_text=False,
+def process_single(tarball, sdir=CFG_TMPSHAREDDIR, xtract_text=False,
                    upload_plots=False, force=False, squash="",
                    yes_i_know=False, refno_url="",
                    clean=False, recid=None, upload_mode='append',
@@ -260,7 +264,6 @@ def process_single(tarball, sdir=None, xtract_text=False,
         (when C{upload_plots} is set to True.
     :return: marc_name(string): path to generated marcxml file
     """
-    sdir = sdir or cfg['CFG_TMPSHAREDDIR']
     sub_dir, refno = get_defaults(tarball, sdir, refno_url, recid)
     if not squash:
         marc_name = os.path.join(sub_dir, '%s.xml' % (refno,))
@@ -358,7 +361,7 @@ def get_defaults(tarball, sdir, refno_url, recid=None):
     """
     if not sdir or recid:
         # Missing sdir: using default directory: CFG_TMPDIR
-        sdir = cfg['CFG_TMPSHAREDDIR']
+        sdir = CFG_TMPSHAREDDIR
     else:
         sdir = os.path.split(tarball)[0]
 
@@ -489,10 +492,10 @@ def get_context(lines, backwards=False):
     # 2. If not, see if this is a TeX tag and see if its 'illegal'
     # 3. Otherwise, add word to context
     for word in word_list:
-        if len(context) >= cfg['CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT']:
+        if len(context) >= CFG_PLOTEXTRACTOR_CONTEXT_WORD_LIMIT:
             break
         match = tex_tag.match(word)
-        if match and match.group(1) in cfg['CFG_PLOTEXTRACTOR_DISALLOWED_TEX']:
+        if match and match.group(1) in CFG_PLOTEXTRACTOR_DISALLOWED_TEX:
             # TeX Construct matched, return
             if backwards:
                 # When reversed we need to go back and
@@ -513,9 +516,9 @@ def get_context(lines, backwards=False):
     if backwards:
         sentence_list.reverse()
 
-    if len(sentence_list) > cfg['CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT']:
+    if len(sentence_list) > CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT:
         return " ".join(
-            sentence_list[:cfg['CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT']])
+            sentence_list[:CFG_PLOTEXTRACTOR_CONTEXT_SENTENCE_LIMIT])
     else:
         return " ".join(sentence_list)
 
@@ -554,7 +557,7 @@ def extract_context(tex_file, extracted_image_data):
                  lines)]
     for startindex, endindex in indicies:
         # Retrive all lines before label until beginning of file
-        i = startindex - cfg['CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT']
+        i = startindex - CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT
         if i < 0:
             text_before = lines[:startindex]
         else:
@@ -562,7 +565,7 @@ def extract_context(tex_file, extracted_image_data):
         context_before = get_context(text_before, backwards=True)
 
         # Retrive all lines from label until end of file and get context
-        i = endindex + cfg['CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT']
+        i = endindex + CFG_PLOTEXTRACTOR_CONTEXT_EXTRACT_LIMIT
         text_after = lines[endindex:i]
         context_after = get_context(text_after)
         context_list.append(
diff --git a/invenio/utils/plotextractor/getter.py b/invenio/utils/plotextractor/getter.py
index 8f9559c261..7681f6c00e 100644
--- a/invenio/utils/plotextractor/getter.py
+++ b/invenio/utils/plotextractor/getter.py
@@ -18,7 +18,11 @@
 # 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 
 import time, os, sys, re
-from invenio.base.globals import cfg
+from invenio.config import CFG_TMPDIR, \
+    CFG_PLOTEXTRACTOR_SOURCE_BASE_URL, \
+    CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER, \
+    CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER, \
+    CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT
 from .config import CFG_PLOTEXTRACTOR_DESY_BASE, \
     CFG_PLOTEXTRACTOR_DESY_PIECE
 from invenio.legacy.search_engine import get_record
@@ -312,7 +316,7 @@ def tarballs_by_arXiv_id(arXiv_ids, sdir):
         tarball, dummy_pdf = harvest_single(arXiv_id, sdir, ("tarball",))
         if tarball != None:
             tarballs.append(tarball)
-            time.sleep(cfg['CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT'])
+            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
 
     return tarballs
 
@@ -351,7 +355,7 @@ def parse_and_download(infile, sdir):
                 write_message(sys.exc_info()[0])
             filename = os.path.join(tardir, filename)
             tarfiles.append(filename)
-            time.sleep(cfg['CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT']) # be nice!
+            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT) # be nice!
         elif line.startswith('arXiv'):
             tarfiles.extend(tarballs_by_arXiv_id([line.strip()], sdir))
 
@@ -370,18 +374,18 @@ def harvest_single(single, to_dir, selection=("tarball", "pdf")):
         if not found
     """
 
-    if single.find('arXiv') > -1 and 'arxiv.org' in cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'].lower():
+    if single.find('arXiv') > -1 and 'arxiv.org' in CFG_PLOTEXTRACTOR_SOURCE_BASE_URL.lower():
         id_str = re.findall('[a-zA-Z\\-]+/\\d+|\\d+\\.\\d+', single)[0]
         idno = id_str.split('/')
         if len(idno) > 0:
             idno = idno[-1]
         yymm = int(idno[:4])
         yymm_dir = make_useful_directories(yymm, to_dir)
-        url_for_file = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + \
-                       cfg['CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER'] + \
+        url_for_file = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + \
+                       CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER + \
                        id_str
-        url_for_pdf = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + \
-                      cfg['CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER'] + \
+        url_for_pdf = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + \
+                      CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER + \
                       id_str + '.pdf' # adds '.pdf' to avoid arXiv internal redirect from arXivID to arXivID.pdf
         individual_file = 'arXiv:' + id_str.replace('/', '_')
         individual_dir = make_single_directory(yymm_dir, individual_file)
@@ -409,14 +413,14 @@ def harvest_single(single, to_dir, selection=("tarball", "pdf")):
 
         return (tarball, pdf)
 
-    elif single.find('arXiv') > -1 and cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] != '':
+    elif single.find('arXiv') > -1 and CFG_PLOTEXTRACTOR_SOURCE_BASE_URL != '':
         # hmm... is it a filesystem?
-        if cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'].startswith('/'):
-            if not os.path.exists(cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL']):
+        if CFG_PLOTEXTRACTOR_SOURCE_BASE_URL.startswith('/'):
+            if not os.path.exists(CFG_PLOTEXTRACTOR_SOURCE_BASE_URL):
                 write_message('PROBLEM WITH CFG_PLOTEXTRACTOR_SOURCE_BASE_URL: we cannot ' + \
                               'find this folder!')
                 return (None, None)
-        for root, files, dummy in os.walk(cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL']):
+        for root, files, dummy in os.walk(CFG_PLOTEXTRACTOR_SOURCE_BASE_URL):
             for file_name in files:
                 id_no = single.replace('arXiv', '')
                 if file_name.find(id_no) > -1 or\
@@ -430,8 +434,8 @@ def harvest_single(single, to_dir, selection=("tarball", "pdf")):
         return (None, None)
 
     # okay... is it... a website?
-    elif cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'].startswith('http') and "tarball" in selection:
-        url_for_file = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + single
+    elif CFG_PLOTEXTRACTOR_SOURCE_BASE_URL.startswith('http') and "tarball" in selection:
+        url_for_file = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + single
         individual_file = os.path.join(to_dir, single)
         abs_path = os.path.join(to_dir, individual_file)
         try:
@@ -499,7 +503,7 @@ def src_pdf_from_marc(marc_file):
     DESY_match = 'DESY-\\d{2,4}-\\d{3}'
     pdf_loc = None
-    to_dir = os.path.join(cfg['CFG_TMPDIR'], 'plotdata')
+    to_dir = os.path.join(CFG_TMPDIR, 'plotdata')
 
     possible_match = re.search(arXiv_match, marc_text)
     if possible_match != None:
@@ -534,7 +538,7 @@ def harvest_from_file(filename, to_dir):
                 write_message('error on ' + arXiv_name + '. continuing.')
                 continue
             harvest_single(arXiv_name, to_dir)
-            time.sleep(cfg['CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT'])
+            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
     except IOError:
         write_message('Something is wrong with the file!')
 
@@ -577,20 +581,20 @@ def old_URL_harvest(from_date, to_date, to_dir, area):
             arXiv_id = area[AREA_STRING_INDEX] + next_to_harvest
             individual_dir = make_single_directory(sub_dir, arXiv_id)
 
-            full_url = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + cfg['CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER'] + \
+            full_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER + \
                        area[URL] + next_to_harvest
             abs_path = os.path.join(individual_dir,
                                     area[AREA_STRING_INDEX] + next_to_harvest)
             if not download_url(url=full_url,
                                 content_type='tar',
                                 download_to_file=abs_path):
                 break
 
-            full_pdf_url = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + cfg['CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER'] + \
+            full_pdf_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER + \
                            area[URL] + next_to_harvest
             abs_path = os.path.join(individual_dir,
                                     area[AREA_STRING_INDEX] + next_to_harvest + PDF_EXTENSION)
             download_url(url=full_pdf_url,
                          content_type='pdf',
                          download_to_file=abs_path)
-            time.sleep(cfg['CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT'])
+            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT)
 
         if yearmonthindex % 100 == 12:
             # we reached the end of the year!
             yearmonthindex = yearmonthindex + FIX_FOR_YEAR_END
@@ -634,7 +638,7 @@ def new_URL_harvest(from_date, from_index, to_dir):
             arXiv_id = ARXIV_HEADER + next_to_harvest
             individual_dir = make_single_directory(sub_dir, arXiv_id)
 
-            full_url = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + cfg['CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER'] + \
+            full_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER + \
                        next_to_harvest
             abs_path = os.path.join(individual_dir, ARXIV_HEADER + next_to_harvest)
             if not download_url(url=full_url,
                                 content_type='tar',
                                 download_to_file=abs_path):
                 break
 
-            full_pdf_url = cfg['CFG_PLOTEXTRACTOR_SOURCE_BASE_URL'] + cfg['CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER'] + \
+            full_pdf_url = CFG_PLOTEXTRACTOR_SOURCE_BASE_URL + CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER + \
                            next_to_harvest
             abs_path = os.path.join(individual_dir,
                                     ARXIV_HEADER + next_to_harvest + PDF_EXTENSION)
             download_url(url=full_pdf_url,
                          content_type='pdf',
                          download_to_file=abs_path)
-            time.sleep(cfg['CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT']) # be nice to remote server
+            time.sleep(CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT) # be nice to remote server
 
         if yearmonthindex % 100 == 12:
             # we reached the end of the year!