From 407e6f7c4e64190e470e156d0eb20f2bbb20e2c8 Mon Sep 17 00:00:00 2001 From: Jiri Kuncar Date: Wed, 25 Feb 2015 15:12:45 +0100 Subject: [PATCH] collections: initial release * NOTE adds a new calculated field '_collections' to records, from which the 'collection' index is created. (closes #2638) * NOTE the collection reclist is no longer populated. Collection membership is resolved via the collection phrase index, using a query matcher based on record data; hence no second-order operator will work in a collection query definition. Signed-off-by: Jiri Kuncar --- invenio/base/scripts/demosite.py | 19 +- invenio/base/templates/footer_base.html | 2 +- invenio/base/templates/header_base.html | 2 +- invenio/ext/legacy/__init__.py | 16 +- invenio/ext/sqlalchemy/utils.py | 104 ++ invenio/ext/template/context_processor.py | 4 +- invenio/legacy/bibcirculation/webinterface.py | 2 +- invenio/legacy/bibdocfile/cli.py | 5 +- invenio/legacy/bibdocfile/webinterface.py | 6 +- invenio/legacy/bibedit/utils.py | 2 +- invenio/legacy/bibexport/sitemap.py | 29 +- invenio/legacy/bibindex/engine.py | 7 +- invenio/legacy/bibindex/engine_utils.py | 3 + invenio/legacy/bibknowledge/adminlib.py | 2 +- invenio/legacy/search_engine/__init__.py | 14 +- invenio/legacy/webalert/alert_engine.py | 2 +- invenio/legacy/webcomment/webinterface.py | 2 +- invenio/legacy/weblinkback/webinterface.py | 2 +- invenio/legacy/websearch/scripts/webcoll.py | 60 - invenio/legacy/websearch/webcoll.py | 1223 ----------------- invenio/legacy/websearch/webinterface.py | 85 +- invenio/legacy/webstat/engine.py | 2 +- invenio/legacy/webstat/templates.py | 2 +- invenio/legacy/webstyle/templates.py | 4 +- invenio/legacy/websubmit/webinterface.py | 2 +- invenio/modules/baskets/models.py | 2 +- invenio/modules/classifier/models.py | 2 +- invenio/modules/collections/__init__.py | 0 invenio/modules/collections/cache.py | 235 ++ invenio/modules/collections/decorators.py | 57 + .../admin_forms.py => collections/forms.py} | 8 +- invenio/modules/collections/models.py | 750 ++++++++++ .../modules/collections/recordext/__init__.py | 0 .../recordext/fields/collections.cfg | 24 + .../recordext/functions/__init__.py | 0 .../functions/get_record_collections.py | 62 + .../modules/collections/searchext/__init__.py | 0 .../collections/searchext/units/__init__.py | 0 .../searchext/units/collection.py | 0 invenio/modules/collections/views/__init__.py | 25 + .../{search => collections}/views/admin.py | 100 +- .../modules/collections/views/collections.py | 105 ++ invenio/modules/comments/api.py | 2 +- invenio/modules/communities/models.py | 6 +- invenio/modules/communities/tasks.py | 2 +- .../communities/testsuite/test_communities.py | 29 +- invenio/modules/formatter/__init__.py | 6 +- .../formatter/templates/format/records/xr.tpl | 2 +- invenio/modules/knowledge/admin.py | 2 +- invenio/modules/knowledge/api.py | 2 +- invenio/modules/knowledge/forms.py | 2 +- invenio/modules/knowledge/models.py | 2 +- invenio/modules/ranker/models.py | 2 +- invenio/modules/records/access.py | 4 +- invenio/modules/records/models.py | 4 +- invenio/modules/records/views.py | 2 +- invenio/modules/search/admin.py | 2 +- invenio/modules/search/cache.py | 220 +-- invenio/modules/search/engine.py | 2 +- invenio/modules/search/facet_builders.py | 24 +- invenio/modules/search/fixtures.py | 6 +- invenio/modules/search/forms.py | 2 +- invenio/modules/search/models.py | 821 +---------- invenio/modules/search/registry.py | 2 +- .../templates/search/form/controls_base.html | 2 +- .../search/testsuite/test_search_engine.py | 4 +-
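To make the second NOTE concrete: since the reclist column is no longer filled by webcoll, each record carries a calculated '_collections' field computed by matching collection queries against the record data itself, and the 'collection' phrase index is built from that field. The patch ships this logic in invenio/modules/collections/recordext/functions/get_record_collections.py (62 lines, not reproduced in the hunks shown here), so the following is only a minimal sketch under assumed names; the matcher, the query format and the '980__a' field are illustrative, not the actual implementation.

# Illustrative sketch only (an assumption, not the contents of the new
# get_record_collections.py added by this patch): derive the '_collections'
# calculated field by matching each collection query against the record data.

def get_record_collections(record, collection_queries, matches):
    """Return names of collections whose query matches the given record.

    :param record: dict-like record data
    :param collection_queries: iterable of (collection_name, dbquery) pairs
    :param matches: callable(record, dbquery) -> bool, the query matcher
    """
    return [name for name, dbquery in collection_queries
            if dbquery and matches(record, dbquery)]


def naive_matcher(record, dbquery):
    """Toy matcher understanding only 'field:value' queries (assumption)."""
    field, _, value = dbquery.partition(':')
    stored = record.get(field, [])
    values = stored if isinstance(stored, (list, tuple)) else [stored]
    return value.strip('"') in values


if __name__ == '__main__':
    record = {'980__a': ['ARTICLE'], 'year': '2015'}
    queries = [('Articles', '980__a:ARTICLE'), ('Theses', '980__a:THESIS')]
    # The calculated field from which the 'collection' index is then built:
    record['_collections'] = get_record_collections(record, queries, naive_matcher)
    print(record['_collections'])  # ['Articles']

This also illustrates why second-order operators cannot appear in a collection query definition: the matcher evaluates each query against a single record's data rather than against full search results.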
.../modules/search/testsuite/test_views.py | 4 +- invenio/modules/search/utils.py | 69 +- invenio/modules/search/views/__init__.py | 8 +- invenio/modules/search/views/search.py | 158 +-- invenio/modules/sorter/models.py | 2 +- invenio/modules/tags/views.py | 2 +- .../testsuite/test_textminer_documents.py | 2 +- invenio/testsuite/test_ext_template.py | 8 +- setup.py | 1 - 75 files changed, 1632 insertions(+), 2745 deletions(-) delete mode 100644 invenio/legacy/websearch/scripts/webcoll.py delete mode 100644 invenio/legacy/websearch/webcoll.py create mode 100644 invenio/modules/collections/__init__.py create mode 100644 invenio/modules/collections/cache.py create mode 100644 invenio/modules/collections/decorators.py rename invenio/modules/{search/admin_forms.py => collections/forms.py} (90%) create mode 100644 invenio/modules/collections/models.py create mode 100644 invenio/modules/collections/recordext/__init__.py create mode 100644 invenio/modules/collections/recordext/fields/collections.cfg create mode 100644 invenio/modules/collections/recordext/functions/__init__.py create mode 100644 invenio/modules/collections/recordext/functions/get_record_collections.py create mode 100644 invenio/modules/collections/searchext/__init__.py create mode 100644 invenio/modules/collections/searchext/units/__init__.py rename invenio/modules/{search => collections}/searchext/units/collection.py (100%) create mode 100644 invenio/modules/collections/views/__init__.py rename invenio/modules/{search => collections}/views/admin.py (76%) create mode 100644 invenio/modules/collections/views/collections.py diff --git a/invenio/base/scripts/demosite.py b/invenio/base/scripts/demosite.py index 514f096cdf..47b14a4496 100644 --- a/invenio/base/scripts/demosite.py +++ b/invenio/base/scripts/demosite.py @@ -31,6 +31,8 @@ import pkg_resources import sys +from itertools import count + from invenio.ext.script import Manager manager = Manager(usage=__doc__) @@ -102,23 +104,22 @@ def populate(packages=[], default_data=True, files=None, print("ERROR: failed execution of", cmd) sys.exit(1) + i = count(1).next for cmd in ["bin/bibdocfile --textify --with-ocr --recid 97", "bin/bibdocfile --textify --all", "bin/bibindex -u admin", - "bin/bibindex %d" % (job_id + 1,), + "bin/bibindex %d" % (job_id + i(),), "bin/bibindex -u admin -w global", - "bin/bibindex %d" % (job_id + 2,), + "bin/bibindex %d" % (job_id + i(),), "bin/bibreformat -u admin -o HB", - "bin/bibreformat %d" % (job_id + 3,), - "bin/webcoll -u admin", - "bin/webcoll %d" % (job_id + 4,), + "bin/bibreformat %d" % (job_id + i(),), "bin/bibrank -u admin", - "bin/bibrank %d" % (job_id + 5,), + "bin/bibrank %d" % (job_id + i(),), "bin/bibsort -u admin -R", - "bin/bibsort %d" % (job_id + 6,), + "bin/bibsort %d" % (job_id + i(),), "bin/oairepositoryupdater -u admin", - "bin/oairepositoryupdater %d" % (job_id + 7,), - "bin/bibupload %d" % (job_id + 8,)]: + "bin/oairepositoryupdater %d" % (job_id + i(),), + "bin/bibupload %d" % (job_id + i(),)]: cmd = os.path.join(CFG_PREFIX, cmd) if os.system(cmd): print("ERROR: failed execution of", cmd) diff --git a/invenio/base/templates/footer_base.html b/invenio/base/templates/footer_base.html index 2c31f08a0c..9359c00893 100644 --- a/invenio/base/templates/footer_base.html +++ b/invenio/base/templates/footer_base.html @@ -31,7 +31,7 @@ diff --git a/invenio/ext/legacy/__init__.py b/invenio/ext/legacy/__init__.py index 56604199b5..ca05dceeed 100644 --- a/invenio/ext/legacy/__init__.py +++ b/invenio/ext/legacy/__init__.py @@ -1,6 +1,6 @@ # 
-*- coding: utf-8 -*- # This file is part of Invenio. -# Copyright (C) 2011, 2012, 2013, 2014 CERN. +# Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN. # # Invenio is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -24,11 +24,11 @@ import sys # Import the remote debugger as a first thing, if allowed -#FIXME enable remote_debugger when invenio.config is ready -#try: -# from invenio.utils import remote_debugger -#except: -# remote_debugger = None +# FIXME enable remote_debugger when invenio.config is ready +# try: +# from invenio.utils import remote_debugger +# except: +# remote_debugger = None from werkzeug.exceptions import HTTPException from werkzeug.wrappers import BaseResponse @@ -57,10 +57,8 @@ def cli_cmd_reset(sender, yes_i_know=False, drop=True, **kwargs): # cli_cmd_reset_fieldnames(conf) for cmd in ["%s/bin/webaccessadmin -u admin -c -a -D" % CFG_PREFIX, - "%s/bin/webcoll -u admin" % CFG_PREFIX, - "%s/bin/webcoll 1" % CFG_PREFIX, "%s/bin/bibsort -u admin --load-config" % CFG_PREFIX, - "%s/bin/bibsort 2" % CFG_PREFIX, ]: + "%s/bin/bibsort 1" % CFG_PREFIX, ]: if os.system(cmd): print("ERROR: failed execution of", cmd) sys.exit(1) diff --git a/invenio/ext/sqlalchemy/utils.py b/invenio/ext/sqlalchemy/utils.py index 12cfdefbdd..29aab23275 100644 --- a/invenio/ext/sqlalchemy/utils.py +++ b/invenio/ext/sqlalchemy/utils.py @@ -41,6 +41,11 @@ def save(self): from sqlalchemy.exc import OperationalError from sqlalchemy.ext.declarative import declared_attr from sqlalchemy.orm import class_mapper, properties +from sqlalchemy.orm.collections import ( + InstrumentedList, + attribute_mapped_collection, + collection, +) first_cap_re = re.compile('(.)([A-Z][a-z]+)') all_cap_re = re.compile('([a-z0-9])([A-Z])') @@ -258,3 +263,102 @@ def test_sqla_utf8_chain(): table.drop(bind=db.engine) print(" [OK]") + + +class IntbitsetPickle(object): + + """Pickle implementation for intbitset.""" + + def dumps(self, obj, protocol=None): + """Dump intbitset to byte stream.""" + if obj is not None: + return obj.fastdump() + return intbitset([]).fastdump() + + def loads(self, obj): + """Load byte stream to intbitset.""" + try: + return intbitset(obj) + except Exception: + return intbitset() + + +def IntbitsetCmp(x, y): + """Compare two intbitsets.""" + if x is None or y is None: + return False + else: + return x == y + + +class OrderedList(InstrumentedList): + + """Implemented ordered instrumented list.""" + + def append(self, item): + if self: + s = sorted(self, key=lambda obj: obj.score) + item.score = s[-1].score + 1 + else: + item.score = 1 + InstrumentedList.append(self, item) + + def set(self, item, index=0): + if self: + s = sorted(self, key=lambda obj: obj.score) + if index >= len(s): + item.score = s[-1].score + 1 + elif index < 0: + item.score = s[0].score + index = 0 + else: + item.score = s[index].score + 1 + + for i, it in enumerate(s[index:]): + it.score = item.score + i + 1 + # if s[i+1].score more then break + else: + item.score = index + InstrumentedList.append(self, item) + + def pop(self, item): + # FIXME + if self: + obj_list = sorted(self, key=lambda obj: obj.score) + for i, it in enumerate(obj_list): + if obj_list[i] == item: + return InstrumentedList.pop(self, i) + + +def attribute_multi_dict_collection(creator, key_attr, val_attr): + """Define new attribute based mapping.""" + class MultiMappedCollection(dict): + + def __init__(self, data=None): + self._data = data or {} + + @collection.appender + def _append(self, obj): + l = 
self._data.setdefault(key_attr(obj), []) + l.append(obj) + + def __setitem__(self, key, value): + self._append(creator(key, value)) + + def __getitem__(self, key): + return tuple(val_attr(obj) for obj in self._data[key]) + + @collection.remover + def _remove(self, obj): + self._data[key_attr(obj)].remove(obj) + + @collection.iterator + def _iterator(self): + for objs in self._data.itervalues(): + for obj in objs: + yield obj + + def __repr__(self): + return '%s(%r)' % (type(self).__name__, self._data) + + return MultiMappedCollection diff --git a/invenio/ext/template/context_processor.py b/invenio/ext/template/context_processor.py index f6411a6deb..74298dbadf 100644 --- a/invenio/ext/template/context_processor.py +++ b/invenio/ext/template/context_processor.py @@ -52,7 +52,7 @@ class template_args(object): def setup_app(app): - @template_args('search.index', app=app) + @template_args('collections.index', app=app) def foo(): return dict(foo='bar') @@ -61,7 +61,7 @@ def foo(): .. code-block:: python - from invenio.modules.search.views.search import index + from invenio.modules.collections.views.collections import index @template_args(index) def bar(): diff --git a/invenio/legacy/bibcirculation/webinterface.py b/invenio/legacy/bibcirculation/webinterface.py index c4cda83b93..cff0866087 100644 --- a/invenio/legacy/bibcirculation/webinterface.py +++ b/invenio/legacy/bibcirculation/webinterface.py @@ -71,7 +71,7 @@ CFG_BIBCIRCULATION_ACQ_STATUS_NEW, \ AMZ_ACQUISITION_IDENTIFIER_TAG -from invenio.modules.search.models import Collection +from invenio.modules.collections.models import Collection get_colID = lambda name: Collection.query.filter_by(name=name).value('id') diff --git a/invenio/legacy/bibdocfile/cli.py b/invenio/legacy/bibdocfile/cli.py index ba2c5dd283..8ee9ce4f58 100644 --- a/invenio/legacy/bibdocfile/cli.py +++ b/invenio/legacy/bibdocfile/cli.py @@ -433,10 +433,11 @@ def print_table(title, table): for row in table: print("\t".join(str(elem) for elem in row)) - for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"): + from invenio.modules.collections.cache import get_collection_reclist + for collection, in run_sql("SELECT name FROM collection ORDER BY name"): print("-" * 79) print("Statistic for: %s " % collection) - reclist = intbitset(reclist) + reclist = get_collection_reclist(collection) if reclist: sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')' print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql diff --git a/invenio/legacy/bibdocfile/webinterface.py b/invenio/legacy/bibdocfile/webinterface.py index 54bc6e76f9..68cc96f86a 100644 --- a/invenio/legacy/bibdocfile/webinterface.py +++ b/invenio/legacy/bibdocfile/webinterface.py @@ -52,14 +52,14 @@ from invenio.base.i18n import gettext_set_language from invenio.legacy.search_engine import \ guess_primary_collection_of_a_record, record_exists, \ - create_navtrail_links, check_user_can_view_record, \ - is_user_owner_of_record + create_navtrail_links, check_user_can_view_record +from invenio.modules.records.access import is_user_owner_of_record from invenio.legacy.bibdocfile.api import BibRecDocs, normalize_format, file_strip_ext, \ stream_restricted_icon, BibDoc, InvenioBibDocFileError, \ get_subformat_from_format from invenio.ext.logging import register_exception from 
invenio.legacy.websearch.adminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts -from invenio.modules.search.models import Collection +from invenio.modules.collections.models import Collection import invenio.legacy.template bibdocfile_templates = invenio.legacy.template.load('bibdocfile') webstyle_templates = invenio.legacy.template.load('webstyle') diff --git a/invenio/legacy/bibedit/utils.py b/invenio/legacy/bibedit/utils.py index bc22bd3b55..20a4b495fc 100644 --- a/invenio/legacy/bibedit/utils.py +++ b/invenio/legacy/bibedit/utils.py @@ -87,7 +87,7 @@ from invenio.base.globals import cfg from invenio.legacy.bibcatalog.api import BIBCATALOG_SYSTEM -from invenio.modules.search.models import Collection +from invenio.modules.collections.models import Collection try: from cPickle import loads diff --git a/invenio/legacy/bibexport/sitemap.py b/invenio/legacy/bibexport/sitemap.py index 401682e442..4e5651f835 100644 --- a/invenio/legacy/bibexport/sitemap.py +++ b/invenio/legacy/bibexport/sitemap.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of Invenio. -# Copyright (C) 2008, 2010, 2011, 2014 CERN. +# Copyright (C) 2008, 2010, 2011, 2014, 2015 CERN. # # Invenio is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -39,7 +39,6 @@ from invenio.config import CFG_SITE_URL, CFG_WEBDIR, CFG_ETCDIR, \ CFG_SITE_RECORD, CFG_SITE_LANGS, CFG_TMPSHAREDDIR from intbitset import intbitset -from invenio.legacy.websearch.webcoll import Collection from invenio.legacy.bibsched.bibtask import write_message, task_update_progress, task_sleep_now_if_required from invenio.utils.text import encode_for_xml from invenio.utils.url import get_canonical_and_alternates_urls @@ -115,19 +114,19 @@ def get_collection_last_modification(collection): return max(minimum_timestamp, last_mod) output = [] - for coll_name in base_collections: - mother_collection = Collection(coll_name) - if not mother_collection.restricted_p(): - last_mod = get_collection_last_modification(mother_collection) - output.append((coll_name, last_mod)) - for descendant in mother_collection.get_descendants(type='r'): - if not descendant.restricted_p(): - last_mod = get_collection_last_modification(descendant) - output.append((descendant.name, last_mod)) - for descendant in mother_collection.get_descendants(type='v'): - if not descendant.restricted_p(): - last_mod = get_collection_last_modification(descendant) - output.append((descendant.name, last_mod)) + # for coll_name in base_collections: + # mother_collection = Collection(coll_name) + # if not mother_collection.restricted_p(): + # last_mod = get_collection_last_modification(mother_collection) + # output.append((coll_name, last_mod)) + # for descendant in mother_collection.get_descendants(type='r'): + # if not descendant.restricted_p(): + # last_mod = get_collection_last_modification(descendant) + # output.append((descendant.name, last_mod)) + # for descendant in mother_collection.get_descendants(type='v'): + # if not descendant.restricted_p(): + # last_mod = get_collection_last_modification(descendant) + # output.append((descendant.name, last_mod)) return output def filter_fulltexts(recids, fulltext_type=None): diff --git a/invenio/legacy/bibindex/engine.py b/invenio/legacy/bibindex/engine.py index 386744b395..2d00cacedc 100644 --- a/invenio/legacy/bibindex/engine.py +++ b/invenio/legacy/bibindex/engine.py @@ -1435,7 +1435,7 @@ def add_recID_range(self, recID1, recID2): wlist[recID]) marc, nonmarc = 
self.find_nonmarc_records(recID1, recID2) - if marc: + if marc and len(self.tags): collector = TermCollector(self.tokenizer, self.tokenizer_type, self.table_type, @@ -1443,14 +1443,15 @@ def add_recID_range(self, recID1, recID2): [recID1, recID2]) collector.set_special_tags(self.special_tags) wlist = collector.collect(marc, wlist) - if nonmarc: + if nonmarc or (not len(self.tags) and len(self.nonmarc_tags)): collector = NonmarcTermCollector(self.tokenizer, self.tokenizer_type, self.table_type, self.nonmarc_tags, [recID1, recID2]) collector.set_special_tags(self.special_tags) - wlist = collector.collect(nonmarc, wlist) + toindex = nonmarc if len(self.tags) else marc + wlist = collector.collect(toindex, wlist) # lookup index-time synonyms: synonym_kbrs = get_all_synonym_knowledge_bases() diff --git a/invenio/legacy/bibindex/engine_utils.py b/invenio/legacy/bibindex/engine_utils.py index 4c071ecda0..3ac8e3a166 100644 --- a/invenio/legacy/bibindex/engine_utils.py +++ b/invenio/legacy/bibindex/engine_utils.py @@ -35,6 +35,7 @@ CFG_BIBINDEX_CHARS_PUNCTUATION, \ CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS from invenio.legacy.bibindex.engine_config import CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR +from invenio.utils.memoise import memoize latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]') @@ -288,6 +289,7 @@ def get_index_name_from_index_id(index_id): return '' +@memoize def get_field_tags(field, tagtype="marc"): """Returns a list of tags for the field code 'field'. Works for both MARC and nonMARC tags. @@ -378,6 +380,7 @@ def get_nonmarc_tag_indexes(nonmarc_tag, virtual=True): return () +@memoize def get_index_tags(indexname, virtual=True, tagtype="marc"): """Returns the list of tags that are indexed inside INDEXNAME. Returns empty list in case there are no tags indexed in this index. 
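For context on the @memoize decorators added above: get_field_tags() and get_index_tags() are now cached per argument tuple, so repeated tag lookups during an indexing run reuse a single database query. The decorator comes from invenio.utils.memoise and is not part of this diff; the snippet below is only an assumed minimal equivalent illustrating the caching semantics the change relies on.

import functools


def memoize(func):
    """Minimal per-argument cache; a stand-in assumption for invenio.utils.memoise.memoize."""
    cache = {}

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        key = (args, tuple(sorted(kwargs.items())))
        if key not in cache:
            cache[key] = func(*args, **kwargs)  # executed only once per distinct arguments
        return cache[key]
    return wrapper


@memoize
def get_field_tags(field, tagtype="marc"):
    # Hypothetical stand-in for the real DB lookup in engine_utils.py.
    print("querying tags for %r (%s)" % (field, tagtype))
    return ['100__a', '700__a'] if field == 'author' else []


if __name__ == '__main__':
    get_field_tags('author')  # triggers the (simulated) database query
    get_field_tags('author')  # served from the cache, no second query

Note that memoized results live for the lifetime of the process, so changes to the tag configuration are presumably only picked up by a fresh task run.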
diff --git a/invenio/legacy/bibknowledge/adminlib.py b/invenio/legacy/bibknowledge/adminlib.py index 8c49532f8b..b6473eb5af 100644 --- a/invenio/legacy/bibknowledge/adminlib.py +++ b/invenio/legacy/bibknowledge/adminlib.py @@ -107,7 +107,7 @@ def perform_request_knowledge_base_show(kb_id, ln=CFG_SITE_LANG, sortby="to", dyn_config = None collections = None if kb_type == 'd': - from invenio.modules.search.models import Collection + from invenio.modules.collections.models import Collection collections = [ c[0] for c in Collection.query.order_by('name').values('name') ] diff --git a/invenio/legacy/search_engine/__init__.py b/invenio/legacy/search_engine/__init__.py index e0235bd42b..5d7a3f8e7b 100644 --- a/invenio/legacy/search_engine/__init__.py +++ b/invenio/legacy/search_engine/__init__.py @@ -161,17 +161,17 @@ "rt_portalbox" : "Prt", "search_services": "SER"}; -from invenio.modules.search.cache import collection_reclist_cache -from invenio.modules.search.cache import collection_restricted_p -from invenio.modules.search.cache import restricted_collection_cache +from invenio.modules.collections.cache import collection_reclist_cache +from invenio.modules.collections.cache import collection_restricted_p +from invenio.modules.collections.cache import restricted_collection_cache from invenio.modules.search.utils import get_permitted_restricted_collections -from invenio.modules.search.cache import get_all_restricted_recids +from invenio.modules.collections.cache import get_all_restricted_recids from invenio.modules.records.access import check_user_can_view_record -from invenio.modules.search.cache import get_collection_reclist -from invenio.modules.search.cache import get_coll_i18nname +from invenio.modules.collections.cache import get_collection_reclist +from invenio.modules.collections.cache import get_coll_i18nname from invenio.modules.search.cache import get_field_i18nname from invenio.modules.indexer.models import IdxINDEX @@ -223,7 +223,7 @@ def get_coll_ancestors(coll): return coll_ancestors -from invenio.modules.search.cache import get_collection_allchildren +from invenio.modules.collections.cache import get_collection_allchildren def browse_pattern_phrases(req, colls, p, f, rg, ln=CFG_SITE_LANG): diff --git a/invenio/legacy/webalert/alert_engine.py b/invenio/legacy/webalert/alert_engine.py index 6396c88479..e3c1d409c1 100644 --- a/invenio/legacy/webalert/alert_engine.py +++ b/invenio/legacy/webalert/alert_engine.py @@ -54,7 +54,7 @@ CFG_EXTERNAL_COLLECTION_MAXRESULTS_ALERTS from invenio.legacy.websearch_external_collections.getter import HTTPAsyncPageGetter, async_download from invenio.legacy.websearch_external_collections.utils import get_collection_id -from invenio.modules.search.models import Collection +from invenio.modules.collections.models import Collection import invenio.legacy.template websearch_templates = invenio.legacy.template.load('websearch') diff --git a/invenio/legacy/webcomment/webinterface.py b/invenio/legacy/webcomment/webinterface.py index 619416d3a4..a777dabf49 100644 --- a/invenio/legacy/webcomment/webinterface.py +++ b/invenio/legacy/webcomment/webinterface.py @@ -91,7 +91,7 @@ stream_file, \ decompose_file, \ propose_next_docname -from invenio.modules.search.models import Collection +from invenio.modules.collections.models import Collection class WebInterfaceCommentsPages(WebInterfaceDirectory): """Defines the set of /comments pages.""" diff --git a/invenio/legacy/weblinkback/webinterface.py b/invenio/legacy/weblinkback/webinterface.py index 
98b8d04e47..2774c1205e 100644 --- a/invenio/legacy/weblinkback/webinterface.py +++ b/invenio/legacy/weblinkback/webinterface.py @@ -45,7 +45,7 @@ from invenio.legacy.webpage import pageheaderonly, pagefooteronly from invenio.legacy.websearch.adminlib import get_detailed_page_tabs from invenio.modules.access.engine import acc_authorize_action -from invenio.modules.search.models import Collection +from invenio.modules.collections.models import Collection import invenio.legacy.template webstyle_templates = invenio.legacy.template.load('webstyle') diff --git a/invenio/legacy/websearch/scripts/webcoll.py b/invenio/legacy/websearch/scripts/webcoll.py deleted file mode 100644 index e9b8b618b8..0000000000 --- a/invenio/legacy/websearch/scripts/webcoll.py +++ /dev/null @@ -1,60 +0,0 @@ -# This file is part of Invenio. -# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 CERN. -# -# Invenio is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# Invenio is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio; if not, write to the Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -from invenio.base.factory import with_app_context - - -@with_app_context() -def main(): - """Main that construct all the bibtask.""" - from invenio.legacy.bibsched.bibtask import task_init - from invenio.legacy.websearch.webcoll import ( - task_submit_elaborate_specific_parameter, task_submit_check_options, - task_run_core, __revision__) - - task_init(authorization_action="runwebcoll", - authorization_msg="WebColl Task Submission", - description="""Description: - webcoll updates the collection cache (record universe for a - given collection plus web page elements) based on invenio.conf and DB - configuration parameters. If the collection name is passed as an argument, - only this collection's cache will be updated. If the recursive option is - set as well, the collection's descendants will also be updated.\n""", - help_specific_usage=" -c, --collection\t Update cache for the given " - "collection only. [all]\n" - " -r, --recursive\t Update cache for the given collection and all its\n" - "\t\t\t descendants (to be used in combination with -c). [no]\n" - " -q, --quick\t\t Skip webpage cache update for those collections whose\n" - "\t\t\t reclist was not changed. Note: if you use this option, it is advised\n" - "\t\t\t to schedule, e.g. a nightly 'webcoll --force'. [no]\n" - " -f, --force\t\t Force update even if cache is up to date. [no]\n" - " -p, --part\t\t Update only certain cache parts (1=reclist," - " 2=webpage). [both]\n" - " -l, --language\t Update pages in only certain language" - " (e.g. fr,it,...). 
[all]\n", - version=__revision__, - specific_params=("c:rqfp:l:", [ - "collection=", - "recursive", - "quick", - "force", - "part=", - "language=" - ]), - task_submit_elaborate_specific_parameter_fnc=task_submit_elaborate_specific_parameter, - task_submit_check_options_fnc=task_submit_check_options, - task_run_fnc=task_run_core) diff --git a/invenio/legacy/websearch/webcoll.py b/invenio/legacy/websearch/webcoll.py deleted file mode 100644 index a6a9e165bf..0000000000 --- a/invenio/legacy/websearch/webcoll.py +++ /dev/null @@ -1,1223 +0,0 @@ -# -*- coding: utf-8 -*- -# This file is part of Invenio. -# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 CERN. -# -# Invenio is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# Invenio is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Invenio; if not, write to the Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. - -from __future__ import print_function - -"""Create Invenio collection cache.""" - -__revision__ = "$Id$" - -import calendar -import copy -import datetime -import sys -import cgi -import re -import os -import string -import time -from six.moves import cPickle - -from invenio.config import \ - CFG_CERN_SITE, \ - CFG_WEBSEARCH_INSTANT_BROWSE, \ - CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, \ - CFG_WEBSEARCH_I18N_LATEST_ADDITIONS, \ - CFG_CACHEDIR, \ - CFG_SITE_LANG, \ - CFG_SITE_NAME, \ - CFG_SITE_LANGS, \ - CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, \ - CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS, \ - CFG_SCOAP3_SITE, \ - CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES -from invenio.base.i18n import gettext_set_language -from invenio.modules.sorter.engine import sort_records -from invenio.modules.records.recordext.functions.get_creation_date import get_creation_date -from invenio.legacy.search_engine import get_field_i18nname, collection_restricted_p, EM_REPOSITORY -from invenio.legacy.dbquery import run_sql, Error, get_table_update_time -from invenio.legacy.bibrank.record_sorter import get_bibrank_methods -from invenio.utils.date import convert_datestruct_to_dategui, strftime -from invenio.modules.search.api import SearchEngine -from invenio.modules.formatter import format_record -from invenio.utils.shell import mymkdir -from intbitset import intbitset -from invenio.legacy.websearch_external_collections import \ - external_collection_load_states, \ - dico_collection_external_searches, \ - external_collection_sort_engine_by_name -from invenio.legacy.bibsched.bibtask import task_init, task_get_option, task_set_option, \ - write_message, task_has_option, task_update_progress, task_set_task_param, \ - task_sleep_now_if_required -import invenio.legacy.template -websearch_templates = invenio.legacy.template.load('websearch') - -from invenio.legacy.websearch_external_collections.searcher import external_collections_dictionary -from invenio.legacy.websearch_external_collections.config import CFG_EXTERNAL_COLLECTION_TIMEOUT -from invenio.legacy.websearch_external_collections.config import CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS - -from invenio.base.signals import 
webcoll_after_webpage_cache_update, \ - webcoll_after_reclist_cache_update - -# global vars -COLLECTION_HOUSE = {} # will hold collections we treat in this run of the program; a dict of {collname2, collobject1}, ... - -# CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE -- cache timestamp -# tolerance (in seconds), to account for the fact that an admin might -# accidentally happen to edit the collection definitions at exactly -# the same second when some webcoll process was about to be started. -# In order to be safe, let's put an exaggerated timestamp tolerance -# value such as 20 seconds: -CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE = 20 - -# CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE -- location of the cache -# timestamp file: -CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE = "%s/collections/last_updated" % CFG_CACHEDIR - -# CFG_CACHE_LAST_FAST_UPDATED_TIMESTAMP_FILE -- location of the cache -# timestamp file usef when running webcoll in the fast-mode. -CFG_CACHE_LAST_FAST_UPDATED_TIMESTAMP_FILE = "%s/collections/last_fast_updated" % CFG_CACHEDIR - - -def get_collection(colname): - """Return collection object from the collection house for given colname. - If does not exist, then create it.""" - if colname not in COLLECTION_HOUSE: - colobject = Collection(colname) - COLLECTION_HOUSE[colname] = colobject - return COLLECTION_HOUSE[colname] - -# auxiliary functions: -def is_selected(var, fld): - "Checks if the two are equal, and if yes, returns ' selected'. Useful for select boxes." - if var == fld: - return ' selected="selected"' - else: - return "" - -def get_field(recID, tag): - "Gets list of field 'tag' for the record with 'recID' system number." - - out = [] - digit = tag[0:2] - - bx = "bib%sx" % digit - bibx = "bibrec_bib%sx" % digit - query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \ - % (bx, bibx, recID, tag) - res = run_sql(query) - for row in res: - out.append(row[0]) - return out - -def check_nbrecs_for_all_external_collections(): - """Check if any of the external collections have changed their total number of records, aka nbrecs. - Return True if any of the total numbers of records have changed and False if they're all the same.""" - res = run_sql("SELECT name FROM collection WHERE dbquery LIKE 'hostedcollection:%';") - for row in res: - coll_name = row[0] - if (get_collection(coll_name)).check_nbrecs_for_external_collection(): - return True - return False - -class Collection: - "Holds the information on collections (id,name,dbquery)." - - def __init__(self, name=""): - "Creates collection instance by querying the DB configuration database about 'name'." 
- self.calculate_reclist_run_already = 0 # to speed things up without much refactoring - self.update_reclist_run_already = 0 # to speed things up without much refactoring - self.reclist_updated_since_start = 0 # to check if webpage cache need rebuilding - self.reclist_with_nonpublic_subcolls = intbitset() - # temporary counters for the number of records in hosted collections - self.nbrecs_tmp = None # number of records in a hosted collection - self.nbrecs_from_hosted_collections = 0 # total number of records from - # descendant hosted collections - if not name: - self.name = CFG_SITE_NAME # by default we are working on the home page - self.id = 1 - self.dbquery = None - self.nbrecs = None - self.reclist = intbitset() - self.old_reclist = intbitset() - self.reclist_updated_since_start = 1 - else: - self.name = name - try: - res = run_sql("""SELECT id,name,dbquery,nbrecs,reclist FROM collection - WHERE name=%s""", (name,)) - if res: - self.id = res[0][0] - self.name = res[0][1] - self.dbquery = res[0][2] - self.nbrecs = res[0][3] - try: - self.reclist = intbitset(res[0][4]) - except: - self.reclist = intbitset() - self.reclist_updated_since_start = 1 - else: # collection does not exist! - self.id = None - self.dbquery = None - self.nbrecs = None - self.reclist = intbitset() - self.reclist_updated_since_start = 1 - self.old_reclist = intbitset(self.reclist) - except Error as e: - print("Error %d: %s" % (e.args[0], e.args[1])) - sys.exit(1) - - def get_example_search_queries(self): - """Returns list of sample search queries for this collection. - """ - res = run_sql("""SELECT example.body FROM example - LEFT JOIN collection_example on example.id=collection_example.id_example - WHERE collection_example.id_collection=%s ORDER BY collection_example.score""", (self.id,)) - return [query[0] for query in res] - - def get_name(self, ln=CFG_SITE_LANG, name_type="ln", prolog="", epilog="", prolog_suffix=" ", epilog_suffix=""): - """Return nicely formatted collection name for language LN. - The NAME_TYPE may be 'ln' (=long name), 'sn' (=short name), etc.""" - out = prolog - i18name = "" - res = run_sql("SELECT value FROM collectionname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, name_type)) - try: - i18name += res[0][0] - except IndexError: - pass - if i18name: - out += i18name - else: - out += self.name - out += epilog - return out - - def get_collectionbox_name(self, ln=CFG_SITE_LANG, box_type="r"): - """ - Return collection-specific labelling of 'Focus on' (regular - collection), 'Narrow by' (virtual collection) and 'Latest - addition' boxes. - - If translation for given language does not exist, use label - for CFG_SITE_LANG. If no custom label is defined for - CFG_SITE_LANG, return default label for the box. 
- - @param ln: the language of the label - @param box_type: can be 'r' (=Narrow by), 'v' (=Focus on), 'l' (=Latest additions) - """ - i18name = "" - res = run_sql("SELECT value FROM collectionboxname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, ln, box_type)) - try: - i18name = res[0][0] - except IndexError: - res = run_sql("SELECT value FROM collectionboxname WHERE id_collection=%s AND ln=%s AND type=%s", (self.id, CFG_SITE_LANG, box_type)) - try: - i18name = res[0][0] - except IndexError: - pass - - if not i18name: - # load the right message language - _ = gettext_set_language(ln) - if box_type == "v": - i18name = _('Focus on:') - elif box_type == "r": - if CFG_SCOAP3_SITE: - i18name = _('Narrow by publisher/journal:') - else: - i18name = _('Narrow by collection:') - elif box_type == "l": - i18name = _('Latest additions:') - - return i18name - - def get_ancestors(self): - "Returns list of ancestors of the current collection." - ancestors = [] - ancestors_ids = intbitset() - id_son = self.id - while 1: - query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\ - "WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son) - res = run_sql(query, None, 1) - if res: - col_ancestor = get_collection(res[0][1]) - # looking for loops - if self.id in ancestors_ids: - write_message("Loop found in collection %s" % self.name, stream=sys.stderr) - raise OverflowError("Loop found in collection %s" % self.name) - else: - ancestors.append(col_ancestor) - ancestors_ids.add(col_ancestor.id) - id_son = res[0][0] - else: - break - ancestors.reverse() - return ancestors - - def restricted_p(self): - """Predicate to test if the collection is restricted or not. Return the contect of the - `restrited' column of the collection table (typically Apache group). Otherwise return - None if the collection is public.""" - - if collection_restricted_p(self.name): - return 1 - return None - - def get_sons(self, type='r'): - "Returns list of direct sons of type 'type' for the current collection." - sons = [] - id_dad = self.id - query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ - "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score ASC, c.name ASC" % (int(id_dad), type) - res = run_sql(query) - for row in res: - sons.append(get_collection(row[1])) - return sons - - def get_descendants(self, type='r'): - "Returns list of all descendants of type 'type' for the current collection." - descendants = [] - descendant_ids = intbitset() - id_dad = self.id - query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\ - "WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score ASC" % (int(id_dad), type) - res = run_sql(query) - for row in res: - col_desc = get_collection(row[1]) - # looking for loops - if self.id in descendant_ids: - write_message("Loop found in collection %s" % self.name, stream=sys.stderr) - raise OverflowError("Loop found in collection %s" % self.name) - else: - descendants.append(col_desc) - descendant_ids.add(col_desc.id) - tmp_descendants = col_desc.get_descendants() - for descendant in tmp_descendants: - descendant_ids.add(descendant.id) - descendants += tmp_descendants - return descendants - - def write_cache_file(self, filename='', filebody={}): - "Write a file inside collection cache." 
- # open file: - dirname = "%s/collections" % (CFG_CACHEDIR) - mymkdir(dirname) - fullfilename = dirname + "/%s.html" % filename - try: - os.umask(0o022) - f = open(fullfilename, "wb") - except IOError as v: - try: - (code, message) = v - except: - code = 0 - message = v - print("I/O Error: " + str(message) + " (" + str(code) + ")") - sys.exit(1) - # print user info: - write_message("... creating %s" % fullfilename, verbose=6) - # print page body: - cPickle.dump(filebody, f, cPickle.HIGHEST_PROTOCOL) - # close file: - f.close() - - def update_webpage_cache(self, lang): - """Create collection page header, navtrail, body (including left and right stripes) and footer, and - call write_cache_file() afterwards to update the collection webpage cache.""" - - return {} ## webpage cache update is not really needed in - ## Invenio-on-Flask, so let's return quickly here - ## for great speed-up benefit - ## precalculate latest additions for non-aggregate - ## collections (the info is ln and as independent) - if self.dbquery: - if CFG_WEBSEARCH_I18N_LATEST_ADDITIONS: - self.create_latest_additions_info(ln=lang) - else: - self.create_latest_additions_info() - - # load the right message language - _ = gettext_set_language(lang) - - # create dictionary with data - cache = {"te_portalbox" : self.create_portalbox(lang, 'te'), - "np_portalbox" : self.create_portalbox(lang, 'np'), - "ne_portalbox" : self.create_portalbox(lang, 'ne'), - "tp_portalbox" : self.create_portalbox(lang, "tp"), - "lt_portalbox" : self.create_portalbox(lang, "lt"), - "rt_portalbox" : self.create_portalbox(lang, "rt"), - "last_updated" : convert_datestruct_to_dategui(time.localtime(), - ln=lang)} - for aas in CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES: # do light, simple and advanced search pages: - cache["navtrail_%s" % aas] = self.create_navtrail_links(aas, lang) - cache["searchfor_%s" % aas] = self.create_searchfor(aas, lang) - cache["narrowsearch_%s" % aas] = self.create_narrowsearch(aas, lang, 'r') - cache["focuson_%s" % aas] = self.create_narrowsearch(aas, lang, "v")+ \ - self.create_external_collections_box(lang) - cache["instantbrowse_%s" % aas] = self.create_instant_browse(aas=aas, ln=lang) - # write cache file - self.write_cache_file("%s-ln=%s"%(self.name, lang), cache) - - return cache - - def create_navtrail_links(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): - """Creates navigation trail links, i.e. links to collection - ancestors (except Home collection). If aas==1, then links to - Advanced Search interfaces; otherwise Simple Search. - """ - - dads = [] - for dad in self.get_ancestors(): - if dad.name != CFG_SITE_NAME: # exclude Home collection - dads.append((dad.name, dad.get_name(ln))) - - return websearch_templates.tmpl_navtrail_links( - aas=aas, ln=ln, dads=dads) - - - def create_portalbox(self, lang=CFG_SITE_LANG, position="rt"): - """Creates portalboxes of language CFG_SITE_LANG of the position POSITION by consulting DB configuration database. 
- The position may be: 'lt'='left top', 'rt'='right top', etc.""" - out = "" - query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\ - " WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.ln='%s' AND cp.position='%s' "\ - " ORDER BY cp.score DESC" % (self.id, lang, position) - res = run_sql(query) - for row in res: - title, body = row[0], row[1] - if title: - out += websearch_templates.tmpl_portalbox(title = title, - body = body) - else: - # no title specified, so print body ``as is'' only: - out += body - return out - - def create_narrowsearch(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG, type="r"): - """Creates list of collection descendants of type 'type' under title 'title'. - If aas==1, then links to Advanced Search interfaces; otherwise Simple Search. - Suitable for 'Narrow search' and 'Focus on' boxes.""" - - # get list of sons and analyse it - sons = self.get_sons(type) - - if not sons: - return '' - - # get descendents - descendants = self.get_descendants(type) - - grandsons = [] - if CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS: - # load grandsons for each son - for son in sons: - grandsons.append(son.get_sons()) - - # return "" - return websearch_templates.tmpl_narrowsearch( - aas = aas, - ln = ln, - type = type, - father = self, - has_grandchildren = len(descendants)>len(sons), - sons = sons, - display_grandsons = CFG_WEBSEARCH_NARROW_SEARCH_SHOW_GRANDSONS, - grandsons = grandsons - ) - - def create_external_collections_box(self, ln=CFG_SITE_LANG): - external_collection_load_states() - if self.id not in dico_collection_external_searches: - return "" - - engines_list = external_collection_sort_engine_by_name(dico_collection_external_searches[self.id]) - - return websearch_templates.tmpl_searchalso(ln, engines_list, self.id) - - def create_latest_additions_info(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, ln=CFG_SITE_LANG): - """ - Create info about latest additions that will be used for - create_instant_browse() later. 
- """ - self.latest_additions_info = [] - if self.nbrecs and self.reclist: - # firstly, get last 'rg' records: - recIDs = list(self.reclist) - of = 'hb' - # CERN hack begins: tweak latest additions for selected collections: - if CFG_CERN_SITE: - # alter recIDs list for some CERN collections: - this_year = time.strftime("%Y", time.localtime()) - if self.name in ['CERN Yellow Reports','Videos']: - last_year = str(int(this_year) - 1) - # detect recIDs only from this and past year: - recIDs = list(self.reclist & SearchEngine( - 'year:%s or year:%s' % (this_year, last_year) - ).search()) - # apply special filters: - if self.name in ['Videos']: - # select only videos with movies: - recIDs = list(intbitset(recIDs) & SearchEngine( - 'collection:"PUBLVIDEOMOVIE" -"Virtual Visit"' - ).search()) - of = 'hvp' - if self.name in ['General Talks', 'Academic Training Lectures', 'Summer Student Lectures']: - #select only the lectures with material - recIDs = list(self.reclist & SearchEngine( - '856:MediaArchive' - ).search()) - # sort some CERN collections specially: - if self.name in ['Videos', - 'Video Clips', - 'Video Movies', - 'Video News', - 'Video Rushes', - 'Webcast', - 'ATLAS Videos', - 'Restricted Video Movies', - 'Restricted Video Rushes', - 'LHC First Beam Videos', - 'CERN openlab Videos']: - recIDs = sort_records(recIDs, '269__c', 'a') - elif self.name in ['LHCb Talks']: - recIDs = sort_records(recIDs, 'reportnumber', 'a') - elif self.name in ['CERN Yellow Reports']: - recIDs = sort_records(recIDs, '084__a', 'a') - elif self.name in ['CERN Courier Issues', - 'CERN Courier Articles', - 'CERN Bulletin Issues', - 'CERN Bulletin Articles']: - recIDs = sort_records(recIDs, '773__y', 'a') - # CERN hack ends. - - total = len(recIDs) - to_display = min(rg, total) - - for idx in range(total-1, total-to_display-1, -1): - recid = recIDs[idx] - creation_date = get_creation_date(recid) or datetime.now() - self.latest_additions_info.append({'id': recid, - 'format': format_record(recid, of, ln=ln), - 'date': datetime.strptime(creation_date, "%Y-%m-%d
%H:%i")}) - return - - def create_instant_browse(self, rg=CFG_WEBSEARCH_INSTANT_BROWSE, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): - "Searches database and produces list of last 'rg' records." - - if self.restricted_p(): - return websearch_templates.tmpl_box_restricted_content(ln = ln) - - if str(self.dbquery).startswith("hostedcollection:"): - return websearch_templates.tmpl_box_hosted_collection(ln = ln) - - if rg == 0: - # do not show latest additions box - return "" - - # CERN hack: do not display latest additions for some CERN collections: - if CFG_CERN_SITE and self.name in ['Periodicals', 'Electronic Journals', - 'Press Office Photo Selection', - 'Press Office Video Selection']: - return "" - - try: - self.latest_additions_info - latest_additions_info_p = True - except: - latest_additions_info_p = False - - if latest_additions_info_p: - passIDs = [] - for idx in range(0, min(len(self.latest_additions_info), rg)): - # CERN hack: display the records in a grid layout, so do not show the related links - if CFG_CERN_SITE and self.name in ['Videos']: - passIDs.append({'id': self.latest_additions_info[idx]['id'], - 'body': self.latest_additions_info[idx]['format'], - 'date': self.latest_additions_info[idx]['date']}) - else: - passIDs.append({'id': self.latest_additions_info[idx]['id'], - 'body': self.latest_additions_info[idx]['format'] + \ - websearch_templates.tmpl_record_links(recid=self.latest_additions_info[idx]['id'], - rm='citation', - ln=ln), - 'date': self.latest_additions_info[idx]['date']}) - - if self.nbrecs > rg: - url = websearch_templates.build_search_url( - cc=self.name, jrec=rg+1, ln=ln, aas=aas) - else: - url = "" - # CERN hack: display the records in a grid layout - if CFG_CERN_SITE and self.name in ['Videos']: - return websearch_templates.tmpl_instant_browse( - aas=aas, ln=ln, recids=passIDs, more_link=url, grid_layout=True, father=self) - - return websearch_templates.tmpl_instant_browse( - aas=aas, ln=ln, recids=passIDs, more_link=url, father=self) - - return websearch_templates.tmpl_box_no_records(ln=ln) - - def create_searchoptions(self): - "Produces 'Search options' portal box." 
- box = "" - query = """SELECT DISTINCT(cff.id_field),f.code,f.name FROM collection_field_fieldvalue AS cff, field AS f - WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id - ORDER BY cff.score DESC""" % self.id - res = run_sql(query) - if res: - for row in res: - field_id = row[0] - field_code = row[1] - field_name = row[2] - query_bis = """SELECT fv.value,fv.name FROM fieldvalue AS fv, collection_field_fieldvalue AS cff - WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue - ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.name ASC""" % (self.id, field_id) - res_bis = run_sql(query_bis) - if res_bis: - values = [{'value' : '', 'text' : 'any' + ' ' + field_name}] # FIXME: internationalisation of "any" - for row_bis in res_bis: - values.append({'value' : cgi.escape(row_bis[0], 1), 'text' : row_bis[1]}) - - box += websearch_templates.tmpl_select( - fieldname = field_code, - values = values - ) - return box - - def create_sortoptions(self, ln=CFG_SITE_LANG): - """Produces 'Sort options' portal box.""" - - - # load the right message language - _ = gettext_set_language(ln) - - box = "" - query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff - WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id - ORDER BY cff.score DESC, f.name ASC""" % self.id - values = [{'value' : '', 'text': "- %s -" % _("latest first")}] - res = run_sql(query) - if res: - for row in res: - values.append({'value' : row[0], 'text': get_field_i18nname(row[1], ln)}) - else: - for tmp in ('title', 'author', 'report number', 'year'): - values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) - - box = websearch_templates.tmpl_select( - fieldname = 'sf', - css_class = 'address', - values = values - ) - box += websearch_templates.tmpl_select( - fieldname = 'so', - css_class = 'address', - values = [ - {'value' : 'a' , 'text' : _("asc.")}, - {'value' : 'd' , 'text' : _("desc.")} - ] - ) - return box - - def create_rankoptions(self, ln=CFG_SITE_LANG): - "Produces 'Rank options' portal box." - - # load the right message language - _ = gettext_set_language(ln) - - values = [{'value' : '', 'text': "- %s %s -" % (string.lower(_("OR")), _("rank by"))}] - for (code, name) in get_bibrank_methods(self.id, ln): - values.append({'value' : code, 'text': name}) - box = websearch_templates.tmpl_select( - fieldname = 'rm', - css_class = 'address', - values = values - ) - return box - - def create_displayoptions(self, ln=CFG_SITE_LANG): - "Produces 'Display options' portal box." - - # load the right message language - _ = gettext_set_language(ln) - - values = [] - for i in ['10', '25', '50', '100', '250', '500']: - values.append({'value' : i, 'text' : i + ' ' + _("results")}) - - box = websearch_templates.tmpl_select( - fieldname = 'rg', - selected = str(CFG_WEBSEARCH_DEF_RECORDS_IN_GROUPS), - css_class = 'address', - values = values - ) - - if self.get_sons(): - box += websearch_templates.tmpl_select( - fieldname = 'sc', - css_class = 'address', - values = [ - {'value' : '1' , 'text' : CFG_SCOAP3_SITE and _("split by publisher/journal") or _("split by collection")}, - {'value' : '0' , 'text' : _("single list")} - ] - ) - return box - - def create_formatoptions(self, ln=CFG_SITE_LANG): - "Produces 'Output format options' portal box." 
- - # load the right message language - _ = gettext_set_language(ln) - - box = "" - values = [] - query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf - WHERE cf.id_collection=%d AND cf.id_format=f.id AND f.visibility='1' - ORDER BY cf.score DESC, f.name ASC""" % self.id - res = run_sql(query) - if res: - for row in res: - values.append({'value' : row[0], 'text': row[1]}) - else: - values.append({'value' : 'hb', 'text' : "HTML %s" % _("brief")}) - box = websearch_templates.tmpl_select( - fieldname = 'of', - css_class = 'address', - values = values - ) - return box - - def create_searchwithin_selection_box(self, fieldname='f', value='', ln='en'): - """Produces 'search within' selection box for the current collection.""" - - - # get values - query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff - WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id - ORDER BY cff.score DESC, f.name ASC""" % self.id - res = run_sql(query) - values = [{'value' : '', 'text' : get_field_i18nname("any field", ln)}] - if res: - for row in res: - values.append({'value' : row[0], 'text' : get_field_i18nname(row[1], ln)}) - else: - if CFG_CERN_SITE: - for tmp in ['title', 'author', 'abstract', 'report number', 'year']: - values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) - else: - for tmp in ['title', 'author', 'abstract', 'keyword', 'report number', 'journal', 'year', 'fulltext', 'reference']: - values.append({'value' : tmp.replace(' ', ''), 'text' : get_field_i18nname(tmp, ln)}) - - return websearch_templates.tmpl_searchwithin_select( - fieldname = fieldname, - ln = ln, - selected = value, - values = values - ) - def create_searchexample(self): - "Produces search example(s) for the current collection." - out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id - return out - - def create_searchfor(self, aas=CFG_WEBSEARCH_DEFAULT_SEARCH_INTERFACE, ln=CFG_SITE_LANG): - "Produces either Simple or Advanced 'Search for' box for the current collection." - if aas == 2: - return self.create_searchfor_addtosearch(ln) - elif aas == 1: - return self.create_searchfor_advanced(ln) - elif aas == 0: - return self.create_searchfor_simple(ln) - else: - return self.create_searchfor_light(ln) - - def create_searchfor_addtosearch(self, ln=CFG_SITE_LANG): - "Produces add-to-search 'Search for' box for the current collection." - - return websearch_templates.tmpl_searchfor_addtosearch( - ln=ln, - collection_id=self.name, - record_count=self.nbrecs, - searchwithin= self.create_searchwithin_selection_box(fieldname='f1', ln=ln), - ) - - def create_searchfor_light(self, ln=CFG_SITE_LANG): - "Produces light 'Search for' box for the current collection." - - return websearch_templates.tmpl_searchfor_light( - ln=ln, - collection_id = self.name, - collection_name=self.get_name(ln=ln), - record_count=self.nbrecs, - example_search_queries=self.get_example_search_queries(), - ) - - def create_searchfor_simple(self, ln=CFG_SITE_LANG): - "Produces simple 'Search for' box for the current collection." - - return websearch_templates.tmpl_searchfor_simple( - ln=ln, - collection_id = self.name, - collection_name=self.get_name(ln=ln), - record_count=self.nbrecs, - middle_option = self.create_searchwithin_selection_box(ln=ln), - ) - - def create_searchfor_advanced(self, ln=CFG_SITE_LANG): - "Produces advanced 'Search for' box for the current collection." 
- - return websearch_templates.tmpl_searchfor_advanced( - ln = ln, - collection_id = self.name, - collection_name=self.get_name(ln=ln), - record_count=self.nbrecs, - - middle_option_1 = self.create_searchwithin_selection_box('f1', ln=ln), - middle_option_2 = self.create_searchwithin_selection_box('f2', ln=ln), - middle_option_3 = self.create_searchwithin_selection_box('f3', ln=ln), - - searchoptions = self.create_searchoptions(), - sortoptions = self.create_sortoptions(ln), - rankoptions = self.create_rankoptions(ln), - displayoptions = self.create_displayoptions(ln), - formatoptions = self.create_formatoptions(ln) - ) - - def calculate_reclist(self): - """ - Calculate, set and return the (reclist, - reclist_with_nonpublic_subcolls, - nbrecs_from_hosted_collections) - tuple for the given collection.""" - - if str(self.dbquery).startswith("hostedcollection:"): - # we don't normally use this function to calculate the reclist - # for hosted collections. In case we do, recursively for a regular - # ancestor collection, then quickly return the object attributes. - return (self.reclist, - self.reclist_with_nonpublic_subcolls, - self.nbrecs) - - if self.calculate_reclist_run_already: - # do we really have to recalculate? If not, - # then return the object attributes - return (self.reclist, - self.reclist_with_nonpublic_subcolls, - self.nbrecs_from_hosted_collections) - - write_message("... calculating reclist of %s" % self.name, verbose=6) - - reclist = intbitset() # will hold results for public sons only; good for storing into DB - reclist_with_nonpublic_subcolls = intbitset() # will hold results for both public and nonpublic sons; good for deducing total - # number of documents - nbrecs_from_hosted_collections = 0 # will hold the total number of records from descendant hosted collections - - if not self.dbquery: - # A - collection does not have dbquery, so query recursively all its sons - # that are either non-restricted or that have the same restriction rules - for coll in self.get_sons(): - coll_reclist,\ - coll_reclist_with_nonpublic_subcolls,\ - coll_nbrecs_from_hosted_collection = coll.calculate_reclist() - - if ((coll.restricted_p() is None) or - (coll.restricted_p() == self.restricted_p())): - # add this reclist ``for real'' only if it is public - reclist.union_update(coll_reclist) - reclist_with_nonpublic_subcolls.union_update(coll_reclist_with_nonpublic_subcolls) - - # increment the total number of records from descendant hosted collections - nbrecs_from_hosted_collections += coll_nbrecs_from_hosted_collection - - else: - # B - collection does have dbquery, so compute it: - # (note: explicitly remove DELETED records) - if CFG_CERN_SITE: - reclist = SearchEngine( - self.dbquery + ' -980__:"DELETED" -980__:"DUMMY"' - ).search() - else: - reclist = SearchEngine( - self.dbquery + ' -980__:"DELETED"' - ).search() - reclist_with_nonpublic_subcolls = copy.deepcopy(reclist) - - # store the results: - self.nbrecs_from_hosted_collections = nbrecs_from_hosted_collections - self.nbrecs = len(reclist_with_nonpublic_subcolls) + \ - nbrecs_from_hosted_collections - self.reclist = reclist - self.reclist_with_nonpublic_subcolls = reclist_with_nonpublic_subcolls - # last but not least, update the speed-up flag: - self.calculate_reclist_run_already = 1 - # return the two sets, as well as - # the total number of records from descendant hosted collections: - return (self.reclist, - self.reclist_with_nonpublic_subcolls, - self.nbrecs_from_hosted_collections) - - def 
calculate_nbrecs_for_external_collection(self, timeout=CFG_EXTERNAL_COLLECTION_TIMEOUT): - """Calculate the total number of records, aka nbrecs, for given external collection.""" - #if self.calculate_reclist_run_already: - # do we have to recalculate? - #return self.nbrecs - #write_message("... calculating nbrecs of external collection %s" % self.name, verbose=6) - if self.name in external_collections_dictionary: - engine = external_collections_dictionary[self.name] - if engine.parser: - self.nbrecs_tmp = engine.parser.parse_nbrecs(timeout) - if self.nbrecs_tmp >= 0: return self.nbrecs_tmp - # the parse_nbrecs() function returns negative values for some specific cases - # maybe we can handle these specific cases, some warnings or something - # for now the total number of records remains silently the same - else: return self.nbrecs - else: write_message("External collection %s does not have a parser!" % self.name, verbose=6) - else: write_message("External collection %s not found!" % self.name, verbose=6) - return 0 - # last but not least, update the speed-up flag: - #self.calculate_reclist_run_already = 1 - - def check_nbrecs_for_external_collection(self): - """Check if the external collection has changed its total number of records, aka nbrecs. - Returns True if the total number of records has changed and False if it's the same""" - - write_message("*** self.nbrecs = %s / self.cal...ion = %s ***" % (str(self.nbrecs), str(self.calculate_nbrecs_for_external_collection())), verbose=6) - write_message("*** self.nbrecs != self.cal...ion = %s ***" % (str(self.nbrecs != self.calculate_nbrecs_for_external_collection()),), verbose=6) - return self.nbrecs != self.calculate_nbrecs_for_external_collection(CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS) - - def set_nbrecs_for_external_collection(self): - """Set this external collection's total number of records, aka nbrecs""" - - if self.calculate_reclist_run_already: - # do we have to recalculate? - return - write_message("... calculating nbrecs of external collection %s" % self.name, verbose=6) - if self.nbrecs_tmp: - self.nbrecs = self.nbrecs_tmp - else: - self.nbrecs = self.calculate_nbrecs_for_external_collection(CFG_HOSTED_COLLECTION_TIMEOUT_NBRECS) - # last but not least, update the speed-up flag: - self.calculate_reclist_run_already = 1 - - def get_added_records(self): - """Return new records added since last run.""" - return self.reclist - self.old_reclist - - def update_reclist(self): - "Update the record universe for given collection; nbrecs, reclist of the collection table." - if self.update_reclist_run_already: - # do we have to reupdate? - return 0 - write_message("... updating reclist of %s (%s recs)" % (self.name, self.nbrecs), verbose=6) - sys.stdout.flush() - try: - ## In principle we could skip this update if old_reclist==reclist - ## however we just update it here in case of race-conditions. - run_sql("UPDATE collection SET nbrecs=%s, reclist=%s WHERE id=%s", - (self.nbrecs, self.reclist.fastdump(), self.id)) - if self.old_reclist != self.reclist: - self.reclist_updated_since_start = 1 - else: - write_message("... no changes in reclist detected", verbose=6) - except Error as e: - print("Database Query Error %d: %s."
% (e.args[0], e.args[1])) - sys.exit(1) - # last but not least, update the speed-up flag: - self.update_reclist_run_already = 1 - return 0 - -def perform_display_collection(colID, colname, aas, ln, em, show_help_boxes): - """Returns the data needed to display a collection page - The arguments are as follows: - colID - id of the collection to display - colname - name of the collection to display - aas - 0 if simple search, 1 if advanced search - ln - language of the page - em - code to display just part of the page - show_help_boxes - whether to show the help boxes or not""" - # check and update cache if necessary - cachedfile = open("%s/collections/%s-ln=%s.html" % - (CFG_CACHEDIR, colname, ln), "rb") - try: - data = cPickle.load(cachedfile) - except ValueError: - data = get_collection(colname).update_webpage_cache(ln) - cachedfile.close() - # check em value to return just part of the page - if em != "": - if EM_REPOSITORY["search_box"] not in em: - data["searchfor_%s" % aas] = "" - if EM_REPOSITORY["see_also_box"] not in em: - data["focuson_%s" % aas] = "" - if EM_REPOSITORY["all_portalboxes"] not in em: - if EM_REPOSITORY["te_portalbox"] not in em: - data["te_portalbox"] = "" - if EM_REPOSITORY["np_portalbox"] not in em: - data["np_portalbox"] = "" - if EM_REPOSITORY["ne_portalbox"] not in em: - data["ne_portalbox"] = "" - if EM_REPOSITORY["tp_portalbox"] not in em: - data["tp_portalbox"] = "" - if EM_REPOSITORY["lt_portalbox"] not in em: - data["lt_portalbox"] = "" - if EM_REPOSITORY["rt_portalbox"] not in em: - data["rt_portalbox"] = "" - c_body = websearch_templates.tmpl_webcoll_body(ln, colID, data.get("te_portalbox", ""), - data.get("searchfor_%s"%aas,''), data.get("np_portalbox", ''), data.get("narrowsearch_%s"%aas, ''), - data.get("focuson_%s"%aas, ''), data.get("instantbrowse_%s"%aas, ''), data.get("ne_portalbox", ''), - em=="" or EM_REPOSITORY["body"] in em) - if show_help_boxes <= 0: - data["rt_portalbox"] = "" - return (c_body, data.get("navtrail_%s"%aas, ''), data.get("lt_portalbox", ''), data.get("rt_portalbox", ''), - data.get("tp_portalbox", ''), data.get("te_portalbox", ''), data.get("last_updated", '')) - -def get_datetime(var, format_string="%Y-%m-%d %H:%M:%S"): - """Returns a date string according to the format string. - It can handle normal date strings and shifts with respect - to now.""" - date = time.time() - shift_re = re.compile("([-\+]{0,1})([\d]+)([dhms])") - factors = {"d":24*3600, "h":3600, "m":60, "s":1} - m = shift_re.match(var) - if m: - sign = m.groups()[0] == "-" and -1 or 1 - factor = factors[m.groups()[2]] - value = float(m.groups()[1]) - date = time.localtime(date + sign * factor * value) - date = strftime(format_string, date) - else: - date = time.strptime(var, format_string) - date = strftime(format_string, date) - return date - -def get_current_time_timestamp(): - """Return timestamp corresponding to the current time.""" - return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - -def compare_timestamps_with_tolerance(timestamp1, - timestamp2, - tolerance=0): - """Compare two timestamps TIMESTAMP1 and TIMESTAMP2, of the form - '2005-03-31 17:37:26'. Optionally receives a TOLERANCE argument - (in seconds). Return -1 if TIMESTAMP1 is less than TIMESTAMP2 - minus TOLERANCE, 0 if they are equal within TOLERANCE limit, - and 1 if TIMESTAMP1 is greater than TIMESTAMP2 plus TOLERANCE. 
- """ - # remove any trailing .00 in timestamps: - timestamp1 = re.sub(r'\.[0-9]+$', '', timestamp1) - timestamp2 = re.sub(r'\.[0-9]+$', '', timestamp2) - # first convert timestamps to Unix epoch seconds: - timestamp1_seconds = calendar.timegm(time.strptime(timestamp1, "%Y-%m-%d %H:%M:%S")) - timestamp2_seconds = calendar.timegm(time.strptime(timestamp2, "%Y-%m-%d %H:%M:%S")) - # now compare them: - if timestamp1_seconds < timestamp2_seconds - tolerance: - return -1 - elif timestamp1_seconds > timestamp2_seconds + tolerance: - return 1 - else: - return 0 - -def get_database_last_updated_timestamp(): - """Return last updated timestamp for collection-related and - record-related database tables. - """ - database_tables_timestamps = [] - database_tables_timestamps.append(get_table_update_time('bibrec')) - ## In INSPIRE bibfmt is on innodb and there is not such configuration - bibfmt_last_update = run_sql("SELECT max(last_updated) FROM bibfmt") - if bibfmt_last_update and bibfmt_last_update[0][0]: - database_tables_timestamps.append(str(bibfmt_last_update[0][0])) - try: - database_tables_timestamps.append(get_table_update_time('idxWORD%')) - except ValueError: - # There are no indexes in the database. That's OK. - pass - database_tables_timestamps.append(get_table_update_time('collection%')) - database_tables_timestamps.append(get_table_update_time('portalbox')) - database_tables_timestamps.append(get_table_update_time('field%')) - database_tables_timestamps.append(get_table_update_time('format%')) - database_tables_timestamps.append(get_table_update_time('rnkMETHODNAME')) - database_tables_timestamps.append(get_table_update_time('accROLE_accACTION_accARGUMENT', run_on_slave=True)) - return max(database_tables_timestamps) - -def get_cache_last_updated_timestamp(): - """Return last updated cache timestamp.""" - try: - f = open(CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE, "r") - except: - return "1970-01-01 00:00:00" - timestamp = f.read() - f.close() - - # Remove trailing newlines and whitespace. - timestamp = timestamp.strip() - return timestamp or "1970-01-01 00:00:00" - -def set_cache_last_updated_timestamp(timestamp): - """Set last updated cache timestamp to TIMESTAMP.""" - try: - with open(CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE, "w") as f: - f.write(timestamp) - except: - # FIXME: do something here - pass - return timestamp - -def task_submit_elaborate_specific_parameter(key, value, opts, args): - """ Given the string key it checks it's meaning, eventually using the value. - Usually it fills some key in the options dict. - It must return True if it has elaborated the key, False, if it doesn't - know that key. 
- eg: - if key in ['-n', '--number']: - self.options['number'] = value - return True - return False - """ - if key in ("-c", "--collection"): - task_set_option("collection", value) - elif key in ("-r", "--recursive"): - task_set_option("recursive", 1) - elif key in ("-f", "--force"): - task_set_option("force", 1) - elif key in ("-q", "--quick"): - task_set_option("quick", 1) - elif key in ("-p", "--part"): - task_set_option("part", int(value)) - elif key in ("-l", "--language"): - languages = task_get_option("language", []) - languages += value.split(',') - for ln in languages: - if ln not in CFG_SITE_LANGS: - print('ERROR: "%s" is not a recognized language code' % ln) - return False - task_set_option("language", languages) - else: - return False - return True - -def task_submit_check_options(): - if task_has_option('collection'): - coll = get_collection(task_get_option("collection")) - if coll.id is None: - print('ERROR: Collection "%s" does not exist' % coll.name) - return False - return True - -def task_run_core(): - """ Reimplement to add the body of the task.""" -# -# ------->--->time--->------> -# (-1) | ( 0) | ( 1) -# | | | -# [T.db] | [T.fc] | [T.db] -# | | | -# |<-tol|tol->| -# -# the above is the compare_timestamps_with_tolerance result "diagram" -# [T.db] stands for the database timestamp and [T.fc] for the file cache timestamp -# ( -1, 0, 1) stand for the returned value -# tol stands for the tolerance in seconds -# -# When a record has been added to or deleted from one of the collections the T.db becomes greater than the T.fc -# and when webcoll runs it is fully run. It recalculates the reclists and nbrecs, and since it updates the -# collections db table it also updates the T.db. The T.fc is set to the moment the task started running, thus -# slightly before the T.db (practically the time distance between the start of the task and the last call of -# update_reclist). Therefore when webcoll runs again, and even if no database changes have taken place in the -# meanwhile, it fully runs (because compare_timestamps_with_tolerance returns 0). This time though, and if -# no database changes have taken place, the T.db remains the same while T.fc is updated and as a result if -# webcoll runs again it will not be fully run -# - task_run_start_timestamp = get_current_time_timestamp() - colls = [] - params = {} - task_set_task_param("post_process_params", params) - # decide whether we need to run or not, by comparing last updated timestamps: - write_message("Database timestamp is %s." % get_database_last_updated_timestamp(), verbose=3) - write_message("Collection cache timestamp is %s." % get_cache_last_updated_timestamp(), verbose=3) - if task_has_option("part"): - write_message("Running cache update part %s only."
% task_get_option("part"), verbose=3) - if check_nbrecs_for_all_external_collections() or task_has_option("force") or \ - compare_timestamps_with_tolerance(get_database_last_updated_timestamp(), - get_cache_last_updated_timestamp(), - CFG_CACHE_LAST_UPDATED_TIMESTAMP_TOLERANCE) >= 0: - ## either forced update was requested or cache is not up to date, so recreate it: - # firstly, decide which collections to do: - if task_has_option("collection"): - coll = get_collection(task_get_option("collection")) - colls.append(coll) - if task_has_option("recursive"): - r_type_descendants = coll.get_descendants(type='r') - colls += r_type_descendants - v_type_descendants = coll.get_descendants(type='v') - colls += v_type_descendants - else: - res = run_sql("SELECT name FROM collection ORDER BY id") - for row in res: - colls.append(get_collection(row[0])) - # secondly, update collection reclist cache: - if task_get_option('part', 1) == 1: - all_recids_added = intbitset() - i = 0 - for coll in colls: - i += 1 - write_message("%s / reclist cache update" % coll.name) - if str(coll.dbquery).startswith("hostedcollection:"): - coll.set_nbrecs_for_external_collection() - else: - coll.calculate_reclist() - coll.update_reclist() - all_recids_added.update(coll.get_added_records()) - task_update_progress("Part 1/2: done %d/%d" % (i, len(colls))) - task_sleep_now_if_required(can_stop_too=True) - webcoll_after_reclist_cache_update.send('webcoll', collections=colls) - params.update({'recids': list(all_recids_added)}) - # thirdly, update collection webpage cache: - if task_get_option("part", 2) == 2: - # Updates cache only for chosen languages or for all available ones if none was chosen - languages = task_get_option("language", CFG_SITE_LANGS) - write_message("Cache update for the following languages: %s" % str(languages), verbose=3) - i = 0 - for coll in colls: - i += 1 - if coll.reclist_updated_since_start or task_has_option("collection") or task_get_option("force") or not task_get_option("quick"): - write_message("%s / webpage cache update" % coll.name) - for lang in languages: - coll.update_webpage_cache(lang) - webcoll_after_webpage_cache_update.send(coll.name, collection=coll, lang=lang) - else: - write_message("%s / webpage cache seems not to need an update and --quick was used" % coll.name, verbose=2) - task_update_progress("Part 2/2: done %d/%d" % (i, len(colls))) - task_sleep_now_if_required(can_stop_too=True) - - # finally update the cache last updated timestamp: - # (but only when all collections were updated, not when only - # some of them were forced-updated as per admin's demand) - if not task_has_option("collection"): - set_cache_last_updated_timestamp(task_run_start_timestamp) - write_message("Collection cache timestamp is set to %s." 
% get_cache_last_updated_timestamp(), verbose=3) - task_set_task_param("post_process_params", params) - else: - ## cache up to date, we don't have to run - write_message("Collection cache is up to date, no need to run.") - ## we are done: - return True - -### okay, here we go: -if __name__ == '__main__': - main() diff --git a/invenio/legacy/websearch/webinterface.py b/invenio/legacy/websearch/webinterface.py index 65ec21db41..afb0d7d56e 100644 --- a/invenio/legacy/websearch/webinterface.py +++ b/invenio/legacy/websearch/webinterface.py @@ -78,16 +78,15 @@ perform_request_search, \ restricted_collection_cache, \ EM_REPOSITORY -from invenio.modules.search.models import Collection -from invenio.legacy.websearch.webcoll import perform_display_collection +from invenio.modules.collections.models import Collection from invenio.legacy.bibrecord import get_fieldvalues, \ get_fieldvalues_alephseq_like from invenio.modules.access.engine import acc_authorize_action from invenio.modules.access.local_config import VIEWRESTRCOLL from invenio.modules.access.mailcookie import mail_cookie_create_authorize_action +from invenio.modules.collections.cache import get_collection_reclist from invenio.modules.formatter import format_records from invenio.modules.formatter.engine import get_output_formats -from invenio.legacy.websearch.webcoll import get_collection from intbitset import intbitset from invenio.legacy.bibupload.engine import find_record_from_sysno from invenio.legacy.bibrank.citation_searcher import get_cited_by_list @@ -447,7 +446,7 @@ def __call__(self, req, form): for collname in restricted_collection_cache.cache: (auth_code, auth_msg) = acc_authorize_action(user_info, VIEWRESTRCOLL, collection=collname) if auth_code and user_info['email'] == 'guest': - coll_recids = get_collection(collname).reclist + coll_recids = get_collection_reclist(collname) if coll_recids & recids: cookie = mail_cookie_create_authorize_action(VIEWRESTRCOLL, {'collection' : collname}) target = CFG_SITE_SECURE_URL + '/youraccount/login' + \ @@ -827,82 +826,8 @@ def display_collection(req, c, aas, verbose, ln, em=""): req=req, navmenuid='search') - if normalised_name != c: - redirect_to_url(req, normalised_name, apache.HTTP_MOVED_PERMANENTLY) - - # start display: - req.content_type = "text/html" - req.send_http_header() - - c_body, c_navtrail, c_portalbox_lt, c_portalbox_rt, c_portalbox_tp, c_portalbox_te, \ - c_last_updated = perform_display_collection(colID, c, aas, ln, em, - user_preferences.get('websearch_helpbox', 1)) - - if em == "" or EM_REPOSITORY["body"] in em: - try: - title = get_coll_i18nname(c, ln) - except: - title = "" - else: - title = "" - show_title_p = True - body_css_classes = [] - if c == CFG_SITE_NAME: - # Do not display title on home collection - show_title_p = False - body_css_classes.append('home') - - if len(collection_reclist_cache.cache.keys()) == 1: - # if there is only one collection defined, do not print its - # title on the page as it would be displayed repetitively. - show_title_p = False - - if aas == -1: - show_title_p = False - - if CFG_INSPIRE_SITE == 1: - # INSPIRE should never show title, but instead use css to - # style collections - show_title_p = False - body_css_classes.append(nmtoken_from_string(c)) - - # RSS: - rssurl = CFG_SITE_URL + '/rss' - rssurl_params = [] - if c != CFG_SITE_NAME: - rssurl_params.append('cc=' + quote(c)) - if ln != CFG_SITE_LANG and \ - c in CFG_WEBSEARCH_RSS_I18N_COLLECTIONS: - rssurl_params.append('ln=' + ln) - - if rssurl_params: - rssurl += '?' 
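
The webinterface.py hunk above swaps the old get_collection(collname).reclist lookup for the new collections-module cache. A minimal sketch of the resulting restricted-collection check (collection name and record ids are hypothetical):

    from intbitset import intbitset
    from invenio.modules.collections.cache import get_collection_reclist

    requested = intbitset([97, 98])                  # record ids the guest asked for (hypothetical)
    restricted = get_collection_reclist('Theses')    # ids in a restricted collection; 'Theses' is illustrative
    if restricted & requested:
        # At least one requested record lives in the restricted collection, so the
        # legacy handler creates a VIEWRESTRCOLL mail cookie and redirects to login.
        pass
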
+ '&'.join(rssurl_params) - - if 'hb' in CFG_WEBSEARCH_USE_MATHJAX_FOR_FORMATS: - metaheaderadd = get_mathjax_header(req.is_https()) - else: - metaheaderadd = '' - - return page(title=title, - body=c_body, - navtrail=c_navtrail, - description="%s - %s" % (CFG_SITE_NAME, c), - keywords="%s, %s" % (CFG_SITE_NAME, c), - metaheaderadd=metaheaderadd, - uid=uid, - language=ln, - req=req, - cdspageboxlefttopadd=c_portalbox_lt, - cdspageboxrighttopadd=c_portalbox_rt, - titleprologue=c_portalbox_tp, - titleepilogue=c_portalbox_te, - lastupdated=c_last_updated, - navmenuid='search', - rssurl=rssurl, - body_css_classes=body_css_classes, - show_title_p=show_title_p, - show_header=em == "" or EM_REPOSITORY["header"] in em, - show_footer=em == "" or EM_REPOSITORY["footer"] in em) + from flask import redirect, url_for + return redirect(url_for('collections.collection', name=collection.name)) def resolve_doi(req, doi, ln=CFG_SITE_LANG, verbose=0): diff --git a/invenio/legacy/webstat/engine.py b/invenio/legacy/webstat/engine.py index 7648435370..683e1b7de9 100644 --- a/invenio/legacy/webstat/engine.py +++ b/invenio/legacy/webstat/engine.py @@ -51,7 +51,7 @@ book_information_from_MARC from invenio.legacy.bibcirculation.db_layer import get_id_bibrec, \ get_borrower_data -from invenio.legacy.websearch.webcoll import CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE +CFG_CACHE_LAST_UPDATED_TIMESTAMP_FILE = None from invenio.utils.date import convert_datetext_to_datestruct, convert_datestruct_to_dategui from invenio.legacy.bibsched.bibtask import get_modified_records_since diff --git a/invenio/legacy/webstat/templates.py b/invenio/legacy/webstat/templates.py index 23034991e9..99fe06d766 100644 --- a/invenio/legacy/webstat/templates.py +++ b/invenio/legacy/webstat/templates.py @@ -312,7 +312,7 @@ def tmpl_collection_stats_main_list(self, ln=CFG_SITE_LANG): """ out = """

Collections stats