collections: initial release

* NOTE: adds a new calculated field '_collections' to records, from which the 'collection' index is created. (closes inveniosoftware#2638) * NOTE: the collection reclist is no longer populated. Use the 'collection' phrase index instead; it relies on a query matcher based on record data, hence second-order operators will not work in collection query definitions. Signed-off-by: Jiri Kuncar <[email protected]>
jirikuncar · Jan 27, 2015 · c9b9a93 · c9b9a93
1 parent 7fb880d
commit c9b9a93
Show file tree

Hide file tree

Showing 72 changed files with 1,600 additions and 2,699 deletions.
diff --git a/invenio/base/scripts/database.py b/invenio/base/scripts/database.py
@@ -207,7 +207,7 @@ def cfv_after_create(target, connection, **kw):
         run_sql('ALTER TABLE collection_field_fieldvalue CHANGE id_fieldvalue id_fieldvalue mediumint(9) unsigned')
         #print(run_sql('SHOW CREATE TABLE collection_field_fieldvalue'))
 
-    from invenio.modules.search.models import CollectionFieldFieldvalue
+    from invenio.modules.collections.models import CollectionFieldFieldvalue
     event.listen(CollectionFieldFieldvalue.__table__, "after_create", cfv_after_create)
 
     tables = db.metadata.sorted_tables

diff --git a/invenio/base/scripts/demosite.py b/invenio/base/scripts/demosite.py
@@ -31,6 +31,8 @@
 import pkg_resources
 import sys
 
+from itertools import count
+
 from invenio.ext.script import Manager
 
 manager = Manager(usage=__doc__)
@@ -102,23 +104,22 @@ def populate(packages=[], default_data=True, files=None,
                 print("ERROR: failed execution of", cmd)
                 sys.exit(1)
 
+    i = count(1).next
     for cmd in ["bin/bibdocfile --textify --with-ocr --recid 97",
                 "bin/bibdocfile --textify --all",
                 "bin/bibindex -u admin",
-                "bin/bibindex %d" % (job_id + 1,),
+                "bin/bibindex %d" % (job_id + i(),),
                 "bin/bibindex -u admin -w global",
-                "bin/bibindex %d" % (job_id + 2,),
+                "bin/bibindex %d" % (job_id + i(),),
                 "bin/bibreformat -u admin -o HB",
-                "bin/bibreformat %d" % (job_id + 3,),
-                "bin/webcoll -u admin",
-                "bin/webcoll %d" % (job_id + 4,),
+                "bin/bibreformat %d" % (job_id + i(),),
                 "bin/bibrank -u admin",
-                "bin/bibrank %d" % (job_id + 5,),
+                "bin/bibrank %d" % (job_id + i(),),
                 "bin/bibsort -u admin -R",
-                "bin/bibsort %d" % (job_id + 6,),
+                "bin/bibsort %d" % (job_id + i(),),
                 "bin/oairepositoryupdater -u admin",
-                "bin/oairepositoryupdater %d" % (job_id + 7,),
-                "bin/bibupload %d" % (job_id + 8,)]:
+                "bin/oairepositoryupdater %d" % (job_id + i(),),
+                "bin/bibupload %d" % (job_id + i(),)]:
         cmd = os.path.join(CFG_PREFIX, cmd)
         if os.system(cmd):
             print("ERROR: failed execution of", cmd)

diff --git a/invenio/base/templates/footer_base.html b/invenio/base/templates/footer_base.html
@@ -31,7 +31,7 @@
     <div class="col-md-6">
 {%- block footer_credits %}
       {{ config["CFG_SITE_NAME_INTL"][g.ln] }}
-      &nbsp;::&nbsp;<a class="footer" href="{{ url_for('search.index') }}">
+      &nbsp;::&nbsp;<a class="footer" href="{{ url_for('collections.index') }}">
         {{- _("Search") -}}
       </a>&nbsp;::&nbsp;<a class="footer" href="{{ url_for('webdeposit.index') }}">
         {{- _("Deposit") -}}

diff --git a/invenio/base/templates/header_base.html b/invenio/base/templates/header_base.html
@@ -33,7 +33,7 @@
           <span class="icon-bar"></span>
           <span class="icon-bar"></span>
         </button>
-        <a class="navbar-brand" href="{{ url_for('search.index') }}">
+        <a class="navbar-brand" href="{{ url_for('collections.index') }}">
           <img src="{{ url_for('static', filename='img/logo_white.png') }}" alt="{{ config.CFG_SITE_NAME_INTL[g.ln] }}" />
       </a>
       </div>

diff --git a/invenio/ext/legacy/__init__.py b/invenio/ext/legacy/__init__.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
+##
 ## This file is part of Invenio.
-## Copyright (C) 2011, 2012, 2013, 2014 CERN.
+## Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
@@ -23,12 +24,12 @@
 import os
 import sys
 
-## Import the remote debugger as a first thing, if allowed
-#FIXME enable remote_debugger when invenio.config is ready
-#try:
-#    from invenio.utils import remote_debugger
-#except:
-#    remote_debugger = None
+# Import the remote debugger as a first thing, if allowed
+# FIXME enable remote_debugger when invenio.config is ready
+# try:
+#     from invenio.utils import remote_debugger
+# except:
+#     remote_debugger = None
 
 from werkzeug.exceptions import HTTPException
 from werkzeug.wrappers import BaseResponse
@@ -57,10 +58,8 @@ def cli_cmd_reset(sender, yes_i_know=False, drop=True, **kwargs):
     # cli_cmd_reset_fieldnames(conf)
 
     for cmd in ["%s/bin/webaccessadmin -u admin -c -a -D" % CFG_PREFIX,
-                "%s/bin/webcoll -u admin" % CFG_PREFIX,
-                "%s/bin/webcoll 1" % CFG_PREFIX,
                 "%s/bin/bibsort -u admin --load-config" % CFG_PREFIX,
-                "%s/bin/bibsort 2" % CFG_PREFIX, ]:
+                "%s/bin/bibsort 1" % CFG_PREFIX, ]:
         if os.system(cmd):
             print("ERROR: failed execution of", cmd)
             sys.exit(1)

diff --git a/invenio/ext/sqlalchemy/utils.py b/invenio/ext/sqlalchemy/utils.py
@@ -41,6 +41,11 @@ def save(self):
 from sqlalchemy.exc import OperationalError
 from sqlalchemy.ext.declarative import declared_attr
 from sqlalchemy.orm import class_mapper, properties
+from sqlalchemy.orm.collections import (
+    InstrumentedList,
+    attribute_mapped_collection,
+    collection,
+)
 
 first_cap_re = re.compile('(.)([A-Z][a-z]+)')
 all_cap_re = re.compile('([a-z0-9])([A-Z])')
@@ -258,3 +263,102 @@ def test_sqla_utf8_chain():
     table.drop(bind=db.engine)
 
     print(" [OK]")
+
+
+class IntbitsetPickle(object):
+
+    """Pickle implementation for intbitset."""
+
+    def dumps(self, obj, protocol=None):
+        """Dump intbitset to byte stream."""
+        if obj is not None:
+            return obj.fastdump()
+        return intbitset([]).fastdump()
+
+    def loads(self, obj):
+        """Load byte stream to intbitset."""
+        try:
+            return intbitset(obj)
+        except:
+            return intbitset()
+
+
+def IntbitsetCmp(x, y):
+    """Compare two intbitsets."""
+    if x is None or y is None:
+        return False
+    else:
+        return x == y
+
+
+class OrderedList(InstrumentedList):
+
+    """Implemented ordered instrumented list."""
+
+    def append(self, item):
+        if self:
+            s = sorted(self, key=lambda obj: obj.score)
+            item.score = s[-1].score + 1
+        else:
+            item.score = 1
+        InstrumentedList.append(self, item)
+
+    def set(self, item, index=0):
+        if self:
+            s = sorted(self, key=lambda obj: obj.score)
+            if index >= len(s):
+                item.score = s[-1].score + 1
+            elif index < 0:
+                item.score = s[0].score
+                index = 0
+            else:
+                item.score = s[index].score + 1
+
+            for i, it in enumerate(s[index:]):
+                it.score = item.score + i + 1
+                # if s[i+1].score is already greater, then break
+        else:
+            item.score = index
+        InstrumentedList.append(self, item)
+
+    def pop(self, item):
+        # FIXME
+        if self:
+            obj_list = sorted(self, key=lambda obj: obj.score)
+            for i, it in enumerate(obj_list):
+                if obj_list[i] == item:
+                    return InstrumentedList.pop(self, i)
+
+
+def attribute_multi_dict_collection(creator, key_attr, val_attr):
+    """Define new attribute based mapping."""
+    class MultiMappedCollection(dict):
+
+        def __init__(self, data=None):
+            self._data = data or {}
+
+        @collection.appender
+        def _append(self, obj):
+            l = self._data.setdefault(key_attr(obj), [])
+            l.append(obj)
+
+        def __setitem__(self, key, value):
+            self._append(creator(key, value))
+
+        def __getitem__(self, key):
+            return tuple(val_attr(obj) for obj in self._data[key])
+
+        @collection.remover
+        def _remove(self, obj):
+            self._data[key_attr(obj)].remove(obj)
+
+        @collection.iterator
+        def _iterator(self):
+            for objs in self._data.itervalues():
+                for obj in objs:
+                    yield obj
+
+        def __repr__(self):
+            return '%s(%r)' % (type(self).__name__, self._data)
+
+    return MultiMappedCollection
diff --git a/invenio/ext/template/context_processor.py b/invenio/ext/template/context_processor.py
@@ -52,7 +52,7 @@ class template_args(object):
 
         def setup_app(app):
 
-            @template_args('search.index', app=app)
+            @template_args('collections.index', app=app)
             def foo():
                 return dict(foo='bar')
 
@@ -61,7 +61,7 @@ def foo():
 
     .. code-block:: python
 
-        from invenio.modules.search.views.search import index
+        from invenio.modules.collections.views.collections import index
 
         @template_args(index)
         def bar():

diff --git a/invenio/legacy/bibcirculation/webinterface.py b/invenio/legacy/bibcirculation/webinterface.py
@@ -72,7 +72,7 @@
                                           CFG_BIBCIRCULATION_ACQ_STATUS_NEW, \
                                           AMZ_ACQUISITION_IDENTIFIER_TAG
 
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
 get_colID = lambda name: Collection.query.filter_by(name=name).value('id')
 
 

diff --git a/invenio/legacy/bibdocfile/cli.py b/invenio/legacy/bibdocfile/cli.py
@@ -433,10 +433,11 @@ def print_table(title, table):
             for row in table:
                 print("\t".join(str(elem) for elem in row))
 
-    for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"):
+    from invenio.modules.collections.cache import get_collection_reclist
+    for collection, in run_sql("SELECT name FROM collection ORDER BY name"):
         print("-" * 79)
         print("Statistic for: %s " % collection)
-        reclist = intbitset(reclist)
+        reclist = get_collection_reclist(collection)
         if reclist:
             sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')'
             print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql

diff --git a/invenio/legacy/bibdocfile/webinterface.py b/invenio/legacy/bibdocfile/webinterface.py
@@ -52,14 +52,14 @@
 from invenio.base.i18n import gettext_set_language
 from invenio.legacy.search_engine import \
      guess_primary_collection_of_a_record, record_exists, \
-     create_navtrail_links, check_user_can_view_record, \
-     is_user_owner_of_record
+     create_navtrail_links, check_user_can_view_record
+from invenio.modules.records.access import is_user_owner_of_record
 from invenio.legacy.bibdocfile.api import BibRecDocs, normalize_format, file_strip_ext, \
     stream_restricted_icon, BibDoc, InvenioBibDocFileError, \
     get_subformat_from_format
 from invenio.ext.logging import register_exception
 from invenio.legacy.websearch.adminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
 import invenio.legacy.template
 bibdocfile_templates = invenio.legacy.template.load('bibdocfile')
 webstyle_templates = invenio.legacy.template.load('webstyle')

diff --git a/invenio/legacy/bibedit/utils.py b/invenio/legacy/bibedit/utils.py
@@ -87,7 +87,7 @@
 
 from invenio.base.globals import cfg
 from invenio.legacy.bibcatalog.api import BIBCATALOG_SYSTEM
-from invenio.modules.search.models import Collection
+from invenio.modules.collections.models import Collection
 
 try:
     from cPickle import loads

diff --git a/invenio/legacy/bibexport/sitemap.py b/invenio/legacy/bibexport/sitemap.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 ##
 ## This file is part of Invenio.
-## Copyright (C) 2008, 2010, 2011, 2014 CERN.
+## Copyright (C) 2008, 2010, 2011, 2014, 2015 CERN.
 ##
 ## Invenio is free software; you can redistribute it and/or
 ## modify it under the terms of the GNU General Public License as
@@ -39,7 +39,6 @@
 from invenio.config import CFG_SITE_URL, CFG_WEBDIR, CFG_ETCDIR, \
     CFG_SITE_RECORD, CFG_SITE_LANGS, CFG_TMPSHAREDDIR
 from intbitset import intbitset
-from invenio.legacy.websearch.webcoll import Collection
 from invenio.legacy.bibsched.bibtask import write_message, task_update_progress, task_sleep_now_if_required
 from invenio.utils.text import encode_for_xml
 from invenio.utils.url import get_canonical_and_alternates_urls
@@ -115,19 +114,19 @@ def get_collection_last_modification(collection):
         return max(minimum_timestamp, last_mod)
 
     output = []
-    for coll_name in base_collections:
-        mother_collection = Collection(coll_name)
-        if not mother_collection.restricted_p():
-            last_mod = get_collection_last_modification(mother_collection)
-            output.append((coll_name, last_mod))
-            for descendant in mother_collection.get_descendants(type='r'):
-                if not descendant.restricted_p():
-                    last_mod = get_collection_last_modification(descendant)
-                    output.append((descendant.name, last_mod))
-            for descendant in mother_collection.get_descendants(type='v'):
-                if not descendant.restricted_p():
-                    last_mod = get_collection_last_modification(descendant)
-                    output.append((descendant.name, last_mod))
+    # for coll_name in base_collections:
+    #     mother_collection = Collection(coll_name)
+    #     if not mother_collection.restricted_p():
+    #         last_mod = get_collection_last_modification(mother_collection)
+    #         output.append((coll_name, last_mod))
+    #         for descendant in mother_collection.get_descendants(type='r'):
+    #             if not descendant.restricted_p():
+    #                 last_mod = get_collection_last_modification(descendant)
+    #                 output.append((descendant.name, last_mod))
+    #         for descendant in mother_collection.get_descendants(type='v'):
+    #             if not descendant.restricted_p():
+    #                 last_mod = get_collection_last_modification(descendant)
+    #                 output.append((descendant.name, last_mod))
     return output
 
 def filter_fulltexts(recids, fulltext_type=None):

diff --git a/invenio/legacy/bibindex/engine.py b/invenio/legacy/bibindex/engine.py
@@ -1416,22 +1416,23 @@ def add_recID_range(self, recID1, recID2):
                                           wlist[recID])
 
         marc, nonmarc = self.find_nonmarc_records(recID1, recID2)
-        if marc:
+        if marc and len(self.tags):
             collector = TermCollector(self.tokenizer,
                                       self.tokenizer_type,
                                       self.table_type,
                                       self.tags,
                                       [recID1, recID2])
             collector.set_special_tags(self.special_tags)
             wlist = collector.collect(marc, wlist)
-        if nonmarc:
+        if nonmarc or (not len(self.tags) and len(self.nonmarc_tags)):
             collector = NonmarcTermCollector(self.tokenizer,
                                              self.tokenizer_type,
                                              self.table_type,
                                              self.nonmarc_tags,
                                              [recID1, recID2])
             collector.set_special_tags(self.special_tags)
-            wlist = collector.collect(nonmarc, wlist)
+            toindex = nonmarc if len(self.tags) else marc
+            wlist = collector.collect(toindex, wlist)
 
         # lookup index-time synonyms:
         synonym_kbrs = get_all_synonym_knowledge_bases()

diff --git a/invenio/legacy/bibindex/engine_utils.py b/invenio/legacy/bibindex/engine_utils.py
@@ -35,6 +35,7 @@
      CFG_BIBINDEX_CHARS_PUNCTUATION, \
      CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS
 from invenio.legacy.bibindex.engine_config import CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR
+from invenio.utils.memoise import memoize
 
 
 latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]')
@@ -288,6 +289,7 @@ def get_index_name_from_index_id(index_id):
     return ''
 
 
+@memoize
 def get_field_tags(field, tagtype="marc"):
     """Returns a list of tags for the field code 'field'. Works
        for both MARC and nonMARC tags.
@@ -378,6 +380,7 @@ def get_nonmarc_tag_indexes(nonmarc_tag, virtual=True):
     return ()
 
 
+@memoize
 def get_index_tags(indexname, virtual=True, tagtype="marc"):
     """Returns the list of tags that are indexed inside INDEXNAME.
        Returns empty list in case there are no tags indexed in this index.