Skip to content

Commit

Permalink
collections: initial release
Browse files Browse the repository at this point in the history
* NOTE adds new calculated field '_collections' to records from which
  the 'collection' index is created.  (closes inveniosoftware#2638)

* NOTE collection reclist is no longer populated. The collection
  phrase index is used instead, with a query matcher based on record
  data; hence no second-order operator will work in a collection query
  definition.

Signed-off-by: Jiri Kuncar <[email protected]>
  • Loading branch information
jirikuncar committed Jan 27, 2015
1 parent 7fb880d commit c9b9a93
Show file tree
Hide file tree
Showing 72 changed files with 1,600 additions and 2,699 deletions.
2 changes: 1 addition & 1 deletion invenio/base/scripts/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def cfv_after_create(target, connection, **kw):
run_sql('ALTER TABLE collection_field_fieldvalue CHANGE id_fieldvalue id_fieldvalue mediumint(9) unsigned')
#print(run_sql('SHOW CREATE TABLE collection_field_fieldvalue'))

from invenio.modules.search.models import CollectionFieldFieldvalue
from invenio.modules.collections.models import CollectionFieldFieldvalue
event.listen(CollectionFieldFieldvalue.__table__, "after_create", cfv_after_create)

tables = db.metadata.sorted_tables
Expand Down
19 changes: 10 additions & 9 deletions invenio/base/scripts/demosite.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import pkg_resources
import sys

from itertools import count

from invenio.ext.script import Manager

manager = Manager(usage=__doc__)
Expand Down Expand Up @@ -102,23 +104,22 @@ def populate(packages=[], default_data=True, files=None,
print("ERROR: failed execution of", cmd)
sys.exit(1)

i = count(1).next
for cmd in ["bin/bibdocfile --textify --with-ocr --recid 97",
"bin/bibdocfile --textify --all",
"bin/bibindex -u admin",
"bin/bibindex %d" % (job_id + 1,),
"bin/bibindex %d" % (job_id + i(),),
"bin/bibindex -u admin -w global",
"bin/bibindex %d" % (job_id + 2,),
"bin/bibindex %d" % (job_id + i(),),
"bin/bibreformat -u admin -o HB",
"bin/bibreformat %d" % (job_id + 3,),
"bin/webcoll -u admin",
"bin/webcoll %d" % (job_id + 4,),
"bin/bibreformat %d" % (job_id + i(),),
"bin/bibrank -u admin",
"bin/bibrank %d" % (job_id + 5,),
"bin/bibrank %d" % (job_id + i(),),
"bin/bibsort -u admin -R",
"bin/bibsort %d" % (job_id + 6,),
"bin/bibsort %d" % (job_id + i(),),
"bin/oairepositoryupdater -u admin",
"bin/oairepositoryupdater %d" % (job_id + 7,),
"bin/bibupload %d" % (job_id + 8,)]:
"bin/oairepositoryupdater %d" % (job_id + i(),),
"bin/bibupload %d" % (job_id + i(),)]:
cmd = os.path.join(CFG_PREFIX, cmd)
if os.system(cmd):
print("ERROR: failed execution of", cmd)
Expand Down
2 changes: 1 addition & 1 deletion invenio/base/templates/footer_base.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
<div class="col-md-6">
{%- block footer_credits %}
{{ config["CFG_SITE_NAME_INTL"][g.ln] }}
&nbsp;::&nbsp;<a class="footer" href="{{ url_for('search.index') }}">
&nbsp;::&nbsp;<a class="footer" href="{{ url_for('collections.index') }}">
{{- _("Search") -}}
</a>&nbsp;::&nbsp;<a class="footer" href="{{ url_for('webdeposit.index') }}">
{{- _("Deposit") -}}
Expand Down
2 changes: 1 addition & 1 deletion invenio/base/templates/header_base.html
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="{{ url_for('search.index') }}">
<a class="navbar-brand" href="{{ url_for('collections.index') }}">
<img src="{{ url_for('static', filename='img/logo_white.png') }}" alt="{{ config.CFG_SITE_NAME_INTL[g.ln] }}" />
</a>
</div>
Expand Down
19 changes: 9 additions & 10 deletions invenio/ext/legacy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2011, 2012, 2013, 2014 CERN.
## Copyright (C) 2011, 2012, 2013, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
Expand All @@ -23,12 +24,12 @@
import os
import sys

## Import the remote debugger as a first thing, if allowed
#FIXME enable remote_debugger when invenio.config is ready
#try:
# from invenio.utils import remote_debugger
#except:
# remote_debugger = None
# Import the remote debugger as a first thing, if allowed
# FIXME enable remote_debugger when invenio.config is ready
# try:
# from invenio.utils import remote_debugger
# except:
# remote_debugger = None

from werkzeug.exceptions import HTTPException
from werkzeug.wrappers import BaseResponse
Expand Down Expand Up @@ -57,10 +58,8 @@ def cli_cmd_reset(sender, yes_i_know=False, drop=True, **kwargs):
# cli_cmd_reset_fieldnames(conf)

for cmd in ["%s/bin/webaccessadmin -u admin -c -a -D" % CFG_PREFIX,
"%s/bin/webcoll -u admin" % CFG_PREFIX,
"%s/bin/webcoll 1" % CFG_PREFIX,
"%s/bin/bibsort -u admin --load-config" % CFG_PREFIX,
"%s/bin/bibsort 2" % CFG_PREFIX, ]:
"%s/bin/bibsort 1" % CFG_PREFIX, ]:
if os.system(cmd):
print("ERROR: failed execution of", cmd)
sys.exit(1)
Expand Down
104 changes: 104 additions & 0 deletions invenio/ext/sqlalchemy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ def save(self):
from sqlalchemy.exc import OperationalError
from sqlalchemy.ext.declarative import declared_attr
from sqlalchemy.orm import class_mapper, properties
from sqlalchemy.orm.collections import (
InstrumentedList,
attribute_mapped_collection,
collection,
)

first_cap_re = re.compile('(.)([A-Z][a-z]+)')
all_cap_re = re.compile('([a-z0-9])([A-Z])')
Expand Down Expand Up @@ -258,3 +263,102 @@ def test_sqla_utf8_chain():
table.drop(bind=db.engine)

print(" [OK]")


class IntbitsetPickle(object):

    """Pickle implementation for intbitset.

    Provides a ``dumps``/``loads`` pair that serialises through
    ``fastdump()`` and deserialises through the ``intbitset`` constructor.
    """

    def dumps(self, obj, protocol=None):
        """Dump intbitset to byte stream.

        :param obj: object exposing ``fastdump()``, or ``None``.
        :param protocol: accepted but ignored.
        :returns: the result of ``fastdump()``; ``None`` is dumped as an
            empty intbitset.
        """
        if obj is None:
            obj = intbitset([])
        return obj.fastdump()

    def loads(self, obj):
        """Load byte stream to intbitset.

        Best effort: if ``intbitset`` cannot be built from ``obj`` an empty
        intbitset is returned instead of raising.

        :param obj: value handed to the ``intbitset`` constructor.
        :returns: the reconstructed intbitset, or an empty one on failure.
        """
        try:
            return intbitset(obj)
        except Exception:
            # Deliberately broad, but not a bare ``except:``, so that
            # KeyboardInterrupt and SystemExit still propagate.
            return intbitset()


def IntbitsetCmp(x, y):
    """Tell whether two intbitsets are equal.

    A missing operand (``None``) never compares equal, not even to another
    ``None``; otherwise the result of ``x == y`` is returned.
    """
    return x is not None and y is not None and x == y


class OrderedList(InstrumentedList):

    """Instrumented list whose items are ordered by their ``score``.

    Every item is expected to expose a writable, comparable ``score``
    attribute; the methods below assign scores when items are added.
    """

    def append(self, item):
        """Append ``item`` with a score one above the current maximum.

        The first item added to an empty list gets score ``1``.
        """
        if self:
            item.score = max(obj.score for obj in self) + 1
        else:
            item.score = 1
        InstrumentedList.append(self, item)

    def set(self, item, index=0):
        """Add ``item`` so that it ranks at position ``index`` by score.

        The item is always appended to the underlying list; only the
        ``score`` values encode the requested position.

        :param item: object to add; its ``score`` is overwritten.
        :param index: position in score order. Values past the end place
            the item last, negative values place it first.
        """
        if self:
            s = sorted(self, key=lambda obj: obj.score)
            if index >= len(s):
                item.score = s[-1].score + 1
            elif index < 0:
                item.score = s[0].score
                index = 0
            else:
                item.score = s[index].score + 1

            # Re-score everything from ``index`` onwards so that it ranks
            # strictly after the new item.
            for i, it in enumerate(s[index:]):
                it.score = item.score + i + 1
                # Possible optimisation: stop as soon as the next score is
                # already larger.
        else:
            item.score = index
        InstrumentedList.append(self, item)

    def pop(self, item):
        """Remove ``item`` from the list and return it.

        Unlike ``list.pop`` the argument is the item itself, not an index.
        The position is looked up in the list as stored, not in a
        score-sorted copy, so the right element is removed even when the
        storage order differs from the score order.

        :returns: the removed item, or ``None`` when the list is empty or
            does not contain ``item``.
        """
        for position, candidate in enumerate(self):
            if candidate == item:
                return InstrumentedList.pop(self, position)
        return None


def attribute_multi_dict_collection(creator, key_attr, val_attr):
    """Define new attribute based mapping.

    Builds a collection class that groups objects in lists under a key
    derived from each object.

    :param creator: callable ``creator(key, value)`` returning the object
        to store when an item is assigned with ``mapping[key] = value``.
    :param key_attr: callable returning the grouping key of an object.
    :param val_attr: callable returning the value exposed for an object
        when the mapping is read with ``mapping[key]``.
    :returns: the generated ``MultiMappedCollection`` class.
    """
    class MultiMappedCollection(dict):

        """Mapping of key to the list of objects sharing that key."""

        def __init__(self, data=None):
            # Objects live in ``_data``; the ``dict`` base itself is left
            # empty.
            self._data = data or {}

        @collection.appender
        def _append(self, obj):
            self._data.setdefault(key_attr(obj), []).append(obj)

        def __setitem__(self, key, value):
            self._append(creator(key, value))

        def __getitem__(self, key):
            return tuple(val_attr(obj) for obj in self._data[key])

        @collection.remover
        def _remove(self, obj):
            self._data[key_attr(obj)].remove(obj)

        @collection.iterator
        def _iterator(self):
            # ``values()`` rather than ``itervalues()``: the latter exists
            # on Python 2 only.
            for objs in self._data.values():
                for obj in objs:
                    yield obj

        def __repr__(self):
            return '%s(%r)' % (type(self).__name__, self._data)

    return MultiMappedCollection
4 changes: 2 additions & 2 deletions invenio/ext/template/context_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class template_args(object):
def setup_app(app):
@template_args('search.index', app=app)
@template_args('collections.index', app=app)
def foo():
return dict(foo='bar')
Expand All @@ -61,7 +61,7 @@ def foo():
.. code-block:: python
from invenio.modules.search.views.search import index
from invenio.modules.collections.views.collections import index
@template_args(index)
def bar():
Expand Down
2 changes: 1 addition & 1 deletion invenio/legacy/bibcirculation/webinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
CFG_BIBCIRCULATION_ACQ_STATUS_NEW, \
AMZ_ACQUISITION_IDENTIFIER_TAG

from invenio.modules.search.models import Collection
from invenio.modules.collections.models import Collection
get_colID = lambda name: Collection.query.filter_by(name=name).value('id')


Expand Down
5 changes: 3 additions & 2 deletions invenio/legacy/bibdocfile/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,11 @@ def print_table(title, table):
for row in table:
print("\t".join(str(elem) for elem in row))

for collection, reclist in run_sql("SELECT name, reclist FROM collection ORDER BY name"):
from invenio.modules.collections.cache import get_collection_reclist
for collection, in run_sql("SELECT name FROM collection ORDER BY name"):
print("-" * 79)
print("Statistic for: %s " % collection)
reclist = intbitset(reclist)
reclist = get_collection_reclist(collection)
if reclist:
sqlreclist = "(" + ','.join(str(elem) for elem in reclist) + ')'
print_table("Formats", run_sql("SELECT COUNT(format) as c, format FROM bibrec_bibdoc AS bb JOIN bibdocfsinfo AS fs ON bb.id_bibdoc=fs.id_bibdoc WHERE id_bibrec in %s AND last_version=true GROUP BY format ORDER BY c DESC" % sqlreclist)) # kwalitee: disable=sql
Expand Down
6 changes: 3 additions & 3 deletions invenio/legacy/bibdocfile/webinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@
from invenio.base.i18n import gettext_set_language
from invenio.legacy.search_engine import \
guess_primary_collection_of_a_record, record_exists, \
create_navtrail_links, check_user_can_view_record, \
is_user_owner_of_record
create_navtrail_links, check_user_can_view_record
from invenio.modules.records.access import is_user_owner_of_record
from invenio.legacy.bibdocfile.api import BibRecDocs, normalize_format, file_strip_ext, \
stream_restricted_icon, BibDoc, InvenioBibDocFileError, \
get_subformat_from_format
from invenio.ext.logging import register_exception
from invenio.legacy.websearch.adminlib import get_detailed_page_tabs, get_detailed_page_tabs_counts
from invenio.modules.search.models import Collection
from invenio.modules.collections.models import Collection
import invenio.legacy.template
bibdocfile_templates = invenio.legacy.template.load('bibdocfile')
webstyle_templates = invenio.legacy.template.load('webstyle')
Expand Down
2 changes: 1 addition & 1 deletion invenio/legacy/bibedit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@

from invenio.base.globals import cfg
from invenio.legacy.bibcatalog.api import BIBCATALOG_SYSTEM
from invenio.modules.search.models import Collection
from invenio.modules.collections.models import Collection

try:
from cPickle import loads
Expand Down
29 changes: 14 additions & 15 deletions invenio/legacy/bibexport/sitemap.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2008, 2010, 2011, 2014 CERN.
## Copyright (C) 2008, 2010, 2011, 2014, 2015 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -39,7 +39,6 @@
from invenio.config import CFG_SITE_URL, CFG_WEBDIR, CFG_ETCDIR, \
CFG_SITE_RECORD, CFG_SITE_LANGS, CFG_TMPSHAREDDIR
from intbitset import intbitset
from invenio.legacy.websearch.webcoll import Collection
from invenio.legacy.bibsched.bibtask import write_message, task_update_progress, task_sleep_now_if_required
from invenio.utils.text import encode_for_xml
from invenio.utils.url import get_canonical_and_alternates_urls
Expand Down Expand Up @@ -115,19 +114,19 @@ def get_collection_last_modification(collection):
return max(minimum_timestamp, last_mod)

output = []
for coll_name in base_collections:
mother_collection = Collection(coll_name)
if not mother_collection.restricted_p():
last_mod = get_collection_last_modification(mother_collection)
output.append((coll_name, last_mod))
for descendant in mother_collection.get_descendants(type='r'):
if not descendant.restricted_p():
last_mod = get_collection_last_modification(descendant)
output.append((descendant.name, last_mod))
for descendant in mother_collection.get_descendants(type='v'):
if not descendant.restricted_p():
last_mod = get_collection_last_modification(descendant)
output.append((descendant.name, last_mod))
# for coll_name in base_collections:
# mother_collection = Collection(coll_name)
# if not mother_collection.restricted_p():
# last_mod = get_collection_last_modification(mother_collection)
# output.append((coll_name, last_mod))
# for descendant in mother_collection.get_descendants(type='r'):
# if not descendant.restricted_p():
# last_mod = get_collection_last_modification(descendant)
# output.append((descendant.name, last_mod))
# for descendant in mother_collection.get_descendants(type='v'):
# if not descendant.restricted_p():
# last_mod = get_collection_last_modification(descendant)
# output.append((descendant.name, last_mod))
return output

def filter_fulltexts(recids, fulltext_type=None):
Expand Down
7 changes: 4 additions & 3 deletions invenio/legacy/bibindex/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1416,22 +1416,23 @@ def add_recID_range(self, recID1, recID2):
wlist[recID])

marc, nonmarc = self.find_nonmarc_records(recID1, recID2)
if marc:
if marc and len(self.tags):
collector = TermCollector(self.tokenizer,
self.tokenizer_type,
self.table_type,
self.tags,
[recID1, recID2])
collector.set_special_tags(self.special_tags)
wlist = collector.collect(marc, wlist)
if nonmarc:
if nonmarc or (not len(self.tags) and len(self.nonmarc_tags)):
collector = NonmarcTermCollector(self.tokenizer,
self.tokenizer_type,
self.table_type,
self.nonmarc_tags,
[recID1, recID2])
collector.set_special_tags(self.special_tags)
wlist = collector.collect(nonmarc, wlist)
toindex = nonmarc if len(self.tags) else marc
wlist = collector.collect(toindex, wlist)

# lookup index-time synonyms:
synonym_kbrs = get_all_synonym_knowledge_bases()
Expand Down
3 changes: 3 additions & 0 deletions invenio/legacy/bibindex/engine_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
CFG_BIBINDEX_CHARS_PUNCTUATION, \
CFG_BIBINDEX_CHARS_ALPHANUMERIC_SEPARATORS
from invenio.legacy.bibindex.engine_config import CFG_BIBINDEX_COLUMN_VALUE_SEPARATOR
from invenio.utils.memoise import memoize


latex_formula_re = re.compile(r'\$.*?\$|\\\[.*?\\\]')
Expand Down Expand Up @@ -288,6 +289,7 @@ def get_index_name_from_index_id(index_id):
return ''


@memoize
def get_field_tags(field, tagtype="marc"):
"""Returns a list of tags for the field code 'field'. Works
for both MARC and nonMARC tags.
Expand Down Expand Up @@ -378,6 +380,7 @@ def get_nonmarc_tag_indexes(nonmarc_tag, virtual=True):
return ()


@memoize
def get_index_tags(indexname, virtual=True, tagtype="marc"):
"""Returns the list of tags that are indexed inside INDEXNAME.
Returns empty list in case there are no tags indexed in this index.
Expand Down
Loading

0 comments on commit c9b9a93

Please sign in to comment.