Skip to content

Commit 3efdf6a

Browse files
Refatora Solr (#3585)
1 parent 8adfe11 commit 3efdf6a

File tree

9 files changed

+230
-35
lines changed

9 files changed

+230
-35
lines changed

dist/bin/upload_configset.sh

100644100755
File mode changed.

docker/Dockerfile

+9-9
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
1-
FROM python:3.7-slim-buster
1+
FROM python:3.9-slim-buster
22

33
# Setup env
44
ENV LANG C.UTF-8
55
ENV LC_ALL C.UTF-8
66
ENV PYTHONDONTWRITEBYTECODE 1
7-
8-
#ENV PYTHONFAULTHANDLER 1
9-
7+
ENV PYTHONUNBUFFERED=1
108
ENV DEBIAN_FRONTEND noninteractive
119

1210
ENV BUILD_PACKAGES apt-utils apt-file libpq-dev graphviz-dev build-essential git pkg-config \
@@ -34,12 +32,13 @@ RUN apt-get update && \
3432
SUDO_FORCE_REMOVE=yes apt-get purge -y --auto-remove $BUILD_PACKAGES && \
3533
apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*
3634

37-
ENV HOME=/var/interlegis/sapl
35+
WORKDIR /var/interlegis/sapl/
36+
ADD . /var/interlegis/sapl/
3837

3938
COPY docker/start.sh $HOME
40-
COPY docker/check_solr.sh $HOME
41-
COPY docker/solr_api.py $HOME
42-
COPY docker/busy-wait.sh $HOME
39+
COPY docker/solr_cli.py $HOME
40+
COPY docker/wait-for-pg.sh $HOME
41+
COPY docker/wait-for-solr.sh $HOME
4342
COPY docker/create_admin.py $HOME
4443
COPY docker/genkey.py $HOME
4544
COPY docker/gunicorn_start.sh $HOME
@@ -55,7 +54,8 @@ RUN rm -rf /var/interlegis/sapl/sapl/.env && \
5554
rm -rf /var/interlegis/sapl/sapl.db
5655

5756
RUN chmod +x /var/interlegis/sapl/start.sh && \
58-
chmod +x /var/interlegis/sapl/check_solr.sh && \
57+
chmod +x /var/interlegis/sapl/wait-for-solr.sh && \
58+
chmod +x /var/interlegis/sapl/wait-for-pg.sh && \
5959
ln -sf /dev/stdout /var/log/nginx/access.log && \
6060
ln -sf /dev/stderr /var/log/nginx/error.log && \
6161
mkdir /var/log/sapl/ && touch /var/interlegis/sapl/sapl.log && \

docker/docker-compose.yaml

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
version: "3.7"
2+
services:
3+
sapldb:
4+
image: postgres:10.5-alpine
5+
restart: always
6+
container_name: postgres
7+
labels:
8+
NAME: "postgres"
9+
environment:
10+
POSTGRES_PASSWORD: sapl
11+
POSTGRES_USER: sapl
12+
POSTGRES_DB: sapl
13+
PGDATA : /var/lib/postgresql/data/
14+
volumes:
15+
- sapldb_data:/var/lib/postgresql/data/
16+
ports:
17+
- "5433:5432"
18+
networks:
19+
- sapl-net
20+
saplsolr:
21+
image: solr:8.11
22+
restart: always
23+
command: bin/solr start -c -f
24+
container_name: solr
25+
labels:
26+
NAME: "solr"
27+
volumes:
28+
- solr_data:/opt/solr/server/solr
29+
- solr_configsets:/opt/solr/server/solr/configsets
30+
ports:
31+
- "8983:8983"
32+
networks:
33+
- sapl-net
34+
sapl:
35+
# image: interlegis/sapl:3.1.162-RC13
36+
build:
37+
context: ../
38+
dockerfile: ./docker/Dockerfile
39+
container_name: sapl
40+
labels:
41+
NAME: "sapl"
42+
restart: always
43+
environment:
44+
ADMIN_PASSWORD: interlegis
45+
ADMIN_EMAIL: [email protected]
46+
DEBUG: 'False'
47+
EMAIL_PORT: 587
48+
EMAIL_USE_TLS: 'False'
49+
EMAIL_HOST: smtp.dominio.net
50+
EMAIL_HOST_USER: usuariosmtp
51+
EMAIL_SEND_USER: usuariosmtp
52+
EMAIL_HOST_PASSWORD: senhasmtp
53+
USE_SOLR: 'True'
54+
SOLR_COLLECTION: sapl
55+
SOLR_URL: http://solr:solr@saplsolr:8983
56+
IS_ZK_EMBEDDED: 'True'
57+
TZ: America/Sao_Paulo
58+
volumes:
59+
- sapl_data:/var/interlegis/sapl/data
60+
- sapl_media:/var/interlegis/sapl/media
61+
depends_on:
62+
- sapldb
63+
- saplsolr
64+
ports:
65+
- "80:80"
66+
networks:
67+
- sapl-net
68+
networks:
69+
sapl-net:
70+
name: sapl-net
71+
driver: bridge
72+
volumes:
73+
sapldb_data:
74+
sapl_data:
75+
sapl_media:
76+
solr_data:
77+
solr_configsets:

docker/solr_api.py docker/solr_cli.py

+120-9
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,102 @@
1-
from io import BytesIO
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
23
import argparse
3-
import os
4-
import requests
4+
import logging
5+
import re
6+
import secrets
57
import subprocess
68
import sys
79
import zipfile
10+
from base64 import b64encode, b64decode
11+
from hashlib import sha256
12+
from io import BytesIO
813
from pathlib import Path
914

10-
##
11-
## Este módulo deve ser executado na raiz do projeto
12-
##
15+
import requests
16+
from kazoo.client import KazooClient
17+
18+
#
19+
# Este módulo deve ser executado na raiz do projeto
20+
#
21+
22+
logging.basicConfig()
23+
24+
SECURITY_FILE_TEMPLATE = """
25+
{
26+
"authentication":{
27+
"blockUnknown": true,
28+
"class":"solr.BasicAuthPlugin",
29+
"credentials":{"%s":"%s %s"},
30+
"forwardCredentials": false,
31+
"realm": "Solr Login"
32+
},
33+
"authorization":{
34+
"class":"solr.RuleBasedAuthorizationPlugin",
35+
"permissions":[{"name":"security-edit", "role":"admin"}],
36+
"user-role":{"%s":"admin"}
37+
}
38+
}
39+
"""
40+
41+
URL_PATTERN = 'https?://(([a-zA-Z0-9]+):([a-zA-Z0-9]+)@)?([a-zA-Z0-9.-]+)(:[0-9]{4})?'
42+
43+
44+
def solr_hash_password(password: str, salt: str = None):
    """
    Hashes a clear-text password for use with Solr Basic Auth.

    The hash is sha256 applied twice: first over (salt + password), then
    over the resulting digest.

    password: clear text password string
    salt (optional): base64 salt string; when omitted, 32 random bytes are
        generated with the `secrets` module
    returns: tuple (hash, salt), both as base64 strings
    """
    raw_salt = secrets.token_bytes(32) if salt is None else b64decode(salt)

    # First pass covers salt + password, second pass covers the first digest.
    first_pass = sha256(raw_salt + password.encode('utf-8')).digest()
    second_pass = sha256(first_pass).digest()

    return (
        b64encode(second_pass).decode('utf-8'),
        b64encode(raw_salt).decode('utf-8'),
    )
67+
68+
69+
def create_security_file(username, password):
    """
    Writes a `security.json` file in the current working directory.

    The file is rendered from SECURITY_FILE_TEMPLATE with the given user
    name and a freshly salted hash of the password. An existing file with
    the same name is overwritten.

    username: user name placed in the credentials and user-role sections
    password: clear text password, hashed with solr_hash_password()
    """
    print("Creating security.json file...")
    with open("security.json", "w") as out:
        hashed, salt_b64 = solr_hash_password(password)
        content = SECURITY_FILE_TEMPLATE % (username, hashed, salt_b64, username)
        out.write(content)
    print("file created!")
75+
76+
77+
def upload_security_file(zk_host):
    """
    Uploads the local `security.json` file to the ZooKeeper node
    `/security.json`.

    Connects to ZooKeeper on `zk_host` at port 9983 (the embedded ZK port),
    overwrites the node if it exists or creates it otherwise, then reads the
    node back and prints its content.

    zk_host: host name or address of the ZooKeeper server
    On any failure the error is printed and the process exits with status -1.
    """
    zk_port = 9983  # embedded ZK port
    print(f"Uploading security file to Solr, ZK server={zk_host}:{zk_port}...")
    try:
        with open('security.json', 'r') as f:
            payload = f.read().encode()
        zk = KazooClient(hosts=f"{zk_host}:{zk_port}")
        try:
            zk.start()
            print("Uploading security.json file...")
            if zk.exists('/security.json'):
                zk.set("/security.json", payload)
            else:
                zk.create("/security.json", payload)
            data, _stat = zk.get('/security.json')
            print("file uploaded!")
            print(data.decode('utf-8'))
        finally:
            # Release the ZooKeeper session even when an operation above
            # fails; previously the session was only stopped on success.
            zk.stop()
            zk.close()
    except Exception as e:
        print(e)
        sys.exit(-1)
1597

98+
99+
class SolrClient:
16100
LIST_CONFIGSETS = "{}/solr/admin/configs?action=LIST&omitHeader=true&wt=json"
17101
UPLOAD_CONFIGSET = "{}/solr/admin/configs?action=UPLOAD&name={}&wt=json"
18102
LIST_COLLECTIONS = "{}/solr/admin/collections?action=LIST&wt=json"
@@ -129,7 +213,7 @@ def create_collection(self, collection_name, shards=1, replication_factor=1, max
129213
print(res.content)
130214
return False
131215
return True
132-
216+
133217
def delete_collection(self, collection_name):
134218
if collection_name == '*':
135219
collections = self.list_collections()
@@ -160,6 +244,22 @@ def delete_index_data(self, collection_name):
160244
print("Num docs: %s" % num_docs)
161245

162246

247+
def setup_embedded_zk(solr_url):
248+
match = re.match(URL_PATTERN, solr_url)
249+
if match:
250+
_, solr_user, solr_pwd, solr_host, solr_port = match.groups()
251+
252+
if solr_user and solr_pwd and solr_host:
253+
create_security_file(solr_user, solr_pwd)
254+
upload_security_file(solr_host)
255+
else:
256+
print(f"Missing Solr's username, password, and host: {solr_user}/{solr_pwd}/{solr_host}")
257+
sys.exit(-1)
258+
else:
259+
print(f"Solr URL path doesn't match the required format: {solr_url}")
260+
sys.exit(-1)
261+
262+
163263
if __name__ == '__main__':
164264

165265
parser = argparse.ArgumentParser(description='Cria uma collection no Solr')
@@ -178,17 +278,27 @@ def delete_index_data(self, collection_name):
178278
parser.add_argument('-ms', type=int, dest='max_shards_per_node', nargs='?',
179279
help='Max shards per node (default=1)', default=1)
180280

281+
parser.add_argument("--embedded_zk", default=False, action="store_true",
282+
help="Embedded ZooKeeper")
283+
181284
try:
182285
args = parser.parse_args()
183286
except IOError as msg:
184287
parser.error(str(msg))
185288
sys.exit(-1)
186289

187290
url = args.url.pop()
188-
collection = args.collection.pop()
189291

292+
if args.embedded_zk:
293+
print("Setup embedded ZooKeeper...")
294+
setup_embedded_zk(url)
295+
296+
collection = args.collection.pop()
190297
client = SolrClient(url=url)
191298

299+
## Add --force to force upload security.json, configset upload and collection recreation
300+
## it will clean the solr server before proceeding
301+
## Add --clean option to clean uploadconfig and collection
192302
if not client.exists_collection(collection):
193303
print("Collection '%s' doesn't exists. Creating a new one..." % collection)
194304
created = client.create_collection(collection,
@@ -200,6 +310,7 @@ def delete_index_data(self, collection_name):
200310
else:
201311
print("Collection '%s' exists." % collection)
202312

313+
## Add --disable-index to disable auto index
203314
num_docs = client.get_num_docs(collection)
204315
if num_docs == 0:
205316
print("Performing a full reindex of '%s' collection..." % collection)

docker/start.sh

+18-12
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ create_env() {
2222

2323
touch $FILENAME
2424

25-
2625
# explicitly use '>' to erase any previous content
2726
echo "SECRET_KEY="$KEY > $FILENAME
2827
# now only appends
@@ -39,14 +38,14 @@ create_env() {
3938
echo "USE_SOLR = ""${USE_SOLR-False}" >> $FILENAME
4039
echo "SOLR_COLLECTION = ""${SOLR_COLLECTION-sapl}" >> $FILENAME
4140
echo "SOLR_URL = ""${SOLR_URL-http://localhost:8983}" >> $FILENAME
41+
echo "IS_ZK_EMBEDDED = ""${IS_ZK_EMBEDDED-False}" >> $FILENAME
4242

43-
4443
echo "[ENV FILE] done."
4544
}
4645

4746
create_env
4847

49-
/bin/bash busy-wait.sh $DATABASE_URL
48+
/bin/bash wait-for-pg.sh $DATABASE_URL
5049

5150
yes yes | python3 manage.py migrate
5251

@@ -55,39 +54,46 @@ yes yes | python3 manage.py migrate
5554
USE_SOLR="${USE_SOLR:=False}"
5655
SOLR_URL="${SOLR_URL:=http://localhost:8983}"
5756
SOLR_COLLECTION="${SOLR_COLLECTION:=sapl}"
58-
5957
NUM_SHARDS=${NUM_SHARDS:=1}
6058
RF=${RF:=1}
6159
MAX_SHARDS_PER_NODE=${MAX_SHARDS_PER_NODE:=1}
60+
IS_ZK_EMBEDDED="${IS_ZK_EMBEDDED:=False}"
6261

6362
if [ "${USE_SOLR-False}" == "True" ] || [ "${USE_SOLR-False}" == "true" ]; then
6463

65-
echo "SOLR configurations"
64+
echo "Solr configurations"
6665
echo "==================="
6766
echo "URL: $SOLR_URL"
6867
echo "COLLECTION: $SOLR_COLLECTION"
6968
echo "NUM_SHARDS: $NUM_SHARDS"
7069
echo "REPLICATION FACTOR: $RF"
7170
echo "MAX SHARDS PER NODE: $MAX_SHARDS_PER_NODE"
71+
echo "ASSUME ZK EMBEDDED: $IS_ZK_EMBEDDED"
7272
echo "========================================="
7373

74-
echo "running solr script"
75-
/bin/bash check_solr.sh $SOLR_URL
74+
echo "running Solr script"
75+
/bin/bash wait-for-solr.sh $SOLR_URL
7676
CHECK_SOLR_RETURN=$?
7777

7878
if [ $CHECK_SOLR_RETURN == 1 ]; then
79-
echo "Connecting to solr..."
80-
python3 solr_api.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE &
81-
# python3 manage.py rebuild_index --noinput &
79+
echo "Connecting to Solr..."
80+
81+
82+
if [ "${IS_ZK_EMBEDDED-False}" == "True" ] || [ "${IS_ZK_EMBEDDED-False}" == "true" ]; then
83+
ZK_EMBEDDED="--embedded_zk"
84+
echo "Assuming embedded ZooKeeper instalation..."
85+
fi
86+
87+
python3 solr_cli.py -u $SOLR_URL -c $SOLR_COLLECTION -s $NUM_SHARDS -rf $RF -ms $MAX_SHARDS_PER_NODE $ZK_EMBEDDED &
8288
else
8389
echo "Solr is offline, not possible to connect."
8490
fi
8591

8692
else
87-
echo "Suporte a SOLR não inicializado."
93+
echo "Solr support is not initialized."
8894
fi
8995

90-
echo "Criando usuário admin..."
96+
echo "Creating admin user..."
9197

9298
user_created=$(python3 create_admin.py 2>&1)
9399

File renamed without changes.

0 commit comments

Comments
 (0)