Skip to content

Commit 9d9ff05

Browse files
committed
Performance and app properties details
1 parent 469b8b9 commit 9d9ff05

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

example_performance_error.txt

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
lexi_1 | 2021-08-24 07:29:19 ERROR server.Server:175 - QUERY FAILED!
2+
lexi_1 | com.fasterxml.jackson.databind.exc.MismatchedInputException: No content to map due to end-of-input
3+
lexi_1 | at [Source: (String)""; line: 1, column: 0]
4+
lexi_1 | at com.fasterxml.jackson.databind.exc.MismatchedInputException.from(MismatchedInputException.java:59)
5+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4134)
6+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:3988)
7+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2992)
8+
lexi_1 | at io.javalin.translator.json.JavalinJacksonPlugin.toObject(Jackson.kt:27)
9+
lexi_1 | at io.javalin.Context.bodyAsClass(Context.kt:81)
10+
lexi_1 | at server.Server.query(Server.java:168)
11+
lexi_1 | at server.Server.lambda$new$1(Server.java:47)
12+
lexi_1 | at io.javalin.core.JavalinServlet.service(JavalinServlet.kt:52)
13+
lexi_1 | at io.javalin.embeddedserver.jetty.EmbeddedJettyServer$start$$inlined$apply$lambda$1.doHandle(EmbeddedJettyServer.kt:29)
14+
lexi_1 | at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1568)
15+
lexi_1 | at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
16+
lexi_1 | at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
17+
lexi_1 | at org.eclipse.jetty.server.Server.handle(Server.java:564)
18+
lexi_1 | at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:358)
19+
lexi_1 | at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251)
20+
lexi_1 | at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
21+
lexi_1 | at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:110)
22+
lexi_1 | at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
23+
lexi_1 | at org.eclipse.jetty.util.thread.Invocable.invokePreferred(Invocable.java:128)
24+
lexi_1 | at org.eclipse.jetty.util.thread.Invocable$InvocableExecutor.invoke(Invocable.java:222)
25+
lexi_1 | at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:294)
26+
lexi_1 | at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:126)
27+
lexi_1 | at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:673)
28+
lexi_1 | at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:591)
29+
lexi_1 | at java.base/java.lang.Thread.run(Thread.java:831)
30+
lexi_1 | 2021-08-24 07:29:19 ERROR server.Server:175 - QUERY FAILED!
31+
lexi_1 | com.fasterxml.jackson.databind.exc.MismatchedInputException: No content to map due to end-of-input
32+
lexi_1 | at [Source: (String)""; line: 1, column: 0]
33+
lexi_1 | at com.fasterxml.jackson.databind.exc.MismatchedInputException.from(MismatchedInputException.java:59)
34+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4134)
35+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:3988)
36+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2992)
37+
lexi_1 | at io.javalin.translator.json.JavalinJacksonPlugin.toObject(Jackson.kt:27)
38+
lexi_1 | at io.javalin.Context.bodyAsClass(Context.kt:81)
39+
lexi_1 | at server.Server.query(Server.java:168)
40+
lexi_1 | at server.Server.lambda$new$1(Server.java:47)
41+
lexi_1 | at io.javalin.core.JavalinServlet.service(JavalinServlet.kt:52)
42+
lexi_1 | at io.javalin.embeddedserver.jetty.EmbeddedJettyServer$start$$inlined$apply$lambda$1.doHandle(EmbeddedJettyServer.kt:29)
43+
lexi_1 | at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1568)
44+
lexi_1 | at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
45+
lexi_1 | at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
46+
lexi_1 | at org.eclipse.jetty.server.Server.handle(Server.java:564)
47+
lexi_1 | at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:358)
48+
lexi_1 | at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251)
49+
lexi_1 | at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
50+
lexi_1 | at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:110)
51+
lexi_1 | at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
52+
lexi_1 | at org.eclipse.jetty.util.thread.Invocable.invokePreferred(Invocable.java:128)
53+
lexi_1 | at org.eclipse.jetty.util.thread.Invocable$InvocableExecutor.invoke(Invocable.java:222)
54+
lexi_1 | at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:294)
55+
lexi_1 | at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:126)
56+
lexi_1 | at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:673)
57+
lexi_1 | at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:591)
58+
lexi_1 | at java.base/java.lang.Thread.run(Thread.java:831)
59+

readme.md

+43-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
![Build Status](https://github.com/matthewcoole/cdb/workflows/build/badge.svg)
33
![codecov](https://codecov.io/gh/matthewcoole/cdb/branch/master/graph/badge.svg?token=XdKEOwSdnQ)
44
![Language grade: Java](https://img.shields.io/lgtm/grade/java/g/matthewcoole/lexidb.svg?logo=lgtm&logoWidth=18)
5+
56
## Build
7+
68
### Required
79
- [Java JDK 12](https://jdk.java.net/archive/) (In the docker build we used OpenJDK not Oracle as it is licensed under [GPLv2 + Classpath Exception](https://openjdk.java.net/legal/gplv2+ce.html)). Further, in the current docker image we build with Java JDK 12 on the **alpine Operating System (OS)**, **but** run the LexiDB jar file using [Java JDK 16 on **alpine OS** from OpenJDK](https://openjdk.java.net/projects/jdk/16/). The reason for using the Alpine OS is that it is smaller in size and [tends to have fewer security vulnerabilities compared to other Operating Systems.](https://snyk.io/blog/docker-for-java-developers/)
810
- [Gradle version 5.2](https://gradle.org/)
@@ -30,7 +32,9 @@ There is a docker instance of LexiDB which can be ran using the following comman
3032
docker run -it -p 127.0.0.1:3000:1189 --rm --init ghcr.io/ucrel/lexidb:latest
3133
```
3234

33-
By default it uses the [app.properties from ./src/main/resources/app.properties](./src/main/resources/app.properties).
35+
By default it uses the [app.properties from ./src/main/resources/app.properties](./src/main/resources/app.properties).
36+
37+
For more detail on the configuration settings within app.properties, see the [App Properties section below](#app-properties).
3438

3539
### Custom docker run command examples
3640

@@ -50,7 +54,7 @@ docker run -it -p 127.0.0.1:3000:1189 --init --entrypoint "java" --memory=8g --m
5054

5155
#### Formatting / Importing data
5256

53-
If you would like to import data into LexiDB without having to use the web API, you can do this through the java insert script. The java insert script converts the data files you want to import into a format that LexiDB can read. The insert script takes 3 arguments:
57+
If you would like to import data into LexiDB without having to use the web API, you can do this through the [java insert script](./src/main/java/util/Insert.java). The java insert script converts the data files you want to import into a format that LexiDB can read. The insert script takes 4 arguments:
5458

5559
1. File path to an `app.properties` file.
5660
2. Name of the corpus / database. This is equivalent to the name of the database in a MySQL database.
@@ -135,3 +139,40 @@ POST /mycorpus/query
135139
```
136140

137141
This will query the `"tokens"` table and the `"pos"` (part-of-speech) column for the value `"JJ"` and return the results in the form of a `"kwic"` (keyword in context).
142+
143+
## App Properties
144+
145+
The app.properties file should be a JSON file with the following keys. If any of the keys are missing from the file, the default value is used for each missing key; if no file is given, the default values are used for all keys:
146+
147+
| Key | Default Value | Description |
148+
|-----|---------------|-------------|
149+
| `block.cache.size` | 100 | |
150+
| `block.cache.timeout` | 1000 | |
151+
| `corpus.cache.size` | 10 | |
152+
| `corpus.cache.timeout` | 1000 | |
153+
| `result.cache.size` | 100 | |
154+
| `result.cache.timeout` | 30 | |
155+
| `data.path` | lexi-data | Relative or absolute file path to the top-level directory that LexiDB will use to store new and/or current data. If the directory does not exist, LexiDB will create it. For more details on how to format / import data into LexiDB, see the [formatting / importing data section above](#formatting--importing-data). |
156+
| `kwic.context` | 5 | Default context size for Key Word In Context (KWIC) searches. With the default this would result in 5 words before and after the key word. |
157+
| `result.page.size` | 100 | Default number of KWIC results to display per page when querying the KWIC API. |
158+
| `block.size` | 10000000 | The number of words to store per block within LexiDB. The larger this number is, the more memory (RAM) your machine will require, but the faster your queries will be. |
159+
160+
## Performance
161+
162+
One of the main performance bottlenecks with respect to query speed is the `block.size` set within [app.properties](#app-properties). The larger the block size, the faster the querying, but the more memory (RAM) it will require.
163+
164+
### Issues
165+
166+
If you see an error like the one below (the full error output can be found in the [example_performance_error.txt file](./example_performance_error.txt)), it is likely due to not having enough RAM allocated to the Java Virtual Machine (JVM). To increase the RAM allocation to the JVM, use the `-Xmx` flag. On most Ubuntu machines the default value for `-Xmx` is ~4GB; to increase it to 6GB, use `-Xmx6g`.
167+
168+
``` bash
169+
lexi_1 | 2021-08-24 07:29:19 ERROR server.Server:175 - QUERY FAILED!
170+
lexi_1 | com.fasterxml.jackson.databind.exc.MismatchedInputException: No content to map due to end-of-input
171+
lexi_1 | at [Source: (String)""; line: 1, column: 0]
172+
lexi_1 | at com.fasterxml.jackson.databind.exc.MismatchedInputException.from(MismatchedInputException.java:59)
173+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper._initForReading(ObjectMapper.java:4134)
174+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper._readMapAndClose(ObjectMapper.java:3988)
175+
lexi_1 | at com.fasterxml.jackson.databind.ObjectMapper.readValue(ObjectMapper.java:2992)
176+
lexi_1 | at io.javalin.translator.json.JavalinJacksonPlugin.toObject(Jackson.kt:27)
177+
lexi_1 | at io.javalin.Context.bodyAsClass(Context.kt:81)
178+
```

0 commit comments

Comments
 (0)