Skip to content

Commit

Permalink
bugfixing
Browse files Browse the repository at this point in the history
  • Loading branch information
Orbiter committed Aug 22, 2020
1 parent 1577e03 commit d866d7d
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/org/json/JSONObject.java
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ public JSONObject(JSONTokener x) throws JSONException {
*/
public JSONObject(Map<?, ?> m) {
if (m == null) {
this.map = new HashMap<String, Object>();
this.map = new LinkedHashMap<String, Object>();
} else {
this.map = new HashMap<String, Object>(m.size());
for (final Entry<?, ?> e : m.entrySet()) {
Expand Down
8 changes: 6 additions & 2 deletions src/org/loklak/data/DAO.java
Original file line number Diff line number Diff line change
Expand Up @@ -1302,7 +1302,11 @@ public static ResultList<QueryEntry> SearchLocalQueries(final String q, final in
ResultList<Map<String, Object>> result = elasticsearch_client.fuzzyquery(IndexName.queries.name(), "query", q, resultCount, sort_field, default_sort_type, sort_order, since, until, range_field);
queries.setHits(result.getHits());
for (Map<String, Object> map: result) {
queries.add(new QueryEntry(new JSONObject(map)));
QueryEntry qe = new QueryEntry(new JSONObject(map));
// check a flag value for queries that probably never get new messages
if (qe.getMessagePeriod() != QueryEntry.DAY_MILLIS) {
queries.add(qe);
}
}
return queries;
}
Expand Down Expand Up @@ -1415,7 +1419,7 @@ public static TwitterTimeline scrapeTwitter(
DAO.severe(e);
}

if (recordQuery && Caretaker.acceptQuery4Retrieval(q)) {
if (recordQuery && Caretaker.acceptQuery4Retrieval(q) && tl.size() > 0) {
if (qe == null) {
// a new query occurred
qe = new QueryEntry(q, timezoneOffset, tl.period(), SourceType.TWITTER, byUserQuery);
Expand Down
12 changes: 7 additions & 5 deletions src/org/loklak/harvester/TwitterScraper.java
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ private static TwitterTimeline[] search(

// parse
Elements items = doc.getElementsByClass("stream-item");
for (int itemc = 0; itemc < items.size(); itemc++) {
itemloop: for (int itemc = 0; itemc < items.size(); itemc++) {
Element item = items.get(itemc);
if (debuglog) System.out.println(item.toString());

Expand Down Expand Up @@ -258,10 +258,12 @@ private static TwitterTimeline[] search(
}

String tweettimes = timestamp.attr("data-time-ms");
if (tweettimes.length() == 0) continue itemloop; // sometimes tweets are not available any more
long tweettime = Long.parseLong(tweettimes);
long snowflaketime = snowflake2millis(Long.parseLong(tweetID));
assert tweettime / 1000 == snowflaketime / 1000;

// this assertion holds in most, but unfortunately not in all cases, so it is commented out
// long snowflaketime = snowflake2millis(Long.parseLong(tweetID));
// assert tweettime / 1000L == snowflaketime / 1000L : "tweettime = " + tweettime + ", snowflaketime = " + snowflaketime;

Elements reply = item.getElementsByClass("ProfileTweet-action--reply").get(0).children();
Elements retweet = item.getElementsByClass("ProfileTweet-action--retweet").get(0).children();
Elements favourite = item.getElementsByClass("ProfileTweet-action--favorite").get(0).children();
Expand Down Expand Up @@ -747,7 +749,7 @@ public Post toJSON(final UserEntry user, final boolean calculatedData, final int
// the tweet; the cleanup is a helper function which cleans mistakes from the past in scraping
MessageEntry.TextLinkMap tlm = this.moreData.getText(iflinkexceedslength, urlstub, this.text, this.getLinks(), this.getPostId());
this.put("text", tlm);
if (this.status_id_url != null) this.put("link", this.status_id_url.toExternalForm());
if (this.status_id_url != null) this.put("link", this.status_id_url.toExternalForm()); // this is the primary key for retrieval in elasticsearch
this.put("id_str", this.postId);
this.put("conversation_id", this.conversationID);
this.put("conversation_user", this.conversationUserIDs);
Expand Down
4 changes: 2 additions & 2 deletions src/org/loklak/objects/AbstractObjectEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@

public abstract class AbstractObjectEntry extends Post implements ObjectEntry {

public final static String TIMESTAMP_FIELDNAME = "timestamp";
public final static String CREATED_AT_FIELDNAME = "created_at";
public final static String TIMESTAMP_FIELDNAME = "timestamp"; // the harvesting time, NOT used for identification
public final static String CREATED_AT_FIELDNAME = "created_at"; // the tweet time as embedded in the tweet, not used for identification either

public AbstractObjectEntry() {
}
Expand Down
8 changes: 6 additions & 2 deletions src/org/loklak/objects/QueryEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
*/
public class QueryEntry extends AbstractObjectEntry implements ObjectEntry {

private final static long DAY_MILLIS = 1000L * 60L * 60L * 24L;
public final static long DAY_MILLIS = 1000L * 60L * 60L * 24L;
private final static int RETRIEVAL_CONSTANT = 20; // the number of messages that we get with each retrieval at maximum

protected String query; // the query in the exact way as the user typed it in
Expand Down Expand Up @@ -144,7 +144,7 @@ public void update(final long message_period, final boolean byUserQuery) {
this.query_last = this.retrieval_last;
}
long new_message_period = message_period; // can be Long.MAX_VALUE if less than 2 messages are in timeline!
int new_messages_per_day = (int) (DAY_MILLIS / new_message_period); // this is an interpolation based on the last tweet list, can be 0!
int new_messages_per_day = (int) (DAY_MILLIS / (new_message_period + 1)); // this is an interpolation based on the last tweet list, can be 0!
if (new_message_period == Long.MAX_VALUE || new_messages_per_day == 0) {
this.message_period = DAY_MILLIS;
} else {
Expand Down Expand Up @@ -217,6 +217,10 @@ public int getQueryCount() {
/**
 * Returns the current value of the {@code retrieval_count} field of this query entry.
 *
 * NOTE(review): presumably the number of times this query was retrieved from the
 * remote source; the code that increments the counter is not visible here - confirm.
 *
 * @return the stored retrieval counter
 */
public int getRetrievalCount() {
return this.retrieval_count;
}

/**
 * Returns the current value of the {@code message_period} field of this query entry.
 *
 * {@code update()} stores {@code DAY_MILLIS} in this field when the timeline period
 * is {@code Long.MAX_VALUE} or the interpolated messages-per-day value is 0, and
 * {@code DAO.SearchLocalQueries} skips entries whose period equals {@code DAY_MILLIS}.
 * NOTE(review): the unit appears to be milliseconds (the value is divided into
 * {@code DAY_MILLIS}) - confirm.
 *
 * @return the stored message period
 */
public long getMessagePeriod() {
return this.message_period;
}

public int getMessagesPerDay() {
return this.messages_per_day;
Expand Down

0 comments on commit d866d7d

Please sign in to comment.