-
Notifications
You must be signed in to change notification settings - Fork 284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Making languages dynamic with command arguments #125
Changes from 9 commits
00933ac
68ae44f
0621d01
35bcfde
e9fda4b
dacb243
84a37c2
a7ac5eb
0636de8
b4c5d42
b7e6ff3
fee73da
41ee0d1
d81292a
756aeb2
f3332f7
0b7f608
e4c6a6e
0c82ecb
d586d52
80116a9
8b67963
1a50011
c4726f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,7 @@ dependency-reduced-pom.xml | |
*.~* | ||
|
||
__pycache__ | ||
|
||
nb-configuration.xml | ||
|
||
nbactions.xml |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,12 @@ public class CommandLineArgs { | |
|
||
@Parameter(names = "-nominatim-import", description = "import nominatim database into photon (this will delete previous index)") | ||
private boolean nominatimImport = false; | ||
|
||
@Parameter(names = "-nominatim-import-languages", description = "languages nominatim importer should import, comma seperated (default: 'en,fr,de,it')") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we unify both arguments? could be cleaner There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good idea. I was struggling with a unified name, what do you suggest? Something like '-using-languages' or just '-supported-languages' maybe? |
||
private String nominatimImportLanguages = "en,fr,de,it"; | ||
|
||
@Parameter(names = "-supported-languages", description = "languages that are supported at run-time, comma seperated (default: 'en,fr,de,it')") | ||
private String supportedLanguages = "en,fr,de,it"; | ||
|
||
@Parameter(names = "-json", description = "import nominatim database and dump it to a json like files in (useful for developing)") | ||
private String jsonDump = null; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,8 @@ | |
|
||
import com.google.common.base.Joiner; | ||
import de.komoot.photon.importer.elasticsearch.Searcher; | ||
import org.elasticsearch.common.collect.ImmutableSet; | ||
import java.util.Arrays; | ||
import java.util.HashSet; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
import spark.Request; | ||
|
@@ -19,11 +20,12 @@ | |
*/ | ||
public class RequestHandler extends Route { | ||
private final Searcher searcher; | ||
private static final Set<String> supportedLanguages = ImmutableSet.of("de", "en", "fr", "it"); | ||
|
||
protected RequestHandler(String path, Searcher searcher) { | ||
private final Set<String> supportedLanguages; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see spaces here, while the lines around have tabs, but I'm not sure what's the choice here. @christophlingg? :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably a formatting setting of the Netbeans config I use. I will change it to tabs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Spaces still seem to be there ;) |
||
protected RequestHandler(String path, Searcher searcher, String languages) { | ||
super(path); | ||
this.searcher = searcher; | ||
this.supportedLanguages = new HashSet<String>(Arrays.asList(languages.split(","))); | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,9 +22,9 @@ | |
public class Utils { | ||
static final Joiner commaJoiner = Joiner.on(", ").skipNulls(); | ||
|
||
final static String[] languages = new String[]{"de", "en", "fr", "it"}; | ||
//final static String[] languages = new String[]{"de", "en", "fr", "it"}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we don't need this line, let's remove it :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for catching that, a bit sloppy of me |
||
|
||
public static XContentBuilder convert(PhotonDoc doc) throws IOException { | ||
public static XContentBuilder convert(PhotonDoc doc, String[] languages) throws IOException { | ||
XContentBuilder builder = XContentFactory.jsonBuilder().startObject() | ||
.field(Tags.KEY_OSM_ID, doc.getOsmId()) | ||
.field(Tags.KEY_OSM_TYPE, doc.getOsmType()) | ||
|
@@ -47,11 +47,11 @@ public static XContentBuilder convert(PhotonDoc doc) throws IOException { | |
builder.field("postcode", doc.getPostcode()); | ||
} | ||
|
||
writeName(builder, doc.getName()); | ||
writeIntlNames(builder, doc.getCity(), "city"); | ||
writeIntlNames(builder, doc.getCountry(), "country"); | ||
writeIntlNames(builder, doc.getStreet(), "street"); | ||
writeContext(builder, doc.getContext()); | ||
writeName(builder, doc.getName(), languages); | ||
writeIntlNames(builder, doc.getCity(), "city", languages); | ||
writeIntlNames(builder, doc.getCountry(), "country", languages); | ||
writeIntlNames(builder, doc.getStreet(), "street", languages); | ||
writeContext(builder, doc.getContext(), languages); | ||
writeExtent(builder, doc.getBbox()); | ||
|
||
return builder; | ||
|
@@ -74,8 +74,8 @@ private static void writeExtent(XContentBuilder builder, Envelope bbox) throws I | |
builder.endObject(); | ||
} | ||
|
||
private static void writeName(XContentBuilder builder, Map<String, String> name) throws IOException { | ||
Map<String, String> fNames = filterNames(name); | ||
private static void writeName(XContentBuilder builder, Map<String, String> name, String[] languages) throws IOException { | ||
Map<String, String> fNames = filterNames(name, languages); | ||
|
||
if(name.get("alt_name") != null) | ||
fNames.put("alt", name.get("alt_name")); | ||
|
@@ -102,7 +102,7 @@ private static void write(XContentBuilder builder, Map<String, String> fNames, S | |
builder.endObject(); | ||
} | ||
|
||
protected static void writeContext(XContentBuilder builder, Set<Map<String, String>> contexts) throws IOException { | ||
protected static void writeContext(XContentBuilder builder, Set<Map<String, String>> contexts, String[] languages) throws IOException { | ||
final SetMultimap<String, String> multimap = HashMultimap.create(); | ||
|
||
for(Map<String, String> context : contexts) { | ||
|
@@ -129,16 +129,16 @@ protected static void writeContext(XContentBuilder builder, Set<Map<String, Stri | |
} | ||
} | ||
|
||
private static void writeIntlNames(XContentBuilder builder, Map<String, String> names, String name) throws IOException { | ||
Map<String, String> fNames = filterNames(names); | ||
private static void writeIntlNames(XContentBuilder builder, Map<String, String> names, String name, String[] languages) throws IOException { | ||
Map<String, String> fNames = filterNames(names, languages); | ||
write(builder, fNames, name); | ||
} | ||
|
||
private static Map<String, String> filterNames(Map<String, String> names) { | ||
return filterNames(names, new HashMap<String, String>()); | ||
private static Map<String, String> filterNames(Map<String, String> names, String[] languages) { | ||
return filterNames(names, new HashMap<String, String>(), languages); | ||
} | ||
|
||
private static Map<String, String> filterNames(Map<String, String> names, HashMap<String, String> filteredNames) { | ||
private static Map<String, String> filterNames(Map<String, String> names, HashMap<String, String> filteredNames, String[] languages) { | ||
if(names == null) return filteredNames; | ||
|
||
if(names.get("name") != null) { | ||
|
@@ -153,4 +153,18 @@ private static Map<String, String> filterNames(Map<String, String> names, HashMa | |
|
||
return filteredNames; | ||
} | ||
|
||
// http://stackoverflow.com/a/4031040/1437096 | ||
public static String stripNonDigits( | ||
final CharSequence input /* inspired by seh's comment */){ | ||
final StringBuilder sb = new StringBuilder( | ||
input.length() /* also inspired by seh's comment */); | ||
for(int i = 0; i < input.length(); i++){ | ||
final char c = input.charAt(i); | ||
if(c > 47 && c < 58){ | ||
sb.append(c); | ||
} | ||
} | ||
return sb.toString(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import de.komoot.photon.importer.Tags; | ||
import de.komoot.photon.importer.osm.OSMTags; | ||
import lombok.extern.slf4j.Slf4j; | ||
import de.komoot.photon.importer.Utils; | ||
import org.apache.commons.io.IOUtils; | ||
import org.apache.commons.lang3.StringEscapeUtils; | ||
import org.apache.commons.lang3.text.StrSubstitutor; | ||
|
@@ -66,14 +67,14 @@ public List<JSONObject> search(String query, String lang, Double lon, Double lat | |
|
||
SearchResponse response = client.prepareSearch("photon").setSearchType(SearchType.QUERY_AND_FETCH).setQuery(query).setSize(limit).setTimeout(TimeValue.timeValueSeconds(7)).execute().actionGet(); | ||
List<JSONObject> results = convert(response.getHits().getHits(), lang); | ||
results = removeStreetDuplicates(results); | ||
results = removeStreetDuplicates(results, lang); | ||
if(results.size() > limit) { | ||
results = results.subList(0, limit); | ||
} | ||
return results; | ||
} | ||
|
||
private List<JSONObject> removeStreetDuplicates(List<JSONObject> results) { | ||
private List<JSONObject> removeStreetDuplicates(List<JSONObject> results, String lang) { | ||
List<JSONObject> filteredItems = Lists.newArrayListWithCapacity(results.size()); | ||
final HashSet<String> keys = Sets.newHashSet(); | ||
for(JSONObject result : results) { | ||
|
@@ -84,12 +85,35 @@ private List<JSONObject> removeStreetDuplicates(List<JSONObject> results) { | |
// street has a postcode and name | ||
String postcode = properties.getString(OSMTags.KEY_POSTCODE); | ||
String name = properties.getString(OSMTags.KEY_NAME); | ||
String key = postcode + ":" + name; | ||
String key = postcode + ":" + name; | ||
|
||
if(keys.contains(key)) { | ||
// a street with this name + postcode is already part of the result list | ||
continue; | ||
} | ||
} else if (lang.equals("nl")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should better land in a separate PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah I wasn't sure about having this in the same PR. Because it is dependent of the previous changes from this PR, I thought I would add it here. Will submit this as a separate PR 👍 |
||
// check for dutch postcodes (i.e. 6532RA). If a street has the same name and the same 4 numbers in the postcode, | ||
// we can assume it is part of the same street, so only use the first | ||
try { | ||
String letterlessPostcode = Utils.stripNonDigits(postcode); | ||
int postcodeNumbers = Integer.parseInt(letterlessPostcode); | ||
boolean foundMatch = false; | ||
|
||
for (String keyString : keys) { | ||
String letterlessKey = Utils.stripNonDigits(keyString); | ||
// also check if name equals, | ||
// which is a safety check for streets that partially match and have the same postcode numbers | ||
String keyName = keyString.split(":")[1]; | ||
if (postcodeNumbers == Integer.parseInt(letterlessKey) && keyName.equals(name)) { | ||
foundMatch = true; | ||
break; | ||
} | ||
} | ||
|
||
if (foundMatch) { | ||
continue; | ||
} | ||
} catch (NumberFormatException e) {} | ||
} | ||
keys.add(key); | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will cast floats to strings, not sure we really want to do that. What was your rationale here (apart from making the json a vaild one ;) )?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Woops! That is an oversight on my part.. Did not realize it would be casted as string. Will revert this change