Injector: log configuration failures in InjectMapper.setup

What this patch does (as shown by the hunk below):
  * Wraps the entire body of InjectMapper.setup(Context) in a try block.
  * On any Exception, logs "Could not configure InjectMapper" together with
    the exception via LOG.error, then rethrows the same exception, so the
    failure still propagates to the caller.
  * The configuration reads and field assignments themselves are unchanged;
    they are only re-indented by one level to sit inside the try block.

NOTE(review): the line structure and indentation of this patch were
reconstructed from a whitespace-collapsed copy, assuming 2-space indentation
steps. If context lines fail to match the target file, apply with
`git apply --ignore-whitespace` and confirm against the upstream file.

diff --git a/src/java/org/apache/nutch/crawl/Injector.java b/src/java/org/apache/nutch/crawl/Injector.java
index 0d3740eb4..fbb4516af 100644
--- a/src/java/org/apache/nutch/crawl/Injector.java
+++ b/src/java/org/apache/nutch/crawl/Injector.java
@@ -130,23 +130,28 @@ public static class InjectMapper
 
     @Override
     public void setup(Context context) {
-      Configuration conf = context.getConfiguration();
-      boolean normalize = conf.getBoolean(CrawlDbFilter.URL_NORMALIZING, true);
-      boolean filter = conf.getBoolean(CrawlDbFilter.URL_FILTERING, true);
-      filterNormalizeAll = conf.getBoolean(URL_FILTER_NORMALIZE_ALL, false);
-      if (normalize) {
-        scope = conf.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_INJECT);
-        urlNormalizers = new URLNormalizers(conf, scope);
-      }
-      interval = conf.getInt("db.fetch.interval.default", 2592000);
-      if (filter) {
-        filters = new URLFilters(conf);
+      try {
+        Configuration conf = context.getConfiguration();
+        boolean normalize = conf.getBoolean(CrawlDbFilter.URL_NORMALIZING, true);
+        boolean filter = conf.getBoolean(CrawlDbFilter.URL_FILTERING, true);
+        filterNormalizeAll = conf.getBoolean(URL_FILTER_NORMALIZE_ALL, false);
+        if (normalize) {
+          scope = conf.get(URL_NORMALIZING_SCOPE, URLNormalizers.SCOPE_INJECT);
+          urlNormalizers = new URLNormalizers(conf, scope);
+        }
+        interval = conf.getInt("db.fetch.interval.default", 2592000);
+        if (filter) {
+          filters = new URLFilters(conf);
+        }
+        scfilters = new ScoringFilters(conf);
+        scoreInjected = conf.getFloat("db.score.injected", 1.0f);
+        curTime = conf.getLong("injector.current.time",
+            System.currentTimeMillis());
+        url404Purging = conf.getBoolean(CrawlDb.CRAWLDB_PURGE_404, false);
+      } catch (Exception e) {
+        LOG.error("Could not configure InjectMapper", e);
+        throw e;
       }
-      scfilters = new ScoringFilters(conf);
-      scoreInjected = conf.getFloat("db.score.injected", 1.0f);
-      curTime = conf.getLong("injector.current.time",
-          System.currentTimeMillis());
-      url404Purging = conf.getBoolean(CrawlDb.CRAWLDB_PURGE_404, false);
     }
 
     /* Filter and normalize the input url */