Skip to content

Commit

Permalink
[DarkReadingBridge] Fix content extraction (#2315)
Browse files (browse the repository at this point in the history)
Also:
- Add article limit (main feed was broken due to too many articles)
- Add support for article thumbnail
  • Loading branch information
ORelio authored Oct 29, 2021
1 parent b86ed70 commit 970bdd4
Showing 1 changed file with 7 additions and 6 deletions.
13 changes: 7 additions & 6 deletions bridges/DarkReadingBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,22 +48,25 @@ public function collectData(){
if ($feed_id != '000') {
$feed_url .= '?f_n=' . $feed_id . '&f_ln=' . $feed_name;
}
$this->collectExpandableDatas($feed_url);
$this->collectExpandableDatas($feed_url, 20);
}

/**
 * Turn one feed entry into an item whose content comes from the article page.
 *
 * The item is first built by the parent class. Entries whose content is
 * empty are dropped. For the others, the page at $item['uri'] is loaded,
 * its body is run through extractArticleContent(), and the enclosure list
 * is replaced by the page's og:image (when it declares a usable one).
 *
 * @param mixed $newsItem Feed entry, passed through unchanged to parent::parseItem().
 * @return array|null The completed item, or null when the entry has no content.
 */
protected function parseItem($newsItem){
	$item = parent::parseItem($newsItem);
	if (empty($item['content'])) {
		return null; // ignore dummy articles
	}

	// Assignment binds tighter than `or`: the error helper only runs when
	// the page could not be loaded (falsy result).
	$article = getSimpleHTMLDOMCached($item['uri'])
		or returnServerError('Could not request Dark Reading: ' . $item['uri']);

	$item['content'] = $this->extractArticleContent($article);

	// Start from an empty list to remove the author profile picture.
	$item['enclosures'] = array();

	// Use the og:image as thumbnail, but only when the meta tag actually
	// carries a non-empty content attribute; otherwise an empty/false value
	// would end up as the sole enclosure.
	$image = $article->find('meta[property="og:image"]', 0);
	if (is_object($image) && !empty($image->content)) {
		$item['enclosures'] = array($image->content);
	}

	return $item;
}

private function extractArticleContent($article){
$content = $article->find('div#article-main', 0)->innertext;
$content = $article->find('div.article-content', 0)->innertext;

foreach (array(
'<div class="divsplitter',
Expand All @@ -74,8 +77,6 @@ private function extractArticleContent($article){
$content = stripRecursiveHTMLSection($content, 'div', $div_start);
}

$content = stripWithDelimiters($content, '<h1 ', '</h1>');

return $content;
}
}

0 comments on commit 970bdd4

Please sign in to comment.