Skip to content
This repository was archived by the owner on Sep 24, 2018. It is now read-only.

Commit 7685e78

Browse files
authored
Merge pull request #7 from pitpit/feature/lbc-upgrade
upgrade to new leboncoin version
2 parents a90f997 + db06bfa commit 7685e78

12 files changed

+13212
-12718
lines changed

composer.json

+5
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,10 @@
2828
},
2929
"scripts": {
3030
"test": "phpunit"
31+
},
32+
"extra": {
33+
"branch-alias": {
34+
"dev-master": "2.0.x-dev"
35+
}
3136
}
3237
}

src/Crawler/SearchResultAdCrawler.php

+28-26
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
<?php namespace Lbc\Crawler;
22

3-
use Lbc\Parser\SearchResultDateTimeParser;
43
use League\Url\Url;
54
use Symfony\Component\DomCrawler\Crawler;
65

@@ -74,7 +73,7 @@ public function getTitle()
7473
*/
7574
public function getPrice()
7675
{
77-
$node = $this->node->filter('.price');
76+
$node = $this->node->filter('*[itemprop=price]');
7877

7978
return $this->getFieldValue($node, 0, function ($value) {
8079
return (int) preg_replace('/[^\d]/', '', trim($value));
@@ -96,17 +95,17 @@ public function getUrl()
9695
/**
9796
* Return the date and time the ad was created
9897
*
99-
* @return \Carbon\Carbon
98+
* @return string
10099
*/
101100
public function getCreatedAt()
102101
{
103-
list($date, $time) = $this->node
104-
->filter('.date > div')
105-
->each(function (Crawler $node) {
106-
return $node->text();
107-
});
102+
$date = $this->node
103+
->filter('*[itemprop=availabilityStarts]')
104+
->first()
105+
->attr('content')
106+
;
108107

109-
return SearchResultDateTimeParser::toDt($date, $time);
108+
return (new \DateTime($date))->format('Y-m-d H:m');
110109
}
111110

112111
/**
@@ -116,13 +115,16 @@ public function getCreatedAt()
116115
*/
117116
public function getThumb()
118117
{
119-
$node = $this->node->filter('.image-and-nb > img');
118+
$src = $this->node
119+
->filter('.item_imagePic .lazyload[data-imgsrc]')
120+
->first()
121+
->attr('data-imgsrc')
122+
;
120123

121-
return $this->getFieldValue($node, null, function ($value) {
122-
return Url::createFromUrl($value)
124+
return Url::createFromUrl($src)
123125
->setScheme('http')
124-
->__toString();
125-
}, 'attr', 'src');
126+
->__toString()
127+
;
126128
}
127129

128130
/**
@@ -132,7 +134,7 @@ public function getThumb()
132134
*/
133135
public function getNbImage()
134136
{
135-
$node = $this->node->filter('.image-and-nb > .nb > .value');
137+
$node = $this->node->filter('.item_imageNumber');
136138

137139
return $this->getFieldValue($node, 0, function ($value) {
138140
return (int)trim($value);
@@ -144,7 +146,7 @@ public function getNbImage()
144146
*/
145147
public function getPlacement()
146148
{
147-
$node = $this->node->filter('.placement');
149+
$node = $this->node->filter('*[itemprop=availableAtOrFrom]');
148150

149151
return $this->getFieldValue($node, '', function ($value) {
150152
return preg_replace('/\s+/', ' ', trim($value));
@@ -157,7 +159,7 @@ public function getPlacement()
157159
*/
158160
public function getType()
159161
{
160-
$node = $this->node->filter('.detail > .category');
162+
$node = $this->node->filter('*[itemprop=category]');
161163

162164
return $this->getFieldValue($node, false, function ($value) {
163165
if ('pro' == preg_replace('/[\s()]+/', '', $value)) {
@@ -170,16 +172,16 @@ public function getType()
170172

171173
public function getAll()
172174
{
173-
return (object)[
174-
'id' => $this->getId(),
175-
'title' => $this->getTitle(),
176-
'price' => $this->getPrice(),
177-
'url' => $this->getUrl(),
175+
return (object) [
176+
'id' => $this->getId(),
177+
'title' => $this->getTitle(),
178+
'price' => $this->getPrice(),
179+
'url' => $this->getUrl(),
178180
'created_at' => $this->getCreatedAt(),
179-
'thumb' => $this->getThumb(),
180-
'nb_image' => $this->getNbImage(),
181-
'placement' => $this->getPlacement(),
182-
'type' => $this->getType(),
181+
'thumb' => $this->getThumb(),
182+
'nb_image' => $this->getNbImage(),
183+
'placement' => $this->getPlacement(),
184+
'type' => $this->getType(),
183185
];
184186
}
185187

src/Crawler/SearchResultCrawler.php

+3-3
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class SearchResultCrawler extends CrawlerAbstract
1010
public function getNbAds()
1111
{
1212
$nbAds = $this->crawler
13-
->filter('nav > ul.navlist.type > li.selected > span.value > b')
13+
->filter('a.tabsSwitch span.tabsSwitchNumbers')
1414
->first();
1515

1616
if ($nbAds->count()) {
@@ -40,7 +40,7 @@ public function getAds()
4040
{
4141
$ads = array();
4242

43-
$this->crawler->filter('div.list-lbc > a')
43+
$this->crawler->filter('[itemtype="http://schema.org/Offer"] > a')
4444
->each(function ($node) use (&$ads) {
4545
$ad = (new SearchResultAdCrawler($node))->getAll();
4646
$ads [$ad->id] = $ad;
@@ -58,7 +58,7 @@ public function getAdsId()
5858
{
5959
$adsID = array();
6060

61-
$this->crawler->filter('div.list-lbc > a')
61+
$this->crawler->filter('[itemtype="http://schema.org/Offer"] > a')
6262
->each(function ($node) use (&$adsID) {
6363
$adsID [] = (new SearchResultAdCrawler($node))->getId();
6464
});

src/GetFrom.php

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public function getHttpClient()
3131
*
3232
* @param $url
3333
* @param bool $detailedAd
34+
*
3435
* @return array
3536
*/
3637
public function search($url, $detailedAd = false)
@@ -61,6 +62,7 @@ public function search($url, $detailedAd = false)
6162
* Retrieve the ad's data from an ad's ID and its category
6263
*
6364
* @param $url
65+
*
6466
* @return array
6567
*/
6668
private function adById($id, $category)

src/Helper/Encoding.php

-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ public static function toAscii($string)
2424
return $chr;
2525
}
2626
return '';
27-
2827
}, str_split($string));
2928

3029
return implode($ret);

src/Parser/SearchResultDateTimeParser.php

-72
This file was deleted.

tests/Lbc/Crawler/SearchResultAdCrawlerTest.php

+11-12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
<?php namespace Lbc\Crawler;
22

3-
use Lbc\Parser\SearchResultDateTimeParser;
43
use Symfony\Component\DomCrawler\Crawler;
54

65
class SearchResultAdCrawlerTest extends \PHPUnit_Framework_TestCase
@@ -17,20 +16,20 @@ public function setUp()
1716

1817
public function testTheInformationOfAnAdAreCorrectlyExtracted()
1918
{
20-
$node = (new Crawler($this->adContent))->filter('.list-lbc > a')->first();
19+
$node = (new Crawler($this->adContent))->filter('[itemtype="http://schema.org/Offer"] > a')->first();
2120

2221
$search = new SearchResultAdCrawler($node);
2322

24-
$expected = (object)[
25-
'id' => '896305873',
26-
'title' => 'Mercedes Classe B II 180 Design Automatique Diesel',
27-
'price' => 20500,
28-
'url' => 'http://www.leboncoin.fr/voitures/896305873.htm?ca=4_s',
29-
'created_at' => SearchResultDateTimeParser::toDt("Aujourd'hui", '20:01'),
30-
'thumb' => 'http://img3.leboncoin.fr/thumbs/67d/67db00ee1186b81bd3177f0d9b92fe8d012f1778.jpg',
31-
'nb_image' => 3,
32-
'placement' => 'Pont-l\'Evêque / Calvados',
33-
'type' => 'part',
23+
$expected = (object) [
24+
'id' => '1046613551',
25+
'title' => 'BMW 635d coupe',
26+
'price' => 33499,
27+
'url' => 'http://www.leboncoin.fr/voitures/1046613551.htm?ca=4_s',
28+
'created_at' => '2016-11-07 00:11',
29+
'thumb' => 'http://img2.leboncoin.fr/ad-thumb/cd38e9ebe6abc86e2568de2a4ab14e8fa9f5196f.jpg',
30+
'nb_image' => 3,
31+
'placement' => 'Manche',
32+
'type' => 'part',
3433
];
3534

3635
$this->assertEquals($expected, $search->getAll());

tests/Lbc/Crawler/SearchResultCrawlerTest.php

+11-11
Original file line numberDiff line numberDiff line change
@@ -38,27 +38,27 @@ public function testTheOfflineContentHasTheRightNumberOfAdsAndPages()
3838
{
3939
$search = new SearchResultCrawler($this->searchContent);
4040

41-
$this->assertEquals(690, $search->getNbAds());
42-
$this->assertEquals(20, $search->getNbPages());
41+
$this->assertEquals(799, $search->getNbAds());
42+
$this->assertEquals(23, $search->getNbPages());
4343

4444
$search = new SearchResultCrawler($this->searchContent2);
4545

46-
$this->assertEquals(1965, $search->getNbAds());
47-
$this->assertEquals(57, $search->getNbPages());
46+
$this->assertEquals(5047, $search->getNbAds());
47+
$this->assertEquals(145, $search->getNbPages());
4848
}
4949

5050
public function testTheAdsId()
5151
{
5252
$search = new SearchResultCrawler($this->searchContent);
5353

5454
$expected = [
55-
'896305873', '918388326', '918353717', '891325771', '918340050',
56-
'918339265', '918335090', '914545627', '918236863', '899214543',
57-
'918145675', '917182471', '918130073', '902341065', '911107968',
58-
'918099489', '918099192', '918057958', '918055703', '918041045',
59-
'918035420', '918115788', '918015740', '869266253', '917977535',
60-
'917967176', '879087195', '917921773', '917920350', '898591934',
61-
'917934779', '917937286', '917869520', '917934759', '917789397',
55+
'1046613551', '1046609163', '1046583800', '1046572010', '1028952521',
56+
'1046447258', '1046411771', '1016363207', '1035200374', '1046352820',
57+
'1031298162', '1030691674', '1046321177', '1032248300', '1000713898',
58+
'1046288334', '1018108325', '1025795963', '1046233711', '1015545496',
59+
'1046218853', '1042802333', '1040954432', '1046387866', '996082887',
60+
'1035218075', '1033979644', '1046158278', '1046156273', '1046149609',
61+
'1046101160', '1046048811', '1046069122', '1046059631', '1046002930',
6262
];
6363

6464
$this->assertEquals($expected, $search->getAdsId());

tests/Lbc/GetFromTest.php

+12-13
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
use GuzzleHttp\Message\Response;
44
use GuzzleHttp\Stream\Stream;
55
use GuzzleHttp\Subscriber\Mock;
6-
use Lbc\Parser\SearchResultDateTimeParser;
76

87
class GetFromTest extends \PHPUnit_Framework_TestCase
98
{
@@ -23,8 +22,8 @@ public function testGetTheSearchResultData()
2322
$data = $getFrom->search($url);
2423

2524
$this->assertEquals(1, $data['page']);
26-
$this->assertEquals(690, $data['total_ads']);
27-
$this->assertEquals(20, $data['total_page']);
25+
$this->assertEquals(799, $data['total_ads']);
26+
$this->assertEquals(23, $data['total_page']);
2827
$this->assertEquals('voitures', $data['category']);
2928
$this->assertEquals('basse_normandie', $data['search_area']);
3029
$this->assertEquals('date', $data['sort_by']);
@@ -47,16 +46,16 @@ public function testGetTheDetailedAdInTheSearchResult()
4746
$url = 'http://www.leboncoin.fr/voitures/offres/basse_normandie/?f=a&th=1&ms=30000&me=70000&fu=2&gb=2';
4847
$data = $getFrom->search($url, true);
4948

50-
$expected = (object)[
51-
'id' => '917789397',
52-
'title' => 'Volvo xc90 r design',
53-
'price' => 30000,
54-
'url' => 'http://www.leboncoin.fr/voitures/917789397.htm?ca=4_s',
55-
'created_at' => SearchResultDateTimeParser::toDt("Hier", "18:01"),
56-
'thumb' => 'http://img6.leboncoin.fr/thumbs/907/90783d4040062193c703d48e4929f95c15bf1233.jpg',
57-
'nb_image' => 3,
58-
'placement' => 'Saint-Hilaire-du-Harcouët / Manche',
59-
'type' => 'part',
49+
$expected = (object) [
50+
'id' => '1046002930',
51+
'title' => 'Golf 7 vii carat 105 cv dsg',
52+
'price' => 15490,
53+
'url' => 'http://www.leboncoin.fr/voitures/1046002930.htm?ca=4_s',
54+
'created_at' => '2016-11-06 00:11',
55+
'thumb' => 'http://img5.leboncoin.fr/ad-thumb/e57c3f460fc5f6581e72fbac70c196ca660627fb.jpg',
56+
'nb_image' => 3,
57+
'placement' => 'Caen / Calvados',
58+
'type' => 'part',
6059
];
6160

6261
$this->assertEquals($expected, array_pop($data['ads']));

0 commit comments

Comments
 (0)