Skip to content

Commit c6081ba

Browse files
committed
fix in response.css query
1 parent b89b7ae commit c6081ba

File tree

1 file changed

+25
-24
lines changed

1 file changed

+25
-24
lines changed

scrapy-project/ycombinator/spiders/yscraper.py

+25-24
Original file line number | Diff line number | Diff line change
@@ -16,27 +16,28 @@ class YCombinator(scrapy.Spider):
1616
def parse(self, response):
    """Extract one company record from a company page response.

    The page carries its data as a JSON string in the ``data-page``
    attribute of an element; the company fields are read from
    ``props -> company`` inside that JSON. (An earlier version of this
    method read the JSON from a
    ``script.js-react-on-rails-component[data-component-name="CompaniesShowPage"]``
    tag instead.)

    Args:
        response: The response object for the company page; only its
            ``css()`` selector method is used.

    Yields:
        dict: A single item describing the company. Nothing is yielded
        when the page has no ``data-page`` attribute.

    Raises:
        KeyError: If the JSON lacks ``props``, ``company`` or one of the
            expected company fields.
        json.JSONDecodeError: If the attribute value is not valid JSON.
    """
    # get the JSON string stored in the `data-page` attribute
    st = response.css('[data-page]::attr(data-page)').get()
    if st is None:
        # No match on this page: skip it. Passing None to json.loads()
        # would raise TypeError and abort the callback.
        return

    # load the JSON object and set the variable for the 'Company' data
    jc = json.loads(st)['props']['company']
    founders = jc['founders']

    yield {
        'company_id': jc['id'],
        'company_name': jc['name'],
        'short_description': jc['one_liner'],
        'long_description': jc['long_description'],
        'batch': jc['batch_name'],
        'status': jc['ycdc_status'],
        'tags': jc['tags'],
        'location': jc['location'],
        'country': jc['country'],
        'year_founded': jc['year_founded'],
        'num_founders': len(founders),
        'founders_names': [f['full_name'] for f in founders],
        'team_size': jc['team_size'],
        'website': jc['website'],
        'cb_url': jc['cb_url'],
        'linkedin_url': jc['linkedin_url'],
    }

0 commit comments

Comments
 (0)