@@ -16,27 +16,28 @@ class YCombinator(scrapy.Spider):
16
16
def parse(self, response):
    """Yield one item describing the company embedded in the page.

    The company data is read as JSON from the ``data-page`` attribute
    selected with ``[data-page]::attr(data-page)``; the company record is
    expected under ``props`` -> ``company``.  When no such attribute is
    found, nothing is yielded.

    Args:
        response: The page response; only its ``css()`` method is used.

    Yields:
        dict: A flat record with the company's id, name, descriptions,
        batch, status, tags, location, country, founding year, founder
        count and names, team size and external URLs.

    Raises:
        json.JSONDecodeError: If the attribute value is not valid JSON.
        KeyError: If an expected key is missing from the decoded data.
    """
    page_data = response.css('[data-page]::attr(data-page)').get()
    if page_data is None:
        # No data-page attribute on this page.  The guard must test the
        # selected value itself: a constant test such as `1 is not None`
        # is always true and would let json.loads(None) raise TypeError.
        return

    # Decode the JSON payload and pick out the 'company' record.
    company = json.loads(page_data)['props']['company']
    founders = company['founders']

    yield {
        'company_id': company['id'],
        'company_name': company['name'],
        'short_description': company['one_liner'],
        'long_description': company['long_description'],
        'batch': company['batch_name'],
        'status': company['ycdc_status'],
        'tags': company['tags'],
        'location': company['location'],
        'country': company['country'],
        'year_founded': company['year_founded'],
        'num_founders': len(founders),
        'founders_names': [founder['full_name'] for founder in founders],
        'team_size': company['team_size'],
        'website': company['website'],
        'cb_url': company['cb_url'],
        'linkedin_url': company['linkedin_url'],
    }
0 commit comments