forked from egrcc/zhihu-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
executable file
·302 lines (266 loc) · 11 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# -*- coding: utf-8 -*-
'''
$$
$$$ &&&&$$$$##$$$$$$$$$$$$$$$$$$#$$$
$$$ $$$$$$$$$$$$$$$ ##$$$$$$$$$$$$$$$$$$o; ;
$$$$$$$$$$$$$$$ $$$$$$$$$$$$$$$ *$$o #
$$$ $$$ $$$ $$$ $$$ *$$o $$$$
$$* $$$ $$$ $$$ $$$$ *$$o $$$$
$$$ $$$ $$$ $$$$ *$$o $$$$
$$o $$$ $$$ $$$ *$$o $$$o
;$$$$$$$$$$$$$$$$ $$$ $$$ *$$o
$$$$$$$$$$$$$$$$$* $$$ $$$ ;$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
$$$ $$$ $$$ *$$o
$$$ $$$ $$$ *$$o
$$$$$$$ $$$ $$$ *$$o
$$$; $$$$ $$$ $$$ *$$o
$$$$ $$$ $$$$$ $$$$$$$$$ *$$o
$$$$! $$ $$$$* $$$;
$$$$$ ; $$$$$$$$$$$
$$$$$$
'''
from zhihu import Question
from zhihu import Answer
from zhihu import User
from zhihu import Collection
from zhihu import Post
from zhihu import Column
def question_test(url):
    """Exercise the Question API: read every field of one question, then print it.

    url -- address of the question page; passed unchanged to ``Question``.

    Nothing is returned; all results go to standard output.
    """
    q = Question(url)

    # Title of the question
    q_title = q.get_title()
    # Detailed description of the question
    q_detail = q.get_detail()
    # Number of answers
    n_answers = q.get_answers_num()
    # Number of people following the question
    n_followers = q.get_followers_num()
    # Topics the question belongs to
    q_topics = q.get_topics()
    # Number of times the question has been viewed
    views = q.get_visit_times()
    # Top-ranked answer
    best_answer = q.get_top_answer()
    # The ten top-ranked answers
    best_ten = q.get_top_i_answers(10)
    # Every answer
    every_answer = q.get_all_answers()

    print(q_title)
    print(q_detail)
    print(n_answers)     # the original author's sample run showed 2441
    print(n_followers)   # the original author's sample run showed 26910

    # Python 2 print statement: the trailing comma keeps the topics on one
    # line, separated by spaces, without a terminating newline.
    for t in q_topics:
        print(t),
    # When at least one topic was printed, this value continues that line.
    print(views)

    # Per the original author's notes these print as an Answer instance and
    # two generator objects (top ten answers / all answers) respectively.
    print(best_answer)
    print(best_ten)
    print(every_answer)
def answer_test(answer_url):
    """Exercise the Answer API: read an answer's fields, export it, print the results.

    answer_url -- address of the answer page; passed unchanged to ``Answer``.

    Nothing is returned; results go to standard output, and ``to_txt`` /
    ``to_md`` are invoked on the answer before anything is printed.
    """
    ans = Answer(answer_url)

    # Question this answer responds to
    parent_question = ans.get_question()
    # Author of the answer
    writer = ans.get_author()
    # Number of upvotes the answer received
    upvotes = ans.get_upvote()
    # View count of the question this answer belongs to
    views = ans.get_visit_times()
    # Users who upvoted this answer
    voter_iter = ans.get_voters()

    # Export the answer as a txt file
    ans.to_txt()
    # Export the answer as a markdown file
    ans.to_md()

    # Per the original author's notes: prints as a Question instance
    print(parent_question)
    print(parent_question.get_title())
    # Per the original author's notes: prints as a User instance
    print(writer)
    # Per the original author's notes: a generator over the upvoting users
    print(voter_iter)
    print(writer.get_user_id())
    print(upvotes)       # the original author's sample run showed 9320
    print(views)
def user_test(user_url):
    """Exercise the User API: read a user's profile fields and print them.

    user_url -- address of the user's profile page; passed unchanged to ``User``.

    Nothing is returned; all results go to standard output. At most 41
    followees and 41 followers are listed.
    """
    person = User(user_url)

    # User ID
    uid = person.get_user_id()
    # Gender
    gender = person.get_gender()
    # Number of followers
    n_followers = person.get_followers_num()
    # Number of users this user follows
    n_followees = person.get_followees_num()
    # Number of questions asked
    n_asks = person.get_asks_num()
    # Number of answers written
    n_answers = person.get_answers_num()
    # Number of collections
    n_collections = person.get_collections_num()
    # Number of upvotes received
    n_agree = person.get_agree_num()
    # Number of thanks received
    n_thanks = person.get_thanks_num()
    # URL of the avatar image
    avatar_url = person.get_head_img_url()
    # Users this user follows
    followee_iter = person.get_followees()
    # Users following this user
    follower_iter = person.get_followers()
    # Topics returned by get_topics()
    topic_list = person.get_topics()
    # Questions asked by this user
    ask_iter = person.get_asks()
    # Answers written by this user
    answer_iter = person.get_answers()
    # Collections owned by this user
    collection_iter = person.get_collections()

    # Trailing numbers are the values from the original author's sample run.
    print(uid)
    print(gender)          # male
    print(n_followers)     # 614840
    print(n_followees)     # 8408
    print(n_asks)          # 1323
    print(n_answers)       # 786
    print(n_collections)   # 44
    print(n_agree)         # 46387
    print(n_thanks)        # 11477
    print(avatar_url)

    # Per the original author's notes: a generator over the followed users
    print(followee_iter)
    # List the IDs of the first 41 followees, then stop pulling from the iterable.
    for seen, followee in enumerate(followee_iter, 1):
        print(followee.get_user_id())
        if seen == 41:
            break

    # Per the original author's notes: a generator over the followers
    print(follower_iter)
    # Same cap of 41 entries for the followers.
    for seen, follower in enumerate(follower_iter, 1):
        print(follower.get_user_id())
        if seen == 41:
            break

    for t in topic_list:
        print(t)

    # Per the original author's notes these three print as generator objects
    # (asked questions / written answers / collections).
    print(ask_iter)
    print(answer_iter)
    print(collection_iter)
def collection_test(collection_url):
    """Exercise the Collection API: read a collection's fields and print them.

    collection_url -- address of the collection page; passed unchanged to
    ``Collection``.

    Nothing is returned; all results go to standard output.
    """
    coll = Collection(collection_url)

    # Creator of the collection
    owner = coll.get_creator()
    # Name of the collection
    coll_name = coll.get_name()
    # First ten answers in the collection
    first_ten = coll.get_top_i_answers(10)
    # Every answer in the collection
    every_answer = coll.get_all_answers()

    # Per the original author's notes: prints as a User instance
    print(owner)
    print(owner.get_user_id())
    print(coll_name)
    # Per the original author's notes: a generator over the first ten answers
    print(first_ten)
    # Per the original author's notes: a generator over all answers
    print(every_answer)
def post_test(post_url):
    """Exercise the Post API: read an article's fields and print them.

    post_url -- address of the article page; passed unchanged to ``Post``.

    Nothing is returned; all results go to standard output.
    """
    article = Post(post_url)

    # Title of the article
    post_title = article.get_title()
    # Body of the article
    body = article.get_content()
    # Author of the article
    writer = article.get_author()
    # Column the article belongs to
    parent_column = article.get_column()
    # Topics the article belongs to
    post_topics = article.get_topics()

    print(post_title)
    print(body)

    # Python 2 print statement: the trailing comma keeps the topics on one
    # line, separated by spaces, without a terminating newline.
    for t in post_topics:
        print(t),
    print("\n")

    # Per the original author's notes: prints as a User instance
    print(writer)
    # Per the original author's notes: prints as a Column instance
    print(parent_column)
def column_test(column_url):
    """Exercise the Column API: read a column's fields and print them.

    column_url -- address of the column page; passed unchanged to ``Column``.

    Nothing is returned; all results go to standard output.
    """
    col = Column(column_url)

    # Title of the column
    col_title = col.get_title()
    # Description of the column
    summary = col.get_description()
    # Creator of the column
    owner = col.get_creator()
    # Number of articles in the column
    n_posts = col.get_posts_num()
    # Every article in the column
    every_post = col.get_all_posts()

    print(col_title)
    print(summary)
    # Per the original author's notes: prints as a User instance
    print(owner)
    print(n_posts)
    print(every_post)
def test():
    """Chain several APIs together, starting from one fixed question.

    Takes the top answer's author, prints the title of every question that
    author answered, then walks the author's collections: prints each name
    and calls ``to_txt`` and ``to_md`` on the collection's first ten answers.
    """
    start_url = "http://www.zhihu.com/question/24269892"
    start_question = Question(start_url)

    # Top-ranked answer of the question
    best_answer = start_question.get_top_answer()
    # Author of that answer
    author = best_answer.get_author()

    # Every answer this author has written
    authored = author.get_answers()
    # Print the title of the question behind each of those answers
    for authored_answer in authored:
        print(authored_answer.get_question().get_title())

    # Every collection owned by this author
    owned_collections = author.get_collections()
    for coll in owned_collections:
        # Name of the current collection
        print(coll.get_name())
        # First ten answers of the current collection
        first_ten = coll.get_top_i_answers(10)
        # Export each of them as txt and as markdown
        for saved_answer in first_ten:
            saved_answer.to_txt()
            saved_answer.to_md()
def main():
    """Run every demo in turn against its fixed sample URL, then run ``test``."""
    # (demo function, URL it is called with), in execution order.
    demos = (
        (question_test, "http://www.zhihu.com/question/24269892"),
        (answer_test, "http://www.zhihu.com/question/24269892/answer/29960616"),
        (user_test, "http://www.zhihu.com/people/jixin"),
        (collection_test, "http://www.zhihu.com/collection/36750683"),
        (post_test, "http://zhuanlan.zhihu.com/p/20770968"),
        (column_test, "http://zhuanlan.zhihu.com/daily"),
    )
    for run_demo, target_url in demos:
        run_demo(target_url)

    test()
# Run the demos only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()