-
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathharvestFrom20Q.php
75 lines (67 loc) · 2.79 KB
/
harvestFrom20Q.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
<?php
header('Content-type: text/plain');
################################################################################
## This is your scraping framework
################################################################################
/**
 * Minimal cURL-based page fetcher with a regex extraction helper.
 *
 * Remembers the last requested URL (sent as the Referer header of the next
 * request) and the last response, and echoes a trace of every request and
 * extraction to standard output.
 */
class Scraper
{
    /**
     * URL sent as the Referer header; overwritten with each URL that
     * loadUrl() requests. Also the base used for root-relative URLs.
     *
     * @var string|null
     */
    public $referrer;

    /**
     * Raw result of the last curl_exec() call. Because CURLOPT_HEADER is
     * enabled this is the response headers followed by the body. It is
     * false when the last request failed and null before any request.
     *
     * @var string|false|null
     */
    public $page;

    /**
     * Fetch a URL and store the response in $this->page.
     *
     * A URL starting with "/" is resolved against the scheme and host of
     * the current referrer. The requested URL becomes the new referrer
     * whether or not the request succeeds.
     *
     * @param string       $url  Absolute URL, or a root-relative path.
     * @param array|string $post POST fields; when non-empty the request is
     *                           sent as a POST, otherwise as a GET.
     *
     * @return void
     */
    public function loadUrl($url, $post = '')
    {
        // Callers may hand over null when an extraction failed; normalise so
        // the preg_* calls below never receive a non-string subject.
        $url = (string) $url;

        if (preg_match('|^/|', $url)) {
            // Keep only "scheme://host" of the referrer and append the path.
            $url = preg_replace('|(://[^/]+)/.*$|', '$1', (string) $this->referrer) . $url;
        }

        $ch = curl_init();
        if ($ch === false) {
            $this->referrer = $url;
            $this->page = false;
            echo "##\n## FAILED TO LOAD URL {$url}: could not initialise cURL\n##\n\n";
            return;
        }

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_REFERER, $this->referrer);
        // Include the response headers in the returned string.
        curl_setopt($ch, CURLOPT_HEADER, 1);
        // Have cURL emit its own verbose transfer log.
        curl_setopt($ch, CURLOPT_VERBOSE, 1);
        if (!empty($post)) {
            curl_setopt($ch, CURLOPT_POST, true);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
        }
        // Return the response from curl_exec() instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');

        $this->referrer = $url;
        $this->page = curl_exec($ch);

        if ($this->page === false) {
            // Report the transport error instead of claiming an empty page
            // was loaded; $this->page stays false so callers can detect it.
            $error = curl_error($ch);
            echo "##\n## FAILED TO LOAD URL {$url}: {$error}\n##\n\n";
            return;
        }

        echo "##\n## LOADED URL {$url}\n##\n{$this->page}\n\n";
    }

    /**
     * Return the first capture group of the first match of $pattern in the
     * current page.
     *
     * @param string $pattern PCRE pattern that contains a capture group.
     *
     * @return string|null The captured text, or null when the pattern does
     *                     not match (or fails to run) or the match has no
     *                     capture group.
     */
    public function preg_extract_first($pattern)
    {
        // $this->page is null before the first request and false after a
        // failed one; both are searched as an empty string.
        $subject = (string) $this->page;

        if (!preg_match($pattern, $subject, $matches)) {
            return null;
        }
        if (count($matches) < 2) {
            return null;
        }

        echo "##\n## EXTRACTED: {$pattern}\n## GOT: {$matches[1]}\n##\n\n";
        return $matches[1];
    }
}
################################################################################
## Now get to work
################################################################################
// Drive one game: load the start page, submit its form, then keep following
// one randomly chosen answer link per page until no answer link is found or
// the request cap is reached.
$scraper = new Scraper();
$scraper->referrer = 'http://y.20q.net/play';

// Load the start page and find where its form posts to.
$scraper->loadUrl('http://y.20q.net/gsq-en');
$actionUrl = $scraper->preg_extract_first('/<form method=post action="([^"]+)">/');

$i = 1;
if ($actionUrl === null) {
    // Without a form target there is nothing to post to; say so instead of
    // issuing a request to an empty URL.
    echo "##\n## ERROR: no game form found on the start page; nothing to play.\n##\n\n";
} else {
    // Post form to start game
    $scraper->loadUrl($actionUrl, ['submit' => ' Play ']);

    do {
        // Strip from each "<hr" to the end of its line (no "s" modifier, so
        // "." stops at a newline).
        // NOTE(review): presumably this hides links below the question from
        // the patterns that follow; confirm against the live markup.
        $scraper->page = preg_replace('|<hr.*|', '', $scraper->page);

        // Collect the answer links present on this page; a missing link
        // comes back as null and is dropped by array_filter().
        // NOTE(review): hrefs are used exactly as they appear in the HTML;
        // if the site writes "&" as "&amp;" they may need decoding; confirm.
        $buttons = [];
        $buttons[] = $scraper->preg_extract_first('|<a href="([^"]+)" [^>]*>Unknown</a></nobr>|');
        $buttons[] = $scraper->preg_extract_first('|<a href="([^"]+)" [^>]*> Yes </a>|');
        $buttons[] = $scraper->preg_extract_first('|<a href="([^"]+)" [^>]*> No </a>|');
        $buttons[] = $scraper->preg_extract_first('|<a href="([^"]+)"[^>]*>Right</a>|');
        $buttons = array_filter($buttons);

        // No answer links left: stop following links.
        if (empty($buttons)) {
            break;
        }

        // array_filter() preserves keys, so pick a random existing key.
        $url = $buttons[array_rand($buttons)];
        $scraper->loadUrl($url);
        $i++;
    } while ($i < 25); // hard cap: at most 24 answer requests per run
}
#TODO: Output the stuff we learned, like:
#
# <big><b>You were thinking of gunpowder.</b></big><br>
# You said it's classified as Unknown, 20Q was taught by other players that the answer is Mineral.<br>
# Is it shiny? You said Yes, 20Q was taught by other players that the answer is No.<br>
# Do you use it when it rains? You said No, 20Q was taught by other players that the answer is Yes.<br>
#