-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfunctions.php
219 lines (168 loc) · 6.28 KB
/
functions.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
<?php
// Global variable holding the chart rows as a string ready to be echoed into
// the Google Charts data array. It starts empty and is filled by buildChart().
$chartData = '';
/**
 * Look up a key in an array (for example the request data), falling back to
 * a default when the key is missing or its value is null.
 *
 * @param array $arr     Array to read from.
 * @param mixed $key     Key to look up.
 * @param mixed $default Value returned when the key is not set.
 * @return mixed The stored value, or $default.
 */
function get_or_default($arr, $key, $default) {
    return isset($arr[$key]) ? $arr[$key] : $default;
}
/**
 * Collect the Google Scholar result count for a phrase for every year from a
 * start year through the current year, then hand the counts to buildChart().
 *
 * @param string     $ThingToSearchFor Phrase to search for.
 * @param int|string $year             First year to query. A value that is not
 *                                     a valid integer (e.g. an empty form
 *                                     field) falls back to the current year.
 * @return void The result is published through buildChart().
 */
function searchScholarMinYear($ThingToSearchFor, $year){
    // Work out the upper bound once instead of on every loop iteration.
    $currentYear = (int) date("Y");

    // Without validation an empty or non-numeric year makes the loop start
    // from a bogus value and fire a request for every year up to now.
    $startYear = filter_var($year, FILTER_VALIDATE_INT);
    if ($startYear === false) {
        $startYear = $currentYear;
    }

    // Year => count pairs, one scrape per year.
    $chartArray = array();
    for ($current = $startYear; $current <= $currentYear; $current++) {
        // Counts are stored with a trailing comma, the format buildChart()
        // is written to accept.
        $chartArray[$current] = searchScholar($ThingToSearchFor, $current) . ",";
    }

    // Convert the collected counts into chart rows.
    buildChart($chartArray);
}
/**
 * Convert a year => count array into the comma-separated row list expected
 * inside a Google Charts data array (e.g. "[2010,123],[2011,456]") and store
 * it in the global $chartData.
 *
 * Counts may carry a trailing comma (as passed by searchScholarMinYear());
 * it is stripped before use. Rows whose year is not numeric or whose count is
 * not made of digits are skipped so that a failed scrape cannot inject
 * arbitrary text into the generated JavaScript.
 *
 * @param array $data Year => result count.
 * @return void
 */
function buildChart($data){
    // Use the global chart variable
    global $chartData;

    $rows = array();
    foreach ($data as $year => $count) {
        // Drop surrounding whitespace and the trailing comma delimiter.
        $count = trim((string) $count, ", \t\n\r");
        if (!is_numeric($year) || !ctype_digit($count)) {
            continue;
        }
        // json_encode guarantees a well-formed [year,count] pair.
        $rows[] = json_encode(array((int) $year, (int) $count));
    }

    // An empty input yields an empty string, i.e. a chart with no data rows.
    $chartData = implode(',', $rows);
}
/**
 * Fetch the Google Scholar results page for an exact phrase restricted to a
 * single year and extract the reported number of results.
 *
 * The count is scraped from the "About 1,234 results (<b>..." summary line
 * (or "12 results (<b>..." when the count is exact), so it depends on
 * Google's current page markup.
 *
 * @param string     $ThingToSearchFor Phrase to search for (matched as an exact phrase).
 * @param int|string $year             Year used for both the lower and upper bound.
 * @return string The result count as a string of digits without thousands
 *                separators; '0' when the page could not be fetched or no
 *                result summary was found (the failure is written to the
 *                error log).
 */
function searchScholar($ThingToSearchFor, $year){
    // Quote the phrase for an exact match and URL-encode it as a whole, so
    // characters such as &, # or + in the phrase cannot corrupt the query
    // string (spaces still become + signs).
    $encodedPhrase = urlencode('"' . $ThingToSearchFor . '"');
    $encodedYear = urlencode((string) $year);

    // URL component parts; this assumes Google keeps its URL scheme stable.
    $requestToMake = 'https://scholar.google.com.au/scholar?q=' . $encodedPhrase
        . '&hl=en&as_sdt=0%2C5&as_ylo=' . $encodedYear
        . '&as_yhi=' . $encodedYear;

    // Send off the request to get the page.
    $html = file_get_contents($requestToMake);
    if ($html === false) {
        error_log('searchScholar: request failed for ' . $requestToMake);
        return '0';
    }

    // Locate the result summary instead of relying on fixed character offsets.
    if (preg_match('/(?:About\s+)?([\d,]+)\s+results?\s*\(\s*<b>/i', $html, $match) !== 1) {
        error_log('searchScholar: no result count found for ' . $requestToMake);
        return '0';
    }

    // Remove the thousands separators and return just the number.
    return str_replace(',', '', $match[1]);
}
?>
<!-- HTML5 doctype declaration -->
<!DOCTYPE html>
<html>
<head>
<!-- Run the search only when the form has been submitted (not on the first page load) -->
<?php
if (isset($_POST['query'])) {
    // Read the submitted fields, substituting an empty string for anything missing
    $query = get_or_default($_POST, 'query', '');
    $startYear = get_or_default($_POST, 'year', '');

    // Scrape the yearly counts and prepare the chart data
    searchScholarMinYear($query, $startYear);
}
?>
<!-- Page title -->
<title>Google Scholar Scraper</title>
<!-- Chart bootstrap: only emitted once the search form has been submitted -->
<?php if(isset($_POST['query'])){?>
<!-- Google Charts loader plus the callback that draws the chart -->
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<script type="text/javascript">
    google.charts.load('current', {'packages':['corechart']});
    google.charts.setOnLoadCallback(drawChart);

    // Builds the data table from the server-generated rows and draws an area chart into #chart_div
    function drawChart() {
        var chartOptions = {
            title: 'Number of papers per year',
            hAxis: {title: 'Year', titleTextStyle: {color: '#333'}},
            vAxis: {minValue: 0}
        };
        var table = google.visualization.arrayToDataTable([
            ['Year', 'papers'],
            // Rows prepared on the server by buildChart()
            <?php global $chartData; echo $chartData ?>
        ]);
        var target = document.getElementById('chart_div');
        var areaChart = new google.visualization.AreaChart(target);
        areaChart.draw(table, chartOptions);
    }
</script>
<!-- End of the submitted-form-only block -->
<?php }?>
<!-- Minimal page styling -->
<style>
    /* Centre all content and use a dark grey text colour */
    * {
        color: DarkSlateGray;
        text-align: center;
    }

    /* The form fields live in a list; hide the bullets */
    li {
        list-style: none;
    }

    /* Typed text stays left-aligned inside the inputs */
    input {
        text-align: left;
    }
</style>
</head>
<body>
<section>
<!-- Title heading and information -->
<h1>Google Scholar Scraper and graph</h1>
<p>
This page exists as a way to quickly graph a year-over-year view of the use of a phrase in papers. This is useful for getting a sense of when a field boomed.
Note: it may take a while for the page to reload after clicking submit. <br><br>
Note: if you send off too many requests too quickly, Google will think you are a robot, get really mad, and break everything... <br><br>
This should fix itself after a few days.
</p>
</section>
<!-- Form to take info from the user -->
<section>
    <!-- Post the form back to this page -->
    <form action="functions.php" method="POST">
        <ul>
            <li>
                <!-- Phrase to search for; the label's "for" must match the input id exactly (ids are case-sensitive) -->
                <label for="query">Query</label>
                <input type="text" size="50" name="query" id="query">
            </li>
            <li>
                <!-- First year to include in the graph -->
                <label for="year">Start Year</label>
                <input type="number" size="10" name="year" id="year">
            </li>
        </ul>
        <!-- Submit button -->
        <button>Search scholar</button>
    </form>
</section>
<!-- Chart / raw data section -->
<section>
    <!-- Chart canvas -->
    <div id="chart_div" style="width: 100%; height: 500px;"></div>
    <!-- Raw data listing, printed only after the form has been submitted -->
    <?php
    if (isset($_POST['query'])) {
        // Use the global variable
        global $chartData;

        // buildChart() stores the rows as one chart-ready string such as
        // "[2010,123,],[2011,456,]", which cannot be iterated with foreach.
        // Read the year/count pairs back out of that string instead; rows
        // that are not plain digits are ignored.
        if (isset($chartData) && is_string($chartData)
            && preg_match_all('/\[\s*(\d+)\s*,\s*(\d+)\s*,?\s*\]/', $chartData, $rawRows, PREG_SET_ORDER)
        ) {
            foreach ($rawRows as $rawRow) {
                echo $rawRow[1]." had ".$rawRow[2]." number of papers<br>";
            }
        }
    }
    ?>
</section>
</body>
</html>