-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathPageRankImplementation.py
109 lines (95 loc) · 4.28 KB
/
PageRankImplementation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from src.parse import htmlParser as parse
from src.purifier import purifier as pure
from src.uniq_purifier import unqpurifier as unqpure
from src.createPdf import pdfcreator as PdfC
import os
import numpy as np
from src.pagerankutils.utils import *
from numpy import linalg as LA
from copy import deepcopy
from tkinter import *
import tkinter.messagebox
def _buildLinkMatrix(urls):
    """Build the 0/1 link matrix for the given list of site urls.

    Column ``c`` describes the outlinks of ``urls[c]``: entry ``[r, c]`` is set
    to 1 when ``urls[r]`` (a different url) appears in the outlink list that
    ``unqpure.find_outlinks`` returns for ``urls[c]``.  The diagonal entry
    ``[c, c]`` is set to 1 when the flag returned by ``find_outlinks`` is
    truthy (the original author describes that flag as "at least one inlink").

    Args:
        urls: list of url strings, in the order that defines rows/columns.

    Returns:
        An ``(n, n)`` float numpy array, ``n = len(urls)``.
    """
    n = len(urls)
    A = np.zeros((n, n), dtype=float)
    for col, link in enumerate(urls):
        # Base of the url, e.g. 'rt' for 'http://www.rt.com/' (per original note).
        basetocheck = unqpure.getBaseToCheck(link)
        # Last argument selects the parser: 1 = first parser, 2 = second parser.
        checkin, outlinks = unqpure.find_outlinks(link, False, basetocheck, 1)
        print(outlinks)
        if checkin:
            A[col, col] = 1
        # Use the real position of every other url as its row index.  A running
        # counter that skips the current link shifts the rows of all urls that
        # come before it by one.
        for row, otherLink in enumerate(urls):
            if otherLink != link and otherLink in outlinks:
                A[row, col] = 1
    return A


def sitesGui(numSites):
    """Open a window with ``numSites`` url entry fields and rank the sites.

    Each entry is pre-filled with 'http://www.'.  Pressing the 'give Sites'
    button reads the entries, closes the window, builds the link matrix,
    computes the ranking with the project's utility functions and hands the
    result to ``PdfC.CreateRankPdf``.  The call blocks in the Tk main loop
    until the window is destroyed.

    Args:
        numSites: number of url entry fields to show.
    """
    mw = Tk()
    mw.title("Pagerank Implementation")  # window's title

    # One entry widget per site, stacked vertically in grid column 1.
    entries = []
    for i in range(numSites):
        e = Entry(mw, relief=RIDGE, bd=5)
        e.grid(row=i + 1, column=1, sticky=NSEW)
        e.insert(END, 'http://www.')
        entries.append(e)

    def getSites():
        """Button callback: read the urls, rank them and create the pdf."""
        # Read the text before destroying the window; the widgets die with it.
        urls = [entry.get() for entry in entries]
        n = len(urls)
        print(urls)
        mw.destroy()

        A = _buildLinkMatrix(urls)
        print(A)
        # Per the original notes: getAready makes the matrix column stochastic
        # and removeSpiderTraps guarantees the graph is connected.
        A = getAready(A, n)
        print(A)
        A = removeSpiderTraps(A, n)
        ranking = getRank(A, n)  # the sites' ranking

        # Parent of the current working directory + "/PageRankImplementation".
        path = os.path.dirname(os.path.abspath("."))
        path = path + "/PageRankImplementation"
        print("The ranking of the sites' is:")
        print(ranking)
        source = path
        destination = path + '/getRanking'
        # Create the pdf with the sites' ranking.
        PdfC.CreateRankPdf(ranking, urls, source, destination)

    Button(mw, text='give Sites', command=getSites).grid()
    mw.mainloop()
#---------------------------------------------------------------------------#
# Command-line entry: ask where the site list comes from (text file or GUI
# window), compute the ranking and create the pdf report.
print("Welcome to BurnYourPc project of PageRank implementation..!")
print(" ")
answer = input("Sites in txt[1] or input-window[2]\n")
try:
    answer = int(answer)
except ValueError:
    # Non-numeric input is reported by the "Wrong inputs" branch below
    # instead of aborting with a traceback.
    answer = None

# Parent of the current working directory + "/PageRankImplementation".
path = os.path.dirname(os.path.abspath("."))
path = path + "/PageRankImplementation"
print(path)

if answer == 1:
    input("Edit the 'sites.txt' in getRanking folder and press enter\n")
    source = path
    destination = path + '/getRanking'
    # sites.txt lives in the getRanking folder; build its path from that
    # folder rather than slicing characters off `path`.
    path2txt = destination + '/sites.txt'
    # One url per line.  strip() removes the line ending (and works for a last
    # line without one); blank lines are ignored.
    with open(path2txt, 'r') as myfile:
        urls = [line.strip() for line in myfile if line.strip()]
    counter = len(urls)
    print(urls)
    ranking = rankUrls(urls, counter)  # compute the sites' ranking
    print(ranking)
    print(destination)
    # Create the pdf with the sites' ranking.
    PdfC.CreateRankPdf(ranking, urls, source, destination)
elif answer == 2:
    # Number of sites to rank = number of entry fields in the window.
    num = input("How many sites do you want to rank?\n")
    num = int(num)
    sitesGui(num)  # open a window for entering the urls
else:
    print(" ")
    print("Wrong inputs! Try again..")