# NOTE: the import order below is deliberately kept as in the original file.
# Two star imports are involved, so reordering could change which module's
# names win on a collision.
from src.parse import htmlParser as parse
from src.purifier import purifier as pure
from src.uniq_purifier import unqpurifier as unqpure
from src.createPdf import pdfcreator as PdfC
import os
import numpy as np
# rankUrls, getAready, removeSpiderTraps and getRank are not imported by name
# anywhere in this file; presumably they come from this star import.
from src.pagerankutils.utils import *
from numpy import linalg as LA
from copy import deepcopy
from tkinter import *
import tkinter.messagebox


def _project_root():
    """Return '<parent of the current working directory>/PageRankImplementation'."""
    return os.path.dirname(os.path.abspath(".")) + "/PageRankImplementation"


def _read_sites(path2txt):
    """Return the non-blank lines of the text file ``path2txt``.

    Surrounding whitespace (including the line ending) is stripped from every
    line. The previous code dropped the last character of each line
    unconditionally, which truncated the final URL whenever the file did not
    end with a newline.
    """
    with open(path2txt, 'r') as myfile:
        stripped = (line.strip() for line in myfile)
        return [site for site in stripped if site]


def _build_link_matrix(urls):
    """Build the n x n 0/1 link matrix for ``urls``.

    Column ``c`` describes ``urls[c]``: entry ``[r, c]`` is set to 1 when
    ``urls[r]`` is found in the outlinks reported for ``urls[c]``, and the
    diagonal entry ``[c, c]`` is set to 1 when ``find_outlinks`` reports a
    truthy first return value for that url.

    Parameters:
        urls: list of url strings.

    Returns:
        A float numpy array of shape (len(urls), len(urls)).
    """
    n = len(urls)
    A = np.zeros((n, n), dtype=float)    #create A-matrix as a zero matrix
    for col, link in enumerate(urls):
        #find base url (for example the base of 'http://www.rt.com/' is 'rt')
        basetocheck = unqpure.getBaseToCheck(link)
        #find all outlinks with the first parser; pass 2 instead of 1 as the
        #last argument to use the second parser
        checkin, outlinks = unqpure.find_outlinks(link, False, basetocheck, 1)
        print(outlinks)
        if checkin:    #if there is at least one inlink
            A[col, col] = 1
        # The row index must be the position of the other url in `urls`.
        # The previous hand-maintained counter was incremented before use and
        # only for urls different from `link`, so it pointed at the wrong row
        # for every url located before `link` in the list.
        for row, otherLink in enumerate(urls):
            if otherLink != link and otherLink in outlinks:
                A[row, col] = 1
    return A


def sitesGui(numSites):
    """Open a window with ``numSites`` url entry fields and rank the entered sites.

    Pressing the 'give Sites' button reads every entry, closes the window,
    builds the link matrix, computes the ranking and passes it to
    ``PdfC.CreateRankPdf``. This function blocks in ``mainloop()``.

    Parameters:
        numSites: number of entry fields (one url per field) to create.
    """
    mw = Tk()
    mw.title("Pagerank Implementation")    #window's title

    # One entry widget per site, stacked in a single column and pre-filled
    # with the common url prefix.
    entries = []
    for i in range(numSites):
        e = Entry(relief=RIDGE, bd=5)
        e.grid(row=i + 1, column=1, sticky=NSEW)
        e.insert(END, 'http://www.')
        entries.append(e)

    def getSites():
        """Button callback: collect the urls, rank them and create the pdf."""
        urls = [entry.get() for entry in entries]    #widgets' input as strings
        n = len(urls)
        print(urls)
        mw.destroy()    #destroy window
        A = _build_link_matrix(urls)
        print(A)
        #make the matrix column stochastic (each column's elements are divided
        #by the number of non-zero elements in that column)
        A = getAready(A, n)
        print(A)
        A = removeSpiderTraps(A, n)    #guarantee that the graph is connected
        ranking = getRank(A, n)    #get the sites' ranking
        path = _project_root()
        print("The ranking of the sites' is:")
        print(ranking)
        source = path
        destination = path + '/getRanking'
        #create the pdf with the sites' ranking
        PdfC.CreateRankPdf(ranking, urls, source, destination)

    Button(text='give Sites', command=getSites).grid()
    mainloop()


#---------------------------------------------------------------------------#

print("Welcome to BurnYourPc project of PageRank implementation..!")
print(" ")
answer = input("Sites in txt[1] or input-window[2]\n")
try:
    answer = int(answer)
except ValueError:
    # Non-numeric input falls through to the "Wrong inputs!" branch below
    # instead of aborting with a traceback.
    answer = None

path = _project_root()
print(path)

if answer == 1:
    input("Edit the 'sites.txt' in getRanking folder and press enter\n")
    destination = path + '/getRanking'
    # sites.txt lives in the same getRanking folder the pdf is written to.
    # The previous code sliced three characters off `path`, which yielded
    # '...PageRankImplementatgetRanking/sites.txt'.
    path2txt = destination + '/sites.txt'
    urls = _read_sites(path2txt)    #sites from the txt file
    counter = len(urls)
    print(urls)
    ranking = rankUrls(urls, counter)    #compute the sites' ranking
    print(ranking)
    source = path
    print(destination)
    #create the pdf with the sites' ranking
    PdfC.CreateRankPdf(ranking, urls, source, destination)
elif answer == 2:
    num = input("How many sites do you want to rank?\n")    #number of sites to rank
    try:
        num = int(num)
    except ValueError:
        print(" ")
        print("Wrong inputs! Try again..")
    else:
        sitesGui(num)    #create a window for input urls
else:
    print(" ")
    print("Wrong inputs! Try again..")