Skip to content

Commit

Permalink
Changes to the scraper and a minor bug fix
Browse files Browse the repository at this point in the history
  • Loading branch information
hvudeshi committed Nov 4, 2021
1 parent 90349cc commit 0e724de
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 24 deletions.
2 changes: 1 addition & 1 deletion Code/Database/schema/srijas.sql
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ INSERT INTO `skill_master` (`skill_id`, `skill_title`, `is_active`, `created_by`
(50, 'Hadoop', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'),
(51, 'Kafka', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'),
(52, 'Cassandra', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31'),
(53, 'Elasticeearch', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31');
(53, 'Elasticsearch', b'1', -1, '2021-11-01 04:44:31', 0, '2021-11-01 04:44:31');


-- --------------------------------------------------------
Expand Down
26 changes: 14 additions & 12 deletions Code/Scrapper/Scrapper_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,21 @@ def get_location(connection):
cursor=connection.cursor()
cursor.execute(sql_select_query)
records2=cursor.fetchall()
return records2[0]
return records2[-1][0]

def get_threshold(connection):
    """Return the match threshold of the most recently added user/resume pair.

    Joins ``user_master`` with ``user_resume`` and returns the first column
    of the *last* fetched row (the latest record).

    :param connection: open DB-API connection (``cursor()`` capable)
    :return: the ``user_threshold`` value of the last joined row
    :raises IndexError: if the join produces no rows
    """
    # NOTE(review): implicit join with the condition in WHERE — MySQL accepts
    # this; confirm an explicit ON clause is not required by the schema.
    sql_select_query = "select user_threshold from user_master um join user_resume ur where um.user_id=ur.user_id"
    cursor = connection.cursor()
    cursor.execute(sql_select_query)
    records2 = cursor.fetchall()
    # [-1][0]: first column of the last row — the superseded
    # `return records2[0]` from the pre-commit version is dropped.
    return records2[-1][0]

def get_role(connection):
    """Return the preferred job title of the most recently added user.

    Joins ``job_master`` with ``user_master`` on the user's preferred job id
    and returns the first column of the *last* fetched row.

    :param connection: open DB-API connection (``cursor()`` capable)
    :return: the ``job_title`` value of the last joined row
    :raises IndexError: if the join produces no rows
    """
    # NOTE(review): implicit join with the condition in WHERE — MySQL accepts
    # this; confirm an explicit ON clause is not required by the schema.
    sql_select_query = "select job_title from job_master jm join user_master um where jm.job_id=um.user_preferred_job_id"
    cursor = connection.cursor()
    cursor.execute(sql_select_query)
    records2 = cursor.fetchall()
    # [-1][0]: first column of the last row — the superseded
    # `return records2[0]` from the pre-commit version is dropped.
    return records2[-1][0]

def get_resume_skills(connection):
sql_select_Query2="select resume_id,skill_id from resume_skills where is_active=1"
Expand Down Expand Up @@ -102,16 +102,18 @@ def get_emailing_list(connection):
#print(resume_skills)
email_id_list = get_emailing_list(connection)
# print(email_list)
location = get_location(connection)
role = get_role(connection)
location = str(get_location(connection))
role = str(get_role(connection))
print(role)
no_of_jobs_to_retrieve = 5
match_threshold = get_threshold(connection)
# final_result_linkedIn = sl.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data)
final_result_glassdoor = sg.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data)
# final_result_indeed = si.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data)

# final_results = final_result_linkedIn + final_result_glassdoor + final_result_indeed
match_threshold = int(get_threshold(connection))
# role_name_linkedIn, final_result_linkedIn = sl.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data)
job_role, final_result_glassdoor = sg.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data)
# role_name_indeed, final_result_indeed = si.get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data)
print(final_result_glassdoor)
ea.sendmail(final_result_glassdoor,email_id_list)
# final_results = final_result_linkedIn + final_result_glassdoor + final_result_indeed
# role_name = role_name_linkedIn + role_name_glassdoor + role_name_indeed

# ea.sendmail(final_result_linkedIn,email_id_list,role_name_linkedIn)


6 changes: 3 additions & 3 deletions Code/Scrapper/email_alert.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from socket import gaierror
import json

def sendmail(final_result,email_id_list):
def sendmail(final_result,email_id_list, job_role):
port = 587
smtp_server = "smtp.gmail.com"
login = "[email protected]"
Expand All @@ -29,8 +29,8 @@ def sendmail(final_result,email_id_list):
print(link)
pre = """<a href='"""
embedded_link = link
post = """'>View Position</a>"""
temp_str += (str(counter) + ". " + pre + embedded_link + post+ '\n')
post = """'>Click here</a>"""
temp_str += (str(counter) + ". " + job_role[counter-1] + ': ' + pre + embedded_link + post+ '\n')
counter += 1
body += temp_str
msg.attach(MIMEText(body, 'html'))
Expand Down
14 changes: 9 additions & 5 deletions Code/Scrapper/scrapper_glassdoor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

def get_job_description(resume_skills,all_skills, match_threshold, role, location, no_of_jobs_to_retrieve, data):
options = Options()
options.add_argument("--window-size-1920,1200")
options.add_argument('--headless')
# options.add_argument("--window-size-1920,1200")
options.headless = True
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome (options=options,executable_path=ChromeDriverManager().install())
Expand All @@ -24,7 +24,7 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio
job_urls = []
c=0
job_buttons = driver.find_elements_by_xpath('.//a[@class = "jobLink job-search-key-1rd3saf eigr9kq1"]') #jl for Job Listing. These are the buttons we're going to click.
# time.sleep(2)
time.sleep(2)
print(len(job_buttons))
for text in job_buttons:
if text.get_attribute('href'): ### get all the job postings URL's
Expand All @@ -36,17 +36,21 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio
final_dict = {}

# ========== Iterate through each url and get the job description =================================

job_role = []
for i in job_urls:
time.sleep(5)
jobs = []
driver.get(i)
button = driver.find_element_by_xpath('//*[@id="JobDescriptionContainer"]/div[2]')
button.click()
job_description = driver.find_element_by_xpath('//*[@id="JobDescriptionContainer"]/div[1]').text
jobs.append(job_description)
final_dict[i] = job_description
job_title=driver.find_element_by_xpath("//div[@class='css-17x2pwl e11nt52q6']").text
company_details=driver.find_element_by_xpath("//div[@class='css-16nw49e e11nt52q1']").text
job_role.append(job_title,company_details)

final_result = ke.get_user_id_to_list_of_job_ids(resume_skills,final_dict,all_skills,match_threshold)

return final_result
return job_role, final_result

10 changes: 7 additions & 3 deletions Code/Scrapper/scrapper_linkedIn.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio
# role = role.replace(' ', '%20')
#Form a dynamic URL to fetch the details using Beautiful soup for the given filters
url = "https://www.linkedin.com/jobs/jobs-in-"+location+"?keywords="+role+"&f_JT=F%2CP&f_E=1%2C3&position=1&pageNum=0"

url = url.replace(' ', '%20')
print(url)

# Add number of jobs to retrieve to limit
limit = no_of_jobs_to_retrieve

k1 = requests.get(url)
# Run the beautiful soup
soup1 = BeautifulSoup(k1.content, 'html.parser')

print(soup1)
string1 = soup1.find_all("a",{"class":"base-card__full-link"})
print(string1)
description_dict = {}
job_role = []
for i in range(len(string1)):
if role.lower() in string1[i].get_text().lower() and limit>0:
dictionary = {}
Expand All @@ -37,6 +40,7 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio
#Replace the next line parameter with the blank space
#Iterate the different job suggestions according to the given filters and fetch description for the jobs matching the search criteria of the user given.
dictionary["Job Link"] = string1[i]['href']
job_role.append(dictionary["Job Title"])
limit-=1
k = requests.get(string1[i]['href']).text
soup=BeautifulSoup(k,'html.parser')
Expand All @@ -48,4 +52,4 @@ def get_job_description(resume_skills,all_skills, match_threshold, role, locatio

final_result=ke.get_user_id_to_list_of_job_ids(resume_skills,description_dict,all_skills,match_threshold)

return final_result
return job_role, final_result

0 comments on commit 0e724de

Please sign in to comment.