Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,26 @@
# Upwatch
An Upwork web scraper that notifies you of newly published job posts in your field of work.

# PyQt5

# MyPy
python3 -m pip install mypy

mypy *.py


# PyQt5 Stub files for MyPy
python3 -m pip install PyQt5-stubs


# black
python3 -m pip install black

black *.py



#LOGIC
#BeautifulSoup
#Requests
#LXML
55 changes: 39 additions & 16 deletions upwatch.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,41 @@
import requests
from bs4 import BeautifulSoup # type: ignore
from typing import TypedDict
import requests
import json
import time
import pathlib
from typing import Any, List

# TODO: use TypedDict for these
JsonContent = Any
JobPost = Any
from typing import Optional, List, Tuple

# !import re # For finding possible word counts in job posts & validating the URL input.

# Schema of a single job post as built by job_post_scraper.
# The functional TypedDict syntax is needed because the keys contain spaces
# and therefore cannot be declared as class attributes.
JobPost = TypedDict(
"JobPost",
{
"Job Title": str,
"Payment Type": str,
"Budget": str,
"Job Description": str,
# json_difference_checker collects this key from the stored posts when
# looking for new job posts.
"Job Post URL": str,
},
)

# Schema of the content that read_from_json loads from job_posts.json.
# The functional TypedDict syntax is needed because the keys contain spaces.
JsonContent = TypedDict(
"JsonContent",
{
"Requests URL": str,
"Run on startup": bool,
# NOTE(review): the unit of the interval is not visible here - confirm.
"Scrape interval": int,
# NOTE(review): the meaning of "DBMR" is not evident from this file - confirm
# and spell it out.
"DBMR": bool,
"Fixed Lowest Rate": int,
"Hourly Lowest Rate": int,
"Ignore no budget": bool,
# May be None; json_difference_checker asserts it is not None before
# iterating over the stored posts.
"Job Posts": Optional[List[JobPost]],
},
)


# TODO: Add to json: user agent
def read_from_json(json_path: pathlib.Path) -> JsonContent:
def read_from_json(json_path: pathlib.Path) -> Tuple[JsonContent, bool]:
""" Reads all the job posts from job_posts.json """
try:
with open(json_path / "job_posts.json", "r") as job_posts_json:
Expand Down Expand Up @@ -84,6 +106,7 @@ def json_difference_checker(
"""Checks the difference between current scrape and job posts
stored in json to print any new job posts"""

assert json_content["Job Posts"] is not None
old_job_urls = [job_post["Job Post URL"] for job_post in json_content["Job Posts"]]

new_job_posts = [
Expand Down Expand Up @@ -124,14 +147,14 @@ def job_post_scraper(json_content: JsonContent) -> List[JobPost]:
) # TODO: Figure out how to fetch User Agent on current system.
response.raise_for_status()
break
# except requests.exceptions.HTTPError as errh: # TODO Error messages need to be communicated to user in a different way.
# print("HTTP Error:", errh)
# print("Please try a different URL")
# return
# except requests.exceptions.ConnectionError:
# print("Error Connecting")
# print("Please check your internet connection and try again.")
# return
# except requests.exceptions.HTTPError as errh: # TODO Error messages need to be communicated to user in a different way.
# print("HTTP Error:", errh)
# print("Please try a different URL")
# return
# except requests.exceptions.ConnectionError:
# print("Error Connecting")
# print("Please check your internet connection and try again.")
# return
except requests.exceptions.Timeout:
print("Your request timed out.")
if connection_attempts == 3:
Expand Down Expand Up @@ -163,7 +186,7 @@ def job_post_scraper(json_content: JsonContent) -> List[JobPost]:

job_post_url = job_post.find("a", class_="job-title-link").attrs["href"]

job_post_dict = {
job_post_dict: JobPost = {
"Job Title": job_title,
"Payment Type": job_payment_type,
"Budget": job_budget,
Expand Down
Loading