ginglis13 · bji219 · Mar 13, 2023 · Sep 15, 2023 · Sep 15, 2023 · ginglis13
diff --git a/scraper.py b/scraper.py
@@ -4,12 +4,12 @@
 # pulls company information from site to save time that would be spent manually typing out the info
 # Gavin Inglis
 # January 2019
+# Updated September 2023 BJI
 
 from selenium import webdriver
-from selenium.webdriver.common.keys import Keys
-from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
 
-import zipfile
 import time
 import datetime
 import gspread
@@ -18,51 +18,39 @@
 import re
 import getpass
 
-# Get latest chromedriver zip file for mac, extract into same folder
-try:
-    version = requests.get('https://chromedriver.storage.googleapis.com/LATEST_RELEASE').text
-    url = 'https://chromedriver.storage.googleapis.com/{0}/{1}'.format(version, 'chromedriver_mac64.zip')
-    r = requests.get(url, allow_redirects=True)
-    open('chromedriver.zip', 'wb').write(r.content)
-    with zipfile.ZipFile("chromedriver.zip", "r") as zip_ref:
-        zip_ref.extractall()
-except:
-    pass
-
 '''Globals'''
-
 GOOGLE_URL = 'http://www.google.com/search'
 
 # scope of access for api
 scope = ['https://spreadsheets.google.com/feeds',
          'https://www.googleapis.com/auth/drive']
 
 # credentials file generated by google developer console when creating sheets api
-credentials = ServiceAccountCredentials.from_json_keyfile_name('PATH TO YOUR CREDENTIALS', scope)
+credentials = ServiceAccountCredentials.from_json_keyfile_name('PATH_TO_YOUR_CREDS', scope)
 gc = gspread.authorize(credentials)
 
 # login url for site
 url = 'https://www.magicformulainvesting.com/Account/LogOn'
 
+# declare driver as chrome headless instance
+service = Service()
 options = webdriver.ChromeOptions()
 options.add_argument('headless')
 
-# declare driver as chrome headless instance
-driver = webdriver.Chrome(executable_path="./chromedriver", options=options)
+driver = webdriver.Chrome(service=service, options=options)
 
 '''Functions'''
 def scrapeSite():
-
-    print("Scraping stock info...")  # update for terminal
+    print('Scraping stock info...')
 
     # find all td elements, write needed elements to file
-    trs=driver.find_elements_by_xpath('//table[@class="divheight screeningdata"]/tbody/tr')
+    trs=driver.find_elements(By.XPATH,'//table[@class="divheight screeningdata"]/tbody/tr')
 
     names = []
     tikrs = []
 
     for tr in trs:
-        td = tr.find_elements_by_xpath(".//td")
+        td = tr.find_elements(By.XPATH,".//td")
 
         company_name=td[0].get_attribute("innerHTML")
         company_tikr=td[1].get_attribute("innerHTML")
@@ -73,16 +61,15 @@ def scrapeSite():
     return names, tikrs
 
 def writeSheet(names, tikrs):
-
-    print("Writing to sheet...")  # update to terminal
+    print('Writing to sheet...')
 
     # access sheet by url
-    wks = gc.open_by_url("YOUR URL HERE").get_worksheet(1) # worksheet number
-
-    #wks.append_row([' '], table_range='A1') # append a blank line before tickers as requested by OC
-         
-    date=datetime.datetime.today().strftime('%Y-%m-%d') # current date
-    wks.append_row([date], table_range='A1') # append the date, starts in first column
+    wks = gc.open_by_url("YOUR URL HERE"
+                         "/edit?usp=sharing").get_worksheet(1)  # worksheet num 1 is Research
+
+    date=datetime.datetime.today().strftime('%Y-%m-%d')  # current date
+    # wks.append_row([date], table_range='A1')  # append the date starting in first column
+    wks.append_row([date])
 
     for i in range(len(names)):
         price = '=GOOGLEFINANCE("' + tikrs[i] + '","price")'
@@ -91,13 +78,14 @@ def writeSheet(names, tikrs):
 
         url = getUrl(query)
 
-        wks.append_row([names[i],tikrs[i], price, url], table_range='A1', value_input_option="USER_ENTERED") # start in first column
+        # wks.append_row([names[i],tikrs[i], price, url], table_range='A1', value_input_option="USER_ENTERED")
+        wks.append_row([names[i],tikrs[i], price, url], value_input_option="USER_ENTERED")
 
 def getUrl(companyName):
-    url    = GOOGLE_URL + '?q=' + companyName
+    url= GOOGLE_URL + '?q=' + companyName
     result = requests.get(url)
     # fancy regex courtesy of pbui
-    urls     = re.findall('/url\?q=([^&]*)', result.text)
+    urls= re.findall('/url\?q=([^&]*)', result.text)
     return urls[0]
 
 '''Main Execution'''
@@ -106,30 +94,31 @@ def getUrl(companyName):
 driver.get(url)
 
 # find the input elements for logging in
-username=driver.find_element_by_name("Email")
-password=driver.find_element_by_name("Password")
+username=driver.find_element(By.NAME,"Email")
+password=driver.find_element(By.NAME,"Password")
 
 # enter email and password. uses getpass to hide password (i.e. not using plaintext)
-your_email=raw_input("Please enter your email for magicformulainvesting.com: ")
+# Replaced raw_input() with input() for Python 3
+your_email= input("Please enter your email for magicformulainvesting.com: ")
+
+# Have to run scraper.py from a terminal so getpass will work :)
 your_password=getpass.getpass("Please enter your password for magicformulainvesting.com: ")
+
+# selenium sends the email and password to the magicformulainvesting.com login form
 username.send_keys(your_email)
 password.send_keys(your_password)
 
-# enter email and password (for hard coding only)
-# username.send_keys("EMAIL")
-# password.send_keys("PASSWORD")
-
 # click login button
-button=driver.find_element_by_name("login")
+button=driver.find_element(By.NAME,"login")
 button.click()
 
-time.sleep(1) # seconds
+time.sleep(1)  # seconds
 
-# use xpathing to find the radio button element for 50 stocks and click it
-radio = driver.find_element_by_xpath('//input[@value="false" and contains(@name,"Select30")]')
+# use xpath to find the radio button element for 50 stocks and click it
+radio = driver.find_element(By.XPATH,'//*[@id="Select30" and @value="false"]')
 radio.click()
 
-button2=driver.find_element_by_name("stocks")
+button2=driver.find_element(By.NAME,"stocks")
 button2.click()
 
 time.sleep(.5)