diff --git a/.DS_Store b/.DS_Store index 5b928bb7..c4187498 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index 41635879..ad4d0412 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,10 @@ service-account-credentials.json # File Types *.env *.zip + +# Python bytecode +__pycache__/ +*.pyc + +# macOS +.DS_Store \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index efb2be08..0263584f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,26 @@ services: image: cornellappdev/transit-dev:${IMAGE_TAG} env_file: .env volumes: - - ./service-account-credentials.json:/app/service-account-credentials.json:ro + - .:/usr/src/app + - /usr/src/app/node_modules + + ghopper: + image: cornellappdev/transit-ghopper:03_01_25 ports: - - "8080:3000" - + - "8988:8988" + + map: + image: cornellappdev/transit-map + ports: + - "8989:8989" + + ghopper-walking: + image: cornellappdev/transit-ghopper-walking:latest + ports: + - "8987:8987" + + live-tracking: + image: cornellappdev/transit-python:03_01_25 + env_file: python.envrc + ports: + - "5000:5000" diff --git a/package-lock.json b/package-lock.json index 826744f3..605701ae 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3242,6 +3242,7 @@ "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-3.1.9.tgz", "integrity": "sha512-hdr1oIb2p6ZSxu3PB2JWWYS7ZQ0qvaZsc3hK8DR8f02kRzc8rjYmxAIvdz+aYC+8F2IjNaB7HMcSDg8nQpJxyg==", "dev": true, + "license": "MIT", "dependencies": { "chokidar": "^3.5.2", "debug": "^4", diff --git a/python.envrc b/python.envrc new file mode 100644 index 00000000..f426cffc --- /dev/null +++ b/python.envrc @@ -0,0 +1,5 @@ +TOKEN=TOKEN +TWITTER_KEY=TWITTER_KEY +TWITTER_KEY_SECRET=TWITTER_KEY_SECRET +TWITTER_TOKEN=TWITTER_TOKEN +TWITTER_TOKEN_SECRET=TWITTER_TOKEN_SECRET diff --git a/src/.DS_Store b/src/.DS_Store index a39c96c4..13a4eedd 100644 Binary files a/src/.DS_Store and b/src/.DS_Store differ diff --git 
a/src/controllers/EcosystemController.js b/src/controllers/EcosystemController.js index b302eb29..4dc6ed8e 100644 --- a/src/controllers/EcosystemController.js +++ b/src/controllers/EcosystemController.js @@ -25,4 +25,15 @@ router.get("/printers", async (req, res) => { } }); +// Fetch all restaurants +router.get("/restaurants", async (req, res) => { + try { + const restaurants = await EcosystemUtils.fetchAllRestaurants(); + res.status(200).json({ success: true, data: restaurants }); + } catch (error) { + console.error("Error fetching restaurants:", error.message); + res.status(500).json({ error: "Failed to fetch restaurants" }); + } +}); + export default router; diff --git a/src/data/.DS_Store b/src/data/.DS_Store new file mode 100644 index 00000000..8230c96c Binary files /dev/null and b/src/data/.DS_Store differ diff --git a/src/data/db/__pycache__/database.cpython-312.pyc b/src/data/db/__pycache__/database.cpython-312.pyc new file mode 100644 index 00000000..bdff2262 Binary files /dev/null and b/src/data/db/__pycache__/database.cpython-312.pyc differ diff --git a/src/data/db/__pycache__/models.cpython-312.pyc b/src/data/db/__pycache__/models.cpython-312.pyc new file mode 100644 index 00000000..ec7e1f0c Binary files /dev/null and b/src/data/db/__pycache__/models.cpython-312.pyc differ diff --git a/src/data/db/database.py b/src/data/db/database.py index 8ef6331f..608300a0 100644 --- a/src/data/db/database.py +++ b/src/data/db/database.py @@ -32,5 +32,18 @@ def insert_printer(location, description, latitude, longitude): VALUES (?, ?, ?, ?) ''', (location, description, latitude, longitude)) + conn.commit() + conn.close() + +def insert_restaurant(name, category, address, latitude, longitude, image_url, web_url): + """Insert a restaurant into the database.""" + conn = get_db_connection() + cursor = conn.cursor() + + cursor.execute(''' + INSERT OR IGNORE INTO restaurants (name, category, address, latitude, longitude, image_url, web_url) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ''', (name, category, address, latitude, longitude, image_url, web_url)) + conn.commit() conn.close() \ No newline at end of file diff --git a/src/data/db/models.py b/src/data/db/models.py index d35f9269..69567993 100644 --- a/src/data/db/models.py +++ b/src/data/db/models.py @@ -30,6 +30,19 @@ def create_tables(): ) ''') + cursor.execute(''' + CREATE TABLE IF NOT EXISTS restaurants ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT UNIQUE, + category TEXT, + address TEXT, + latitude REAL, + longitude REAL, + image_url TEXT, + web_url TEXT + ) + ''') + conn.commit() conn.close() diff --git a/src/data/scrapers/__pycache__/libraries.cpython-312.pyc b/src/data/scrapers/__pycache__/libraries.cpython-312.pyc new file mode 100644 index 00000000..3781a20b Binary files /dev/null and b/src/data/scrapers/__pycache__/libraries.cpython-312.pyc differ diff --git a/src/data/scrapers/__pycache__/printers.cpython-312.pyc b/src/data/scrapers/__pycache__/printers.cpython-312.pyc new file mode 100644 index 00000000..8a76130d Binary files /dev/null and b/src/data/scrapers/__pycache__/printers.cpython-312.pyc differ diff --git a/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc b/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc new file mode 100644 index 00000000..733477b7 Binary files /dev/null and b/src/data/scrapers/__pycache__/restaurants.cpython-312.pyc differ diff --git a/src/data/scrapers/libraries.py b/src/data/scrapers/libraries.py index 51a7eb84..20fe79be 100644 --- a/src/data/scrapers/libraries.py +++ b/src/data/scrapers/libraries.py @@ -1,26 +1,32 @@ -import requests +from playwright.sync_api import sync_playwright from bs4 import BeautifulSoup -# URL of the CU Print directory page -URL = "https://www.cornell.edu/about/maps/directory/?notes=Library&caption=%20Libraries" +URL = "https://www.cornell.edu/about/maps/directory/?layer=Library&sublayer=" def scrape_libraries(): - # Send a GET request to fetch the HTML content - response = requests.get(URL) - 
soup = BeautifulSoup(response.text, 'html.parser') + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + page.goto(URL, wait_until="networkidle") - # Locate the table + # Get the rendered HTML after JS loads + content = page.content() + browser.close() + + soup = BeautifulSoup(content, 'html.parser') table = soup.find("table", {"id": "directoryTable"}) - rows = table.find("tbody").find_all("tr") + if not table: + print("Could not find the table.") + return [] - # Extract data + rows = table.find("tbody").find_all("tr") data = [] for row in rows: cols = row.find_all("td") if len(cols) < 3: continue - - location_name = cols[0].text.strip().split('\n\n\n')[0] + + location_name = cols[0].text.strip() address = cols[1].text.strip() coordinates_string = cols[2].text.strip() coordinates = [float(x) for x in coordinates_string.split(', ')] @@ -30,5 +36,8 @@ def scrape_libraries(): "Address": address, "Coordinates": coordinates }) - - return data \ No newline at end of file + + return data + +if __name__ == "__main__": + scrape_libraries() \ No newline at end of file diff --git a/src/data/scrapers/printers.py b/src/data/scrapers/printers.py index e972046f..1cc7242b 100644 --- a/src/data/scrapers/printers.py +++ b/src/data/scrapers/printers.py @@ -1,37 +1,33 @@ -import requests -from bs4 import BeautifulSoup - -# URL of the CU Print directory page -URL = "https://www.cornell.edu/about/maps/directory/?layer=CUPrint&caption=%20CU%20Print%20Printers" # Replace with the actual URL +from playwright.sync_api import sync_playwright def scrape_printers(): - # Send a GET request to fetch the HTML content - response = requests.get(URL) - soup = BeautifulSoup(response.text, 'html.parser') + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + page.goto("https://www.cornell.edu/about/maps/directory/?layer=CUPrint") + + # Wait for the dynamic table to load + 
page.wait_for_selector("table#directoryTable") + + rows = page.query_selector_all("table#directoryTable > tbody > tr") + data = [] - # Locate the table - table = soup.find("table", {"id": "directoryTable"}) - rows = table.find("tbody").find_all("tr") + for row in rows: + cols = row.query_selector_all("td") + if len(cols) < 3: + continue + location = cols[0].inner_text().strip() + description = cols[1].inner_text().strip() + coordinates = [float(x.strip()) for x in cols[2].inner_text().split(",")] - # Extract data - data = [] - for row in rows: - cols = row.find_all("td") - if len(cols) < 3: # Ensure row has enough columns - continue - - location_name = cols[0].text.strip() - description = cols[1].text.strip() - - # Extract coordinates from the hyperlink tag inside - coordinates_link = cols[2].find("a") - coordinates_string = coordinates_link.text.strip() if coordinates_link else "" - coordinates = [float(x) for x in coordinates_string.split(', ')] + data.append({ + "Location": location, + "Description": description, + "Coordinates": coordinates + }) + browser.close() + return data - data.append({ - "Location": location_name, - "Description": description, - "Coordinates": coordinates - }) - return data \ No newline at end of file +if __name__ == "__main__": + scrape_printers() \ No newline at end of file diff --git a/src/data/scrapers/restaurants.py b/src/data/scrapers/restaurants.py new file mode 100644 index 00000000..d3307302 --- /dev/null +++ b/src/data/scrapers/restaurants.py @@ -0,0 +1,70 @@ +import requests +import pprint +from playwright.sync_api import sync_playwright + +def scrape_restaurants(): + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + context = browser.new_context() + page = context.new_page() + page.goto("https://www.visitithaca.com") + page.wait_for_timeout(1000) + + # Get a fresh token from token endpoint + token_response = context.request.get("https://www.visitithaca.com/plugins/core/get_simple_token/") + + token = 
token_response.text() + + # Build your API request URL with the new token + api_url = ( + "https://www.visitithaca.com/includes/rest_v2/plugins_listings_listings/find/" + "?json=%7B%22filter%22%3A%7B%22%24and%22%3A%5B%7B%22filter_tags%22%3A%7B%22%24in%22%3A%5B" + "%22site_primary_subcatid_307%22%2C%22site_primary_subcatid_308%22%2C%22site_primary_subcatid_309%22%2C" + "%22site_primary_subcatid_311%22%2C%22site_primary_subcatid_312%22%2C%22site_primary_subcatid_504%22%2C" + "%22site_primary_subcatid_505%22%2C%22site_primary_subcatid_506%22%2C%22site_primary_subcatid_508%22%2C" + "%22site_primary_subcatid_509%22%2C%22site_primary_subcatid_510%22%2C%22site_primary_subcatid_511%22%2C" + "%22site_primary_subcatid_512%22%2C%22site_primary_subcatid_513%22%2C%22site_primary_subcatid_514%22%2C" + "%22site_primary_subcatid_516%22%2C%22site_primary_subcatid_520%22%2C%22site_primary_subcatid_532%22%2C" + "%22site_primary_subcatid_536%22%5D%7D%7D%2C%7B%22regionid%22%3A%7B%22%24in%22%3A%5B8%5D%7D%7D%5D%7D%2C" + "%22options%22%3A%7B%22limit%22%3A100%2C%22skip%22%3A0%2C%22count%22%3Atrue%2C%22castDocs%22%3Afalse%2C" + "%22fields%22%3A%7B%22recid%22%3A1%2C%22title%22%3A1%2C%22primary_category%22%3A1%2C%22address1%22%3A1%2C" + "%22city%22%3A1%2C%22url%22%3A1%2C%22isDTN%22%3A1%2C%22latitude%22%3A1%2C%22longitude%22%3A1%2C" + "%22primary_image_url%22%3A1%2C%22qualityScore%22%3A1%2C%22rankOrder%22%3A1%2C%22weburl%22%3A1%2C" + "%22dtn.rank%22%3A1%2C%22yelp.rating%22%3A1%2C%22yelp.url%22%3A1%2C%22yelp.review_count%22%3A1%2C" + "%22yelp.price%22%3A1%2C%22booking_price_avg%22%3A1%2C%22booking_price_total%22%3A1%2C%22booking_full%22%3A1%7D%2C" + "%22hooks%22%3A%5B%5D%2C%22sort%22%3A%7B%22rankorder%22%3A1%2C%22sortcompany%22%3A1%7D%7D%7D" + f"&token={token}" + ) + + # Make the API request + api_response = context.request.get(api_url) + + # Parse JSON data + json_body = api_response.json() + + # Extract the restaurant data + restaurants_data = json_body.get("docs", {}).get("docs", []) + + 
data = [] + for item in restaurants_data: + name = item.get("title") + category = item.get("primary_category", {}).get("subcatname") + address = item.get("address1") + coordinates = [item.get("latitude"), item.get("longitude")] + image_url = item.get("primary_image_url") + web_url = item.get("weburl") + + data.append({ + "Name": name, + "Category": category, + "Address": address, + "Coordinates": coordinates, + "Image URL": image_url, + "Web URL": web_url, + }) + + browser.close() + return data + +if __name__ == "__main__": + scrape_restaurants() \ No newline at end of file diff --git a/src/data/scripts/populate_db.py b/src/data/scripts/populate_db.py index fa6a23f4..fd5e8717 100644 --- a/src/data/scripts/populate_db.py +++ b/src/data/scripts/populate_db.py @@ -3,7 +3,8 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) from data.scrapers.libraries import scrape_libraries from data.scrapers.printers import scrape_printers -from data.db.database import insert_library, insert_printer +from data.scrapers.restaurants import scrape_restaurants +from data.db.database import insert_library, insert_printer, insert_restaurant from data.db.models import create_tables def populate_db(): @@ -19,6 +20,11 @@ def populate_db(): printers = scrape_printers() for printer in printers: insert_printer(printer['Location'], printer['Description'], printer['Coordinates'][0], printer['Coordinates'][1]) + + # Insert restaurants + restaurants = scrape_restaurants() + for restaurant in restaurants: + insert_restaurant(restaurant['Name'], restaurant['Category'], restaurant['Address'], restaurant['Coordinates'][0], restaurant['Coordinates'][1], restaurant['Image URL'], restaurant['Web URL']) if __name__ == "__main__": populate_db() \ No newline at end of file diff --git a/src/data/transit.db b/src/data/transit.db index 48ec910f..40bc3f7a 100644 Binary files a/src/data/transit.db and b/src/data/transit.db differ diff --git a/src/swagger.json 
b/src/swagger.json index ff9b0afe..dc399cd8 100644 --- a/src/swagger.json +++ b/src/swagger.json @@ -49,10 +49,7 @@ ], "responses": { "200": { - "description": "{\"success\": true, \"data\": [\"id\": 1, \"location\": \"Africana Studies and Research Center\", \"address\": \"310 Triphammer Rd, Ithaca, NY 14850\", \"latitude\": 42.4574, \"longitude\": -76.4823]}", - "schema": { - "$ref": "#/components/schemas/BusStop" - } + "description": "{\"success\": true, \"data\": [\"id\": 1, \"location\": \"Africana Studies and Research Center\", \"address\": \"310 Triphammer Rd, Ithaca, NY 14850\", \"latitude\": 42.4574, \"longitude\": -76.4823]}" } } } @@ -66,10 +63,21 @@ ], "responses": { "200": { - "description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}", - "schema": { - "$ref": "#/components/schemas/BusStop" - } + "description": "{\"success\": true, \"data\": [{\"id\": 1, \"location\": \"Akwe:kon\", \"description\": \"Color - Room 115\", \"latitude\": 42.4563, \"longitude\": -76.4806}]}" + } + } + } + }, + "/api/v1/restaurants": { + "get": { + "summary": "Returns a list of all restaurants in Ithaca.", + "description": "A list of all restaurants.", + "produces": [ + "application/json" + ], + "responses": { + "200": { + "description": "{\"success\": true, \"data\": [{\"id\": 1, \"name\": \"Alley Cat Cafe\", \"category\": \"Coffee\", \"address\": \"112 N Cayuga St.\", \"latitude\": 42.4407309, \"longitude\": -76.4950526, \"image_url\": \"https://assets.simpleviewinc.com/simpleview/image/upload/crm/ithacany/Alley-Cat-Logo_1872F7A2-5056-A36A-09631186ACCA298F-1872f6fe5056a36_1872f7f6-5056-a36a-098fa97e4ad0cd49.jpg\", \"web_url\": \"https://www.alleycatithaca.com\"}]}" } } } diff --git a/src/utils/EcosystemUtils.js b/src/utils/EcosystemUtils.js index 5aadd2b8..8e93d4b3 100644 --- a/src/utils/EcosystemUtils.js +++ b/src/utils/EcosystemUtils.js @@ -60,4 +60,31 @@ 
function fetchAllPrinters() { }); } -export default { fetchAllLibraries, fetchAllPrinters }; +function fetchAllRestaurants() { + return new Promise((resolve, reject) => { + // Open the database + const db = new sqlite3.Database(dbPath, (err) => { + if (err) { + console.error(err.message); + return reject(err); + } + console.log("Connected to the SQLite database."); + }); + + // Fetch restaurants + db.all("SELECT * FROM restaurants", (err, rows) => { + if (err) { + console.error(err.message); + return reject(err); + } + db.close((err) => { + if (err) console.error(err.message); + console.log("Closed the database connection."); + }); + + resolve(rows); + }); + }); +} + +export default { fetchAllLibraries, fetchAllPrinters, fetchAllRestaurants };