2 files changed: +7 -17

@@ -29,28 +29,20 @@ A concurrent web scraper that extracts links from a list of URLs.
 1. Make sure you have Go installed on your system.
 2. Save the code as a `.go` file (e.g., `scraper.go`).
-3. edit the list of URLs to fetch
-4. Run the program using `go run scraper.go`.
+3. Run the program using `go run scraper.go --urls="https://example.com,https://example2.com, ...etc"`.
 
 **Dependencies:**
 
 * `golang.org/x/net/html`: For HTML parsing.
 
 **Example Usage:**
 
-The program currently scrapes the following URLs:
-
-* "https://google.com"
-* "https://old.reddit.com/"
-* "https://timevko.website"
-
-You can modify the `urls` slice in the `main` function to scrape different websites.
+`go run fue.go --urls="https://timevko.website,https://old.reddit.com"`
 
 **Potential Improvements:**
 
 * **Error Handling:** More robust error handling for network requests and HTML parsing.
 * **Politeness:** Implement delays between requests to avoid overloading the target servers.
 * **Data Storage:** Store the extracted links in a file or database.
-* **Command-line Arguments:** Allow users to specify the URLs and other options through command-line arguments.
 * **Deduplication:** Remove duplicate links from the output.
 * **Advanced Extraction:** Use CSS selectors (e.g., with the `goquery` library) for more specific link extraction.
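Of the remaining improvement ideas, deduplication maps neatly onto the existing channel pipeline. Below is a minimal sketch, not part of this change: it assumes the consumer drains the workers' `results` channel of `map[string]string` link maps, and the `collect` helper and `seen` set are hypothetical names.

```go
package main

import "fmt"

// collect drains link maps from results and prints each link once.
// Hypothetical consumer: in the scraper, results would be the channel
// the workers send extracted links on.
func collect(results <-chan map[string]string) {
	seen := make(map[string]bool) // links already printed
	for links := range results {
		for href, text := range links {
			if seen[href] {
				continue // skip duplicates seen on earlier pages
			}
			seen[href] = true
			fmt.Printf("%s\t%s\n", href, text)
		}
	}
}

func main() {
	// Two pages that both link to example.com; it is printed once.
	results := make(chan map[string]string, 2)
	results <- map[string]string{"https://example.com": "Example"}
	results <- map[string]string{"https://example.com": "Example (again)"}
	close(results)
	collect(results)
}
```

A plain map works here because a single goroutine drains `results`; concurrent consumers would need a mutex or `sync.Map`.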
@@ -1,9 +1,11 @@
 package main
 
 import (
+	"flag"
 	"fmt"
 	"net/http"
 	"net/url"
+	"strings"
 	"sync"
 
 	"golang.org/x/net/html"
@@ -61,20 +63,16 @@ func extractLinks(doc *html.Node, baseURL *url.URL) map[string]string {
 
 func main() {
 	var wg sync.WaitGroup
-	// URLs to scrape
-	urls := []string{
-		"https://google.com",
-		"https://old.reddit.com/",
-		"https://timevko.website",
-	}
+	urlList := flag.String("urls", "https://google.com", "Comma separated list of URL's to crawl")
+	flag.Parse()
 	urlChan := make(chan string)
 	results := make(chan map[string]string)
 
 	for i := 1; i <= 3; i++ {
 		wg.Add(1)
 		go worker(i, urlChan, results, &wg)
 	}
-	for _, url := range urls {
+	for _, url := range strings.Split(*urlList, ",") {
 		urlChan <- url
 	}
 	close(urlChan)
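One caveat with the new flag handling: `strings.Split(*urlList, ",")` keeps any whitespace around the commas, so `--urls="https://a.com, https://b.com"` hands a URL with a leading space to a worker, and a trailing comma yields an empty entry. A minimal sketch of a more tolerant split; `splitURLs` is a hypothetical helper, not part of this diff:

```go
package main

import (
	"fmt"
	"strings"
)

// splitURLs is a hypothetical helper (not in this diff): it splits the
// comma-separated --urls value, trims stray whitespace, and drops empty
// entries left by trailing commas.
func splitURLs(list string) []string {
	var urls []string
	for _, u := range strings.Split(list, ",") {
		if u = strings.TrimSpace(u); u != "" {
			urls = append(urls, u)
		}
	}
	return urls
}

func main() {
	fmt.Println(splitURLs("https://example.com, https://old.reddit.com,"))
	// Prints: [https://example.com https://old.reddit.com]
}
```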