diff --git a/.gitignore b/.gitignore index 9ac453a..31b97c9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ app -trades.json *.log *.service *.bak +*.org diff --git a/clerk/clerk.go b/clerk/clerk.go index e62825d..54bac17 100644 --- a/clerk/clerk.go +++ b/clerk/clerk.go @@ -12,10 +12,11 @@ import ( ) const ( - URL = "https://disclosures-clerk.house.gov/" - SEARCH = "FinancialDisclosure#Search" - pass = "financial-pdfs" - FILE_LINKS = "links.json" + URL = "https://disclosures-clerk.house.gov/" + SEARCH = "FinancialDisclosure#Search" + pass = "financial-pdfs" + FILE_LINKS = "links.json" + FILE_BACKUP = ".links.json.backup" ) var verbose bool @@ -24,12 +25,17 @@ func SetVerbose(v bool) { verbose = v } -func SiteCheck(links []string, name string) ([]string, error) { +func SiteCheck(links []string, name string) ([]string, error, bool) { var newLinks []string + var loop bool pw, err := playwright.Run() if err != nil { - return nil, fmt.Errorf("failed to start Playwright: %v", err) + log.Fatalln(`failed to start Playwright. install/update with +go run github.com/playwright-community/playwright-go/cmd/playwright@latest install --with-deps +or +go install github.com/playwright-community/playwright-go/cmd/playwright@latest +playwright install --with-deps`) } defer pw.Stop() @@ -37,18 +43,18 @@ func SiteCheck(links []string, name string) ([]string, error) { Headless: playwright.Bool(true), }) if err != nil { - return nil, fmt.Errorf("failed to launch browser: %v", err) + return nil, fmt.Errorf("failed to launch browser: %v", err), false } defer browser.Close() page, err := browser.NewPage() if err != nil { - return nil, fmt.Errorf("failed to create page: %v", err) + return nil, fmt.Errorf("failed to create page: %v", err), false } _, err = page.Goto(URL + SEARCH) if err != nil { - return nil, fmt.Errorf("failed to go to URL: %v", err) + return nil, fmt.Errorf("failed to go to URL: %v", err), false } // select the current year @@ -58,27 +64,27 @@ func SiteCheck(links []string, name string) ([]string, error) { Values: &[]string{thisYear}, }) if err != nil { - return nil, fmt.Errorf("failed to select Filing Year %s: %v", thisYear, err) + return nil, fmt.Errorf("failed to select Filing Year %s: %v", thisYear, err), false } // click search form and wait for result table if err := page.Click(`button[aria-label="search button"]`); err != nil { - return nil, fmt.Errorf("failed to click search button: %v", err) + return nil, fmt.Errorf("failed to click search button: %v", err), false } if _, err = page.WaitForSelector(`#DataTables_Table_0`, playwright.PageWaitForSelectorOptions{ State: playwright.WaitForSelectorStateVisible, }); err != nil { - return nil, fmt.Errorf("failed to wait for results table to load: %v", err) + return nil, fmt.Errorf("failed to wait for results table to load: %v", err), false } // get number of pages lastPaginationButtonText, err := page.Locator(`.paginate_button:not(.ellipsis):not(.next):last-child`).InnerText() if err != nil { - return nil, fmt.Errorf("failed to find the last pagination button: %v", err) + return nil, fmt.Errorf("failed to find the last pagination button: %v", err), false } pageCount, err := strconv.Atoi(lastPaginationButtonText) if err != nil { - return nil, fmt.Errorf("failed to convert page count to integer: %v", err) + return nil, fmt.Errorf("failed to convert page count to integer: %v", err), false } if verbose { @@ -91,13 +97,13 @@ func SiteCheck(links []string, name string) ([]string, error) { if _, err := page.WaitForSelector(`#DataTables_Table_0`, playwright.PageWaitForSelectorOptions{ State: playwright.WaitForSelectorStateVisible, }); err != nil { - return nil, fmt.Errorf("failed to wait for results table on page %d: %v", pageNum, err) + return nil, fmt.Errorf("failed to wait for results table on page %d: %v", pageNum, err), loop } // Scrape the rows rows, err := page.QuerySelectorAll(`#DataTables_Table_0 tbody tr`) if err != nil { - return nil, fmt.Errorf("failed to query table rows on page %d: %v", pageNum, err) + return nil, fmt.Errorf("failed to query table rows on page %d: %v", pageNum, err), loop } for _, row := range rows { @@ -134,6 +140,12 @@ func SiteCheck(links []string, name string) ([]string, error) { } } + if len(newLinks) > 5 { + newLinks = newLinks[:5] + loop = true + break + } + if pageNum >= pageCount { break } @@ -163,10 +175,10 @@ func SiteCheck(links []string, name string) ([]string, error) { links = append(links, newLinks...) err = utils.WriteJSON[[]string](FILE_LINKS, links) if err != nil { - return links, err + return links, err, loop } log.Printf("updated %s. contains %d reports.\n", FILE_LINKS, len(links)) } - return newLinks, nil + return newLinks, nil, loop } diff --git a/gemini/gemini.go b/gemini/gemini.go index 10b1562..d06cc32 100644 --- a/gemini/gemini.go +++ b/gemini/gemini.go @@ -1,8 +1,8 @@ package gemini import ( + "clerk_trades/utils" "context" - "encoding/json" "fmt" "log" "os" @@ -86,8 +86,8 @@ Rule2: in Type field (Transaction Type): if "P" input "Purchase", if "S" input " if len(out) == 0 { return nil, fmt.Errorf("no output data from gemini") } - if err := json.Unmarshal([]byte(out), &Trades); err != nil { - return nil, fmt.Errorf("failed to unmarshalling JSON: %v, output: %s", err, out) + if err := utils.SafeUnmarshal(out, &Trades); err != nil { + log.Fatalf("safe unmarshal failed: %v", err) } // print trades @@ -95,7 +95,6 @@ Rule2: in Type field (Transaction Type): if "P" input "Purchase", if "S" input " log.Print("\r\n", strTrades) // Trades = checkTrades(Trades) - if verbose { log.Printf("%d trades in %d reports.\n", len(Trades), len(links)) } @@ -130,41 +129,41 @@ func PrintTrades(trades []Trade) string { return output } -func checkTrades(Trades []Trade) []Trade { - var count int - var trades []Trade +// func checkTrades(Trades []Trade) []Trade { +// var count int +// var trades []Trade - for _, newTrade := range Trades { - // empty fileds are not accepted - if newTrade.Ticker == "" { - count++ - continue - } - if newTrade.Type == "" { - count++ - continue - } - if newTrade.Date == "" { - count++ - continue - } - if newTrade.Filed == "" { - count++ - continue - } - trades = append(trades, newTrade) - } +// for _, newTrade := range Trades { +// // empty fileds are not accepted +// if newTrade.Ticker == "" { +// count++ +// continue +// } +// if newTrade.Type == "" { +// count++ +// continue +// } +// if newTrade.Date == "" { +// count++ +// continue +// } +// if newTrade.Filed == "" { +// count++ +// continue +// } +// trades = append(trades, newTrade) +// } - if count == 0 { - return Trades - } +// if count == 0 { +// return Trades +// } - if verbose { - log.Printf("removed 3 trades has bad gemini data.\n") - } +// if verbose { +// log.Printf("removed 3 trades has bad gemini data.\n") +// } - return trades -} +// return trades +// } // func hasMatchingWord(new, old string) bool { // if new == "" || old == "" { diff --git a/links.json b/links.json index 4e4aa60..2a36781 100644 --- a/links.json +++ b/links.json @@ -420,5 +420,8 @@ "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/8220692.pdf", "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026340.pdf", "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026346.pdf", - "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/8220695.pdf" + "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/8220695.pdf", + "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026360.pdf", + "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026380.pdf", + "https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026376.pdf" ] \ No newline at end of file diff --git a/main.go b/main.go index 4ac45f2..e2305f2 100644 --- a/main.go +++ b/main.go @@ -11,9 +11,11 @@ import ( "log" "net/http" "os" + "os/signal" "strconv" "strings" "sync" + "syscall" "time" ) @@ -22,24 +24,24 @@ func usage(code int) { Usage: %s [ | ] [OPTIONS] Arguments: - ticker_duration Duration for the application ticker to check for new - reports on Clerk website. Minimum 3h (e.g. 24h, 72h). - Only accepts 'h' for hours before the integer. - If not specified, it will not check for new reports. - list Specify the number of reports to list their trades. - (type=int). This argument must be betweengreater than - 0 but less that 6. - If used, the program will exit after printing. + ticker_duration Duration for the application ticker to check for new + reports on Clerk website. Minimum 3h (e.g. 24h, 72h). + Only accepts 'h' for hours before the integer. + If not specified, it will not check for new reports. + list Specify the number of reports to list their trades. + (type=int). This argument must be betweengreater than + 0 but less that 6. + If used, the program will exit after printing. Note: Only one of these two arguments may be provided at a time. OPTIONS: - -n List reports of a specific individual. - -e, --email Enable email notifications for trade results via Mailgun. - Configure settings in 'gunmail.config' to activate. - --log Save logs to file. - -v, --verbose Enable verbose output for detailed logging and information. - -h, --help Display this help menu. + -n, --name List reports of a specific individual. + -e, --email Enable email notifications for trade results via Mailgun. + Configure settings in 'gunmail.config' to activate. + --log Save logs to file. + -v, --verbose Enable verbose output for detailed logging and information. + -h, --help Display this help menu. `, os.Args[0]) os.Exit(code) } @@ -82,7 +84,7 @@ func main() { mail = true log.Printf("results will be sent to %v\n", email.Mailgun.EmailTo) - case strings.HasPrefix(arg, "-n"): + case strings.HasPrefix(arg, "-n") || strings.HasPrefix(arg, "--name"): if strings.Contains(arg, "=") { name = strings.SplitN(arg, "=", 2)[1] } else if i+1 < len(os.Args) && !strings.HasPrefix(os.Args[i+1], "-") { @@ -101,11 +103,27 @@ func main() { } update = duration } else { + if name != "" { + continue + } log.Fatalln("error: invalid argument:", arg) } } } + if name != "" { + update = time.Duration(24 * time.Hour) + listReports = 0 + + if err := copyFile(clerk.FILE_LINKS, clerk.FILE_BACKUP); err != nil { + log.Println("error copying file:", err) + } + if err := os.Remove(clerk.FILE_LINKS); err != nil { + log.Println("error removing file:", err) + } + go HandleInterrupt() + } + if update == 0 && (listReports > 5 || listReports <= 0) || update != 0 && listReports != 0 { usage(1) } @@ -160,6 +178,7 @@ func checkReports(update time.Duration, listReports int, name string) error { var err error var files []string var links []string + var loop bool links, _ = utils.ReadJSON[[]string](clerk.FILE_LINKS) if verbose { @@ -172,7 +191,7 @@ func checkReports(update time.Duration, listReports int, name string) error { } else { log.Println("checking for new reports.") } - files, err = clerk.SiteCheck(links, name) + files, err, loop = clerk.SiteCheck(links, name) if err != nil { return err } @@ -244,6 +263,12 @@ func checkReports(update time.Duration, listReports int, name string) error { } } + if loop { + defer func() { + go checkReports(update, listReports, name) + }() + } + return nil } @@ -277,3 +302,37 @@ func parseCustomDuration(input string) (time.Duration, error) { } return 0, fmt.Errorf("invalid duration format; only hours (h) are accepted") } + +func HandleInterrupt() { + closeChan := make(chan os.Signal, 1) + signal.Notify(closeChan, os.Interrupt, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) + + s := <-closeChan + log.Printf("\n%+v recived. shutting down.\n", s) + + if err := copyFile(clerk.FILE_BACKUP, clerk.FILE_LINKS); err != nil { + log.Println("error copying file:", err) + } + if err := os.Remove(clerk.FILE_BACKUP); err != nil { + log.Println("error removing file:", err) + } + + os.Exit(0) +} + +func copyFile(src, dst string) error { + sourceFile, err := os.Open(src) + if err != nil { + return err + } + defer sourceFile.Close() + + destFile, err := os.Create(dst) + if err != nil { + return err + } + defer destFile.Close() + + _, err = io.Copy(destFile, sourceFile) + return err +} diff --git a/utils/utils.go b/utils/utils.go index a1a2773..b5e824f 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "os" + "strings" ) func ReadJSON[T any](file string) (T, error) { @@ -57,3 +58,26 @@ func Contains(slice []string, str string) bool { } return false } + +func EnsureValidJSON(input string) (string, error) { + input = strings.TrimSpace(input) + if !strings.HasSuffix(input, "}") && !strings.HasSuffix(input, "]") { + input = input + "}" + } + var temp interface{} + if err := json.Unmarshal([]byte(input), &temp); err != nil { + return "", fmt.Errorf("invalid JSON format: %w", err) + } + return input, nil +} + +func SafeUnmarshal(out string, target interface{}) error { + validJSON, err := EnsureValidJSON(out) + if err != nil { + return fmt.Errorf("failed to ensure valid JSON: %v, output: %s", err, out) + } + if err := json.Unmarshal([]byte(validJSON), target); err != nil { + return fmt.Errorf("failed to unmarshall JSON: %v, output: %s", err, validJSON) + } + return nil +}