Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
slicken committed Dec 12, 2024
1 parent 1b11737 commit 33244f1
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 71 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
app
trades.json
*.log
*.service
*.bak
*.org
48 changes: 30 additions & 18 deletions clerk/clerk.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ import (
)

const (
URL = "https://disclosures-clerk.house.gov/"
SEARCH = "FinancialDisclosure#Search"
pass = "financial-pdfs"
FILE_LINKS = "links.json"
URL = "https://disclosures-clerk.house.gov/"
SEARCH = "FinancialDisclosure#Search"
pass = "financial-pdfs"
FILE_LINKS = "links.json"
FILE_BACKUP = ".links.json.backup"
)

var verbose bool
Expand All @@ -24,31 +25,36 @@ func SetVerbose(v bool) {
verbose = v
}

func SiteCheck(links []string, name string) ([]string, error) {
func SiteCheck(links []string, name string) ([]string, error, bool) {
var newLinks []string
var loop bool

pw, err := playwright.Run()
if err != nil {
return nil, fmt.Errorf("failed to start Playwright: %v", err)
log.Fatalln(`failed to start Playwright. install/update with
go run github.com/playwright-community/playwright-go/cmd/playwright@latest install --with-deps
or
go install github.com/playwright-community/playwright-go/cmd/playwright@latest
playwright install --with-deps`)
}
defer pw.Stop()

browser, err := pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(true),
})
if err != nil {
return nil, fmt.Errorf("failed to launch browser: %v", err)
return nil, fmt.Errorf("failed to launch browser: %v", err), false
}
defer browser.Close()

page, err := browser.NewPage()
if err != nil {
return nil, fmt.Errorf("failed to create page: %v", err)
return nil, fmt.Errorf("failed to create page: %v", err), false
}

_, err = page.Goto(URL + SEARCH)
if err != nil {
return nil, fmt.Errorf("failed to go to URL: %v", err)
return nil, fmt.Errorf("failed to go to URL: %v", err), false
}

// select the current year
Expand All @@ -58,27 +64,27 @@ func SiteCheck(links []string, name string) ([]string, error) {
Values: &[]string{thisYear},
})
if err != nil {
return nil, fmt.Errorf("failed to select Filing Year %s: %v", thisYear, err)
return nil, fmt.Errorf("failed to select Filing Year %s: %v", thisYear, err), false
}

// click search form and wait for result table
if err := page.Click(`button[aria-label="search button"]`); err != nil {
return nil, fmt.Errorf("failed to click search button: %v", err)
return nil, fmt.Errorf("failed to click search button: %v", err), false
}
if _, err = page.WaitForSelector(`#DataTables_Table_0`, playwright.PageWaitForSelectorOptions{
State: playwright.WaitForSelectorStateVisible,
}); err != nil {
return nil, fmt.Errorf("failed to wait for results table to load: %v", err)
return nil, fmt.Errorf("failed to wait for results table to load: %v", err), false
}

// get number of pages
lastPaginationButtonText, err := page.Locator(`.paginate_button:not(.ellipsis):not(.next):last-child`).InnerText()
if err != nil {
return nil, fmt.Errorf("failed to find the last pagination button: %v", err)
return nil, fmt.Errorf("failed to find the last pagination button: %v", err), false
}
pageCount, err := strconv.Atoi(lastPaginationButtonText)
if err != nil {
return nil, fmt.Errorf("failed to convert page count to integer: %v", err)
return nil, fmt.Errorf("failed to convert page count to integer: %v", err), false
}

if verbose {
Expand All @@ -91,13 +97,13 @@ func SiteCheck(links []string, name string) ([]string, error) {
if _, err := page.WaitForSelector(`#DataTables_Table_0`, playwright.PageWaitForSelectorOptions{
State: playwright.WaitForSelectorStateVisible,
}); err != nil {
return nil, fmt.Errorf("failed to wait for results table on page %d: %v", pageNum, err)
return nil, fmt.Errorf("failed to wait for results table on page %d: %v", pageNum, err), loop
}

// Scrape the rows
rows, err := page.QuerySelectorAll(`#DataTables_Table_0 tbody tr`)
if err != nil {
return nil, fmt.Errorf("failed to query table rows on page %d: %v", pageNum, err)
return nil, fmt.Errorf("failed to query table rows on page %d: %v", pageNum, err), loop
}

for _, row := range rows {
Expand Down Expand Up @@ -134,6 +140,12 @@ func SiteCheck(links []string, name string) ([]string, error) {
}
}

if len(newLinks) > 5 {
newLinks = newLinks[:5]
loop = true
break
}

if pageNum >= pageCount {
break
}
Expand Down Expand Up @@ -163,10 +175,10 @@ func SiteCheck(links []string, name string) ([]string, error) {
links = append(links, newLinks...)
err = utils.WriteJSON[[]string](FILE_LINKS, links)
if err != nil {
return links, err
return links, err, loop
}
log.Printf("updated %s. contains %d reports.\n", FILE_LINKS, len(links))
}

return newLinks, nil
return newLinks, nil, loop
}
69 changes: 34 additions & 35 deletions gemini/gemini.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package gemini

import (
"clerk_trades/utils"
"context"
"encoding/json"
"fmt"
"log"
"os"
Expand Down Expand Up @@ -86,16 +86,15 @@ Rule2: in Type field (Transaction Type): if "P" input "Purchase", if "S" input "
if len(out) == 0 {
return nil, fmt.Errorf("no output data from gemini")
}
if err := json.Unmarshal([]byte(out), &Trades); err != nil {
return nil, fmt.Errorf("failed to unmarshalling JSON: %v, output: %s", err, out)
if err := utils.SafeUnmarshal(out, &Trades); err != nil {
log.Fatalf("safe unmarshal failed: %v", err)
}

// print trades
strTrades := PrintTrades(Trades)
log.Print("\r\n", strTrades)

// Trades = checkTrades(Trades)

if verbose {
log.Printf("%d trades in %d reports.\n", len(Trades), len(links))
}
Expand Down Expand Up @@ -130,41 +129,41 @@ func PrintTrades(trades []Trade) string {
return output
}

func checkTrades(Trades []Trade) []Trade {
var count int
var trades []Trade
// func checkTrades(Trades []Trade) []Trade {
// var count int
// var trades []Trade

for _, newTrade := range Trades {
// empty fields are not accepted
if newTrade.Ticker == "" {
count++
continue
}
if newTrade.Type == "" {
count++
continue
}
if newTrade.Date == "" {
count++
continue
}
if newTrade.Filed == "" {
count++
continue
}
trades = append(trades, newTrade)
}
// for _, newTrade := range Trades {
// // empty fields are not accepted
// if newTrade.Ticker == "" {
// count++
// continue
// }
// if newTrade.Type == "" {
// count++
// continue
// }
// if newTrade.Date == "" {
// count++
// continue
// }
// if newTrade.Filed == "" {
// count++
// continue
// }
// trades = append(trades, newTrade)
// }

if count == 0 {
return Trades
}
// if count == 0 {
// return Trades
// }

if verbose {
log.Printf("removed 3 trades has bad gemini data.\n")
}
// if verbose {
// log.Printf("removed 3 trades has bad gemini data.\n")
// }

return trades
}
// return trades
// }

// func hasMatchingWord(new, old string) bool {
// if new == "" || old == "" {
Expand Down
5 changes: 4 additions & 1 deletion links.json
Original file line number Diff line number Diff line change
Expand Up @@ -420,5 +420,8 @@
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/8220692.pdf",
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026340.pdf",
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026346.pdf",
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/8220695.pdf"
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/8220695.pdf",
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026360.pdf",
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026380.pdf",
"https://disclosures-clerk.house.gov/public_disc/ptr-pdfs/2024/20026376.pdf"
]
Loading

0 comments on commit 33244f1

Please sign in to comment.