forked from markusmobius/go-domdistiller
-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create a command line tool and add file and server commands
- Loading branch information
Showing
5 changed files
with
163 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,4 +17,4 @@ RUN go build -o app . | |
EXPOSE 8080 | ||
|
||
# Command to run the executable | ||
CMD ["./app"] | ||
CMD ["./app", "server"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
Copyright © 2023 NAME HERE <EMAIL ADDRESS> | ||
*/ | ||
package cmd | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
|
||
"github.com/go-shiori/dom" | ||
"github.com/omnivore-app/go-domdistiller/distiller" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
// fileCmd represents the file command | ||
var fileCmd = &cobra.Command{ | ||
Use: "file", | ||
Short: "Extracts the main content from a file", | ||
Run: func(cmd *cobra.Command, args []string) { | ||
extractFromFile(cmd.Flag("input").Value.String(), cmd.Flag("output").Value.String()) | ||
}, | ||
} | ||
|
||
func extractFromFile(inputPath string, outputPath string) { | ||
// Apply distiller | ||
result, err := distiller.ApplyForFile(inputPath, nil) | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
// Print result | ||
rawHTML := dom.OuterHTML(result.Node) | ||
|
||
file, err := os.Create(outputPath) | ||
if err != nil { | ||
panic(err) | ||
} | ||
fmt.Fprint(file, rawHTML) | ||
} | ||
|
||
func init() { | ||
rootCmd.AddCommand(fileCmd) | ||
|
||
fileCmd.Flags().StringP("input", "i", "", "Path to the file to extract the main content from") | ||
fileCmd.MarkFlagRequired("input") | ||
fileCmd.Flags().StringP("output", "o", "", "Path to the file to write the extracted content to") | ||
fileCmd.MarkFlagRequired("output") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package cmd | ||
|
||
import ( | ||
"os" | ||
|
||
"github.com/spf13/cobra" | ||
) | ||
|
||
var rootCmd = &cobra.Command{ | ||
Use: "go-domdistiller", | ||
Short: "go-domdistiller is a CLI tool for extracting the main content of a web page", | ||
} | ||
|
||
func Execute() { | ||
err := rootCmd.Execute() | ||
if err != nil { | ||
os.Exit(1) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
Copyright © 2023 NAME HERE <EMAIL ADDRESS> | ||
*/ | ||
package cmd | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
"log" | ||
"net/http" | ||
"os" | ||
"strings" | ||
|
||
"github.com/go-shiori/dom" | ||
"github.com/golang-jwt/jwt" | ||
"github.com/omnivore-app/go-domdistiller/distiller" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
// serverCmd represents the server command | ||
var serverCmd = &cobra.Command{ | ||
Use: "server", | ||
Short: "Starts a server that accepts HTML and returns the main content", | ||
Run: func(cmd *cobra.Command, args []string) { | ||
start() | ||
}, | ||
} | ||
|
||
func start() { | ||
log.Print("starting server...") | ||
http.HandleFunc("/", handler) | ||
|
||
// Determine port for HTTP service. | ||
port := os.Getenv("PORT") | ||
if port == "" { | ||
port = "8080" | ||
log.Printf("defaulting to port %s", port) | ||
} | ||
|
||
// Start HTTP server. | ||
log.Printf("listening on port %s", port) | ||
if err := http.ListenAndServe(":"+port, nil); err != nil { | ||
log.Fatal(err) | ||
} | ||
} | ||
|
||
func handler(w http.ResponseWriter, r *http.Request) { | ||
// decode JWT token and check if it's valid | ||
token, err := jwt.Parse(r.Header.Get("Authorization"), func(token *jwt.Token) (interface{}, error) { | ||
if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { | ||
return nil, fmt.Errorf("unexpected signing method: %v", token.Header["alg"]) | ||
} | ||
return []byte(os.Getenv("JWT_SECRET")), nil | ||
}) | ||
if err != nil { | ||
w.WriteHeader(http.StatusUnauthorized) | ||
fmt.Fprint(w, "Unauthorized") | ||
return | ||
} | ||
if !token.Valid { | ||
w.WriteHeader(http.StatusUnauthorized) | ||
fmt.Fprint(w, "Unauthorized") | ||
return | ||
} | ||
|
||
// Parse request body | ||
body, err := ioutil.ReadAll(r.Body) | ||
if err != nil { | ||
log.Println("Failed to read request body:", err) | ||
w.WriteHeader(http.StatusBadRequest) | ||
fmt.Fprint(w, "Failed to read request body") | ||
return | ||
} | ||
|
||
// Apply distiller | ||
result, err := distiller.ApplyForReader(strings.NewReader(string(body)), nil) | ||
if err != nil { | ||
fmt.Println("Failed to apply distiller:", err) | ||
w.WriteHeader(http.StatusInternalServerError) | ||
fmt.Fprint(w, "Failed to apply distiller") | ||
return | ||
} | ||
|
||
// Print result | ||
rawHTML := dom.OuterHTML(result.Node) | ||
fmt.Fprint(w, rawHTML) | ||
} | ||
|
||
func init() { | ||
rootCmd.AddCommand(serverCmd) | ||
|
||
serverCmd.Flags().StringP("port", "p", "8080", "Port to listen on") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,74 +1,7 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
"log" | ||
"net/http" | ||
"os" | ||
"strings" | ||
|
||
"github.com/go-shiori/dom" | ||
"github.com/omnivore-app/go-domdistiller/distiller" | ||
"github.com/golang-jwt/jwt" | ||
) | ||
import "github.com/omnivore-app/go-domdistiller/cmd" | ||
|
||
func main() { | ||
log.Print("starting server...") | ||
http.HandleFunc("/", handler) | ||
|
||
// Determine port for HTTP service. | ||
port := os.Getenv("PORT") | ||
if port == "" { | ||
port = "8080" | ||
log.Printf("defaulting to port %s", port) | ||
} | ||
|
||
// Start HTTP server. | ||
log.Printf("listening on port %s", port) | ||
if err := http.ListenAndServe(":"+port, nil); err != nil { | ||
log.Fatal(err) | ||
} | ||
} | ||
|
||
func handler(w http.ResponseWriter, r *http.Request) { | ||
// decode JWT token and check if it's valid | ||
token, err := jwt.Parse(r.Header.Get("Authorization"), func(token *jwt.Token) (interface{}, error) { | ||
if _, ok := token.Method.(*jwt.SigningMethodHMAC); !ok { | ||
return nil, fmt.Errorf("unexpected signing method: %v", token.Header["alg"]) | ||
} | ||
return []byte(os.Getenv("JWT_SECRET")), nil | ||
}) | ||
if err != nil { | ||
w.WriteHeader(http.StatusUnauthorized) | ||
fmt.Fprint(w, "Unauthorized") | ||
return | ||
} | ||
if !token.Valid { | ||
w.WriteHeader(http.StatusUnauthorized) | ||
fmt.Fprint(w, "Unauthorized") | ||
return | ||
} | ||
|
||
// Parse request body | ||
body, err := ioutil.ReadAll(r.Body) | ||
if err != nil { | ||
log.Println("Failed to read request body:", err) | ||
w.WriteHeader(http.StatusBadRequest) | ||
fmt.Fprint(w, "Failed to read request body") | ||
return | ||
} | ||
|
||
// Apply distiller | ||
result, err := distiller.ApplyForReader(strings.NewReader(string(body)), nil) | ||
if err != nil { | ||
fmt.Println("Failed to apply distiller:", err) | ||
w.WriteHeader(http.StatusInternalServerError) | ||
fmt.Fprint(w, "Failed to apply distiller") | ||
return | ||
} | ||
|
||
// Print result | ||
rawHTML := dom.OuterHTML(result.Node) | ||
fmt.Fprint(w, rawHTML) | ||
cmd.Execute() | ||
} |