Skip to content

Commit

Permalink
Supports charset other than UTF-8 (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
soranoba authored Jun 30, 2022
1 parent 7894f0d commit 1490591
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 11 deletions.
12 changes: 12 additions & 0 deletions data/6.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<html xmlns:og="http://ogp.me/ns#">
<head>
<meta charset="shift_jis" />
<!-- Test fixture for non-UTF-8 input. NOTE(review): the non-ASCII title text below appears to be Shift_JIS bytes (matching the charset declared above) and looks garbled when viewed in another encoding; presumably it must not be re-saved as UTF-8. Confirm before editing. -->
<title>ƒ^ƒCƒgƒ‹</title>
<meta property="og:title" content="ShiftJISƒ^ƒCƒgƒ‹" />
<meta property="og:type" content="website" />
<meta property="og:url" content="http://example.com" />
<meta property="og:image" content="http://example.com/image.png" />
</head>
<body>
</body>
</html>
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ module github.com/soranoba/googp

go 1.15

require golang.org/x/net v0.0.0-20200904194848-62affa334b73
require golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e
17 changes: 8 additions & 9 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200904194848-62affa334b73 h1:MXfv8rhZWmFeqX3GNZRsd6vOLoaCHjYEX3qkRo3YBUA=
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ=
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
12 changes: 11 additions & 1 deletion googp.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@
package googp

import (
"bufio"
"fmt"
"io"
"mime"
"net/http"

"golang.org/x/net/html/charset"
)

// Fetch the content from the URL and parse OGP information.
Expand Down Expand Up @@ -43,5 +47,11 @@ func Parse(res *http.Response, i interface{}, opts ...ParserOpts) error {
}
}

return NewParser(opts...).Parse(res.Body, i)
br := bufio.NewReader(res.Body)
var reader io.Reader = br
data, _ := br.Peek(1024)
enc, _, _ := charset.DetermineEncoding(data, ct)
reader = enc.NewDecoder().Reader(reader)

return NewParser(opts...).Parse(reader, i)
}
20 changes: 20 additions & 0 deletions googp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,26 @@ func TestParse(t *testing.T) {
assertEqual(t, ogp.Images[0].URL, "http://example.com/image.png")
}

// TestParseWithEncoding checks that Parse copes with a response body that is
// not UTF-8. It requests the /6.html fixture (which declares charset
// "shift_jis") and expects the OGP fields, including the Japanese title, to
// come back as ordinary UTF-8 Go strings.
func TestParseWithEncoding(t *testing.T) {
	req, err := http.NewRequest("GET", endpoint()+"/6.html", nil)
	if err != nil {
		// Without a request nothing below can run; stop instead of
		// continuing with a nil request.
		t.Fatal(err)
	}

	// Bound the request so a hung test server cannot stall the suite.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	client := &http.Client{}
	res, err := client.Do(req.WithContext(ctx))
	if err != nil {
		// res is nil on error; passing it to Parse would panic.
		t.Fatal(err)
	}
	// The caller of Do owns the body and must close it.
	defer res.Body.Close()

	var ogp OGP
	assertNoError(t, Parse(res, &ogp))

	assertEqual(t, ogp.Title, "ShiftJISタイトル")
	assertEqual(t, ogp.Type, "website")
	assertEqual(t, ogp.URL, "http://example.com")

	// Guard the index so a parsing regression fails cleanly rather than
	// panicking with an out-of-range error.
	if len(ogp.Images) == 0 {
		t.Fatal("expected at least one og:image, got none")
	}
	assertEqual(t, ogp.Images[0].URL, "http://example.com/image.png")
}

func ExampleFetch() {
var ogp OGP
if err := Fetch(endpoint()+"/5.html", &ogp); err != nil {
Expand Down

0 comments on commit 1490591

Please sign in to comment.