diff --git a/data/6.html b/data/6.html new file mode 100644 index 0000000..1c858d2 --- /dev/null +++ b/data/6.html @@ -0,0 +1,12 @@ + + + + タイトル + + + + + + + + \ No newline at end of file diff --git a/go.mod b/go.mod index fd53816..c5ff38c 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,4 @@ module github.com/soranoba/googp go 1.15 -require golang.org/x/net v0.0.0-20200904194848-62affa334b73 +require golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e diff --git a/go.sum b/go.sum index 76714ba..bc8550b 100644 --- a/go.sum +++ b/go.sum @@ -1,9 +1,8 @@ -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20200904194848-62affa334b73 h1:MXfv8rhZWmFeqX3GNZRsd6vOLoaCHjYEX3qkRo3YBUA= -golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod 
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/googp.go b/googp.go index 8af61d7..4a387b3 100644 --- a/googp.go +++ b/googp.go @@ -11,9 +11,13 @@ package googp import ( + "bufio" "fmt" + "io" "mime" "net/http" + + "golang.org/x/net/html/charset" ) // Fetch the content from the URL and parse OGP information. @@ -43,5 +47,11 @@ func Parse(res *http.Response, i interface{}, opts ...ParserOpts) error { } } - return NewParser(opts...).Parse(res.Body, i) + br := bufio.NewReader(res.Body) + var reader io.Reader = br + data, _ := br.Peek(1024) + enc, _, _ := charset.DetermineEncoding(data, ct) + reader = enc.NewDecoder().Reader(reader) + + return NewParser(opts...).Parse(reader, i) } diff --git a/googp_test.go b/googp_test.go index 9fb41e1..84e9911 100644 --- a/googp_test.go +++ b/googp_test.go @@ -47,6 +47,26 @@ func TestParse(t *testing.T) { assertEqual(t, ogp.Images[0].URL, "http://example.com/image.png") } +func TestParseWithEncoding(t *testing.T) { + client := &http.Client{} + req, err := http.NewRequest("GET", endpoint()+"/6.html", nil) + if err != nil { + t.Error(err) + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + res, err := client.Do(req.WithContext(ctx)) + var ogp OGP + assertNoError(t, Parse(res, &ogp)) + + assertEqual(t, ogp.Title, "ShiftJISタイトル") + assertEqual(t, ogp.Type, "website") + assertEqual(t, ogp.URL, "http://example.com") + assertEqual(t, ogp.Images[0].URL, "http://example.com/image.png") +} + func ExampleFetch() { var ogp OGP if err := Fetch(endpoint()+"/5.html", &ogp); err != nil {