diff --git a/go.mod b/go.mod index 6d8f3de..ab26b42 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,4 @@ module github.com/google/go-tika go 1.11 -require golang.org/x/net v0.0.0-20190522155817-f3200d17e092 +require golang.org/x/net v0.0.0-20220325170049-de3da57026de diff --git a/go.sum b/go.sum index fedb312..7b4eb43 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092 h1:4QSRKanuywn15aTZvI/mIDEgPQpswuFndXpOj3rKEco= -golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/net v0.0.0-20220325170049-de3da57026de h1:pZB1TWnKi+o4bENlbzAgLrEbY4RMYmUIRobMcSmfeYc= +golang.org/x/net v0.0.0-20220325170049-de3da57026de/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/tika/tika.go b/tika/tika.go index 6877ea5..e040508 100644 --- a/tika/tika.go +++ b/tika/tika.go @@ -24,8 +24,6 @@ import ( "net/http" "reflect" "strings" - - "golang.org/x/net/context/ctxhttp" ) // ClientError is returned by Client's various parse methods and @@ -97,7 +95,7 @@ type Detector struct { // Translator represents the Java package of a Tika Translator. 
type Translator string -// Translators available by defult in Tika. You must configure all required +// Translators available by default in Tika. You must configure all required // authentication details in Tika Server (for example, an API key). const ( Lingo24Translator Translator = "org.apache.tika.language.translate.Lingo24Translator" @@ -119,13 +117,13 @@ func (c *Client) call(ctx context.Context, input io.Reader, method, path string, c.httpClient = http.DefaultClient } - req, err := http.NewRequest(method, c.url+path, input) + req, err := http.NewRequestWithContext(ctx, method, c.url+path, input) if err != nil { return nil, err } req.Header = header - resp, err := ctxhttp.Do(ctx, c.httpClient, req) + resp, err := c.httpClient.Do(req) if err != nil { return nil, err } @@ -138,8 +136,8 @@ func (c *Client) call(ctx context.Context, input io.Reader, method, path string, // callString makes the given request to c and returns the result as a string // and error. callString returns an error if the response code is not 200 StatusOK. -func (c *Client) callString(ctx context.Context, input io.Reader, method, path string) (string, error) { - body, err := c.call(ctx, input, method, path, nil) +func (c *Client) callString(ctx context.Context, input io.Reader, method, path string, header http.Header) (string, error) { + body, err := c.call(ctx, input, method, path, header) if err != nil { return "", err } @@ -156,13 +154,27 @@ func (c *Client) callString(ctx context.Context, input io.Reader, method, path s // Parse parses the given input, returning the body of the input as a string and an error. // If the error is not nil, the body is undefined. func (c *Client) Parse(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/tika") + return c.ParseWithHeader(ctx, input, nil) } // ParseReader parses the given input, returning the body of the input as a reader and an error. 
// If the error is nil, the returned reader must be closed, else, the reader is nil. func (c *Client) ParseReader(ctx context.Context, input io.Reader) (io.ReadCloser, error) { - return c.call(ctx, input, "PUT", "/tika", nil) + return c.ParseReaderWithHeader(ctx, input, nil) +} + +// ParseWithHeader parses the given input, returning the body of the input as a string and an error. +// If the error is not nil, the body is undefined. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) ParseWithHeader(ctx context.Context, input io.Reader, header http.Header) (string, error) { + return c.callString(ctx, input, "PUT", "/tika", header) +} + +// ParseReaderWithHeader parses the given input, returning the body of the input as a reader and an error. +// If the error is nil, the returned reader must be closed, else, the reader is nil. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) ParseReaderWithHeader(ctx context.Context, input io.Reader, header http.Header) (io.ReadCloser, error) { + return c.call(ctx, input, "PUT", "/tika", header) } // ParseRecursive parses the given input and all embedded documents, returning a @@ -186,26 +198,40 @@ func (c *Client) ParseRecursive(ctx context.Context, input io.Reader) ([]string, // Meta parses the metadata from the given input, returning the metadata and an // error. If the error is not nil, the metadata is undefined. func (c *Client) Meta(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/meta") + return c.MetaWithHeader(ctx, input, nil) +} + +// MetaWithHeader parses the metadata from the given input, returning the metadata and an +// error. If the error is not nil, the metadata is undefined. 
+// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) MetaWithHeader(ctx context.Context, input io.Reader, header http.Header) (string, error) { + return c.callString(ctx, input, "PUT", "/meta", header) } // MetaField parses the metadata from the given input and returns the given // field. If the error is not nil, the result string is undefined. func (c *Client) MetaField(ctx context.Context, input io.Reader, field string) (string, error) { - return c.callString(ctx, input, "PUT", fmt.Sprintf("/meta/%v", field)) + return c.MetaFieldWithHeader(ctx, input, field, nil) +} + +// MetaFieldWithHeader parses the metadata from the given input and returns the given +// field. If the error is not nil, the result string is undefined. +// This function also accepts a header so the caller can specify things like `Accept` +func (c *Client) MetaFieldWithHeader(ctx context.Context, input io.Reader, field string, header http.Header) (string, error) { + return c.callString(ctx, input, "PUT", fmt.Sprintf("/meta/%v", field), header) } // Detect gets the mimetype of the given input, returning the mimetype and an // error. If the error is not nil, the mimetype is undefined. func (c *Client) Detect(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/detect/stream") + return c.callString(ctx, input, "PUT", "/detect/stream", nil) } // Language detects the language of the given input, returning the two letter // language code and an error. If the error is not nil, the language is // undefined. 
func (c *Client) Language(ctx context.Context, input io.Reader) (string, error) { - return c.callString(ctx, input, "PUT", "/language/stream") + return c.callString(ctx, input, "PUT", "/language/stream", nil) } // LanguageString detects the language of the given string, returning the two letter @@ -213,7 +239,7 @@ func (c *Client) Language(ctx context.Context, input io.Reader) (string, error) // undefined. func (c *Client) LanguageString(ctx context.Context, input string) (string, error) { r := strings.NewReader(input) - return c.callString(ctx, r, "PUT", "/language/string") + return c.callString(ctx, r, "PUT", "/language/string", nil) } // MetaRecursive parses the given input and all embedded documents. The result @@ -272,7 +298,7 @@ func (c *Client) MetaRecursiveType(ctx context.Context, input io.Reader, content // Translate returns an error and the translated input from src language to // dst language using t. If the error is not nil, the translation is undefined. func (c *Client) Translate(ctx context.Context, input io.Reader, t Translator, src, dst string) (string, error) { - return c.callString(ctx, input, "POST", fmt.Sprintf("/translate/all/%s/%s/%s", t, src, dst)) + return c.callString(ctx, input, "POST", fmt.Sprintf("/translate/all/%s/%s/%s", t, src, dst), nil) } // TranslateReader translates the given input from src language to dst language using t. @@ -284,7 +310,7 @@ func (c *Client) TranslateReader(ctx context.Context, input io.Reader, t Transla // Version returns the default hello message from Tika server. 
func (c *Client) Version(ctx context.Context) (string, error) { - return c.callString(ctx, nil, "GET", "/version") + return c.callString(ctx, nil, "GET", "/version", nil) } var jsonHeader = http.Header{"Accept": []string{"application/json"}} diff --git a/tika/tika_test.go b/tika/tika_test.go index 98bd590..e10f4c4 100644 --- a/tika/tika_test.go +++ b/tika/tika_test.go @@ -75,6 +75,30 @@ func TestParse(t *testing.T) { } } +func TestParseWithHeader(t *testing.T) { + want := "test value" + wantHeader := "application/json" + gotHeader := "" + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotHeader = r.Header.Get("Accept") + fmt.Fprint(w, want) + })) + defer ts.Close() + hdr := http.Header{} + hdr["Accept"] = []string{"application/json"} + c := NewClient(nil, ts.URL) + got, err := c.ParseWithHeader(context.Background(), nil, hdr) + if err != nil { + t.Fatalf("Parse returned nil, want %q", want) + } + if got != want { + t.Errorf("Parse got %q, want %q", got, want) + } + if gotHeader != wantHeader { + t.Errorf("ParseWithHeader Header incorrect got %q, want %q", gotHeader, wantHeader) + } +} + func TestParseReader(t *testing.T) { want := "test value" ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { @@ -175,6 +199,30 @@ func TestMeta(t *testing.T) { } } +func TestMetaWithHeader(t *testing.T) { + want := "test value" + wantHeader := "application/json" + gotHeader := "" + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotHeader = r.Header.Get("Accept") + fmt.Fprint(w, want) + })) + defer ts.Close() + c := NewClient(nil, ts.URL) + hdr := http.Header{} + hdr["Accept"] = []string{"application/json"} + got, err := c.MetaWithHeader(context.Background(), nil, hdr) + if err != nil { + t.Fatalf("MetaWithHeader returned an error: %v, want %q", err, want) + } + if got != want { + t.Errorf("MetaWithHeader got %q, want %q", got, want) + } + if gotHeader != 
wantHeader { + t.Errorf("TestMetaWithHeader Header incorrect got %q, want %q", gotHeader, wantHeader) + } +} + func TestMetaField(t *testing.T) { want := "test value" ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { @@ -189,6 +237,31 @@ func TestMetaField(t *testing.T) { if got != want { t.Errorf("MetaField got %q, want %q", got, want) } + +} + +func TestMetaFieldWithHeader(t *testing.T) { + want := "test value" + wantHeader := "application/json" + gotHeader := "" + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotHeader = r.Header.Get("Accept") + fmt.Fprint(w, want) + })) + defer ts.Close() + c := NewClient(nil, ts.URL) + hdr := http.Header{} + hdr["Accept"] = []string{"application/json"} + got, err := c.MetaFieldWithHeader(context.Background(), nil, "", hdr) + if err != nil { + t.Errorf("MetaFieldWithHeader returned an error: %v, want %q", err, want) + } + if got != want { + t.Errorf("MetaFieldWithHeader got %q, want %q", got, want) + } + if gotHeader != wantHeader { + t.Errorf("TestMetaFieldWithHeader Header incorrect got %q, want %q", gotHeader, wantHeader) + } } func TestDetect(t *testing.T) {