33"""
44Python client for the Rosette API.
55
6- Copyright (c) 2014-2019 Basis Technology Corporation.
6+ Copyright (c) 2014-2022 Basis Technology Corporation.
77
88Licensed under the Apache License, Version 2.0 (the "License");
99you may not use this file except in compliance with the License.
2828import requests
2929import platform
3030
31- _BINDING_VERSION = '1.14.4'
31+ _APPLICATION_JSON = 'application/json'
32+ _BINDING_LANGUAGE = 'python'
33+ _BINDING_VERSION = '1.20.0'
34+ _CONCURRENCY_HEADER = 'x-rosetteapi-concurrency'
35+ _CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-'
36+ _CUSTOM_HEADER_PATTERN = re .compile ('^' + _CUSTOM_HEADER_PREFIX )
3237_GZIP_BYTEARRAY = bytearray ([0x1F , 0x8b , 0x08 ])
3338
3439_ISPY3 = sys .version_info [0 ] == 3
3540
36-
3741if _ISPY3 :
3842 _GZIP_SIGNATURE = _GZIP_BYTEARRAY
3943else :
@@ -49,7 +53,6 @@ def __init__(self, js, code):
4953 self .status_code = code
5054
5155 def json (self ):
52- """ return json"""
5356 return self ._json
5457
5558
@@ -112,7 +115,7 @@ def serialize(self, options):
112115 values = {}
113116 for (key , val ) in self .__params .items ():
114117 if val is None :
115- pass
118+ continue
116119 else :
117120 values [key ] = val
118121
@@ -242,7 +245,7 @@ def validate(self):
242245 if self [option ] is None :
243246 raise RosetteException (
244247 "missingParameter" ,
245- "Required Name Translation parameter, " + option + ", not supplied" ,
248+ "Required Name Translation parameter is missing: " + option ,
246249 repr (option ))
247250
248251
@@ -268,7 +271,7 @@ def validate(self):
268271 if self [option ] is None :
269272 raise RosetteException (
270273 "missingParameter" ,
271- "Required Address Similarity parameter, " + option + ", not supplied" ,
274+ "Required Address Similarity parameter is missing: " + option ,
272275 repr (option ))
273276
274277
@@ -301,7 +304,7 @@ def validate(self):
301304 if self [option ] is None :
302305 raise RosetteException (
303306 "missingParameter" ,
304- "Required Name Similarity parameter, " + option + ", not supplied" ,
307+ "Required Name Similarity parameter is missing: " + option ,
305308 repr (option ))
306309
307310
@@ -321,7 +324,7 @@ def validate(self):
321324 if self ["names" ] is None : # required
322325 raise RosetteException (
323326 "missingParameter" ,
324- "Required Name De-Duplication parameter, names, not supplied " ,
327+ "Required Name De-Duplication parameter is missing: names " ,
325328 repr ("names" ))
326329
327330
@@ -372,31 +375,37 @@ def __finish_result(self, response, ename):
372375 raise RosetteException (code , complaint_url +
373376 " : failed to communicate with Rosette" , msg )
374377
375- def info (self ):
376- """Issues an "info" request to the L{EndpointCaller}'s specific endpoint.
377- @return: A dictionary telling server version and other
378- identifying data."""
379- url = self .service_url + self .api .endpoints ["INFO" ]
380- headers = {'Accept' : 'application/json' , 'X-RosetteAPI-Binding' : 'python' ,
381- 'X-RosetteAPI-Binding-Version' : _BINDING_VERSION }
378+ def __set_headers (self ):
379+ headers = {'Accept' : _APPLICATION_JSON ,
380+ _CUSTOM_HEADER_PREFIX + 'Binding' : _BINDING_LANGUAGE ,
381+ _CUSTOM_HEADER_PREFIX + 'Binding-Version' : _BINDING_VERSION }
382382
383383 custom_headers = self .api .get_custom_headers ()
384- pattern = re .compile ('^X-RosetteAPI-' )
385384 if custom_headers is not None :
386385 for key in custom_headers .keys ():
387- if pattern .match (key ) is not None :
386+ if _CUSTOM_HEADER_PATTERN .match (key ) is not None :
388387 headers [key ] = custom_headers [key ]
389388 else :
390389 raise RosetteException ("badHeader" ,
391- "Custom header name must begin with \" X-RosetteAPI- \" " ,
390+ "Custom header name must begin with \" " + _CUSTOM_HEADER_PREFIX + " \" " ,
392391 key )
393392 self .api .clear_custom_headers ()
394393
395394 if self .debug :
396- headers ['X-RosetteAPI- Devel' ] = 'true'
397- self . logger . info ( 'info: ' + url )
395+ headers [_CUSTOM_HEADER_PREFIX + ' Devel' ] = 'true'
396+
398397 if self .user_key is not None :
399- headers ["X-RosetteAPI-Key" ] = self .user_key
398+ headers [_CUSTOM_HEADER_PREFIX + "Key" ] = self .user_key
399+
400+ return headers
401+
402+ def info (self ):
403+ """Issues an "info" request to the L{EndpointCaller}'s specific endpoint.
404+ @return: A dictionary telling server version and other
405+ identifying data."""
406+ url = self .service_url + self .api .endpoints ["INFO" ]
407+ headers = self .__set_headers ()
408+ self .logger .info ('info: ' + url )
400409 response = self .api .get_http (url , headers = headers )
401410 return self .__finish_result (response , "info" )
402411
@@ -407,26 +416,8 @@ def ping(self):
407416 signalled."""
408417
409418 url = self .service_url + self .api .endpoints ['PING' ]
410- headers = {'Accept' : 'application/json' , 'X-RosetteAPI-Binding' : 'python' ,
411- 'X-RosetteAPI-Binding-Version' : _BINDING_VERSION }
412-
413- custom_headers = self .api .get_custom_headers ()
414- pattern = re .compile ('^X-RosetteAPI-' )
415- if custom_headers is not None :
416- for key in custom_headers .keys ():
417- if pattern .match (key ) is not None :
418- headers [key ] = custom_headers [key ]
419- else :
420- raise RosetteException ("badHeader" ,
421- "Custom header name must begin with \" X-RosetteAPI-\" " ,
422- key )
423- self .api .clear_custom_headers ()
424-
425- if self .debug :
426- headers ['X-RosetteAPI-Devel' ] = 'true'
419+ headers = self .__set_headers ()
427420 self .logger .info ('Ping: ' + url )
428- if self .user_key is not None :
429- headers ["X-RosetteAPI-Key" ] = self .user_key
430421 response = self .api .get_http (url , headers = headers )
431422 return self .__finish_result (response , "ping" )
432423
@@ -454,9 +445,9 @@ def call(self, parameters):
454445
455446 if not isinstance (parameters , _DocumentParamSetBase ):
456447 if self .suburl != self .api .endpoints ['NAME_SIMILARITY' ] \
457- and self .suburl != self .api .self .api .endpoints ['NAME_TRANSLATION' ] \
458- and self .suburl != self .api .self .api .endpoints ['NAME_DEDUPLICATION' ] \
459- and self .suburl != self .api .self .api .endpoints ['ADDRESS_SIMILARITY' ]:
448+ and self .suburl != self .api .self .api .endpoints ['NAME_TRANSLATION' ] \
449+ and self .suburl != self .api .self .api .endpoints ['NAME_DEDUPLICATION' ] \
450+ and self .suburl != self .api .self .api .endpoints ['ADDRESS_SIMILARITY' ]:
460451 text = parameters
461452 parameters = DocumentParameters ()
462453 parameters ['content' ] = text
@@ -471,22 +462,7 @@ def call(self, parameters):
471462 params_to_serialize = parameters .serialize (self .api .options )
472463 headers = {}
473464 if self .user_key is not None :
474- custom_headers = self .api .get_custom_headers ()
475- pattern = re .compile ('^X-RosetteAPI-' )
476- if custom_headers is not None :
477- for key in custom_headers .keys ():
478- if pattern .match (key ) is not None :
479- headers [key ] = custom_headers [key ]
480- else :
481- raise RosetteException ("badHeader" ,
482- "Custom header name must "
483- "begin with \" X-RosetteAPI-\" " ,
484- key )
485- self .api .clear_custom_headers ()
486-
487- headers ["X-RosetteAPI-Key" ] = self .user_key
488- headers ["X-RosetteAPI-Binding" ] = "python"
489- headers ["X-RosetteAPI-Binding-Version" ] = _BINDING_VERSION
465+ headers = self .__set_headers ()
490466
491467 if self .use_multipart :
492468 payload = None
@@ -496,7 +472,7 @@ def call(self, parameters):
496472 params = dict (
497473 (key ,
498474 value ) for key ,
499- value in params_to_serialize .items () if key == 'language' )
475+ value in params_to_serialize .items () if key == 'language' )
500476 files = {
501477 'content' : (
502478 os .path .basename (
@@ -506,7 +482,7 @@ def call(self, parameters):
506482 'request' : (
507483 'request_options' ,
508484 json .dumps (params ),
509- 'application/json' )}
485+ _APPLICATION_JSON )}
510486 request = requests .Request (
511487 'POST' , url , files = files , headers = headers , params = payload )
512488 prepared_request = self .api .session .prepare_request (request )
@@ -519,11 +495,11 @@ def call(self, parameters):
519495 _my_loads (rdata , response_headers ), status )
520496 else :
521497 if self .debug :
522- headers ['X-RosetteAPI- Devel' ] = True
498+ headers [_CUSTOM_HEADER_PREFIX + ' Devel' ] = True
523499 self .logger .info ('operate: ' + url )
524- headers ['Accept' ] = "application/json"
500+ headers ['Accept' ] = _APPLICATION_JSON
525501 headers ['Accept-Encoding' ] = "gzip"
526- headers ['Content-Type' ] = "application/json"
502+ headers ['Content-Type' ] = _APPLICATION_JSON
527503 response = self .api .post_http (url , params_to_serialize , headers )
528504 return self .__finish_result (response , "operate" )
529505
@@ -613,13 +589,21 @@ def get_user_agent_string(self):
613589 """ Return the User-Agent string """
614590 return self .user_agent_string
615591
616- def _set_pool_size (self ):
592+ def set_pool_size (self , new_pool_size ):
593+ """Sets the connection pool size.
594+ @parameter new_pool_size: pool size to set
595+ """
596+ self .max_pool_size = new_pool_size
617597 adapter = requests .adapters .HTTPAdapter (
618- pool_maxsize = self . max_pool_size )
598+ pool_maxsize = new_pool_size )
619599 if 'https:' in self .service_url :
620600 self .session .mount ('https://' , adapter )
621601 else :
622- self .session .mount ('http://' , adapter )
602+ self .session .mount ('http://' , adapter ) # NOSONAR
603+
604+ def __adjust_concurrency (self , dict_headers ):
605+ if _CONCURRENCY_HEADER in dict_headers and dict_headers [_CONCURRENCY_HEADER ] != self .max_pool_size :
606+ self .set_pool_size (dict_headers [_CONCURRENCY_HEADER ])
623607
624608 def _make_request (self , operation , url , data , headers ):
625609 """
@@ -650,11 +634,8 @@ def _make_request(self, operation, url, data, headers):
650634 status = response .status_code
651635 rdata = response .content
652636 dict_headers = dict (response .headers )
637+ self .__adjust_concurrency (dict_headers )
653638 response_headers = {"responseHeaders" : dict_headers }
654- if 'x-rosetteapi-concurrency' in dict_headers :
655- if dict_headers ['x-rosetteapi-concurrency' ] != self .max_pool_size :
656- self .max_pool_size = dict_headers ['x-rosetteapi-concurrency' ]
657- self ._set_pool_size ()
658639
659640 if status == 200 :
660641 return rdata , status , response_headers
@@ -670,9 +651,11 @@ def _make_request(self, operation, url, data, headers):
670651 if not message :
671652 message = rdata
672653 raise RosetteException (code , message , url )
673-
674- except :
675- raise
654+ except json .JSONDecodeError as exception :
655+ raise RosetteException (
656+ exception ,
657+ "Problem decoding JSON" ,
658+ rdata )
676659 except requests .exceptions .RequestException as exception :
677660 raise RosetteException (
678661 exception ,
@@ -964,12 +947,12 @@ def name_deduplication(self, parameters):
964947 return EndpointCaller (self , self .endpoints ['NAME_DEDUPLICATION' ]).call (parameters )
965948
966949 def text_embedding (self , parameters ):
967- """
950+ """ deprecated
968951 Create an L{EndpointCaller} to identify text vectors found in the texts
969952 to which it is applied and call it.
970953 @type parameters: L{DocumentParameters} or L{str}
971954 @return: A python dictionary containing the results of text embedding."""
972- return EndpointCaller ( self , self . endpoints [ 'TEXT_EMBEDDING' ]). call (parameters )
955+ return self . semantic_vectors (parameters )
973956
974957 def semantic_vectors (self , parameters ):
975958 """
0 commit comments