Skip to content

Latest commit

 

History

History
35 lines (28 loc) · 925 Bytes

README.md

File metadata and controls

35 lines (28 loc) · 925 Bytes

Useful functions when working with spaCy.

def get_entity_spans(doc):
    '''
    Returns the combined tokens that refer to an entity. Assumes doc has NER info.
    '''

    spans = list(doc.ents)
    for span in spans:
        span.merge()

    return spans
def get_n_gram(doc, token, n):
    """
    Returns an n-gram with the token as centre. Start at 0 if the token doesn't have as many left neighbours as n.
    """
    
    start = 0 if token.i < n else token.i - n
    end = token.i + n + 1

    return doc[start:end]
def is_head_of_np(token):
    """
    Returns true if the token's dependency label is that of a typical head of noun phrase (based on https://stackoverflow.com/questions/33289820/noun-phrases-with-spacy).
    """
    np_head_deps = ['nsubj', 'nsubjpass', 'dobj', 'iobj', 'pobj']
    return True if token.dep_ in np_head_deps else False