-
Notifications
You must be signed in to change notification settings - Fork 258
/
pattern_matcher.py
43 lines (31 loc) · 995 Bytes
/
pattern_matcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import sys
import videogrep
import spacy
from spacy.matcher import Matcher
"""
Uses spaCy's rule-based matching to make supercuts:
https://spacy.io/usage/rule-based-matching

Requires spaCy and its small English model. To install:
    pip3 install spacy
    python -m spacy download en_core_web_sm

Usage:
    python pattern_matcher.py VIDEO [VIDEO ...]
"""
# the videos we are working with: every command-line argument is a video path
videos = sys.argv[1:]
if not videos:
    # nothing to cut, so stop before paying for the spaCy model load
    sys.exit("usage: python pattern_matcher.py VIDEO [VIDEO ...]")

# load spacy and the pattern matcher
nlp = spacy.load("en_core_web_sm")

# grabs all instances of adjectives followed by nouns
patterns = [[{"POS": "ADJ"}, {"POS": "NOUN"}]]
matcher = Matcher(nlp.vocab)
matcher.add("Patterns", patterns)

# text of every matched span, collected across all videos
searches = []

for video in videos:
    transcript = videogrep.parse_transcript(video)
    if not transcript:
        # NOTE(review): videogrep appears to return None when it cannot find
        # a transcript for the video -- confirm against the videogrep docs.
        # Either way there is nothing to iterate, so move on to the next one.
        continue

    # nlp.pipe parses the sentences as a batch instead of one call per line
    for doc in nlp.pipe(sentence["content"] for sentence in transcript):
        for _match_id, start, end in matcher(doc):
            span = doc[start:end]  # The matched span
            searches.append(span.text)

# drop repeated phrases (keeping first-seen order) so that the same query is
# not handed to videogrep more than once
searches = list(dict.fromkeys(searches))

if searches:
    videogrep.videogrep(
        videos, searches, search_type="fragment", output="pattern_matcher.mp4"
    )
else:
    # no query to search for, so there is no supercut to build
    print("No adjective + noun phrases found; nothing to export.")