#!/usr/bin/env python
"""
=========================
Boto 3 Sample Application
=========================

This application implements a drop video transcoder that lets
you easily convert media files by dragging and dropping them
on your computer. See the README for more details.

https://github.com/boto3/boto3-sample
"""
from __future__ import print_function

import copy
import glob
import json
import os

import boto3
from botocore.client import ClientError
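
# Note: boto3 discovers AWS credentials through its standard chain
# (environment variables, the shared ~/.aws/credentials file, or an
# instance profile), so no credentials are hard-coded in this sample.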


class AutoTranscodeError(Exception):
    pass


class AutoTranscode(object):
    """
    This is the main transcoder class, which exposes a ``run`` method
    to put everything into motion. It watches for new files, uploads
    them to S3, converts them using Elastic Transcoder and downloads
    the output from S3 back to your computer.
    """
    # The following policies are for the IAM role.
    basic_role_policy = {
        'Statement': [
            {
                'Principal': {
                    'Service': ['elastictranscoder.amazonaws.com']
                },
                'Effect': 'Allow',
                'Action': ['sts:AssumeRole']
            },
        ]
    }

    more_permissions_policy = {
        'Statement': [
            {
                'Effect': 'Allow',
                'Action': [
                    's3:ListBucket',
                    's3:Put*',
                    's3:Get*',
                    's3:*MultipartUpload*'
                ],
                'Resource': '*'
            },
            {
                'Effect': 'Allow',
                'Action': [
                    'sns:*',
                ],
                'Resource': '*',
            },
            {
                'Effect': 'Allow',
                'Action': [
                    'sqs:*',
                ],
                'Resource': '*',
            },
            {
                'Effect': 'Deny',
                'Action': [
                    's3:*Policy*',
                    'sns:*Permission*',
                    'sns:*Delete*',
                    'sqs:*Delete*',
                    's3:*Delete*',
                    'sns:*Remove*'
                ],
                'Resource': '*'
            },
        ]
    }

    # The SQS queue needs a policy to allow the SNS topic to post to it.
    queue_policy_statement = {
        "Sid": "auto-transcode",
        "Effect": "Allow",
        "Principal": {
            "AWS": "*"
        },
        "Action": "SQS:SendMessage",
        "Resource": "<SQS QUEUE ARN>",
        "Condition": {
            "StringLike": {
                "aws:SourceArn": "<SNS TOPIC ARN>"
            }
        }
    }
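
    # For illustration, after ``get_sqs_queue`` fills in the placeholders
    # above, the statement looks roughly like this (the ARNs below are
    # hypothetical):
    #
    #     {
    #         "Sid": "auto-transcode",
    #         "Effect": "Allow",
    #         "Principal": {"AWS": "*"},
    #         "Action": "SQS:SendMessage",
    #         "Resource": "arn:aws:sqs:us-west-2:123456789012:autotranscode",
    #         "Condition": {
    #             "StringLike": {
    #                 "aws:SourceArn": "arn:aws:sns:us-west-2:123456789012:autotranscode-complete"
    #             }
    #         }
    #     }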

    # This is the default configuration which must be edited.
    empty_config_data = {
        'unconverted_directory': "<PLEASE PROVIDE A LOCAL DIRECTORY FOR INPUT FILES>",
        'converted_directory': "<PLEASE PROVIDE A LOCAL DIRECTORY FOR OUTPUT FILES>",
        'in_bucket_name': "<PLEASE PROVIDE AN INPUT BUCKET NAME>",
        'out_bucket_name': "<PLEASE PROVIDE AN OUTPUT BUCKET NAME>",
        'role_name': 'autotranscode-user',
        'topic_name': 'autotranscode-complete',
        'queue_name': 'autotranscode',
        'pipeline_name': 'autotranscode-pipe',
        'poll_interval': 10,
        'region_name': 'us-west-2',
        'file_pattern': '*.mov'
    }
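
    # A filled-in ~/.autotranscode.json might look like this (the paths
    # and bucket names below are hypothetical; S3 bucket names must be
    # globally unique, and the other keys written by
    # ``create_empty_config`` can be left at their defaults):
    #
    #     {
    #         "unconverted_directory": "/Users/me/Movies/unconverted",
    #         "converted_directory": "/Users/me/Movies/converted",
    #         "in_bucket_name": "my-autotranscode-input",
    #         "out_bucket_name": "my-autotranscode-output"
    #     }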

    def __init__(self, unconverted_directory, converted_directory,
                 in_bucket_name, out_bucket_name,
                 role_name='autotranscode-user',
                 topic_name='autotranscode-complete',
                 queue_name='autotranscode',
                 pipeline_name='autotranscode-pipe', poll_interval=10,
                 region_name='us-west-2',
                 file_pattern='*.mov'):
        super(AutoTranscode, self).__init__()

        # Local (filesystem) related.
        self.unconverted_directory = unconverted_directory
        self.converted_directory = converted_directory
        self.existing_files = set()
        self.file_pattern = file_pattern

        # AWS related.
        self.in_bucket_name = in_bucket_name
        self.out_bucket_name = out_bucket_name
        self.role_name = role_name
        self.topic_name = topic_name
        self.queue_name = queue_name
        self.pipeline_name = pipeline_name
        self.region_name = region_name
        self.role_arn = None
        self.topic_arn = None
        self.queue_arn = None
        self.pipeline_id = None
        self.in_bucket = None
        self.out_bucket = None
        self.role = None
        self.queue = None

        # How often should we look at the local FS for updates?
        self.poll_interval = int(poll_interval)

        self.s3 = boto3.resource('s3')
        self.iam = boto3.resource('iam')
        self.sns = boto3.resource('sns', self.region_name)
        self.sqs = boto3.resource('sqs', self.region_name)
        self.transcoder = boto3.client('elastictranscoder', self.region_name)

    @classmethod
    def load_from_config(cls, config_filepath):
        """
        Load a new transcoder from a JSON config file.
        """
        with open(config_filepath, 'r') as config:
            config_data = json.load(config)

        return cls(**config_data)

    @classmethod
    def create_empty_config(cls, config_filepath):
        """
        Create a default JSON config file. After this method is run,
        you must edit the file before this application can work.
        """
        with open(config_filepath, 'w') as config:
            json.dump(cls.empty_config_data, config, indent=4)
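
    # Typical bootstrap sequence, sketched with a hypothetical path
    # (``__main__`` below does the same thing with ~/.autotranscode.json):
    #
    #     AutoTranscode.create_empty_config('/home/me/.autotranscode.json')
    #     # ... edit the file by hand, then:
    #     transcoder = AutoTranscode.load_from_config('/home/me/.autotranscode.json')
    #     transcoder.run()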

    def ensure_local_setup(self):
        """
        Ensures that the local directory setup is sane by making sure
        that the directories exist and aren't the same.
        """
        if self.unconverted_directory == self.converted_directory:
            raise AutoTranscodeError(
                "The unconverted & converted directories can not be the same."
            )

        if not os.path.exists(self.unconverted_directory):
            os.makedirs(self.unconverted_directory)
        else:
            # If it's there, it may already have files in it, which may have
            # already been processed. Keep these filenames & only process
            # new ones.
            self.existing_files = set(self.collect_files())

        if not os.path.exists(self.converted_directory):
            os.makedirs(self.converted_directory)

    def ensure_aws_setup(self):
        """
        Ensures that the AWS services, resources, and policies are set
        up so that they can all talk to one another and so that we
        can transcode media files.
        """
        if self.bucket_exists(self.in_bucket_name):
            self.in_bucket = self.s3.Bucket(self.in_bucket_name)
        else:
            self.in_bucket = self.s3.create_bucket(
                Bucket=self.in_bucket_name)

        if self.bucket_exists(self.out_bucket_name):
            self.out_bucket = self.s3.Bucket(self.out_bucket_name)
        else:
            self.out_bucket = self.s3.create_bucket(
                Bucket=self.out_bucket_name)

        if self.iam_role_exists():
            self.role = self.iam.Role(self.role_name)
        else:
            self.role = self.setup_iam_role()

        self.topic_arn = self.get_sns_topic()
        self.queue = self.get_sqs_queue()
        self.pipeline_id = self.get_pipeline()

    def collect_files(self):
        """
        Get a list of all relevant files (based on the file extension)
        in the local unconverted media file directory.
        """
        path = os.path.join(self.unconverted_directory, self.file_pattern)
        return glob.glob(path)

    def check_unconverted(self):
        """
        Get the set of files in the unconverted directory that have not
        been seen (and queued for transcoding) before. These are the
        files that should be uploaded and transcoded.

        If no new files are present, an empty set is returned.
        """
        current_files = set(self.collect_files())

        if not current_files:
            return set()

        # Check the new set against the old, returning only new files not
        # found in the old set.
        return current_files.difference(self.existing_files)

    def start_converting(self, files_found):
        """
        Upload and convert each file. Uploads are processed in series
        while transcoding happens in parallel.
        """
        for filepath in files_found:
            filename = self.upload_to_s3(filepath)
            self.start_transcode(filename)
            self.existing_files.add(filepath)

    def process_completed(self):
        """
        Check the queue and download any completed files from S3 to your
        hard drive.
        """
        to_fetch = self.check_queue()

        for s3_file in to_fetch:
            self.download_from_s3(s3_file)

    # The boto-specific methods.

    def bucket_exists(self, bucket_name):
        """
        Returns ``True`` if a bucket exists and you have access to
        call ``HeadBucket`` on it, otherwise ``False``.
        """
        try:
            self.s3.meta.client.head_bucket(Bucket=bucket_name)
            return True
        except ClientError:
            return False
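
    # Note: ``head_bucket`` raises ``ClientError`` both when the bucket
    # does not exist (404) and when it exists but is not accessible to
    # the caller (403), so "no access" is treated the same as "missing".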

    def iam_role_exists(self):
        """
        Returns ``True`` if the IAM role exists, otherwise ``False``.
        """
        try:
            self.iam.meta.client.get_role(
                RoleName=self.role_name)
            return True
        except ClientError:
            return False

    def setup_iam_role(self):
        """
        Set up a new IAM role and set its policy to allow Elastic
        Transcoder access to S3 and SNS. Returns the role.
        """
        role = self.iam.create_role(
            RoleName=self.role_name,
            AssumeRolePolicyDocument=json.dumps(self.basic_role_policy))

        self.iam.RolePolicy(self.role_name, 'more-permissions').put(
            PolicyDocument=json.dumps(self.more_permissions_policy))

        return role

    def get_sns_topic(self):
        """
        Get or create the SNS topic. Returns its ARN.
        """
        # Creating a topic is idempotent, so if it already exists
        # then we will just get the topic returned.
        return self.sns.create_topic(Name=self.topic_name).arn

    def get_sqs_queue(self):
        """
        Get or create the SQS queue. Ensures that it is subscribed to
        the SNS topic and that a policy is in place to allow the SNS
        topic to send messages to the queue.
        """
        # Creating a queue is idempotent, so if it already exists
        # then we will just get the queue returned.
        queue = self.sqs.create_queue(QueueName=self.queue_name)
        self.queue_arn = queue.attributes['QueueArn']

        # Ensure that we are subscribed to the SNS topic
        subscribed = False
        topic = self.sns.Topic(self.topic_arn)
        for subscription in topic.subscriptions.all():
            if subscription.attributes['Endpoint'] == self.queue_arn:
                subscribed = True
                break

        if not subscribed:
            topic.subscribe(Protocol='sqs', Endpoint=self.queue_arn)

        # Set up a policy to allow SNS access to the queue
        if 'Policy' in queue.attributes:
            policy = json.loads(queue.attributes['Policy'])
        else:
            policy = {'Version': '2008-10-17'}

        if 'Statement' not in policy:
            # Copy the statement template so we don't mutate the shared
            # class-level dict when filling in the ARNs.
            statement = copy.deepcopy(self.queue_policy_statement)
            statement['Resource'] = self.queue_arn
            statement['Condition']['StringLike']['aws:SourceArn'] = \
                self.topic_arn
            policy['Statement'] = [statement]

            queue.set_attributes(Attributes={
                'Policy': json.dumps(policy)
            })

        return queue

    def get_pipeline(self):
        """
        Get or create a pipeline. When creating, it is configured
        with the previously set up S3 buckets, SNS topic, and IAM
        role. Returns its ID.
        """
        paginator = self.transcoder.get_paginator('list_pipelines')

        for page in paginator.paginate():
            for pipeline in page['Pipelines']:
                if pipeline['Name'] == self.pipeline_name:
                    return pipeline['Id']

        response = self.transcoder.create_pipeline(
            Name=self.pipeline_name,
            InputBucket=self.in_bucket_name,
            OutputBucket=self.out_bucket_name,
            Role=self.role.arn,
            Notifications={
                'Progressing': '',
                'Completed': self.topic_arn,
                'Warning': '',
                'Error': ''
            })

        return response['Pipeline']['Id']

    def upload_to_s3(self, filepath):
        """
        Upload a file to the S3 input file bucket.
        """
        filename = os.path.basename(filepath)

        with open(filepath, 'rb') as data:
            self.in_bucket.Object(filename).put(Body=data)

        print("Uploaded raw video {0}".format(filename))
        return filename
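
    # Note: ``Object.put`` sends the file in a single request. For very
    # large media files, boto3's managed transfer method
    # ``Bucket.upload_file(filepath, filename)`` is an alternative that
    # handles multipart uploads automatically (which the IAM policy
    # above already permits).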

    def start_transcode(self, filename):
        """
        Submit a job to transcode a file by its filename. The
        built-in web system preset is used for the single output.
        """
        self.transcoder.create_job(
            PipelineId=self.pipeline_id,
            Input={
                'Key': filename,
                'FrameRate': 'auto',
                'Resolution': 'auto',
                'AspectRatio': 'auto',
                'Interlaced': 'auto',
                'Container': 'auto'
            },
            Outputs=[{
                'Key': os.path.splitext(filename)[0] + '.mp4',
                'PresetId': '1351620000001-100070'
            }]
        )

        print("Started transcoding {0}".format(filename))

    def check_queue(self):
        """
        Check the queue for completed files and set them to be
        downloaded.
        """
        queue = self.queue
        to_fetch = []

        for msg in queue.receive_messages(WaitTimeSeconds=self.poll_interval):
            body = json.loads(msg.body)
            message = body.get('Message', '{}')
            outputs = json.loads(message).get('outputs', [])

            if not outputs:
                print("Saw no output in {0}".format(body))
                continue

            key = outputs[0].get('key')

            if not key:
                print("Saw no key in outputs in {0}".format(body))
                continue

            to_fetch.append(key)
            print("Completed {0}".format(key))
            msg.delete()

        return to_fetch
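
    # For reference, each SQS message body is the SNS envelope, and its
    # ``Message`` field holds the Elastic Transcoder notification as a
    # JSON string. Abbreviated sketch (the job ID is hypothetical):
    #
    #     {
    #         "Type": "Notification",
    #         "Message": "{\"state\": \"COMPLETED\",
    #                      \"jobId\": \"1351620000001-000001\",
    #                      \"outputs\": [{\"key\": \"movie.mp4\"}]}"
    #     }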

    def download_from_s3(self, s3_file):
        """
        Download a file from the S3 output bucket to your hard drive.
        """
        destination_path = os.path.join(
            self.converted_directory,
            os.path.basename(s3_file)
        )

        body = self.out_bucket.Object(s3_file).get()['Body']

        with open(destination_path, 'wb') as dest:
            # Here we write the file in chunks to prevent
            # loading everything into memory at once.
            for chunk in iter(lambda: body.read(4096), b''):
                dest.write(chunk)

        print("Downloaded {0}".format(destination_path))

    # End boto-specific methods.

    def run(self):
        """
        Start the main loop. This repeatedly checks for new files,
        uploads them and starts jobs if needed, and checks for and
        downloads completed files. The SQS long poll waits up to
        ``poll_interval`` seconds, which also serves as the delay
        between checks.
        """
        # Make sure everything we need is set up, both locally & on AWS.
        self.ensure_local_setup()
        self.ensure_aws_setup()

        # Run forever, or until the user says stop.
        while True:
            print("Checking for new files.")
            files_found = self.check_unconverted()

            if files_found:
                print("Found {0} new file(s).".format(len(files_found)))
                self.start_converting(files_found)

            # Here we check the queue, which will long-poll
            # for up to ``self.poll_interval`` seconds.
            self.process_completed()


if __name__ == '__main__':
    import sys

    config_filepath = os.path.abspath(
        os.path.expanduser(
            '~/.autotranscode.json'
        )
    )

    # Check if the config file exists.
    # If not, create an empty one & prompt the user to edit it.
    if not os.path.exists(config_filepath):
        AutoTranscode.create_empty_config(config_filepath)
        print("Created an empty config file at %s." % config_filepath)
        print("Please modify it & re-run this command.")
        sys.exit(1)

    # If so, load from it & run.
    auto = AutoTranscode.load_from_config(config_filepath)

    try:
        auto.run()
    except KeyboardInterrupt:
        # We're done. Bail out without dumping a traceback.
        sys.exit(0)