Skip to content

Commit 1912eff

Browse files
Download api (#325)
changelog: - addition of download API endpoints - addition of default metadatasets, files, and submissions to the test db Co-authored-by: Leon Kuchenbecker <[email protected]>
1 parent 899d0a1 commit 1912eff

36 files changed

+702
-121
lines changed

.devcontainer/devcontainer.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@
3535
"alexcvzz.vscode-sqlite",
3636
"njpwerner.autodocstring",
3737
"arjun.swagger-viewer",
38-
"ms-azuretools.vscode-docker"
38+
"ms-azuretools.vscode-docker",
39+
"ms-toolsai.jupyter"
3940
]
4041

4142
// Use 'forwardPorts' to make a list of ports inside the container available locally.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""add download token
2+
3+
Revision ID: 4cf970aec869
4+
Revises: 68a28d6ed9cb
5+
Create Date: 2021-04-26 06:54:35.516091
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
from sqlalchemy.dialects import postgresql
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '4cf970aec869'
14+
down_revision = '68a28d6ed9cb'
15+
branch_labels = None
16+
depends_on = None
17+
18+
def upgrade():
    """Create the ``downloadtokens`` table.

    The table holds an integer primary key, a unique ``uuid``, a foreign key
    ``file_id`` pointing at ``files.id``, a unique text ``value`` and an
    ``expires`` timestamp. All columns are non-nullable.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    columns = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('uuid', postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column('file_id', sa.Integer(), nullable=False),
        sa.Column('value', sa.Text(), nullable=False),
        sa.Column('expires', sa.DateTime(), nullable=False),
    ]
    constraints = [
        sa.ForeignKeyConstraint(
            ['file_id'], ['files.id'],
            name=op.f('fk_downloadtokens_file_id_files'),
        ),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_downloadtokens')),
        sa.UniqueConstraint('uuid', name=op.f('uq_downloadtokens_uuid')),
        sa.UniqueConstraint('value', name=op.f('uq_downloadtokens_value')),
    ]
    op.create_table('downloadtokens', *columns, *constraints)
    # ### end Alembic commands ###
32+
33+
def downgrade():
    """Revert this revision by dropping the ``downloadtokens`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('downloadtokens')
    # ### end Alembic commands ###

datameta/api/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,11 @@ def includeme(config: Configurator) -> None:
6262
config.add_route("groups_id", base_url + "/groups/{id}")
6363
config.add_route("rpc_delete_files", base_url + "/rpc/delete-files")
6464
config.add_route("rpc_delete_metadatasets", base_url + "/rpc/delete-metadatasets")
65+
config.add_route("rpc_get_file_url", base_url + "/rpc/get-file-url/{id}")
6566

6667
# Endpoint outside of openapi
6768
config.add_route("upload", base_url + "/upload/{id}")
69+
config.add_route("download_by_token", base_url + "/download/{token}")
6870

6971

7072
@view_config(

datameta/api/apikeys.py

+2-7
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,8 @@ def get_expiration_date_from_str(expires_str:Optional[str]):
9090

9191

9292
def generate_api_key(request:Request, user:models.User, label:str, expires:Optional[datetime]=None):
93-
"""For Token Composition:
94-
Tokens consist of a core, which is stored as hash in the database,
95-
plus a prefix that contains the user and the label of that ApiKey.
96-
The user always provides the entire token, which is then split up
97-
into prefic and core component. The prefix is used to identify the
98-
ApiKey object in the database and the core component is matched
99-
against the hash for validating it.
93+
"""
94+
Generate API Token and store unsalted hash in db.
10095
"""
10196
token = security.generate_token()
10297
token_hash = security.hash_token(token)

datameta/api/download.py

+91
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
# Copyright 2021 Universität Tübingen, DKFZ and EMBL for the German Human Genome-Phenome Archive (GHGA)
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from pyramid.view import view_config
16+
from pyramid.httpexceptions import HTTPNotFound, HTTPOk, HTTPTemporaryRedirect
17+
from pyramid.request import Request
18+
from pyramid.response import FileResponse
19+
from datetime import datetime, timedelta
20+
import urllib.parse
21+
from .. import security, models, storage
22+
from . import base_url
23+
from .files import access_file_by_user
24+
from ..errors import get_validation_error
25+
from sqlalchemy import and_
26+
27+
28+
@view_config(
    route_name="rpc_get_file_url",
    renderer="json",
    request_method="GET",
    openapi=True,
)
def get_file_url(request) -> HTTPTemporaryRedirect:
    """Redirect to a temporary, pre-signed HTTP URL for downloading a file.

    The file ID is read from the validated path parameters and the URL
    lifetime from the validated ``expires`` query parameter. The requesting
    user is revalidated and the file is looked up through
    ``access_file_by_user`` before the download URL is requested from the
    storage layer.

    Returns:
        HTTPTemporaryRedirect: A redirect to the URL produced by
        ``storage.get_download_url``.
    """
    validated = request.openapi_validated.parameters
    file_id = validated.path['id']
    expires_after = validated.query['expires']
    auth_user = security.revalidate_user(request)

    # Look the file up on behalf of the authenticated user
    db_file = access_file_by_user(request, user=auth_user, file_id=file_id)

    # Ask the storage layer for a download URL with the requested lifetime
    download_url = storage.get_download_url(
        request=request,
        db_file=db_file,
        expires_after=expires_after,
    )

    return HTTPTemporaryRedirect(download_url)
56+
57+
58+
@view_config(
    route_name = "download_by_token",
    renderer = "json",
    request_method = "GET"
)
def download_by_token(request) -> FileResponse:
    """Download a file using a file download token.

    Usage: /download/{download_token}

    The token from the URL is hashed and compared against the stored
    ``DownloadToken.value``; only tokens whose ``expires`` timestamp lies in
    the future are accepted.

    Returns:
        FileResponse: The file referenced by the token, served as an
        ``application/octet-stream`` attachment.

    Raises:
        HTTPNotFound: If no unexpired download token matches.
    """
    token = request.matchdict['token']
    hashed_token = security.hash_token(token)

    # get download token from db
    # NOTE(review): the expiry check uses naive local time - confirm that the
    # code creating the tokens uses the same clock.
    db = request.dbsession
    db_token = db.query(models.DownloadToken).filter(
        and_(
            models.DownloadToken.value == hashed_token,
            models.DownloadToken.expires > datetime.now()
        )
    ).one_or_none()

    if db_token is None:
        raise HTTPNotFound()

    # serve file:
    response = FileResponse(
        storage.get_local_storage_path(request, db_token.file.storage_uri),
        request=request,
        content_type='application/octet-stream'
    )

    # The file name is embedded in a quoted-string header value. Drop control
    # characters (incl. CR/LF) and escape backslashes and double quotes so
    # that an unusual file name can neither break nor inject into the header.
    printable_name = "".join(
        ch for ch in db_token.file.name if ch >= " " and ch != "\x7f"
    )
    quoted_name = printable_name.replace("\\", "\\\\").replace('"', '\\"')
    response.content_disposition = f"attachment; filename=\"{quoted_name}\""

    return response

datameta/api/files.py

+21-11
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,23 @@ def delete_staged_file_from_db(file_id, db, auth_user):
7373
return user_uuid, file_uuid, storage_uri
7474

7575

76+
def access_file_by_user(
        request: Request,
        user: models.User,
        file_id: str,
) -> models.File:
    """Fetch a file by ID on behalf of a user.

    Args:
        request: The calling HTTP request; its ``dbsession`` is used for the lookup.
        user: The user requesting access.
        file_id: Identifier of the file, as passed to ``resource.resource_by_id``.

    Returns:
        The matching ``models.File`` object.

    Raises:
        HTTPNotFound: If no file matches ``file_id``.
        HTTPForbidden: If ``authz.view_file`` denies the user access.
    """
    db_file = resource.resource_by_id(request.dbsession, models.File, file_id)

    # Unknown file
    if db_file is None:
        raise HTTPNotFound(json=None)

    # Known file: hand it out only if the user is allowed to view it
    if authz.view_file(user, db_file):
        return db_file

    raise HTTPForbidden(json=None)
7793

7894
@view_config(
7995
route_name = "rpc_delete_files",
@@ -174,18 +190,12 @@ def get_file(request: Request) -> FileResponse:
174190
# Check authentication and raise 401 if unavailable
175191
auth_user = security.revalidate_user(request)
176192

177-
db = request.dbsession
178-
179193
# Obtain file from database
180-
db_file = resource.resource_by_id(db, models.File, request.matchdict['id'])
181-
182-
# Check if file could be found
183-
if db_file is None:
184-
raise HTTPNotFound(json=None)
185-
186-
# Check if requesting user has access to the file
187-
if not authz.view_file(auth_user, db_file):
188-
raise HTTPForbidden(json=None)
194+
db_file = access_file_by_user(
195+
request,
196+
user = auth_user,
197+
file_id = request.matchdict['id']
198+
)
189199

190200
# Return details
191201
return FileResponse(

datameta/api/openapi.yaml

+49-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
openapi: 3.0.0
1616
info:
1717
description: DataMeta
18-
version: 0.14.0
18+
version: 0.15.0
1919
title: DataMeta
2020

2121
servers:
@@ -190,6 +190,54 @@ paths:
190190
description: Internal Server Error
191191

192192

193+
/rpc/get-file-url/{id}:
194+
get:
195+
summary: "[Not RESTful]: Redirects to a temporary, pre-signed HTTP-URL for downloading a file."
196+
description: >-
197+
For the file with the given ID, this endpoint will redirect to a pre-signed HTTP URL for
198+
downloading the requested file. The pre-signed URL times out after a certain amount of
199+
time which can be configured with the "expires" query string.
200+
[Attention: this endpoint is not RESTful, the result should not be cached.]
201+
tags:
202+
- Remote Procedure Calls
203+
operationId: GetFileUrl
204+
parameters:
205+
- name: id
206+
in: path
207+
description: ID of the file
208+
required: true
209+
schema:
210+
type: string
211+
- name: expires
212+
in: query
213+
description: Minutes until the pre-signed URL will expire, defaults to 1
214+
schema:
215+
type: integer
216+
default: 1
217+
responses:
218+
'307':
219+
description: Redirecting to the pre-signed URL of the file
220+
headers:
221+
location:
222+
description: Location to redirect to
223+
schema:
224+
type: string
225+
'400':
226+
description: Validation Error
227+
content:
228+
application/json:
229+
schema:
230+
$ref: "#/components/schemas/ErrorModel"
231+
'401':
232+
description: Unauthorized
233+
'403':
234+
description: Forbidden
235+
'404':
236+
description: The specified file does not exist.
237+
'500':
238+
description: Internal Server Error
239+
240+
193241
/users/{id}/keys:
194242
get:
195243
summary: All API keys for a user

datameta/models/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
MetaDataSet,
3131
ApplicationSettings,
3232
DateTimeMode,
33-
ApiKey
33+
ApiKey,
34+
DownloadToken
3435
) # flake8: noqa
3536

3637
# run configure_mappers after defining all of the models to ensure

datameta/models/db.py

+11
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@ class File(Base):
127127
# Relationships
128128
metadatumrecord = relationship('MetaDatumRecord', back_populates='file', uselist=False)
129129
user = relationship('User', back_populates='files')
130+
downloadtokens = relationship('DownloadToken', back_populates='file')
131+
132+
class DownloadToken(Base):
    """A token row tied to one file, with a unique value and an expiry time."""
    __tablename__ = 'downloadtokens'

    # Surrogate key and externally usable unique identifier
    id = Column(Integer, primary_key=True)
    uuid = Column(UUID(as_uuid=True), default=uuid.uuid4, nullable=False, unique=True)

    # The file this token refers to
    file_id = Column(Integer, ForeignKey('files.id'), nullable=False)

    # Token value (unique across all rows) and the time the token expires
    value = Column(Text, unique=True, nullable=False)
    expires = Column(DateTime, nullable=False)

    # Relationships
    file = relationship('File', back_populates='downloadtokens')
130141

131142
class Submission(Base):
132143
__tablename__ = 'submissions'

datameta/storage.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
import shutil
1717
import logging
1818
import hashlib
19-
from . import security
19+
from datetime import datetime, timedelta
20+
from pyramid.request import Request
21+
from typing import Optional
22+
from . import security, models
23+
from .api import base_url
2024

2125
log = logging.getLogger(__name__)
2226

@@ -141,3 +145,43 @@ def freeze(request, db_file):
141145
if db_file.storage_uri.startswith("s3://"):
142146
return _freeze_s3(request, db_file)
143147
raise NotImplementedError()
148+
149+
def _get_download_url_local(request:Request, db_file:models.File, expires_after:Optional[int]=None):
    """Create a download token for a file and return its download URL.

    A fresh token is generated, its hash is added to the database session as
    a ``DownloadToken`` for ``db_file``, and the plain token is embedded in
    the returned URL.

    Args:
        request (Request): The calling HTTP request
        db_file (models.File): The database 'File' object
        expires_after (Optional[int]): Number of minutes after which the
            token expires; 1 is used when None is passed
    """
    minutes = 1 if expires_after is None else expires_after

    plain_token = security.generate_token()
    hashed_token = security.hash_token(plain_token)
    expiry = datetime.now() + timedelta(minutes=float(minutes))

    # Only the hash is stored; the plain token is only part of the URL
    request.dbsession.add(
        models.DownloadToken(
            file_id=db_file.id,
            value=hashed_token,
            expires=expiry,
        )
    )

    return f"{base_url}/download/{plain_token}"
166+
167+
def _get_download_url_s3(request:Request, db_file:models.File, expires_after:Optional[int]=None):
    """Placeholder for download URLs of files stored under ``s3://``.

    Not implemented yet; always raises NotImplementedError.
    """
    # TODO
    raise NotImplementedError()
170+
171+
def get_download_url(request:Request, db_file:models.File, expires_after:Optional[int]=None):
    """Get a presigned URL to download a file

    The storage backend is chosen by the scheme prefix of the file's
    storage URI (``file://`` or ``s3://``).

    Args:
        request (Request): The calling HTTP request
        db_file (models.File): The database 'File' object
        expires_after (Optional[int]): Number of minutes after which the URL will expire

    Raises:
        NoDataError: If the file has no storage URI
        NotImplementedError: If the storage URI uses an unsupported scheme
    """
    storage_uri = db_file.storage_uri
    if storage_uri is None:
        raise NoDataError() # No data has been uploaded yet

    # Map each supported URI scheme to the function that handles it
    handlers = (
        ("file://", _get_download_url_local),
        ("s3://", _get_download_url_s3),
    )
    for scheme, handler in handlers:
        if storage_uri.startswith(scheme):
            return handler(request, db_file, expires_after=expires_after)

    raise NotImplementedError()
186+
187+

0 commit comments

Comments
 (0)