Skip to content

Commit

Permalink
mf
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed Dec 1, 2024
1 parent 3ac877e commit 4a0a1df
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 43 deletions.
87 changes: 49 additions & 38 deletions application/src/tira_app/endpoints/v1/_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,63 +63,74 @@ def load_mirrored_resource(md5_sum):


def mirrors_for_dataset(dataset_id: str):
    """Collect the mirror URLs of a dataset, grouped by resource type.

    Args:
        dataset_id: The ``dataset_id`` of the dataset whose mirrored resources are looked up.

    Returns:
        A dict of the form ``{"truths": {name: url, ...}, "inputs": {name: url, ...}}``.
        Both keys are always present; a group is empty when no mirror of that type is known.
    """
    ret = {"truths": {}, "inputs": {}}
    for link in modeldb.DatasetHasMirroredResource.objects.filter(dataset__dataset_id=dataset_id):
        resource_type = link.resource_type
        # Links with a missing or unknown resource type cannot be assigned to a group.
        if not resource_type or resource_type not in ret:
            continue

        resource = load_mirrored_resource(link.mirrored_resource.md5_sum)
        if not resource or "mirrors" not in resource or not resource["mirrors"]:
            continue

        for mirror_name, mirror_url in resource["mirrors"].items():
            ret[resource_type][mirror_name] = mirror_url

    return ret


def _register_mirror(dataset, resource_type: str, url: str, name: str):
    """Download ``url``, store the payload under its MD5 sum and link it to ``dataset``."""
    response = requests.get(url)

    if response.status_code != 200 or not response.ok:
        raise ValueError(f"Failed to load {url}. Response code {response.status_code}.")

    md5_sum = str(md5(response.content).hexdigest())
    md5_first_kilobyte = str(md5(response.content[:1024]).hexdigest())
    size = len(response.content)

    target_dir = Path(settings.TIRA_ROOT) / "data" / "mirrored-resources"
    target_dir.mkdir(exist_ok=True, parents=True)
    target_file = target_dir / md5_sum

    existing_resource = load_mirrored_resource(md5_sum)

    mirrors = {}
    if existing_resource and "mirrors" in existing_resource and existing_resource["mirrors"]:
        mirrors = existing_resource["mirrors"]
    mirrors[name] = url

    if not existing_resource:
        # First time this content is seen: persist the payload and create its record.
        with open(target_file, "wb") as f:
            f.write(response.content)

        modeldb.MirroredResource.objects.create(
            md5_sum=md5_sum, md5_first_kilobyte=md5_first_kilobyte, size=size, mirrors=json.dumps(mirrors)
        )
    else:
        # Restrict the update to the matching record; an unfiltered ``objects.update(...)``
        # would rewrite every MirroredResource row.
        modeldb.MirroredResource.objects.filter(md5_sum=md5_sum).update(mirrors=json.dumps(mirrors))

    mirror = modeldb.MirroredResource.objects.filter(md5_sum=md5_sum).first()
    link = {"dataset": dataset, "mirrored_resource": mirror, "resource_type": resource_type}
    if not modeldb.DatasetHasMirroredResource.objects.filter(**link).exists():
        modeldb.DatasetHasMirroredResource.objects.create(**link)

    print(load_mirrored_resource(md5_sum))


def add_mirrored_resource(dataset_id: str, url_inputs: str, url_truths: str, name: str):
    """Register mirror URLs for the inputs and the truths of a dataset.

    Each URL that is not yet recorded as a mirror of the dataset is downloaded, stored under
    ``<TIRA_ROOT>/data/mirrored-resources/<md5>`` and linked to the dataset with the resource
    type ``"inputs"`` or ``"truths"``. URLs that are already recorded are reported and skipped.

    Args:
        dataset_id: The ``dataset_id`` of the dataset the mirrors belong to.
        url_inputs: URL from which the inputs of the dataset can be downloaded.
        url_truths: URL from which the truths of the dataset can be downloaded.
        name: Name under which the URLs are recorded in the ``mirrors`` mapping.

    Raises:
        ValueError: If the dataset does not exist, an existing mirrored resource cannot be
            read, or a download fails.
    """
    dataset = modeldb.Dataset.objects.filter(dataset_id=dataset_id).first()
    if dataset is None:
        # Fail before downloading anything; the link could not be created without a dataset.
        raise ValueError(f"Dataset {dataset_id} does not exist.")

    # Load the already registered resources once instead of once per URL.
    existing_resources = []
    for link in modeldb.DatasetHasMirroredResource.objects.filter(dataset__dataset_id=dataset_id):
        resource = load_mirrored_resource(link.mirrored_resource.md5_sum)
        if not resource:
            raise ValueError("could not read existing resources")
        existing_resources.append(resource)

    # Carry the resource type alongside each URL so that identical input and truth URLs
    # are still registered under their own type.
    pending = []
    for resource_type, url in (("inputs", url_inputs), ("truths", url_truths)):
        found = False
        for resource in existing_resources:
            known_mirrors = resource["mirrors"] if "mirrors" in resource else None
            # Compare the URL under inspection, not always the inputs URL.
            if known_mirrors and url in known_mirrors.values():
                print(f"Mirrored URL {url} already exists: {resource}")
                found = True
        if not found:
            pending.append((resource_type, url))

    for resource_type, url in pending:
        _register_mirror(dataset, resource_type, url, name)


endpoints = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,19 @@ def handle(self, *args, **options):
if "dataset_id" not in options or not options["dataset_id"]:
raise ValueError("Please pass --dataset_id")

if "url" not in options or not options["url"]:
raise ValueError("Please pass --url")
if "url_inputs" not in options or not options["url_inputs"]:
raise ValueError("Please pass --url_inputs")

if "url_truths" not in options or not options["url_truths"]:
raise ValueError("Please pass --url_truths")

if "name" not in options or not options["name"]:
raise ValueError("Please pass --name")

add_mirrored_resource(options["dataset_id"], options["url"], options["name"])
add_mirrored_resource(options["dataset_id"], options["url_inputs"], options["url_truths"], options["name"])

def add_arguments(self, parser):
    """Register the command-line options of this command on ``parser``.

    All options default to ``None``; none of them is marked as required here.
    """
    parser.add_argument("--dataset_id", default=None, type=str, help="ID of the dataset to add mirrors to.")
    parser.add_argument("--url_inputs", default=None, type=str, help="URL of the mirrored inputs.")
    parser.add_argument("--url_truths", default=None, type=str, help="URL of the mirrored truths.")
    parser.add_argument("--name", default=None, type=str, help="Name under which the mirror is recorded.")
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 5.0.9 on 2024-12-01 18:54

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the ``resource_type`` field to the ``datasethasmirroredresource`` model.

    # Must run after the migration that introduced the mirrored-resource models.
    dependencies = [
        ("tira", "0004_mirroredresource_datasethasmirroredresource"),
    ]

    operations = [
        migrations.AddField(
            model_name="datasethasmirroredresource",
            name="resource_type",
            # Together with ``preserve_default=False`` below, "inputs" is a one-off default
            # for this migration only and is not kept on the field afterwards.
            field=models.CharField(default="inputs", max_length=15),
            preserve_default=False,
        ),
    ]
1 change: 1 addition & 0 deletions application/src/tira_app/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class MirroredResource(models.Model):
class DatasetHasMirroredResource(models.Model):
    # Links a dataset to one of its mirrored resources.
    # Both foreign keys cascade: deleting either side deletes the link.
    dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE)
    mirrored_resource = models.ForeignKey(MirroredResource, on_delete=models.CASCADE)
    # Kind of resource the mirror provides for the dataset (at most 15 characters).
    # NOTE(review): presumably "inputs" or "truths" -- confirm against the code that writes it.
    resource_type = models.CharField(max_length=15)


class Software(models.Model):
Expand Down
12 changes: 11 additions & 1 deletion frontend/src/Datasets.vue
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
<span v-if="!item.is_confidential">Public Training</span>
</template>
<template #item.mirrors="{ item }">
<p v-for="[k, v] of Object.entries(item.mirrors)">
<p v-for="[k, v] of Object.entries(mirrored_resources(item.mirrors))">
<a :href="v + ''" style="text-decoration: none !important;" target="_blank">{{k}}</a>
</p>
</template>
Expand Down Expand Up @@ -69,6 +69,16 @@
methods: {
chatnoir_url(dataset: DatasetInfo) { return chatNoirUrl(dataset)},
ir_datasets_url(dataset: DatasetInfo) { return irDatasetsUrl(dataset)},
/**
 * Flatten the mirrors of a dataset, grouped by resource type, into a single
 * mapping from a display label ("<name> (<resource type>)") to the mirror URL.
 * A missing value (null/undefined), at either level, yields no entries.
 */
mirrored_resources(mirrors: any) {
  const ret: Record<string, string> = {}
  for (const [resource_type, resources] of Object.entries(mirrors || {})) {
    for (const [resource_name, url] of Object.entries(resources || {})) {
      ret[resource_name + ' (' + resource_type + ')'] = url as string
    }
  }
  return ret
}
},
beforeMount() {
this.query = this.$route.query.query as string|undefined
Expand Down

0 comments on commit 4a0a1df

Please sign in to comment.