Skip to content

Commit

Permalink
Implement support for migrating RDF serializations other than turtle (#…
Browse files Browse the repository at this point in the history
…61)

* Look for rdf files with extensions that match the configured src rdf lang

* Write rdf in same format as source since filename extensions are based on that

* Test ntriples in F4->F5 and F5->F6 migrations

* Exit with error message if source-rdf lang cannot be resolved
  • Loading branch information
cjcolvar authored Feb 13, 2025
1 parent 9242276 commit 8082e5a
Show file tree
Hide file tree
Showing 211 changed files with 1,280 additions and 15 deletions.
7 changes: 7 additions & 0 deletions src/main/java/org/fcrepo/upgrade/utils/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,13 @@ public Lang getSrcRdfLang() {
return srcRdfLang;
}

/**
 * Returns the primary file extension of the configured source rdf lang.
 * The value is the first extension the lang reports and carries no leading dot;
 * callers that build file names prepend "." themselves.
 *
 * @return the extension of the rdf lang of the export, without a leading dot
 * @throws IllegalStateException if no source rdf lang is set, or the lang reports no file extension
 */
public String getSrcRdfExt() {
    if (srcRdfLang == null) {
        throw new IllegalStateException("source rdf lang is not set");
    }
    final var extensions = srcRdfLang.getFileExtensions();
    // fail with a readable message instead of an IndexOutOfBoundsException from get(0)
    if (extensions == null || extensions.isEmpty()) {
        throw new IllegalStateException("no file extension is registered for rdf lang " + srcRdfLang);
    }
    return extensions.get(0);
}

/**
* Sets the rdf lang of the export
* @param srcRdfLang the rdf lang of the export
Expand Down
25 changes: 12 additions & 13 deletions src/main/java/org/fcrepo/upgrade/utils/F47ToF5UpgradeManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ class F47ToF5UpgradeManager extends UpgradeManagerBase implements UpgradeManager
private static final String FCR_VERSIONS_PATH_SEGMENT = "fcr%3Aversions";
private static final String FCR_ACL_PATH_SEGMENT = "fcr%3Aacl";
private static final String TYPE_RELATION = "type";
private static final String TURTLE_EXTENSION = ".ttl";
private static final String HEADERS_SUFFIX = ".headers";
public static final String APPLICATION_OCTET_STREAM_MIMETYPE = "application/octet-stream";
/**
Expand Down Expand Up @@ -120,7 +119,7 @@ private void processDirectory(final File dir) {
private void processFile(final Path path) {

//skip versions container
if (path.endsWith(FCR_VERSIONS_PATH_SEGMENT + TURTLE_EXTENSION)) {
if (path.endsWith(FCR_VERSIONS_PATH_SEGMENT + "." + config.getSrcRdfExt())) {
LOGGER.debug("version containers are not required for import. Skipping {}...", path);
return;
}
Expand All @@ -145,7 +144,7 @@ private void processFile(final Path path) {
Files.createDirectories(newLocation.getParent());
LOGGER.debug("copy file {} to {}", path, newLocation);
FileUtils.copyFile(path.toFile(), newLocation.toFile());
if (newLocation.toString().endsWith(TURTLE_EXTENSION)) {
if (newLocation.toString().endsWith(config.getSrcRdfExt())) {
upgradeRdfAndCreateHeaders(versionTimestamp, newLocation);
}
LOGGER.info("Resource upgraded: {}", path);
Expand All @@ -161,7 +160,7 @@ private void upgradeRdfAndCreateHeaders(final TemporalAccessor versionTimestamp,
//parse the file
final Model model = ModelFactory.createDefaultModel();
try (final InputStream is = new BufferedInputStream(new FileInputStream(newLocation.toFile()))) {
RDFDataMgr.read(model, is, Lang.TTL);
RDFDataMgr.read(model, is, config.getSrcRdfLang());
}

final Map<String, List<String>> metadataHeaders = new HashMap<>();
Expand Down Expand Up @@ -284,7 +283,7 @@ private void upgradeRdfAndCreateHeaders(final TemporalAccessor versionTimestamp,
// rewrite only if the model has changed.
if (rewriteModel.get()) {
try {
RDFDataMgr.write(new BufferedOutputStream(new FileOutputStream(newLocation.toFile())), model, Lang.TTL);
RDFDataMgr.write(new BufferedOutputStream(new FileOutputStream(newLocation.toFile())), model, config.getSrcRdfLang());
} catch (IOException e) {
throw new RuntimeException(e);
}
Expand Down Expand Up @@ -321,7 +320,7 @@ private void convertAcl(final Path convertedProtectedResourceLocation, String pr
//locate the exported acl rdf on disk based on aclURI
final var relativeAclPath = create(aclUri).getPath();
final var aclDirectory = Path.of(this.config.getInputDir().toPath().toString(), relativeAclPath);
final var aclRdfFilePath = aclDirectory + TURTLE_EXTENSION;
final var aclRdfFilePath = aclDirectory + "." + config.getSrcRdfExt();
final var newAclResource = ResourceFactory.createResource(protectedResource + "/fcr:acl");
final var aclModel = createModelFromFile(Path.of(aclRdfFilePath));
final var aclTriples = new ArrayList<Statement>();
Expand All @@ -338,7 +337,7 @@ private void convertAcl(final Path convertedProtectedResourceLocation, String pr

final var newAclFilePath = Path
.of(FilenameUtils.removeExtension(convertedProtectedResourceLocation.toString()),
FCR_ACL_PATH_SEGMENT + TURTLE_EXTENSION);
FCR_ACL_PATH_SEGMENT + "." + config.getSrcRdfExt());
newAclFilePath.getParent().toFile().mkdirs();

//determine the location of new acl
Expand Down Expand Up @@ -385,7 +384,7 @@ private void convertAcl(final Path convertedProtectedResourceLocation, String pr

//save to new acl to file
try (final OutputStream os = new BufferedOutputStream(new FileOutputStream(newAclFilePath.toFile()))) {
RDFDataMgr.write(os, newModel, Lang.TTL);
RDFDataMgr.write(os, newModel, config.getSrcRdfLang());
} catch (IOException e) {
throw new RuntimeException(e);
}
Expand Down Expand Up @@ -422,7 +421,7 @@ private Path resolveVersionsContainer(final Path path) {
while (currentPath != path.getRoot()) {
final var parent = currentPath.getParent();
if (parent.endsWith(FCR_VERSIONS_PATH_SEGMENT)) {
return Path.of(parent.toString() + TURTLE_EXTENSION);
return Path.of(parent.toString() + "." + config.getSrcRdfExt());
}

currentPath = parent;
Expand All @@ -432,10 +431,10 @@ private Path resolveVersionsContainer(final Path path) {

private TemporalAccessor resolveMementoTimestamp(final Path path) {
var metadataPath = path;
if (!path.toString().endsWith(TURTLE_EXTENSION)) {
if (!path.toString().endsWith(config.getSrcRdfExt())) {
final var metadataPathStr = metadataPath.toString();
final var newMetadataPathStr = FilenameUtils.removeExtension(metadataPathStr) + File.separator +
FCR_METADATA_PATH_SEGMENT + TURTLE_EXTENSION;
FCR_METADATA_PATH_SEGMENT + "." + config.getSrcRdfExt();
metadataPath = Path.of(newMetadataPathStr);
}

Expand All @@ -456,7 +455,7 @@ private TemporalAccessor resolveMementoTimestamp(final Path path) {
private Model createModelFromFile(final Path path) {
final Model model = ModelFactory.createDefaultModel();
try (final InputStream is = new BufferedInputStream(new FileInputStream(path.toFile()))) {
RDFDataMgr.read(model, is, Lang.TTL);
RDFDataMgr.read(model, is, config.getSrcRdfLang());
} catch (IOException ex) {
throw new RuntimeException(ex);
}
Expand All @@ -466,7 +465,7 @@ private Model createModelFromFile(final Path path) {
private Path resolveNewVersionedResourceLocation(final Path path, final TemporalAccessor mementoTimestamp) {
final var mementoId = MEMENTO_FORMATTER.format(mementoTimestamp);
//create a new location compatible with an F5 export.
final var isDescription = path.endsWith(FCR_METADATA_PATH_SEGMENT + TURTLE_EXTENSION);
final var isDescription = path.endsWith(FCR_METADATA_PATH_SEGMENT + "." + config.getSrcRdfExt());
final var inputPath = this.config.getInputDir().toPath();
final var relativePath = inputPath.relativize(path);
final var relativePathStr = relativePath.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFLanguages;

import java.io.File;
Expand Down Expand Up @@ -140,7 +141,12 @@ private Config parseOptions(final Options configOptions, final String[] args) {
config.setOutputDir(outputDir);

if (cmd.hasOption("source-rdf")) {
config.setSrcRdfLang(RDFLanguages.contentTypeToLang(cmd.getOptionValue("source-rdf")));
final Lang lang = RDFLanguages.contentTypeToLang(cmd.getOptionValue("source-rdf"));
if (lang == null) {
printHelpAndExit(format("invalid RDF content-type (%s) provided", cmd.getOptionValue("source-rdf")),
configOptions);
}
config.setSrcRdfLang(lang);
}

if (cmd.hasOption("threads")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public ResourceMigrator(final Config config,

this.baseUri = stripTrailingSlash(config.getBaseUri());
this.srcRdfLang = config.getSrcRdfLang();
this.srcRdfExt = "." + srcRdfLang.getFileExtensions().get(0);
this.srcRdfExt = "." + config.getSrcRdfExt();

// Currently, this is all F6 supports
this.dstRdfLang = Lang.NT;
Expand Down
114 changes: 114 additions & 0 deletions src/test/java/org/fcrepo/upgrade/utils/F47ToF5UpgradeManagerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,120 @@ public void testUpgrade() throws Exception {
lastModifiedStatement.getSubject().getURI());
}

/**
 * Runs a 4.7.5 to 5 upgrade over the N-Triples export fixture and verifies the output:
 * the expected files exist, the old standalone acl files are gone, external content
 * metadata was rewritten, binary and memento headers carry the expected entries, and the
 * converted acl holds two hash-uri authorizations plus a last modified date.
 *
 * @throws Exception if the fixture cannot be prepared or an output file cannot be read
 */
@Test
public void testUpgradeWithNTriples() throws Exception {
    //prepare
    final File tmpDir = tempFolder.newFolder();
    final File input = new File(TARGET_DIR + "/test-classes/4.7.5-export-ntriples");
    final File output = new File(tmpDir, "output");
    output.mkdir();

    final var config = new Config();
    config.setSourceVersion(FedoraVersion.V_4_7_5);
    config.setTargetVersion(FedoraVersion.V_5);
    config.setInputDir(input);
    config.setOutputDir(output);
    config.setSrcRdfLang(Lang.NT);
    //run
    UpgradeManager upgradeManager = UpgradeManagerFactory.create(config);
    upgradeManager.start();
    //ensure all expected files exist
    final String[] expectedFiles =
        new String[]{"rest.nt",
                     "rest.nt.headers",
                     "rest/external1",
                     "rest/external1/fcr%3Ametadata.nt",
                     "rest/container1.nt",
                     "rest/container1.nt.headers",
                     "rest/container1/fcr%3Aacl.nt",
                     "rest/container1/fcr%3Aversions/20201015053947.nt",
                     "rest/container1/fcr%3Aversions/20201015053947.nt.headers",
                     "rest/container1/fcr%3Aversions/20201015053526.nt",
                     "rest/container1/fcr%3Aversions/20201015053526.nt.headers",
                     "rest/container1/testbinary.binary",
                     "rest/container1/testbinary/fcr%3Ametadata.nt",
                     "rest/container1/testbinary.binary.headers",
                     "rest/container1/testbinary/fcr%3Ametadata/fcr%3Aversions/20201015053717.nt",
                     "rest/container1/testbinary/fcr%3Ametadata/fcr%3Aversions/20201015053717.nt.headers",
                     "rest/container1/testbinary/fcr%3Ametadata/fcr%3Aversions/20201015053848.nt",
                     "rest/container1/testbinary/fcr%3Ametadata/fcr%3Aversions/20201015053848.nt.headers",
                     "rest/container1/testbinary/fcr%3Aversions/20201015053848.binary",
                     "rest/container1/testbinary/fcr%3Aversions/20201015053848.binary.headers",
                     "rest/container1/testbinary/fcr%3Aversions/20201015053717.binary",
                     "rest/external1.external.headers",
                     "rest/external1.external"};

    for (String f : expectedFiles) {
        assertTrue(f + " does not exist as expected", new File(output, f).exists());
    }

    final String[] unexpectedFiles =
        new String[]{"rest/acl.nt",
                     "rest/acl/authZ1.nt",
                     "rest/acl/authZ2.nt"};

    for (String f : unexpectedFiles) {
        assertFalse(f + " should not exist.", new File(output, f).exists());
    }

    //ensure external content has been transformed properly
    final String externalContent = FileUtils
        .readFileToString(new File(output, "rest/external1/fcr%3Ametadata.nt"), "UTF-8");
    // the message states the mimetype must be present, so this has to be a positive assertion
    assertTrue("external content metadata should contain the mimetype", externalContent.contains("image/jpg"));
    assertFalse("message/external-body should not be present in the external content metadata",
                externalContent.contains("message/external-body"));

    //ensure the binaries contain NonRDFSource types in their headers
    final Map<String, List<String>> binaryHeaders =
        deserializeHeaders(new File(output, "rest/container1/testbinary.binary.headers"));
    assertTrue("binary does not contain NonRDFSource type in the link headers",
               binaryHeaders.get("Link").stream().anyMatch(x -> x.contains("NonRDFSource")));

    for (String f : expectedFiles) {
        final var file = new File(output, f);
        if (f.contains("fcr%3Aversions")) {

            if (f.endsWith(".headers")) {
                final Map<String, List<String>> mHeaders =
                    deserializeHeaders(file);
                assertTrue("Memento headers do not contain memento type link header",
                           mHeaders.get("Link").stream().anyMatch(x -> x.contains("Memento")));
                assertTrue("Memento headers do not contain Memento-Datetime header",
                           mHeaders.get("Memento-Datetime") != null);
            // NOTE(review): none of the expected file names contains "fcr:%3Ametadata" (they use
            // "fcr%3Ametadata"), so this branch looks unreachable - confirm the intended pattern.
            } else if (f.contains("fcr:%3Ametadata")) {
                // close the stream once the contents have been read
                try (final var in = new FileInputStream(file)) {
                    final var contents = IOUtils.toString(in, Charset.defaultCharset());
                    assertTrue("Mementos should not contain links to other mementos",
                               !contents.contains("fcr:versions/"));
                }
            }
        }
    }


    //validate acl
    //ensure there are two authorizations under the hash uri #auth0 and #auth1
    // the converted acl is written in the source rdf lang, so parse it as n-triples
    final var model = RdfUtil.parseRdf(Path.of(output.toString(), "rest/container1/fcr%3Aacl.nt"), Lang.NT);
    final var authSubjects = new ArrayList<String>();
    model.listStatements().toList()
         .stream().filter(x -> x.getPredicate().equals(RDF.type) && x.getObject().equals(AUTHORIZATION))
         .forEach(x -> {
             authSubjects.add(x.getSubject().asResource().getURI());
         });

    assertEquals("There should be two authorizations.", 2, authSubjects.size());
    assertTrue("There should be a subject with #auth0 hash uri",
               authSubjects.contains("http://localhost:8080/rest/container1/fcr:acl#auth0"));
    assertTrue("There should be a subject with #auth1 hash uri",
               authSubjects.contains("http://localhost:8080/rest/container1/fcr:acl#auth1"));

    // keep the Optional so a missing statement fails the assertion below
    // rather than throwing NoSuchElementException from get()
    final var lastModifiedStatement = model.listStatements().toList().stream()
                                           .filter(x -> x.getPredicate().equals(FEDORA_LAST_MODIFIED_DATE))
                                           .findFirst();
    assertTrue("There should be a last modified date", lastModifiedStatement.isPresent());
    assertEquals("The subject should be be the acl: ",
                 "http://localhost:8080/rest/container1/fcr:acl",
                 lastModifiedStatement.get().getSubject().getURI());
}

private Map<String, List<String>> deserializeHeaders(final File headerFile) throws IOException {
final byte[] mapData = Files.readAllBytes(Paths.get(headerFile.toURI()));
final ObjectMapper objectMapper = new ObjectMapper();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.stream.Collectors;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.jena.riot.Lang;
import org.hamcrest.Matchers;
import org.junit.Before;
import org.junit.Rule;
Expand Down Expand Up @@ -95,6 +96,18 @@ public void migrateExportWithMissingTimestamps() {
checkInstantNotOlderThan(lastUpdated2, 3000);
}

/**
 * Migrates the export under src/test/resources/5.1-export-ntriples with the source rdf lang
 * set to N-Triples, then checks the result against src/test/resources/5.1-to-6-expected.
 */
@Test
public void migrateEntireExportNTriples() {
    // point the shared config at the n-triples fixture
    final var ntriplesExport = new File("src/test/resources/5.1-export-ntriples");
    config.setInputDir(ntriplesExport);
    config.setSrcRdfLang(Lang.NT);

    UpgradeManagerFactory.create(config).start();

    final var expectedTree = Paths.get("src/test/resources/5.1-to-6-expected");
    assertMigration(expectedTree);
}

private void checkInstantNotOlderThan(Instant created, int ms) {
assertTrue("the timestamp was created in the last " + ms + " milliseconds ",
Instant.now().toEpochMilli()-created.toEpochMilli() < ms);
Expand Down
14 changes: 14 additions & 0 deletions src/test/resources/4.7.5-export-ntriples/rest.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/ldp#BasicContainer> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#Container> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#Resource> .
<http://localhost:8080/rest/> <http://fedora.info/definitions/v4/repository#lastModifiedBy> "bypassAdmin" .
<http://localhost:8080/rest/> <http://fedora.info/definitions/v4/repository#lastModified> "2020-10-15T05:35:00.219Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/ldp#RDFSource> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/ldp#Container> .
<http://localhost:8080/rest/> <http://fedora.info/definitions/v4/repository#writable> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#RepositoryRoot> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#Resource> .
<http://localhost:8080/rest/> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#Container> .
<http://localhost:8080/rest/> <http://www.w3.org/ns/ldp#contains> <http://localhost:8080/rest/container1> .
<http://localhost:8080/rest/> <http://www.w3.org/ns/ldp#contains> <http://localhost:8080/rest/external1> .
<http://localhost:8080/rest/> <http://fedora.info/definitions/v4/repository#hasTransactionProvider> <http://localhost:8080/rest/fcr:tx> .
13 changes: 13 additions & 0 deletions src/test/resources/4.7.5-export-ntriples/rest/acl.nt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<http://localhost:8080/rest/acl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#Container> .
<http://localhost:8080/rest/acl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/repository#Resource> .
<http://localhost:8080/rest/acl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://fedora.info/definitions/v4/webac#Acl> .
<http://localhost:8080/rest/acl> <http://fedora.info/definitions/v4/repository#lastModifiedBy> "bypassAdmin" .
<http://localhost:8080/rest/acl> <http://fedora.info/definitions/v4/repository#createdBy> "bypassAdmin" .
<http://localhost:8080/rest/acl> <http://fedora.info/definitions/v4/repository#created> "2020-11-30T04:37:48.795Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<http://localhost:8080/rest/acl> <http://fedora.info/definitions/v4/repository#lastModified> "2020-11-30T04:42:18.614Z"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
<http://localhost:8080/rest/acl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/ldp#RDFSource> .
<http://localhost:8080/rest/acl> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/ns/ldp#Container> .
<http://localhost:8080/rest/acl> <http://fedora.info/definitions/v4/repository#writable> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
<http://localhost:8080/rest/acl> <http://fedora.info/definitions/v4/repository#hasParent> <http://localhost:8080/rest/> .
<http://localhost:8080/rest/acl> <http://www.w3.org/ns/ldp#contains> <http://localhost:8080/rest/acl/authZ1> .
<http://localhost:8080/rest/acl> <http://www.w3.org/ns/ldp#contains> <http://localhost:8080/rest/acl/authZ2> .
Loading

0 comments on commit 8082e5a

Please sign in to comment.