Explore remote N5 datasets #6520

Merged · 8 commits · Oct 6, 2022 (diff shows changes from 6 commits)
1 change: 1 addition & 0 deletions CHANGELOG.unreleased.md
@@ -11,6 +11,7 @@ For upgrade instructions, please check the [migration guide](MIGRATIONS.released
[Commits](https://github.com/scalableminds/webknossos/compare/22.10.0...HEAD)

### Added
- Remote n5 datasets can now also be explored and added. [#6520](https://github.com/scalableminds/webknossos/pull/6520)

### Changed
- Creating tasks in bulk now also supports referencing task types by their summary instead of id. [#6486](https://github.com/scalableminds/webknossos/pull/6486)
1 change: 1 addition & 0 deletions app/controllers/DataSetController.scala
@@ -9,6 +9,7 @@ import com.scalableminds.webknossos.datastore.models.datasource.{DataLayer, Data
import io.swagger.annotations._
import models.analytics.{AnalyticsService, ChangeDatasetSettingsEvent, OpenDatasetEvent}
import models.binary._
import models.binary.explore.{ExploreRemoteDatasetParameters, ExploreRemoteLayerService}
import models.organization.OrganizationDAO
import models.team.TeamDAO
import models.user.{User, UserDAO, UserService}
291 changes: 0 additions & 291 deletions app/models/binary/ExploreRemoteLayerService.scala

This file was deleted.

133 changes: 133 additions & 0 deletions app/models/binary/explore/ExploreRemoteLayerService.scala
@@ -0,0 +1,133 @@
package models.binary.explore

import com.scalableminds.util.geometry.Vec3Double
import com.scalableminds.util.tools.{Fox, FoxImplicits}
import com.scalableminds.webknossos.datastore.dataformats.n5.{N5DataLayer, N5SegmentationLayer}
import com.scalableminds.webknossos.datastore.dataformats.zarr._
import com.scalableminds.webknossos.datastore.datareaders.zarr._
import com.scalableminds.webknossos.datastore.models.datasource._
import com.scalableminds.webknossos.datastore.storage.FileSystemsHolder
import com.typesafe.scalalogging.LazyLogging
import net.liftweb.common.{Box, Empty, Failure, Full}
import net.liftweb.util.Helpers.tryo
import play.api.libs.json.{Json, OFormat}

import java.net.URI
import java.nio.file.Path
import javax.inject.Inject
import scala.collection.mutable.ListBuffer
import scala.concurrent.ExecutionContext

case class ExploreRemoteDatasetParameters(remoteUri: String, user: Option[String], password: Option[String])
object ExploreRemoteDatasetParameters {
implicit val jsonFormat: OFormat[ExploreRemoteDatasetParameters] = Json.format[ExploreRemoteDatasetParameters]
}

class ExploreRemoteLayerService @Inject()() extends FoxImplicits with LazyLogging {

def exploreRemoteDatasource(
urisWithCredentials: List[ExploreRemoteDatasetParameters],
reportMutable: ListBuffer[String])(implicit ec: ExecutionContext): Fox[GenericDataSource[DataLayer]] =
for {
exploredLayersNested <- Fox.serialCombined(urisWithCredentials)(parameters =>
exploreRemoteLayersForUri(parameters.remoteUri, parameters.user, parameters.password, reportMutable))
layersWithVoxelSizes = exploredLayersNested.flatten
_ <- bool2Fox(layersWithVoxelSizes.nonEmpty) ?~> "Detected zero layers"
voxelSize <- commonVoxelSize(layersWithVoxelSizes.map(_._2)) ?~> "Could not extract common voxel size from layers"
layers = makeLayerNamesUnique(layersWithVoxelSizes.map(_._1))
dataSetName <- dataSetName(urisWithCredentials.map(_.remoteUri))
dataSource = GenericDataSource[DataLayer](
DataSourceId(dataSetName, ""),
layers,
voxelSize
)
} yield dataSource

private def makeLayerNamesUnique(layers: List[DataLayer]): List[DataLayer] = {
val namesSetMutable = scala.collection.mutable.Set[String]()
layers.map { layer: DataLayer =>
var nameCandidate = layer.name
var index = 1
while (namesSetMutable.contains(nameCandidate)) {
index += 1
nameCandidate = f"${layer.name}_$index"
}
namesSetMutable.add(nameCandidate)
if (nameCandidate == layer.name) {
layer
} else
layer match {
case l: ZarrDataLayer => l.copy(name = nameCandidate)
case l: ZarrSegmentationLayer => l.copy(name = nameCandidate)
case l: N5DataLayer => l.copy(name = nameCandidate)
case l: N5SegmentationLayer => l.copy(name = nameCandidate)
case _ => throw new Exception("Encountered unsupported layer format during explore remote")
}
}
}

private def dataSetName(uris: List[String])(implicit ec: ExecutionContext): Fox[String] =
if (uris.length == 1) uris.headOption.map(normalizeUri(_).split("/").last).toFox
else Fox.successful("explored_remote_dataset")

private def commonVoxelSize(voxelSizes: List[Vec3Double])(implicit ec: ExecutionContext): Fox[Vec3Double] =
for {
head <- voxelSizes.headOption.toFox
_ <- bool2Fox(voxelSizes.forall(_ == head)) ?~> s"voxel sizes for layers are not uniform, got $voxelSizes"
} yield head

private def exploreRemoteLayersForUri(
layerUri: String,
user: Option[String],
password: Option[String],
reportMutable: ListBuffer[String])(implicit ec: ExecutionContext): Fox[List[(DataLayer, Vec3Double)]] =
for {
remoteSource <- tryo(RemoteSourceDescriptor(new URI(normalizeUri(layerUri)), user, password)).toFox ?~> s"Received invalid URI: $layerUri"
fileSystem <- FileSystemsHolder.getOrCreate(remoteSource).toFox ?~> "Failed to set up remote file system"
remotePath <- tryo(fileSystem.getPath(remoteSource.remotePath)) ?~> "Failed to get remote path"
layersWithVoxelSizes <- exploreRemoteLayersForRemotePath(
remotePath,
remoteSource.credentials,
reportMutable,
List(new ZarrArrayExplorer, new NgffExplorer, new N5ArrayExplorer, new N5MultiscalesExplorer))
} yield layersWithVoxelSizes

private def normalizeUri(uri: String): String =
if (uri.endsWith(ZarrHeader.FILENAME_DOT_ZARRAY)) uri.dropRight(ZarrHeader.FILENAME_DOT_ZARRAY.length)
else if (uri.endsWith(NgffMetadata.FILENAME_DOT_ZATTRS)) uri.dropRight(NgffMetadata.FILENAME_DOT_ZATTRS.length)
else if (uri.endsWith(NgffGroupHeader.FILENAME_DOT_ZGROUP))
uri.dropRight(NgffGroupHeader.FILENAME_DOT_ZGROUP.length)
else uri

private def exploreRemoteLayersForRemotePath(
remotePath: Path,
credentials: Option[FileSystemCredentials],
reportMutable: ListBuffer[String],
explorers: List[RemoteLayerExplorer])(implicit ec: ExecutionContext): Fox[List[(DataLayer, Vec3Double)]] =
explorers match {
case Nil => Fox.empty
case currentExplorer :: remainingExplorers =>
reportMutable += s"Trying to explore $remotePath as ${currentExplorer.name}..."
currentExplorer.explore(remotePath, credentials).futureBox.flatMap {
case Full(layersWithVoxelSizes) =>
reportMutable += s"Found ${layersWithVoxelSizes.length} ${currentExplorer.name} layers at $remotePath."
Fox.successful(layersWithVoxelSizes)
case f: Failure =>
reportMutable += s"Error when reading $remotePath as ${currentExplorer.name}: ${formatFailureForReport(f)}"
exploreRemoteLayersForRemotePath(remotePath, credentials, reportMutable, remainingExplorers)
case Empty =>
reportMutable += s"Error when reading $remotePath as ${currentExplorer.name}: Empty"
exploreRemoteLayersForRemotePath(remotePath, credentials, reportMutable, remainingExplorers)
}
}

def formatFailureForReport(failure: Failure): String = {
def formatChain(chain: Box[Failure]): String = chain match {
case Full(failure) =>
" <~ " + failure.msg + formatChain(failure.chain)
case _ => ""
}
failure.msg + formatChain(failure.chain)
}

}
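
For orientation, here is a minimal, hedged sketch of how a caller (such as DataSetController, whose import block is touched above) might drive the new service; the call-site variable names are assumptions, not code from this PR:

// Hypothetical call site, assuming an injected exploreRemoteLayerService instance
// and an implicit ExecutionContext in scope.
val parameters = List(
  ExploreRemoteDatasetParameters(
    remoteUri = "https://example.org/data/my_dataset.n5/color", // made-up URI
    user = None,
    password = None))
val report = scala.collection.mutable.ListBuffer[String]()
// Returns a Fox[GenericDataSource[DataLayer]]; `report` collects one line per
// explorer attempt (Zarr array, NGFF, N5 array, N5 multiscales).
val dataSourceFox = exploreRemoteLayerService.exploreRemoteDatasource(parameters, report)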
32 changes: 32 additions & 0 deletions app/models/binary/explore/N5ArrayExplorer.scala
@@ -0,0 +1,32 @@
package models.binary.explore
import com.scalableminds.util.geometry.{Vec3Double, Vec3Int}
import com.scalableminds.util.tools.Fox
import com.scalableminds.webknossos.datastore.dataformats.MagLocator
import com.scalableminds.webknossos.datastore.dataformats.n5.{N5DataLayer, N5Layer, N5SegmentationLayer}
import com.scalableminds.webknossos.datastore.dataformats.zarr.FileSystemCredentials
import com.scalableminds.webknossos.datastore.datareaders.AxisOrder
import com.scalableminds.webknossos.datastore.datareaders.n5.N5Header
import com.scalableminds.webknossos.datastore.models.datasource.Category

import java.nio.file.Path
import scala.concurrent.ExecutionContext.Implicits.global

class N5ArrayExplorer extends RemoteLayerExplorer {

override def name: String = "N5 Array"

override def explore(remotePath: Path, credentials: Option[FileSystemCredentials]): Fox[List[(N5Layer, Vec3Double)]] =
for {
headerPath <- Fox.successful(remotePath.resolve(N5Header.FILENAME_ATTRIBUTES_JSON))
name <- guessNameFromPath(remotePath)
n5Header <- parseJsonFromPath[N5Header](headerPath) ?~> s"failed to read n5 header at $headerPath"
elementClass <- n5Header.elementClass ?~> "failed to read element class from n5 header"
guessedAxisOrder = AxisOrder.asZyxFromRank(n5Header.rank)
boundingBox <- n5Header.boundingBox(guessedAxisOrder) ?~> "failed to read bounding box from n5 header. Make sure data is in (T/C)ZYX format"
magLocator = MagLocator(Vec3Int.ones, Some(remotePath.toString), credentials, Some(guessedAxisOrder))
layer: N5Layer = if (looksLikeSegmentationLayer(name, elementClass)) {
N5SegmentationLayer(name, boundingBox, elementClass, List(magLocator), largestSegmentId = None)
} else N5DataLayer(name, Category.color, boundingBox, elementClass, List(magLocator))
} yield List((layer, Vec3Double(1.0, 1.0, 1.0)))

}
129 changes: 129 additions & 0 deletions app/models/binary/explore/N5MultiscalesExplorer.scala
@@ -0,0 +1,129 @@
package models.binary.explore

import com.scalableminds.util.geometry.{Vec3Double, Vec3Int}
import com.scalableminds.util.tools.{Fox, FoxImplicits}
import com.scalableminds.webknossos.datastore.dataformats.MagLocator
import com.scalableminds.webknossos.datastore.dataformats.n5.{N5DataLayer, N5Layer, N5SegmentationLayer}
import com.scalableminds.webknossos.datastore.dataformats.zarr.FileSystemCredentials
import com.scalableminds.webknossos.datastore.datareaders.AxisOrder
import com.scalableminds.webknossos.datastore.datareaders.n5.{
N5Header,
N5Metadata,
N5MultiscalesDataset,
N5MultiscalesItem,
N5Transform
}
import com.scalableminds.webknossos.datastore.models.datasource.Category
import net.liftweb.util.Helpers.tryo

import java.nio.file.Path
import scala.concurrent.ExecutionContext.Implicits.global

class N5MultiscalesExplorer extends RemoteLayerExplorer with FoxImplicits {

override def name: String = "N5 Multiscales"

override def explore(remotePath: Path, credentials: Option[FileSystemCredentials]): Fox[List[(N5Layer, Vec3Double)]] =
for {
metadataPath <- Fox.successful(remotePath.resolve(N5Metadata.FILENAME_ATTRIBUTES_JSON))
n5Metadata <- parseJsonFromPath[N5Metadata](metadataPath) ?~> s"Failed to read N5 multiscales metadata at $metadataPath"
layers <- Fox.serialCombined(n5Metadata.multiscales)(layerFromN5MultiscalesItem(_, remotePath, credentials))
} yield layers

private def layerFromN5MultiscalesItem(multiscalesItem: N5MultiscalesItem,
remotePath: Path,
credentials: Option[FileSystemCredentials]): Fox[(N5Layer, Vec3Double)] =
for {
voxelSizeNanometers <- extractVoxelSize(multiscalesItem.datasets.map(_.transform))
magsWithAttributes <- Fox.serialCombined(multiscalesItem.datasets)(d =>
n5MagFromDataset(d, remotePath, voxelSizeNanometers, credentials))
_ <- bool2Fox(magsWithAttributes.nonEmpty) ?~> "zero mags in layer"
elementClass <- elementClassFromMags(magsWithAttributes) ?~> "Could not extract element class from mags"
boundingBox = boundingBoxFromMags(magsWithAttributes)
name <- guessNameFromPath(remotePath)
layer: N5Layer = if (looksLikeSegmentationLayer(name, elementClass)) {
N5SegmentationLayer(name, boundingBox, elementClass, magsWithAttributes.map(_.mag), largestSegmentId = None)
} else N5DataLayer(name, Category.color, boundingBox, elementClass, magsWithAttributes.map(_.mag))
} yield (layer, voxelSizeNanometers)

private def extractAxisOrder(axes: List[String]): Fox[AxisOrder] = {
val x = axes.indexWhere(_ == "x")
val y = axes.indexWhere(_ == "y")
val z = axes.indexWhere(_ == "z")
val c = axes.indexWhere(_ == "c")

val cOpt = if (c == -1) None else Some(c)
for {
_ <- bool2Fox(x >= 0 && y >= 0 && z >= 0) ?~> s"invalid xyz axis order: $x,$y,$z."
} yield AxisOrder(x, y, z, cOpt)
}

private def extractAxisUnitFactors(units: List[String], axisOrder: AxisOrder): Fox[Vec3Double] =
for {
xUnitFactor <- spaceUnitToNmFactor(units(axisOrder.x))
yUnitFactor <- spaceUnitToNmFactor(units(axisOrder.y))
zUnitFactor <- spaceUnitToNmFactor(units(axisOrder.z))
} yield Vec3Double(xUnitFactor, yUnitFactor, zUnitFactor)

private def spaceUnitToNmFactor(unit: String): Fox[Double] =
unit.toLowerCase match {
case "" => Fox.successful(1.0)
case "ym" => Fox.successful(1e-15)
case "zm" => Fox.successful(1e-12)
case "am" => Fox.successful(1e-9)
case "fm" => Fox.successful(1e-6)
case "pm" => Fox.successful(1e-3)
case "nm" => Fox.successful(1.0)
case "µm" => Fox.successful(1e3)
case "mm" => Fox.successful(1e6)
case "cm" => Fox.successful(1e7)
case "dm" => Fox.successful(1e8)
case "m" => Fox.successful(1e9)
Review comment (Contributor):
Maybe this can be shared with the other unit definitions? Or are those too different?

Reply (Member, Author):
From what I've seen, ngff uses nanometer while n5 uses nm, so they are pretty distinct. Of course, we could still merge the matching into one big function; what would you prefer?

Reply (Contributor):
Let's keep it separate as-is for now 👍
The only reference I found is https://github.com/saalfeldlab/n5-viewer/blob/23f86b32e77ff1856893fa955523fb016cef3d82/README.md#container-structure, where it says that not setting anything assumes µm; I think that is currently not the case. Also, it specifically shows the string um, which is missing in this list.
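
// Hedged illustration, not part of this PR: if the n5-viewer convention cited above were
// adopted, an empty unit would default to µm and the plain-ASCII spelling "um" would be
// accepted. The following case lines are hypothetical and only make the reviewer's point concrete:
//   case ""   => Fox.successful(1e3) // no unit set: assume µm, per the n5-viewer README
//   case "um" => Fox.successful(1e3) // ASCII spelling of µm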

case unknownUnit => Fox.failure(s"Unknown space axis unit: $unknownUnit")
}

private def extractVoxelSize(transforms: List[N5Transform]): Fox[Vec3Double] =
for {
voxelSizes <- Fox.serialCombined(transforms)(t => voxelSizeFromTransform(t))
} yield voxelSizes.minBy(_.maxDim)

private def voxelSizeFromTransform(transform: N5Transform): Fox[Vec3Double] =
for {
axisOrder <- extractAxisOrder(transform.axes) ?~> "Could not extract XYZ axis order mapping. Does the data have x, y and z axes, stated in multiscales metadata?"
axisUnitFactors <- extractAxisUnitFactors(transform.units, axisOrder) ?~> "Could not extract axis unit-to-nm factors"
voxelSizeInAxisUnits <- extractVoxelSizeInAxisUnits(transform.scale, axisOrder) ?~> "Could not extract voxel size from scale transforms"
} yield voxelSizeInAxisUnits * axisUnitFactors

private def extractVoxelSizeInAxisUnits(scale: List[Double], axisOrder: AxisOrder): Fox[Vec3Double] =
tryo(Vec3Double(scale(axisOrder.x), scale(axisOrder.y), scale(axisOrder.z)))

private def n5MagFromDataset(n5Dataset: N5MultiscalesDataset,
layerPath: Path,
voxelSize: Vec3Double,
credentials: Option[FileSystemCredentials]): Fox[MagWithAttributes] =
for {
axisOrder <- extractAxisOrder(n5Dataset.transform.axes) ?~> "Could not extract XYZ axis order mapping. Does the data have x, y and z axes, stated in multiscales metadata?"
mag <- magFromTransform(voxelSize, n5Dataset.transform) ?~> "Could not extract mag from transforms"
magPath = layerPath.resolve(n5Dataset.path)
headerPath = magPath.resolve(N5Header.FILENAME_ATTRIBUTES_JSON)
n5Header <- parseJsonFromPath[N5Header](headerPath) ?~> s"failed to read n5 header at $headerPath"
elementClass <- n5Header.elementClass ?~> s"failed to read element class from n5 header at $headerPath"
boundingBox <- n5Header.boundingBox(axisOrder) ?~> s"failed to read bounding box from n5 header at $headerPath"
} yield
MagWithAttributes(MagLocator(mag, Some(magPath.toString), credentials, Some(axisOrder)),
magPath,
elementClass,
boundingBox)

private def magFromTransform(voxelSize: Vec3Double, transform: N5Transform): Fox[Vec3Int] = {
def isPowerOfTwo(x: Int): Boolean =
x != 0 && (x & (x - 1)) == 0

for {
magVoxelSize <- voxelSizeFromTransform(transform)
mag = (magVoxelSize / voxelSize).round.toVec3Int
_ <- bool2Fox(isPowerOfTwo(mag.x) && isPowerOfTwo(mag.y) && isPowerOfTwo(mag.z)) ?~> s"invalid mag: $mag. Must all be powers of two"
} yield mag
}

}
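
To make magFromTransform concrete: a dataset's mag is the per-axis ratio between its voxel size and the finest voxel size found in the layer, rounded to integers, with each component required to be a power of two. A hedged sketch with made-up voxel sizes, using the project's Vec3Double/Vec3Int types:

// Hypothetical numbers, not from this PR.
val finestVoxelSize = Vec3Double(4.0, 4.0, 40.0)     // finest dataset of the layer, in nm
val datasetVoxelSize = Vec3Double(16.0, 16.0, 40.0)  // a downsampled dataset, in nm
val mag = (datasetVoxelSize / finestVoxelSize).round.toVec3Int // Vec3Int(4, 4, 1)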