Skip to content

Commit

Permalink
Merge pull request #64 from NVIDIAGameWorks/main-minimal-stage
Browse files Browse the repository at this point in the history
v1.6.0
  • Loading branch information
nv-jdeligiannis authored Nov 4, 2024
2 parents bfbaa42 + 652523e commit 2e73fb7
Show file tree
Hide file tree
Showing 10 changed files with 498 additions and 124 deletions.
15 changes: 10 additions & 5 deletions omm-sdk/include/omm.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include <stddef.h>

#define OMM_VERSION_MAJOR 1
#define OMM_VERSION_MINOR 5
#define OMM_VERSION_BUILD 1
#define OMM_VERSION_MINOR 6
#define OMM_VERSION_BUILD 0

#define OMM_MAX_TRANSIENT_POOL_BUFFERS 8

Expand Down Expand Up @@ -420,8 +420,13 @@ typedef struct ommCpuBakeInputDesc
const ommFormat* formats;
// Determines how to promote mixed states
ommUnknownStatePromotion unknownStatePromotion;
// Determines the state of unresolvable/degenerate triangles (nan/inf or zeroa area UV-triangles)
ommSpecialIndex degenTriState;
// Determines the state of unresolvable(nan/inf UV-triangles) and disabled triangles. Note that degenerate triangles (points/lines) will be resolved correctly.
union
{
OMM_DEPRECATED_MSG("unresolvedTriState has been deprecated, please use unresolvedTriState instead")
ommSpecialIndex degenTriState;
ommSpecialIndex unresolvedTriState;
};
// Micro triangle count is 4^N, where N is the subdivision level.
// maxSubdivisionLevel level must be in range [0, 12].
// When dynamicSubdivisionScale is enabled maxSubdivisionLevel is the max subdivision level allowed.
Expand Down Expand Up @@ -466,7 +471,7 @@ inline ommCpuBakeInputDesc ommCpuBakeInputDescDefault()
v.format = ommFormat_OC1_4_State;
v.formats = NULL;
v.unknownStatePromotion = ommUnknownStatePromotion_ForceOpaque;
v.degenTriState = ommSpecialIndex_FullyUnknownOpaque;
v.unresolvedTriState = ommSpecialIndex_FullyUnknownOpaque;
v.maxSubdivisionLevel = 8;
v.subdivisionLevels = NULL;
v.maxWorkloadSize = 0xFFFFFFFFFFFFFFFF;
Expand Down
9 changes: 7 additions & 2 deletions omm-sdk/include/omm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,13 @@ namespace omm
const Format* formats = nullptr;
// Determines how to promote mixed states
UnknownStatePromotion unknownStatePromotion = UnknownStatePromotion::ForceOpaque;
// Determines the state of unresolvable/degenerate triangles (nan/inf or zeroa area UV-triangles)
SpecialIndex degenTriState = SpecialIndex::FullyUnknownOpaque;
// State assigned to triangles that cannot be classified: unresolvable ones (nan/inf UV-triangles)
// and disabled ones. Note that degenerate triangles (points/lines) will be resolved correctly.
// Both members below share the same storage; degenTriState is the deprecated legacy name and
// unresolvedTriState is the preferred one. Defaults to SpecialIndex::FullyUnknownOpaque.
union
{
OMM_DEPRECATED_MSG("degenTriState has been deprecated, please use unresolvedTriState instead")
omm::SpecialIndex degenTriState;
omm::SpecialIndex unresolvedTriState = SpecialIndex::FullyUnknownOpaque;
};
// Micro triangle count is 4^N, where N is the subdivision level.
// maxSubdivisionLevel level must be in range [0, 12].
// When dynamicSubdivisionScale is enabled maxSubdivisionLevel is the max subdivision level allowed.
Expand Down
134 changes: 101 additions & 33 deletions omm-sdk/src/bake_cpu_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ namespace Cpu
DisableLevelLineIntersection = 1u << 8,
DisableFineClassification = 1u << 9,
EnableNearDuplicateDetectionBruteForce = 1u << 10,
EnableEdgeHeuristic = 1u << 11,
};

constexpr void ValidateInternalBakeFlags()
Expand All @@ -70,7 +71,8 @@ namespace Cpu
enableAABBTesting(((uint32_t)flags& (uint32_t)BakeFlagsInternal::EnableAABBTesting) == (uint32_t)BakeFlagsInternal::EnableAABBTesting),
disableRemovePoorQualityOMM(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableRemovePoorQualityOMM) == (uint32_t)BakeFlagsInternal::DisableRemovePoorQualityOMM),
disableLevelLineIntersection(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableLevelLineIntersection) == (uint32_t)BakeFlagsInternal::DisableLevelLineIntersection),
disableFineClassification(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableFineClassification) == (uint32_t)BakeFlagsInternal::DisableFineClassification)
disableFineClassification(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableFineClassification) == (uint32_t)BakeFlagsInternal::DisableFineClassification),
enableEdgeHeuristic(((uint32_t)flags& (uint32_t)BakeFlagsInternal::EnableEdgeHeuristic) == (uint32_t)BakeFlagsInternal::EnableEdgeHeuristic)
{ }
const bool enableInternalThreads;
const bool disableSpecialIndices;
Expand All @@ -82,6 +84,7 @@ namespace Cpu
const bool disableRemovePoorQualityOMM;
const bool disableLevelLineIntersection;
const bool disableFineClassification;
const bool enableEdgeHeuristic;
};

BakerImpl::~BakerImpl()
Expand Down Expand Up @@ -411,7 +414,7 @@ namespace Cpu
return GetArea2D(uvTri.p0, uvTri.p1, uvTri.p2);
};

static const uint32_t CalculateSuitableSubdivisionLevel(const ommCpuBakeInputDesc& desc, const Triangle& uvTri, uint2 texSize)
static const uint32_t ComputeAreaHeuristic(const ommCpuBakeInputDesc& desc, const Triangle& uvTri, uint2 texSize)
{
auto GetNextPow2 = [](uint v)->uint
{
Expand Down Expand Up @@ -442,7 +445,6 @@ namespace Cpu
// Solves the following eqn:
// targetPixelArea / (4^N) = pixelUvArea

// Questionable heuristic... micro-triangle should cover 8x8 pixel region?
const float targetPixelArea = desc.dynamicSubdivisionScale * desc.dynamicSubdivisionScale;
const uint ratio = uint(pixelUvArea / targetPixelArea);
const uint ratioNextPow2 = GetNextPow2(ratio);
Expand All @@ -453,19 +455,38 @@ namespace Cpu
return std::min<uint>(SubdivisionLevel, desc.maxSubdivisionLevel);
}

static bool IsDegenerate(const Triangle& t)
static const uint32_t ComputeEdgeHeuristic(const ommCpuBakeInputDesc& desc, const Triangle& uvTri, uint2 texSize)
{
const bool anyNan = glm::any(glm::isnan(t.p0)) || glm::any(glm::isnan(t.p1)) || glm::any(glm::isnan(t.p2));
const bool anyInf = glm::any(glm::isinf(t.p0)) || glm::any(glm::isinf(t.p1)) || glm::any(glm::isinf(t.p2));

const float3 N = glm::cross(float3(t.p2 - t.p0, 0), float3(t.p1 - t.p0, 0));
const float N2 = N.z * N.z;
const bool bIsZeroArea = N2 < 1e-9;

return anyNan || anyInf || bIsZeroArea;
// Adapted from 3.1.1 https://fileadmin.cs.lth.se/graphics/research/papers/2024/succinct_opacity_micromaps/paper-author-version.pdf
const float2 ve0 = (float2)texSize * (uvTri.p1 - uvTri.p0);
const float2 ve1 = (float2)texSize * (uvTri.p2 - uvTri.p0);
const float2 ve2 = (float2)texSize * (uvTri.p2 - uvTri.p1);

const float le0 = glm::dot(ve0, ve0);
const float le1 = glm::dot(ve1, ve1);
const float le2 = glm::dot(ve2, ve2);

const float eMax = std::max({ le0, le1, le2 });

const float n = eMax < 1e-6 ? 0 : std::log2(eMax) / 2.f - std::log2(desc.dynamicSubdivisionScale);

const int SubdivisionLevel = (int)std::ceil(n);
return std::clamp<int>(SubdivisionLevel, 0, desc.maxSubdivisionLevel);
}

static const uint32_t CalculateSuitableSubdivisionLevel(const ommCpuBakeInputDesc& desc, const Options& options, const Triangle& uvTri, uint2 texSize)
{
if (uvTri.GetIsDegenerate() || options.enableEdgeHeuristic)
{
return ComputeEdgeHeuristic(desc, uvTri, texSize);
}
else
{
return ComputeAreaHeuristic(desc, uvTri, texSize);
}
}

static int32_t GetSubdivisionLevelForPrimitive(const ommCpuBakeInputDesc& desc, uint32_t i, const Triangle& uvTri, uint2 texSize)
static int32_t GetSubdivisionLevelForPrimitive(const ommCpuBakeInputDesc& desc, const Options& options, uint32_t i, const Triangle& uvTri, uint2 texSize)
{
if (desc.subdivisionLevels && desc.subdivisionLevels[i] <= 12)
{
Expand All @@ -477,14 +498,29 @@ namespace Cpu

if (enableDynamicSubdivisionLevel)
{
return CalculateSuitableSubdivisionLevel(desc, uvTri, texSize);
return CalculateSuitableSubdivisionLevel(desc, options, uvTri, texSize);
}
else
{
return desc.maxSubdivisionLevel;
}
}

// Tells whether a UV triangle must be excluded from baking.
//
// options    - internal bake options; disableLevelLineIntersection is consulted here.
// uvTriangle - the triangle in UV space.
//
// Returns true when the triangle reports itself as invalid, or when it is degenerate while
// level line intersection is disabled. Returns false otherwise.
static bool GetIsInvalid(const Options& options, const Triangle& uvTriangle)
{
    // we only support degen triangles in level line intersection mode.
    return uvTriangle.GetIsInvalid()
        || (options.disableLevelLineIntersection && uvTriangle.GetIsDegenerate());
}

namespace impl
{
static ommResult SetupWorkItems(
Expand All @@ -506,7 +542,7 @@ namespace Cpu
{
const uint32_t texCoordStrideInBytes = desc.texCoordStrideInBytes == 0 ? GetTexCoordFormatSize(desc.texCoordFormat) : desc.texCoordStrideInBytes;

uint32_t numDegenTri = 0;
uint32_t numDisabledTri = 0;

for (int32_t i = 0; i < triangleCount; ++i)
{
Expand All @@ -515,14 +551,13 @@ namespace Cpu

const Triangle uvTri = FetchUVTriangle(desc.texCoords, texCoordStrideInBytes, desc.texCoordFormat, triangleIndices);

const int32_t subdivisionLevel = GetSubdivisionLevelForPrimitive(desc, i, uvTri, texture->GetSize(0 /*always based on mip 0*/));
const int32_t subdivisionLevel = GetSubdivisionLevelForPrimitive(desc, options, i, uvTri, texture->GetSize(0 /*always based on mip 0*/));

const bool bIsDisabled = subdivisionLevel == kDisabledPrimitive;
const bool bIsDegenerate = IsDegenerate(uvTri);

if (bIsDisabled || bIsDegenerate)

if (bIsDisabled || GetIsInvalid(options, uvTri))
{
numDegenTri++;
numDisabledTri++;
continue; // These indices will be set to special index unknown later.
}

Expand Down Expand Up @@ -556,11 +591,11 @@ namespace Cpu
}
}

if (options.enableValidation && numDegenTri != 0)
if (options.enableValidation && numDisabledTri != 0)
{
const char* specialIndex = ToString(desc.degenTriState);
log.Infof("[Info] - The workload consists of %d degenerate triangles, these will be classified as Fully Unknown Opaque (this behaviour can be changed by degenTriState).",
numDegenTri, specialIndex);
const char* specialIndex = ToString(desc.unresolvedTriState);
log.Infof("[Info] - The workload consists of %d unclassifiable triangles, these will be classified as unresolvedTriState = %s.",
numDisabledTri, specialIndex);
}
}
return ommResult_SUCCESS;
Expand Down Expand Up @@ -714,7 +749,13 @@ namespace Cpu
return ommResult_SUCCESS;
}

template<ommCpuTextureFormat eFormat, TilingMode eTilingMode, ommTextureAddressMode eTextureAddressMode, ommTextureFilterMode eFilterMode>
// Selects which kind of work item a ResampleFine instantiation processes.
// Work items whose UV triangle does not match the selected class are skipped by that pass.
enum TriangleClass
{
    // Non-degenerate triangles; these are rasterized as triangles.
    Normal = 0,
    // Degenerate triangles; these are rasterized as a line.
    Degenerate = 1
};

template<ommCpuTextureFormat eFormat, TilingMode eTilingMode, ommTextureAddressMode eTextureAddressMode, ommTextureFilterMode eFilterMode, TriangleClass eTriangleClass>
static ommResult ResampleFine(const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems)
{
if (options.enableAABBTesting && !options.disableLevelLineIntersection)
Expand All @@ -738,6 +779,17 @@ namespace Cpu
{
// Subdivide the input triangle in to smaller triangles. They will be "bird-curve" ordered.
OmmWorkItem& workItem = vmWorkItems[workItemIt];
const bool isDegenerate = workItem.uvTri.GetIsDegenerate();

if (eTriangleClass == TriangleClass::Normal && isDegenerate)
{
continue;
}

if (eTriangleClass == TriangleClass::Degenerate && !isDegenerate)
{
continue;
}

const uint32_t numMicroTriangles = omm::bird::GetNumMicroTriangles(workItem.subdivisionLevel);

Expand Down Expand Up @@ -779,8 +831,18 @@ namespace Cpu
else
vmCoverage.numBelowAlpha++;

auto kernel = &LevelLineIntersectionKernel::run<eFormat, eTextureAddressMode, eTilingMode>;
RasterizeConservativeSerialWithOffsetCoverage(subTri, rasterSize, pixelOffset, kernel, &params);

if constexpr (eTriangleClass == TriangleClass::Normal)
{
auto kernel = &LevelLineIntersectionKernel::run<eFormat, eTextureAddressMode, eTilingMode, false /*degenerate*/>;
RasterizeConservativeSerialWithOffsetCoverage(subTri, rasterSize, pixelOffset, kernel, &params);
}
else
{
auto kernel = &LevelLineIntersectionKernel::run<eFormat, eTextureAddressMode, eTilingMode, true /*degenerate*/>;
Line l(subTri.aabb_s, subTri.aabb_e);
RasterizeConservativeLineWithOffset(l, rasterSize, pixelOffset, kernel, &params);
}

OMM_ASSERT(vmCoverage.numAboveAlpha != 0 || vmCoverage.numBelowAlpha != 0);
const ommOpacityState state = GetStateFromCoverage(desc.format, desc.unknownStatePromotion, desc.alphaCutoffGreater, desc.alphaCutoffLessEqual, vmCoverage);
Expand Down Expand Up @@ -867,7 +929,7 @@ namespace Cpu

params.vmState = &vmCoverage;

auto kernel = [](int2 pixel, float3* bc, void* ctx)
auto kernel = [](int2 pixel, void* ctx)
{
KernelParams* p = (KernelParams*)ctx;

Expand Down Expand Up @@ -1513,7 +1575,7 @@ namespace Cpu
// Set special indices...
{
res.ommIndexBuffer.resize(triangleCount);
std::fill(res.ommIndexBuffer.begin(), res.ommIndexBuffer.end(), (int32_t)desc.degenTriState);
std::fill(res.ommIndexBuffer.begin(), res.ommIndexBuffer.end(), (int32_t)desc.unresolvedTriState);
for (const OmmWorkItem& vm : vmWorkItems)
{
for (uint32_t primitiveIndex : vm.primitiveIndices)
Expand Down Expand Up @@ -1564,8 +1626,12 @@ namespace Cpu
return impl::ResampleCoarse<eFormat, eTilingMode, eTextureAddressMode, eFilterMode>(desc, log, options, vmWorkItems);
};

auto impl__ResampleFine = [](const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems) {
return impl::ResampleFine<eFormat, eTilingMode, eTextureAddressMode, eFilterMode>(desc, log, options, vmWorkItems);
auto impl__ResampleFineNormal = [](const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems) {
return impl::ResampleFine<eFormat, eTilingMode, eTextureAddressMode, eFilterMode, impl::TriangleClass::Normal>(desc, log, options, vmWorkItems);
};

auto impl__ResampleFineDegen = [](const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems) {
return impl::ResampleFine<eFormat, eTilingMode, eTextureAddressMode, eFilterMode, impl::TriangleClass::Degenerate>(desc, log, options, vmWorkItems);
};

{
Expand All @@ -1577,7 +1643,9 @@ namespace Cpu

RETURN_STATUS_IF_FAILED(impl__ResampleCoarse(desc, m_log, options, vmWorkItems));

RETURN_STATUS_IF_FAILED(impl__ResampleFine(desc, m_log, options, vmWorkItems));
RETURN_STATUS_IF_FAILED(impl__ResampleFineNormal(desc, m_log, options, vmWorkItems));

RETURN_STATUS_IF_FAILED(impl__ResampleFineDegen(desc, m_log, options, vmWorkItems));

RETURN_STATUS_IF_FAILED(impl::PromoteToSpecialIndices(desc, options, vmWorkItems));

Expand All @@ -1604,4 +1672,4 @@ namespace Cpu
}

} // namespace Cpu
} // namespace omm
} // namespace omm
Loading

0 comments on commit 2e73fb7

Please sign in to comment.