diff --git a/.gitignore b/.gitignore index 51e0346..c4dae26 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ *.prof *.pprof +*bin + *.png # Exclude all png files except for test files !render/testdata/defacto*.png diff --git a/render/internal_test.go b/render/internal_test.go index 4383eeb..11808b4 100644 --- a/render/internal_test.go +++ b/render/internal_test.go @@ -3,8 +3,11 @@ package render import ( "bytes" "errors" + "io" + "os" "testing" + "github.com/soypat/sdf/form3/must3" "github.com/soypat/sdf/form3/obj3/thread" "github.com/soypat/sdf/internal/d3" "gonum.org/v1/gonum/spatial/r3" @@ -71,3 +74,22 @@ func TestSTLWriteReadback(t *testing.T) { } } } + +func TestOctreeMultithread(t *testing.T) { + oct := NewOctreeRenderer(must3.Sphere(1), 8) + oct.concurrent = 2 + buf := make([]Triangle3, oct.concurrent*10) + var err error + var nt int + var model []Triangle3 + for err == nil { + nt, err = oct.ReadTriangles(buf) + model = append(model, buf[nt:]...) + } + if err != io.EOF { + t.Fatal(err) + } + fp, _ := os.Create("mt.stl") + defer fp.Close() + WriteSTL(fp, model) +} diff --git a/render/marchingcubes.go b/render/marchingcubes.go index 83f657f..3fe975b 100644 --- a/render/marchingcubes.go +++ b/render/marchingcubes.go @@ -7,7 +7,8 @@ import ( ) const ( - marchingCubesEpsilon = 1e-12 + marchingCubesEpsilon = 1e-12 + // max number of triangles that can be formed from a single cube. marchingCubesMaxTriangles = 5 ) diff --git a/render/octree_renderer.go b/render/octree_renderer.go index b426d2e..8eb5454 100644 --- a/render/octree_renderer.go +++ b/render/octree_renderer.go @@ -13,6 +13,7 @@ import ( // MarchingCubesOctree renders using marching cubes with octree space sampling. type octree struct { dc dc3 + mu sync.Mutex todo []cube unwritten triangle3Buffer // concurrent goroutine processing. 
@@ -56,6 +57,7 @@ func NewOctreeRenderer(s sdf.SDF3, meshCells int) *octree { dc: *newDc3(s, bb.Min, resolution, levels), unwritten: triangle3Buffer{buf: make([]Triangle3, 0, 1024)}, todo: cubes, //[]cube{{sdf.V3i{0, 0, 0}, levels - 1}}, // process the octree, start at the top level + // concurrent: 2, } } @@ -75,50 +77,100 @@ func (oc *octree) ReadTriangles(dst []Triangle3) (n int, err error) { // Done rendering model. return n, io.EOF } + // Number of additional triangles processed. var nt int - if oc.concurrent <= 1 { - nt = oc.readTriangles(dst[n:]) + if oc.concurrent <= 1 || len(oc.todo) < oc.concurrent || n < oc.concurrent { + tproc, nc, newCubes := oc.readTriangles(dst[n:], oc.todo) + oc.todo = append(oc.todo, newCubes...) + oc.todo = oc.todo[nc:] // this leaks, luckily this is a short lived function? + // oc.todo = append(newCubes, oc.todo[cubesProcessed:]...) // Non leaking slow implementation + nt = tproc } else { - // multi core processing - panic("no concurrency yet") + nt = oc.readTrianglesThreaded(dst[n:]) } n += nt return n, err } -// readTriangles is single threaded implementation of ReadTriangles and only returns -// number of triangles written. -func (oc *octree) readTriangles(dst []Triangle3) (n int) { - cubesProcessed := 0 - var newCubes []cube - for _, cube := range oc.todo { +// readTriangles is single threaded implementation of ReadTriangles. +// todo is the slice of cubes that shall be processed. n is the number of triangles written to dst. +// Returned cubesProcessed is the number of cubes of todo that were completely processed. +// Returned newCubes are non-empty cubes that should be processed in future calls to readTriangles. +// Triangles that were not successfully written to dst are stored in octree unwritten buffer. +// This function is safe to call concurrently.
+func (oc *octree) readTriangles(dst []Triangle3, todo []cube) (n, cubesProcessed int, newCubes []cube) { + for _, cube := range todo { if n == len(dst) { // Finished writing all the buffer break } if n+marchingCubesMaxTriangles > len(dst) { // Not enough room in buffer to write all triangles that could be found by marching cubes. - tmp := make([]Triangle3, 5) + tmp := make([]Triangle3, marchingCubesMaxTriangles) tri, cubes := oc.processCube(tmp, cube) + oc.mu.Lock() oc.unwritten.Write(tmp[:tri]) + oc.mu.Unlock() newCubes = append(newCubes, cubes...) cubesProcessed++ break } tri, cubes := oc.processCube(dst[n:], cube) - newCubes = append(newCubes, cubes...) - cubesProcessed++ n += tri } - oc.todo = append(oc.todo, newCubes...) - oc.todo = oc.todo[cubesProcessed:] // this leaks, luckily this is a short lived function? - // oc.todo = append(newCubes, oc.todo[cubesProcessed:]...) // Non leaking slow implementation - return n + return n, cubesProcessed, newCubes +} + +// readTrianglesThreaded is a multithreaded triangle reader implementation for octree. +func (oc *octree) readTrianglesThreaded(dst []Triangle3) (nt int) { + var wg sync.WaitGroup + div := len(dst) / oc.concurrent + work := make([][]Triangle3, oc.concurrent) + cubeWork := make([][]cube, oc.concurrent) + newCubesC := make([][]cube, oc.concurrent) + divC := len(oc.todo) / oc.concurrent + for i := 0; i < oc.concurrent; i++ { + i := i // Escape loop variable. + wg.Add(1) + go func() { + start := div * i + work[i] = dst[start : start+div] + cubeWork[i] = oc.todo[i*divC : (i+1)*divC] + ntc, nc, newC := oc.readTriangles(work[i], cubeWork[i]) + newCubesC[i] = newC + work[i] = work[i][:ntc] + cubeWork[i] = cubeWork[i][nc:] + wg.Done() + }() + } + wg.Wait() + // Consolidate work done. + offset := 0 + oc.todo = oc.todo[len(oc.todo):] + for i := 0; i < oc.concurrent; i++ { + // Triangles written. 
+ start := div*i - offset + if i != oc.concurrent-1 && len(work[i]) != div { + offset += div - len(work[i]) + copy(dst[start+len(work[i]):], dst[start+div:]) + } + nt += len(work[i]) + // Cubes unprocessed. + if len(cubeWork[i]) != 0 { + oc.todo = append(oc.todo, cubeWork[i]...) + } + // New Cubes + if len(newCubesC[i]) != 0 { + oc.todo = append(oc.todo, newCubesC[i]...) + } + } + return nt } // Process a cube. Generate triangles, or more cubes. +// Safe to call concurrently. func (oc *octree) processCube(dst []Triangle3, c cube) (writtenTriangles int, newCubes []cube) { if c.n == 1 { // this cube is at the required resolution