Skip to content

Commit 3c6c762

Browse files
perf(dijkstra): prefetch instructions ~10-20% improvement (#14)
* perf(dijkstra): prefetch instructions ~10-20% improvement
* fix: _mm_prefetch accepts char* not float*
* fix: stop building py39 on appveyor
* install: drop support for py27 and py35
1 parent 663e5fe commit 3c6c762

8 files changed

+33
-32
lines changed

appveyor.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ environment:
44
- PYTHON: "C:\\Python36"
55
- PYTHON: "C:\\Python37"
66
- PYTHON: "C:\\Python38"
7-
- PYTHON: "C:\\Python39"
7+
# - PYTHON: "C:\\Python39"
88
- PYTHON: "C:\\Python36-x64"
99
- PYTHON: "C:\\Python37-x64"
1010
- PYTHON: "C:\\Python38-x64"
11-
- PYTHON: "C:\\Python39-x64"
11+
# - PYTHON: "C:\\Python39-x64"
1212

1313
install:
1414
# We need wheel installed to build wheels

dijkstra3d.hpp

+26-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <cstdint>
2121
#include <queue>
2222
#include <vector>
23+
#include <xmmintrin.h>
2324

2425
#include "./libdivide.h"
2526

@@ -202,12 +203,35 @@ std::vector<OUT> dijkstra3d(
202203

203204
while (!queue.empty()) {
204205
loc = queue.top().value;
206+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc - 1]), _MM_HINT_T0);
205207
queue.pop();
206208

207209
if (std::signbit(dist[loc])) {
208210
continue;
209211
}
210212

213+
// As early as possible, start fetching the
214+
// data from RAM b/c the annotated lines below
215+
// have a 30-50% cache miss rate.
216+
_mm_prefetch(reinterpret_cast<char*>(&field[loc - 1]), _MM_HINT_T0);
217+
_mm_prefetch(reinterpret_cast<char*>(&field[loc + sxy - 1]), _MM_HINT_T0);
218+
_mm_prefetch(reinterpret_cast<char*>(&field[loc - sxy - 1]), _MM_HINT_T0);
219+
_mm_prefetch(reinterpret_cast<char*>(&field[loc + sxy + sx - 1]), _MM_HINT_T0);
220+
_mm_prefetch(reinterpret_cast<char*>(&field[loc + sxy - sx - 1]), _MM_HINT_T0);
221+
_mm_prefetch(reinterpret_cast<char*>(&field[loc - sxy + sx - 1]), _MM_HINT_T0);
222+
_mm_prefetch(reinterpret_cast<char*>(&field[loc - sxy - sx - 1]), _MM_HINT_T0);
223+
_mm_prefetch(reinterpret_cast<char*>(&field[loc + sx - 1]), _MM_HINT_T0);
224+
_mm_prefetch(reinterpret_cast<char*>(&field[loc - sx - 1]), _MM_HINT_T0);
225+
226+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc + sxy - 1]), _MM_HINT_T0);
227+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc - sxy - 1]), _MM_HINT_T0);
228+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc + sxy + sx - 1]), _MM_HINT_T0);
229+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc + sxy - sx - 1]), _MM_HINT_T0);
230+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc - sxy + sx - 1]), _MM_HINT_T0);
231+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc - sxy - sx - 1]), _MM_HINT_T0);
232+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc + sx - 1]), _MM_HINT_T0);
233+
_mm_prefetch(reinterpret_cast<char*>(&dist[loc - sx - 1]), _MM_HINT_T0);
234+
211235
if (power_of_two) {
212236
z = loc >> (xshift + yshift);
213237
y = (loc - (z << (xshift + yshift))) >> xshift;
@@ -227,12 +251,12 @@ std::vector<OUT> dijkstra3d(
227251
}
228252

229253
neighboridx = loc + neighborhood[i];
230-
delta = static_cast<float>(field[neighboridx]);
254+
delta = static_cast<float>(field[neighboridx]); // high cache miss
231255

232256
// Visited nodes are negative and thus the current node
233257
// will always be less than as field is filled with non-negative
234258
// integers.
235-
if (dist[loc] + delta < dist[neighboridx]) {
259+
if (dist[loc] + delta < dist[neighboridx]) { // high cache miss
236260
dist[neighboridx] = dist[loc] + delta;
237261
parents[neighboridx] = loc + 1; // +1 to avoid 0 ambiguity
238262

manylinux1.Dockerfile

-10
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,22 @@ ENV CXX "g++"
99

1010
RUN rm -rf *.so build __pycache__ dist
1111

12-
RUN /opt/python/cp27-cp27m/bin/pip2.7 install pip --upgrade
13-
RUN /opt/python/cp35-cp35m/bin/pip3.5 install pip --upgrade
1412
RUN /opt/python/cp36-cp36m/bin/pip3.6 install pip --upgrade
1513
RUN /opt/python/cp37-cp37m/bin/pip3.7 install pip --upgrade
1614
RUN /opt/python/cp38-cp38/bin/pip3.8 install pip --upgrade
1715

18-
RUN /opt/python/cp27-cp27m/bin/pip2.7 install -r requirements.txt pytest
19-
RUN /opt/python/cp35-cp35m/bin/pip3.5 install -r requirements.txt pytest
2016
RUN /opt/python/cp36-cp36m/bin/pip3.6 install -r requirements.txt pytest
2117
RUN /opt/python/cp37-cp37m/bin/pip3.7 install -r requirements.txt pytest
2218
RUN /opt/python/cp38-cp38/bin/pip3.8 install -r requirements.txt pytest
2319

24-
RUN /opt/python/cp27-cp27m/bin/python2.7 setup.py develop
25-
RUN /opt/python/cp35-cp35m/bin/python3.5 setup.py develop
2620
RUN /opt/python/cp36-cp36m/bin/python3.6 setup.py develop
2721
RUN /opt/python/cp37-cp37m/bin/python3.7 setup.py develop
2822
RUN /opt/python/cp38-cp38/bin/python3.8 setup.py develop
2923

30-
RUN /opt/python/cp27-cp27m/bin/python2.7 -m pytest -v -x automated_test.py
31-
RUN /opt/python/cp35-cp35m/bin/python3.5 -m pytest -v -x automated_test.py
3224
RUN /opt/python/cp36-cp36m/bin/python3.6 -m pytest -v -x automated_test.py
3325
RUN /opt/python/cp37-cp37m/bin/python3.7 -m pytest -v -x automated_test.py
3426
RUN /opt/python/cp38-cp38/bin/python3.8 -m pytest -v -x automated_test.py
3527

36-
RUN /opt/python/cp27-cp27m/bin/python2.7 setup.py bdist_wheel
37-
RUN /opt/python/cp35-cp35m/bin/python3.5 setup.py bdist_wheel
3828
RUN /opt/python/cp36-cp36m/bin/python3.6 setup.py bdist_wheel
3929
RUN /opt/python/cp37-cp37m/bin/python3.7 setup.py bdist_wheel
4030
RUN /opt/python/cp38-cp38/bin/python3.8 setup.py bdist_wheel

manylinux2010.Dockerfile

-10
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,22 @@ ENV CXX "g++"
99

1010
RUN rm -rf *.so build __pycache__ dist
1111

12-
RUN /opt/python/cp27-cp27m/bin/pip2.7 install pip --upgrade
13-
RUN /opt/python/cp35-cp35m/bin/pip3.5 install pip --upgrade
1412
RUN /opt/python/cp36-cp36m/bin/pip3.6 install pip --upgrade
1513
RUN /opt/python/cp37-cp37m/bin/pip3.7 install pip --upgrade
1614
RUN /opt/python/cp38-cp38/bin/pip3.8 install pip --upgrade
1715

18-
RUN /opt/python/cp27-cp27m/bin/pip2.7 install -r requirements.txt pytest
19-
RUN /opt/python/cp35-cp35m/bin/pip3.5 install -r requirements.txt pytest
2016
RUN /opt/python/cp36-cp36m/bin/pip3.6 install -r requirements.txt pytest
2117
RUN /opt/python/cp37-cp37m/bin/pip3.7 install -r requirements.txt pytest
2218
RUN /opt/python/cp38-cp38/bin/pip3.8 install -r requirements.txt pytest
2319

24-
RUN /opt/python/cp27-cp27m/bin/python2.7 setup.py develop
25-
RUN /opt/python/cp35-cp35m/bin/python3.5 setup.py develop
2620
RUN /opt/python/cp36-cp36m/bin/python3.6 setup.py develop
2721
RUN /opt/python/cp37-cp37m/bin/python3.7 setup.py develop
2822
RUN /opt/python/cp38-cp38/bin/python3.8 setup.py develop
2923

30-
RUN /opt/python/cp27-cp27m/bin/python2.7 -m pytest -v -x automated_test.py
31-
RUN /opt/python/cp35-cp35m/bin/python3.5 -m pytest -v -x automated_test.py
3224
RUN /opt/python/cp36-cp36m/bin/python3.6 -m pytest -v -x automated_test.py
3325
RUN /opt/python/cp37-cp37m/bin/python3.7 -m pytest -v -x automated_test.py
3426
RUN /opt/python/cp38-cp38/bin/python3.8 -m pytest -v -x automated_test.py
3527

36-
RUN /opt/python/cp27-cp27m/bin/python2.7 setup.py bdist_wheel
37-
RUN /opt/python/cp35-cp35m/bin/python3.5 setup.py bdist_wheel
3828
RUN /opt/python/cp36-cp36m/bin/python3.6 setup.py bdist_wheel
3929
RUN /opt/python/cp37-cp37m/bin/python3.7 setup.py bdist_wheel
4030
RUN /opt/python/cp38-cp38/bin/python3.8 setup.py bdist_wheel

manylinux2014.Dockerfile

-5
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,26 @@ ENV CXX "g++"
99

1010
RUN rm -rf *.so build __pycache__ dist
1111

12-
RUN /opt/python/cp35-cp35m/bin/pip3.5 install pip --upgrade
1312
RUN /opt/python/cp36-cp36m/bin/pip3.6 install pip --upgrade
1413
RUN /opt/python/cp37-cp37m/bin/pip3.7 install pip --upgrade
1514
RUN /opt/python/cp38-cp38/bin/pip3.8 install pip --upgrade
1615
RUN /opt/python/cp39-cp39/bin/pip3.9 install pip --upgrade
1716

18-
RUN /opt/python/cp35-cp35m/bin/pip3.5 install -r requirements.txt pytest
1917
RUN /opt/python/cp36-cp36m/bin/pip3.6 install -r requirements.txt pytest
2018
RUN /opt/python/cp37-cp37m/bin/pip3.7 install -r requirements.txt pytest
2119
RUN /opt/python/cp38-cp38/bin/pip3.8 install -r requirements.txt pytest
2220
RUN /opt/python/cp39-cp39/bin/pip3.9 install -r requirements.txt pytest
2321

24-
RUN /opt/python/cp35-cp35m/bin/python3.5 setup.py develop
2522
RUN /opt/python/cp36-cp36m/bin/python3.6 setup.py develop
2623
RUN /opt/python/cp37-cp37m/bin/python3.7 setup.py develop
2724
RUN /opt/python/cp38-cp38/bin/python3.8 setup.py develop
2825
RUN /opt/python/cp39-cp39/bin/python3.9 setup.py develop
2926

30-
RUN /opt/python/cp35-cp35m/bin/python3.5 -m pytest -v -x automated_test.py
3127
RUN /opt/python/cp36-cp36m/bin/python3.6 -m pytest -v -x automated_test.py
3228
RUN /opt/python/cp37-cp37m/bin/python3.7 -m pytest -v -x automated_test.py
3329
RUN /opt/python/cp38-cp38/bin/python3.8 -m pytest -v -x automated_test.py
3430
RUN /opt/python/cp39-cp39/bin/python3.9 -m pytest -v -x automated_test.py
3531

36-
RUN /opt/python/cp35-cp35m/bin/python3.5 setup.py bdist_wheel
3732
RUN /opt/python/cp36-cp36m/bin/python3.6 setup.py bdist_wheel
3833
RUN /opt/python/cp37-cp37m/bin/python3.7 setup.py bdist_wheel
3934
RUN /opt/python/cp38-cp38/bin/python3.8 setup.py bdist_wheel

setup.py

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def read(fname):
2121
setuptools.setup(
2222
name="dijkstra3d",
2323
version="1.6.0",
24+
python_requires="~=3.6", # >= 3.6 < 4.0
2425
setup_requires=['numpy'],
2526
extras_require={
2627
':python_version == "2.7"': ['futures'],

test.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
#include "dijkstra3d.hpp"
22
#include <cstdint>
33
#include <cstdio>
4+
#include <vector>
45

56
int main () {
67
const int dim = 256;
78
const int voxels = dim * dim * dim;
8-
uint8_t* labels = new uint8_t[voxels];
9+
uint32_t* labels = new uint32_t[voxels];
910
for (int i = 0; i < voxels; i++) {
1011
labels[i] = 1;
1112
}
1213

13-
uint32_t* x = dijkstra::parental_field3d<uint8_t>(labels, dim, dim, dim, 0);
14+
std::vector<uint32_t> x = dijkstra::dijkstra3d<uint32_t>(labels, dim, dim, dim, 0, voxels - 1);
1415

1516
printf("\n%d\n", x[0]);
1617

tox.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = py27,py36,py37,py38,py39
2+
envlist = py36,py37,py38,py39
33

44
[testenv]
55
platform = darwin

0 commit comments

Comments
 (0)