Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion cmake/DaemonFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ else()
if (NOT FLAG_GNUXX23)
message(FATAL_ERROR "GNU++23 is not supported by the compiler")
endif()
else()
elseif(NACL)
# PNaCl only defines isascii if __STRICT_ANSI__ is not defined,
# always prefer GNU dialect.
try_cxx_flag(GNUXX14 "-std=gnu++14")
Expand All @@ -359,6 +359,21 @@ else()
message(FATAL_ERROR "GNU++14 is not supported by the compiler")
endif()
endif()
else()
try_cxx_flag(GNUXX17 "-std=gnu++17")
if (NOT FLAG_GNUXX17)
try_cxx_flag(GNUXX1Z "-std=gnu++1z")
if (NOT FLAG_GNUXX1Z)
message(FATAL_ERROR "GNU++17 is not supported by the compiler")
endif()
endif()
endif()
endif()

# Probe for OpenMP support, but only for native (non-NaCl) builds that
# produce at least one of the engine binaries listed below.
if (NOT NACL AND (BUILD_CLIENT OR BUILD_TTY_CLIENT OR BUILD_SERVER OR BUILD_DUMMY_APP))
# NOTE(review): try_cxx_flag presumably adds the flag to the C++ flags when the
# compiler accepts it and reports the outcome in FLAG_<name> — confirm.
try_cxx_flag(FOPENMP "-fopenmp")
if (FLAG_FOPENMP)
# USE_OPENMP is the macro the C++ sources test before touching OpenMP APIs.
add_definitions(-DUSE_OPENMP)
endif()
endif()

Expand Down
2 changes: 2 additions & 0 deletions src/engine/framework/System.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,8 @@ __declspec(dllexport) void DummyPreventingLinkerFromBreakingASLR() {}
// This is why ALIGN_STACK_FOR_MINGW is needed (normally gcc would generate alignment code in main()).
ALIGN_STACK_FOR_MINGW int main(int argc, char** argv)
{
Com_ReadOmpMaxThreads();

// Initialize the engine. Any errors here are fatal.
try {
Sys::Init(argc, argv);
Expand Down
36 changes: 36 additions & 0 deletions src/engine/qcommon/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ Maryland 20850 USA.
#include "sys/sys_events.h"
#include <common/FileSystem.h>

#if defined(__GNUC__) && defined(USE_OPENMP)
// NOTE(review): the omp_* functions used in this file are declared in <omp.h>;
// this header is presumably relied on to pull it in — confirm.
#include <parallel/algorithm>
// User-requested OpenMP thread count. Com_ReadOmpThreads() treats 0 as
// "choose automatically from the runtime's maximum".
// NOTE(review): the trailing 0, 0, 32 are presumably default, minimum and
// maximum — confirm against Cvar::Range.
static Cvar::Range<Cvar::Cvar<int>> common_ompThreads(
"common.ompThreads", "OpenMP threads", Cvar::NONE, 0, 0, 32 );
#endif

cvar_t *com_speeds;
cvar_t *com_timescale;
cvar_t *com_dropsim; // 0.0 to 1.0, simulated packet drops
Expand Down Expand Up @@ -765,6 +771,34 @@ int Com_ModifyMsec( int msec )
return msec;
}

// Value of omp_get_max_threads() as captured by Com_ReadOmpMaxThreads().
// Stays at 1 when the build has no OpenMP support.
static int ompMaxThreads = 1;
// Thread count that Com_ApplyOmpThreads() passes to omp_set_num_threads();
// recomputed by Com_ReadOmpThreads().
static int ompThreads = 1;

/*
=================
Com_ReadOmpMaxThreads

Stores the result of omp_get_max_threads() in ompMaxThreads.
Compiles to an empty function unless both __GNUC__ and USE_OPENMP are defined.
=================
*/
void Com_ReadOmpMaxThreads()
{
#if defined(__GNUC__) && defined(USE_OPENMP)
ompMaxThreads = omp_get_max_threads();
#endif
}

/*
=================
Com_ReadOmpThreads

Recomputes ompThreads:
 - a non-zero common.ompThreads cvar value is used as-is;
 - otherwise, with at most two available threads, all of them are used;
 - otherwise half of the available threads are used, but never fewer than two.

Compiles to an empty function unless both __GNUC__ and USE_OPENMP are defined.
=================
*/
void Com_ReadOmpThreads()
{
#if defined(__GNUC__) && defined(USE_OPENMP)
	const int requested = common_ompThreads.Get();

	if ( requested != 0 )
	{
		// Explicit user choice wins over the automatic heuristic.
		ompThreads = requested;
	}
	else if ( ompMaxThreads <= 2 )
	{
		ompThreads = ompMaxThreads;
	}
	else
	{
		ompThreads = std::max( 2, ompMaxThreads / 2 );
	}
#endif
}

/*
=================
Com_ApplyOmpThreads

Passes the current ompThreads value to omp_set_num_threads().
Compiles to an empty function unless both __GNUC__ and USE_OPENMP are defined.
=================
*/
void Com_ApplyOmpThreads()
{
#if defined(__GNUC__) && defined(USE_OPENMP)
omp_set_num_threads( ompThreads );
#endif
}

/*
=================
Com_Frame
Expand Down Expand Up @@ -804,6 +838,8 @@ void Com_Frame()
timeBeforeClient = 0;
timeAfter = 0;

Com_ReadOmpThreads();

// Check to make sure we don't have any http data waiting
// comment this out until I get things going better under win32
// old net chan encryption key
Expand Down
4 changes: 4 additions & 0 deletions src/engine/qcommon/qcommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,10 @@ void Hunk_FreeTempMemory( void *buf );
void Com_Init();
void Com_Frame();

// Captures the OpenMP runtime's maximum thread count (no-op without OpenMP).
void Com_ReadOmpMaxThreads();
// Recomputes the OpenMP thread count to use from the cvar and the captured maximum (no-op without OpenMP).
void Com_ReadOmpThreads();
// Hands the computed thread count to the OpenMP runtime (no-op without OpenMP).
void Com_ApplyOmpThreads();

/*
==============================================================

Expand Down
56 changes: 39 additions & 17 deletions src/engine/renderer/tr_surface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include "gl_shader.h"
#include "Material.h"

#if defined(__GNUC__) && defined(USE_OPENMP)
#include <parallel/algorithm>
#endif

/*
==============================================================================
THIS ENTIRE FILE IS BACK END!
Expand Down Expand Up @@ -1286,48 +1290,66 @@ void Tess_SurfaceIQM( srfIQModel_t *surf ) {
byte *modelBlendIndex = model->blendIndexes + 4 * firstVertex;
byte *modelBlendWeight = model->blendWeights + 4 * firstVertex;

for ( ; tessVertex < lastVertex; tessVertex++,
modelPosition += 3, modelNormal += 3,
modelTangent += 3, modelBitangent += 3,
modelTexcoord += 2 )
std::vector<size_t> indexes;
indexes.resize( surf->num_vertexes );
for ( size_t i = 0; i < indexes.size(); i++ ) { indexes[ i ] = i; }

auto process = [&]( const size_t& i ) -> void
{
shaderVertex_t *tessVertex_ = tessVertex + i;
float *modelPosition_ = modelPosition + 3 * i;
float *modelNormal_ = modelNormal + 3 * i;
float *modelTangent_ = modelTangent + 3 * i;
float *modelBitangent_ = modelBitangent + 3 * i;
float *modelTexcoord_ = modelTexcoord + 2 * i;
byte *modelBlendIndex_ = modelBlendIndex + 4 * i;
byte *modelBlendWeight_ = modelBlendWeight + 4 * i;

vec3_t position = {}, tangent = {}, binormal = {}, normal = {};

byte *lastBlendIndex = modelBlendIndex + 4;
byte *lastBlendIndex = modelBlendIndex_ + 4;

for ( ; modelBlendIndex < lastBlendIndex; modelBlendIndex++,
modelBlendWeight++ )
for ( ; modelBlendIndex_ < lastBlendIndex; modelBlendIndex_++,
modelBlendWeight_++ )
{
if ( *modelBlendWeight == 0 )
if ( *modelBlendWeight_ == 0 )
{
continue;
}

float weight = *modelBlendWeight * weightFactor;
float weight = *modelBlendWeight_ * weightFactor;
vec3_t tmp;

TransformPoint( &bones[ *modelBlendIndex ], modelPosition, tmp );
TransformPoint( &bones[ *modelBlendIndex_ ], modelPosition_, tmp );
VectorMA( position, weight, tmp, position );

TransformNormalVector( &bones[ *modelBlendIndex ], modelNormal, tmp );
TransformNormalVector( &bones[ *modelBlendIndex_ ], modelNormal_, tmp );
VectorMA( normal, weight, tmp, normal );

TransformNormalVector( &bones[ *modelBlendIndex ], modelTangent, tmp );
TransformNormalVector( &bones[ *modelBlendIndex_ ], modelTangent_, tmp );
VectorMA( tangent, weight, tmp, tangent );

TransformNormalVector( &bones[ *modelBlendIndex ], modelBitangent, tmp );
TransformNormalVector( &bones[ *modelBlendIndex_ ], modelBitangent_, tmp );
VectorMA( binormal, weight, tmp, binormal );
}

VectorNormalizeFast( normal );
VectorNormalizeFast( tangent );
VectorNormalizeFast( binormal );
VectorCopy( position, tessVertex->xyz );
VectorCopy( position, tessVertex_->xyz );

R_TBNtoQtangentsFast( tangent, binormal, normal, tessVertex->qtangents );
R_TBNtoQtangentsFast( tangent, binormal, normal, tessVertex_->qtangents );

Vector2Copy( modelTexcoord, tessVertex->texCoords );
}
Vector2Copy( modelTexcoord_, tessVertex_->texCoords );
};

#if defined(__GNUC__) && defined(USE_OPENMP)
Com_ApplyOmpThreads();

__gnu_parallel::for_each( indexes.cbegin(), indexes.cend(), process );
#else
std::for_each( indexes.cbegin(), indexes.cend(), process );
#endif
}
}
else
Expand Down
Loading