From c19d35f7ffefdf91a2833f2689866398892ed8f9 Mon Sep 17 00:00:00 2001 From: Marek Wydmuch Date: Tue, 3 Jan 2023 02:27:54 +0100 Subject: [PATCH 1/9] Remove most of the assembly code --- src/vizdoom/src/CMakeLists.txt | 209 +- src/vizdoom/src/asm_ia32/a.asm | 812 -------- src/vizdoom/src/asm_ia32/misc.asm | 200 -- src/vizdoom/src/asm_ia32/tmap.asm | 1893 ------------------- src/vizdoom/src/asm_ia32/tmap2.asm | 640 ------- src/vizdoom/src/asm_ia32/tmap3.asm | 344 ---- src/vizdoom/src/asm_x86_64/tmap3.asm | 150 -- src/vizdoom/src/asm_x86_64/tmap3.s | 141 -- src/vizdoom/src/d_main.cpp | 2 + src/vizdoom/src/doomtype.h | 52 - src/vizdoom/src/mscinlines.h | 351 ---- src/vizdoom/src/nodebuild.cpp | 92 - src/vizdoom/src/nodebuild.h | 31 - src/vizdoom/src/nodebuild_classify_sse2.cpp | 144 -- src/vizdoom/src/posix/sdl/i_main.cpp | 48 - src/vizdoom/src/r_draw.cpp | 123 +- src/vizdoom/src/r_draw.h | 35 - src/vizdoom/src/r_drawt.cpp | 24 - src/vizdoom/src/r_drawt_copy.cpp | 1372 -------------- src/vizdoom/src/r_main.cpp | 3 - src/vizdoom/src/r_plane.cpp | 25 +- src/vizdoom/src/v_palette.cpp | 24 - src/vizdoom/src/v_video.h | 4 - src/vizdoom/src/x86.cpp | 66 +- 24 files changed, 12 insertions(+), 6773 deletions(-) delete mode 100644 src/vizdoom/src/asm_ia32/a.asm delete mode 100644 src/vizdoom/src/asm_ia32/misc.asm delete mode 100644 src/vizdoom/src/asm_ia32/tmap.asm delete mode 100644 src/vizdoom/src/asm_ia32/tmap2.asm delete mode 100644 src/vizdoom/src/asm_ia32/tmap3.asm delete mode 100644 src/vizdoom/src/asm_x86_64/tmap3.asm delete mode 100644 src/vizdoom/src/asm_x86_64/tmap3.s delete mode 100644 src/vizdoom/src/mscinlines.h delete mode 100644 src/vizdoom/src/nodebuild_classify_sse2.cpp delete mode 100644 src/vizdoom/src/r_drawt_copy.cpp diff --git a/src/vizdoom/src/CMakeLists.txt b/src/vizdoom/src/CMakeLists.txt index 5d8543634..1072a980b 100644 --- a/src/vizdoom/src/CMakeLists.txt +++ b/src/vizdoom/src/CMakeLists.txt @@ -12,21 +12,6 @@ include( CheckCXXCompilerFlag ) include( CheckLibraryExists ) include( FindPkgConfig ) -if( NOT APPLE ) - option( NO_ASM "Disable assembly code" OFF ) -else( NOT APPLE ) - # At the moment asm code doesn't work with OS X, so disable by default - option( NO_ASM "Disable assembly code" ON ) -endif( NOT APPLE ) -if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) - option( NO_STRIP "Do not strip Release or MinSizeRel builds" ) - # At least some versions of Xcode fail if you strip with the linker - # instead of the separate strip utility. - if( APPLE ) - set( NO_STRIP ON ) - endif( APPLE ) -endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) - option( DYN_FLUIDSYNTH "Dynamically load fluidsynth" ON ) if( CMAKE_SIZEOF_VOID_P MATCHES "8" ) @@ -100,7 +85,6 @@ if( WIN32 ) ) set( FMOD_INC_PATH_SUFFIXES PATH_SUFFIXES inc ) set( FMOD_LIB_PATH_SUFFIXES PATH_SUFFIXES lib ) - set( NASM_NAMES nasmw nasm ) message( "Could not find xinput.h. XInput will be disabled." ) @@ -189,8 +173,7 @@ else( WIN32 ) endif( GTK2_FOUND ) endif( NOT NO_GTK ) endif( APPLE ) - set( NASM_NAMES nasm ) - + if( NO_GTK ) add_definitions( -DNO_GTK=1 ) endif( NO_GTK ) @@ -298,143 +281,6 @@ endif( NO_OPENAL ) find_package( FluidSynth ) -# Search for NASM - -if( NOT NO_ASM ) - if( UNIX AND X64 ) - find_program( GAS_PATH as ) - - if( GAS_PATH ) - set( ASSEMBLER ${GAS_PATH} ) - else( GAS_PATH ) - message( STATUS "Could not find as. Disabling assembly code." ) - set( NO_ASM ON ) - endif( GAS_PATH ) - else( UNIX AND X64 ) - find_program( NASM_PATH NAMES ${NASM_NAMES} ) - find_program( YASM_PATH yasm ) - - if( X64 ) - if( YASM_PATH ) - set( ASSEMBLER ${YASM_PATH} ) - else( YASM_PATH ) - message( STATUS "Could not find YASM. Disabling assembly code." ) - set( NO_ASM ON ) - endif( YASM_PATH ) - else( X64 ) - if( NASM_PATH ) - set( ASSEMBLER ${NASM_PATH} ) - else( NASM_PATH ) - message( STATUS "Could not find NASM. Disabling assembly code." ) - set( NO_ASM ON ) - endif( NASM_PATH ) - endif( X64 ) - endif( UNIX AND X64 ) - - # I think the only reason there was a version requirement was because the - # executable name for Windows changed from 0.x to 2.0, right? This is - # how to do it in case I need to do something similar later. - - # execute_process( COMMAND ${NASM_PATH} -v - # OUTPUT_VARIABLE NASM_VER_STRING ) - # string( REGEX REPLACE ".*version ([0-9]+[.][0-9]+).*" "\\1" NASM_VER "${NASM_VER_STRING}" ) - # if( NOT NASM_VER LESS 2 ) - # message( SEND_ERROR "NASM version should be 2 or later. (Installed version is ${NASM_VER}.)" ) - # endif( NOT NASM_VER LESS 2 ) -endif( NOT NO_ASM ) - -if( NOT NO_ASM ) - # Valgrind support is meaningless without assembly code. - if( VALGRIND ) - add_definitions( -DVALGRIND_AWARE=1 ) - # If you're Valgrinding, you probably want to keep symbols around. - set( NO_STRIP ON ) - endif( VALGRIND ) - - # Tell CMake how to assemble our files - if( UNIX ) - set( ASM_OUTPUT_EXTENSION .o ) - if( X64 ) - set( ASM_FLAGS ) - set( ASM_SOURCE_EXTENSION .s ) - else( X64 ) - if( APPLE ) - set( ASM_FLAGS -fmacho -DM_TARGET_MACHO ) - else( APPLE ) - set( ASM_FLAGS -felf -DM_TARGET_LINUX ) - endif( APPLE ) - set( ASM_FLAGS "${ASM_FLAGS}" -i${CMAKE_CURRENT_SOURCE_DIR}/ ) - set( ASM_SOURCE_EXTENSION .asm ) - endif( X64 ) - else( UNIX ) - set( ASM_OUTPUT_EXTENSION .obj ) - set( ASM_SOURCE_EXTENSION .asm ) - if( X64 ) - set( ASM_FLAGS -f win64 -DWIN32 -DWIN64 ) - else( X64 ) - set( ASM_FLAGS -f win32 -DWIN32 -i${CMAKE_CURRENT_SOURCE_DIR}/ ) - endif( X64 ) - endif( UNIX ) - if( WIN32 AND NOT X64 ) - set( FIXRTEXT fixrtext ) - else( WIN32 AND NOT X64 ) - set( FIXRTEXT "" ) - endif( WIN32 AND NOT X64 ) - message( STATUS "Selected assembler: ${ASSEMBLER}" ) - MACRO( ADD_ASM_FILE indir infile ) - set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" ) - if( WIN32 AND NOT X64 ) - set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" ) - else( WIN32 AND NOT X64 ) - set( FIXRTEXT_${infile} COMMAND "" ) - endif( WIN32 AND NOT X64 ) - add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}} - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir} - COMMAND ${ASSEMBLER} ${ASM_FLAGS} -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}${ASM_SOURCE_EXTENSION}" - ${FIXRTEXT_${infile}} - DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} ) - set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" ) - ENDMACRO( ADD_ASM_FILE ) -endif( NOT NO_ASM ) - -# Decide on SSE setup - -set( SSE_MATTERS NO ) - -# with global use of SSE 2 we do not need special handling for selected files -if (NOT ZDOOM_USE_SSE2) - # SSE only matters on 32-bit targets. We check compiler flags to know if we can do it. - if( CMAKE_SIZEOF_VOID_P MATCHES "4" AND NOT CMAKE_OSX_ARCHITECTURES MATCHES ppc ) - CHECK_CXX_COMPILER_FLAG( "-msse2 -mfpmath=sse" CAN_DO_MFPMATH ) - CHECK_CXX_COMPILER_FLAG( -arch:SSE2 CAN_DO_ARCHSSE2 ) - if( CAN_DO_MFPMATH ) - set( SSE1_ENABLE "-msse -mfpmath=sse" ) - set( SSE2_ENABLE "-msse2 -mfpmath=sse" ) - set( SSE_MATTERS YES ) - elseif( CAN_DO_ARCHSSE2 ) - set( SSE1_ENABLE -arch:SSE ) - set( SSE2_ENABLE -arch:SSE2 ) - set( SSE_MATTERS YES ) - endif( CAN_DO_MFPMATH ) - endif( CMAKE_SIZEOF_VOID_P MATCHES "4" AND NOT CMAKE_OSX_ARCHITECTURES MATCHES ppc ) -endif (NOT ZDOOM_USE_SSE2) - -if( SSE_MATTERS ) - if( WIN32 ) - set( BACKPATCH 1 CACHE BOOL "Enable backpatching." ) - else( WIN32 ) - CHECK_FUNCTION_EXISTS(mprotect HAVE_MPROTECT) - if( HAVE_MPROTECT ) - set( BACKPATCH 1 CACHE BOOL "Enable backpatching." ) - else( HAVE_MPROTECT ) - set( BACKPATCH 0 ) - endif( HAVE_MPROTECT ) - endif( WIN32 ) - set( SSE 1 CACHE BOOL "Build SSE and SSE2 versions of key code." ) -else( SSE_MATTERS ) - set( BACKPATCH 0 ) -endif( SSE_MATTERS ) - # Set up flags for GCC if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) @@ -643,24 +489,6 @@ else( WIN32 ) set( OTHER_SYSTEM_SOURCES ${PLAT_WIN32_SOURCES} ${PLAT_OSX_SOURCES} ${PLAT_COCOA_SOURCES} ) endif( WIN32 ) -if( NOT ASM_SOURCES ) - set( ASM_SOURCES "" ) -endif( NOT ASM_SOURCES ) - -if( NO_ASM ) - add_definitions( -DNOASM ) -else( NO_ASM ) - if( X64 ) - ADD_ASM_FILE( asm_x86_64 tmap3 ) - else( X64 ) - ADD_ASM_FILE( asm_ia32 a ) - ADD_ASM_FILE( asm_ia32 misc ) - ADD_ASM_FILE( asm_ia32 tmap ) - ADD_ASM_FILE( asm_ia32 tmap2 ) - ADD_ASM_FILE( asm_ia32 tmap3 ) - endif( X64 ) -endif( NO_ASM ) - add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.c ${CMAKE_CURRENT_BINARY_DIR}/xlat_parser.h COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/xlat/xlat_parser.y . COMMAND lemon xlat_parser.y @@ -673,18 +501,6 @@ add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/sc_man_scanner.h include_directories( ${CMAKE_CURRENT_BINARY_DIR} ) -if( SSE_MATTERS ) - if( SSE ) - set( X86_SOURCES nodebuild_classify_sse2.cpp ) - set_source_files_properties( nodebuild_classify_sse2.cpp PROPERTIES COMPILE_FLAGS "${SSE2_ENABLE}" ) - else( SSE ) - add_definitions( -DDISABLE_SSE ) - endif( SSE ) -else( SSE_MATTERS ) - add_definitions( -DDISABLE_SSE ) - set( X86_SOURCES ) -endif( SSE_MATTERS ) - if( SNDFILE_FOUND ) add_definitions( -DHAVE_SNDFILE ) endif( SNDFILE_FOUND ) @@ -701,6 +517,8 @@ endif( DYN_FLUIDSYNTH ) # there's generally a new cpp for every header so this file will get changed if( WIN32 ) set( EXTRA_HEADER_DIRS win32/*.h ) +elseif( APPLE ) + set( EXTRA_HEADER_DIRS posix/*.h posix/sdl/*.h ) else( WIN32 ) set( EXTRA_HEADER_DIRS posix/*.h posix/sdl/*.h ) endif( WIN32 ) @@ -812,23 +630,12 @@ set( NOT_COMPILED_SOURCE_FILES xlat/xlat_parser.y xlat_parser.c xlat_parser.h - - # We could have the ASM macro add these files, but it wouldn't add all - # platforms. - asm_ia32/a.asm - asm_ia32/misc.asm - asm_ia32/tmap.asm - asm_ia32/tmap2.asm - asm_ia32/tmap3.asm - asm_x86_64/tmap3.asm - asm_x86_64/tmap3.s ) add_executable( vizdoom WIN32 MACOSX_BUNDLE ${HEADER_FILES} ${NOT_COMPILED_SOURCE_FILES} __autostart.cpp - ${ASM_SOURCES} ${SYSTEM_SOURCES} ${X86_SOURCES} x86.cpp @@ -1153,7 +960,7 @@ add_executable( vizdoom WIN32 MACOSX_BUNDLE viz_labels.cpp viz_main.cpp viz_message_queue.cpp - viz_buffers.cpp + viz_buffers.cpp viz_shared_memory.cpp viz_system.cpp) @@ -1233,12 +1040,6 @@ if( CMAKE_COMPILER_IS_GNUCXX ) # GCC misoptimizes this file set_source_files_properties( oplsynth/fmopl.cpp PROPERTIES COMPILE_FLAGS "-fno-tree-dominator-opts -fno-tree-fre" ) endif( CMAKE_COMPILER_IS_GNUCXX ) -if( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) - # Need to enable intrinsics for this file. - if( SSE_MATTERS ) - set_source_files_properties( x86.cpp PROPERTIES COMPILE_FLAGS "-msse2 -mmmx" ) - endif( SSE_MATTERS ) -endif( ZD_CMAKE_COMPILER_IS_GNUCXX_COMPATIBLE ) if( APPLE ) set_target_properties(vizdoom PROPERTIES @@ -1261,8 +1062,6 @@ if( APPLE ) endif( NOT NO_FMOD ) endif( APPLE ) -source_group("Assembly Files\\ia32" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_ia32/.+") -source_group("Assembly Files\\x86_64" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/asm_x86_64/.+") source_group("Audio Files" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/sound/.+") source_group("Audio Files\\OPL Synth" REGULAR_EXPRESSION "^${CMAKE_CURRENT_SOURCE_DIR}/oplsynth/.+") source_group("Audio Files\\OPL Synth\\DOSBox" FILES oplsynth/dosbox/opl.cpp oplsynth/dosbox/opl.h) diff --git a/src/vizdoom/src/asm_ia32/a.asm b/src/vizdoom/src/asm_ia32/a.asm deleted file mode 100644 index b4bc529f6..000000000 --- a/src/vizdoom/src/asm_ia32/a.asm +++ /dev/null @@ -1,812 +0,0 @@ -; "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -; Ken Silverman's official web site: "http://www.advsys.net/ken" -; See the included license file "BUILDLIC.TXT" for license info. -; This file has been modified from Ken Silverman's original release - -%include "valgrind.inc" - - SECTION .data - -%ifndef M_TARGET_LINUX -%define ylookup _ylookup -%define vince _vince -%define vplce _vplce -%define palookupoffse _palookupoffse -%define bufplce _bufplce -%define dc_iscale _dc_iscale -%define dc_colormap _dc_colormap -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_source _dc_source -%define dc_texturefrac _dc_texturefrac - -%define setupvlineasm _setupvlineasm -%define prevlineasm1 _prevlineasm1 -%define vlineasm1 _vlineasm1 -%define vlineasm4 _vlineasm4 - -%define setupmvlineasm _setupmvlineasm -%define mvlineasm1 _mvlineasm1 -%define mvlineasm4 _mvlineasm4 - -%define R_SetupDrawSlabA _R_SetupDrawSlabA -%define R_DrawSlabA _R_DrawSlabA -%endif - -EXTERN ylookup ; near - -EXTERN vplce ; near -EXTERN vince ; near -EXTERN palookupoffse ; near -EXTERN bufplce ; near - -EXTERN dc_iscale -EXTERN dc_colormap -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_source -EXTERN dc_texturefrac - - SECTION .text - -ALIGN 16 -GLOBAL setvlinebpl_ -setvlinebpl_: - mov [fixchain1a+2], eax - mov [fixchain1b+2], eax - mov [fixchain2a+2], eax - mov [fixchain1m+2], eax - mov [fixchain2ma+2], eax - mov [fixchain2mb+2], eax - selfmod fixchain1a, fixchain2mb+6 - -setdrawslabbpl: - mov dword [voxbpl1+2], eax - mov dword [voxbpl2+2], eax - mov dword [voxbpl3+2], eax - mov dword [voxbpl4+2], eax - mov dword [voxbpl5+2], eax - mov dword [voxbpl6+2], eax - mov dword [voxbpl7+2], eax - mov dword [voxbpl8+2], eax - selfmod voxbpl1, voxpl8+6 - ret - - SECTION .data - -lastslabcolormap: - dd 4 - - SECTION .text - -GLOBAL R_SetupDrawSlabA -GLOBAL @R_SetupDrawSlabA@4 -R_SetupDrawSlabA: - mov ecx, [esp+4] -@R_SetupDrawSlabA@4: - cmp [lastslabcolormap], ecx - je .done - mov [lastslabcolormap], ecx - mov dword [voxpal1+2], ecx - mov dword [voxpal2+2], ecx - mov dword [voxpal3+2], ecx - mov dword [voxpal4+2], ecx - mov dword [voxpal5+2], ecx - mov dword [voxpal6+2], ecx - mov dword [voxpal7+2], ecx - mov dword [voxpal8+2], ecx -.done ret - - -; pass it log2(texheight) - -ALIGN 16 -GLOBAL setupvlineasm -setupvlineasm: - mov ecx, [esp+4] - - ;First 2 lines for VLINEASM1, rest for VLINEASM4 - mov byte [premach3a+2], cl - mov byte [mach3a+2], cl - - mov byte [machvsh1+2], cl ;32-shy - mov byte [machvsh3+2], cl ;32-shy - mov byte [machvsh5+2], cl ;32-shy - mov byte [machvsh6+2], cl ;32-shy - mov ch, cl - sub ch, 16 - mov byte [machvsh8+2], ch ;16-shy - neg cl - mov byte [machvsh7+2], cl ;shy - mov byte [machvsh9+2], cl ;shy - mov byte [machvsh10+2], cl ;shy - mov byte [machvsh11+2], cl ;shy - mov byte [machvsh12+2], cl ;shy - mov eax, 1 - shl eax, cl - dec eax - mov dword [machvsh2+2], eax ;(1<>sh) -;vplc3 = (ebp<<(32-sh))+((edx&65535)<<(16-sh)) -machvsh5: shl esi, 88h ;32-sh - mov eax, edx -machvsh6: shl ebp, 88h ;32-sh - and edx, 0000ffffh -machvsh7: shr eax, 88h ;sh - add esi, eax -machvsh8: shl edx, 88h ;16-sh - add ebp, edx - mov dword [vplce+12], esi - mov dword [vplce+4], ebp - - pop edi - pop esi - pop ebx - pop ebp - ret - -;************************************************************************* -;************************* Masked Vertical Lines ************************* -;************************************************************************* - -; pass it log2(texheight) - -ALIGN 16 -GLOBAL setupmvlineasm -setupmvlineasm: - mov ecx, dword [esp+4] - mov byte [maskmach3a+2], cl - mov byte [machmv13+2], cl - - mov byte [machmv14+2], cl - mov byte [machmv15+2], cl - mov byte [machmv16+2], cl - selfmod maskmach3a, machmv13+6 - ret - -ALIGN 16 -GLOBAL mvlineasm1 ;Masked vline -mvlineasm1: - push ebx - push edi - push esi - push ebp - mov ecx, [dc_count] - mov ebp, [dc_colormap] - mov edi, [dc_dest] - mov eax, [dc_iscale] - mov edx, [dc_texturefrac] - mov esi, [dc_source] -beginmvline: - mov ebx, edx -maskmach3a: shr ebx, 32 - movzx ebx, byte [esi+ebx] - cmp ebx, 0 - je short skipmask1 -maskmach3c: mov bl, byte [ebp+ebx] - mov [edi], bl -skipmask1: add edx, eax -fixchain1m: add edi, 320 - dec ecx - jnz short beginmvline - - pop ebp - pop esi - pop edi - pop ebx - mov eax, edx - ret - -ALIGN 16 -GLOBAL mvlineasm4 -mvlineasm4: - push ebx - push esi - push edi - push ebp - - mov ecx,[dc_count] - mov edi,[dc_dest] - - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov [machmv1+3], eax - mov [machmv4+3], ebx - mov eax, [bufplce+8] - mov ebx, [bufplce+12] - mov [machmv7+3], eax - mov [machmv10+3], ebx - - mov eax, [palookupoffse] - mov ebx, [palookupoffse+4] - mov [machmv2+2], eax - mov [machmv5+2], ebx - mov eax, [palookupoffse+8] - mov ebx, [palookupoffse+12] - mov [machmv8+2], eax - mov [machmv11+2], ebx - - mov eax, [vince] ;vince - mov ebx, [vince+4] - xor bl, bl - mov [machmv3+2], eax - mov [machmv6+2], ebx - mov eax, [vince+8] - mov ebx, [vince+12] - mov [machmv9+2], eax - mov [machmv12+2], ebx - - inc ecx - push ecx - mov ecx, [vplce+0] - mov edx, [vplce+4] - mov esi, [vplce+8] - mov ebp, [vplce+12] -fixchain2ma: sub edi, 320 - - selfmod beginmvlineasm4, machmv2+6 - jmp short beginmvlineasm4 -ALIGN 16 -beginmvlineasm4: - dec dword [esp] - jz near endmvlineasm4 - - mov eax, ebp - mov ebx, esi -machmv16: shr eax, 32 -machmv12: add ebp, 0x88888888 ;vince[3] -machmv15: shr ebx, 32 -machmv9: add esi, 0x88888888 ;vince[2] -machmv10: movzx eax, byte [eax+0x88888888];bufplce[3] -machmv7: movzx ebx, byte [ebx+0x88888888];bufplce[2] - cmp eax, 1 - adc dl, dl - cmp ebx, 1 - adc dl, dl -machmv8: mov bl, [ebx+0x88888888] ;palookupoffs[2] -machmv11: mov bh, [eax+0x88888888] ;palookupoffs[3] - - mov eax, edx -machmv6: add edx, 0x88888888 ;vince[1] -machmv14: shr eax, 32 - shl ebx, 16 -machmv4: movzx eax, byte [eax+0x88888888];bufplce[1] - cmp eax, 1 - adc dl, dl -machmv5: mov bh, [eax+0x88888888] ;palookupoffs[1] - - mov eax, ecx -machmv3: add ecx, 0x88888888 ;vince[0] -machmv13: shr eax, 32 -machmv1: movzx eax, byte [eax+0x88888888];bufplce[0] - cmp eax, 1 - adc dl, dl -machmv2: mov bl, [eax+0x88888888] ;palookupoffs[0] - - xor eax, eax - shl dl, 4 -fixchain2mb: add edi, 320 - mov al, dl - add eax, mvcase15 - jmp eax ;16 byte cases - -ALIGN 16 -endmvlineasm4: - mov [vplce], ecx - mov [vplce+4], edx - mov [vplce+8], esi - mov [vplce+12], ebp - pop ecx - pop ebp - pop edi - pop esi - pop ebx - ret - - ;5,7,8,8,11,13,12,14,11,13,14,14,12,14,15,7 -ALIGN 16 -mvcase15: mov [edi], ebx - jmp beginmvlineasm4 -ALIGN 16 -mvcase14: mov [edi+1], bh - shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase13: mov [edi], bl - shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase12: shr ebx, 16 - mov [edi+2], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase11: mov [edi], bx - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase10: mov [edi+1], bh - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase9: mov [edi], bl - shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase8: shr ebx, 16 - mov [edi+3], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase7: mov [edi], bx - shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase6: shr ebx, 8 - mov [edi+1], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase5: mov [edi], bl - shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase4: shr ebx, 16 - mov [edi+2], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase3: mov [edi], bx - jmp beginmvlineasm4 -ALIGN 16 -mvcase2: mov [edi+1], bh - jmp beginmvlineasm4 -ALIGN 16 -mvcase1: mov [edi], bl - jmp beginmvlineasm4 -ALIGN 16 -mvcase0: jmp beginmvlineasm4 - -align 16 - - -;************************************************************************* -;***************************** Voxel Slabs ******************************* -;************************************************************************* - -GLOBAL R_DrawSlabA -R_DrawSlabA: - push ebx - push ebp - push esi - push edi - - mov eax, [esp+5*4+0] - mov ebx, [esp+5*4+4] - mov ecx, [esp+5*4+8] - mov edx, [esp+5*4+12] - mov esi, [esp+5*4+16] - mov edi, [esp+5*4+20] - - cmp eax, 2 - je voxbegdraw2 - ja voxskip2 - xor eax, eax -voxbegdraw1: - mov ebp, ebx - shr ebp, 16 - add ebx, edx - dec ecx - mov al, byte [esi+ebp] -voxpal1: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl1: lea edi, [edi+88888888h] - jnz voxbegdraw1 - jmp voxskipslab5 - -voxbegdraw2: - mov ebp, ebx - shr ebp, 16 - add ebx, edx - xor eax, eax - dec ecx - mov al, byte [esi+ebp] -voxpal2: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl2: lea edi, [edi+88888888h] - jnz voxbegdraw2 - jmp voxskipslab5 - -voxskip2: - cmp eax, 4 - jne voxskip4 - xor eax, eax -voxbegdraw4: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal3: mov al, byte [eax+88888888h] - mov ah, al - shl eax, 8 - mov al, ah - shl eax, 8 - mov al, ah - mov dword [edi], eax -voxbpl3: add edi, 88888888h - dec ecx - jnz voxbegdraw4 - jmp voxskipslab5 - -voxskip4: - add eax, edi - - test edi, 1 - jz voxskipslab1 - cmp edi, eax - je voxskipslab1 - - push eax - push ebx - push ecx - push edi -voxbegslab1: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal4: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl4: add edi, 88888888h - dec ecx - jnz voxbegslab1 - pop edi - pop ecx - pop ebx - pop eax - inc edi - -voxskipslab1: - push eax - test edi, 2 - jz voxskipslab2 - dec eax - cmp edi, eax - jge voxskipslab2 - - push ebx - push ecx - push edi -voxbegslab2: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal5: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl5: add edi, 88888888h - dec ecx - jnz voxbegslab2 - pop edi - pop ecx - pop ebx - add edi, 2 - -voxskipslab2: - mov eax, [esp] - - sub eax, 3 - cmp edi, eax - jge voxskipslab3 - -voxprebegslab3: - push ebx - push ecx - push edi -voxbegslab3: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal6: mov al, byte [eax+88888888h] - mov ah, al - shl eax, 8 - mov al, ah - shl eax, 8 - mov al, ah - mov dword [edi], eax -voxbpl6: add edi, 88888888h - dec ecx - jnz voxbegslab3 - pop edi - pop ecx - pop ebx - add edi, 4 - - mov eax, [esp] - - sub eax, 3 - cmp edi, eax - jl voxprebegslab3 - -voxskipslab3: - mov eax, [esp] - - dec eax - cmp edi, eax - jge voxskipslab4 - - push ebx - push ecx - push edi -voxbegslab4: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal7: mov al, byte [eax+88888888h] - mov ah, al - mov word [edi], ax -voxbpl7: add edi, 88888888h - dec ecx - jnz voxbegslab4 - pop edi - pop ecx - pop ebx - add edi, 2 - -voxskipslab4: - pop eax - - cmp edi, eax - je voxskipslab5 - -voxbegslab5: - mov ebp, ebx - add ebx, edx - shr ebp, 16 - xor eax, eax - mov al, byte [esi+ebp] -voxpal8: mov al, byte [eax+88888888h] - mov byte [edi], al -voxbpl8: add edi, 88888888h - dec ecx - jnz voxbegslab5 - -voxskipslab5: - pop edi - pop esi - pop ebp - pop ebx - ret - -align 16 - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_a_end -_rtext_a_end: -%endif diff --git a/src/vizdoom/src/asm_ia32/misc.asm b/src/vizdoom/src/asm_ia32/misc.asm deleted file mode 100644 index 69e723844..000000000 --- a/src/vizdoom/src/asm_ia32/misc.asm +++ /dev/null @@ -1,200 +0,0 @@ -;* -;* misc.nas -;* Miscellaneous assembly functions -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* - -BITS 32 - -%ifndef M_TARGET_LINUX - -%define DoBlending_MMX _DoBlending_MMX -%define BestColor_MMX _BestColor_MMX - -%endif - -%ifdef M_TARGET_WATCOM - SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32 - SEGMENT DATA -%else - SECTION .data -%endif - -Blending256: - dd 0x01000100,0x00000100 - -%ifdef M_TARGET_WATCOM - SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32 - SEGMENT CODE -%else - SECTION .text -%endif - -;----------------------------------------------------------- -; -; DoBlending_MMX -; -; MMX version of DoBlending -; -; (DWORD *from, DWORD *to, count, tor, tog, tob, toa) -;----------------------------------------------------------- - -GLOBAL DoBlending_MMX - -DoBlending_MMX: - pxor mm0,mm0 ; mm0 = 0 - mov eax,[esp+4*4] - shl eax,16 - mov edx,[esp+4*5] - shl edx,8 - or eax,[esp+4*6] - or eax,edx - mov ecx,[esp+4*3] ; ecx = count - movd mm1,eax ; mm1 = 00000000 00RRGGBB - mov eax,[esp+4*7] - shl eax,16 - mov edx,[esp+4*7] - shl edx,8 - or eax,[esp+4*7] - or eax,edx - mov edx,[esp+4*2] ; edx = dest - movd mm6,eax ; mm6 = 00000000 00AAAAAA - punpcklbw mm1,mm0 ; mm1 = 000000RR 00GG00BB - movq mm7,[Blending256] - punpcklbw mm6,mm0 ; mm6 = 000000AA 00AA00AA - mov eax,[esp+4*1] ; eax = source - pmullw mm1,mm6 ; mm1 = 000000RR 00GG00BB (multiplied by alpha) - psubusw mm7,mm6 ; mm7 = 000000aa 00aa00aa (one minus alpha) - nop ; Does this actually pair on a Pentium? - -; Do four colors per iteration: Count must be a multiple of four. - -.loop movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1 - add eax,8 - movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1 - punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1 - punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2 - pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb - add edx,8 - pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb - sub ecx,2 - paddusw mm2,mm1 - psrlw mm2,8 - paddusw mm3,mm1 - psrlw mm3,8 - packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1 - movq [edx-8],mm2 - - movq mm2,[eax] ; mm2 = 00r2g2b2 00r1g1b1 - add eax,8 - movq mm3,mm2 ; mm3 = 00r2g2b2 00r1g1b1 - punpcklbw mm2,mm0 ; mm2 = 000000r1 00g100b1 - punpckhbw mm3,mm0 ; mm3 = 000000r2 00g200b2 - pmullw mm2,mm7 ; mm2 = 0000r1rr g1ggb1bb - add edx,8 - pmullw mm3,mm7 ; mm3 = 0000r2rr g2ggb2bb - sub ecx,2 - paddusw mm2,mm1 - psrlw mm2,8 - paddusw mm3,mm1 - psrlw mm3,8 - packuswb mm2,mm3 ; mm2 = 00r2g2b2 00r1g1b1 - movq [edx-8],mm2 - - jnz .loop - - emms - ret - -;----------------------------------------------------------- -; -; BestColor_MMX -; -; Picks the closest matching color from a palette -; -; Passed FFRRGGBB and palette array in same format -; FF is the index of the first palette entry to consider -; -;----------------------------------------------------------- - -GLOBAL BestColor_MMX -GLOBAL @BestColor_MMX@8 - -BestColor_MMX: - mov ecx,[esp+4] - mov edx,[esp+8] -@BestColor_MMX@8: - pxor mm0,mm0 - movd mm1,ecx ; mm1 = color searching for - mov eax,257*257+257*257+257*257 ;eax = bestdist - push ebx - punpcklbw mm1,mm0 - mov ebx,ecx ; ebx = best color - shr ecx,24 ; ecx = count - and ebx,0xffffff - push esi - push ebp - -.loop movd mm2,[edx+ecx*4] ; mm2 = color considering now - inc ecx - punpcklbw mm2,mm0 - movq mm3,mm1 - psubsw mm3,mm2 - pmullw mm3,mm3 ; mm3 = color distance squared - - movd ebp,mm3 ; add the three components - psrlq mm3,32 ; into ebp to get the real - mov esi,ebp ; (squared) distance - shr esi,16 - and ebp,0xffff - add ebp,esi - movd esi,mm3 - add ebp,esi - - jz .perf ; found a perfect match - cmp eax,ebp - jb .skip - mov eax,ebp - lea ebx,[ecx-1] -.skip cmp ecx,256 - jne .loop - mov eax,ebx - pop ebp - pop esi - pop ebx - emms - ret - -.perf lea eax,[ecx-1] - pop ebp - pop esi - pop ebx - emms - ret diff --git a/src/vizdoom/src/asm_ia32/tmap.asm b/src/vizdoom/src/asm_ia32/tmap.asm deleted file mode 100644 index 00e633354..000000000 --- a/src/vizdoom/src/asm_ia32/tmap.asm +++ /dev/null @@ -1,1893 +0,0 @@ -;* -;* tmap.nas -;* The texture-mapping inner loops in pure assembly language. -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* - -BITS 32 - -%include "valgrind.inc" - -; Segment/section definition macros. - - SECTION .data - -%define SPACEFILLER4 (0x44444444) - -; If you change this in r_draw.c, be sure to change it here, too! -FUZZTABLE equ 50 - -%ifndef M_TARGET_LINUX - -%define ylookup _ylookup -%define centery _centery -%define fuzzpos _fuzzpos -%define fuzzoffset _fuzzoffset -%define NormalLight _NormalLight -%define viewheight _viewheight -%define fuzzviewheight _fuzzviewheight -%define CPU _CPU - -%define dc_pitch _dc_pitch -%define dc_colormap _dc_colormap -%define dc_color _dc_color -%define dc_iscale _dc_iscale -%define dc_texturefrac _dc_texturefrac -%define dc_srcblend _dc_srcblend -%define dc_destblend _dc_destblend -%define dc_source _dc_source -%define dc_yl _dc_yl -%define dc_yh _dc_yh -%define dc_x _dc_x -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_destorg _dc_destorg - -%define Col2RGB8 _Col2RGB8 -%define RGB32k _RGB32k - -%define dc_ctspan _dc_ctspan -%define dc_temp _dc_temp - -%define ds_xstep _ds_xstep -%define ds_ystep _ds_ystep -%define ds_colormap _ds_colormap -%define ds_source _ds_source -%define ds_x1 _ds_x1 -%define ds_x2 _ds_x2 -%define ds_xfrac _ds_xfrac -%define ds_yfrac _ds_yfrac -%define ds_y _ds_y - -%define ds_cursource _ds_cursource -%define ds_curcolormap _ds_curcolormap - -%define R_SetSpanSource_ASM _R_SetSpanSource_ASM -%define R_SetSpanSize_ASM _R_SetSpanSize_ASM -%define R_SetSpanColormap_ASM _R_SetSpanColormap_ASM -%define R_SetupShadedCol _R_SetupShadedCol -%define R_SetupAddCol _R_SetupAddCol -%define R_SetupAddClampCol _R_SetupAddClampCol - -%endif - -EXTERN ylookup -EXTERN centery -EXTERN fuzzpos -EXTERN fuzzoffset -EXTERN NormalLight -EXTERN viewheight -EXTERN fuzzviewheight -EXTERN CPU - -EXTERN dc_pitch -EXTERN dc_colormap -EXTERN dc_color -EXTERN dc_iscale -EXTERN dc_texturefrac -EXTERN dc_srcblend -EXTERN dc_destblend -EXTERN dc_source -EXTERN dc_yl -EXTERN dc_yh -EXTERN dc_x -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_destorg - -EXTERN dc_ctspan -EXTERN dc_temp - -EXTERN Col2RGB8 -EXTERN RGB32k - -EXTERN ds_xstep -EXTERN ds_ystep -EXTERN ds_colormap -EXTERN ds_source -EXTERN ds_x1 -EXTERN ds_x2 -EXTERN ds_xfrac -EXTERN ds_yfrac -EXTERN ds_y - -GLOBAL ds_cursource -GLOBAL ds_curcolormap - - -ds_cursource: - DD 0 - -ds_curcolormap: - DD 0 - - -; Local stuff: -lastAddress DD 0 -pixelcount DD 0 - - SECTION .text - - -GLOBAL @R_SetSpanSource_ASM@4 -GLOBAL R_SetSpanSource_ASM - -R_SetSpanSource_ASM: - mov ecx,[esp+4] - -@R_SetSpanSource_ASM@4: - mov [spreada+2],ecx - mov [spreadb+2],ecx - mov [spreadc+2],ecx - mov [spreadd+2],ecx - mov [spreade+2],ecx - mov [spreadf+2],ecx - mov [spreadg+2],ecx - - mov [mspreada+2],ecx - mov [mspreadb+2],ecx - mov [mspreadc+2],ecx - mov [mspreadd+2],ecx - mov [mspreade+2],ecx - mov [mspreadf+2],ecx - mov [mspreadg+2],ecx - - selfmod spreada, mspreadg+6 - - mov [ds_cursource],ecx - ret - -GLOBAL @R_SetSpanColormap_ASM@4 -GLOBAL R_SetSpanColormap_ASM - -R_SetSpanColormap_ASM: - mov ecx,[esp+4] - -@R_SetSpanColormap_ASM@4: - mov [spmapa+2],ecx - mov [spmapb+2],ecx - mov [spmapc+2],ecx - mov [spmapd+2],ecx - mov [spmape+2],ecx - mov [spmapf+2],ecx - mov [spmapg+2],ecx - - mov [mspmapa+2],ecx - mov [mspmapb+2],ecx - mov [mspmapc+2],ecx - mov [mspmapd+2],ecx - mov [mspmape+2],ecx - mov [mspmapf+2],ecx - mov [mspmapg+2],ecx - - selfmod spmapa, mspmapg+6 - - mov [ds_curcolormap],ecx - ret - -GLOBAL R_SetSpanSize_ASM - -EXTERN SetTiltedSpanSize - -R_SetSpanSize_ASM: - mov edx,[esp+4] - mov ecx,[esp+8] - call SetTiltedSpanSize - - mov [dsy1+2],dl - mov [dsy2+2],dl - - mov [dsx1+2],cl - mov [dsx2+2],cl - mov [dsx3+2],cl - mov [dsx4+2],cl - mov [dsx5+2],cl - mov [dsx6+2],cl - mov [dsx7+2],cl - - mov [dmsy1+2],dl - mov [dmsy2+2],dl - - mov [dmsx1+2],cl - mov [dmsx2+2],cl - mov [dmsx3+2],cl - mov [dmsx4+2],cl - mov [dmsx5+2],cl - mov [dmsx6+2],cl - mov [dmsx7+2],cl - - push ecx - add ecx,edx - mov eax,1 - shl eax,cl - dec eax - mov [dsm1+2],eax - mov [dsm5+1],eax - mov [dsm6+1],eax - mov [dsm7+1],eax - - mov [dmsm1+2],eax - mov [dmsm5+1],eax - mov [dmsm6+1],eax - mov [dmsm7+1],eax - pop ecx - ror eax,cl - mov [dsm2+2],eax - mov [dsm3+2],eax - mov [dsm4+2],eax - - mov [dmsm2+2],eax - mov [dmsm3+2],eax - mov [dmsm4+2],eax - and eax,0xffff - not eax - mov [dsm8+2],eax - mov [dsm9+2],eax - - mov [dmsm8+2],eax - mov [dmsm9+2],eax - - neg dl - mov [dsy3+2],dl - mov [dsy4+2],dl - - mov [dmsy3+2],dl - mov [dmsy4+2],dl - - selfmod dsy1, dmsm7+6 - -aret: ret - -%ifdef M_TARGET_MACHO - SECTION .text align=64 -%else - SECTION .rtext progbits alloc exec write align=64 -%endif - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap_start -_rtext_tmap_start: -%endif - -rtext_start: - -GLOBAL @R_DrawSpanP_ASM@0 -GLOBAL _R_DrawSpanP_ASM -GLOBAL R_DrawSpanP_ASM - -; eax: scratch -; ebx: zero -; ecx: yfrac at top end, xfrac int part at low end -; edx: xfrac frac part at top end -; edi: dest -; ebp: scratch -; esi: count -; [esp]: xstep -; [esp+4]: ystep - - align 16 - -@R_DrawSpanP_ASM@0: -_R_DrawSpanP_ASM: -R_DrawSpanP_ASM: - mov eax,[ds_x2] - mov ecx,[ds_x1] - sub eax,ecx - jl near rdspret ; count < 0: nothing to do, so leave - - push ebx - push edi - push ebp - push esi - sub esp, 8 - - mov edi,ecx - add edi,[dc_destorg] - mov ecx,[ds_y] - add edi,[ylookup+ecx*4] - mov edx,[ds_xstep] -dsy1: shl edx,6 - mov ebp,[ds_xstep] -dsy3: shr ebp,26 - xor ebx,ebx - lea esi,[eax+1] - mov [esp],edx - mov edx,[ds_ystep] - mov ecx,[ds_xfrac] -dsy4: shr ecx,26 -dsm8: and edx,strict dword 0xffffffc0 - or ebp,edx - mov [esp+4],ebp - mov ebp,[ds_yfrac] - mov edx,[ds_xfrac] -dsy2: shl edx,6 -dsm9: and ebp,strict dword 0xffffffc0 - or ecx,ebp - shr esi,1 - jnc dseven1 - -; do odd pixel - - mov ebp,ecx -dsx1: rol ebp,6 -dsm1: and ebp,0xfff - add edx,[esp] - adc ecx,[esp+4] -spreada mov bl,[ebp+SPACEFILLER4] -spmapa mov bl,[ebx+SPACEFILLER4] - mov [edi],bl - inc edi - -dseven1 shr esi,1 - jnc dsrest - -; do two more pixels - mov ebp,ecx - add edx,[esp] - adc ecx,[esp+4] -dsm2: and ebp,0xfc00003f -dsx2: rol ebp,6 - mov eax,ecx - add edx,[esp] - adc ecx,[esp+4] -spreadb mov bl,[ebp+SPACEFILLER4] ;read texel1 -dsx3: rol eax,6 -dsm6: and eax,0xfff -spmapb mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 - add edi,2 -spreadc mov bl,[eax+SPACEFILLER4] ;read texel2 -spmapc mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi-1],bl ;store texel2 - -; do the rest - -dsrest test esi,esi - jz near dsdone - - align 16 - -dsloop mov ebp,ecx -spstep1d add edx,[esp] -spstep2d adc ecx,[esp+4] -dsm3: and ebp,0xfc00003f -dsx4: rol ebp,6 - mov eax,ecx -spstep1e add edx,[esp] -spstep2e adc ecx,[esp+4] -spreadd mov bl,[ebp+SPACEFILLER4] ;read texel1 -dsx5: rol eax,6 -dsm5: and eax,0xfff -spmapd mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 - mov ebp,ecx -spreade mov bl,[eax+SPACEFILLER4] ;read texel2 -spstep1f add edx,[esp] -spstep2f adc ecx,[esp+4] -dsm4: and ebp,0xfc00003f -dsx6: rol ebp,6 -spmape mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov eax,ecx - mov [edi+1],bl ;store texel2 -spreadf mov bl,[ebp+SPACEFILLER4] ;read texel3 -spmapf mov bl,[ebx+SPACEFILLER4] ;map texel3 - add edi,4 -dsx7: rol eax,6 -dsm7: and eax,0xfff - mov [edi-2],bl ;store texel3 -spreadg mov bl,[eax+SPACEFILLER4] ;read texel4 -spstep1g add edx,[esp] -spstep2g adc ecx,[esp+4] -spmapg mov bl,[ebx+SPACEFILLER4] ;map texel4 - dec esi - mov [edi-1],bl ;store texel4 - jnz near dsloop - -dsdone add esp,8 - pop esi - pop ebp - pop edi - pop ebx - -rdspret ret - -; This is the same as the previous routine, except it doesn't draw pixels -; where the texture's color value is 0. - -GLOBAL @R_DrawSpanMaskedP_ASM@0 -GLOBAL _R_DrawSpanMaskedP_ASM -GLOBAL R_DrawSpanMaskedP_ASM - -; eax: scratch -; ebx: zero -; ecx: yfrac at top end, xfrac int part at low end -; edx: xfrac frac part at top end -; edi: dest -; ebp: scratch -; esi: count -; [esp]: xstep -; [esp+4]: ystep - - align 16 - -@R_DrawSpanMaskedP_ASM@0: -_R_DrawSpanMaskedP_ASM: -R_DrawSpanMaskedP_ASM: - mov eax,[ds_x2] - mov ecx,[ds_x1] - sub eax,ecx - jl rdspret ; count < 0: nothing to do, so leave - - push ebx - push edi - push ebp - push esi - sub esp,8 - - mov edi,ecx - add edi,[dc_destorg] - mov ecx,[ds_y] - add edi,[ylookup+ecx*4] - mov edx,[ds_xstep] -dmsy1: shl edx,6 - mov ebp,[ds_xstep] -dmsy3: shr ebp,26 - xor ebx,ebx - lea esi,[eax+1] - mov [esp],edx - mov edx,[ds_ystep] - mov ecx,[ds_xfrac] -dmsy4: shr ecx,26 -dmsm8: and edx,strict dword 0xffffffc0 - or ebp,edx - mov [esp+4],ebp - mov ebp,[ds_yfrac] - mov edx,[ds_xfrac] -dmsy2: shl edx,6 -dmsm9: and ebp,strict dword 0xffffffc0 - or ecx,ebp - shr esi,1 - jnc dmseven1 - -; do odd pixel - - mov ebp,ecx -dmsx1: rol ebp,6 -dmsm1: and ebp,0xfff - add edx,[esp] - adc ecx,[esp+4] -mspreada mov bl,[ebp+SPACEFILLER4] - cmp bl,0 - je mspskipa -mspmapa mov bl,[ebx+SPACEFILLER4] - mov [edi],bl -mspskipa: inc edi - -dmseven1 shr esi,1 - jnc dmsrest - -; do two more pixels - mov ebp,ecx - add edx,[esp] - adc ecx,[esp+4] -dmsm2: and ebp,0xfc00003f -dmsx2: rol ebp,6 - mov eax,ecx - add edx,[esp] - adc ecx,[esp+4] -mspreadb mov bl,[ebp+SPACEFILLER4] ;read texel1 -dmsx3: rol eax,6 -dmsm6: and eax,0xfff - cmp bl,0 - je mspskipb -mspmapb mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 -mspskipb add edi,2 -mspreadc mov bl,[eax+SPACEFILLER4] ;read texel2 - cmp bl,0 - je dmsrest -mspmapc mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi-1],bl ;store texel2 - -; do the rest - -dmsrest test esi,esi - jz near dmsdone - - align 16 - -dmsloop mov ebp,ecx -mspstep1d add edx,[esp] -mspstep2d adc ecx,[esp+4] -dmsm3: and ebp,0xfc00003f -dmsx4: rol ebp,6 - mov eax,ecx -mspstep1e add edx,[esp] -mspstep2e adc ecx,[esp+4] -mspreadd mov bl,[ebp+SPACEFILLER4] ;read texel1 -dmsx5: rol eax,6 -dmsm5: and eax,0xfff - cmp bl,0 - mov ebp,ecx - je mspreade -mspmapd mov bl,[ebx+SPACEFILLER4] ;map texel1 - mov [edi],bl ;store texel1 -mspreade mov bl,[eax+SPACEFILLER4] ;read texel2 -mspstep1f add edx,[esp] -mspstep2f adc ecx,[esp+4] -dmsm4: and ebp,0xfc00003f -dmsx6: rol ebp,6 - cmp bl,0 - mov eax,ecx - je mspreadf -mspmape mov bl,[ebx+SPACEFILLER4] ;map texel2 - mov [edi+1],bl ;store texel2 -mspreadf mov bl,[ebp+SPACEFILLER4] ;read texel3 - add edi,4 -dmsx7: rol eax,6 -dmsm7: and eax,0xfff - cmp bl,0 - je mspreadg -mspmapf mov bl,[ebx+SPACEFILLER4] ;map texel3 - mov [edi-2],bl ;store texel3 -mspreadg mov bl,[eax+SPACEFILLER4] ;read texel4 -mspstep1g add edx,[esp] -mspstep2g adc ecx,[esp+4] - cmp bl,0 - je mspskipg -mspmapg mov bl,[ebx+SPACEFILLER4] ;map texel4 - mov [edi-1],bl ;store texel4 -mspskipg dec esi - jnz near dmsloop - -dmsdone add esp,8 - pop esi - pop ebp - pop edi - pop ebx - - ret - - - - -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnP -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnP_ASM@0 -GLOBAL _R_DrawColumnP_ASM -GLOBAL R_DrawColumnP_ASM - - align 16 - -R_DrawColumnP_ASM: -_R_DrawColumnP_ASM: -@R_DrawColumnP_ASM@0: - -; count = dc_yh - dc_yl; - - mov ecx,[dc_count] - test ecx,ecx - jle near rdcpret ; count <= 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - -; dest = ylookup[dc_yl] + dc_x + dc_destorg; - - mov edi,[dc_dest] - mov ebp,ecx - mov ebx,[dc_texturefrac] ; ebx = frac -rdcp1: sub edi,SPACEFILLER4 - mov ecx,ebx - shr ecx,16 - mov esi,[dc_source] - mov edx,[dc_iscale] - mov eax,[dc_colormap] - - cmp BYTE [CPU+66],byte 5 - jg rdcploop2 - - align 16 - -; The registers should now look like this: -; -; [31 .. 16][15 .. 8][7 .. 0] -; eax [colormap ] -; ebx [yi ][yf ] -; ecx [scratch ] -; edx [dyi ][dyf ] -; esi [source texture column ] -; edi [destination screen pointer ] -; ebp [counter ] -; - - -; Note the partial register stalls on anything better than a Pentium -; That's why there are two versions of this loop. - -rdcploop: - mov cl,[esi+ecx] ; Fetch texel - xor ch,ch - add ebx,edx ; increment frac -rdcp2: add edi,SPACEFILLER4 ; increment destination pointer - mov cl,[eax+ecx] ; colormap texel - mov [edi],cl ; Store texel - mov ecx,ebx - shr ecx,16 - dec ebp - jnz rdcploop ; loop - - pop esi - pop edi - pop ebx - pop ebp -rdcpret: - ret - - align 16 - -rdcploop2: - movzx ecx,byte [esi+ecx] ; Fetch texel - add ebx,edx ; increment frac - mov cl,[eax+ecx] ; colormap texel -rdcp3: add edi,SPACEFILLER4 ; increment destination pointer - mov [edi],cl ; Store texel - mov ecx,ebx - shr ecx,16 - dec ebp - jnz rdcploop2 ; loop - - pop esi - pop edi - pop ebx - pop ebp - ret - - - -;*---------------------------------------------------------------------- -;* -;* R_DrawFuzzColumnP -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawFuzzColumnP_ASM@0 -GLOBAL _R_DrawFuzzColumnP_ASM -GLOBAL R_DrawFuzzColumnP_ASM - - align 16 - -R_DrawFuzzColumnP_ASM: -_R_DrawFuzzColumnP_ASM: -@R_DrawFuzzColumnP_ASM@0: - -; Adjust borders. Low... - mov eax,[dc_yl] - push ebx - push esi - push edi - push ebp - - cmp eax,0 - jg .ylok - - mov eax,1 - nop - -; ...and high. -.ylok mov edx,[fuzzviewheight] - mov esi,[dc_yh] - cmp esi,edx - jle .yhok - - mov esi,edx - nop - -.yhok mov edx,[dc_x] - sub esi,eax ; esi = count - js near .dfcdone ; Zero length (or less) - - mov edi,[ylookup+eax*4] - mov ebx,edx - add edi,[dc_destorg] - mov eax,[NormalLight] - mov ecx,[fuzzpos] - add edi,ebx - add eax,256*6 - inc esi - mov ebp,[dc_pitch] - mov edx,FUZZTABLE - test ecx,ecx - je .fuzz0 - -; -; esi = count -; edi = dest -; ecx = fuzzpos -; eax = colormap 6 -; - -; first loop: end with fuzzpos or count 0, whichever happens first - - sub edx,ecx ; edx = # of entries left in fuzzoffset - mov ebx,esi - cmp esi,edx - jle .enuf - mov esi,edx -.enuf sub ebx,esi - mov edx,[fuzzoffset+ecx*4] - push ebx - xor ebx,ebx - -.loop1 inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .loop1 - -; second loop: Chunk it into groups of FUZZTABLE-sized spans and do those - - pop esi - cmp ecx,FUZZTABLE - jl .savefuzzpos - xor ecx,ecx - nop -.fuzz0 cmp esi,FUZZTABLE - jl .chunked - -.oloop lea edx,[esi-FUZZTABLE] - mov esi,FUZZTABLE - push edx - mov edx,[fuzzoffset+ecx*4] - -.iloop inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .iloop - - pop esi - xor ecx,ecx - cmp esi,FUZZTABLE - jge .oloop - -; third loop: Do whatever is left - -.chunked: - test esi,esi - jle .savefuzzpos - mov edx,[fuzzoffset+ecx*4] - nop - -.loop3 inc ecx - mov bl,[edi+edx] - dec esi - mov bl,[eax+ebx] - mov [edi],bl - lea edi,[edi+ebp] - mov edx,[fuzzoffset+ecx*4] - jnz .loop3 - -.savefuzzpos: - mov [fuzzpos],ecx -.dfcdone: - pop ebp - pop edi - pop esi - pop ebx - ret - - -;*---------------------------------------------------------------------- -;* -;* R_DrawColumnHorizP_ASM -;* -;*---------------------------------------------------------------------- - -GLOBAL @R_DrawColumnHorizP_ASM@0 -GLOBAL _R_DrawColumnHorizP_ASM -GLOBAL R_DrawColumnHorizP_ASM - - align 16 - -@R_DrawColumnHorizP_ASM@0: -_R_DrawColumnHorizP_ASM: -R_DrawColumnHorizP_ASM: - -; count = dc_yh - dc_yl; - - mov eax,[dc_yh] - mov ecx,[dc_yl] - sub eax,ecx - mov edx,[dc_x] - - jl near .leave ; count < 0: nothing to do, so leave - - push ebp ; save registers - push ebx - push edi - push esi - - inc eax ; make 0 count mean 0 pixels - and edx,3 - push eax - mov eax,[dc_temp] - mov esi,[dc_ctspan+edx*4] - add eax,edx - lea eax,[eax+ecx*4] ; eax = top of column in buffer - mov ebp,[dc_yh] - mov [esi],ecx - mov [esi+4],ebp - add esi,8 - mov edi,[dc_source] - mov [dc_ctspan+edx*4],esi - mov esi,[dc_iscale] - mov ecx,[dc_texturefrac] ; ecx = frac - mov dl,[edi] ; load cache - mov ebx,[esp] - and ebx,0xfffffff8 - jnz .mthan8 - -; Register usage in the following code is: -; -; eax: dest -; edi: source -; ecx: frac (16.16) -; esi: fracstep (16.16) -; ebx: add1 -; ebp: add2 -; dl: texel1 -; dh: texel2 -;[esp] count - -; there are fewer than 8 pixels to draw - - mov ebx,[esp] -.lthan8 shr ebx,1 - jnc .even - -; do one pixel before loop (little opportunity for pairing) - - mov ebp,ecx ; copy frac to ebx - add ecx,esi ; increment frac - shr ebp,16 ; shift frac over to low end - add eax,4 - mov dl,[edi+ebp] - mov [eax-4],dl - -.even test ebx,ebx - jz near .done - -.loop2 mov [esp],ebx ; save counter - mov ebx,ecx ; copy frac for texel1 to ebx - shr ebx,16 ; shift frac for texel1 to low end - add ecx,esi ; increment frac - mov ebp,ecx ; copy frac for texel2 to ebp - shr ebp,16 ; shift frac for texel2 to low end - add ecx,esi ; increment frac - mov dl,[edi+ebx] ; read texel1 - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; read texel2 - mov [eax],dl ; write texel1 - mov [eax+4],dh ; write texel2 - add eax,8 ; increment dest - dec ebx ; decrement counter - jnz .loop2 ; loop until it hits 0 - - jmp .done - -; there are more than 8 pixels to draw. position eax as close to a 32 byte -; boundary as possible, then do whatever is left. - -.mthan8 test eax,4 - jz .try2 - - mov ebp,ecx ; frac: in ebp - add ecx,esi ; step - shr ebp,16 ; frac: shift - add eax,4 ; increment dest - mov ebx,[esp] ; fetch counter - mov dl,[edi+ebp] ; tex: read - dec ebx ; decrement counter - mov [eax-4],dl ; tex: write - mov [esp],ebx ; store counter - -.try2 test eax,8 - jz .try4 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex2: read - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - sub ebx,2 ; decrement counter - add eax,8 ; increment dest - mov [esp],ebx ; store counter - -.try4 test eax,16 - jz .try8 - - mov ebx,ecx ; frac1: in ebx - add ecx,esi ; step - shr ebx,16 ; frac1: shift - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - shr ebx,16 ; frac3: shift - mov dh,[edi+ebp] ; tex2: read - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,[esp] ; fetch counter - mov dh,[edi+ebp] ; tex4: read - sub ebx,4 ; decrement counter - mov [esp],ebx ; store counter - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add eax,16 ; increment dest - -.try8 mov ebx,[esp] ; make counter count groups of 8 - sub esp,4 - shr ebx,3 - jmp .tail8 - - align 16 - -.loop8 mov [esp],ebx ; save counter - mov ebx,ecx ; frac1: in ebx - shr ebx,16 ; frac1: shift - add ecx,esi ; step - mov ebp,ecx ; frac2: in ebp - shr ebp,16 ; frac2: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex1: read - mov ebx,ecx ; frac3: in ebx - mov dh,[edi+ebp] ; tex2: read - shr ebx,16 ; frac3: shift - add ecx,esi ; step - mov [eax],dl ; tex1: write - mov [eax+4],dh ; tex2: write - mov ebp,ecx ; frac4: in ebp - shr ebp,16 ; frac4: shift - add ecx,esi ; step - mov dl,[edi+ebx] ; tex3: read - mov ebx,ecx ; frac5: in ebx - mov dh,[edi+ebp] ; tex4: read - shr ebx,16 ; frac5: shift - mov [eax+8],dl ; tex3: write - mov [eax+12],dh ; tex4: write - add ecx,esi ; step - mov ebp,ecx ; frac6: in ebp - shr ebp,16 ; frac6: shift - mov dl,[edi+ebx] ; tex5: read - add ecx,esi ; step - mov ebx,ecx ; frac7: in ebx - mov [eax+16],dl ; tex5: write - shr ebx,16 ; frac7: shift - mov dh,[edi+ebp] ; tex6: read - add ecx,esi ; step - mov ebp,ecx ; frac8: in ebp - mov [eax+20],dh ; tex6: write - shr ebp,16 ; frac8: shift - add eax,32 ; increment dest pointer - mov dl,[edi+ebx] ; tex7: read - mov ebx,[esp] ; fetch counter - mov [eax-8],dl ; tex7: write - mov dh,[edi+ebp] ; tex8: read - add ecx,esi ; step - mov [eax-4],dh ; tex8: write - mov dl,[eax] ; load cache - dec ebx ; decrement counter -.tail8 jnz near .loop8 ; loop if more to do - - pop ebp - mov ebx,[esp] - and ebx,7 - jnz near .lthan8 - -.done pop eax - pop esi - pop edi - pop ebx - pop ebp -.leave ret - - -;*---------------------------------------------------------------------- -;* -;* rt_copy1col_asm -;* -;* ecx = hx -;* edx = sx -;* [esp+4] = yl -;* [esp+8] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_copy1col_asm@16 -GLOBAL _rt_copy1col_asm -GLOBAL rt_copy1col_asm - - align 16 - -rt_copy1col_asm: -_rt_copy1col_asm: - pop eax - mov edx,[esp+4*3] - mov ecx,[esp+4*2] - push edx - push ecx - mov ecx,[esp+4*2] - mov edx,[esp+4*3] - push eax - -@rt_copy1col_asm@16: - mov eax, [esp+4] - push ebx - mov ebx, [esp+12] - push esi - sub ebx, eax - push edi - js .done - - lea esi,[eax*4] - inc ebx ; ebx = count - mov eax,edx - add ecx,esi - mov edi,[ylookup+esi] - add ecx,[dc_temp] ; ecx = source - mov esi,[dc_pitch] ; esi = pitch - add eax,edi ; eax = dest - add eax,[dc_destorg] - - shr ebx,1 - jnc .even - - mov dl,[ecx] - add ecx,4 - mov [eax],dl - add eax,esi - -.even and ebx,ebx - jz .done - -.loop mov dl,[ecx] - mov dh,[ecx+4] - mov [eax],dl - mov [eax+esi],dh - add ecx,8 - lea eax,[eax+esi*2] - dec ebx - jnz .loop - -.done pop edi - pop esi - pop ebx - ret 8 - -;*---------------------------------------------------------------------- -;* -;* rt_copy4cols_asm -;* -;* ecx = sx -;* edx = yl -;* [esp+4] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_copy4cols_asm@12 -GLOBAL _rt_copy4cols_asm -GLOBAL rt_copy4cols_asm - - align 16 - -rt_copy4cols_asm: -_rt_copy4cols_asm: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_copy4cols_asm@12: - push ebx - mov ebx,[esp+8] - push esi - sub ebx,edx - push edi - js .done - - inc ebx ; ebx = count - mov eax,ecx - mov esi,[ylookup+edx*4] - mov ecx,[dc_temp] - add eax,esi ; eax = dest - add eax,[dc_destorg] - lea ecx,[ecx+edx*4] ; ecx = source - mov edx,[dc_pitch] ; edx = pitch - - shr ebx,1 - jnc .even - - mov esi,[ecx] - add ecx,4 - mov [eax],esi - add eax,edx - -.even and ebx,ebx - jz .done - -.loop mov esi,[ecx] - mov edi,[ecx+4] - mov [eax],esi - mov [eax+edx],edi - add ecx,8 - lea eax,[eax+edx*2] - dec ebx - jnz .loop - -.done pop edi - pop esi - pop ebx - ret 4 - -;*---------------------------------------------------------------------- -;* -;* rt_map1col_asm -;* -;* ecx = hx -;* edx = sx -;* [esp+4] = yl -;* [esp+8] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_map1col_asm@16 -GLOBAL _rt_map1col_asm -GLOBAL rt_map1col_asm - - align 16 - -rt_map1col_asm: -_rt_map1col_asm: - pop eax - mov edx,[esp+4*3] - mov ecx,[esp+4*2] - push edx - push ecx - mov ecx,[esp+4*2] - mov edx,[esp+4*3] - push eax - -@rt_map1col_asm@16: - mov eax,[esp+4] - push ebx - mov ebx,[esp+12] - push ebp - push esi - sub ebx, eax - push edi - js .done - - lea edi,[eax*4] - mov esi,[dc_colormap] ; esi = colormap - inc ebx ; ebx = count - mov eax,edx - lea ebp,[ecx+edi] ; ebp = source - add ebp,[dc_temp] - mov ecx,[ylookup+edi] - mov edi,[dc_pitch] ; edi = pitch - add eax,ecx ; eax = dest - xor ecx,ecx - xor edx,edx - add eax,[dc_destorg] - - shr ebx,1 - jnc .even - - mov dl,[ebp] - add ebp,4 - mov dl,[esi+edx] - mov [eax],dl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop mov dl,[ebp] - mov cl,[ebp+4] - add ebp,8 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+edi],cl - dec ebx - lea eax,[eax+edi*2] - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 8 - -;*---------------------------------------------------------------------- -;* -;* rt_map4cols_asm -;* -;* rt_map4cols_asm1 is for PPro and above -;* rt_map4cols_asm2 is for Pentium and below -;* -;* ecx = sx -;* edx = yl -;* [esp+4] = yh -;* -;*---------------------------------------------------------------------- - -GLOBAL @rt_map4cols_asm1@12 -GLOBAL _rt_map4cols_asm1 -GLOBAL rt_map4cols_asm1 - - align 16 - -rt_map4cols_asm1: -_rt_map4cols_asm1: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_map4cols_asm1@12: - push ebx - mov ebx,[esp+8] - push ebp - push esi - sub ebx,edx - push edi - js near .done - - mov esi,[dc_colormap] ; esi = colormap - shl edx,2 - mov eax,ecx - inc ebx ; ebx = count - mov edi,[ylookup+edx] - mov ebp,[dc_temp] - add ebp,edx ; ebp = source - add eax,edi ; eax = dest - mov edi,[dc_pitch] ; edi = pitch - add eax,[dc_destorg] - xor ecx,ecx - xor edx,edx - - shr ebx,1 - jnc .even - - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,4 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop: - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,8 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-6] - mov cl,[ebp-5] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - mov dl,[ebp-4] - mov cl,[ebp-3] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+edi],dl - mov [eax+edi+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+edi+2],dl - mov [eax+edi+3],cl - lea eax,[eax+edi*2] - dec ebx - - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 4 - -GLOBAL @rt_map4cols_asm2@12 -GLOBAL _rt_map4cols_asm2 -GLOBAL rt_map4cols_asm2 - - align 16 - -rt_map4cols_asm2: -_rt_map4cols_asm2: - pop eax - mov ecx,[esp+8] - mov edx,[esp+4] - push ecx - mov ecx,[esp+4] - push eax - -@rt_map4cols_asm2@12: - push ebx - mov ebx,[esp+8] - push ebp - push esi - sub ebx,edx - push edi - js near .done - - mov esi,[dc_colormap] ; esi = colormap - shl edx,2 - mov eax,ecx - inc ebx ; ebx = count - mov edi,[ylookup+edx] - mov ebp,[dc_temp] - add ebp,edx ; ebp = source - add eax,edi ; eax = dest - mov edi,[dc_pitch] ; edi = pitch - add eax,[dc_destorg] - xor ecx,ecx - xor edx,edx - - shr ebx,1 - jnc .even - - mov dl,[ebp] - mov cl,[ebp+1] - add ebp,4 - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax],dl - mov [eax+1],cl - mov dl,[ebp-2] - mov cl,[ebp-1] - mov dl,[esi+edx] - mov cl,[esi+ecx] - mov [eax+2],dl - mov [eax+3],cl - add eax,edi - -.even and ebx,ebx - jz .done - -.loop: - mov dl,[ebp+3] - mov ch,[esi+edx] - mov dl,[ebp+2] - mov cl,[esi+edx] - shl ecx,16 - mov dl,[ebp+1] - mov ch,[esi+edx] - mov dl,[ebp] - mov cl,[esi+edx] - mov [eax],ecx - add eax,edi - - mov dl,[ebp+7] - mov ch,[esi+edx] - mov dl,[ebp+6] - mov cl,[esi+edx] - shl ecx,16 - mov dl,[ebp+5] - mov ch,[esi+edx] - mov dl,[ebp+4] - mov cl,[esi+edx] - mov [eax],ecx - add eax,edi - add ebp,8 - dec ebx - - jnz .loop - -.done pop edi - pop esi - pop ebp - pop ebx - ret 4 - - align 16 - -GLOBAL rt_shaded4cols_asm -GLOBAL _rt_shaded4cols_asm - -rt_shaded4cols_asm: -_rt_shaded4cols_asm: - mov ecx,[esp+8] - push ebp - mov ebp,[esp+16] - sub ebp,ecx - js near s4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] ; eax = destination - push ebx - push esi - mov esi,[dc_temp] - inc ebp ; ebp = count - add eax,[esp+16] - push edi - lea esi,[esi+ecx*4] ; esi = source - - align 16 - -s4loop: movzx edx,byte [esi] - movzx ecx,byte [esi+1] -s4cm1: movzx edx,byte [SPACEFILLER4+edx] ; colormap -s4cm2: movzx edi,byte [SPACEFILLER4+ecx] ; colormap - shl edx,8 - movzx ebx,byte [eax] - shl edi,8 - movzx ecx,byte [eax+1] - sub ebx,edx - sub ecx,edi - mov ebx,[Col2RGB8+0x10000+ebx*4] - mov ecx,[Col2RGB8+0x10000+ecx*4] -s4fg1: add ebx,[SPACEFILLER4+edx*4] -s4fg2: add ecx,[SPACEFILLER4+edi*4] - or ebx,0x1f07c1f - or ecx,0x1f07c1f - mov edx,ebx - shr ebx,15 - mov edi,ecx - shr ecx,15 - and edx,ebx - and ecx,edi - mov bl,[RGB32k+edx] - movzx edx,byte [esi+2] - mov bh,[RGB32k+ecx] - movzx ecx,byte [esi+3] - mov [eax],bl - mov [eax+1],bh - -s4cm3: movzx edx,byte [SPACEFILLER4+edx] ; colormap -s4cm4: movzx edi,byte [SPACEFILLER4+ecx] ; colormap - shl edx,8 - movzx ebx,byte [eax+2] - shl edi,8 - movzx ecx,byte [eax+3] - sub ebx,edx - sub ecx,edi - mov ebx,[Col2RGB8+0x10000+ebx*4] - mov ecx,[Col2RGB8+0x10000+ecx*4] -s4fg3: add ebx,[SPACEFILLER4+edx*4] -s4fg4: add ecx,[SPACEFILLER4+edi*4] - or ebx,0x1f07c1f - or ecx,0x1f07c1f - mov edx,ebx - shr ebx,15 - mov edi,ecx - shr ecx,15 - and edx,ebx - and ecx,edi -s4p: add eax,320 ; pitch - add esi,4 - mov bl,[RGB32k+edx] - mov bh,[RGB32k+ecx] -s4p2: mov [eax-320+2],bl -s4p3: mov [eax-320+3],bh - dec ebp - jne s4loop - - pop edi - pop esi - pop ebx -s4nil: pop ebp - ret - - align 16 - -GLOBAL rt_add4cols_asm -GLOBAL _rt_add4cols_asm - -rt_add4cols_asm: -_rt_add4cols_asm: - mov ecx,[esp+8] - push edi - mov edi,[esp+16] - sub edi,ecx - js near a4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] - push ebx - push esi - mov esi,[dc_temp] - push ebp - inc edi - add eax,[esp+20] - lea esi,[esi+ecx*4] - - align 16 -a4loop: - movzx ebx,byte [esi] - movzx edx,byte [esi+1] - movzx ecx,byte [eax] - movzx ebp,byte [eax+1] -a4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -a4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap -a4bg1: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb -a4bg2: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb -a4fg1: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb -a4fg2: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb - or ecx,0x01f07c1f - or ebp,0x01f07c1f - mov ebx,ecx - shr ecx,15 - mov edx,ebp - shr ebp,15 - and ecx,ebx - and ebp,edx - movzx ebx,byte [esi+2] - movzx edx,byte [esi+3] - mov cl,[RGB32k+ecx] - mov ch,[RGB32k+ebp] - mov [eax],cl - mov [eax+1],ch - - movzx ecx,byte [eax+2] - movzx ebp,byte [eax+3] -a4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -a4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap -a4bg3: mov ecx,[SPACEFILLER4+ecx*4] ; bg2rgb -a4bg4: mov ebp,[SPACEFILLER4+ebp*4] ; bg2rgb -a4fg3: add ecx,[SPACEFILLER4+ebx*4] ; fg2rgb -a4fg4: add ebp,[SPACEFILLER4+edx*4] ; fg2rgb - or ecx,0x01f07c1f - or ebp,0x01f07c1f - mov ebx,ecx - shr ecx,15 - mov edx,ebp - shr ebp,15 - and ebx,ecx - and edx,ebp - mov cl,[RGB32k+ebx] - mov ch,[RGB32k+edx] - mov [eax+2],cl - mov [eax+3],ch - - add esi,4 -a4p: add eax,320 ; pitch - sub edi,1 - jne a4loop - pop ebp - pop esi - pop ebx -a4nil: pop edi - ret - - align 16 - -GLOBAL rt_addclamp4cols_asm -GLOBAL _rt_addclamp4cols_asm - -rt_addclamp4cols_asm: -_rt_addclamp4cols_asm: - mov ecx,[esp+8] - push edi - mov edi,[esp+16] - sub edi,ecx - js near ac4nil - mov eax,[ylookup+ecx*4] - add eax,[dc_destorg] - push ebx - push esi - mov esi,[dc_temp] - push ebp - inc edi - add eax,[esp+20] - lea esi,[esi+ecx*4] - push edi - - align 16 -ac4loop: - movzx ebx,byte [esi] - movzx edx,byte [esi+1] - mov [esp],edi -ac4cm1: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap -ac4cm2: movzx edx,byte [SPACEFILLER4+edx] ; colormap - movzx ecx,byte [eax] - movzx ebp,byte [eax+1] -ac4fg1: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb -ac4fg2: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb -ac4bg1: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb -ac4bg2: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb - mov ecx,ebx - or ebx,0x01f07c1f - and ecx,0x40100400 - and ebx,0x3fffffff - mov edi,ecx - shr ecx,5 - mov ebp,edx - sub edi,ecx - or edx,0x01f07c1f - or ebx,edi - mov ecx,ebx - shr ebx,15 - and ebp,0x40100400 - and ebx,ecx - and edx,0x3fffffff - mov edi,ebp - shr ebp,5 - mov cl,[RGB32k+ebx] - sub edi,ebp - mov [eax],cl - or edx,edi - mov ebp,edx - shr edx,15 - movzx ebx,byte [esi+2] - and ebp,edx - movzx edx,byte [esi+3] -ac4cm3: movzx ebx,byte [SPACEFILLER4+ebx] ; colormap - mov cl,[RGB32k+ebp] -ac4cm4: movzx edx,byte [SPACEFILLER4+edx] ; colormap - mov [eax+1],cl - movzx ecx,byte [eax+2] - movzx ebp,byte [eax+3] -ac4fg3: mov ebx,[SPACEFILLER4+ebx*4] ; fg2rgb -ac4fg4: mov edx,[SPACEFILLER4+edx*4] ; fg2rgb -ac4bg3: add ebx,[SPACEFILLER4+ecx*4] ; bg2rgb -ac4bg4: add edx,[SPACEFILLER4+ebp*4] ; bg2rgb - mov ecx,ebx - or ebx,0x01f07c1f - and ecx,0x40100400 - and ebx,0x3fffffff - mov edi,ecx - shr ecx,5 - mov ebp,edx - sub edi,ecx - or edx,0x01f07c1f - or ebx,edi - mov ecx,ebx - shr ebx,15 - and ebp,0x40100400 - and ebx,ecx - and edx,0x3fffffff - mov edi,ebp - shr ebp,5 - mov cl,[RGB32k+ebx] - sub edi,ebp - mov [eax+2],cl - or edx,edi - mov edi,[esp] - mov ebp,edx - shr edx,15 - add esi,4 - and edx,ebp - mov cl,[RGB32k+edx] - mov [eax+3],cl - -ac4p: add eax,320 ; pitch - sub edi,1 - jne ac4loop - pop edi - - pop ebp - pop esi - pop ebx -ac4nil: pop edi - ret - -rtext_end: -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap_end -_rtext_tmap_end: -%endif - align 16 - -;************************ - - SECTION .text - -GLOBAL R_SetupShadedCol -GLOBAL @R_SetupShadedCol@0 - -# Patch the values of dc_colormap and dc_color into the shaded column drawer. - -R_SetupShadedCol: -@R_SetupShadedCol@0: - mov eax,[dc_colormap] - cmp [s4cm1+3],eax - je .cmdone - mov [s4cm1+3],eax - mov [s4cm2+3],eax - mov [s4cm3+3],eax - mov [s4cm4+3],eax -.cmdone mov eax,[dc_color] - lea eax,[Col2RGB8+eax*4] - cmp [s4fg1+3],eax - je .cdone - mov [s4fg1+3],eax - mov [s4fg2+3],eax - mov [s4fg3+3],eax - mov [s4fg4+3],eax - selfmod s4cm1, s4fg4+7 -.cdone ret - -GLOBAL R_SetupAddCol -GLOBAL @R_SetupAddCol@0 - -# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the -# unclamped adding column drawer. - -R_SetupAddCol: -@R_SetupAddCol@0: - mov eax,[dc_colormap] - cmp [a4cm1+3],eax - je .cmdone - mov [a4cm1+3],eax - mov [a4cm2+3],eax - mov [a4cm3+3],eax - mov [a4cm4+3],eax -.cmdone mov eax,[dc_srcblend] - cmp [a4fg1+3],eax - je .sbdone - mov [a4fg1+3],eax - mov [a4fg2+3],eax - mov [a4fg3+3],eax - mov [a4fg4+3],eax -.sbdone mov eax,[dc_destblend] - cmp [a4bg1+3],eax - je .dbdone - mov [a4bg1+3],eax - mov [a4bg2+3],eax - mov [a4bg3+3],eax - mov [a4bg4+3],eax - selfmod a4cm1, a4bg4+7 -.dbdone ret - -GLOBAL R_SetupAddClampCol -GLOBAL @R_SetupAddClampCol@0 - -# Patch the values of dc_colormap, dc_srcblend, and dc_destblend into the -# add with clamping column drawer. - -R_SetupAddClampCol: -@R_SetupAddClampCol@0: - mov eax,[dc_colormap] - cmp [ac4cm1+3],eax - je .cmdone - mov [ac4cm1+3],eax - mov [ac4cm2+3],eax - mov [ac4cm3+3],eax - mov [ac4cm4+3],eax -.cmdone mov eax,[dc_srcblend] - cmp [ac4fg1+3],eax - je .sbdone - mov [ac4fg1+3],eax - mov [ac4fg2+3],eax - mov [ac4fg3+3],eax - mov [ac4fg4+3],eax -.sbdone mov eax,[dc_destblend] - cmp [ac4bg1+3],eax - je .dbdone - mov [ac4bg1+3],eax - mov [ac4bg2+3],eax - mov [ac4bg3+3],eax - mov [ac4bg4+3],eax - selfmod ac4cm1, ac4bg4+7 -.dbdone ret - -EXTERN setvlinebpl_ -EXTERN setpitch3 - -GLOBAL @ASM_PatchPitch@0 -GLOBAL _ASM_PatchPitch -GLOBAL ASM_PatchPitch - -ASM_PatchPitch: -_ASM_PatchPitch: -@ASM_PatchPitch@0: - mov eax,[dc_pitch] - mov [rdcp1+2],eax - mov [rdcp2+2],eax - mov [rdcp3+2],eax - mov [s4p+1],eax - mov [a4p+1],eax - mov [ac4p+1],eax - mov ecx,eax - neg ecx - inc ecx - inc ecx - mov [s4p2+2],ecx - inc ecx - mov [s4p3+2],ecx - selfmod rtext_start, rtext_end - call setpitch3 - jmp setvlinebpl_ diff --git a/src/vizdoom/src/asm_ia32/tmap2.asm b/src/vizdoom/src/asm_ia32/tmap2.asm deleted file mode 100644 index e1f166878..000000000 --- a/src/vizdoom/src/asm_ia32/tmap2.asm +++ /dev/null @@ -1,640 +0,0 @@ -;* -;* tmap2.nas -;* The tilted plane inner loop. -;* -;*--------------------------------------------------------------------------- -;* Copyright 1998-2006 Randy Heit -;* All rights reserved. -;* -;* Redistribution and use in source and binary forms, with or without -;* modification, are permitted provided that the following conditions -;* are met: -;* -;* 1. Redistributions of source code must retain the above copyright -;* notice, this list of conditions and the following disclaimer. -;* 2. Redistributions in binary form must reproduce the above copyright -;* notice, this list of conditions and the following disclaimer in the -;* documentation and/or other materials provided with the distribution. -;* 3. The name of the author may not be used to endorse or promote products -;* derived from this software without specific prior written permission. -;* -;* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -;* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -;* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -;* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -;* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -;* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -;* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -;* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -;* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -;*--------------------------------------------------------------------------- -;* -;* I tried doing the ROL trick that R_DrawSpanP_ASM uses, and it was -;* actually slightly slower than the more straight-forward approach -;* used here, probably because the trick requires too much setup time. -;* - -BITS 32 - -%include "valgrind.inc" - -%define SPACEFILLER4 (0x44444444) - -%ifndef M_TARGET_LINUX - -%define plane_sz _plane_sz -%define plane_su _plane_su -%define plane_sv _plane_sv -%define plane_shade _plane_shade -%define planelightfloat _planelightfloat -%define spanend _spanend -%define ylookup _ylookup -%define dc_destorg _dc_destorg -%define ds_colormap _ds_colormap -%define ds_source _ds_source -%define centery _centery -%define centerx _centerx -%define ds_curtiltedsource _ds_curtiltedsource -%define pviewx _pviewx -%define pviewy _pviewy -%define tiltlighting _tiltlighting - -%define R_DrawTiltedPlane_ASM _R_DrawTiltedPlane_ASM -%define R_SetTiltedSpanSource_ASM _R_SetTiltedSpanSource_ASM -%define R_CalcTiltedLighting _R_CalcTiltedLighting - -%endif - -EXTERN plane_sz -EXTERN plane_su -EXTERN plane_sv -EXTERN planelightfloat -EXTERN spanend -EXTERN ylookup -EXTERN dc_destorg -EXTERN ds_colormap -EXTERN centery -EXTERN centerx -EXTERN ds_source -EXTERN plane_shade -EXTERN pviewx -EXTERN pviewy -EXTERN tiltlighting -EXTERN R_CalcTiltedLighting - -GLOBAL ds_curtiltedsource - -%define sv_i plane_sv -%define sv_j plane_sv+4 -%define sv_k plane_sv+8 - -%define su_i plane_su -%define su_j plane_su+4 -%define su_k plane_su+8 - -%define sz_i plane_sz -%define sz_j plane_sz+4 -%define sz_k plane_sz+8 - -%define SPANBITS 3 - - section .bss - -start_u: resq 1 -start_v: resq 1 -step_u: resq 1 -step_v: resq 1 - -step_iz: resq 1 -step_uz: resq 1 -step_vz: resq 1 - -end_z: resd 1 - - section .data - -ds_curtiltedsource: dd SPACEFILLER4 - -fp_1: -spanrecips: dd 0x3f800000 ; 1/1 - dd 0x3f000000 ; 1/2 - dd 0x3eaaaaab ; 1/3 - dd 0x3e800000 ; 1/4 - dd 0x3e4ccccd ; 1/5 - dd 0x3e2aaaab ; 1/6 - dd 0x3e124925 ; 1/7 -fp_8recip: dd 0x3e000000 ; 1/8 - dd 0x3de38e39 ; 1/9 - dd 0x3dcccccd ; 1/10 - dd 0x3dba2e8c ; 1/11 - dd 0x3daaaaab ; 1/12 - dd 0x3d9d89d9 ; 1/13 - dd 0x3d924925 ; 1/14 - dd 0x3d888889 ; 1/15 - -fp_quickint: dd 0x3f800000 ; 1 - dd 0x40000000 ; 2 - dd 0x40400000 ; 3 - dd 0x40800000 ; 4 - dd 0x40a00000 ; 5 - dd 0x40c00000 ; 6 - dd 0x40e00000 ; 7 -fp_8: dd 0x41000000 ; 8 - - section .text - -GLOBAL R_SetTiltedSpanSource_ASM -GLOBAL @R_SetTiltedSpanSource_ASM@4 - -R_SetTiltedSpanSource_ASM: - mov ecx,[esp+4] - -@R_SetTiltedSpanSource_ASM@4: - mov [fetch1+3],ecx - mov [fetch2+3],ecx - mov [fetch3+3],ecx - mov [fetch4+3],ecx - mov [fetch5+3],ecx - mov [fetch6+3],ecx - mov [fetch7+3],ecx - mov [fetch8+3],ecx - mov [fetch9+3],ecx - mov [fetch10+3],ecx - mov [ds_curtiltedsource],ecx - selfmod rtext_start, rtext_end - ret - -GLOBAL SetTiltedSpanSize - -SetTiltedSpanSize: - push ecx - mov cl,dl - neg cl - mov eax,1 - shl eax,cl - mov cl,[esp] - neg cl - mov [x1+2],cl - mov [x2+2],cl - mov [x3+2],cl - mov [x4+2],cl - mov [x5+2],cl - mov [x6+2],cl - mov [x7+2],cl - mov [x8+2],cl - mov [x9+2],cl - mov [x10+2],cl - - sub cl,dl - dec eax - mov [y1+2],cl - mov [y2+2],cl - mov [y3+2],cl - mov [y4+2],cl - mov [y5+2],cl - mov [y6+2],cl - mov [y7+2],cl - mov [y8+2],cl - mov [y9+2],cl - mov [y10+2],cl - not eax - pop ecx - - mov [m1+2],eax - mov [m2+2],eax - mov [m3+2],eax - mov [m4+2],eax - mov [m5+2],eax - mov [m6+2],eax - mov [m7+2],eax - mov [m8+2],eax - mov [m9+2],eax - mov [m10+2],eax - - selfmod rtext_start, rtext_end - - ret - -%ifndef M_TARGET_MACHO - SECTION .rtext progbits alloc exec write align=64 -%else - SECTION .text align=64 -GLOBAL _rtext_tmap2_start -_rtext_tmap2_start: -%endif - -rtext_start: - -GLOBAL R_DrawTiltedPlane_ASM -GLOBAL @R_DrawTiltedPlane_ASM@8 - -R_DrawTiltedPlane_ASM: - mov ecx,[esp+4] - mov edx,[esp+8] - - ; ecx = y - ; edx = x - -@R_DrawTiltedPlane_ASM@8: - push ebx - push esi - push edi - push ebp - - mov eax,[centery] - movzx ebx,word [spanend+ecx*2] - sub eax,ecx ; eax = centery-y - sub ebx,edx ; ebx = span length - 1 - mov edi,[ylookup+ecx*4] - push eax - add edi,[dc_destorg] - add edi,edx ; edi = frame buffer pointer - sub edx,[centerx] ; edx = x-centerx - push edx - xor eax,eax - - fild dword [esp+4] ; ymul - fild dword [esp] ; xmul | ymul - fld dword [sv_j] ; sv.j | xmul | ymul - fmul st0,st2 ; sv.j*ymul | xmul | ymul - fld dword [su_j] ; su.j | sv.j*ymul | xmul | ymul - fmul st0,st3 ; su.j*ymul | sv.j*ymul | xmul | ymul - fld dword [sz_j] ; sz.j | su.j*ymul | sv.j*ymul | xmul | ymul - fmulp st4,st0 ; su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [sv_i] ; sv.i | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmul st0,st3 ; sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [su_i] ; su.i | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmul st0,st4 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fld dword [sz_i] ; sz.i | su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | xmul | sz.j*ymul - fmulp st5,st0 ; su.i*xmul | sv.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul - fxch st1 ; sv.i*xmul | su.i*xmul | su.j*ymul | sv.j*ymul | sz.i*xmul | sz.j*ymul - faddp st3,st0 ; su.i*xmul | su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul - faddp st1,st0 ; su.i*xmul+su.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | sz.j*ymul - fxch st3 ; sz.j*ymul | sv.i*xmul+sv.j*ymul | sz.i*xmul | su.i*xmul+su.j*ymul - faddp st2,st0 ; sv.i*xmul+sv.j*ymul | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul - fadd dword [sv_k] ; v/z | sz.i*xmul+sz.j*ymul | su.i*xmul+su.j*ymul - fxch st1 ; sz.i*xmul+sz.j*ymul | v/z | su.i*xmul+su.j*ymul - fadd dword [sz_k] ; 1/z | v/z | su.i*xmul+su.j*ymul - fxch st2 ; su.i*xmul+su.j*ymul | v/z | 1/z - fadd dword [su_k] ; u/z | v/z | 1/z - fxch st2 ; 1/z | v/z | u/z - fxch st1 ; v/z | 1/z | u/z - -; if lighting is on, fill out the light table - mov al,[plane_shade] - test al,al - jz .litup - - push ebx - fild dword [esp] ; width | v/z | 1/z | u/z - fmul dword [sz_i] ; width*sz.i | v/z | 1/z | u/z - fadd st0,st2 ; 1/endz | v/z | 1/z | u/z - fld st2 ; 1/z | 1/endz | v/z | 1/z | u/z - fmul dword [planelightfloat] - fxch st1 - fmul dword [planelightfloat] - sub esp,8 - fistp dword [esp] - fistp dword [esp+4] - call R_CalcTiltedLighting - add esp, 12 - xor eax, eax - -.litup add esp, 8 - -; calculate initial z, u, and v values - fld st1 ; 1/z | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | v/z | 1/z | u/z - - fld st3 ; u/z | z | v/z | 1/z | u/z - fmul st0,st1 ; u | z | v/z | 1/z | u/z - fld st2 ; v/z | u | z | v/z | 1/z | u/z - fmulp st2,st0 ; u | v | v/z | 1/z | u/z - fld st0 - fistp qword [start_u] - fld st1 - fistp qword [start_v] - - cmp ebx,7 ; Do we have at least 8 pixels to plot? - jl near ShortStrip - -; yes, we do, so figure out tex coords at end of this span - -; multiply i values by span length (8) - fld dword [su_i] ; su.i - fmul dword [fp_8] ; su.i*8 - fld dword [sv_i] ; sv.i | su.i*8 - fmul dword [fp_8] ; sv.i*8 | su.i*8 - fld dword [sz_i] ; sz.i | sv.i*8 | su.i*8 - fmul dword [fp_8] ; sz.i*8 | sv.i*8 | su.i*8 - fxch st2 ; su.i*8 | sv.i*8 | sz.i*8 - fstp qword [step_uz] ; sv.i*8 | sz.i*8 - fstp qword [step_vz] ; sz.i*8 - fst qword [step_iz] ; sz.i*8 - -; find tex coords at start of next span - faddp st4 - fld qword [step_vz] - faddp st3 - fld qword [step_uz] - faddp st5 - - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z - -; now subtract to get stepping values for this span - fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z - fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z - fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z - fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z - fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z - fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z - fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_u] ; u | v | v/z | 1/z | u/z - -FullSpan: - xor eax,eax - cmp ebx,15 ; is there another complete span after this one? - jl NextIsShort - -; there is a complete span after this one - fld qword [step_iz] - faddp st4,st0 - fld qword [step_vz] - faddp st3,st0 - fld qword [step_uz] - faddp st5,st0 - jmp StartDiv - -NextIsShort: - cmp ebx,8 ; if next span is no more than 1 pixel, then we already - jle DrawFullSpan ; know everything we need to draw it - - fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint-8*4+ebx*4] - fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z - faddp st5,st0 ; u | v | v/z | 1/z | u/z - -StartDiv: - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - -DrawFullSpan: - mov ecx,[start_v] - mov edx,[start_u] - - add ecx,[pviewy] - add edx,[pviewx] - - mov esi,edx - mov ebp,ecx -x1 shr ebp,26 -m1 and esi,0xfc000000 -y1 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch1 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+0],al - -x2 shr ebp,26 -m2 and esi,0xfc000000 -y2 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch2 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-4] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+1],al - -x3 shr ebp,26 -m3 and esi,0xfc000000 -y3 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch3 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-8] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+2],al - -x4 shr ebp,26 -m4 and esi,0xfc000000 -y4 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch4 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-12] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+3],al - -x5 shr ebp,26 -m5 and esi,0xfc000000 -y5 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch5 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-16] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+4],al - -x6 shr ebp,26 -m6 and esi,0xfc000000 -y6 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch6 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-20] - mov esi,edx - mov al,[ebp+eax] - mov ebp,ecx - mov [edi+5],al - -x7 shr ebp,26 -m7 and esi,0xfc000000 -y7 shr esi,20 - add ecx,[step_v] - add edx,[step_u] -fetch7 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4-24] -x8 shr ecx,26 - mov al,[ebp+eax] -m8 and edx,0xfc000000 - mov [edi+6],al - -y8 shr edx,20 - mov ebp,[tiltlighting+ebx*4-28] -fetch8 mov al,[edx+ecx+SPACEFILLER4] - mov al,[ebp+eax] - mov [edi+7],al - add edi,8 - - sub ebx,8 - jl near Done - - fld st1 - fistp qword [start_u] - fld st2 - fistp qword [start_v] - - cmp ebx,7 - jl near EndIsShort - - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st3 ; v | u' | u | v' | v/z | 1/z | u/z - fsubr st0,st3 ; v'-v | u' | u | v' | v/z | 1/z | u/z - fxch st2 ; u | u' | v'-v | v' | v/z | 1/z | u/z - fsubr st0,st1 ; u'-u | u' | v'-v | v' | v/z | 1/z | u/z - fxch st2 ; v'-v | u' | u'-u | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; vstep | u' | u'-u | v' | v/z | 1/z | u/z - fxch st1 ; u' | vstep | u'-u | v' | v/z | 1/z | u/z - fxch st2 ; u'-u | vstep | u' | v' | v/z | 1/z | u/z - fmul dword [fp_8recip] ; ustep | vstep | u' | v' | v/z | 1/z | u/z - fxch st1 ; vstep | ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_v] ; ustep | u' | v' | v/z | 1/z | u/z - fistp qword [step_u] ; u | v | v/z | 1/z | u/z - jmp FullSpan - -OnlyOnePixelAtEnd: - fld st0 - fistp qword [start_u] - fld st1 - fistp qword [start_v] - -OnlyOnePixel: - mov edx,[start_v] - mov ecx,[start_u] - add edx,[pviewy] - add ecx,[pviewx] -x9 shr edx,26 -m9 and ecx,0xfc000000 -y9 shr ecx,20 - mov ebp,[tiltlighting] -fetch9 mov al,[ecx+edx+SPACEFILLER4] - mov al,[ebp+eax] - mov [edi],al - -Done: - fcompp - fcompp - fstp st0 - - pop ebp - pop edi - pop esi - pop ebx - ret - -ShortStrip: - cmp ebx,0 - jle near OnlyOnePixel - -MoreThanOnePixel: - fld dword [sz_i] ; sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fld dword [sv_i] ; sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fld dword [su_i] ; su.i | sv.i | sz.i | u | v | v/z | 1/z | u/z - fmul dword [fp_quickint+ebx*4] - fxch st2 ; sz.i | sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st6,st0 ; sv.i | su.i | u | v | v/z | 1/z | u/z - faddp st4,st0 ; su.i | u | v | v/z | 1/z | u/z - faddp st5,st0 ; u | v | v/z | 1/z | u/z - fld st3 ; 1/z | u | v | v/z | 1/z | u/z - fdivr dword [fp_1] ; z | u | v | v/z | 1/z | u/z - jmp CalcPartialSteps - -EndIsShort: - cmp ebx,0 - je near OnlyOnePixelAtEnd - -CalcPartialSteps: - fst dword [end_z] - fld st5 ; u/z | z | u | v | v/z | 1/z | u/z - fmul st0,st1 ; u' | z | u | v | v/z | 1/z | u/z - fxch st1 ; z | u' | u | v | v/z | 1/z | u/z - fmul st0,st4 ; v' | u' | u | v | v/z | 1/z | u/z - fxch st1 ; u' | v' | u | v | v/z | 1/z | u/z - fsubrp st2,st0 ; v' | u'-u | v | v/z | 1/z | u/z - fsubrp st2,st0 ; u'-u | v'-v | v/z | 1/z | u/z - fmul dword [spanrecips+ebx*4] ;ustep | v'-v | v/z | 1/z | u/z - fxch st1 ; v'-v | ustep | v/z | 1/z | u/z - fmul dword [spanrecips+ebx*4] ;vstep | ustep | v/z | 1/z | u/z - fxch st1 ; ustep | vstep | v/z | 1/z | u/z - fistp qword [step_u] ; vstep | v/z | 1/z | u/z - fistp qword [step_v] ; v/z | 1/z | u/z - - mov ecx,[start_v] - mov edx,[start_u] - - add ecx,[pviewy] - add edx,[pviewx] - - mov esi,edx - mov ebp,ecx -endloop: -x10 shr ebp,26 -m10 and esi,0xfc000000 - -y10 shr esi,20 - inc edi - - add ecx,[step_v] - add edx,[step_u] - -fetch10 mov al,[ebp+esi+SPACEFILLER4] - mov ebp,[tiltlighting+ebx*4] - - mov esi,edx - dec ebx - - mov al,[ebp+eax] - mov ebp,ecx - - mov [edi-1],al - jge endloop - - fcompp - fstp st0 - - pop ebp - pop edi - pop esi - pop ebx - ret - -rtext_end: -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap2_end -_rtext_tmap2_end: -%endif diff --git a/src/vizdoom/src/asm_ia32/tmap3.asm b/src/vizdoom/src/asm_ia32/tmap3.asm deleted file mode 100644 index 3161ff368..000000000 --- a/src/vizdoom/src/asm_ia32/tmap3.asm +++ /dev/null @@ -1,344 +0,0 @@ -%include "valgrind.inc" - -%ifdef M_TARGET_WATCOM - SEGMENT DATA PUBLIC ALIGN=16 CLASS=DATA USE32 - SEGMENT DATA -%else - SECTION .data -%endif - -%ifndef M_TARGET_LINUX -%define ylookup _ylookup -%define vplce _vplce -%define vince _vince -%define palookupoffse _palookupoffse -%define bufplce _bufplce -%define dc_iscale _dc_iscale -%define dc_colormap _dc_colormap -%define dc_count _dc_count -%define dc_dest _dc_dest -%define dc_source _dc_source -%define dc_texturefrac _dc_texturefrac -%define dc_pitch _dc_pitch - -%define setupvlinetallasm _setupvlinetallasm -%define vlinetallasm4 _vlinetallasm4 -%define vlinetallasmathlon4 _vlinetallasmathlon4 -%define vlinetallasm1 _vlinetallasm1 -%define prevlinetallasm1 _prevlinetallasm1 -%endif - -EXTERN vplce -EXTERN vince -EXTERN palookupoffse -EXTERN bufplce - -EXTERN ylookup -EXTERN dc_iscale -EXTERN dc_colormap -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_source -EXTERN dc_texturefrac -EXTERN dc_pitch - -GLOBAL vlt4pitch -GLOBAL vlt1pitch - -%ifdef M_TARGET_WATCOM - SEGMENT CODE PUBLIC ALIGN=16 CLASS=CODE USE32 - SEGMENT CODE -%else - SECTION .text -%endif - -ALIGN 16 -GLOBAL setpitch3 -setpitch3: - mov [vltpitch+2], eax - mov [vltpitcha+2],eax - mov [vlt1pitch1+2], eax - mov [vlt1pitch2+2], eax - selfmod vltpitch, vlt1pitch2+6 - ret - -ALIGN 16 -GLOBAL setupvlinetallasm -setupvlinetallasm: - mov ecx, [esp+4] - mov [shifter1+2], cl - mov [shifter2+2], cl - mov [shifter3+2], cl - mov [shifter4+2], cl - mov [shifter1a+2], cl - mov [shifter2a+2], cl - mov [shifter3a+2], cl - mov [shifter4a+2], cl - mov [preshift+2], cl - mov [shift11+2], cl - mov [shift12+2], cl - selfmod shifter1, shift12+6 - ret - -%ifdef M_TARGET_MACHO - SECTION .text align=64 -GLOBAL _rtext_tmap3_start -_rtext_tmap3_start: -%else - SECTION .rtext progbits alloc exec write align=64 -%endif - -ALIGN 16 - -GLOBAL vlinetallasm4 -vlinetallasm4: - push ebx - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov ecx, [bufplce+8] - mov edx, [bufplce+12] - mov [source1+3], eax - mov [source2+3], ebx - mov [source3+3], ecx - mov [source4+3], edx - mov eax, [palookupoffse+0] - mov ebx, [palookupoffse+4] - mov ecx, [palookupoffse+8] - mov edx, [palookupoffse+12] - mov [lookup1+2], eax - mov [lookup2+2], ebx - mov [lookup3+2], ecx - mov [lookup4+2], edx - mov eax, [vince+0] - mov ebx, [vince+4] - mov ecx, [vince+8] - mov edx, [vince+12] - mov [step1+2], eax - mov [step2+2], ebx - mov [step3+2], ecx - mov [step4+1], edx - push ebp - push esi - push edi - mov ecx, [dc_count] - mov edi, [dc_dest] - mov eax, dword [ylookup+ecx*4-4] - add eax, edi - sub edi, eax - mov [write1+2],eax - inc eax - mov [write2+2],eax - inc eax - mov [write3+2],eax - inc eax - mov [write4+2],eax - mov ebx, [vplce] - mov ecx, [vplce+4] - mov esi, [vplce+8] - mov eax, [vplce+12] - selfmod loopit, vltpitch - jmp loopit - -ALIGN 16 -loopit: - mov edx, ebx -shifter1: shr edx, 24 -source1: movzx edx, BYTE [edx+0x88888888] -lookup1: mov dl, [edx+0x88888888] -write1: mov [edi+0x88888880], dl -step1: add ebx, 0x88888888 - mov edx, ecx -shifter2: shr edx, 24 -source2: movzx edx, BYTE [edx+0x88888888] -lookup2: mov dl, [edx+0x88888888] -write2: mov [edi+0x88888881], dl -step2: add ecx, 0x88888888 - mov edx, esi -shifter3: shr edx, 24 -source3: movzx edx, BYTE [edx+0x88888888] -lookup3: mov dl, BYTE [edx+0x88888888] -write3: mov [edi+0x88888882], dl -step3: add esi, 0x88888888 - mov edx, eax -shifter4: shr edx, 24 -source4: movzx edx, BYTE [edx+0x88888888] -lookup4: mov dl, [edx+0x88888888] -write4: mov [edi+0x88888883], dl -step4: add eax, 0x88888888 -vltpitch: add edi, 320 - jle near loopit - - mov [vplce], ebx - mov [vplce+4], ecx - mov [vplce+8], esi - mov [vplce+12], eax - - pop edi - pop esi - pop ebp - pop ebx - - ret - - ALIGN 16 - -GLOBAL vlinetallasmathlon4 -vlinetallasmathlon4: - push ebx - mov eax, [bufplce+0] - mov ebx, [bufplce+4] - mov ecx, [bufplce+8] - mov edx, [bufplce+12] - mov [source1a+3], eax - mov [source2a+3], ebx - mov [source3a+3], ecx - mov [source4a+3], edx - mov eax, [palookupoffse+0] - mov ebx, [palookupoffse+4] - mov ecx, [palookupoffse+8] - mov edx, [palookupoffse+12] - mov [lookup1a+2], eax - mov [lookup2a+2], ebx - mov [lookup3a+2], ecx - mov [lookup4a+2], edx - mov eax, [vince+0] - mov ebx, [vince+4] - mov ecx, [vince+8] - mov edx, [vince+12] - mov [step1a+2], eax - mov [step2a+2], ebx - mov [step3a+2], ecx - mov [step4a+1], edx - push ebp - push esi - push edi - mov ecx, [dc_count] - mov edi, [dc_dest] - mov eax, dword [ylookup+ecx*4-4] - add eax, edi - sub edi, eax - mov [write1a+2],eax - inc eax - mov [write2a+2],eax - inc eax - mov [write3a+2],eax - inc eax - mov [write4a+2],eax - mov ebp, [vplce] - mov ecx, [vplce+4] - mov esi, [vplce+8] - mov eax, [vplce+12] - selfmod loopita, vltpitcha - jmp loopita - -; Unfortunately, this code has not been carefully analyzed to determine -; how well it utilizes the processor's instruction units. Instead, I just -; kept rearranging code, seeing what sped it up and what slowed it down -; until I arrived at this. The is the fastest version I was able to -; manage, but that does not mean it cannot be made faster with careful -; instructing shuffling. - - ALIGN 64 - -loopita: mov edx, ebp - mov ebx, ecx -shifter1a: shr edx, 24 -shifter2a: shr ebx, 24 -source1a: movzx edx, BYTE [edx+0x88888888] -source2a: movzx ebx, BYTE [ebx+0x88888888] -step1a: add ebp, 0x88888888 -step2a: add ecx, 0x88888888 -lookup1a: mov dl, [edx+0x88888888] -lookup2a: mov dh, [ebx+0x88888888] - mov ebx, esi -write1a: mov [edi+0x88888880], dl -write2a: mov [edi+0x88888881], dh -shifter3a: shr ebx, 24 - mov edx, eax -source3a: movzx ebx, BYTE [ebx+0x88888888] -shifter4a: shr edx, 24 -step3a: add esi, 0x88888888 -source4a: movzx edx, BYTE [edx+0x88888888] -step4a: add eax, 0x88888888 -lookup3a: mov bl, [ebx+0x88888888] -lookup4a: mov dl, [edx+0x88888888] -write3a: mov [edi+0x88888882], bl -write4a: mov [edi+0x88888883], dl -vltpitcha: add edi, 320 - jle near loopita - - mov [vplce], ebp - mov [vplce+4], ecx - mov [vplce+8], esi - mov [vplce+12], eax - - pop edi - pop esi - pop ebp - pop ebx - - ret - -ALIGN 16 -GLOBAL prevlinetallasm1 -prevlinetallasm1: - mov ecx, [dc_count] - cmp ecx, 1 - ja vlinetallasm1 - - mov eax, [dc_iscale] - mov edx, [dc_texturefrac] - add eax, edx - mov ecx, [dc_source] -preshift: shr edx, 16 - push ebx - push edi - mov edi, [dc_colormap] - movzx ebx, byte [ecx+edx] - mov ecx, [dc_dest] - mov bl, byte [edi+ebx] - pop edi - mov byte [ecx], bl - pop ebx - ret - -ALIGN 16 -GLOBAL vlinetallasm1 -vlinetallasm1: - push ebp - push ebx - push edi - push esi - - mov ebp, [dc_count] - mov ebx, [dc_texturefrac] ; ebx = frac - mov edi, [dc_dest] - mov ecx, ebx -shift11: shr ecx, 16 - mov esi, [dc_source] - mov edx, [dc_iscale] -vlt1pitch1: sub edi, 0x88888888 - mov eax, [dc_colormap] - -loop2: - movzx ecx, BYTE [esi+ecx] - add ebx, edx -vlt1pitch2: add edi, 0x88888888 - mov cl,[eax+ecx] - mov [edi],cl - mov ecx,ebx -shift12: shr ecx,16 - dec ebp - jnz loop2 - - mov eax,ebx - pop esi - pop edi - pop ebx - pop ebp - ret - -%ifdef M_TARGET_MACHO -GLOBAL _rtext_tmap3_end -_rtext_tmap3_end: -%endif diff --git a/src/vizdoom/src/asm_x86_64/tmap3.asm b/src/vizdoom/src/asm_x86_64/tmap3.asm deleted file mode 100644 index bebf1ee41..000000000 --- a/src/vizdoom/src/asm_x86_64/tmap3.asm +++ /dev/null @@ -1,150 +0,0 @@ -%ifnidn __OUTPUT_FORMAT__,win64 -%error tmap3.asm is for Win64 output. You should use tmap.s for other systems. -%endif - -BITS 64 -DEFAULT REL - -EXTERN vplce -EXTERN vince -EXTERN palookupoffse -EXTERN bufplce - -EXTERN dc_count -EXTERN dc_dest -EXTERN dc_pitch - -SECTION .text - -GLOBAL ASM_PatchPitch -ASM_PatchPitch: - mov ecx, [dc_pitch] - mov [pm+3], ecx - mov [vltpitch+3], ecx - ret - align 16 - -GLOBAL setupvlinetallasm -setupvlinetallasm: - mov [shifter1+2], cl - mov [shifter2+2], cl - mov [shifter3+2], cl - mov [shifter4+2], cl - ret - align 16 - -; Yasm can't do progbits alloc exec for win64? -; Hmm, looks like it's automatic. No worries, then. -SECTION .rtext write ;progbits alloc exec - -GLOBAL vlinetallasm4 -PROC_FRAME vlinetallasm4 - rex_push_reg rbx - push_reg rdi - push_reg r15 - push_reg r14 - push_reg r13 - push_reg r12 - push_reg rbp - push_reg rsi - alloc_stack 8 ; Stack must be 16-byte aligned -END_PROLOGUE -; rax = bufplce base address -; rbx = -; rcx = offset from rdi/count (negative) -; edx/rdx = scratch -; rdi = bottom of columns to write to -; r8d-r11d = column offsets -; r12-r15 = palookupoffse[0] - palookupoffse[4] - - mov ecx, [dc_count] - mov rdi, [dc_dest] - test ecx, ecx - jle vltepilog ; count must be positive - - mov rax, [bufplce] - mov r8, [bufplce+8] - sub r8, rax - mov r9, [bufplce+16] - sub r9, rax - mov r10, [bufplce+24] - sub r10, rax - mov [source2+4], r8d - mov [source3+4], r9d - mov [source4+4], r10d - -pm: imul rcx, 320 - - mov r12, [palookupoffse] - mov r13, [palookupoffse+8] - mov r14, [palookupoffse+16] - mov r15, [palookupoffse+24] - - mov r8d, [vince] - mov r9d, [vince+4] - mov r10d, [vince+8] - mov r11d, [vince+12] - mov [step1+3], r8d - mov [step2+3], r9d - mov [step3+3], r10d - mov [step4+3], r11d - - add rdi, rcx - neg rcx - - mov r8d, [vplce] - mov r9d, [vplce+4] - mov r10d, [vplce+8] - mov r11d, [vplce+12] - jmp loopit - -ALIGN 16 -loopit: - mov edx, r8d -shifter1: shr edx, 24 -step1: add r8d, 0x88888888 - movzx edx, BYTE [rax+rdx] - mov ebx, r9d - mov dl, [r12+rdx] -shifter2: shr ebx, 24 -step2: add r9d, 0x88888888 -source2: movzx ebx, BYTE [rax+rbx+0x88888888] - mov ebp, r10d - mov bl, [r13+rbx] -shifter3: shr ebp, 24 -step3: add r10d, 0x88888888 -source3: movzx ebp, BYTE [rax+rbp+0x88888888] - mov esi, r11d - mov bpl, BYTE [r14+rbp] -shifter4: shr esi, 24 -step4: add r11d, 0x88888888 -source4: movzx esi, BYTE [rax+rsi+0x88888888] - mov [rdi+rcx], dl - mov [rdi+rcx+1], bl - mov sil, BYTE [r15+rsi] - mov [rdi+rcx+2], bpl - mov [rdi+rcx+3], sil - -vltpitch: add rcx, 320 - jl loopit - - mov [vplce], r8d - mov [vplce+4], r9d - mov [vplce+8], r10d - mov [vplce+12], r11d - -vltepilog: - add rsp, 8 - pop rsi - pop rbp - pop r12 - pop r13 - pop r14 - pop r15 - pop rdi - pop rbx - ret -vlinetallasm4_end: -ENDPROC_FRAME - ALIGN 16 - diff --git a/src/vizdoom/src/asm_x86_64/tmap3.s b/src/vizdoom/src/asm_x86_64/tmap3.s deleted file mode 100644 index 867d11c75..000000000 --- a/src/vizdoom/src/asm_x86_64/tmap3.s +++ /dev/null @@ -1,141 +0,0 @@ -#%include "valgrind.inc" - - .section .text - -.globl ASM_PatchPitch -ASM_PatchPitch: - movl dc_pitch(%rip), %ecx - movl %ecx, pm+3(%rip) - movl %ecx, vltpitch+3(%rip) -# selfmod pm, vltpitch+6 - ret - .align 16 - -.globl setupvlinetallasm -setupvlinetallasm: - movb %dil, shifter1+2(%rip) - movb %dil, shifter2+2(%rip) - movb %dil, shifter3+2(%rip) - movb %dil, shifter4+2(%rip) -# selfmod shifter1, shifter4+3 - ret - .align 16 - - .section .rtext,"awx" - -.globl vlinetallasm4 - .type vlinetallasm4,@function -vlinetallasm4: - .cfi_startproc - push %rbx - push %rdi - push %r15 - push %r14 - push %r13 - push %r12 - push %rbp - push %rsi - subq $8, %rsp # Does the stack need to be 16-byte aligned for Linux? - .cfi_adjust_cfa_offset 8 - -# rax = bufplce base address -# rbx = -# rcx = offset from rdi/count (negative) -# edx/rdx = scratch -# rdi = bottom of columns to write to -# r8d-r11d = column offsets -# r12-r15 = palookupoffse[0] - palookupoffse[4] - - movl dc_count(%rip), %ecx - movq dc_dest(%rip), %rdi - testl %ecx, %ecx - jle vltepilog # count must be positive - - movq bufplce(%rip), %rax - movq bufplce+8(%rip), %r8 - subq %rax, %r8 - movq bufplce+16(%rip), %r9 - subq %rax, %r9 - movq bufplce+24(%rip), %r10 - subq %rax, %r10 - movl %r8d, source2+4(%rip) - movl %r9d, source3+4(%rip) - movl %r10d, source4+4(%rip) - -pm: imulq $320, %rcx - - movq palookupoffse(%rip), %r12 - movq palookupoffse+8(%rip), %r13 - movq palookupoffse+16(%rip), %r14 - movq palookupoffse+24(%rip), %r15 - - movl vince(%rip), %r8d - movl vince+4(%rip), %r9d - movl vince+8(%rip), %r10d - movl vince+12(%rip), %r11d - movl %r8d, step1+3(%rip) - movl %r9d, step2+3(%rip) - movl %r10d, step3+3(%rip) - movl %r11d, step4+3(%rip) - - addq %rcx, %rdi - negq %rcx - - movl vplce(%rip), %r8d - movl vplce+4(%rip), %r9d - movl vplce+8(%rip), %r10d - movl vplce+12(%rip), %r11d -# selfmod loopit, vltepilog - jmp loopit - - .align 16 -loopit: - movl %r8d, %edx -shifter1: shrl $24, %edx -step1: addl $0x44444444, %r8d - movzbl (%rax,%rdx), %edx - movl %r9d, %ebx - movb (%r12,%rdx), %dl -shifter2: shrl $24, %ebx -step2: addl $0x44444444, %r9d -source2: movzbl 0x44444444(%rax,%rbx), %ebx - movl %r10d, %ebp - movb (%r13,%rbx), %bl -shifter3: shr $24, %ebp -step3: addl $0x44444444, %r10d -source3: movzbl 0x44444444(%rax,%rbp), %ebp - movl %r11d, %esi - movb (%r14,%rbp), %bpl -shifter4: shr $24, %esi -step4: add $0x44444444, %r11d -source4: movzbl 0x44444444(%rax,%rsi), %esi - movb %dl, (%rdi,%rcx) - movb %bl, 1(%rdi,%rcx) - movb (%r15,%rsi), %sil - movb %bpl, 2(%rdi,%rcx) - movb %sil, 3(%rdi,%rcx) - -vltpitch: addq $320, %rcx - jl loopit - - movl %r8d, vplce(%rip) - movl %r9d, vplce+4(%rip) - movl %r10d, vplce+8(%rip) - movl %r11d, vplce+12(%rip) - -vltepilog: - addq $8, %rsp - .cfi_adjust_cfa_offset -8 - pop %rsi - pop %rbp - pop %r12 - pop %r13 - pop %r14 - pop %r15 - pop %rdi - pop %rbx - ret - .cfi_endproc - .align 16 - - diff --git a/src/vizdoom/src/d_main.cpp b/src/vizdoom/src/d_main.cpp index b27cf1c74..d67448336 100644 --- a/src/vizdoom/src/d_main.cpp +++ b/src/vizdoom/src/d_main.cpp @@ -2221,6 +2221,7 @@ static void D_DoomInit() // Set the FPU precision to 53 significant bits. This is the default // for Visual C++, but not for GCC, so some slight math variances // might crop up if we leave it alone. +#if !defined(__arm__) #if defined(_FPU_GETCW) { int cw; @@ -2233,6 +2234,7 @@ static void D_DoomInit() #ifndef _WIN64 int cfp = _control87(_PC_53, _MCW_PC); #endif +#endif #endif // Check response files before coalescing file parameters. diff --git a/src/vizdoom/src/doomtype.h b/src/vizdoom/src/doomtype.h index 668822a22..2a867a311 100644 --- a/src/vizdoom/src/doomtype.h +++ b/src/vizdoom/src/doomtype.h @@ -49,58 +49,6 @@ struct PClass; typedef TMap FClassMap; -// Since this file is included by everything, it seems an appropriate place -// to check the NOASM/USEASM macros. - -// There are three assembly-related macros: -// -// NOASM - Assembly code is disabled -// X86_ASM - Using ia32 assembly code -// X64_ASM - Using amd64 assembly code -// -// Note that these relate only to using the pure assembly code. Inline -// assembly may still be used without respect to these macros, as -// deemed appropriate. - -#ifndef NOASM -// Select the appropriate type of assembly code to use. - -#if defined(_M_IX86) || defined(__i386__) - -#define X86_ASM -#ifdef X64_ASM -#undef X64_ASM -#endif - -#elif defined(_M_X64) || defined(__amd64__) - -#define X64_ASM -#ifdef X86_ASM -#undef X86_ASM -#endif - -#else - -#define NOASM - -#endif - -#endif - -#ifdef NOASM -// Ensure no assembly macros are defined if NOASM is defined. - -#ifdef X86_ASM -#undef X86_ASM -#endif - -#ifdef X64_ASM -#undef X64_ASM -#endif - -#endif - - #if defined(_MSC_VER) || defined(__WATCOMC__) #define STACK_ARGS __cdecl #else diff --git a/src/vizdoom/src/mscinlines.h b/src/vizdoom/src/mscinlines.h deleted file mode 100644 index c7f2527f4..000000000 --- a/src/vizdoom/src/mscinlines.h +++ /dev/null @@ -1,351 +0,0 @@ -// "Build Engine & Tools" Copyright (c) 1993-1997 Ken Silverman -// Ken Silverman's official web site: "http://www.advsys.net/ken" -// See the included license file "BUILDLIC.TXT" for license info. -// -// This file is based on pragmas.h from Ken Silverman's original Build -// source code release but is meant for use with Visual C++ instead of -// Watcom C. -// -// Some of the inline assembly has been turned into C code, because VC++ -// is smart enough to produce code at least as good as Ken's inlines. -// The more used functions are still inline assembly, because they do -// things that can't really be done in C. (I consider this a bad thing, -// because VC++ has considerably poorer support for inline assembly than -// Watcom, so it's better to rely on its C optimizer to produce fast code.) -// - - -#include -#include - -#pragma warning (disable: 4035) - -__forceinline SDWORD Scale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm imul b - __asm idiv c -} - -__forceinline SDWORD MulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm imul b - __asm shrd eax,edx,cl -} - -#define MAKECONSTMulScale(s) \ - __forceinline SDWORD MulScale##s (SDWORD a, SDWORD b) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm shrd eax,edx,s \ - } -MAKECONSTMulScale(1) -MAKECONSTMulScale(2) -MAKECONSTMulScale(3) -MAKECONSTMulScale(4) -MAKECONSTMulScale(5) -MAKECONSTMulScale(6) -MAKECONSTMulScale(7) -MAKECONSTMulScale(8) -MAKECONSTMulScale(9) -MAKECONSTMulScale(10) -MAKECONSTMulScale(11) -MAKECONSTMulScale(12) -MAKECONSTMulScale(13) -MAKECONSTMulScale(14) -MAKECONSTMulScale(15) -MAKECONSTMulScale(16) -MAKECONSTMulScale(17) -MAKECONSTMulScale(18) -MAKECONSTMulScale(19) -MAKECONSTMulScale(20) -MAKECONSTMulScale(21) -MAKECONSTMulScale(22) -MAKECONSTMulScale(23) -MAKECONSTMulScale(24) -MAKECONSTMulScale(25) -MAKECONSTMulScale(26) -MAKECONSTMulScale(27) -MAKECONSTMulScale(28) -MAKECONSTMulScale(29) -MAKECONSTMulScale(30) -MAKECONSTMulScale(31) -#undef MAKECONSTMulScale - -__forceinline SDWORD MulScale32 (SDWORD a, SDWORD b) -{ - __asm mov eax,a - __asm imul b - __asm mov eax,edx -} - -__forceinline DWORD UMulScale16(DWORD a, DWORD b) -{ - __asm mov eax,a - __asm mul b - __asm shrd eax,edx,16 -} - -__forceinline SDWORD DMulScale (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD s) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov esi,edx - __asm mov ecx,s - __asm imul d - __asm add eax,ebx - __asm adc edx,esi - __asm shrd eax,edx,cl -} - -#define MAKECONSTDMulScale(s) \ - __forceinline SDWORD DMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm mov ebx,eax \ - __asm mov eax,c \ - __asm mov esi,edx \ - __asm imul d \ - __asm add eax,ebx \ - __asm adc edx,esi \ - __asm shrd eax,edx,s \ - } - -MAKECONSTDMulScale(1) -MAKECONSTDMulScale(2) -MAKECONSTDMulScale(3) -MAKECONSTDMulScale(4) -MAKECONSTDMulScale(5) -MAKECONSTDMulScale(6) -MAKECONSTDMulScale(7) -MAKECONSTDMulScale(8) -MAKECONSTDMulScale(9) -MAKECONSTDMulScale(10) -MAKECONSTDMulScale(11) -MAKECONSTDMulScale(12) -MAKECONSTDMulScale(13) -MAKECONSTDMulScale(14) -MAKECONSTDMulScale(15) -MAKECONSTDMulScale(16) -MAKECONSTDMulScale(17) -MAKECONSTDMulScale(18) -MAKECONSTDMulScale(19) -MAKECONSTDMulScale(20) -MAKECONSTDMulScale(21) -MAKECONSTDMulScale(22) -MAKECONSTDMulScale(23) -MAKECONSTDMulScale(24) -MAKECONSTDMulScale(25) -MAKECONSTDMulScale(26) -MAKECONSTDMulScale(27) -MAKECONSTDMulScale(28) -MAKECONSTDMulScale(29) -MAKECONSTDMulScale(30) -MAKECONSTDMulScale(31) -#undef MAKCONSTDMulScale - -__forceinline SDWORD DMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov esi,edx - __asm imul d - __asm add eax,ebx - __asm adc edx,esi - __asm mov eax,edx -} - -#define MAKECONSTTMulScale(s) \ - __forceinline SDWORD TMulScale##s (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) \ - { \ - __asm mov eax,a \ - __asm imul b \ - __asm mov ebx,eax \ - __asm mov eax,d \ - __asm mov ecx,edx \ - __asm imul c \ - __asm add ebx,eax \ - __asm mov eax,e \ - __asm adc ecx,edx \ - __asm imul f \ - __asm add eax,ebx \ - __asm adc edx,ecx \ - __asm shrd eax,edx,s \ - } - -MAKECONSTTMulScale(1) -MAKECONSTTMulScale(2) -MAKECONSTTMulScale(3) -MAKECONSTTMulScale(4) -MAKECONSTTMulScale(5) -MAKECONSTTMulScale(6) -MAKECONSTTMulScale(7) -MAKECONSTTMulScale(8) -MAKECONSTTMulScale(9) -MAKECONSTTMulScale(10) -MAKECONSTTMulScale(11) -MAKECONSTTMulScale(12) -MAKECONSTTMulScale(13) -MAKECONSTTMulScale(14) -MAKECONSTTMulScale(15) -MAKECONSTTMulScale(16) -MAKECONSTTMulScale(17) -MAKECONSTTMulScale(18) -MAKECONSTTMulScale(19) -MAKECONSTTMulScale(20) -MAKECONSTTMulScale(21) -MAKECONSTTMulScale(22) -MAKECONSTTMulScale(23) -MAKECONSTTMulScale(24) -MAKECONSTTMulScale(25) -MAKECONSTTMulScale(26) -MAKECONSTTMulScale(27) -MAKECONSTTMulScale(28) -MAKECONSTTMulScale(29) -MAKECONSTTMulScale(30) -MAKECONSTTMulScale(31) -#undef MAKECONSTTMulScale - -__forceinline SDWORD TMulScale32 (SDWORD a, SDWORD b, SDWORD c, SDWORD d, SDWORD e, SDWORD f) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,eax - __asm mov eax,c - __asm mov ecx,edx - __asm imul d - __asm add ebx,eax - __asm mov eax,e - __asm adc ecx,edx - __asm imul f - __asm add eax,ebx - __asm adc edx,ecx - __asm mov eax,edx -} - -__forceinline SDWORD BoundMulScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm imul b - __asm mov ebx,edx - __asm mov ecx,c - __asm shrd eax,edx,cl - __asm sar edx,cl - __asm xor edx,eax - __asm js checkit - __asm xor edx,eax - __asm jz skipboundit - __asm cmp edx,0xffffffff - __asm je skipboundit -checkit: - __asm mov eax,ebx - __asm sar eax,31 - __asm xor eax,0x7fffffff -skipboundit: - ; -} - -__forceinline SDWORD DivScale (SDWORD a, SDWORD b, SDWORD c) -{ - __asm mov eax,a - __asm mov ecx,c - __asm shl eax,cl - __asm mov edx,a - __asm neg cl - __asm sar edx,cl - __asm idiv b -} - -__forceinline SDWORD DivScale1 (SDWORD a, SDWORD b) -{ - __asm mov eax,a - __asm add eax,eax - __asm sbb edx,edx - __asm idiv b -} - -#define MAKECONSTDivScale(s) \ - __forceinline SDWORD DivScale##s (SDWORD a, SDWORD b) \ - { \ - __asm mov edx,a \ - __asm sar edx,32-s \ - __asm mov eax,a \ - __asm shl eax,s \ - __asm idiv b \ - } - -MAKECONSTDivScale(2) -MAKECONSTDivScale(3) -MAKECONSTDivScale(4) -MAKECONSTDivScale(5) -MAKECONSTDivScale(6) -MAKECONSTDivScale(7) -MAKECONSTDivScale(8) -MAKECONSTDivScale(9) -MAKECONSTDivScale(10) -MAKECONSTDivScale(11) -MAKECONSTDivScale(12) -MAKECONSTDivScale(13) -MAKECONSTDivScale(14) -MAKECONSTDivScale(15) -MAKECONSTDivScale(16) -MAKECONSTDivScale(17) -MAKECONSTDivScale(18) -MAKECONSTDivScale(19) -MAKECONSTDivScale(20) -MAKECONSTDivScale(21) -MAKECONSTDivScale(22) -MAKECONSTDivScale(23) -MAKECONSTDivScale(24) -MAKECONSTDivScale(25) -MAKECONSTDivScale(26) -MAKECONSTDivScale(27) -MAKECONSTDivScale(28) -MAKECONSTDivScale(29) -MAKECONSTDivScale(30) -MAKECONSTDivScale(31) -#undef MAKECONSTDivScale - -__forceinline SDWORD DivScale32 (SDWORD a, SDWORD b) -{ - __asm mov edx,a - __asm xor eax,eax - __asm idiv b -} - -__forceinline void clearbuf (void *buff, unsigned int count, SDWORD clear) -{ - SDWORD *b2 = (SDWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - -__forceinline void clearbufshort (void *buff, unsigned int count, WORD clear) -{ - SWORD *b2 = (SWORD *)buff; - for (unsigned int i = 0; i != count; ++i) - { - b2[i] = clear; - } -} - -__forceinline SDWORD ksgn (SDWORD a) -{ - __asm mov edx,a - __asm add edx,edx - __asm sbb eax,eax - __asm cmp eax,edx - __asm adc eax,0 -} - -#pragma warning (default: 4035) diff --git a/src/vizdoom/src/nodebuild.cpp b/src/vizdoom/src/nodebuild.cpp index 210f0f3de..c95db868d 100644 --- a/src/vizdoom/src/nodebuild.cpp +++ b/src/vizdoom/src/nodebuild.cpp @@ -1071,95 +1071,3 @@ void FNodeBuilder::PrintSet (int l, DWORD set) } Printf (PRINT_LOG, "*\n"); } - - - -#ifdef BACKPATCH -#ifdef _WIN32 -extern "C" { -__declspec(dllimport) int __stdcall VirtualProtect(void *, unsigned long, unsigned long, unsigned long *); -} -#define PAGE_EXECUTE_READWRITE 64 -#else -#include -#include -#include -#endif - -#ifdef __GNUC__ -extern "C" int ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]) -#else -static int *CallerOffset; -int ClassifyLineBackpatchC (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]) -#endif -{ - // Select the routine based on SSE2 availability and patch the caller so that - // they call that routine directly next time instead of going through here. - int *calleroffset; - int diff; - int (*func)(node_t &, const FSimpleVert *, const FSimpleVert *, int[2]); - -#ifdef __GNUC__ - calleroffset = (int *)__builtin_return_address(0); -#else - calleroffset = CallerOffset; -#endif -// printf ("Patching for SSE %d @ %p %d\n", SSELevel, calleroffset, *calleroffset); - -#ifndef DISABLE_SSE - if (CPU.bSSE2) - { - func = ClassifyLineSSE2; - diff = int((char *)ClassifyLineSSE2 - (char *)calleroffset); - } - else -#endif - { - func = ClassifyLine2; - diff = int((char *)ClassifyLine2 - (char *)calleroffset); - } - - calleroffset--; - // Patch the caller. -#ifdef _WIN32 - unsigned long oldprotect; - if (VirtualProtect (calleroffset, 4, PAGE_EXECUTE_READWRITE, &oldprotect)) -#else - // must make this page-aligned for mprotect - long pagesize = sysconf(_SC_PAGESIZE); - char *callerpage = (char *)((intptr_t)calleroffset & ~(pagesize - 1)); - size_t protectlen = (intptr_t)calleroffset + sizeof(void*) - (intptr_t)callerpage; - int ptect; - if (!(ptect = mprotect(callerpage, protectlen, PROT_READ|PROT_WRITE|PROT_EXEC))) -#endif - { - *calleroffset = diff; -#ifdef _WIN32 - VirtualProtect (calleroffset, sizeof(void*), oldprotect, &oldprotect); -#else - mprotect(callerpage, protectlen, PROT_READ|PROT_EXEC); -#endif - } - - // And return by calling the real function. - return func (node, v1, v2, sidev); -} - -#ifndef __GNUC__ -// The ClassifyLineBackpatch() function here is a stub that uses inline assembly and nakedness -// to retrieve the return address of the stack before sending control to the real -// ClassifyLineBackpatchC() function. Since BACKPATCH shouldn't be defined on 64-bit builds, -// we're okay that VC++ can't do inline assembly on that target. - -extern "C" __declspec(noinline) __declspec(naked) int ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]) -{ - // We store the return address in a global, so as not to need to mess with the parameter list. - __asm - { - mov eax, [esp] - mov CallerOffset, eax - jmp ClassifyLineBackpatchC - } -} -#endif -#endif diff --git a/src/vizdoom/src/nodebuild.h b/src/vizdoom/src/nodebuild.h index ce68c4682..82145f29c 100644 --- a/src/vizdoom/src/nodebuild.h +++ b/src/vizdoom/src/nodebuild.h @@ -56,17 +56,6 @@ struct FSimpleVert extern "C" { int ClassifyLine2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]); -#ifndef DISABLE_SSE - int ClassifyLineSSE1 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]); - int ClassifyLineSSE2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]); -#ifdef BACKPATCH -#ifdef __GNUC__ - int ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]) __attribute__((noinline)); -#else - int __declspec(noinline) ClassifyLineBackpatch (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]); -#endif -#endif -#endif } class FNodeBuilder @@ -344,25 +333,5 @@ inline int FNodeBuilder::PointOnSide (int x, int y, int x1, int y1, int dx, int inline int FNodeBuilder::ClassifyLine (node_t &node, const FPrivVert *v1, const FPrivVert *v2, int sidev[2]) { -#ifdef DISABLE_SSE return ClassifyLine2 (node, v1, v2, sidev); -#else -#if defined(__SSE2__) || defined(_M_X64) - // If compiling with SSE2 support everywhere, just use the SSE2 version. - return ClassifyLineSSE2 (node, v1, v2, sidev); -#elif defined(_MSC_VER) && _MSC_VER < 1300 - // VC 6 does not support SSE optimizations. - return ClassifyLine2 (node, v1, v2, sidev); -#else - // Select the routine based on our flag. -#ifdef BACKPATCH - return ClassifyLineBackpatch (node, v1, v2, sidev); -#else - if (CPU.bSSE2) - return ClassifyLineSSE2 (node, v1, v2, sidev); - else - return ClassifyLine2 (node, v1, v2, sidev); -#endif -#endif -#endif } diff --git a/src/vizdoom/src/nodebuild_classify_sse2.cpp b/src/vizdoom/src/nodebuild_classify_sse2.cpp deleted file mode 100644 index 01c469093..000000000 --- a/src/vizdoom/src/nodebuild_classify_sse2.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef DISABLE_SSE - -#include "doomtype.h" -#include "nodebuild.h" - -#define FAR_ENOUGH 17179869184.f // 4<<32 - -// You may notice that this function is identical to ClassifyLine2. -// The reason it is SSE2 is because this file is explicitly compiled -// with SSE2 math enabled, but the other files are not. - -extern "C" int ClassifyLineSSE2 (node_t &node, const FSimpleVert *v1, const FSimpleVert *v2, int sidev[2]) -{ - double d_x1 = double(node.x); - double d_y1 = double(node.y); - double d_dx = double(node.dx); - double d_dy = double(node.dy); - double d_xv1 = double(v1->x); - double d_xv2 = double(v2->x); - double d_yv1 = double(v1->y); - double d_yv2 = double(v2->y); - - double s_num1 = (d_y1 - d_yv1) * d_dx - (d_x1 - d_xv1) * d_dy; - double s_num2 = (d_y1 - d_yv2) * d_dx - (d_x1 - d_xv2) * d_dy; - - int nears = 0; - - if (s_num1 <= -FAR_ENOUGH) - { - if (s_num2 <= -FAR_ENOUGH) - { - sidev[0] = sidev[1] = 1; - return 1; - } - if (s_num2 >= FAR_ENOUGH) - { - sidev[0] = 1; - sidev[1] = -1; - return -1; - } - nears = 1; - } - else if (s_num1 >= FAR_ENOUGH) - { - if (s_num2 >= FAR_ENOUGH) - { - sidev[0] = sidev[1] = -1; - return 0; - } - if (s_num2 <= -FAR_ENOUGH) - { - sidev[0] = -1; - sidev[1] = 1; - return -1; - } - nears = 1; - } - else - { - nears = 2 | int(fabs(s_num2) < FAR_ENOUGH); - } - - if (nears) - { - double l = 1.f / (d_dx*d_dx + d_dy*d_dy); - if (nears & 2) - { - double dist = s_num1 * s_num1 * l; - if (dist < SIDE_EPSILON*SIDE_EPSILON) - { - sidev[0] = 0; - } - else - { - sidev[0] = s_num1 > 0.0 ? -1 : 1; - } - } - else - { - sidev[0] = s_num1 > 0.0 ? -1 : 1; - } - if (nears & 1) - { - double dist = s_num2 * s_num2 * l; - if (dist < SIDE_EPSILON*SIDE_EPSILON) - { - sidev[1] = 0; - } - else - { - sidev[1] = s_num2 > 0.0 ? -1 : 1; - } - } - else - { - sidev[1] = s_num2 > 0.0 ? -1 : 1; - } - } - else - { - sidev[0] = s_num1 > 0.0 ? -1 : 1; - sidev[1] = s_num2 > 0.0 ? -1 : 1; - } - - if ((sidev[0] | sidev[1]) == 0) - { // seg is coplanar with the splitter, so use its orientation to determine - // which child it ends up in. If it faces the same direction as the splitter, - // it goes in front. Otherwise, it goes in back. - - if (node.dx != 0) - { - if ((node.dx > 0 && v2->x > v1->x) || (node.dx < 0 && v2->x < v1->x)) - { - return 0; - } - else - { - return 1; - } - } - else - { - if ((node.dy > 0 && v2->y > v1->y) || (node.dy < 0 && v2->y < v1->y)) - { - return 0; - } - else - { - return 1; - } - } - } - else if (sidev[0] <= 0 && sidev[1] <= 0) - { - return 0; - } - else if (sidev[0] >= 0 && sidev[1] >= 0) - { - return 1; - } - return -1; -} - -#endif diff --git a/src/vizdoom/src/posix/sdl/i_main.cpp b/src/vizdoom/src/posix/sdl/i_main.cpp index a5cdd8018..569a9f02b 100644 --- a/src/vizdoom/src/posix/sdl/i_main.cpp +++ b/src/vizdoom/src/posix/sdl/i_main.cpp @@ -44,10 +44,6 @@ #include #endif #include -#if defined(__MACH__) && !defined(NOASM) -#include -#include -#endif #include "doomerrors.h" #include "m_argv.h" @@ -196,46 +192,6 @@ static int DoomSpecificInfo (char *buffer, char *end) return p; } -#if defined(__MACH__) && !defined(NOASM) -// NASM won't let us create custom sections for Mach-O. Whether that's a limitation of NASM -// or of Mach-O, I don't know, but since we're using NASM for the assembly, it doesn't much -// matter. -extern "C" -{ - extern void *rtext_a_start, *rtext_a_end; - extern void *rtext_tmap_start, *rtext_tmap_end; - extern void *rtext_tmap2_start, *rtext_tmap2_end; - extern void *rtext_tmap3_start, *rtext_tmap3_end; -}; - -static void unprotect_pages(long pagesize, void *start, void *end) -{ - char *page = (char *)((intptr_t)start & ~(pagesize - 1)); - size_t len = (char *)end - (char *)start; - if (mprotect(page, len, PROT_READ|PROT_WRITE|PROT_EXEC) != 0) - { - fprintf(stderr, "mprotect failed\n"); - exit(1); - } -} - -static void unprotect_rtext() -{ - static void *const pages[] = - { - rtext_a_start, rtext_a_end, - rtext_tmap_start, rtext_tmap_end, - rtext_tmap2_start, rtext_tmap2_end, - rtext_tmap3_start, rtext_tmap3_end - }; - long pagesize = sysconf(_SC_PAGESIZE); - for (void *const *p = pages; p < &pages[countof(pages)]; p += 2) - { - unprotect_pages(pagesize, p[0], p[1]); - } -} -#endif - void I_StartupJoysticks(); void I_ShutdownJoysticks(); @@ -267,10 +223,6 @@ int main (int argc, char **argv) seteuid (getuid ()); std::set_new_handler (NewFailure); -#if defined(__MACH__) && !defined(NOASM) - unprotect_rtext(); -#endif - // Set LC_NUMERIC environment variable in case some library decides to // clear the setlocale call at least this will be correct. // Note that the LANG environment variable is overridden by LC_* diff --git a/src/vizdoom/src/r_draw.cpp b/src/vizdoom/src/r_draw.cpp index ac85da97f..806324928 100644 --- a/src/vizdoom/src/r_draw.cpp +++ b/src/vizdoom/src/r_draw.cpp @@ -175,7 +175,6 @@ void R_InitShadeMaps() /* */ /************************************/ -#ifndef X86_ASM // // A column is a vertical slice/span from a wall texture that, // given the DOOM style restrictions on the view orientation, @@ -245,7 +244,6 @@ void R_DrawColumnP_C (void) } while (--count); } } -#endif // [RH] Just fills a column with a color void R_FillColumnP (void) @@ -437,7 +435,6 @@ void R_InitFuzzTable (int fuzzoff) } } -#ifndef X86_ASM // // Creates a fuzzy image by copying pixels from adjacent ones above and below. // Used with an all black colormap, this could create the SHADOW effect, @@ -513,7 +510,6 @@ void R_DrawFuzzColumnP_C (void) fuzzpos = fuzz; } } -#endif // // R_DrawTranlucentColumn @@ -1006,12 +1002,6 @@ const BYTE* ds_source; // just for profiling int dscount; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void STACK_ARGS R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif } //========================================================================== @@ -1025,12 +1015,6 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(const BYTE *pixels) { ds_source = pixels; -#ifdef X86_ASM - if (ds_cursource != ds_source) - { - R_SetSpanSource_ASM(pixels); - } -#endif } //========================================================================== @@ -1044,12 +1028,6 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { ds_colormap = colormap; -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - { - R_SetSpanColormap_ASM (ds_colormap); - } -#endif } //========================================================================== @@ -1073,15 +1051,11 @@ void R_SetupSpanBits(FTexture *tex) { ds_ybits--; } -#ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); -#endif } // // Draws the actual span. //VIZDOOM_CODE -#ifndef X86_ASM void R_DrawSpanP_C (void) { dsfixed_t xfrac; @@ -1218,7 +1192,6 @@ void R_DrawSpanMaskedP_C (void) } while (--count); } } -#endif void R_DrawSpanTranslucentP_C (void) { @@ -1514,7 +1487,6 @@ void R_FillSpan (void) // Actually, this is just R_DrawColumn with an extra width parameter. -#ifndef X86_ASM static const BYTE *slabcolormap; extern "C" void R_SetupDrawSlabC(const BYTE *colormap) @@ -1594,7 +1566,6 @@ extern "C" void STACK_ARGS R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, co dy--; } } -#endif /****************************************************/ @@ -1602,21 +1573,14 @@ extern "C" void STACK_ARGS R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, co // wallscan stuff, in C -#ifndef X86_ASM static DWORD STACK_ARGS vlinec1 (); static int vlinebits; DWORD (STACK_ARGS *dovline1)() = vlinec1; DWORD (STACK_ARGS *doprevline1)() = vlinec1; -#ifdef X64_ASM -extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 -extern "C" void setupvlinetallasm (int); -#else static void STACK_ARGS vlinec4 (); void (STACK_ARGS *dovline4)() = vlinec4; -#endif static DWORD STACK_ARGS mvlinec1(); static void STACK_ARGS mvlinec4(); @@ -1625,70 +1589,11 @@ static int mvlinebits; DWORD (STACK_ARGS *domvline1)() = mvlinec1; void (STACK_ARGS *domvline4)() = mvlinec4; -#else - -extern "C" -{ -DWORD STACK_ARGS vlineasm1 (); -DWORD STACK_ARGS prevlineasm1 (); -DWORD STACK_ARGS vlinetallasm1 (); -DWORD STACK_ARGS prevlinetallasm1 (); -void STACK_ARGS vlineasm4 (); -void STACK_ARGS vlinetallasmathlon4 (); -void STACK_ARGS vlinetallasm4 (); -void STACK_ARGS setupvlineasm (int); -void STACK_ARGS setupvlinetallasm (int); - -DWORD STACK_ARGS mvlineasm1(); -void STACK_ARGS mvlineasm4(); -void STACK_ARGS setupmvlineasm (int); -} - -DWORD (STACK_ARGS *dovline1)() = vlinetallasm1; -DWORD (STACK_ARGS *doprevline1)() = prevlinetallasm1; -void (STACK_ARGS *dovline4)() = vlinetallasm4; - -DWORD (STACK_ARGS *domvline1)() = mvlineasm1; -void (STACK_ARGS *domvline4)() = mvlineasm4; -#endif - void setupvline (int fracbits) { -#ifdef X86_ASM - if (CPU.Family <= 5) - { - if (fracbits >= 24) - { - setupvlineasm (fracbits); - dovline4 = vlineasm4; - dovline1 = vlineasm1; - doprevline1 = prevlineasm1; - } - else - { - setupvlinetallasm (fracbits); - dovline1 = vlinetallasm1; - doprevline1 = prevlinetallasm1; - dovline4 = vlinetallasm4; - } - } - else - { - setupvlinetallasm (fracbits); - if (CPU.bIsAMD && CPU.AMDFamily >= 7) - { - dovline4 = vlinetallasmathlon4; - } - } -#else vlinebits = fracbits; -#ifdef X64_ASM - setupvlinetallasm(fracbits); -#endif -#endif } //VIZDOOM_CODE -#if !defined(X86_ASM) DWORD STACK_ARGS vlinec1 () { DWORD fracstep = dc_iscale; @@ -1727,20 +1632,12 @@ void STACK_ARGS vlinec4 () dest += dc_pitch; } while (--count); } -#endif void setupmvline (int fracbits) { -#if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; -#else mvlinebits = fracbits; -#endif } -#if !defined(X86_ASM) DWORD STACK_ARGS mvlinec1 () { DWORD fracstep = dc_iscale; @@ -1784,7 +1681,6 @@ void STACK_ARGS mvlinec4 () dest += dc_pitch; } while (--count); } -#endif extern "C" short spanend[MAXHEIGHT]; extern fixed_t rw_light; @@ -2198,23 +2094,6 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { -#ifdef X86_ASM - R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; - R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; - if (CPU.Family <= 5) - { - rt_map4cols = rt_map4cols_asm2; - } - else - { - rt_map4cols = rt_map4cols_asm1; - } -#else R_DrawColumnHoriz = R_DrawColumnHorizP_C; R_DrawColumn = R_DrawColumnP_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_C; @@ -2223,7 +2102,7 @@ void R_InitColumnDrawers () R_DrawSpan = R_DrawSpanP_C; R_DrawSpanMasked = R_DrawSpanMaskedP_C; rt_map4cols = rt_map4cols_c; -#endif + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; diff --git a/src/vizdoom/src/r_draw.h b/src/vizdoom/src/r_draw.h index c8a7f2d47..ff81712b5 100644 --- a/src/vizdoom/src/r_draw.h +++ b/src/vizdoom/src/r_draw.h @@ -67,12 +67,7 @@ extern void (*R_DrawColumn)(void); extern DWORD (STACK_ARGS *dovline1) (); extern DWORD (STACK_ARGS *doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else extern void (STACK_ARGS *dovline4) (); -#endif extern void setupvline (int); extern DWORD (STACK_ARGS *domvline1) (); @@ -165,21 +160,12 @@ void STACK_ARGS rt_addclamp4cols_asm (int sx, int yl, int yh); extern void (STACK_ARGS *rt_map4cols)(int sx, int yl, int yh); -#ifdef X86_ASM -#define rt_copy1col rt_copy1col_asm -#define rt_copy4cols rt_copy4cols_asm -#define rt_map1col rt_map1col_asm -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else #define rt_copy1col rt_copy1col_c #define rt_copy4cols rt_copy4cols_c #define rt_map1col rt_map1col_c #define rt_shaded4cols rt_shaded4cols_c #define rt_add4cols rt_add4cols_c #define rt_addclamp4cols rt_addclamp4cols_c -#endif void rt_draw4cols (int sx); @@ -188,20 +174,6 @@ void rt_initcols (BYTE *buffer=NULL); void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); - -#ifdef X86_ASM - -extern "C" void R_DrawColumnP_Unrolled (void); -extern "C" void R_DrawColumnHorizP_ASM (void); -extern "C" void R_DrawColumnP_ASM (void); -extern "C" void R_DrawFuzzColumnP_ASM (void); - void R_DrawTranslatedColumnP_C (void); - void R_DrawShadedColumnP_C (void); -extern "C" void R_DrawSpanP_ASM (void); -extern "C" void R_DrawSpanMaskedP_ASM (void); - -#else - void R_DrawColumnHorizP_C (void); void R_DrawColumnP_C (void); void R_DrawFuzzColumnP_C (void); @@ -210,8 +182,6 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); -#endif - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -222,13 +192,8 @@ void R_FillColumnP (void); void R_FillColumnHorizP (void); void R_FillSpan (void); -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else #define R_SetupDrawSlab R_SetupDrawSlabC #define R_DrawSlab R_DrawSlabC -#endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); extern "C" void STACK_ARGS R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); diff --git a/src/vizdoom/src/r_drawt.cpp b/src/vizdoom/src/r_drawt.cpp index 604855078..ef0219c9d 100644 --- a/src/vizdoom/src/r_drawt.cpp +++ b/src/vizdoom/src/r_drawt.cpp @@ -67,13 +67,6 @@ unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; -#ifdef X86_ASM -extern "C" void R_SetupShadedCol(); -extern "C" void R_SetupAddCol(); -extern "C" void R_SetupAddClampCol(); -#endif - -#ifndef X86_ASM // Copies one span at hx to the screen at sx. void rt_copy1col_c (int hx, int sx, int yl, int yh) { @@ -264,7 +257,6 @@ void STACK_ARGS rt_map4cols_c (int sx, int yl, int yh) y_mod += 2; } while (--count); } -#endif void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { @@ -915,22 +907,6 @@ void rt_draw4cols (int sx) dc_ctspan[x][1] = screen->GetHeight(); } -#ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) - { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); - } -#endif - for (;;) { // If a column is out of spans, mark it as such diff --git a/src/vizdoom/src/r_drawt_copy.cpp b/src/vizdoom/src/r_drawt_copy.cpp deleted file mode 100644 index 575678cf4..000000000 --- a/src/vizdoom/src/r_drawt_copy.cpp +++ /dev/null @@ -1,1372 +0,0 @@ -/* -** r_drawt.cpp -** Faster column drawers for modern processors -** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** All rights reserved. -** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: -** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. -** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**--------------------------------------------------------------------------- -** -** These functions stretch columns into a temporary buffer and then -** map them to the screen. On modern machines, this is faster than drawing -** them directly to the screen. -** -** Will I be able to even understand any of this if I come back to it later? -** Let's hope so. :-) -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" - -//VIZDOOM_CODE -#include "viz_depth.h" -#include "viz_labels.h" - -// I should have commented this stuff better. -// -// dc_temp is the buffer R_DrawColumnHoriz writes into. -// dc_tspans points into it. -// dc_ctspan points into dc_tspans. -// horizspan also points into dc_tspans. - -// dc_ctspan is advanced while drawing into dc_temp. -// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. - -BYTE dc_tempbuff[MAXHEIGHT*4]; -BYTE *dc_temp; -unsigned int dc_tspans[4][MAXHEIGHT]; -unsigned int *dc_ctspan[4]; -unsigned int *horizspan[4]; - -#ifdef X86_ASM -extern "C" void R_SetupShadedCol(); -extern "C" void R_SetupAddCol(); -extern "C" void R_SetupAddClampCol(); -#endif - -#ifndef X86_ASM -// Copies one span at hx to the screen at sx. -void rt_copy1col_c (int hx, int sx, int yl, int yh) -{ - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); - - if(vizDepthMap!=NULL) { - for(int y = yl; y <= yh; ++y) - vizDepthMap->setPoint(sx, y); - } - - if(vizLabels!=NULL) { - for(int y = yl; y <= yh; ++y) - vizLabels->setPoint(sx, y); - } -} - -// Copies all four spans to the screen starting at sx. -void STACK_ARGS rt_copy4cols_c (int sx, int yl, int yh) -{ - int *source; - int *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = (int *)(ylookup[yl] + sx + dc_destorg); - source = (int *)(&dc_temp[yl*4]); - pitch = dc_pitch/sizeof(int); - - if (count & 1) { - *dest = *source; - source += 4/sizeof(int); - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4/sizeof(int)]; - source += 8/sizeof(int); - dest += pitch*2; - - if(vizDepthMap!=NULL) { - vizDepthMap->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - - if(vizLabels!=NULL) { - vizLabels->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - } while (--count); -} - -// Maps one span at hx to the screen at sx. -void rt_map1col_c (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = colormap[*source]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[pitch] = colormap[source[4]]; - source += 8; - dest += pitch*2; - } while (--count); - - if(vizDepthMap!=NULL) { - for(int y = yl; y <= yh; ++y) - vizDepthMap->setPoint(sx, y); - } - - if(vizLabels!=NULL) { - for(int y = yl; y <= yh; ++y) - vizLabels->setPoint(sx, y); - } -} - -//VIZDOOM_CODE -// Maps all four spans to the screen starting at sx. -void STACK_ARGS rt_map4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - int y_mod=0; - if (count & 1) { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - source += 4; - dest += pitch; - - //VIZDOOM_CODE - if(vizDepthMap!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[dx]); - vizDepthMap->setPoint((unsigned int) sx + dx, (unsigned int) yl + y_mod); - } - } - if(vizLabels!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizLabels->setPoint((unsigned int) sx + dx, (unsigned int) yl + y_mod); - } - } - y_mod+=1; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - dest[pitch] = colormap[source[4]]; - dest[pitch+1] = colormap[source[5]]; - dest[pitch+2] = colormap[source[6]]; - dest[pitch+3] = colormap[source[7]]; - source += 8; - dest += pitch*2; - - //VIZDOOM_CODE - if(vizDepthMap != NULL) { - for (int dx = 0; dx < 4; dx++) { - vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[dx]); - for (int dy = 0; dy < 2; dy++) - vizDepthMap->setPoint((unsigned int) sx + dx, (unsigned int) yl + y_mod + dy); - } - } - - if(vizLabels != NULL) { - for (int dx = 0; dx < 4; dx++) { - for (int dy = 0; dy < 2; dy++) - vizLabels->setPoint((unsigned int) sx + dx, (unsigned int) yl + y_mod + dy); - } - } - - y_mod += 2; - } while (--count); -} -#endif - -void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) -{ - int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. - for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } -} - -void rt_Translate4cols(const BYTE *translation, int yl, int yh) -{ - int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } -} - -// Translates one span at hx to the screen at sx. -void rt_tlate1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_map1col(hx, sx, yl, yh); -} - -// Translates all four spans to the screen starting at sx. -void STACK_ARGS rt_tlate4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_map4cols(sx, yl, yh); -} - -// Adds one span at hx to the screen at sx without clamping. -void rt_add1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD fg = colormap[*source]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - source += 4; - dest += pitch; - } while (--count); - - if(vizDepthMap!=NULL) { - for(int y = yl; y <= yh; ++y) - vizDepthMap->setPoint(sx, y); - } - - if(vizLabels!=NULL) { - for(int y = yl; y <= yh; ++y) - vizLabels->setPoint(sx, y); - } -} - -// Adds all four spans to the screen starting at sx without clamping. -void STACK_ARGS rt_add4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD fg = colormap[source[0]]; - DWORD bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; - - source += 4; - dest += pitch; - - if(vizDepthMap!=NULL) { - vizDepthMap->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - - if(vizLabels!=NULL) { - vizLabels->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - } while (--count); -} - -// Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_add1col(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx without clamping. -void STACK_ARGS rt_tlateadd4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_add4cols(sx, yl, yh); -} - -// Shades one span at hx to the screen at sx. -void rt_shaded1col (int hx, int sx, int yl, int yh) -{ - DWORD *fgstart; - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - fgstart = &Col2RGB8[0][dc_color]; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - - do { - DWORD val = colormap[*source]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - source += 4; - dest += pitch; - - if(vizDepthMap!=NULL) { - vizDepthMap->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - - if(vizLabels!=NULL) { - vizLabels->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - } while (--count); -} - -// Shades all four spans to the screen starting at sx. -void STACK_ARGS rt_shaded4cols_c (int sx, int yl, int yh) -{ - DWORD *fgstart; - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - fgstart = &Col2RGB8[0][dc_color]; - colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - - do { - DWORD val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; - - source += 4; - dest += pitch; - - if(vizDepthMap!=NULL) { - vizDepthMap->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - - if(vizLabels!=NULL) { - vizLabels->setPoint((unsigned int) sx, (unsigned int) yh - count); - } - } while (--count); -} - -//VIZDOOM_CODE -// Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - - if(vizDepthMap!=NULL) { - for(int y = yl; y <= yh; ++y) vizDepthMap->setPoint(sx, y); - } - - if(vizLabels!=NULL) { - for(int y = yl; y <= yh; ++y) vizLabels->setPoint(sx, y); - } -} - -//VIZDOOM_CODE -// Adds all four spans to the screen starting at sx with clamping. -void STACK_ARGS rt_addclamp4cols_c (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - - if(vizDepthMap!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[dx]); - vizDepthMap->setPoint((unsigned int) sx + dx, (unsigned int) yh-count); - } - } - - if(vizLabels!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizLabels->setPoint((unsigned int) sx + dx, (unsigned int) yh-count); - } - } - } while (--count); -} - -// Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx with clamping. -void STACK_ARGS rt_tlateaddclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_addclamp4cols(sx, yl, yh); -} - -// Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - - if(vizDepthMap!=NULL) { - for(int y = yl; y <= yh; ++y) vizDepthMap->setPoint(sx, y); - } - - if(vizLabels!=NULL) { - for(int y = yl; y <= yh; ++y) vizLabels->setPoint(sx, y); - } -} - -// Subtracts all four spans to the screen starting at sx with clamping. -void STACK_ARGS rt_subclamp4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - - if(vizDepthMap!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[dx]); - vizDepthMap->setPoint((unsigned int) sx + dx, (unsigned int) yh-count); - } - } - - if(vizLabels!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizLabels->setPoint((unsigned int) sx + dx, (unsigned int) yh-count); - } - } - } while (--count); -} - -// Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans to the screen starting at sx with clamping. -void STACK_ARGS rt_tlatesubclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_subclamp4cols(sx, yl, yh); -} - -// Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col (int hx, int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4 + hx]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - - if(vizDepthMap!=NULL) { - for(int y = yl; y <= yh; ++y) vizDepthMap->setPoint(sx, y); - } - - if(vizLabels!=NULL) { - for(int y = yl; y <= yh; ++y) vizLabels->setPoint(sx, y); - } -} - -// Subtracts all four spans from the screen starting at sx with clamping. -void STACK_ARGS rt_revsubclamp4cols (int sx, int yl, int yh) -{ - BYTE *colormap; - BYTE *source; - BYTE *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; - source = &dc_temp[yl*4]; - pitch = dc_pitch; - colormap = dc_colormap; - - do { - DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - - if(vizDepthMap!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[dx]); - vizDepthMap->setPoint((unsigned int) sx + dx, (unsigned int) yh-count); - } - } - - if(vizLabels!=NULL) { - for (int dx = 0; dx < 4; dx++) { - vizLabels->setPoint((unsigned int) sx + dx, (unsigned int) yh-count); - } - } - } while (--count); -} - -// Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh) -{ - rt_Translate1col(dc_translation, hx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans from the screen starting at sx with clamping. -void STACK_ARGS rt_tlaterevsubclamp4cols (int sx, int yl, int yh) -{ - rt_Translate4cols(dc_translation, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); -} - -// Copies all spans in all four columns to the screen starting at sx. -// sx should be dword-aligned. -//VIZDOOM_CODE -void rt_draw4cols (int sx) -{ - int x, bad; - unsigned int maxtop, minbot, minnexttop; - - // Place a dummy "span" in each column. These don't get - // drawn. They're just here to avoid special cases in the - // max/min calculations below. - for (x = 0; x < 4; ++x) - { - dc_ctspan[x][0] = screen->GetHeight()+1; - dc_ctspan[x][1] = screen->GetHeight(); - } - -#ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) - { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); - } -#endif - - for (;;) - { - // If a column is out of spans, mark it as such - bad = 0; - minnexttop = 0xffffffff; - for (x = 0; x < 4; ++x) - { - if (horizspan[x] >= dc_ctspan[x]) - { - bad |= 1 << x; - } - else if ((horizspan[x]+2)[0] < minnexttop) - { - minnexttop = (horizspan[x]+2)[0]; - } - } - // Once all columns are out of spans, we're done - if (bad == 15) - { - return; - } - - // Find the largest shared area for the spans in each column - maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), - MAX (horizspan[2][0], horizspan[3][0])); - minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), - MIN (horizspan[2][1], horizspan[3][1])); - - // If there is no shared area with these spans, draw each span - // individually and advance to the next spans until we reach a shared area. - // However, only draw spans down to the highest span in the next set of - // spans. If we allow the entire height of a span to be drawn, it could - // prevent any more shared areas from being drawn in these four columns. - // - // Example: Suppose we have the following arrangement: - // A CD - // A CD - // B D - // B D - // aB D - // aBcD - // aBcD - // aBc - // - // If we draw the entire height of the spans, we end up drawing this first: - // A CD - // A CD - // B D - // B D - // B D - // B D - // B D - // B D - // B - // - // This leaves only the "a" and "c" columns to be drawn, and they are not - // part of a shared area, but if we can include B and D with them, we can - // get a shared area. So we cut off everything in the first set just - // above the "a" column and end up drawing this first: - // A CD - // A CD - // B D - // B D - // - // Then the next time through, we have the following arrangement with an - // easily shared area to draw: - // aB D - // aBcD - // aBcD - // aBc - if (bad != 0 || maxtop > minbot) - { - int drawcount = 0; - for (x = 0; x < 4; ++x) - { - if (!(bad & 1)) - { - if(vizDepthMap!=NULL) vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[x]); - if (horizspan[x][1] < minnexttop) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); - horizspan[x] += 2; - drawcount++; - } - else if (minnexttop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); - horizspan[x][0] = minnexttop; - drawcount++; - } - } - bad >>= 1; - } - // Drawcount *should* always be non-zero. The reality is that some situations - // can make this not true. Unfortunately, I'm not sure what those situations are. - if (drawcount == 0) - { - return; - } - continue; - } - - // Draw any span fragments above the shared area. - for (x = 0; x < 4; ++x) - { - if (maxtop > horizspan[x][0]) - { - if(vizDepthMap!=NULL) vizDepthMap->setActualDepth(vizDepthMap->helperBuffer[x]); - hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1);//TU - } - } - - // Draw the shared area. - hcolfunc_post4 (sx, maxtop, minbot); - - // For each column, if part of the span is past the shared area, - // set its top to just below the shared area. Otherwise, advance - // to the next span in that column. - for (x = 0; x < 4; ++x) - { - if (minbot < horizspan[x][1]) - { - horizspan[x][0] = minbot+1; - } - else - { - horizspan[x] += 2; - } - } - } -} - -// Before each pass through a rendering loop that uses these routines, -// call this function to set up the span pointers. -void rt_initcols (BYTE *buff) -{ - int y; - - dc_temp = buff == NULL ? dc_tempbuff : buff; - for (y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; -} - -// Stretches a column into a temporary buffer which is later -// drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_C (void) -{ - int count = dc_count; - BYTE *dest; - fixed_t fracstep; - fixed_t frac; - - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - - } while (--count); - } -} - -// [RH] Just fills a column with a given color -void R_FillColumnHorizP (void) -{ - int count = dc_count; - BYTE color = dc_color; - BYTE *dest; - - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); -} - -// Same as R_DrawMaskedColumn() except that it always uses R_DrawColumnHoriz(). - -void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) -{ - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = (sprtopscreen + spryscale * top) >> FRACBITS; - dc_yh = (sprtopscreen + spryscale * (top + length) - FRACUNIT) >> FRACBITS; - - if (sprflipvert) - { - swapvalues (dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - if (sprflipvert) - { - dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - - FixedMul (centeryfrac, dc_iscale) - dc_texturemid; - const fixed_t maxfrac = length << FRACBITS; - while (dc_texturefrac >= maxfrac) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - while (endfrac < 0) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - else - { - dc_texturefrac = dc_texturemid - (top << FRACBITS) - + (dc_yl*dc_iscale) - FixedMul (centeryfrac-FRACUNIT, dc_iscale); - while (dc_texturefrac < 0) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) - { - dc_yh++; - } - else while (endfrac >= maxfrac) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - hcolfunc_pre (); - - - } -nextpost: - span++; - } - - if (sprflipvert) - { - unsigned int *front = horizspan[dc_x&3]; - unsigned int *back = dc_ctspan[dc_x&3] - 2; - - // Reorder the posts so that they get drawn top-to-bottom - // instead of bottom-to-top. - while (front < back) - { - swapvalues (front[0], back[0]); - swapvalues (front[1], back[1]); - front += 2; - back -= 2; - } - } -} diff --git a/src/vizdoom/src/r_main.cpp b/src/vizdoom/src/r_main.cpp index 48caf00f4..2d2df2ea9 100644 --- a/src/vizdoom/src/r_main.cpp +++ b/src/vizdoom/src/r_main.cpp @@ -723,9 +723,6 @@ void R_SetupBuffer () { dc_pitch = pitch; R_InitFuzzTable (pitch); -#if defined(X86_ASM) || defined(X64_ASM) - ASM_PatchPitch (); -#endif } dc_destorg = lineptr; for (int i = 0; i < RenderTarget->GetHeight(); i++) diff --git a/src/vizdoom/src/r_plane.cpp b/src/vizdoom/src/r_plane.cpp index 850451816..113a2214b 100644 --- a/src/vizdoom/src/r_plane.cpp +++ b/src/vizdoom/src/r_plane.cpp @@ -145,13 +145,6 @@ static fixed_t xscale, yscale; static DWORD xstepscale, ystepscale; static DWORD basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void STACK_ARGS R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -230,11 +223,7 @@ void R_MapPlane (int y, int x1) FixedMul (GlobVis, abs (centeryfrac - (y << FRACBITS))), planeshade) << COLORMAPSHIFT); } -#ifdef X86_ASM - if (ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM (ds_colormap); -#endif - + //VIZDOOM_CODE if(vizDepthMap!=NULL) vizDepthMap->setActualDepthConv(distance); /*static long max, min; if(min==0) min=max; @@ -1522,12 +1511,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif if (alpha <= 0) { @@ -1746,13 +1729,7 @@ void R_DrawTiltedPlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske } } -#if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); -#else R_MapVisPlane (pl, R_MapTiltedPlane); -#endif } //========================================================================== diff --git a/src/vizdoom/src/v_palette.cpp b/src/vizdoom/src/v_palette.cpp index 92182311b..4d84fa032 100644 --- a/src/vizdoom/src/v_palette.cpp +++ b/src/vizdoom/src/v_palette.cpp @@ -110,13 +110,6 @@ extern "C" BYTE BestColor_MMX (DWORD rgb, const DWORD *pal); int BestColor (const uint32 *pal_in, int r, int g, int b, int first, int num) { -#ifdef X86_ASM - if (CPU.bMMX) - { - int pre = 256 - num - first; - return BestColor_MMX (((first+pre)<<24)|(r<<16)|(g<<8)|b, pal_in-pre) - pre; - } -#endif const PalEntry *pal = (const PalEntry *)pal_in; int bestcolor = first; int bestdist = 257*257+257*257+257*257; @@ -422,23 +415,6 @@ void DoBlending (const PalEntry *from, PalEntry *to, int count, int r, int g, in to += not3count; } } -#endif -#ifdef X86_ASM - else if (CPU.bMMX) - { - if (count >= 4) - { - int not3count = count & ~3; - DoBlending_MMX (from, to, not3count, r, g, b, a); - count &= 3; - if (count <= 0) - { - return; - } - from += not3count; - to += not3count; - } - } #endif int i, ia; diff --git a/src/vizdoom/src/v_video.h b/src/vizdoom/src/v_video.h index 7593b7b4b..514405191 100644 --- a/src/vizdoom/src/v_video.h +++ b/src/vizdoom/src/v_video.h @@ -497,10 +497,6 @@ void V_RefreshViewBorder (); void V_SetBorderNeedRefresh(); -#if defined(X86_ASM) || defined(X64_ASM) -extern "C" void ASM_PatchPitch (void); -#endif - int CheckRatio (int width, int height, int *trueratio=NULL); static inline int CheckRatio (double width, double height) { return CheckRatio(int(width), int(height)); } extern const int BaseRatioSizes[5][4]; diff --git a/src/vizdoom/src/x86.cpp b/src/vizdoom/src/x86.cpp index 89d88333d..5828208a7 100644 --- a/src/vizdoom/src/x86.cpp +++ b/src/vizdoom/src/x86.cpp @@ -112,6 +112,8 @@ void CheckCPUID(CPUInfo *cpu) cpu->FeatureFlags[1] = foo[2]; // Store extended feature flags cpu->FeatureFlags[2] = foo[3]; // Store feature flags + cpu->HyperThreading = (foo[3] & (1 << 28)) > 0; + // If CLFLUSH instruction is supported, get the real cache line size. if (foo[3] & (1 << 19)) { @@ -223,58 +225,11 @@ void DumpCPUInfo(const CPUInfo *cpu) if (cpu->bSSE42) Printf(" SSE4.2"); if (cpu->b3DNow) Printf(" 3DNow!"); if (cpu->b3DNowPlus) Printf(" 3DNow!+"); - Printf ("\n"); + if (cpu->HyperThreading) Printf(" HyperThreading"); + Printf("\n"); } } -#if 0 -// Compiler output for this function is crap compared to the assembly -// version, which is why it isn't used. -void DoBlending_MMX2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) -{ - __m64 blendcolor; - __m64 blendalpha; - __m64 zero; - __m64 blending256; - __m64 color1; - __m64 color2; - - zero = _mm_setzero_si64(); -#ifndef __GNUC__ - blending256.m64_i64 = 0x10001000100; -#else - blending256 = (__m64)0x10001000100ll; -#endif - - blendcolor = _mm_unpacklo_pi8(_m_from_int((r << 16) | (g << 8) | b), zero); // 000000RR 00GG00BB - blendalpha = _mm_unpacklo_pi8(_m_from_int((a << 16) | (a << 8) | a), zero); // 000000AA 00AA00AA - - blendcolor = _mm_mullo_pi16(blendcolor, blendalpha); // premultiply blend by alpha - blendalpha = _mm_subs_pu16(blending256, blendalpha); // one minus alpha - - // Do two colors per iteration: Count must be even - for (count >>= 1; count > 0; --count) - { - color1 = *(__m64 *)from; // 00r2g2b2 00r1g1b1 - from += 2; - color2 = _mm_unpackhi_pi8(color1, zero); // 000000r2 00g200b2 - color1 = _mm_unpacklo_pi8(color1, zero); // 000000r1 00g100b1 - color1 = _mm_mullo_pi16(blendalpha, color1); // 0000r1rr g1ggb1bb - color2 = _mm_mullo_pi16(blendalpha, color2); // 0000r2rr g2ggb2bb - color1 = _mm_adds_pu16(blendcolor, color1); - color2 = _mm_adds_pu16(blendcolor, color2); - color1 = _mm_srli_pi16(color1, 8); - color2 = _mm_srli_pi16(color2, 8); - *(__m64 *)to = _mm_packs_pu16(color1, color2); // 00r2g2b2 00r1g1b1 - to += 2; - } - _mm_empty(); -} -#endif - -#ifdef X86_ASM -extern "C" void STACK_ARGS DoBlending_MMX(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); -#endif void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a) { @@ -288,17 +243,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g unaligned = ((size_t)from | (size_t)to) & 0xF; -#ifdef X86_ASM - // For unaligned accesses, the assembly MMX version is slightly faster. - // Note that using unaligned SSE loads and stores is still faster than - // the compiler-generated MMX version. - if (unaligned) - { - DoBlending_MMX(from, to, count, r, g, b, a); - return; - } -#endif - #if defined(__amd64__) || defined(_M_X64) long long color; @@ -326,7 +270,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g zero = _mm_setzero_si128(); -#ifndef X86_ASM if (unaligned) { for (count >>= 2; count > 0; --count) @@ -346,7 +289,6 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g } } else -#endif { for (count >>= 2; count > 0; --count) { From 853e58a235a714728070f5f7827b3608e49bada5 Mon Sep 17 00:00:00 2001 From: Marek Wydmuch Date: Tue, 3 Jan 2023 13:07:21 +0100 Subject: [PATCH 2/9] Fix building by using x86.h and x86.cpp from RZDoom --- src/vizdoom/src/x86.cpp | 12 ++-- src/vizdoom/src/x86.h | 139 ++++++++++++++++++++-------------------- 2 files changed, 74 insertions(+), 77 deletions(-) diff --git a/src/vizdoom/src/x86.cpp b/src/vizdoom/src/x86.cpp index 5828208a7..9ce21c840 100644 --- a/src/vizdoom/src/x86.cpp +++ b/src/vizdoom/src/x86.cpp @@ -2,10 +2,7 @@ #include "doomdef.h" #include "x86.h" -extern "C" -{ - CPUInfo CPU; -} +CPUInfo CPU; #if !defined(__amd64__) && !defined(__i386__) && !defined(_M_IX86) && !defined(_M_X64) void CheckCPUID(CPUInfo *cpu) @@ -22,7 +19,6 @@ void DumpCPUInfo(const CPUInfo *cpu) #ifdef _MSC_VER #include #endif -#include #include @@ -244,14 +240,14 @@ void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g unaligned = ((size_t)from | (size_t)to) & 0xF; #if defined(__amd64__) || defined(_M_X64) - long long color; + int64_t color; blending256 = _mm_set_epi64x(0x10001000100ll, 0x10001000100ll); - color = ((long long)r << 32) | (g << 16) | b; + color = ((int64_t)r << 32) | (g << 16) | b; blendcolor = _mm_set_epi64x(color, color); - color = ((long long)a << 32) | (a << 16) | a; + color = ((int64_t)a << 32) | (a << 16) | a; blendalpha = _mm_set_epi64x(color, color); #else int color; diff --git a/src/vizdoom/src/x86.h b/src/vizdoom/src/x86.h index e5ce7a00b..61134d2bd 100644 --- a/src/vizdoom/src/x86.h +++ b/src/vizdoom/src/x86.h @@ -8,104 +8,105 @@ struct CPUInfo // 92 bytes union { char VendorID[16]; - uint32 dwVendorID[4]; + uint32_t dwVendorID[4]; }; union { char CPUString[48]; - uint32 dwCPUString[12]; + uint32_t dwCPUString[12]; }; - BYTE Stepping; - BYTE Model; - BYTE Family; - BYTE Type; + uint8_t Stepping; + uint8_t Model; + uint8_t Family; + uint8_t Type; + uint8_t HyperThreading; union { struct { - BYTE BrandIndex; - BYTE CLFlush; - BYTE CPUCount; - BYTE APICID; + uint8_t BrandIndex; + uint8_t CLFlush; + uint8_t CPUCount; + uint8_t APICID; - uint32 bSSE3:1; - uint32 DontCare1:8; - uint32 bSSSE3:1; - uint32 DontCare1a:9; - uint32 bSSE41:1; - uint32 bSSE42:1; - uint32 DontCare2a:11; + uint32_t bSSE3 : 1; + uint32_t DontCare1 : 8; + uint32_t bSSSE3 : 1; + uint32_t DontCare1a : 9; + uint32_t bSSE41 : 1; + uint32_t bSSE42 : 1; + uint32_t DontCare2a : 11; - uint32 bFPU:1; - uint32 bVME:1; - uint32 bDE:1; - uint32 bPSE:1; - uint32 bRDTSC:1; - uint32 bMSR:1; - uint32 bPAE:1; - uint32 bMCE:1; - uint32 bCX8:1; - uint32 bAPIC:1; - uint32 bReserved1:1; - uint32 bSEP:1; - uint32 bMTRR:1; - uint32 bPGE:1; - uint32 bMCA:1; - uint32 bCMOV:1; - uint32 bPAT:1; - uint32 bPSE36:1; - uint32 bPSN:1; - uint32 bCFLUSH:1; - uint32 bReserved2:1; - uint32 bDS:1; - uint32 bACPI:1; - uint32 bMMX:1; - uint32 bFXSR:1; - uint32 bSSE:1; - uint32 bSSE2:1; - uint32 bSS:1; - uint32 bHTT:1; - uint32 bTM:1; - uint32 bReserved3:1; - uint32 bPBE:1; + uint32_t bFPU : 1; + uint32_t bVME : 1; + uint32_t bDE : 1; + uint32_t bPSE : 1; + uint32_t bRDTSC : 1; + uint32_t bMSR : 1; + uint32_t bPAE : 1; + uint32_t bMCE : 1; + uint32_t bCX8 : 1; + uint32_t bAPIC : 1; + uint32_t bReserved1 : 1; + uint32_t bSEP : 1; + uint32_t bMTRR : 1; + uint32_t bPGE : 1; + uint32_t bMCA : 1; + uint32_t bCMOV : 1; + uint32_t bPAT : 1; + uint32_t bPSE36 : 1; + uint32_t bPSN : 1; + uint32_t bCFLUSH : 1; + uint32_t bReserved2 : 1; + uint32_t bDS : 1; + uint32_t bACPI : 1; + uint32_t bMMX : 1; + uint32_t bFXSR : 1; + uint32_t bSSE : 1; + uint32_t bSSE2 : 1; + uint32_t bSS : 1; + uint32_t bHTT : 1; + uint32_t bTM : 1; + uint32_t bReserved3 : 1; + uint32_t bPBE : 1; - uint32 DontCare2:22; - uint32 bMMXPlus:1; // AMD's MMX extensions - uint32 bMMXAgain:1; // Just a copy of bMMX above - uint32 DontCare3:6; - uint32 b3DNowPlus:1; - uint32 b3DNow:1; + uint32_t DontCare2 : 22; + uint32_t bMMXPlus : 1; // AMD's MMX extensions + uint32_t bMMXAgain : 1; // Just a copy of bMMX above + uint32_t DontCare3 : 6; + uint32_t b3DNowPlus : 1; + uint32_t b3DNow : 1; }; - uint32 FeatureFlags[4]; + uint32_t FeatureFlags[4]; }; - BYTE AMDStepping; - BYTE AMDModel; - BYTE AMDFamily; - BYTE bIsAMD; + uint8_t AMDStepping; + uint8_t AMDModel; + uint8_t AMDFamily; + uint8_t bIsAMD; union { struct { - BYTE DataL1LineSize; - BYTE DataL1LinesPerTag; - BYTE DataL1Associativity; - BYTE DataL1SizeKB; + uint8_t DataL1LineSize; + uint8_t DataL1LinesPerTag; + uint8_t DataL1Associativity; + uint8_t DataL1SizeKB; }; - uint32 AMD_DataL1Info; + uint32_t AMD_DataL1Info; }; }; -extern "C" CPUInfo CPU; +extern CPUInfo CPU; struct PalEntry; -void CheckCPUID (CPUInfo *cpu); -void DumpCPUInfo (const CPUInfo *cpu); -void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a); +void CheckCPUID(CPUInfo* cpu); +void DumpCPUInfo(const CPUInfo* cpu); +void DoBlending_SSE2(const PalEntry* from, PalEntry* to, int count, int r, int g, int b, int a); #endif From 4e5c3326f0ded7dc322387554fadabcbc268ee7a Mon Sep 17 00:00:00 2001 From: Marek Wydmuch Date: Tue, 3 Jan 2023 22:01:23 +0100 Subject: [PATCH 3/9] Remove Julia bindings and update doc/Building.md --- CMakeLists.txt | 17 +- README.md | 11 +- doc/Building.md | 86 ++--- examples/julia/basic.jl | 84 ----- src/lib_julia/CMakeLists.txt | 11 - src/lib_julia/ViZDoomJuliaModule.cpp | 461 --------------------------- 6 files changed, 34 insertions(+), 636 deletions(-) delete mode 100644 examples/julia/basic.jl delete mode 100644 src/lib_julia/CMakeLists.txt delete mode 100644 src/lib_julia/ViZDoomJuliaModule.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ff1168b14..c39356a95 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,8 +11,8 @@ set(ViZDoom_VERSION_ID ${ViZDoom_VERSION_MAJOR}${ViZDoom_VERSION_MINOR}${ViZDoom # Building options #----------------------------------------------------------------------------------------------------------------------- -option(BUILD_PYTHON "Build ViZDoom Python (3) binding" OFF) -option(BUILD_JULIA "Build ViZDoom Julia binding." OFF) +option(BUILD_PYTHON "Build ViZDoom Python (3) binding" ON) +option(BUILD_ENGINE "Build ViZDoom Engine" ON) # CMake options @@ -86,7 +86,7 @@ if (MSVC) endif () -# Main library and engine +# Main library #----------------------------------------------------------------------------------------------------------------------- set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) @@ -155,17 +155,16 @@ set_target_properties(libvizdoom_shared PROPERTIES PROJECT_LABEL "shared library") + +# ViZDoom Engine +#----------------------------------------------------------------------------------------------------------------------- + add_subdirectory(${VIZDOOM_SRC_DIR}/vizdoom) -# Python and Julia +# Python bindings #----------------------------------------------------------------------------------------------------------------------- if (BUILD_PYTHON) add_subdirectory(${VIZDOOM_SRC_DIR}/lib_python) endif () - -if (BUILD_JULIA) - add_subdirectory(${VIZDOOM_SRC_DIR}/lib_julia) -endif () - diff --git a/README.md b/README.md index 626352c6c..70a6c8b1f 100644 --- a/README.md +++ b/README.md @@ -9,15 +9,15 @@ ViZDoom is based on [ZDoom](https://github.com/rheit/zdoom) to provide the game ## Features - Multi-platform (Linux, macOS, Windows), -- API for Python, C++, and Julia (thanks to [Jun Tian](https://github.com/findmyway)), and also Lua and Java for older versions, +- API for Python and C++, - [OpenAI Gym](https://www.gymlibrary.dev/) environment wrappers (thanks to [Benjamin Noah Beal](https://github.com/bebeal), [Lawrence Francis](https://github.com/ldfrancis), and [Mark Towers](https://github.com/pseudo-rnd-thoughts)), -- Easy-to-create custom scenarios (visual editors, scripting language and examples available), -- Async and sync single-player and multi-player modes, +- Easy-to-create custom scenarios (visual editors, scripting language, and examples available), +- Async and sync single-player and multiplayer modes, - Fast (up to 7000 fps in sync mode, single-threaded), - Lightweight (few MBs), - Customizable resolution and rendering parameters, - Access to the depth buffer (3D vision), -- Automatic labelling game objects visible in the frame, +- Automatic labeling of game objects visible in the frame, - Access to the audio buffer (thanks to [Shashank Hegde](https://github.com/hegde95)), - Access to the list of actors/objects and map geometry, - Off-screen rendering, @@ -26,6 +26,8 @@ ViZDoom is based on [ZDoom](https://github.com/rheit/zdoom) to provide the game ViZDoom API is **reinforcement learning** friendly (suitable also for learning from demonstration, apprenticeship learning or apprenticeship via inverse reinforcement learning, etc.). +Julia (thanks to [Jun Tian](https://github.com/findmyway)), Lua, and Java bindings are available in other branches but are no longer maintained. + ## Cite as > M Wydmuch, M Kempka & W Jaśkowski, ViZDoom Competitions: Playing Doom from Pixels, IEEE Transactions on Games, vol. 11, no. 3, pp. 248-259, 2019 @@ -114,7 +116,6 @@ See [documentation](doc/Gym.md) and [examples](examples/python/gym_wrapper.py) o - [Python](examples/python) (contain learning examples implemented in PyTorch, TensorFlow and Theano) - [C++](examples/c%2B%2B) -- [Julia](examples/julia) Python examples are currently the richest, so we recommend to look at them, even if you plan to use other language. API is almost identical for all languages. diff --git a/doc/Building.md b/doc/Building.md index 0da5c4aa4..e1d34cbb3 100644 --- a/doc/Building.md +++ b/doc/Building.md @@ -16,15 +16,15 @@ ## Dependencies -Even if you plan to install ViZDoom via PyPI or LuaRocks, you need to install some dependencies in your system first. +Even if you plan to install ViZDoom via PyPI you need to install some dependencies in your system first. ### Linux -* CMake 3.1+ +* CMake 3.4+ * Make * GCC 6.0+ * Boost libraries 1.65.0+ -* Python 3.5+ with Numpy for Python binding (optional) +* Python 3.7+ for Python binding (optional) Additionally, [ZDoom dependencies](http://zdoom.org/wiki/Compile_ZDoom_on_Linux) are needed. @@ -40,14 +40,7 @@ sudo apt-get install libboost-all-dev # Python 3 dependencies sudo apt-get install python3-dev python3-pip -pip3 install numpy # or install Anaconda 3 and add it to PATH - -# Julia dependencies -sudo apt-get install julia -julia -julia> using Pkg -julia> Pkg.add("CxxWrap") ``` If you do not have a root access, you can use a conda (e.g. [miniconda](https://docs.conda.io/en/latest/miniconda.html)) environment to install dependencies to your environment only: @@ -64,10 +57,10 @@ python setup.py build && python setup.py install ### MacOS -* CMake 3.1+ +* CMake 3.4+ * Clang 5.0+ * Boost libraries 1.65.0+ -* Python 3.5+ with Numpy for Python binding (optional) +* Python 3.7+ for Python binding (optional) Additionally, [ZDoom dependencies](http://zdoom.org/wiki/Compile_ZDoom_on_Mac_OS_X) are needed. @@ -77,27 +70,19 @@ To get dependencies install [homebrew](https://brew.sh/) # ZDoom dependencies and Boost libraries brew install cmake boost openal-soft sdl2 -# Python 3 dependencies -brew install python3 -pip3 install numpy -# or install Anaconda 3 and add it to PATH - -# Julia dependencies -brew cask install julia -julia -julia> using Pkg -julia> Pkg.add("CxxWrap") +# You can use system python or install Anaconda 3 and add it to PATH ``` ### Windows -* CMake 3.1+ +* CMake 3.4+ * Visual Studio 2012+ * Boost 1.65+ -* Python 3.5+ with Numpy for Python binding (optional) +* Python 3.7+ for Python binding (optional) Additionally, [ZDoom dependencies](http://zdoom.org/wiki/Compile_ZDoom_on_Windows) are needed. -Most of them are gathered in this repository: [ViZDoomWinDepBin](https://github.com/mwydmuch/ViZDoomWinDepBin). +Most of them (except Boost) are gathered in this repository: [ViZDoomWinDepBin](https://github.com/mwydmuch/ViZDoomWinDepBin). +You can download Boost from [here](https://www.boost.org/users/download). ## Installation via PyPI (recommended for Python users) @@ -118,18 +103,10 @@ pip install git+https://github.com/mwydmuch/ViZDoom.git ``` -## Installation of Windows binaries - -For Windows we are providing a compiled environment that can be download from [releases](https://github.com/mwydmuch/ViZDoom/releases) page. -To install it for Python, copy files to `site-packages` folder. - -Location of `site-packages` depends on Python distribution: -- Python: `python_root\Lib\site-packges` -- Anaconda: `anaconda_root\lib\pythonX.X\site-packages` - - ## Building +Instructions below can be used to build ViZDoom manually. + ### Linux >>> Using [pip/conda](#pypi) is the recommended way to install ViZDoom, please try it first unless you are sure you want to compile the package by hand. @@ -138,22 +115,11 @@ In ViZDoom's root directory: ```bash mkdir build cd build -cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_PYTHON=ON -DBUILD_JAVA=ON -DBUILD_LUA=ON -DBUILD_JULIA=ON +cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_ENGINE=ON -DBUILD_PYTHON=ON make ``` -where `-DBUILD_PYTHON=ON` and `-DBUILD_JULIA=ON` CMake options for Python and Julia bindings are optional (default OFF). To force building bindings for Python3 instead of the first version found use `-DBUILD_PYTHON3=ON`. - -To build Julia binding you first need to install CxxWrap package by running `julia` and using `Pkg.add("CxxWrap")` command (see [Linux dependencies](#linux_deps)). Then you need to manually set `JlCxx_DIR` variable: - -```sh -mkdir build -cd build -cmake .. -DCMAKE_BUILD_TYPE=Release \ --DBUILD_JULIA=ON \ --DJlCxx_DIR=~/.julia/vX.X/CxxWrap/deps/usr/lib/cmake/JlCxx/ -``` - +where `-DBUILD_ENGINE=ON` and `-DBUILD_PYTHON=ON` CMake options are optional (default ON). ### MacOS @@ -164,15 +130,15 @@ Run CMake and build generated Makefile. ```sh mkdir build cd build -cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_PYTHON=ON -DBUILD_JULIA=ON +cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_ENGINE=ON -DBUILD_PYTHON=ON make ``` -where `-DBUILD_PYTHON=ON` and `-DBUILD_JULIA=ON` CMake options for Python and Julia bindings are optional (default OFF). To force building bindings for Python3 instead of the first version found use `-DBUILD_PYTHON3=ON`. +where `-DBUILD_ENGINE=ON` and `-DBUILD_PYTHON=ON` CMake options are optional (default ON). -Users with brew-installed Python/Anaconda **may** need to manually set `PYTHON_EXECUTABLE`, `PYTHON_INCLUDE_DIR`, `PYTHON_LIBRARY` variables: +Users with brew-installed Python/Anaconda **may** (in some cases) need to manually set `PYTHON_EXECUTABLE`, `PYTHON_INCLUDE_DIR`, `PYTHON_LIBRARY` variables: -It should look like this for brew-installed Python (use `-DBUILD_PYTHON3=ON`, `include/pythonX.Xm` and `lib/libpythonX.Xm.dylib` for Python 3): +It should look like this for brew-installed Python: ```sh mkdir build @@ -185,7 +151,7 @@ cmake .. -DCMAKE_BUILD_TYPE=Release \ -DNUMPY_INCLUDES=/usr/local/Cellar/python/X.X.X/Frameworks/Python.framework/Versions/X.X/lib/pythonX.X/site-packages/numpy/core/include ``` -Or for Anaconda (use `-DBUILD_PYTHON3=ON`, `include/pythonX.Xm` and `lib/libpythonX.Xm.dylib` for Python 3): +Or for Anaconda: ```sh mkdir build @@ -198,17 +164,6 @@ cmake .. -DCMAKE_BUILD_TYPE=Release \ -DNUMPY_INCLUDES=~/anacondaX/lib/pythonX.X/site-packages/numpy/core/include ``` -To build Julia binding, you first need to install CxxWrap package by running `julia` and using `Pkg.add("CxxWrap")` command (see [MacOS dependencies](#macos_deps)). Then you need to manually set `JlCxx_DIR` variable: - -```sh -mkdir build -cd build -cmake .. -DCMAKE_BUILD_TYPE=Release \ --DBUILD_JULIA=ON \ --DJlCxx_DIR=~/.julia/vX.X/CxxWrap/deps/usr/lib/cmake/JlCxx/ -``` - - ### Windows Setting up the compilation on Windows is really tedious so using the [precompiled binaries](#windows_bin) is recommended. @@ -223,10 +178,9 @@ Run CMake GUI, select ViZDoom root directory and set paths to: * BOOST_LIBRARYDIR * PYTHON_INCLUDE_DIR (optional, for Python/Anaconda bindings) * PYTHON_LIBRARY (optional, for Python/Anaconda bindings) -* NUMPY_INCLUDES (optional, for Python/Anaconda bindings) * ZDoom dependencies paths -In configuration select BUILD_PYTHON, BUILD_PYTHON3 and BUILD_JAVA options for Python and Java bindings (optional, default OFF). +In configuration select `DBUILD_ENGINE` and `DBUILD_PYTHON` (optional, default ON). Use generated Visual Studio solution to build all parts of ViZDoom environment. diff --git a/examples/julia/basic.jl b/examples/julia/basic.jl deleted file mode 100644 index 5fb4d094a..000000000 --- a/examples/julia/basic.jl +++ /dev/null @@ -1,84 +0,0 @@ -using CxxWrap - -wrap_modules("../../bin/libvizdoomjl") - -const vz = ViZDoomWrapper - -game = vz.DoomGame() - -vz.set_doom_scenario_path(game, "../../scenarios/basic.wad") - -vz.set_doom_map(game, "map01") - -# Sets resolution. Default is 320X240 -vz.set_screen_resolution(game, vz.RES_640X480) - -# Sets the screen buffer format. Not used here but now you can change it. Defalut is CRCGCB. -vz.set_screen_format(game, vz.RGB24) - -# Enables depth buffer. -vz.set_depth_buffer_enabled(game, true) - -# Enables labeling of in vz objects labeling. -vz.set_labels_buffer_enabled(game, true) - -# Enables buffer with top down map of the current episode/level. -vz.set_automap_buffer_enabled(game, true) - - -# Sets other rendering options (all of these options except crosshair are enabled (set to True) by default) -vz.set_render_hud(game, false) -vz.set_render_minimal_hud(game, false) # If hud is enabled -vz.set_render_crosshair(game, false) -vz.set_render_weapon(game, true) -vz.set_render_decals(game, false) # Bullet holes and blood on the walls -vz.set_render_particles(game, false) -vz.set_render_effects_sprites(game, false) # Smoke and blood -vz.set_render_messages(game, false) # In-vz messages -vz.set_render_corpses(game, false) -vz.set_render_screen_flashes(game, true) # Effect upon taking damage or picking up items - - -# Adds buttons that will be allowed. -vz.add_available_button(game, vz.MOVE_LEFT) -vz.add_available_button(game, vz.MOVE_RIGHT) -vz.add_available_button(game, vz.ATTACK) - -# Adds vz variables that will be included in state. -vz.add_available_game_variable(game, vz.AMMO2) - -# Causes episodes to finish after 200 tics (actions) -vz.set_episode_timeout(game, 200) - -# Makes episodes start after 10 tics (~after raising the weapon) -vz.set_episode_start_time(game, 10) - -# Makes the window appear (turned on by default) -vz.set_window_visible(game, true) - -# Turns on the sound. (turned off by default) -vz.set_sound_enabled(game, true) - -# Sets the living reward (for each move) to -1 -vz.set_living_reward(game, -1) - -# Sets ViZDoom mode (PLAYER, ASYNC_PLAYER, SPECTATOR, ASYNC_SPECTATOR, PLAYER mode is default) -vz.set_mode(game, vz.PLAYER) - -vz.init(game) - -actions = [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]] -episodes = 10 -sleep_time = 1.0 / vz.DEFAULT_TICRATE - -for i in 1:episodes - println("Episode #$i") - vz.new_episode(game) - - while !vz.is_episode_finished(game) - state = vz.get_state(game) - r = vz.make_action(game, rand(actions)) - println("Reward $r") - sleep(sleep_time) - end -end diff --git a/src/lib_julia/CMakeLists.txt b/src/lib_julia/CMakeLists.txt deleted file mode 100644 index 79056aa7b..000000000 --- a/src/lib_julia/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -find_package(JlCxx REQUIRED) - -add_library(vizdoomjl SHARED ViZDoomJuliaModule.cpp) -target_link_libraries(vizdoomjl JlCxx::cxxwrap_julia ${VIZDOOM_LIBS} libvizdoom_static) -set_target_properties(vizdoomjl - PROPERTIES - LIBRARY_OUTPUT_DIRECTORY ${VIZDOOM_OUTPUT_DIR} - LIBRARY_OUTPUT_DIRECTORY_RELEASE ${VIZDOOM_OUTPUT_DIR} - LIBRARY_OUTPUT_DIRECTORY_DEBUG ${VIZDOOM_OUTPUT_DIR} - LIBRARY_OUTPUT_DIRECTORY_MINSIZEREL ${VIZDOOM_OUTPUT_DIR} - LIBRARY_OUTPUT_DIRECTORY_RELWITHDEBINFO ${VIZDOOM_OUTPUT_DIR}) \ No newline at end of file diff --git a/src/lib_julia/ViZDoomJuliaModule.cpp b/src/lib_julia/ViZDoomJuliaModule.cpp deleted file mode 100644 index 3ca204a5f..000000000 --- a/src/lib_julia/ViZDoomJuliaModule.cpp +++ /dev/null @@ -1,461 +0,0 @@ -#include "ViZDoom.h" -#include - -using namespace vizdoom; - -namespace jlcxx -{ -template <> -struct IsBits : std::true_type -{ -}; -template <> -struct IsBits : std::true_type -{ -}; -template <> -struct IsBits : std::true_type -{ -}; -template <> -struct IsBits : std::true_type -{ -}; -template <> -struct IsBits