From 0878d3fd1b2606fe098ecfc28a175c067c7f6aad Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Thu, 24 Nov 2022 09:23:46 +0300 Subject: [PATCH 01/27] conan: use find_package to add SDL2 dependency in Linux (without conan) --- CMakeLists.txt | 10 ++++++++++ conanfile.py | 3 ++- src/apps/engine/CMakeLists.txt | 2 +- src/libs/core/CMakeLists.txt | 4 ++-- src/libs/input/CMakeLists.txt | 4 ++-- src/libs/window/CMakeLists.txt | 4 ++-- 6 files changed, 19 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8b36cf29..a8f47caa2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,16 @@ conan_cmake_run(CONANFILE conanfile.py ) if (NOT WIN32) + find_package(SDL2 REQUIRED) + message(STATUS "SDL2_LIBRARIES=${SDL2_LIBRARIES}") + include_directories(${SDL2_INCLUDE_DIRS}) # SDL2_INCLUDE_DIRS may be a ;-list, so it must not be spliced into a raw -I flag + message(STATUS "SDL2_INCLUDE_DIRS=${SDL2_INCLUDE_DIRS}") +endif() + +if (WIN32) + message("Using Windows D3D9 API") + set(SDL_LIBRARIES "sdl") +else() # for DXVK Native message("Using DXVK-native for D3D9 API") include(ExternalProject) diff --git a/conanfile.py b/conanfile.py index 66a96db8a..a548fde78 100644 --- a/conanfile.py +++ b/conanfile.py @@ -16,7 +16,7 @@ class StormEngine(ConanFile): # dependencies used in deploy binaries # conan-center - requires = ["zlib/1.2.11", "spdlog/1.9.2", "fast_float/3.4.0", "sdl/2.0.18", "mimalloc/2.0.3", "sentry-native/0.5.0", + requires = ["zlib/1.2.11", "spdlog/1.9.2", "fast_float/3.4.0", "mimalloc/2.0.3", "sentry-native/0.5.0", # storm.jfrog.io "directx/9.0@storm/prebuilt", "fmod/2.02.05@storm/prebuilt"] # aux dependencies (e.g.
for tests) @@ -27,6 +27,7 @@ def requirements(self): if self.settings.os == "Windows": # conan-center self.requires("7zip/19.00") + self.requires("sdl/2.0.18") else: # conan-center self.requires("openssl/1.1.1n")#fix for error: 'sentry-crashpad/0.4.13' requires 'openssl/1.1.1n' while 'pulseaudio/14.2' requires 'openssl/1.1.1q' diff --git a/src/apps/engine/CMakeLists.txt b/src/apps/engine/CMakeLists.txt index b710cc319..da2f17fff 100644 --- a/src/apps/engine/CMakeLists.txt +++ b/src/apps/engine/CMakeLists.txt @@ -32,7 +32,7 @@ STORM_SETUP( # external mimalloc sentry-native - sdl + ${SDL_LIBRARIES} zlib # system diff --git a/src/libs/core/CMakeLists.txt b/src/libs/core/CMakeLists.txt index 3e75e50cc..afd4561a1 100644 --- a/src/libs/core/CMakeLists.txt +++ b/src/libs/core/CMakeLists.txt @@ -1,5 +1,5 @@ STORM_SETUP( TARGET_NAME core TYPE library - DEPENDENCIES diagnostics math shared_headers steam_api fast_float sdl window -) \ No newline at end of file + DEPENDENCIES diagnostics math shared_headers steam_api fast_float ${SDL_LIBRARIES} window +) diff --git a/src/libs/input/CMakeLists.txt b/src/libs/input/CMakeLists.txt index 382ee3566..f33902846 100644 --- a/src/libs/input/CMakeLists.txt +++ b/src/libs/input/CMakeLists.txt @@ -1,5 +1,5 @@ STORM_SETUP( TARGET_NAME input TYPE library - DEPENDENCIES sdl util -) \ No newline at end of file + DEPENDENCIES ${SDL_LIBRARIES} util +) diff --git a/src/libs/window/CMakeLists.txt b/src/libs/window/CMakeLists.txt index 16afb93be..e81319628 100644 --- a/src/libs/window/CMakeLists.txt +++ b/src/libs/window/CMakeLists.txt @@ -1,5 +1,5 @@ STORM_SETUP( TARGET_NAME window TYPE library - DEPENDENCIES sdl -) \ No newline at end of file + DEPENDENCIES ${SDL_LIBRARIES} +) From 17889a0e620d2b29f70141dbf6aa3ff00358f756 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Wed, 23 Nov 2022 21:26:38 +0300 Subject: [PATCH 02/27] linux: add basic support for Gallium Nine on Linux --- CMakeLists.txt | 16 + nine-native/CMakeLists.txt | 23 + 
nine-native/include/D3D9/d3d9.h | 2011 +++++++++++++++++ nine-native/include/D3D9/d3d9caps.h | 390 ++++ nine-native/include/D3D9/d3d9types.h | 1817 +++++++++++++++ nine-native/include/nine_sdl.h | 13 + nine-native/src/dri3.c | 788 +++++++ nine-native/src/dri3.h | 80 + nine-native/src/nine_sdl.c | 1251 ++++++++++ src/CMakeLists.txt | 4 + src/apps/engine/CMakeLists.txt | 6 +- .../ball_splash/src/ball_splash_defines.h | 2 + src/libs/core/src/token.h | 2 + src/libs/input/src/sdl_input.cpp | 4 +- src/libs/math/CMakeLists.txt | 2 +- src/libs/renderer/CMakeLists.txt | 4 +- src/libs/renderer/include/dx9render.h | 2 +- src/libs/renderer/src/s_device.cpp | 13 +- src/libs/renderer/src/s_device.h | 2 +- src/libs/renderer/src/storm_d3dx9.cpp | 5 + src/libs/renderer/src/technique.cpp | 10 + .../sink_effect/src/sink_splash_defines.h | 2 + 22 files changed, 6438 insertions(+), 9 deletions(-) create mode 100644 nine-native/CMakeLists.txt create mode 100644 nine-native/include/D3D9/d3d9.h create mode 100644 nine-native/include/D3D9/d3d9caps.h create mode 100644 nine-native/include/D3D9/d3d9types.h create mode 100644 nine-native/include/nine_sdl.h create mode 100644 nine-native/src/dri3.c create mode 100644 nine-native/src/dri3.h create mode 100644 nine-native/src/nine_sdl.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a8f47caa2..f8b04647e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,9 @@ include(StormSetup) option(STORM_ENABLE_CRASH_REPORTS "Enable automatic crash reports" OFF) option(STORM_ENABLE_STEAM "Enable Steam integration" OFF) option(STORM_ENABLE_SAFE_MODE "Enable additional runtime checks" OFF) +if (NOT WIN32) +option(STORM_MESA_NINE "Use Gallium Nine from Mesa (without WINE) for D3D9 API" OFF) +endif() ### Set up output paths set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) @@ -47,6 +50,19 @@ endif() if (WIN32) message("Using Windows D3D9 API") set(SDL_LIBRARIES "sdl") +elseif (STORM_MESA_NINE) # for Gallium Nine + message("Using Gallium 
Nine for native D3D9 API") + + #sudo apt install libd3dadapter9-mesa-dev + find_package(PkgConfig REQUIRED) + pkg_check_modules(D3D REQUIRED IMPORTED_TARGET d3d) + + add_subdirectory(nine-native) + set(NINE_NATIVE_INCLUDE_DIRS + "${CMAKE_CURRENT_SOURCE_DIR}/nine-native/include" + "${CMAKE_CURRENT_SOURCE_DIR}/nine-native/include/D3D9" + ) + include_directories("${NINE_NATIVE_INCLUDE_DIRS}") else() # for DXVK Native message("Using DXVK-native for D3D9 API") diff --git a/nine-native/CMakeLists.txt b/nine-native/CMakeLists.txt new file mode 100644 index 000000000..0376ea2ec --- /dev/null +++ b/nine-native/CMakeLists.txt @@ -0,0 +1,23 @@ +project(nine-native) + +add_library(${PROJECT_NAME} STATIC + include/nine_sdl.h + src/nine_sdl.c + src/dri3.c + src/dri3.h +) + +target_include_directories(${PROJECT_NAME} PRIVATE + include + include/D3D9 +) + +target_link_libraries(${PROJECT_NAME} PRIVATE + SDL2 + X11 + xcb + xcb-present + xcb-dri3 + xcb-xfixes + X11-xcb +) diff --git a/nine-native/include/D3D9/d3d9.h b/nine-native/include/D3D9/d3d9.h new file mode 100644 index 000000000..d7fc714d2 --- /dev/null +++ b/nine-native/include/D3D9/d3d9.h @@ -0,0 +1,2011 @@ +/* + * Copyright 2011 Joakim Sindholt + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/*** THIS FILE IS AUTOGENERATED. DO NOT MODIFY MANUALLY. ***/ + +#ifndef _D3D9_H_ +#define _D3D9_H_ + +#include "d3d9types.h" +#include "d3d9caps.h" + +typedef struct IDirect3D9 IDirect3D9, *PDIRECT3D9, *LPDIRECT3D9; +typedef struct IDirect3D9Ex IDirect3D9Ex, *PDIRECT3D9EX, *LPDIRECT3D9EX; +typedef struct IDirect3D9ExOverlayExtension IDirect3D9ExOverlayExtension, *PDIRECT3D9EXOVERLAYEXTENSION, *LPDIRECT3D9EXOVERLAYEXTENSION; +typedef struct IDirect3DAuthenticatedChannel9 IDirect3DAuthenticatedChannel9, *PDIRECT3DAUTHENTICATEDCHANNEL9, *LPDIRECT3DAUTHENTICATEDCHANNEL9; +typedef struct IDirect3DBaseTexture9 IDirect3DBaseTexture9, *PDIRECT3DBASETEXTURE9, *LPDIRECT3DBASETEXTURE9; +typedef struct IDirect3DCryptoSession9 IDirect3DCryptoSession9, *PDIRECT3DCRYPTOSESSION9, *LPDIRECT3DCRYPTOSESSION9; +typedef struct IDirect3DCubeTexture9 IDirect3DCubeTexture9, *PDIRECT3DCUBETEXTURE9, *LPDIRECT3DCUBETEXTURE9; +typedef struct IDirect3DDevice9 IDirect3DDevice9, *PDIRECT3DDEVICE9, *LPDIRECT3DDEVICE9; +typedef struct IDirect3DDevice9Ex IDirect3DDevice9Ex, *PDIRECT3DDEVICE9EX, *LPDIRECT3DDEVICE9EX; +typedef struct IDirect3DDevice9Video IDirect3DDevice9Video, *PDIRECT3DDEVICE9VIDEO, *LPDIRECT3DDEVICE9VIDEO; +typedef struct IDirect3DIndexBuffer9 IDirect3DIndexBuffer9, *PDIRECT3DINDEXBUFFER9, *LPDIRECT3DINDEXBUFFER9; +typedef struct IDirect3DPixelShader9 IDirect3DPixelShader9, *PDIRECT3DPIXELSHADER9, *LPDIRECT3DPIXELSHADER9; +typedef struct IDirect3DQuery9 IDirect3DQuery9, 
*PDIRECT3DQUERY9, *LPDIRECT3DQUERY9; +typedef struct IDirect3DResource9 IDirect3DResource9, *PDIRECT3DRESOURCE9, *LPDIRECT3DRESOURCE9; +typedef struct IDirect3DStateBlock9 IDirect3DStateBlock9, *PDIRECT3DSTATEBLOCK9, *LPDIRECT3DSTATEBLOCK9; +typedef struct IDirect3DSurface9 IDirect3DSurface9, *PDIRECT3DSURFACE9, *LPDIRECT3DSURFACE9; +typedef struct IDirect3DSwapChain9 IDirect3DSwapChain9, *PDIRECT3DSWAPCHAIN9, *LPDIRECT3DSWAPCHAIN9; +typedef struct IDirect3DSwapChain9Ex IDirect3DSwapChain9Ex, *PDIRECT3DSWAPCHAIN9EX, *LPDIRECT3DSWAPCHAIN9EX; +typedef struct IDirect3DTexture9 IDirect3DTexture9, *PDIRECT3DTEXTURE9, *LPDIRECT3DTEXTURE9; +typedef struct IDirect3DVertexBuffer9 IDirect3DVertexBuffer9, *PDIRECT3DVERTEXBUFFER9, *LPDIRECT3DVERTEXBUFFER9; +typedef struct IDirect3DVertexDeclaration9 IDirect3DVertexDeclaration9, *PDIRECT3DVERTEXDECLARATION9, *LPDIRECT3DVERTEXDECLARATION9; +typedef struct IDirect3DVertexShader9 IDirect3DVertexShader9, *PDIRECT3DVERTEXSHADER9, *LPDIRECT3DVERTEXSHADER9; +typedef struct IDirect3DVolume9 IDirect3DVolume9, *PDIRECT3DVOLUME9, *LPDIRECT3DVOLUME9; +typedef struct IDirect3DVolumeTexture9 IDirect3DVolumeTexture9, *PDIRECT3DVOLUMETEXTURE9, *LPDIRECT3DVOLUMETEXTURE9; + +#ifdef __cplusplus + +extern "C" const GUID IID_IDirect3D9; +extern "C" const GUID IID_IDirect3D9Ex; +extern "C" const GUID IID_IDirect3D9ExOverlayExtension; +extern "C" const GUID IID_IDirect3DAuthenticatedChannel9; +extern "C" const GUID IID_IDirect3DBaseTexture9; +extern "C" const GUID IID_IDirect3DCryptoSession9; +extern "C" const GUID IID_IDirect3DCubeTexture9; +extern "C" const GUID IID_IDirect3DDevice9; +extern "C" const GUID IID_IDirect3DDevice9Ex; +extern "C" const GUID IID_IDirect3DDevice9Video; +extern "C" const GUID IID_IDirect3DIndexBuffer9; +extern "C" const GUID IID_IDirect3DPixelShader9; +extern "C" const GUID IID_IDirect3DQuery9; +extern "C" const GUID IID_IDirect3DResource9; +extern "C" const GUID IID_IDirect3DStateBlock9; +extern "C" const GUID 
IID_IDirect3DSurface9; +extern "C" const GUID IID_IDirect3DSwapChain9; +extern "C" const GUID IID_IDirect3DSwapChain9Ex; +extern "C" const GUID IID_IDirect3DTexture9; +extern "C" const GUID IID_IDirect3DVertexBuffer9; +extern "C" const GUID IID_IDirect3DVertexDeclaration9; +extern "C" const GUID IID_IDirect3DVertexShader9; +extern "C" const GUID IID_IDirect3DVolume9; +extern "C" const GUID IID_IDirect3DVolumeTexture9; + +struct IDirect3D9 : public IUnknown +{ + virtual HRESULT WINAPI RegisterSoftwareDevice(void *pInitializeFunction) = 0; + virtual UINT WINAPI GetAdapterCount() = 0; + virtual HRESULT WINAPI GetAdapterIdentifier(UINT Adapter, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier) = 0; + virtual UINT WINAPI GetAdapterModeCount(UINT Adapter, D3DFORMAT Format) = 0; + virtual HRESULT WINAPI EnumAdapterModes(UINT Adapter, D3DFORMAT Format, UINT Mode, D3DDISPLAYMODE *pMode) = 0; + virtual HRESULT WINAPI GetAdapterDisplayMode(UINT Adapter, D3DDISPLAYMODE *pMode) = 0; + virtual HRESULT WINAPI CheckDeviceType(UINT Adapter, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed) = 0; + virtual HRESULT WINAPI CheckDeviceFormat(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, DWORD Usage, D3DRESOURCETYPE RType, D3DFORMAT CheckFormat) = 0; + virtual HRESULT WINAPI CheckDeviceMultiSampleType(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, BOOL Windowed, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD *pQualityLevels) = 0; + virtual HRESULT WINAPI CheckDepthStencilMatch(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat) = 0; + virtual HRESULT WINAPI CheckDeviceFormatConversion(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, D3DFORMAT TargetFormat) = 0; + virtual HRESULT WINAPI GetDeviceCaps(UINT Adapter, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps) = 0; + virtual HMONITOR WINAPI GetAdapterMonitor(UINT Adapter) = 0; + virtual HRESULT 
WINAPI CreateDevice(UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DDevice9 **ppReturnedDeviceInterface) = 0; +}; + +struct IDirect3D9Ex : public IDirect3D9 +{ + virtual UINT WINAPI GetAdapterModeCountEx(UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter) = 0; + virtual HRESULT WINAPI EnumAdapterModesEx(UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter, UINT Mode, D3DDISPLAYMODEEX *pMode) = 0; + virtual HRESULT WINAPI GetAdapterDisplayModeEx(UINT Adapter, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation) = 0; + virtual HRESULT WINAPI CreateDeviceEx(UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode, IDirect3DDevice9Ex **ppReturnedDeviceInterface) = 0; + virtual HRESULT WINAPI GetAdapterLUID(UINT Adapter, LUID *pLUID) = 0; +}; + +struct IDirect3D9ExOverlayExtension : public IUnknown +{ + virtual HRESULT WINAPI CheckDeviceOverlayType(UINT Adapter, D3DDEVTYPE DevType, UINT OverlayWidth, UINT OverlayHeight, D3DFORMAT OverlayFormat, D3DDISPLAYMODEEX *pDisplayMode, D3DDISPLAYROTATION DisplayRotation, D3DOVERLAYCAPS *pOverlayCaps) = 0; +}; + +struct IDirect3DResource9 : public IUnknown +{ + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI SetPrivateData(REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags) = 0; + virtual HRESULT WINAPI GetPrivateData(REFGUID refguid, void *pData, DWORD *pSizeOfData) = 0; + virtual HRESULT WINAPI FreePrivateData(REFGUID refguid) = 0; + virtual DWORD WINAPI SetPriority(DWORD PriorityNew) = 0; + virtual DWORD WINAPI GetPriority() = 0; + virtual void WINAPI PreLoad() = 0; + virtual D3DRESOURCETYPE WINAPI GetType() = 0; +}; + +struct IDirect3DBaseTexture9 : public IDirect3DResource9 +{ + virtual DWORD WINAPI SetLOD(DWORD LODNew) = 0; + virtual DWORD WINAPI GetLOD() = 0; + virtual DWORD 
WINAPI GetLevelCount() = 0; + virtual HRESULT WINAPI SetAutoGenFilterType(D3DTEXTUREFILTERTYPE FilterType) = 0; + virtual D3DTEXTUREFILTERTYPE WINAPI GetAutoGenFilterType() = 0; + virtual void WINAPI GenerateMipSubLevels() = 0; +}; + +struct IDirect3DCryptoSession9 : public IUnknown +{ + virtual HRESULT WINAPI GetCertificateSize(UINT *pCertificateSize) = 0; + virtual HRESULT WINAPI GetCertificate(UINT CertifacteSize, BYTE *ppCertificate) = 0; + virtual HRESULT WINAPI NegotiateKeyExchange(UINT DataSize, void *pData) = 0; + virtual HRESULT WINAPI EncryptionBlt(IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT DstSurfaceSize, void *pIV) = 0; + virtual HRESULT WINAPI DecryptionBlt(IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT SrcSurfaceSize, D3DENCRYPTED_BLOCK_INFO *pEncryptedBlockInfo, void *pContentKey, void *pIV) = 0; + virtual HRESULT WINAPI GetSurfacePitch(IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch) = 0; + virtual HRESULT WINAPI StartSessionKeyRefresh(void *pRandomNumber, UINT RandomNumberSize) = 0; + virtual HRESULT WINAPI FinishSessionKeyRefresh() = 0; + virtual HRESULT WINAPI GetEncryptionBltKey(void *pReadbackKey, UINT KeySize) = 0; +}; + +struct IDirect3DCubeTexture9 : public IDirect3DBaseTexture9 +{ + virtual HRESULT WINAPI GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) = 0; + virtual HRESULT WINAPI GetCubeMapSurface(D3DCUBEMAP_FACES FaceType, UINT Level, IDirect3DSurface9 **ppCubeMapSurface) = 0; + virtual HRESULT WINAPI LockRect(D3DCUBEMAP_FACES FaceType, UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags) = 0; + virtual HRESULT WINAPI UnlockRect(D3DCUBEMAP_FACES FaceType, UINT Level) = 0; + virtual HRESULT WINAPI AddDirtyRect(D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect) = 0; +}; + +struct IDirect3DDevice9 : public IUnknown +{ + virtual HRESULT WINAPI TestCooperativeLevel() = 0; + virtual UINT WINAPI GetAvailableTextureMem() = 0; + virtual HRESULT WINAPI EvictManagedResources() 
= 0; + virtual HRESULT WINAPI GetDirect3D(IDirect3D9 **ppD3D9) = 0; + virtual HRESULT WINAPI GetDeviceCaps(D3DCAPS9 *pCaps) = 0; + virtual HRESULT WINAPI GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE *pMode) = 0; + virtual HRESULT WINAPI GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) = 0; + virtual HRESULT WINAPI SetCursorProperties(UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap) = 0; + virtual void WINAPI SetCursorPosition(int X, int Y, DWORD Flags) = 0; + virtual BOOL WINAPI ShowCursor(BOOL bShow) = 0; + virtual HRESULT WINAPI CreateAdditionalSwapChain(D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain) = 0; + virtual HRESULT WINAPI GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain) = 0; + virtual UINT WINAPI GetNumberOfSwapChains() = 0; + virtual HRESULT WINAPI Reset(D3DPRESENT_PARAMETERS *pPresentationParameters) = 0; + virtual HRESULT WINAPI Present(const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion) = 0; + virtual HRESULT WINAPI GetBackBuffer(UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer) = 0; + virtual HRESULT WINAPI GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus) = 0; + virtual HRESULT WINAPI SetDialogBoxMode(BOOL bEnableDialogs) = 0; + virtual void WINAPI SetGammaRamp(UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp) = 0; + virtual void WINAPI GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP *pRamp) = 0; + virtual HRESULT WINAPI CreateTexture(UINT Width, UINT Height, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI CreateVolumeTexture(UINT Width, UINT Height, UINT Depth, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI CreateCubeTexture(UINT EdgeLength, UINT Levels, 
DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI CreateVertexBuffer(UINT Length, DWORD Usage, DWORD FVF, D3DPOOL Pool, IDirect3DVertexBuffer9 **ppVertexBuffer, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI CreateIndexBuffer(UINT Length, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI CreateRenderTarget(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI CreateDepthStencilSurface(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI UpdateSurface(IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint) = 0; + virtual HRESULT WINAPI UpdateTexture(IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture) = 0; + virtual HRESULT WINAPI GetRenderTargetData(IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface) = 0; + virtual HRESULT WINAPI GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9 *pDestSurface) = 0; + virtual HRESULT WINAPI StretchRect(IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestSurface, const RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter) = 0; + virtual HRESULT WINAPI ColorFill(IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color) = 0; + virtual HRESULT WINAPI CreateOffscreenPlainSurface(UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle) = 0; + virtual HRESULT WINAPI SetRenderTarget(DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget) = 0; + virtual HRESULT WINAPI 
GetRenderTarget(DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget) = 0; + virtual HRESULT WINAPI SetDepthStencilSurface(IDirect3DSurface9 *pNewZStencil) = 0; + virtual HRESULT WINAPI GetDepthStencilSurface(IDirect3DSurface9 **ppZStencilSurface) = 0; + virtual HRESULT WINAPI BeginScene() = 0; + virtual HRESULT WINAPI EndScene() = 0; + virtual HRESULT WINAPI Clear(DWORD Count, const D3DRECT *pRects, DWORD Flags, D3DCOLOR Color, float Z, DWORD Stencil) = 0; + virtual HRESULT WINAPI SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix) = 0; + virtual HRESULT WINAPI GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix) = 0; + virtual HRESULT WINAPI MultiplyTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix) = 0; + virtual HRESULT WINAPI SetViewport(const D3DVIEWPORT9 *pViewport) = 0; + virtual HRESULT WINAPI GetViewport(D3DVIEWPORT9 *pViewport) = 0; + virtual HRESULT WINAPI SetMaterial(const D3DMATERIAL9 *pMaterial) = 0; + virtual HRESULT WINAPI GetMaterial(D3DMATERIAL9 *pMaterial) = 0; + virtual HRESULT WINAPI SetLight(DWORD Index, const D3DLIGHT9 *pLight) = 0; + virtual HRESULT WINAPI GetLight(DWORD Index, D3DLIGHT9 *pLight) = 0; + virtual HRESULT WINAPI LightEnable(DWORD Index, BOOL Enable) = 0; + virtual HRESULT WINAPI GetLightEnable(DWORD Index, BOOL *pEnable) = 0; + virtual HRESULT WINAPI SetClipPlane(DWORD Index, const float *pPlane) = 0; + virtual HRESULT WINAPI GetClipPlane(DWORD Index, float *pPlane) = 0; + virtual HRESULT WINAPI SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) = 0; + virtual HRESULT WINAPI GetRenderState(D3DRENDERSTATETYPE State, DWORD *pValue) = 0; + virtual HRESULT WINAPI CreateStateBlock(D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB) = 0; + virtual HRESULT WINAPI BeginStateBlock() = 0; + virtual HRESULT WINAPI EndStateBlock(IDirect3DStateBlock9 **ppSB) = 0; + virtual HRESULT WINAPI SetClipStatus(const D3DCLIPSTATUS9 *pClipStatus) = 0; + virtual HRESULT WINAPI 
GetClipStatus(D3DCLIPSTATUS9 *pClipStatus) = 0; + virtual HRESULT WINAPI GetTexture(DWORD Stage, IDirect3DBaseTexture9 **ppTexture) = 0; + virtual HRESULT WINAPI SetTexture(DWORD Stage, IDirect3DBaseTexture9 *pTexture) = 0; + virtual HRESULT WINAPI GetTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue) = 0; + virtual HRESULT WINAPI SetTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value) = 0; + virtual HRESULT WINAPI GetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue) = 0; + virtual HRESULT WINAPI SetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value) = 0; + virtual HRESULT WINAPI ValidateDevice(DWORD *pNumPasses) = 0; + virtual HRESULT WINAPI SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY *pEntries) = 0; + virtual HRESULT WINAPI GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY *pEntries) = 0; + virtual HRESULT WINAPI SetCurrentTexturePalette(UINT PaletteNumber) = 0; + virtual HRESULT WINAPI GetCurrentTexturePalette(UINT *PaletteNumber) = 0; + virtual HRESULT WINAPI SetScissorRect(const RECT *pRect) = 0; + virtual HRESULT WINAPI GetScissorRect(RECT *pRect) = 0; + virtual HRESULT WINAPI SetSoftwareVertexProcessing(BOOL bSoftware) = 0; + virtual BOOL WINAPI GetSoftwareVertexProcessing() = 0; + virtual HRESULT WINAPI SetNPatchMode(float nSegments) = 0; + virtual float WINAPI GetNPatchMode() = 0; + virtual HRESULT WINAPI DrawPrimitive(D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount) = 0; + virtual HRESULT WINAPI DrawIndexedPrimitive(D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount) = 0; + virtual HRESULT WINAPI DrawPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride) = 0; + virtual HRESULT WINAPI DrawIndexedPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, UINT NumVertices, UINT 
PrimitiveCount, const void *pIndexData, D3DFORMAT IndexDataFormat, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride) = 0; + virtual HRESULT WINAPI ProcessVertices(UINT SrcStartIndex, UINT DestIndex, UINT VertexCount, IDirect3DVertexBuffer9 *pDestBuffer, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags) = 0; + virtual HRESULT WINAPI CreateVertexDeclaration(const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl) = 0; + virtual HRESULT WINAPI SetVertexDeclaration(IDirect3DVertexDeclaration9 *pDecl) = 0; + virtual HRESULT WINAPI GetVertexDeclaration(IDirect3DVertexDeclaration9 **ppDecl) = 0; + virtual HRESULT WINAPI SetFVF(DWORD FVF) = 0; + virtual HRESULT WINAPI GetFVF(DWORD *pFVF) = 0; + virtual HRESULT WINAPI CreateVertexShader(const DWORD *pFunction, IDirect3DVertexShader9 **ppShader) = 0; + virtual HRESULT WINAPI SetVertexShader(IDirect3DVertexShader9 *pShader) = 0; + virtual HRESULT WINAPI GetVertexShader(IDirect3DVertexShader9 **ppShader) = 0; + virtual HRESULT WINAPI SetVertexShaderConstantF(UINT StartRegister, const float *pConstantData, UINT Vector4fCount) = 0; + virtual HRESULT WINAPI GetVertexShaderConstantF(UINT StartRegister, float *pConstantData, UINT Vector4fCount) = 0; + virtual HRESULT WINAPI SetVertexShaderConstantI(UINT StartRegister, const int *pConstantData, UINT Vector4iCount) = 0; + virtual HRESULT WINAPI GetVertexShaderConstantI(UINT StartRegister, int *pConstantData, UINT Vector4iCount) = 0; + virtual HRESULT WINAPI SetVertexShaderConstantB(UINT StartRegister, const BOOL *pConstantData, UINT BoolCount) = 0; + virtual HRESULT WINAPI GetVertexShaderConstantB(UINT StartRegister, BOOL *pConstantData, UINT BoolCount) = 0; + virtual HRESULT WINAPI SetStreamSource(UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride) = 0; + virtual HRESULT WINAPI GetStreamSource(UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride) = 0; + virtual 
HRESULT WINAPI SetStreamSourceFreq(UINT StreamNumber, UINT Setting) = 0; + virtual HRESULT WINAPI GetStreamSourceFreq(UINT StreamNumber, UINT *pSetting) = 0; + virtual HRESULT WINAPI SetIndices(IDirect3DIndexBuffer9 *pIndexData) = 0; + virtual HRESULT WINAPI GetIndices(IDirect3DIndexBuffer9 **ppIndexData) = 0; + virtual HRESULT WINAPI CreatePixelShader(const DWORD *pFunction, IDirect3DPixelShader9 **ppShader) = 0; + virtual HRESULT WINAPI SetPixelShader(IDirect3DPixelShader9 *pShader) = 0; + virtual HRESULT WINAPI GetPixelShader(IDirect3DPixelShader9 **ppShader) = 0; + virtual HRESULT WINAPI SetPixelShaderConstantF(UINT StartRegister, const float *pConstantData, UINT Vector4fCount) = 0; + virtual HRESULT WINAPI GetPixelShaderConstantF(UINT StartRegister, float *pConstantData, UINT Vector4fCount) = 0; + virtual HRESULT WINAPI SetPixelShaderConstantI(UINT StartRegister, const int *pConstantData, UINT Vector4iCount) = 0; + virtual HRESULT WINAPI GetPixelShaderConstantI(UINT StartRegister, int *pConstantData, UINT Vector4iCount) = 0; + virtual HRESULT WINAPI SetPixelShaderConstantB(UINT StartRegister, const BOOL *pConstantData, UINT BoolCount) = 0; + virtual HRESULT WINAPI GetPixelShaderConstantB(UINT StartRegister, BOOL *pConstantData, UINT BoolCount) = 0; + virtual HRESULT WINAPI DrawRectPatch(UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo) = 0; + virtual HRESULT WINAPI DrawTriPatch(UINT Handle, const float *pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo) = 0; + virtual HRESULT WINAPI DeletePatch(UINT Handle) = 0; + virtual HRESULT WINAPI CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery) = 0; +}; + +/*** IUnknown methods ***/ +#define IDirect3DDevice9_QueryInterface(p,a,b) (p)->QueryInterface(a,b) +#define IDirect3DDevice9_AddRef(p) (p)->AddRef() +#define IDirect3DDevice9_Release(p) (p)->Release() +/*** IDirect3DDevice9 methods ***/ +#define IDirect3DDevice9_TestCooperativeLevel(p) (p)->TestCooperativeLevel() +#define 
IDirect3DDevice9_GetAvailableTextureMem(p) (p)->GetAvailableTextureMem() +#define IDirect3DDevice9_EvictManagedResources(p) (p)->EvictManagedResources() +#define IDirect3DDevice9_GetDirect3D(p,a) (p)->GetDirect3D(a) +#define IDirect3DDevice9_GetDeviceCaps(p,a) (p)->GetDeviceCaps(a) +#define IDirect3DDevice9_GetDisplayMode(p,a,b) (p)->GetDisplayMode(a,b) +#define IDirect3DDevice9_GetCreationParameters(p,a) (p)->GetCreationParameters(a) +#define IDirect3DDevice9_SetCursorProperties(p,a,b,c) (p)->SetCursorProperties(a,b,c) +#define IDirect3DDevice9_SetCursorPosition(p,a,b,c) (p)->SetCursorPosition(a,b,c) +#define IDirect3DDevice9_ShowCursor(p,a) (p)->ShowCursor(a) +#define IDirect3DDevice9_CreateAdditionalSwapChain(p,a,b) (p)->CreateAdditionalSwapChain(a,b) +#define IDirect3DDevice9_GetSwapChain(p,a,b) (p)->GetSwapChain(a,b) +#define IDirect3DDevice9_GetNumberOfSwapChains(p) (p)->GetNumberOfSwapChains() +#define IDirect3DDevice9_Reset(p,a) (p)->Reset(a) +#define IDirect3DDevice9_Present(p,a,b,c,d) (p)->Present(a,b,c,d) +#define IDirect3DDevice9_GetBackBuffer(p,a,b,c,d) (p)->GetBackBuffer(a,b,c,d) +#define IDirect3DDevice9_GetRasterStatus(p,a,b) (p)->GetRasterStatus(a,b) +#define IDirect3DDevice9_SetDialogBoxMode(p,a) (p)->SetDialogBoxMode(a) +#define IDirect3DDevice9_SetGammaRamp(p,a,b,c) (p)->SetGammaRamp(a,b,c) +#define IDirect3DDevice9_GetGammaRamp(p,a,b) (p)->GetGammaRamp(a,b) +#define IDirect3DDevice9_CreateTexture(p,a,b,c,d,e,f,g,h) (p)->CreateTexture(a,b,c,d,e,f,g,h) +#define IDirect3DDevice9_CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) (p)->CreateVolumeTexture(a,b,c,d,e,f,g,h,i) +#define IDirect3DDevice9_CreateCubeTexture(p,a,b,c,d,e,f,g) (p)->CreateCubeTexture(a,b,c,d,e,f,g) +#define IDirect3DDevice9_CreateVertexBuffer(p,a,b,c,d,e,f) (p)->CreateVertexBuffer(a,b,c,d,e,f) +#define IDirect3DDevice9_CreateIndexBuffer(p,a,b,c,d,e,f) (p)->CreateIndexBuffer(a,b,c,d,e,f) +#define IDirect3DDevice9_CreateRenderTarget(p,a,b,c,d,e,f,g,h) 
(p)->CreateRenderTarget(a,b,c,d,e,f,g,h) +#define IDirect3DDevice9_CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) (p)->CreateDepthStencilSurface(a,b,c,d,e,f,g,h) +#define IDirect3DDevice9_UpdateSurface(p,a,b,c,d) (p)->UpdateSurface(a,b,c,d) +#define IDirect3DDevice9_UpdateTexture(p,a,b) (p)->UpdateTexture(a,b) +#define IDirect3DDevice9_GetRenderTargetData(p,a,b) (p)->GetRenderTargetData(a,b) +#define IDirect3DDevice9_GetFrontBufferData(p,a,b) (p)->GetFrontBufferData(a,b) +#define IDirect3DDevice9_StretchRect(p,a,b,c,d,e) (p)->StretchRect(a,b,c,d,e) +#define IDirect3DDevice9_ColorFill(p,a,b,c) (p)->ColorFill(a,b,c) +#define IDirect3DDevice9_CreateOffscreenPlainSurface(p,a,b,c,d,e,f) (p)->CreateOffscreenPlainSurface(a,b,c,d,e,f) +#define IDirect3DDevice9_SetRenderTarget(p,a,b) (p)->SetRenderTarget(a,b) +#define IDirect3DDevice9_GetRenderTarget(p,a,b) (p)->GetRenderTarget(a,b) +#define IDirect3DDevice9_SetDepthStencilSurface(p,a) (p)->SetDepthStencilSurface(a) +#define IDirect3DDevice9_GetDepthStencilSurface(p,a) (p)->GetDepthStencilSurface(a) +#define IDirect3DDevice9_BeginScene(p) (p)->BeginScene() +#define IDirect3DDevice9_EndScene(p) (p)->EndScene() +#define IDirect3DDevice9_Clear(p,a,b,c,d,e,f) (p)->Clear(a,b,c,d,e,f) +#define IDirect3DDevice9_SetTransform(p,a,b) (p)->SetTransform(a,b) +#define IDirect3DDevice9_GetTransform(p,a,b) (p)->GetTransform(a,b) +#define IDirect3DDevice9_MultiplyTransform(p,a,b) (p)->MultiplyTransform(a,b) +#define IDirect3DDevice9_SetViewport(p,a) (p)->SetViewport(a) +#define IDirect3DDevice9_GetViewport(p,a) (p)->GetViewport(a) +#define IDirect3DDevice9_SetMaterial(p,a) (p)->SetMaterial(a) +#define IDirect3DDevice9_GetMaterial(p,a) (p)->GetMaterial(a) +#define IDirect3DDevice9_SetLight(p,a,b) (p)->SetLight(a,b) +#define IDirect3DDevice9_GetLight(p,a,b) (p)->GetLight(a,b) +#define IDirect3DDevice9_LightEnable(p,a,b) (p)->LightEnable(a,b) +#define IDirect3DDevice9_GetLightEnable(p,a,b) (p)->GetLightEnable(a,b) +#define 
IDirect3DDevice9_SetClipPlane(p,a,b) (p)->SetClipPlane(a,b) /* continuation of the C++ IDirect3DDevice9 macro table: p is used only as the object of the member call and is not repeated in the argument list */ +#define IDirect3DDevice9_GetClipPlane(p,a,b) (p)->GetClipPlane(a,b) +#define IDirect3DDevice9_SetRenderState(p,a,b) (p)->SetRenderState(a,b) +#define IDirect3DDevice9_GetRenderState(p,a,b) (p)->GetRenderState(a,b) +#define IDirect3DDevice9_CreateStateBlock(p,a,b) (p)->CreateStateBlock(a,b) +#define IDirect3DDevice9_BeginStateBlock(p) (p)->BeginStateBlock() +#define IDirect3DDevice9_EndStateBlock(p,a) (p)->EndStateBlock(a) +#define IDirect3DDevice9_SetClipStatus(p,a) (p)->SetClipStatus(a) +#define IDirect3DDevice9_GetClipStatus(p,a) (p)->GetClipStatus(a) +#define IDirect3DDevice9_GetTexture(p,a,b) (p)->GetTexture(a,b) +#define IDirect3DDevice9_SetTexture(p,a,b) (p)->SetTexture(a,b) +#define IDirect3DDevice9_GetTextureStageState(p,a,b,c) (p)->GetTextureStageState(a,b,c) +#define IDirect3DDevice9_SetTextureStageState(p,a,b,c) (p)->SetTextureStageState(a,b,c) +#define IDirect3DDevice9_GetSamplerState(p,a,b,c) (p)->GetSamplerState(a,b,c) +#define IDirect3DDevice9_SetSamplerState(p,a,b,c) (p)->SetSamplerState(a,b,c) +#define IDirect3DDevice9_ValidateDevice(p,a) (p)->ValidateDevice(a) +#define IDirect3DDevice9_SetPaletteEntries(p,a,b) (p)->SetPaletteEntries(a,b) +#define IDirect3DDevice9_GetPaletteEntries(p,a,b) (p)->GetPaletteEntries(a,b) +#define IDirect3DDevice9_SetCurrentTexturePalette(p,a) (p)->SetCurrentTexturePalette(a) +#define IDirect3DDevice9_GetCurrentTexturePalette(p,a) (p)->GetCurrentTexturePalette(a) +#define IDirect3DDevice9_SetScissorRect(p,a) (p)->SetScissorRect(a) +#define IDirect3DDevice9_GetScissorRect(p,a) (p)->GetScissorRect(a) +#define IDirect3DDevice9_SetSoftwareVertexProcessing(p,a) (p)->SetSoftwareVertexProcessing(a) +#define IDirect3DDevice9_GetSoftwareVertexProcessing(p) (p)->GetSoftwareVertexProcessing() +#define IDirect3DDevice9_SetNPatchMode(p,a) (p)->SetNPatchMode(a) +#define IDirect3DDevice9_GetNPatchMode(p) (p)->GetNPatchMode() +#define IDirect3DDevice9_DrawPrimitive(p,a,b,c)
(p)->DrawPrimitive(a,b,c) /* the draw, vertex-declaration, FVF, vertex-shader, shader-constant and stream-source helpers below use the same one-line forwarding shape */ +#define IDirect3DDevice9_DrawIndexedPrimitive(p,a,b,c,d,e,f) (p)->DrawIndexedPrimitive(a,b,c,d,e,f) +#define IDirect3DDevice9_DrawPrimitiveUP(p,a,b,c,d) (p)->DrawPrimitiveUP(a,b,c,d) +#define IDirect3DDevice9_DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) (p)->DrawIndexedPrimitiveUP(a,b,c,d,e,f,g,h) +#define IDirect3DDevice9_ProcessVertices(p,a,b,c,d,e,f) (p)->ProcessVertices(a,b,c,d,e,f) +#define IDirect3DDevice9_CreateVertexDeclaration(p,a,b) (p)->CreateVertexDeclaration(a,b) +#define IDirect3DDevice9_SetVertexDeclaration(p,a) (p)->SetVertexDeclaration(a) +#define IDirect3DDevice9_GetVertexDeclaration(p,a) (p)->GetVertexDeclaration(a) +#define IDirect3DDevice9_SetFVF(p,a) (p)->SetFVF(a) +#define IDirect3DDevice9_GetFVF(p,a) (p)->GetFVF(a) +#define IDirect3DDevice9_CreateVertexShader(p,a,b) (p)->CreateVertexShader(a,b) +#define IDirect3DDevice9_SetVertexShader(p,a) (p)->SetVertexShader(a) +#define IDirect3DDevice9_GetVertexShader(p,a) (p)->GetVertexShader(a) +#define IDirect3DDevice9_SetVertexShaderConstantF(p,a,b,c) (p)->SetVertexShaderConstantF(a,b,c) +#define IDirect3DDevice9_GetVertexShaderConstantF(p,a,b,c) (p)->GetVertexShaderConstantF(a,b,c) +#define IDirect3DDevice9_SetVertexShaderConstantI(p,a,b,c) (p)->SetVertexShaderConstantI(a,b,c) +#define IDirect3DDevice9_GetVertexShaderConstantI(p,a,b,c) (p)->GetVertexShaderConstantI(a,b,c) +#define IDirect3DDevice9_SetVertexShaderConstantB(p,a,b,c) (p)->SetVertexShaderConstantB(a,b,c) +#define IDirect3DDevice9_GetVertexShaderConstantB(p,a,b,c) (p)->GetVertexShaderConstantB(a,b,c) +#define IDirect3DDevice9_SetStreamSource(p,a,b,c,d) (p)->SetStreamSource(a,b,c,d) +#define IDirect3DDevice9_GetStreamSource(p,a,b,c,d) (p)->GetStreamSource(a,b,c,d) +#define IDirect3DDevice9_SetStreamSourceFreq(p,a,b) (p)->SetStreamSourceFreq(a,b) +#define IDirect3DDevice9_GetStreamSourceFreq(p,a,b) (p)->GetStreamSourceFreq(a,b) +#define IDirect3DDevice9_SetIndices(p,a) (p)->SetIndices(a) +#define
IDirect3DDevice9_GetIndices(p,a) (p)->GetIndices(a) /* last entries of the C++ IDirect3DDevice9 forwarding macros (index buffer, pixel shader, patch and query helpers) */ +#define IDirect3DDevice9_CreatePixelShader(p,a,b) (p)->CreatePixelShader(a,b) +#define IDirect3DDevice9_SetPixelShader(p,a) (p)->SetPixelShader(a) +#define IDirect3DDevice9_GetPixelShader(p,a) (p)->GetPixelShader(a) +#define IDirect3DDevice9_SetPixelShaderConstantF(p,a,b,c) (p)->SetPixelShaderConstantF(a,b,c) +#define IDirect3DDevice9_GetPixelShaderConstantF(p,a,b,c) (p)->GetPixelShaderConstantF(a,b,c) +#define IDirect3DDevice9_SetPixelShaderConstantI(p,a,b,c) (p)->SetPixelShaderConstantI(a,b,c) +#define IDirect3DDevice9_GetPixelShaderConstantI(p,a,b,c) (p)->GetPixelShaderConstantI(a,b,c) +#define IDirect3DDevice9_SetPixelShaderConstantB(p,a,b,c) (p)->SetPixelShaderConstantB(a,b,c) +#define IDirect3DDevice9_GetPixelShaderConstantB(p,a,b,c) (p)->GetPixelShaderConstantB(a,b,c) +#define IDirect3DDevice9_DrawRectPatch(p,a,b,c) (p)->DrawRectPatch(a,b,c) +#define IDirect3DDevice9_DrawTriPatch(p,a,b,c) (p)->DrawTriPatch(a,b,c) +#define IDirect3DDevice9_DeletePatch(p,a) (p)->DeletePatch(a) +#define IDirect3DDevice9_CreateQuery(p,a,b) (p)->CreateQuery(a,b) + +struct IDirect3DDevice9Ex : public IDirect3DDevice9 /* C++ declaration of the extended device: fifteen pure-virtual methods added on top of IDirect3DDevice9. NOTE(review): declaration order presumably defines the vtable slot order expected by implementations - do not reorder without confirming */ +{ + virtual HRESULT WINAPI SetConvolutionMonoKernel(UINT width, UINT height, float *rows, float *columns) = 0; + virtual HRESULT WINAPI ComposeRects(IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, IDirect3DVertexBuffer9 *pSrcRectDescs, UINT NumRects, IDirect3DVertexBuffer9 *pDstRectDescs, D3DCOMPOSERECTSOP Operation, int Xoffset, int Yoffset) = 0; + virtual HRESULT WINAPI PresentEx(const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags) = 0; + virtual HRESULT WINAPI GetGPUThreadPriority(INT *pPriority) = 0; + virtual HRESULT WINAPI SetGPUThreadPriority(INT Priority) = 0; + virtual HRESULT WINAPI WaitForVBlank(UINT iSwapChain) = 0; + virtual HRESULT WINAPI CheckResourceResidency(IDirect3DResource9 **pResourceArray, UINT32 NumResources) = 0; + virtual
HRESULT WINAPI SetMaximumFrameLatency(UINT MaxLatency) = 0; + virtual HRESULT WINAPI GetMaximumFrameLatency(UINT *pMaxLatency) = 0; + virtual HRESULT WINAPI CheckDeviceState(HWND hDestinationWindow) = 0; + virtual HRESULT WINAPI CreateRenderTargetEx(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage) = 0; + virtual HRESULT WINAPI CreateOffscreenPlainSurfaceEx(UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage) = 0; + virtual HRESULT WINAPI CreateDepthStencilSurfaceEx(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage) = 0; + virtual HRESULT WINAPI ResetEx(D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode) = 0; + virtual HRESULT WINAPI GetDisplayModeEx(UINT iSwapChain, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation) = 0; +}; + +struct IDirect3DDevice9Video : public IUnknown /* pure-virtual interface derived directly from IUnknown; declares three methods, two of which hand back an interface pointer together with a HANDLE */ +{ + virtual HRESULT WINAPI GetContentProtectionCaps(const GUID *pCryptoType, const GUID *pDecodeProfile, D3DCONTENTPROTECTIONCAPS *pCaps) = 0; + virtual HRESULT WINAPI CreateAuthenticatedChannel(D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, HANDLE *pChannelHandle) = 0; + virtual HRESULT WINAPI CreateCryptoSession(const GUID *pCryptoType, const GUID *pDecodeProfile, IDirect3DCryptoSession9 **ppCryptoSession, HANDLE *pCryptoHandle) = 0; +}; + +struct IDirect3DIndexBuffer9 : public IDirect3DResource9 /* adds Lock / Unlock / GetDesc to the IDirect3DResource9 base */ +{ + virtual HRESULT WINAPI Lock(UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags) = 0; + virtual HRESULT WINAPI Unlock() = 0; + virtual HRESULT WINAPI GetDesc(D3DINDEXBUFFER_DESC *pDesc) = 0; +}; + +struct IDirect3DPixelShader9 : public IUnknown +{ + virtual
HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI GetFunction(void *pData, UINT *pSizeOfData) = 0; +}; + +struct IDirect3DQuery9 : public IUnknown /* pure-virtual query interface; note that GetType and GetDataSize return D3DQUERYTYPE / DWORD values directly rather than an HRESULT */ +{ + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual D3DQUERYTYPE WINAPI GetType() = 0; + virtual DWORD WINAPI GetDataSize() = 0; + virtual HRESULT WINAPI Issue(DWORD dwIssueFlags) = 0; + virtual HRESULT WINAPI GetData(void *pData, DWORD dwSize, DWORD dwGetDataFlags) = 0; +}; + +struct IDirect3DStateBlock9 : public IUnknown /* pure-virtual interface with GetDevice, Capture and Apply */ +{ + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI Capture() = 0; + virtual HRESULT WINAPI Apply() = 0; +}; + +struct IDirect3DSurface9 : public IDirect3DResource9 /* surface interface: six pure-virtual methods added to the IDirect3DResource9 base */ +{ + virtual HRESULT WINAPI GetContainer(REFIID riid, void **ppContainer) = 0; + virtual HRESULT WINAPI GetDesc(D3DSURFACE_DESC *pDesc) = 0; + virtual HRESULT WINAPI LockRect(D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags) = 0; + virtual HRESULT WINAPI UnlockRect() = 0; + virtual HRESULT WINAPI GetDC(HDC *phdc) = 0; + virtual HRESULT WINAPI ReleaseDC(HDC hdc) = 0; +}; + +/*** IUnknown methods (C++ form: each macro is a direct member call on p, with p evaluated once) ***/ +#define IDirect3DSurface9_QueryInterface(p,a,b) (p)->QueryInterface(a,b) +#define IDirect3DSurface9_AddRef(p) (p)->AddRef() +#define IDirect3DSurface9_Release(p) (p)->Release() +/*** IDirect3DSurface9 methods: IDirect3DResource9 (base-interface methods, callable through a surface pointer) ***/ +#define IDirect3DSurface9_GetDevice(p,a) (p)->GetDevice(a) +#define IDirect3DSurface9_SetPrivateData(p,a,b,c,d) (p)->SetPrivateData(a,b,c,d) +#define IDirect3DSurface9_GetPrivateData(p,a,b,c) (p)->GetPrivateData(a,b,c) +#define IDirect3DSurface9_FreePrivateData(p,a) (p)->FreePrivateData(a) +#define IDirect3DSurface9_SetPriority(p,a) (p)->SetPriority(a) +#define IDirect3DSurface9_GetPriority(p) (p)->GetPriority() +#define IDirect3DSurface9_PreLoad(p) (p)->PreLoad() +#define IDirect3DSurface9_GetType(p) (p)->GetType() +/*** IDirect3DSurface9 methods (the ones declared in struct IDirect3DSurface9 itself) ***/ +#define
IDirect3DSurface9_GetContainer(p,a,b) (p)->GetContainer(a,b) /* C++ forwarding macros for the six methods declared by IDirect3DSurface9 */ +#define IDirect3DSurface9_GetDesc(p,a) (p)->GetDesc(a) +#define IDirect3DSurface9_LockRect(p,a,b,c) (p)->LockRect(a,b,c) +#define IDirect3DSurface9_UnlockRect(p) (p)->UnlockRect() +#define IDirect3DSurface9_GetDC(p,a) (p)->GetDC(a) +#define IDirect3DSurface9_ReleaseDC(p,a) (p)->ReleaseDC(a) + +struct IDirect3DSwapChain9 : public IUnknown /* pure-virtual swap-chain interface: seven methods, all returning HRESULT */ +{ + virtual HRESULT WINAPI Present(const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags) = 0; + virtual HRESULT WINAPI GetFrontBufferData(IDirect3DSurface9 *pDestSurface) = 0; + virtual HRESULT WINAPI GetBackBuffer(UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer) = 0; + virtual HRESULT WINAPI GetRasterStatus(D3DRASTER_STATUS *pRasterStatus) = 0; + virtual HRESULT WINAPI GetDisplayMode(D3DDISPLAYMODE *pMode) = 0; + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI GetPresentParameters(D3DPRESENT_PARAMETERS *pPresentationParameters) = 0; +}; + +struct IDirect3DSwapChain9Ex : public IDirect3DSwapChain9 /* extends IDirect3DSwapChain9 with three additional pure-virtual methods */ +{ + virtual HRESULT WINAPI GetLastPresentCount(UINT *pLastPresentCount) = 0; + virtual HRESULT WINAPI GetPresentStats(D3DPRESENTSTATS *pPresentationStatistics) = 0; + virtual HRESULT WINAPI GetDisplayModeEx(D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation) = 0; +}; + +struct IDirect3DTexture9 : public IDirect3DBaseTexture9 /* 2D texture interface: every method except AddDirtyRect takes a UINT Level as its first argument */ +{ + virtual HRESULT WINAPI GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) = 0; + virtual HRESULT WINAPI GetSurfaceLevel(UINT Level, IDirect3DSurface9 **ppSurfaceLevel) = 0; + virtual HRESULT WINAPI LockRect(UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags) = 0; + virtual HRESULT WINAPI UnlockRect(UINT Level) = 0; + virtual HRESULT WINAPI AddDirtyRect(const RECT *pDirtyRect) = 0; +}; + +struct IDirect3DVertexBuffer9 : public IDirect3DResource9 +{ + virtual HRESULT WINAPI Lock(UINT
OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags) = 0; + virtual HRESULT WINAPI Unlock() = 0; + virtual HRESULT WINAPI GetDesc(D3DVERTEXBUFFER_DESC *pDesc) = 0; +}; + +struct IDirect3DVertexDeclaration9 : public IUnknown /* pure-virtual interface: GetDevice plus GetDeclaration, which takes an element buffer and a UINT count pointer */ +{ + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI GetDeclaration(D3DVERTEXELEMENT9 *pElement, UINT *pNumElements) = 0; +}; + +struct IDirect3DVertexShader9 : public IUnknown /* same two-method shape as IDirect3DPixelShader9: GetDevice and GetFunction */ +{ + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI GetFunction(void *pData, UINT *pSizeOfData) = 0; +}; + +struct IDirect3DVolume9 : public IUnknown /* derives from IUnknown (not IDirect3DResource9) and declares its own GetDevice and private-data methods */ +{ + virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; + virtual HRESULT WINAPI SetPrivateData(REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags) = 0; + virtual HRESULT WINAPI GetPrivateData(REFGUID refguid, void *pData, DWORD *pSizeOfData) = 0; + virtual HRESULT WINAPI FreePrivateData(REFGUID refguid) = 0; + virtual HRESULT WINAPI GetContainer(REFIID riid, void **ppContainer) = 0; + virtual HRESULT WINAPI GetDesc(D3DVOLUME_DESC *pDesc) = 0; + virtual HRESULT WINAPI LockBox(D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags) = 0; + virtual HRESULT WINAPI UnlockBox() = 0; +}; + +struct IDirect3DVolumeTexture9 : public IDirect3DBaseTexture9 /* volume-texture counterpart of IDirect3DTexture9: per-level access uses D3DVOLUME_DESC / D3DLOCKED_BOX / D3DBOX */ +{ + virtual HRESULT WINAPI GetLevelDesc(UINT Level, D3DVOLUME_DESC *pDesc) = 0; + virtual HRESULT WINAPI GetVolumeLevel(UINT Level, IDirect3DVolume9 **ppVolumeLevel) = 0; + virtual HRESULT WINAPI LockBox(UINT Level, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags) = 0; + virtual HRESULT WINAPI UnlockBox(UINT Level) = 0; + virtual HRESULT WINAPI AddDirtyBox(const D3DBOX *pDirtyBox) = 0; +}; + + +#else /* __cplusplus: the C declarations start here - IID externs, then one explicit vtable struct plus lpVtbl call macros per interface */ + +extern const GUID IID_IDirect3D9; +extern const GUID IID_IDirect3D9Ex; +extern const GUID IID_IDirect3D9ExOverlayExtension; +extern const GUID IID_IDirect3DAuthenticatedChannel9; +extern const GUID
IID_IDirect3DBaseTexture9; /* interface identifiers are only declared here (extern const GUID); their definitions are not part of this section */ +extern const GUID IID_IDirect3DCryptoSession9; +extern const GUID IID_IDirect3DCubeTexture9; +extern const GUID IID_IDirect3DDevice9; +extern const GUID IID_IDirect3DDevice9Ex; +extern const GUID IID_IDirect3DDevice9Video; +extern const GUID IID_IDirect3DIndexBuffer9; +extern const GUID IID_IDirect3DPixelShader9; +extern const GUID IID_IDirect3DQuery9; +extern const GUID IID_IDirect3DResource9; +extern const GUID IID_IDirect3DStateBlock9; +extern const GUID IID_IDirect3DSurface9; +extern const GUID IID_IDirect3DSwapChain9; +extern const GUID IID_IDirect3DSwapChain9Ex; +extern const GUID IID_IDirect3DTexture9; +extern const GUID IID_IDirect3DVertexBuffer9; +extern const GUID IID_IDirect3DVertexDeclaration9; +extern const GUID IID_IDirect3DVertexShader9; +extern const GUID IID_IDirect3DVolume9; +extern const GUID IID_IDirect3DVolumeTexture9; + +typedef struct IDirect3D9Vtbl /* C view of IDirect3D9: a table of WINAPI function pointers, each taking the object as an explicit This first parameter; member order is the table layout, so entries must not be reordered */ +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3D9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3D9 *This); + ULONG (WINAPI *Release)(IDirect3D9 *This); + /* IDirect3D9 */ + HRESULT (WINAPI *RegisterSoftwareDevice)(IDirect3D9 *This, void *pInitializeFunction); + UINT (WINAPI *GetAdapterCount)(IDirect3D9 *This); + HRESULT (WINAPI *GetAdapterIdentifier)(IDirect3D9 *This, UINT Adapter, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier); + UINT (WINAPI *GetAdapterModeCount)(IDirect3D9 *This, UINT Adapter, D3DFORMAT Format); + HRESULT (WINAPI *EnumAdapterModes)(IDirect3D9 *This, UINT Adapter, D3DFORMAT Format, UINT Mode, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *GetAdapterDisplayMode)(IDirect3D9 *This, UINT Adapter, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *CheckDeviceType)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed); + HRESULT (WINAPI *CheckDeviceFormat)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, DWORD Usage,
D3DRESOURCETYPE RType, D3DFORMAT CheckFormat); + HRESULT (WINAPI *CheckDeviceMultiSampleType)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, BOOL Windowed, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD *pQualityLevels); + HRESULT (WINAPI *CheckDepthStencilMatch)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat); + HRESULT (WINAPI *CheckDeviceFormatConversion)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, D3DFORMAT TargetFormat); + HRESULT (WINAPI *GetDeviceCaps)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps); + HMONITOR (WINAPI *GetAdapterMonitor)(IDirect3D9 *This, UINT Adapter); + HRESULT (WINAPI *CreateDevice)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DDevice9 **ppReturnedDeviceInterface); +} IDirect3D9Vtbl; +struct IDirect3D9 /* in C the object is just a pointer to its function table */ +{ + IDirect3D9Vtbl *lpVtbl; +}; + +/* IUnknown macros: C form - dispatch through p->lpVtbl and pass p again as This, so p is expanded twice and must be free of side effects */ +#define IDirect3D9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3D9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3D9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3D9 macros: one per IDirect3D9Vtbl entry, same argument order as the function pointer after This */ +#define IDirect3D9_RegisterSoftwareDevice(p,a) (p)->lpVtbl->RegisterSoftwareDevice(p,a) +#define IDirect3D9_GetAdapterCount(p) (p)->lpVtbl->GetAdapterCount(p) +#define IDirect3D9_GetAdapterIdentifier(p,a,b,c) (p)->lpVtbl->GetAdapterIdentifier(p,a,b,c) +#define IDirect3D9_GetAdapterModeCount(p,a,b) (p)->lpVtbl->GetAdapterModeCount(p,a,b) +#define IDirect3D9_EnumAdapterModes(p,a,b,c,d) (p)->lpVtbl->EnumAdapterModes(p,a,b,c,d) +#define IDirect3D9_GetAdapterDisplayMode(p,a,b) (p)->lpVtbl->GetAdapterDisplayMode(p,a,b) +#define IDirect3D9_CheckDeviceType(p,a,b,c,d,e) (p)->lpVtbl->CheckDeviceType(p,a,b,c,d,e) +#define IDirect3D9_CheckDeviceFormat(p,a,b,c,d,e,f)
(p)->lpVtbl->CheckDeviceFormat(p,a,b,c,d,e,f) +#define IDirect3D9_CheckDeviceMultiSampleType(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceMultiSampleType(p,a,b,c,d,e,f) +#define IDirect3D9_CheckDepthStencilMatch(p,a,b,c,d,e) (p)->lpVtbl->CheckDepthStencilMatch(p,a,b,c,d,e) +#define IDirect3D9_CheckDeviceFormatConversion(p,a,b,c,d) (p)->lpVtbl->CheckDeviceFormatConversion(p,a,b,c,d) +#define IDirect3D9_GetDeviceCaps(p,a,b,c) (p)->lpVtbl->GetDeviceCaps(p,a,b,c) +#define IDirect3D9_GetAdapterMonitor(p,a) (p)->lpVtbl->GetAdapterMonitor(p,a) +#define IDirect3D9_CreateDevice(p,a,b,c,d,e,f) (p)->lpVtbl->CreateDevice(p,a,b,c,d,e,f) + +typedef struct IDirect3D9ExVtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3D9Ex *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3D9Ex *This); + ULONG (WINAPI *Release)(IDirect3D9Ex *This); + /* IDirect3D9: the same entries, in the same order, as IDirect3D9Vtbl, with This retyped as IDirect3D9Ex */ + HRESULT (WINAPI *RegisterSoftwareDevice)(IDirect3D9Ex *This, void *pInitializeFunction); + UINT (WINAPI *GetAdapterCount)(IDirect3D9Ex *This); + HRESULT (WINAPI *GetAdapterIdentifier)(IDirect3D9Ex *This, UINT Adapter, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier); + UINT (WINAPI *GetAdapterModeCount)(IDirect3D9Ex *This, UINT Adapter, D3DFORMAT Format); + HRESULT (WINAPI *EnumAdapterModes)(IDirect3D9Ex *This, UINT Adapter, D3DFORMAT Format, UINT Mode, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *GetAdapterDisplayMode)(IDirect3D9Ex *This, UINT Adapter, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *CheckDeviceType)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed); + HRESULT (WINAPI *CheckDeviceFormat)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, DWORD Usage, D3DRESOURCETYPE RType, D3DFORMAT CheckFormat); + HRESULT (WINAPI *CheckDeviceMultiSampleType)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, BOOL Windowed, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD
*pQualityLevels); + HRESULT (WINAPI *CheckDepthStencilMatch)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat); + HRESULT (WINAPI *CheckDeviceFormatConversion)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, D3DFORMAT TargetFormat); + HRESULT (WINAPI *GetDeviceCaps)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps); + HMONITOR (WINAPI *GetAdapterMonitor)(IDirect3D9Ex *This, UINT Adapter); + HRESULT (WINAPI *CreateDevice)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DDevice9 **ppReturnedDeviceInterface); + /* IDirect3D9Ex: entries appended after the inherited IDirect3D9 ones */ + UINT (WINAPI *GetAdapterModeCountEx)(IDirect3D9Ex *This, UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter); + HRESULT (WINAPI *EnumAdapterModesEx)(IDirect3D9Ex *This, UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter, UINT Mode, D3DDISPLAYMODEEX *pMode); + HRESULT (WINAPI *GetAdapterDisplayModeEx)(IDirect3D9Ex *This, UINT Adapter, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation); + HRESULT (WINAPI *CreateDeviceEx)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode, IDirect3DDevice9Ex **ppReturnedDeviceInterface); + HRESULT (WINAPI *GetAdapterLUID)(IDirect3D9Ex *This, UINT Adapter, LUID *pLUID); +} IDirect3D9ExVtbl; +struct IDirect3D9Ex +{ + IDirect3D9ExVtbl *lpVtbl; +}; + +/* IUnknown macros: C form - dispatch through p->lpVtbl and pass p again as This, so p is expanded twice and must be free of side effects */ +#define IDirect3D9Ex_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3D9Ex_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3D9Ex_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3D9 macros */ +#define IDirect3D9Ex_RegisterSoftwareDevice(p,a) (p)->lpVtbl->RegisterSoftwareDevice(p,a) +#define IDirect3D9Ex_GetAdapterCount(p)
(p)->lpVtbl->GetAdapterCount(p) +#define IDirect3D9Ex_GetAdapterIdentifier(p,a,b,c) (p)->lpVtbl->GetAdapterIdentifier(p,a,b,c) +#define IDirect3D9Ex_GetAdapterModeCount(p,a,b) (p)->lpVtbl->GetAdapterModeCount(p,a,b) +#define IDirect3D9Ex_EnumAdapterModes(p,a,b,c,d) (p)->lpVtbl->EnumAdapterModes(p,a,b,c,d) +#define IDirect3D9Ex_GetAdapterDisplayMode(p,a,b) (p)->lpVtbl->GetAdapterDisplayMode(p,a,b) +#define IDirect3D9Ex_CheckDeviceType(p,a,b,c,d,e) (p)->lpVtbl->CheckDeviceType(p,a,b,c,d,e) +#define IDirect3D9Ex_CheckDeviceFormat(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceFormat(p,a,b,c,d,e,f) +#define IDirect3D9Ex_CheckDeviceMultiSampleType(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceMultiSampleType(p,a,b,c,d,e,f) +#define IDirect3D9Ex_CheckDepthStencilMatch(p,a,b,c,d,e) (p)->lpVtbl->CheckDepthStencilMatch(p,a,b,c,d,e) +#define IDirect3D9Ex_CheckDeviceFormatConversion(p,a,b,c,d) (p)->lpVtbl->CheckDeviceFormatConversion(p,a,b,c,d) +#define IDirect3D9Ex_GetDeviceCaps(p,a,b,c) (p)->lpVtbl->GetDeviceCaps(p,a,b,c) +#define IDirect3D9Ex_GetAdapterMonitor(p,a) (p)->lpVtbl->GetAdapterMonitor(p,a) +#define IDirect3D9Ex_CreateDevice(p,a,b,c,d,e,f) (p)->lpVtbl->CreateDevice(p,a,b,c,d,e,f) +/* IDirect3D9Ex macros: cover the five Ex-only vtable entries */ +#define IDirect3D9Ex_GetAdapterModeCountEx(p,a,b) (p)->lpVtbl->GetAdapterModeCountEx(p,a,b) +#define IDirect3D9Ex_EnumAdapterModesEx(p,a,b,c,d) (p)->lpVtbl->EnumAdapterModesEx(p,a,b,c,d) +#define IDirect3D9Ex_GetAdapterDisplayModeEx(p,a,b,c) (p)->lpVtbl->GetAdapterDisplayModeEx(p,a,b,c) +#define IDirect3D9Ex_CreateDeviceEx(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateDeviceEx(p,a,b,c,d,e,f,g) +#define IDirect3D9Ex_GetAdapterLUID(p,a,b) (p)->lpVtbl->GetAdapterLUID(p,a,b) + +typedef struct IDirect3D9ExOverlayExtensionVtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3D9ExOverlayExtension *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3D9ExOverlayExtension *This); + ULONG (WINAPI *Release)(IDirect3D9ExOverlayExtension *This); + /*
IDirect3D9ExOverlayExtension */ + HRESULT (WINAPI *CheckDeviceOverlayType)(IDirect3D9ExOverlayExtension *This, UINT Adapter, D3DDEVTYPE DevType, UINT OverlayWidth, UINT OverlayHeight, D3DFORMAT OverlayFormat, D3DDISPLAYMODEEX *pDisplayMode, D3DDISPLAYROTATION DisplayRotation, D3DOVERLAYCAPS *pOverlayCaps); +} IDirect3D9ExOverlayExtensionVtbl; +struct IDirect3D9ExOverlayExtension +{ + IDirect3D9ExOverlayExtensionVtbl *lpVtbl; +}; + +/* IUnknown macros: same lpVtbl dispatch as above, p passed as This */ +#define IDirect3D9ExOverlayExtension_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3D9ExOverlayExtension_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3D9ExOverlayExtension_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3D9ExOverlayExtension macros: the interface adds a single method, taking eight arguments after This */ +#define IDirect3D9ExOverlayExtension_CheckDeviceOverlayType(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CheckDeviceOverlayType(p,a,b,c,d,e,f,g,h) + +typedef struct IDirect3DAuthenticatedChannel9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DAuthenticatedChannel9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DAuthenticatedChannel9 *This); + ULONG (WINAPI *Release)(IDirect3DAuthenticatedChannel9 *This); + /* IDirect3DAuthenticatedChannel9 (the parameter name 'CertifacteSize' below is misspelled in this header; prototype parameter names do not affect callers, so it is left as-is) */ + HRESULT (WINAPI *GetCertificateSize)(IDirect3DAuthenticatedChannel9 *This, UINT *pCertificateSize); + HRESULT (WINAPI *GetCertificate)(IDirect3DAuthenticatedChannel9 *This, UINT CertifacteSize, BYTE *ppCertificate); + HRESULT (WINAPI *NegotiateKeyExchange)(IDirect3DAuthenticatedChannel9 *This, UINT DataSize, void *pData); + HRESULT (WINAPI *Query)(IDirect3DAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, UINT OutputSize, void *pOutput); + HRESULT (WINAPI *Configure)(IDirect3DAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, D3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT *pOutput); +} IDirect3DAuthenticatedChannel9Vtbl; +struct IDirect3DAuthenticatedChannel9 +{ + IDirect3DAuthenticatedChannel9Vtbl *lpVtbl; +}; + +/* IUnknown macros */
+#define IDirect3DAuthenticatedChannel9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DAuthenticatedChannel9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DAuthenticatedChannel9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DAuthenticatedChannel9 macros: C form - each dispatches through p->lpVtbl and passes p again as This, so p is expanded twice */ +#define IDirect3DAuthenticatedChannel9_GetCertificateSize(p,a) (p)->lpVtbl->GetCertificateSize(p,a) +#define IDirect3DAuthenticatedChannel9_GetCertificate(p,a,b) (p)->lpVtbl->GetCertificate(p,a,b) +#define IDirect3DAuthenticatedChannel9_NegotiateKeyExchange(p,a,b) (p)->lpVtbl->NegotiateKeyExchange(p,a,b) +#define IDirect3DAuthenticatedChannel9_Query(p,a,b,c,d) (p)->lpVtbl->Query(p,a,b,c,d) +#define IDirect3DAuthenticatedChannel9_Configure(p,a,b,c) (p)->lpVtbl->Configure(p,a,b,c) + +typedef struct IDirect3DBaseTexture9Vtbl /* C function table for IDirect3DBaseTexture9, laid out in three labelled groups: IUnknown, IDirect3DResource9, then the base-texture entries; member order is the table layout */ +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DBaseTexture9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DBaseTexture9 *This); + ULONG (WINAPI *Release)(IDirect3DBaseTexture9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DBaseTexture9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DBaseTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DBaseTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DBaseTexture9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DBaseTexture9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DBaseTexture9 *This); + void (WINAPI *PreLoad)(IDirect3DBaseTexture9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DBaseTexture9 *This); + /* IDirect3DBaseTexture9 */ + DWORD (WINAPI *SetLOD)(IDirect3DBaseTexture9 *This, DWORD LODNew); + DWORD (WINAPI *GetLOD)(IDirect3DBaseTexture9 *This); + DWORD (WINAPI *GetLevelCount)(IDirect3DBaseTexture9 *This); + HRESULT (WINAPI
*SetAutoGenFilterType)(IDirect3DBaseTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); + D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DBaseTexture9 *This); + void (WINAPI *GenerateMipSubLevels)(IDirect3DBaseTexture9 *This); +} IDirect3DBaseTexture9Vtbl; +struct IDirect3DBaseTexture9 /* in C the object is just a pointer to its function table */ +{ + IDirect3DBaseTexture9Vtbl *lpVtbl; +}; + +/* IUnknown macros: C form - dispatch through p->lpVtbl and pass p again as This, so p is expanded twice and must be free of side effects */ +#define IDirect3DBaseTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DBaseTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DBaseTexture9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros: wrappers for the vtable entries grouped under the IDirect3DResource9 label */ +#define IDirect3DBaseTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DBaseTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DBaseTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DBaseTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DBaseTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DBaseTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DBaseTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DBaseTexture9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DBaseTexture9 macros: LOD, level-count, auto-gen filter and mip generation entries */ +#define IDirect3DBaseTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) +#define IDirect3DBaseTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) +#define IDirect3DBaseTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) +#define IDirect3DBaseTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) +#define IDirect3DBaseTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) +#define IDirect3DBaseTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) + +typedef struct IDirect3DCryptoSession9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DCryptoSession9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DCryptoSession9 *This); +
 ULONG (WINAPI *Release)(IDirect3DCryptoSession9 *This); + /* IDirect3DCryptoSession9 (the parameter name 'CertifacteSize' below is misspelled in this header; prototype parameter names do not affect callers, so it is left as-is) */ + HRESULT (WINAPI *GetCertificateSize)(IDirect3DCryptoSession9 *This, UINT *pCertificateSize); + HRESULT (WINAPI *GetCertificate)(IDirect3DCryptoSession9 *This, UINT CertifacteSize, BYTE *ppCertificate); + HRESULT (WINAPI *NegotiateKeyExchange)(IDirect3DCryptoSession9 *This, UINT DataSize, void *pData); + HRESULT (WINAPI *EncryptionBlt)(IDirect3DCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT DstSurfaceSize, void *pIV); + HRESULT (WINAPI *DecryptionBlt)(IDirect3DCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT SrcSurfaceSize, D3DENCRYPTED_BLOCK_INFO *pEncryptedBlockInfo, void *pContentKey, void *pIV); + HRESULT (WINAPI *GetSurfacePitch)(IDirect3DCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch); + HRESULT (WINAPI *StartSessionKeyRefresh)(IDirect3DCryptoSession9 *This, void *pRandomNumber, UINT RandomNumberSize); + HRESULT (WINAPI *FinishSessionKeyRefresh)(IDirect3DCryptoSession9 *This); + HRESULT (WINAPI *GetEncryptionBltKey)(IDirect3DCryptoSession9 *This, void *pReadbackKey, UINT KeySize); +} IDirect3DCryptoSession9Vtbl; +struct IDirect3DCryptoSession9 /* in C the object is just a pointer to its function table */ +{ + IDirect3DCryptoSession9Vtbl *lpVtbl; +}; + +/* IUnknown macros: C form - dispatch through p->lpVtbl and pass p again as This, so p is expanded twice and must be free of side effects */ +#define IDirect3DCryptoSession9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DCryptoSession9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DCryptoSession9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DCryptoSession9 macros: one per vtable entry, arguments in the same order as the function pointer after This */ +#define IDirect3DCryptoSession9_GetCertificateSize(p,a) (p)->lpVtbl->GetCertificateSize(p,a) +#define IDirect3DCryptoSession9_GetCertificate(p,a,b) (p)->lpVtbl->GetCertificate(p,a,b) +#define IDirect3DCryptoSession9_NegotiateKeyExchange(p,a,b) (p)->lpVtbl->NegotiateKeyExchange(p,a,b) +#define IDirect3DCryptoSession9_EncryptionBlt(p,a,b,c,d) (p)->lpVtbl->EncryptionBlt(p,a,b,c,d) +#define
IDirect3DCryptoSession9_DecryptionBlt(p,a,b,c,d,e,f) (p)->lpVtbl->DecryptionBlt(p,a,b,c,d,e,f) /* remaining IDirect3DCryptoSession9 wrappers; like the others they expand p twice (object and This) */ +#define IDirect3DCryptoSession9_GetSurfacePitch(p,a,b) (p)->lpVtbl->GetSurfacePitch(p,a,b) +#define IDirect3DCryptoSession9_StartSessionKeyRefresh(p,a,b) (p)->lpVtbl->StartSessionKeyRefresh(p,a,b) +#define IDirect3DCryptoSession9_FinishSessionKeyRefresh(p) (p)->lpVtbl->FinishSessionKeyRefresh(p) +#define IDirect3DCryptoSession9_GetEncryptionBltKey(p,a,b) (p)->lpVtbl->GetEncryptionBltKey(p,a,b) + +typedef struct IDirect3DCubeTexture9Vtbl /* C function table for IDirect3DCubeTexture9: IUnknown, IDirect3DResource9 and IDirect3DBaseTexture9 groups first, then the cube-texture entries; member order is the table layout */ +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DCubeTexture9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DCubeTexture9 *This); + ULONG (WINAPI *Release)(IDirect3DCubeTexture9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DCubeTexture9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DCubeTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DCubeTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DCubeTexture9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DCubeTexture9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DCubeTexture9 *This); + void (WINAPI *PreLoad)(IDirect3DCubeTexture9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DCubeTexture9 *This); + /* IDirect3DBaseTexture9 */ + DWORD (WINAPI *SetLOD)(IDirect3DCubeTexture9 *This, DWORD LODNew); + DWORD (WINAPI *GetLOD)(IDirect3DCubeTexture9 *This); + DWORD (WINAPI *GetLevelCount)(IDirect3DCubeTexture9 *This); + HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DCubeTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); + D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DCubeTexture9 *This); + void (WINAPI *GenerateMipSubLevels)(IDirect3DCubeTexture9 *This); + /* IDirect3DCubeTexture9 */ + HRESULT (WINAPI
*GetLevelDesc)(IDirect3DCubeTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc); + HRESULT (WINAPI *GetCubeMapSurface)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, IDirect3DSurface9 **ppCubeMapSurface); + HRESULT (WINAPI *LockRect)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags); + HRESULT (WINAPI *UnlockRect)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level); + HRESULT (WINAPI *AddDirtyRect)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect); +} IDirect3DCubeTexture9Vtbl; +struct IDirect3DCubeTexture9 +{ + IDirect3DCubeTexture9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DCubeTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DCubeTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DCubeTexture9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DCubeTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DCubeTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DCubeTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DCubeTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DCubeTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DCubeTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DCubeTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DCubeTexture9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DBaseTexture9 macros */ +#define IDirect3DCubeTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) +#define IDirect3DCubeTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) +#define IDirect3DCubeTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) +#define IDirect3DCubeTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) +#define 
IDirect3DCubeTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) +#define IDirect3DCubeTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) +/* IDirect3DCubeTexture9 macros */ +#define IDirect3DCubeTexture9_GetLevelDesc(p,a,b) (p)->lpVtbl->GetLevelDesc(p,a,b) +#define IDirect3DCubeTexture9_GetCubeMapSurface(p,a,b,c) (p)->lpVtbl->GetCubeMapSurface(p,a,b,c) +#define IDirect3DCubeTexture9_LockRect(p,a,b,c,d,e) (p)->lpVtbl->LockRect(p,a,b,c,d,e) +#define IDirect3DCubeTexture9_UnlockRect(p,a,b) (p)->lpVtbl->UnlockRect(p,a,b) +#define IDirect3DCubeTexture9_AddDirtyRect(p,a,b) (p)->lpVtbl->AddDirtyRect(p,a,b) + +typedef struct IDirect3DDevice9Vtbl /* function-pointer table for IDirect3DDevice9: the three IUnknown slots come first, followed by the device slots; every slot takes the object as its first argument (This), and member order is the table layout, so entries must not be reordered */ +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DDevice9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DDevice9 *This); + ULONG (WINAPI *Release)(IDirect3DDevice9 *This); + /* IDirect3DDevice9 */ + HRESULT (WINAPI *TestCooperativeLevel)(IDirect3DDevice9 *This); + UINT (WINAPI *GetAvailableTextureMem)(IDirect3DDevice9 *This); + HRESULT (WINAPI *EvictManagedResources)(IDirect3DDevice9 *This); + HRESULT (WINAPI *GetDirect3D)(IDirect3DDevice9 *This, IDirect3D9 **ppD3D9); + HRESULT (WINAPI *GetDeviceCaps)(IDirect3DDevice9 *This, D3DCAPS9 *pCaps); + HRESULT (WINAPI *GetDisplayMode)(IDirect3DDevice9 *This, UINT iSwapChain, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *GetCreationParameters)(IDirect3DDevice9 *This, D3DDEVICE_CREATION_PARAMETERS *pParameters); + HRESULT (WINAPI *SetCursorProperties)(IDirect3DDevice9 *This, UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap); + void (WINAPI *SetCursorPosition)(IDirect3DDevice9 *This, int X, int Y, DWORD Flags); + BOOL (WINAPI *ShowCursor)(IDirect3DDevice9 *This, BOOL bShow); + HRESULT (WINAPI *CreateAdditionalSwapChain)(IDirect3DDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain); + HRESULT (WINAPI *GetSwapChain)(IDirect3DDevice9 *This, UINT iSwapChain,
IDirect3DSwapChain9 **pSwapChain); + UINT (WINAPI *GetNumberOfSwapChains)(IDirect3DDevice9 *This); + HRESULT (WINAPI *Reset)(IDirect3DDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters); + HRESULT (WINAPI *Present)(IDirect3DDevice9 *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion); + HRESULT (WINAPI *GetBackBuffer)(IDirect3DDevice9 *This, UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); + HRESULT (WINAPI *GetRasterStatus)(IDirect3DDevice9 *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus); + HRESULT (WINAPI *SetDialogBoxMode)(IDirect3DDevice9 *This, BOOL bEnableDialogs); + void (WINAPI *SetGammaRamp)(IDirect3DDevice9 *This, UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp); + void (WINAPI *GetGammaRamp)(IDirect3DDevice9 *This, UINT iSwapChain, D3DGAMMARAMP *pRamp); + HRESULT (WINAPI *CreateTexture)(IDirect3DDevice9 *This, UINT Width, UINT Height, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateVolumeTexture)(IDirect3DDevice9 *This, UINT Width, UINT Height, UINT Depth, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateCubeTexture)(IDirect3DDevice9 *This, UINT EdgeLength, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateVertexBuffer)(IDirect3DDevice9 *This, UINT Length, DWORD Usage, DWORD FVF, D3DPOOL Pool, IDirect3DVertexBuffer9 **ppVertexBuffer, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateIndexBuffer)(IDirect3DDevice9 *This, UINT Length, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateRenderTarget)(IDirect3DDevice9 *This, UINT Width, UINT Height, D3DFORMAT Format,
D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateDepthStencilSurface)(IDirect3DDevice9 *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); + HRESULT (WINAPI *UpdateSurface)(IDirect3DDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint); + HRESULT (WINAPI *UpdateTexture)(IDirect3DDevice9 *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture); + HRESULT (WINAPI *GetRenderTargetData)(IDirect3DDevice9 *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface); + HRESULT (WINAPI *GetFrontBufferData)(IDirect3DDevice9 *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface); + HRESULT (WINAPI *StretchRect)(IDirect3DDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestSurface, const RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter); + HRESULT (WINAPI *ColorFill)(IDirect3DDevice9 *This, IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color); + HRESULT (WINAPI *CreateOffscreenPlainSurface)(IDirect3DDevice9 *This, UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); + HRESULT (WINAPI *SetRenderTarget)(IDirect3DDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget); + HRESULT (WINAPI *GetRenderTarget)(IDirect3DDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget); + HRESULT (WINAPI *SetDepthStencilSurface)(IDirect3DDevice9 *This, IDirect3DSurface9 *pNewZStencil); + HRESULT (WINAPI *GetDepthStencilSurface)(IDirect3DDevice9 *This, IDirect3DSurface9 **ppZStencilSurface); + HRESULT (WINAPI *BeginScene)(IDirect3DDevice9 *This); + HRESULT (WINAPI *EndScene)(IDirect3DDevice9
*This); + HRESULT (WINAPI *Clear)(IDirect3DDevice9 *This, DWORD Count, const D3DRECT *pRects, DWORD Flags, D3DCOLOR Color, float Z, DWORD Stencil); + HRESULT (WINAPI *SetTransform)(IDirect3DDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); + HRESULT (WINAPI *GetTransform)(IDirect3DDevice9 *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix); + HRESULT (WINAPI *MultiplyTransform)(IDirect3DDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); + HRESULT (WINAPI *SetViewport)(IDirect3DDevice9 *This, const D3DVIEWPORT9 *pViewport); + HRESULT (WINAPI *GetViewport)(IDirect3DDevice9 *This, D3DVIEWPORT9 *pViewport); + HRESULT (WINAPI *SetMaterial)(IDirect3DDevice9 *This, const D3DMATERIAL9 *pMaterial); + HRESULT (WINAPI *GetMaterial)(IDirect3DDevice9 *This, D3DMATERIAL9 *pMaterial); + HRESULT (WINAPI *SetLight)(IDirect3DDevice9 *This, DWORD Index, const D3DLIGHT9 *pLight); + HRESULT (WINAPI *GetLight)(IDirect3DDevice9 *This, DWORD Index, D3DLIGHT9 *pLight); + HRESULT (WINAPI *LightEnable)(IDirect3DDevice9 *This, DWORD Index, BOOL Enable); + HRESULT (WINAPI *GetLightEnable)(IDirect3DDevice9 *This, DWORD Index, BOOL *pEnable); + HRESULT (WINAPI *SetClipPlane)(IDirect3DDevice9 *This, DWORD Index, const float *pPlane); + HRESULT (WINAPI *GetClipPlane)(IDirect3DDevice9 *This, DWORD Index, float *pPlane); + HRESULT (WINAPI *SetRenderState)(IDirect3DDevice9 *This, D3DRENDERSTATETYPE State, DWORD Value); + HRESULT (WINAPI *GetRenderState)(IDirect3DDevice9 *This, D3DRENDERSTATETYPE State, DWORD *pValue); + HRESULT (WINAPI *CreateStateBlock)(IDirect3DDevice9 *This, D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB); + HRESULT (WINAPI *BeginStateBlock)(IDirect3DDevice9 *This); + HRESULT (WINAPI *EndStateBlock)(IDirect3DDevice9 *This, IDirect3DStateBlock9 **ppSB); + HRESULT (WINAPI *SetClipStatus)(IDirect3DDevice9 *This, const D3DCLIPSTATUS9 *pClipStatus); + HRESULT (WINAPI *GetClipStatus)(IDirect3DDevice9 *This, D3DCLIPSTATUS9 *pClipStatus);
+ HRESULT (WINAPI *GetTexture)(IDirect3DDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture); + HRESULT (WINAPI *SetTexture)(IDirect3DDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture); + HRESULT (WINAPI *GetTextureStageState)(IDirect3DDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue); + HRESULT (WINAPI *SetTextureStageState)(IDirect3DDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); + HRESULT (WINAPI *GetSamplerState)(IDirect3DDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue); + HRESULT (WINAPI *SetSamplerState)(IDirect3DDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value); + HRESULT (WINAPI *ValidateDevice)(IDirect3DDevice9 *This, DWORD *pNumPasses); + HRESULT (WINAPI *SetPaletteEntries)(IDirect3DDevice9 *This, UINT PaletteNumber, const PALETTEENTRY *pEntries); + HRESULT (WINAPI *GetPaletteEntries)(IDirect3DDevice9 *This, UINT PaletteNumber, PALETTEENTRY *pEntries); + HRESULT (WINAPI *SetCurrentTexturePalette)(IDirect3DDevice9 *This, UINT PaletteNumber); + HRESULT (WINAPI *GetCurrentTexturePalette)(IDirect3DDevice9 *This, UINT *PaletteNumber); + HRESULT (WINAPI *SetScissorRect)(IDirect3DDevice9 *This, const RECT *pRect); + HRESULT (WINAPI *GetScissorRect)(IDirect3DDevice9 *This, RECT *pRect); + HRESULT (WINAPI *SetSoftwareVertexProcessing)(IDirect3DDevice9 *This, BOOL bSoftware); + BOOL (WINAPI *GetSoftwareVertexProcessing)(IDirect3DDevice9 *This); + HRESULT (WINAPI *SetNPatchMode)(IDirect3DDevice9 *This, float nSegments); + float (WINAPI *GetNPatchMode)(IDirect3DDevice9 *This); + HRESULT (WINAPI *DrawPrimitive)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount); + HRESULT (WINAPI *DrawIndexedPrimitive)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount); + HRESULT (WINAPI *DrawPrimitiveUP)(IDirect3DDevice9 *This,
D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); + HRESULT (WINAPI *DrawIndexedPrimitiveUP)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, UINT NumVertices, UINT PrimitiveCount, const void *pIndexData, D3DFORMAT IndexDataFormat, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); + HRESULT (WINAPI *ProcessVertices)(IDirect3DDevice9 *This, UINT SrcStartIndex, UINT DestIndex, UINT VertexCount, IDirect3DVertexBuffer9 *pDestBuffer, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags); + HRESULT (WINAPI *CreateVertexDeclaration)(IDirect3DDevice9 *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl); + HRESULT (WINAPI *SetVertexDeclaration)(IDirect3DDevice9 *This, IDirect3DVertexDeclaration9 *pDecl); + HRESULT (WINAPI *GetVertexDeclaration)(IDirect3DDevice9 *This, IDirect3DVertexDeclaration9 **ppDecl); + HRESULT (WINAPI *SetFVF)(IDirect3DDevice9 *This, DWORD FVF); + HRESULT (WINAPI *GetFVF)(IDirect3DDevice9 *This, DWORD *pFVF); + HRESULT (WINAPI *CreateVertexShader)(IDirect3DDevice9 *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader); + HRESULT (WINAPI *SetVertexShader)(IDirect3DDevice9 *This, IDirect3DVertexShader9 *pShader); + HRESULT (WINAPI *GetVertexShader)(IDirect3DDevice9 *This, IDirect3DVertexShader9 **ppShader); + HRESULT (WINAPI *SetVertexShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *GetVertexShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *SetVertexShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *GetVertexShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *SetVertexShaderConstantB)(IDirect3DDevice9
*This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *GetVertexShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *SetStreamSource)(IDirect3DDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride); + HRESULT (WINAPI *GetStreamSource)(IDirect3DDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride); + HRESULT (WINAPI *SetStreamSourceFreq)(IDirect3DDevice9 *This, UINT StreamNumber, UINT Setting); + HRESULT (WINAPI *GetStreamSourceFreq)(IDirect3DDevice9 *This, UINT StreamNumber, UINT *pSetting); + HRESULT (WINAPI *SetIndices)(IDirect3DDevice9 *This, IDirect3DIndexBuffer9 *pIndexData); + HRESULT (WINAPI *GetIndices)(IDirect3DDevice9 *This, IDirect3DIndexBuffer9 **ppIndexData); + HRESULT (WINAPI *CreatePixelShader)(IDirect3DDevice9 *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader); + HRESULT (WINAPI *SetPixelShader)(IDirect3DDevice9 *This, IDirect3DPixelShader9 *pShader); + HRESULT (WINAPI *GetPixelShader)(IDirect3DDevice9 *This, IDirect3DPixelShader9 **ppShader); + HRESULT (WINAPI *SetPixelShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *GetPixelShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *SetPixelShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *GetPixelShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *SetPixelShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *GetPixelShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); + HRESULT
(WINAPI *DrawRectPatch)(IDirect3DDevice9 *This, UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo); + HRESULT (WINAPI *DrawTriPatch)(IDirect3DDevice9 *This, UINT Handle, const float *pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo); + HRESULT (WINAPI *DeletePatch)(IDirect3DDevice9 *This, UINT Handle); + HRESULT (WINAPI *CreateQuery)(IDirect3DDevice9 *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery); +} IDirect3DDevice9Vtbl; +struct IDirect3DDevice9 +{ + IDirect3DDevice9Vtbl *lpVtbl; /* sole member: pointer to the IDirect3DDevice9Vtbl function-pointer table */ +}; + +/* IUnknown macros: each wrapper expands to (p)->lpVtbl->Method(p, ...), so p is used for the table lookup and passed again as This (p is evaluated twice) */ +#define IDirect3DDevice9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DDevice9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DDevice9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DDevice9 macros: one wrapper per device slot; a, b, ... are forwarded positionally as the slot's arguments after This */ +#define IDirect3DDevice9_TestCooperativeLevel(p) (p)->lpVtbl->TestCooperativeLevel(p) +#define IDirect3DDevice9_GetAvailableTextureMem(p) (p)->lpVtbl->GetAvailableTextureMem(p) +#define IDirect3DDevice9_EvictManagedResources(p) (p)->lpVtbl->EvictManagedResources(p) +#define IDirect3DDevice9_GetDirect3D(p,a) (p)->lpVtbl->GetDirect3D(p,a) +#define IDirect3DDevice9_GetDeviceCaps(p,a) (p)->lpVtbl->GetDeviceCaps(p,a) +#define IDirect3DDevice9_GetDisplayMode(p,a,b) (p)->lpVtbl->GetDisplayMode(p,a,b) +#define IDirect3DDevice9_GetCreationParameters(p,a) (p)->lpVtbl->GetCreationParameters(p,a) +#define IDirect3DDevice9_SetCursorProperties(p,a,b,c) (p)->lpVtbl->SetCursorProperties(p,a,b,c) +#define IDirect3DDevice9_SetCursorPosition(p,a,b,c) (p)->lpVtbl->SetCursorPosition(p,a,b,c) +#define IDirect3DDevice9_ShowCursor(p,a) (p)->lpVtbl->ShowCursor(p,a) +#define IDirect3DDevice9_CreateAdditionalSwapChain(p,a,b) (p)->lpVtbl->CreateAdditionalSwapChain(p,a,b) +#define IDirect3DDevice9_GetSwapChain(p,a,b) (p)->lpVtbl->GetSwapChain(p,a,b) +#define IDirect3DDevice9_GetNumberOfSwapChains(p) (p)->lpVtbl->GetNumberOfSwapChains(p) +#define IDirect3DDevice9_Reset(p,a) (p)->lpVtbl->Reset(p,a) +#define
IDirect3DDevice9_Present(p,a,b,c,d) (p)->lpVtbl->Present(p,a,b,c,d) /* as with every wrapper in this list, p appears twice in the expansion: once for the lpVtbl lookup and once as the This argument */ +#define IDirect3DDevice9_GetBackBuffer(p,a,b,c,d) (p)->lpVtbl->GetBackBuffer(p,a,b,c,d) +#define IDirect3DDevice9_GetRasterStatus(p,a,b) (p)->lpVtbl->GetRasterStatus(p,a,b) +#define IDirect3DDevice9_SetDialogBoxMode(p,a) (p)->lpVtbl->SetDialogBoxMode(p,a) +#define IDirect3DDevice9_SetGammaRamp(p,a,b,c) (p)->lpVtbl->SetGammaRamp(p,a,b,c) +#define IDirect3DDevice9_GetGammaRamp(p,a,b) (p)->lpVtbl->GetGammaRamp(p,a,b) +#define IDirect3DDevice9_CreateTexture(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateTexture(p,a,b,c,d,e,f,g,h) /* a..h = Width, Height, Levels, Usage, Format, Pool, ppTexture, pSharedHandle */ +#define IDirect3DDevice9_CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) /* a..i = Width, Height, Depth, Levels, Usage, Format, Pool, ppVolumeTexture, pSharedHandle */ +#define IDirect3DDevice9_CreateCubeTexture(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateCubeTexture(p,a,b,c,d,e,f,g) +#define IDirect3DDevice9_CreateVertexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateVertexBuffer(p,a,b,c,d,e,f) +#define IDirect3DDevice9_CreateIndexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateIndexBuffer(p,a,b,c,d,e,f) +#define IDirect3DDevice9_CreateRenderTarget(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateRenderTarget(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9_CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9_UpdateSurface(p,a,b,c,d) (p)->lpVtbl->UpdateSurface(p,a,b,c,d) +#define IDirect3DDevice9_UpdateTexture(p,a,b) (p)->lpVtbl->UpdateTexture(p,a,b) +#define IDirect3DDevice9_GetRenderTargetData(p,a,b) (p)->lpVtbl->GetRenderTargetData(p,a,b) +#define IDirect3DDevice9_GetFrontBufferData(p,a,b) (p)->lpVtbl->GetFrontBufferData(p,a,b) +#define IDirect3DDevice9_StretchRect(p,a,b,c,d,e) (p)->lpVtbl->StretchRect(p,a,b,c,d,e) +#define IDirect3DDevice9_ColorFill(p,a,b,c) (p)->lpVtbl->ColorFill(p,a,b,c) +#define IDirect3DDevice9_CreateOffscreenPlainSurface(p,a,b,c,d,e,f) (p)->lpVtbl->CreateOffscreenPlainSurface(p,a,b,c,d,e,f) +#define IDirect3DDevice9_SetRenderTarget(p,a,b)
(p)->lpVtbl->SetRenderTarget(p,a,b) +#define IDirect3DDevice9_GetRenderTarget(p,a,b) (p)->lpVtbl->GetRenderTarget(p,a,b) +#define IDirect3DDevice9_SetDepthStencilSurface(p,a) (p)->lpVtbl->SetDepthStencilSurface(p,a) +#define IDirect3DDevice9_GetDepthStencilSurface(p,a) (p)->lpVtbl->GetDepthStencilSurface(p,a) +#define IDirect3DDevice9_BeginScene(p) (p)->lpVtbl->BeginScene(p) +#define IDirect3DDevice9_EndScene(p) (p)->lpVtbl->EndScene(p) +#define IDirect3DDevice9_Clear(p,a,b,c,d,e,f) (p)->lpVtbl->Clear(p,a,b,c,d,e,f) +#define IDirect3DDevice9_SetTransform(p,a,b) (p)->lpVtbl->SetTransform(p,a,b) +#define IDirect3DDevice9_GetTransform(p,a,b) (p)->lpVtbl->GetTransform(p,a,b) +#define IDirect3DDevice9_MultiplyTransform(p,a,b) (p)->lpVtbl->MultiplyTransform(p,a,b) +#define IDirect3DDevice9_SetViewport(p,a) (p)->lpVtbl->SetViewport(p,a) +#define IDirect3DDevice9_GetViewport(p,a) (p)->lpVtbl->GetViewport(p,a) +#define IDirect3DDevice9_SetMaterial(p,a) (p)->lpVtbl->SetMaterial(p,a) +#define IDirect3DDevice9_GetMaterial(p,a) (p)->lpVtbl->GetMaterial(p,a) +#define IDirect3DDevice9_SetLight(p,a,b) (p)->lpVtbl->SetLight(p,a,b) +#define IDirect3DDevice9_GetLight(p,a,b) (p)->lpVtbl->GetLight(p,a,b) +#define IDirect3DDevice9_LightEnable(p,a,b) (p)->lpVtbl->LightEnable(p,a,b) +#define IDirect3DDevice9_GetLightEnable(p,a,b) (p)->lpVtbl->GetLightEnable(p,a,b) +#define IDirect3DDevice9_SetClipPlane(p,a,b) (p)->lpVtbl->SetClipPlane(p,a,b) +#define IDirect3DDevice9_GetClipPlane(p,a,b) (p)->lpVtbl->GetClipPlane(p,a,b) +#define IDirect3DDevice9_SetRenderState(p,a,b) (p)->lpVtbl->SetRenderState(p,a,b) +#define IDirect3DDevice9_GetRenderState(p,a,b) (p)->lpVtbl->GetRenderState(p,a,b) +#define IDirect3DDevice9_CreateStateBlock(p,a,b) (p)->lpVtbl->CreateStateBlock(p,a,b) +#define IDirect3DDevice9_BeginStateBlock(p) (p)->lpVtbl->BeginStateBlock(p) +#define IDirect3DDevice9_EndStateBlock(p,a) (p)->lpVtbl->EndStateBlock(p,a) +#define IDirect3DDevice9_SetClipStatus(p,a)
(p)->lpVtbl->SetClipStatus(p,a) +#define IDirect3DDevice9_GetClipStatus(p,a) (p)->lpVtbl->GetClipStatus(p,a) +#define IDirect3DDevice9_GetTexture(p,a,b) (p)->lpVtbl->GetTexture(p,a,b) +#define IDirect3DDevice9_SetTexture(p,a,b) (p)->lpVtbl->SetTexture(p,a,b) +#define IDirect3DDevice9_GetTextureStageState(p,a,b,c) (p)->lpVtbl->GetTextureStageState(p,a,b,c) +#define IDirect3DDevice9_SetTextureStageState(p,a,b,c) (p)->lpVtbl->SetTextureStageState(p,a,b,c) +#define IDirect3DDevice9_GetSamplerState(p,a,b,c) (p)->lpVtbl->GetSamplerState(p,a,b,c) +#define IDirect3DDevice9_SetSamplerState(p,a,b,c) (p)->lpVtbl->SetSamplerState(p,a,b,c) +#define IDirect3DDevice9_ValidateDevice(p,a) (p)->lpVtbl->ValidateDevice(p,a) +#define IDirect3DDevice9_SetPaletteEntries(p,a,b) (p)->lpVtbl->SetPaletteEntries(p,a,b) +#define IDirect3DDevice9_GetPaletteEntries(p,a,b) (p)->lpVtbl->GetPaletteEntries(p,a,b) +#define IDirect3DDevice9_SetCurrentTexturePalette(p,a) (p)->lpVtbl->SetCurrentTexturePalette(p,a) +#define IDirect3DDevice9_GetCurrentTexturePalette(p,a) (p)->lpVtbl->GetCurrentTexturePalette(p,a) +#define IDirect3DDevice9_SetScissorRect(p,a) (p)->lpVtbl->SetScissorRect(p,a) +#define IDirect3DDevice9_GetScissorRect(p,a) (p)->lpVtbl->GetScissorRect(p,a) +#define IDirect3DDevice9_SetSoftwareVertexProcessing(p,a) (p)->lpVtbl->SetSoftwareVertexProcessing(p,a) +#define IDirect3DDevice9_GetSoftwareVertexProcessing(p) (p)->lpVtbl->GetSoftwareVertexProcessing(p) +#define IDirect3DDevice9_SetNPatchMode(p,a) (p)->lpVtbl->SetNPatchMode(p,a) +#define IDirect3DDevice9_GetNPatchMode(p) (p)->lpVtbl->GetNPatchMode(p) +#define IDirect3DDevice9_DrawPrimitive(p,a,b,c) (p)->lpVtbl->DrawPrimitive(p,a,b,c) +#define IDirect3DDevice9_DrawIndexedPrimitive(p,a,b,c,d,e,f) (p)->lpVtbl->DrawIndexedPrimitive(p,a,b,c,d,e,f) +#define IDirect3DDevice9_DrawPrimitiveUP(p,a,b,c,d) (p)->lpVtbl->DrawPrimitiveUP(p,a,b,c,d) +#define IDirect3DDevice9_DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h)
(p)->lpVtbl->DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) /* a..h = PrimitiveType, MinVertexIndex, NumVertices, PrimitiveCount, pIndexData, IndexDataFormat, pVertexStreamZeroData, VertexStreamZeroStride */ +#define IDirect3DDevice9_ProcessVertices(p,a,b,c,d,e,f) (p)->lpVtbl->ProcessVertices(p,a,b,c,d,e,f) +#define IDirect3DDevice9_CreateVertexDeclaration(p,a,b) (p)->lpVtbl->CreateVertexDeclaration(p,a,b) +#define IDirect3DDevice9_SetVertexDeclaration(p,a) (p)->lpVtbl->SetVertexDeclaration(p,a) +#define IDirect3DDevice9_GetVertexDeclaration(p,a) (p)->lpVtbl->GetVertexDeclaration(p,a) +#define IDirect3DDevice9_SetFVF(p,a) (p)->lpVtbl->SetFVF(p,a) +#define IDirect3DDevice9_GetFVF(p,a) (p)->lpVtbl->GetFVF(p,a) +#define IDirect3DDevice9_CreateVertexShader(p,a,b) (p)->lpVtbl->CreateVertexShader(p,a,b) +#define IDirect3DDevice9_SetVertexShader(p,a) (p)->lpVtbl->SetVertexShader(p,a) +#define IDirect3DDevice9_GetVertexShader(p,a) (p)->lpVtbl->GetVertexShader(p,a) +#define IDirect3DDevice9_SetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantF(p,a,b,c) +#define IDirect3DDevice9_GetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantF(p,a,b,c) +#define IDirect3DDevice9_SetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantI(p,a,b,c) +#define IDirect3DDevice9_GetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantI(p,a,b,c) +#define IDirect3DDevice9_SetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantB(p,a,b,c) +#define IDirect3DDevice9_GetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantB(p,a,b,c) +#define IDirect3DDevice9_SetStreamSource(p,a,b,c,d) (p)->lpVtbl->SetStreamSource(p,a,b,c,d) +#define IDirect3DDevice9_GetStreamSource(p,a,b,c,d) (p)->lpVtbl->GetStreamSource(p,a,b,c,d) +#define IDirect3DDevice9_SetStreamSourceFreq(p,a,b) (p)->lpVtbl->SetStreamSourceFreq(p,a,b) +#define IDirect3DDevice9_GetStreamSourceFreq(p,a,b) (p)->lpVtbl->GetStreamSourceFreq(p,a,b) +#define IDirect3DDevice9_SetIndices(p,a) (p)->lpVtbl->SetIndices(p,a) +#define IDirect3DDevice9_GetIndices(p,a) (p)->lpVtbl->GetIndices(p,a) +#define
IDirect3DDevice9_CreatePixelShader(p,a,b) (p)->lpVtbl->CreatePixelShader(p,a,b) +#define IDirect3DDevice9_SetPixelShader(p,a) (p)->lpVtbl->SetPixelShader(p,a) +#define IDirect3DDevice9_GetPixelShader(p,a) (p)->lpVtbl->GetPixelShader(p,a) +#define IDirect3DDevice9_SetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantF(p,a,b,c) +#define IDirect3DDevice9_GetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantF(p,a,b,c) +#define IDirect3DDevice9_SetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantI(p,a,b,c) +#define IDirect3DDevice9_GetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantI(p,a,b,c) +#define IDirect3DDevice9_SetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantB(p,a,b,c) +#define IDirect3DDevice9_GetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantB(p,a,b,c) +#define IDirect3DDevice9_DrawRectPatch(p,a,b,c) (p)->lpVtbl->DrawRectPatch(p,a,b,c) +#define IDirect3DDevice9_DrawTriPatch(p,a,b,c) (p)->lpVtbl->DrawTriPatch(p,a,b,c) +#define IDirect3DDevice9_DeletePatch(p,a) (p)->lpVtbl->DeletePatch(p,a) +#define IDirect3DDevice9_CreateQuery(p,a,b) (p)->lpVtbl->CreateQuery(p,a,b) + +typedef struct IDirect3DDevice9ExVtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DDevice9Ex *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DDevice9Ex *This); + ULONG (WINAPI *Release)(IDirect3DDevice9Ex *This); + /* IDirect3DDevice9 */ + HRESULT (WINAPI *TestCooperativeLevel)(IDirect3DDevice9Ex *This); + UINT (WINAPI *GetAvailableTextureMem)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *EvictManagedResources)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *GetDirect3D)(IDirect3DDevice9Ex *This, IDirect3D9 **ppD3D9); + HRESULT (WINAPI *GetDeviceCaps)(IDirect3DDevice9Ex *This, D3DCAPS9 *pCaps); + HRESULT (WINAPI *GetDisplayMode)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *GetCreationParameters)(IDirect3DDevice9Ex
*This, D3DDEVICE_CREATION_PARAMETERS *pParameters); + HRESULT (WINAPI *SetCursorProperties)(IDirect3DDevice9Ex *This, UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap); + void (WINAPI *SetCursorPosition)(IDirect3DDevice9Ex *This, int X, int Y, DWORD Flags); + BOOL (WINAPI *ShowCursor)(IDirect3DDevice9Ex *This, BOOL bShow); + HRESULT (WINAPI *CreateAdditionalSwapChain)(IDirect3DDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain); + HRESULT (WINAPI *GetSwapChain)(IDirect3DDevice9Ex *This, UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain); + UINT (WINAPI *GetNumberOfSwapChains)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *Reset)(IDirect3DDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters); + HRESULT (WINAPI *Present)(IDirect3DDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion); + HRESULT (WINAPI *GetBackBuffer)(IDirect3DDevice9Ex *This, UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); + HRESULT (WINAPI *GetRasterStatus)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus); + HRESULT (WINAPI *SetDialogBoxMode)(IDirect3DDevice9Ex *This, BOOL bEnableDialogs); + void (WINAPI *SetGammaRamp)(IDirect3DDevice9Ex *This, UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp); + void (WINAPI *GetGammaRamp)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DGAMMARAMP *pRamp); + HRESULT (WINAPI *CreateTexture)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateVolumeTexture)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, UINT Depth, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateCubeTexture)(IDirect3DDevice9Ex *This, UINT EdgeLength, 
UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateVertexBuffer)(IDirect3DDevice9Ex *This, UINT Length, DWORD Usage, DWORD FVF, D3DPOOL Pool, IDirect3DVertexBuffer9 **ppVertexBuffer, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateIndexBuffer)(IDirect3DDevice9Ex *This, UINT Length, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateRenderTarget)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); + HRESULT (WINAPI *CreateDepthStencilSurface)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); + HRESULT (WINAPI *UpdateSurface)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint); + HRESULT (WINAPI *UpdateTexture)(IDirect3DDevice9Ex *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture); + HRESULT (WINAPI *GetRenderTargetData)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface); + HRESULT (WINAPI *GetFrontBufferData)(IDirect3DDevice9Ex *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface); + HRESULT (WINAPI *StretchRect)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestSurface, const RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter); + HRESULT (WINAPI *ColorFill)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color); + HRESULT (WINAPI *CreateOffscreenPlainSurface)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL 
Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); + HRESULT (WINAPI *SetRenderTarget)(IDirect3DDevice9Ex *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget); + HRESULT (WINAPI *GetRenderTarget)(IDirect3DDevice9Ex *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget); + HRESULT (WINAPI *SetDepthStencilSurface)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pNewZStencil); + HRESULT (WINAPI *GetDepthStencilSurface)(IDirect3DDevice9Ex *This, IDirect3DSurface9 **ppZStencilSurface); + HRESULT (WINAPI *BeginScene)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *EndScene)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *Clear)(IDirect3DDevice9Ex *This, DWORD Count, const D3DRECT *pRects, DWORD Flags, D3DCOLOR Color, float Z, DWORD Stencil); + HRESULT (WINAPI *SetTransform)(IDirect3DDevice9Ex *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); + HRESULT (WINAPI *GetTransform)(IDirect3DDevice9Ex *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix); + HRESULT (WINAPI *MultiplyTransform)(IDirect3DDevice9Ex *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); + HRESULT (WINAPI *SetViewport)(IDirect3DDevice9Ex *This, const D3DVIEWPORT9 *pViewport); + HRESULT (WINAPI *GetViewport)(IDirect3DDevice9Ex *This, D3DVIEWPORT9 *pViewport); + HRESULT (WINAPI *SetMaterial)(IDirect3DDevice9Ex *This, const D3DMATERIAL9 *pMaterial); + HRESULT (WINAPI *GetMaterial)(IDirect3DDevice9Ex *This, D3DMATERIAL9 *pMaterial); + HRESULT (WINAPI *SetLight)(IDirect3DDevice9Ex *This, DWORD Index, const D3DLIGHT9 *pLight); + HRESULT (WINAPI *GetLight)(IDirect3DDevice9Ex *This, DWORD Index, D3DLIGHT9 *pLight); + HRESULT (WINAPI *LightEnable)(IDirect3DDevice9Ex *This, DWORD Index, BOOL Enable); + HRESULT (WINAPI *GetLightEnable)(IDirect3DDevice9Ex *This, DWORD Index, BOOL *pEnable); + HRESULT (WINAPI *SetClipPlane)(IDirect3DDevice9Ex *This, DWORD Index, const float *pPlane); + HRESULT (WINAPI *GetClipPlane)(IDirect3DDevice9Ex *This, DWORD Index, float 
*pPlane); + HRESULT (WINAPI *SetRenderState)(IDirect3DDevice9Ex *This, D3DRENDERSTATETYPE State, DWORD Value); + HRESULT (WINAPI *GetRenderState)(IDirect3DDevice9Ex *This, D3DRENDERSTATETYPE State, DWORD *pValue); + HRESULT (WINAPI *CreateStateBlock)(IDirect3DDevice9Ex *This, D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB); + HRESULT (WINAPI *BeginStateBlock)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *EndStateBlock)(IDirect3DDevice9Ex *This, IDirect3DStateBlock9 **ppSB); + HRESULT (WINAPI *SetClipStatus)(IDirect3DDevice9Ex *This, const D3DCLIPSTATUS9 *pClipStatus); + HRESULT (WINAPI *GetClipStatus)(IDirect3DDevice9Ex *This, D3DCLIPSTATUS9 *pClipStatus); + HRESULT (WINAPI *GetTexture)(IDirect3DDevice9Ex *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture); + HRESULT (WINAPI *SetTexture)(IDirect3DDevice9Ex *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture); + HRESULT (WINAPI *GetTextureStageState)(IDirect3DDevice9Ex *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue); + HRESULT (WINAPI *SetTextureStageState)(IDirect3DDevice9Ex *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); + HRESULT (WINAPI *GetSamplerState)(IDirect3DDevice9Ex *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue); + HRESULT (WINAPI *SetSamplerState)(IDirect3DDevice9Ex *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value); + HRESULT (WINAPI *ValidateDevice)(IDirect3DDevice9Ex *This, DWORD *pNumPasses); + HRESULT (WINAPI *SetPaletteEntries)(IDirect3DDevice9Ex *This, UINT PaletteNumber, const PALETTEENTRY *pEntries); + HRESULT (WINAPI *GetPaletteEntries)(IDirect3DDevice9Ex *This, UINT PaletteNumber, PALETTEENTRY *pEntries); + HRESULT (WINAPI *SetCurrentTexturePalette)(IDirect3DDevice9Ex *This, UINT PaletteNumber); + HRESULT (WINAPI *GetCurrentTexturePalette)(IDirect3DDevice9Ex *This, UINT *PaletteNumber); + HRESULT (WINAPI *SetScissorRect)(IDirect3DDevice9Ex *This, const RECT *pRect); + HRESULT (WINAPI *GetScissorRect)(IDirect3DDevice9Ex *This, 
RECT *pRect); + HRESULT (WINAPI *SetSoftwareVertexProcessing)(IDirect3DDevice9Ex *This, BOOL bSoftware); + BOOL (WINAPI *GetSoftwareVertexProcessing)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *SetNPatchMode)(IDirect3DDevice9Ex *This, float nSegments); + float (WINAPI *GetNPatchMode)(IDirect3DDevice9Ex *This); + HRESULT (WINAPI *DrawPrimitive)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount); + HRESULT (WINAPI *DrawIndexedPrimitive)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount); + HRESULT (WINAPI *DrawPrimitiveUP)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); + HRESULT (WINAPI *DrawIndexedPrimitiveUP)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, UINT NumVertices, UINT PrimitiveCount, const void *pIndexData, D3DFORMAT IndexDataFormat, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); + HRESULT (WINAPI *ProcessVertices)(IDirect3DDevice9Ex *This, UINT SrcStartIndex, UINT DestIndex, UINT VertexCount, IDirect3DVertexBuffer9 *pDestBuffer, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags); + HRESULT (WINAPI *CreateVertexDeclaration)(IDirect3DDevice9Ex *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl); + HRESULT (WINAPI *SetVertexDeclaration)(IDirect3DDevice9Ex *This, IDirect3DVertexDeclaration9 *pDecl); + HRESULT (WINAPI *GetVertexDeclaration)(IDirect3DDevice9Ex *This, IDirect3DVertexDeclaration9 **ppDecl); + HRESULT (WINAPI *SetFVF)(IDirect3DDevice9Ex *This, DWORD FVF); + HRESULT (WINAPI *GetFVF)(IDirect3DDevice9Ex *This, DWORD *pFVF); + HRESULT (WINAPI *CreateVertexShader)(IDirect3DDevice9Ex *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader); + HRESULT (WINAPI *SetVertexShader)(IDirect3DDevice9Ex *This, 
IDirect3DVertexShader9 *pShader); + HRESULT (WINAPI *GetVertexShader)(IDirect3DDevice9Ex *This, IDirect3DVertexShader9 **ppShader); + HRESULT (WINAPI *SetVertexShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *GetVertexShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *SetVertexShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *GetVertexShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *SetVertexShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *GetVertexShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *SetStreamSource)(IDirect3DDevice9Ex *This, UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride); + HRESULT (WINAPI *GetStreamSource)(IDirect3DDevice9Ex *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride); + HRESULT (WINAPI *SetStreamSourceFreq)(IDirect3DDevice9Ex *This, UINT StreamNumber, UINT Setting); + HRESULT (WINAPI *GetStreamSourceFreq)(IDirect3DDevice9Ex *This, UINT StreamNumber, UINT *pSetting); + HRESULT (WINAPI *SetIndices)(IDirect3DDevice9Ex *This, IDirect3DIndexBuffer9 *pIndexData); + HRESULT (WINAPI *GetIndices)(IDirect3DDevice9Ex *This, IDirect3DIndexBuffer9 **ppIndexData); + HRESULT (WINAPI *CreatePixelShader)(IDirect3DDevice9Ex *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader); + HRESULT (WINAPI *SetPixelShader)(IDirect3DDevice9Ex *This, IDirect3DPixelShader9 *pShader); + HRESULT (WINAPI *GetPixelShader)(IDirect3DDevice9Ex *This, IDirect3DPixelShader9 **ppShader); + HRESULT (WINAPI 
*SetPixelShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *GetPixelShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); + HRESULT (WINAPI *SetPixelShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *GetPixelShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); + HRESULT (WINAPI *SetPixelShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *GetPixelShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); + HRESULT (WINAPI *DrawRectPatch)(IDirect3DDevice9Ex *This, UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo); + HRESULT (WINAPI *DrawTriPatch)(IDirect3DDevice9Ex *This, UINT Handle, const float *pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo); + HRESULT (WINAPI *DeletePatch)(IDirect3DDevice9Ex *This, UINT Handle); + HRESULT (WINAPI *CreateQuery)(IDirect3DDevice9Ex *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery); + /* IDirect3DDevice9Ex */ + HRESULT (WINAPI *SetConvolutionMonoKernel)(IDirect3DDevice9Ex *This, UINT width, UINT height, float *rows, float *columns); + HRESULT (WINAPI *ComposeRects)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, IDirect3DVertexBuffer9 *pSrcRectDescs, UINT NumRects, IDirect3DVertexBuffer9 *pDstRectDescs, D3DCOMPOSERECTSOP Operation, int Xoffset, int Yoffset); + HRESULT (WINAPI *PresentEx)(IDirect3DDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags); + HRESULT (WINAPI *GetGPUThreadPriority)(IDirect3DDevice9Ex *This, INT *pPriority); + HRESULT (WINAPI *SetGPUThreadPriority)(IDirect3DDevice9Ex *This, INT Priority); + HRESULT 
(WINAPI *WaitForVBlank)(IDirect3DDevice9Ex *This, UINT iSwapChain); + HRESULT (WINAPI *CheckResourceResidency)(IDirect3DDevice9Ex *This, IDirect3DResource9 **pResourceArray, UINT32 NumResources); + HRESULT (WINAPI *SetMaximumFrameLatency)(IDirect3DDevice9Ex *This, UINT MaxLatency); + HRESULT (WINAPI *GetMaximumFrameLatency)(IDirect3DDevice9Ex *This, UINT *pMaxLatency); + HRESULT (WINAPI *CheckDeviceState)(IDirect3DDevice9Ex *This, HWND hDestinationWindow); + HRESULT (WINAPI *CreateRenderTargetEx)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage); + HRESULT (WINAPI *CreateOffscreenPlainSurfaceEx)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage); + HRESULT (WINAPI *CreateDepthStencilSurfaceEx)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage); + HRESULT (WINAPI *ResetEx)(IDirect3DDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode); + HRESULT (WINAPI *GetDisplayModeEx)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation); +} IDirect3DDevice9ExVtbl; +struct IDirect3DDevice9Ex +{ + IDirect3DDevice9ExVtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DDevice9Ex_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DDevice9Ex_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DDevice9Ex_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DDevice9 macros */ +#define IDirect3DDevice9Ex_TestCooperativeLevel(p) (p)->lpVtbl->TestCooperativeLevel(p) +#define IDirect3DDevice9Ex_GetAvailableTextureMem(p) (p)->lpVtbl->GetAvailableTextureMem(p) 
+#define IDirect3DDevice9Ex_EvictManagedResources(p) (p)->lpVtbl->EvictManagedResources(p) +#define IDirect3DDevice9Ex_GetDirect3D(p,a) (p)->lpVtbl->GetDirect3D(p,a) +#define IDirect3DDevice9Ex_GetDeviceCaps(p,a) (p)->lpVtbl->GetDeviceCaps(p,a) +#define IDirect3DDevice9Ex_GetDisplayMode(p,a,b) (p)->lpVtbl->GetDisplayMode(p,a,b) +#define IDirect3DDevice9Ex_GetCreationParameters(p,a) (p)->lpVtbl->GetCreationParameters(p,a) +#define IDirect3DDevice9Ex_SetCursorProperties(p,a,b,c) (p)->lpVtbl->SetCursorProperties(p,a,b,c) +#define IDirect3DDevice9Ex_SetCursorPosition(p,a,b,c) (p)->lpVtbl->SetCursorPosition(p,a,b,c) +#define IDirect3DDevice9Ex_ShowCursor(p,a) (p)->lpVtbl->ShowCursor(p,a) +#define IDirect3DDevice9Ex_CreateAdditionalSwapChain(p,a,b) (p)->lpVtbl->CreateAdditionalSwapChain(p,a,b) +#define IDirect3DDevice9Ex_GetSwapChain(p,a,b) (p)->lpVtbl->GetSwapChain(p,a,b) +#define IDirect3DDevice9Ex_GetNumberOfSwapChains(p) (p)->lpVtbl->GetNumberOfSwapChains(p) +#define IDirect3DDevice9Ex_Reset(p,a) (p)->lpVtbl->Reset(p,a) +#define IDirect3DDevice9Ex_Present(p,a,b,c,d) (p)->lpVtbl->Present(p,a,b,c,d) +#define IDirect3DDevice9Ex_GetBackBuffer(p,a,b,c,d) (p)->lpVtbl->GetBackBuffer(p,a,b,c,d) +#define IDirect3DDevice9Ex_GetRasterStatus(p,a,b) (p)->lpVtbl->GetRasterStatus(p,a,b) +#define IDirect3DDevice9Ex_SetDialogBoxMode(p,a) (p)->lpVtbl->SetDialogBoxMode(p,a) +#define IDirect3DDevice9Ex_SetGammaRamp(p,a,b,c) (p)->lpVtbl->SetGammaRamp(p,a,b,c) +#define IDirect3DDevice9Ex_GetGammaRamp(p,a,b) (p)->lpVtbl->GetGammaRamp(p,a,b) +#define IDirect3DDevice9Ex_CreateTexture(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateTexture(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9Ex_CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) +#define IDirect3DDevice9Ex_CreateCubeTexture(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateCubeTexture(p,a,b,c,d,e,f,g) +#define IDirect3DDevice9Ex_CreateVertexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateVertexBuffer(p,a,b,c,d,e,f) 
+#define IDirect3DDevice9Ex_CreateIndexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateIndexBuffer(p,a,b,c,d,e,f) +#define IDirect3DDevice9Ex_CreateRenderTarget(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateRenderTarget(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9Ex_CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9Ex_UpdateSurface(p,a,b,c,d) (p)->lpVtbl->UpdateSurface(p,a,b,c,d) +#define IDirect3DDevice9Ex_UpdateTexture(p,a,b) (p)->lpVtbl->UpdateTexture(p,a,b) +#define IDirect3DDevice9Ex_GetRenderTargetData(p,a,b) (p)->lpVtbl->GetRenderTargetData(p,a,b) +#define IDirect3DDevice9Ex_GetFrontBufferData(p,a,b) (p)->lpVtbl->GetFrontBufferData(p,a,b) +#define IDirect3DDevice9Ex_StretchRect(p,a,b,c,d,e) (p)->lpVtbl->StretchRect(p,a,b,c,d,e) +#define IDirect3DDevice9Ex_ColorFill(p,a,b,c) (p)->lpVtbl->ColorFill(p,a,b,c) +#define IDirect3DDevice9Ex_CreateOffscreenPlainSurface(p,a,b,c,d,e,f) (p)->lpVtbl->CreateOffscreenPlainSurface(p,a,b,c,d,e,f) +#define IDirect3DDevice9Ex_SetRenderTarget(p,a,b) (p)->lpVtbl->SetRenderTarget(p,a,b) +#define IDirect3DDevice9Ex_GetRenderTarget(p,a,b) (p)->lpVtbl->GetRenderTarget(p,a,b) +#define IDirect3DDevice9Ex_SetDepthStencilSurface(p,a) (p)->lpVtbl->SetDepthStencilSurface(p,a) +#define IDirect3DDevice9Ex_GetDepthStencilSurface(p,a) (p)->lpVtbl->GetDepthStencilSurface(p,a) +#define IDirect3DDevice9Ex_BeginScene(p) (p)->lpVtbl->BeginScene(p) +#define IDirect3DDevice9Ex_EndScene(p) (p)->lpVtbl->EndScene(p) +#define IDirect3DDevice9Ex_Clear(p,a,b,c,d,e,f) (p)->lpVtbl->Clear(p,a,b,c,d,e,f) +#define IDirect3DDevice9Ex_SetTransform(p,a,b) (p)->lpVtbl->SetTransform(p,a,b) +#define IDirect3DDevice9Ex_GetTransform(p,a,b) (p)->lpVtbl->GetTransform(p,a,b) +#define IDirect3DDevice9Ex_MultiplyTransform(p,a,b) (p)->lpVtbl->MultiplyTransform(p,a,b) +#define IDirect3DDevice9Ex_SetViewport(p,a) (p)->lpVtbl->SetViewport(p,a) +#define IDirect3DDevice9Ex_GetViewport(p,a) 
(p)->lpVtbl->GetViewport(p,a) +#define IDirect3DDevice9Ex_SetMaterial(p,a) (p)->lpVtbl->SetMaterial(p,a) +#define IDirect3DDevice9Ex_GetMaterial(p,a) (p)->lpVtbl->GetMaterial(p,a) +#define IDirect3DDevice9Ex_SetLight(p,a,b) (p)->lpVtbl->SetLight(p,a,b) +#define IDirect3DDevice9Ex_GetLight(p,a,b) (p)->lpVtbl->GetLight(p,a,b) +#define IDirect3DDevice9Ex_LightEnable(p,a,b) (p)->lpVtbl->LightEnable(p,a,b) +#define IDirect3DDevice9Ex_GetLightEnable(p,a,b) (p)->lpVtbl->GetLightEnable(p,a,b) +#define IDirect3DDevice9Ex_SetClipPlane(p,a,b) (p)->lpVtbl->SetClipPlane(p,a,b) +#define IDirect3DDevice9Ex_GetClipPlane(p,a,b) (p)->lpVtbl->GetClipPlane(p,a,b) +#define IDirect3DDevice9Ex_SetRenderState(p,a,b) (p)->lpVtbl->SetRenderState(p,a,b) +#define IDirect3DDevice9Ex_GetRenderState(p,a,b) (p)->lpVtbl->GetRenderState(p,a,b) +#define IDirect3DDevice9Ex_CreateStateBlock(p,a,b) (p)->lpVtbl->CreateStateBlock(p,a,b) +#define IDirect3DDevice9Ex_BeginStateBlock(p) (p)->lpVtbl->BeginStateBlock(p) +#define IDirect3DDevice9Ex_EndStateBlock(p,a) (p)->lpVtbl->EndStateBlock(p,a) +#define IDirect3DDevice9Ex_SetClipStatus(p,a) (p)->lpVtbl->SetClipStatus(p,a) +#define IDirect3DDevice9Ex_GetClipStatus(p,a) (p)->lpVtbl->GetClipStatus(p,a) +#define IDirect3DDevice9Ex_GetTexture(p,a,b) (p)->lpVtbl->GetTexture(p,a,b) +#define IDirect3DDevice9Ex_SetTexture(p,a,b) (p)->lpVtbl->SetTexture(p,a,b) +#define IDirect3DDevice9Ex_GetTextureStageState(p,a,b,c) (p)->lpVtbl->GetTextureStageState(p,a,b,c) +#define IDirect3DDevice9Ex_SetTextureStageState(p,a,b,c) (p)->lpVtbl->SetTextureStageState(p,a,b,c) +#define IDirect3DDevice9Ex_GetSamplerState(p,a,b,c) (p)->lpVtbl->GetSamplerState(p,a,b,c) +#define IDirect3DDevice9Ex_SetSamplerState(p,a,b,c) (p)->lpVtbl->SetSamplerState(p,a,b,c) +#define IDirect3DDevice9Ex_ValidateDevice(p,a) (p)->lpVtbl->ValidateDevice(p,a) +#define IDirect3DDevice9Ex_SetPaletteEntries(p,a,b) (p)->lpVtbl->SetPaletteEntries(p,a,b) +#define IDirect3DDevice9Ex_GetPaletteEntries(p,a,b) 
(p)->lpVtbl->GetPaletteEntries(p,a,b) +#define IDirect3DDevice9Ex_SetCurrentTexturePalette(p,a) (p)->lpVtbl->SetCurrentTexturePalette(p,a) +#define IDirect3DDevice9Ex_GetCurrentTexturePalette(p,a) (p)->lpVtbl->GetCurrentTexturePalette(p,a) +#define IDirect3DDevice9Ex_SetScissorRect(p,a) (p)->lpVtbl->SetScissorRect(p,a) +#define IDirect3DDevice9Ex_GetScissorRect(p,a) (p)->lpVtbl->GetScissorRect(p,a) +#define IDirect3DDevice9Ex_SetSoftwareVertexProcessing(p,a) (p)->lpVtbl->SetSoftwareVertexProcessing(p,a) +#define IDirect3DDevice9Ex_GetSoftwareVertexProcessing(p) (p)->lpVtbl->GetSoftwareVertexProcessing(p) +#define IDirect3DDevice9Ex_SetNPatchMode(p,a) (p)->lpVtbl->SetNPatchMode(p,a) +#define IDirect3DDevice9Ex_GetNPatchMode(p) (p)->lpVtbl->GetNPatchMode(p) +#define IDirect3DDevice9Ex_DrawPrimitive(p,a,b,c) (p)->lpVtbl->DrawPrimitive(p,a,b,c) +#define IDirect3DDevice9Ex_DrawIndexedPrimitive(p,a,b,c,d,e,f) (p)->lpVtbl->DrawIndexedPrimitive(p,a,b,c,d,e,f) +#define IDirect3DDevice9Ex_DrawPrimitiveUP(p,a,b,c,d) (p)->lpVtbl->DrawPrimitiveUP(p,a,b,c,d) +#define IDirect3DDevice9Ex_DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9Ex_ProcessVertices(p,a,b,c,d,e,f) (p)->lpVtbl->ProcessVertices(p,a,b,c,d,e,f) +#define IDirect3DDevice9Ex_CreateVertexDeclaration(p,a,b) (p)->lpVtbl->CreateVertexDeclaration(p,a,b) +#define IDirect3DDevice9Ex_SetVertexDeclaration(p,a) (p)->lpVtbl->SetVertexDeclaration(p,a) +#define IDirect3DDevice9Ex_GetVertexDeclaration(p,a) (p)->lpVtbl->GetVertexDeclaration(p,a) +#define IDirect3DDevice9Ex_SetFVF(p,a) (p)->lpVtbl->SetFVF(p,a) +#define IDirect3DDevice9Ex_GetFVF(p,a) (p)->lpVtbl->GetFVF(p,a) +#define IDirect3DDevice9Ex_CreateVertexShader(p,a,b) (p)->lpVtbl->CreateVertexShader(p,a,b) +#define IDirect3DDevice9Ex_SetVertexShader(p,a) (p)->lpVtbl->SetVertexShader(p,a) +#define IDirect3DDevice9Ex_GetVertexShader(p,a) (p)->lpVtbl->GetVertexShader(p,a) +#define 
IDirect3DDevice9Ex_SetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantF(p,a,b,c) +#define IDirect3DDevice9Ex_GetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantF(p,a,b,c) +#define IDirect3DDevice9Ex_SetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantI(p,a,b,c) +#define IDirect3DDevice9Ex_GetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantI(p,a,b,c) +#define IDirect3DDevice9Ex_SetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantB(p,a,b,c) +#define IDirect3DDevice9Ex_GetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantB(p,a,b,c) +#define IDirect3DDevice9Ex_SetStreamSource(p,a,b,c,d) (p)->lpVtbl->SetStreamSource(p,a,b,c,d) +#define IDirect3DDevice9Ex_GetStreamSource(p,a,b,c,d) (p)->lpVtbl->GetStreamSource(p,a,b,c,d) +#define IDirect3DDevice9Ex_SetStreamSourceFreq(p,a,b) (p)->lpVtbl->SetStreamSourceFreq(p,a,b) +#define IDirect3DDevice9Ex_GetStreamSourceFreq(p,a,b) (p)->lpVtbl->GetStreamSourceFreq(p,a,b) +#define IDirect3DDevice9Ex_SetIndices(p,a) (p)->lpVtbl->SetIndices(p,a) +#define IDirect3DDevice9Ex_GetIndices(p,a) (p)->lpVtbl->GetIndices(p,a) +#define IDirect3DDevice9Ex_CreatePixelShader(p,a,b) (p)->lpVtbl->CreatePixelShader(p,a,b) +#define IDirect3DDevice9Ex_SetPixelShader(p,a) (p)->lpVtbl->SetPixelShader(p,a) +#define IDirect3DDevice9Ex_GetPixelShader(p,a) (p)->lpVtbl->GetPixelShader(p,a) +#define IDirect3DDevice9Ex_SetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantF(p,a,b,c) +#define IDirect3DDevice9Ex_GetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantF(p,a,b,c) +#define IDirect3DDevice9Ex_SetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantI(p,a,b,c) +#define IDirect3DDevice9Ex_GetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantI(p,a,b,c) +#define IDirect3DDevice9Ex_SetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantB(p,a,b,c) +#define 
IDirect3DDevice9Ex_GetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantB(p,a,b,c) +#define IDirect3DDevice9Ex_DrawRectPatch(p,a,b,c) (p)->lpVtbl->DrawRectPatch(p,a,b,c) +#define IDirect3DDevice9Ex_DrawTriPatch(p,a,b,c) (p)->lpVtbl->DrawTriPatch(p,a,b,c) +#define IDirect3DDevice9Ex_DeletePatch(p,a) (p)->lpVtbl->DeletePatch(p,a) +#define IDirect3DDevice9Ex_CreateQuery(p,a,b) (p)->lpVtbl->CreateQuery(p,a,b) +/* IDirect3DDevice9Ex macros */ +#define IDirect3DDevice9Ex_SetConvolutionMonoKernel(p,a,b,c,d) (p)->lpVtbl->SetConvolutionMonoKernel(p,a,b,c,d) +#define IDirect3DDevice9Ex_ComposeRects(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->ComposeRects(p,a,b,c,d,e,f,g,h) +#define IDirect3DDevice9Ex_PresentEx(p,a,b,c,d,e) (p)->lpVtbl->PresentEx(p,a,b,c,d,e) +#define IDirect3DDevice9Ex_GetGPUThreadPriority(p,a) (p)->lpVtbl->GetGPUThreadPriority(p,a) +#define IDirect3DDevice9Ex_SetGPUThreadPriority(p,a) (p)->lpVtbl->SetGPUThreadPriority(p,a) +#define IDirect3DDevice9Ex_WaitForVBlank(p,a) (p)->lpVtbl->WaitForVBlank(p,a) +#define IDirect3DDevice9Ex_CheckResourceResidency(p,a,b) (p)->lpVtbl->CheckResourceResidency(p,a,b) +#define IDirect3DDevice9Ex_SetMaximumFrameLatency(p,a) (p)->lpVtbl->SetMaximumFrameLatency(p,a) +#define IDirect3DDevice9Ex_GetMaximumFrameLatency(p,a) (p)->lpVtbl->GetMaximumFrameLatency(p,a) +#define IDirect3DDevice9Ex_CheckDeviceState(p,a) (p)->lpVtbl->CheckDeviceState(p,a) +#define IDirect3DDevice9Ex_CreateRenderTargetEx(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateRenderTargetEx(p,a,b,c,d,e,f,g,h,i) +#define IDirect3DDevice9Ex_CreateOffscreenPlainSurfaceEx(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateOffscreenPlainSurfaceEx(p,a,b,c,d,e,f,g) +#define IDirect3DDevice9Ex_CreateDepthStencilSurfaceEx(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateDepthStencilSurfaceEx(p,a,b,c,d,e,f,g,h,i) +#define IDirect3DDevice9Ex_ResetEx(p,a,b) (p)->lpVtbl->ResetEx(p,a,b) +#define IDirect3DDevice9Ex_GetDisplayModeEx(p,a,b,c) (p)->lpVtbl->GetDisplayModeEx(p,a,b,c) + +typedef struct 
IDirect3DDevice9VideoVtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DDevice9Video *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DDevice9Video *This); + ULONG (WINAPI *Release)(IDirect3DDevice9Video *This); + /* IDirect3DDevice9Video */ + HRESULT (WINAPI *GetContentProtectionCaps)(IDirect3DDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, D3DCONTENTPROTECTIONCAPS *pCaps); + HRESULT (WINAPI *CreateAuthenticatedChannel)(IDirect3DDevice9Video *This, D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, HANDLE *pChannelHandle); + HRESULT (WINAPI *CreateCryptoSession)(IDirect3DDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, IDirect3DCryptoSession9 **ppCryptoSession, HANDLE *pCryptoHandle); +} IDirect3DDevice9VideoVtbl; +struct IDirect3DDevice9Video +{ + IDirect3DDevice9VideoVtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DDevice9Video_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DDevice9Video_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DDevice9Video_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DDevice9Video macros */ +#define IDirect3DDevice9Video_GetContentProtectionCaps(p,a,b,c) (p)->lpVtbl->GetContentProtectionCaps(p,a,b,c) +#define IDirect3DDevice9Video_CreateAuthenticatedChannel(p,a,b,c) (p)->lpVtbl->CreateAuthenticatedChannel(p,a,b,c) +#define IDirect3DDevice9Video_CreateCryptoSession(p,a,b,c,d) (p)->lpVtbl->CreateCryptoSession(p,a,b,c,d) + +typedef struct IDirect3DIndexBuffer9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DIndexBuffer9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DIndexBuffer9 *This); + ULONG (WINAPI *Release)(IDirect3DIndexBuffer9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DIndexBuffer9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DIndexBuffer9 *This, 
REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DIndexBuffer9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DIndexBuffer9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DIndexBuffer9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DIndexBuffer9 *This); + void (WINAPI *PreLoad)(IDirect3DIndexBuffer9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DIndexBuffer9 *This); + /* IDirect3DIndexBuffer9 */ + HRESULT (WINAPI *Lock)(IDirect3DIndexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags); + HRESULT (WINAPI *Unlock)(IDirect3DIndexBuffer9 *This); + HRESULT (WINAPI *GetDesc)(IDirect3DIndexBuffer9 *This, D3DINDEXBUFFER_DESC *pDesc); +} IDirect3DIndexBuffer9Vtbl; +struct IDirect3DIndexBuffer9 +{ + IDirect3DIndexBuffer9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DIndexBuffer9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DIndexBuffer9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DIndexBuffer9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DIndexBuffer9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DIndexBuffer9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DIndexBuffer9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DIndexBuffer9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DIndexBuffer9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DIndexBuffer9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DIndexBuffer9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DIndexBuffer9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DIndexBuffer9 macros */ +#define IDirect3DIndexBuffer9_Lock(p,a,b,c,d) (p)->lpVtbl->Lock(p,a,b,c,d) +#define IDirect3DIndexBuffer9_Unlock(p) 
(p)->lpVtbl->Unlock(p) +#define IDirect3DIndexBuffer9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) + +typedef struct IDirect3DPixelShader9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DPixelShader9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DPixelShader9 *This); + ULONG (WINAPI *Release)(IDirect3DPixelShader9 *This); + /* IDirect3DPixelShader9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DPixelShader9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *GetFunction)(IDirect3DPixelShader9 *This, void *pData, UINT *pSizeOfData); +} IDirect3DPixelShader9Vtbl; +struct IDirect3DPixelShader9 +{ + IDirect3DPixelShader9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DPixelShader9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DPixelShader9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DPixelShader9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DPixelShader9 macros */ +#define IDirect3DPixelShader9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DPixelShader9_GetFunction(p,a,b) (p)->lpVtbl->GetFunction(p,a,b) + +typedef struct IDirect3DQuery9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DQuery9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DQuery9 *This); + ULONG (WINAPI *Release)(IDirect3DQuery9 *This); + /* IDirect3DQuery9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DQuery9 *This, IDirect3DDevice9 **ppDevice); + D3DQUERYTYPE (WINAPI *GetType)(IDirect3DQuery9 *This); + DWORD (WINAPI *GetDataSize)(IDirect3DQuery9 *This); + HRESULT (WINAPI *Issue)(IDirect3DQuery9 *This, DWORD dwIssueFlags); + HRESULT (WINAPI *GetData)(IDirect3DQuery9 *This, void *pData, DWORD dwSize, DWORD dwGetDataFlags); +} IDirect3DQuery9Vtbl; +struct IDirect3DQuery9 +{ + IDirect3DQuery9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DQuery9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DQuery9_AddRef(p) 
(p)->lpVtbl->AddRef(p) +#define IDirect3DQuery9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DQuery9 macros */ +#define IDirect3DQuery9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DQuery9_GetType(p) (p)->lpVtbl->GetType(p) +#define IDirect3DQuery9_GetDataSize(p) (p)->lpVtbl->GetDataSize(p) +#define IDirect3DQuery9_Issue(p,a) (p)->lpVtbl->Issue(p,a) +#define IDirect3DQuery9_GetData(p,a,b,c) (p)->lpVtbl->GetData(p,a,b,c) + +typedef struct IDirect3DResource9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DResource9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DResource9 *This); + ULONG (WINAPI *Release)(IDirect3DResource9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DResource9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DResource9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DResource9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DResource9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DResource9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DResource9 *This); + void (WINAPI *PreLoad)(IDirect3DResource9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DResource9 *This); +} IDirect3DResource9Vtbl; +struct IDirect3DResource9 +{ + IDirect3DResource9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DResource9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DResource9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DResource9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DResource9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DResource9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DResource9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) 
+#define IDirect3DResource9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DResource9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DResource9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DResource9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DResource9_GetType(p) (p)->lpVtbl->GetType(p) + +typedef struct IDirect3DStateBlock9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DStateBlock9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DStateBlock9 *This); + ULONG (WINAPI *Release)(IDirect3DStateBlock9 *This); + /* IDirect3DStateBlock9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DStateBlock9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *Capture)(IDirect3DStateBlock9 *This); + HRESULT (WINAPI *Apply)(IDirect3DStateBlock9 *This); +} IDirect3DStateBlock9Vtbl; +struct IDirect3DStateBlock9 +{ + IDirect3DStateBlock9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DStateBlock9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DStateBlock9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DStateBlock9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DStateBlock9 macros */ +#define IDirect3DStateBlock9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DStateBlock9_Capture(p) (p)->lpVtbl->Capture(p) +#define IDirect3DStateBlock9_Apply(p) (p)->lpVtbl->Apply(p) + +typedef struct IDirect3DSurface9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DSurface9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DSurface9 *This); + ULONG (WINAPI *Release)(IDirect3DSurface9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DSurface9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DSurface9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DSurface9 *This, REFGUID refguid, 
void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DSurface9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DSurface9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DSurface9 *This); + void (WINAPI *PreLoad)(IDirect3DSurface9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DSurface9 *This); + /* IDirect3DSurface9 */ + HRESULT (WINAPI *GetContainer)(IDirect3DSurface9 *This, REFIID riid, void **ppContainer); + HRESULT (WINAPI *GetDesc)(IDirect3DSurface9 *This, D3DSURFACE_DESC *pDesc); + HRESULT (WINAPI *LockRect)(IDirect3DSurface9 *This, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags); + HRESULT (WINAPI *UnlockRect)(IDirect3DSurface9 *This); + HRESULT (WINAPI *GetDC)(IDirect3DSurface9 *This, HDC *phdc); + HRESULT (WINAPI *ReleaseDC)(IDirect3DSurface9 *This, HDC hdc); +} IDirect3DSurface9Vtbl; +struct IDirect3DSurface9 +{ + IDirect3DSurface9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DSurface9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DSurface9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DSurface9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DSurface9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DSurface9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DSurface9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DSurface9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DSurface9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DSurface9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DSurface9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DSurface9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DSurface9 macros */ +#define IDirect3DSurface9_GetContainer(p,a,b) (p)->lpVtbl->GetContainer(p,a,b) +#define IDirect3DSurface9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) +#define 
IDirect3DSurface9_LockRect(p,a,b,c) (p)->lpVtbl->LockRect(p,a,b,c) +#define IDirect3DSurface9_UnlockRect(p) (p)->lpVtbl->UnlockRect(p) +#define IDirect3DSurface9_GetDC(p,a) (p)->lpVtbl->GetDC(p,a) +#define IDirect3DSurface9_ReleaseDC(p,a) (p)->lpVtbl->ReleaseDC(p,a) + +typedef struct IDirect3DSwapChain9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DSwapChain9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DSwapChain9 *This); + ULONG (WINAPI *Release)(IDirect3DSwapChain9 *This); + /* IDirect3DSwapChain9 */ + HRESULT (WINAPI *Present)(IDirect3DSwapChain9 *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags); + HRESULT (WINAPI *GetFrontBufferData)(IDirect3DSwapChain9 *This, IDirect3DSurface9 *pDestSurface); + HRESULT (WINAPI *GetBackBuffer)(IDirect3DSwapChain9 *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); + HRESULT (WINAPI *GetRasterStatus)(IDirect3DSwapChain9 *This, D3DRASTER_STATUS *pRasterStatus); + HRESULT (WINAPI *GetDisplayMode)(IDirect3DSwapChain9 *This, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *GetDevice)(IDirect3DSwapChain9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *GetPresentParameters)(IDirect3DSwapChain9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters); +} IDirect3DSwapChain9Vtbl; +struct IDirect3DSwapChain9 +{ + IDirect3DSwapChain9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DSwapChain9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DSwapChain9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DSwapChain9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DSwapChain9 macros */ +#define IDirect3DSwapChain9_Present(p,a,b,c,d,e) (p)->lpVtbl->Present(p,a,b,c,d,e) +#define IDirect3DSwapChain9_GetFrontBufferData(p,a) (p)->lpVtbl->GetFrontBufferData(p,a) +#define IDirect3DSwapChain9_GetBackBuffer(p,a,b,c) (p)->lpVtbl->GetBackBuffer(p,a,b,c) +#define 
IDirect3DSwapChain9_GetRasterStatus(p,a) (p)->lpVtbl->GetRasterStatus(p,a) +#define IDirect3DSwapChain9_GetDisplayMode(p,a) (p)->lpVtbl->GetDisplayMode(p,a) +#define IDirect3DSwapChain9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DSwapChain9_GetPresentParameters(p,a) (p)->lpVtbl->GetPresentParameters(p,a) + +typedef struct IDirect3DSwapChain9ExVtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DSwapChain9Ex *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DSwapChain9Ex *This); + ULONG (WINAPI *Release)(IDirect3DSwapChain9Ex *This); + /* IDirect3DSwapChain9 */ + HRESULT (WINAPI *Present)(IDirect3DSwapChain9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags); + HRESULT (WINAPI *GetFrontBufferData)(IDirect3DSwapChain9Ex *This, IDirect3DSurface9 *pDestSurface); + HRESULT (WINAPI *GetBackBuffer)(IDirect3DSwapChain9Ex *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); + HRESULT (WINAPI *GetRasterStatus)(IDirect3DSwapChain9Ex *This, D3DRASTER_STATUS *pRasterStatus); + HRESULT (WINAPI *GetDisplayMode)(IDirect3DSwapChain9Ex *This, D3DDISPLAYMODE *pMode); + HRESULT (WINAPI *GetDevice)(IDirect3DSwapChain9Ex *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *GetPresentParameters)(IDirect3DSwapChain9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters); + /* IDirect3DSwapChain9Ex */ + HRESULT (WINAPI *GetLastPresentCount)(IDirect3DSwapChain9Ex *This, UINT *pLastPresentCount); + HRESULT (WINAPI *GetPresentStats)(IDirect3DSwapChain9Ex *This, D3DPRESENTSTATS *pPresentationStatistics); + HRESULT (WINAPI *GetDisplayModeEx)(IDirect3DSwapChain9Ex *This, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation); +} IDirect3DSwapChain9ExVtbl; +struct IDirect3DSwapChain9Ex +{ + IDirect3DSwapChain9ExVtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DSwapChain9Ex_QueryInterface(p,a,b) 
(p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DSwapChain9Ex_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DSwapChain9Ex_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DSwapChain9 macros */ +#define IDirect3DSwapChain9Ex_Present(p,a,b,c,d,e) (p)->lpVtbl->Present(p,a,b,c,d,e) +#define IDirect3DSwapChain9Ex_GetFrontBufferData(p,a) (p)->lpVtbl->GetFrontBufferData(p,a) +#define IDirect3DSwapChain9Ex_GetBackBuffer(p,a,b,c) (p)->lpVtbl->GetBackBuffer(p,a,b,c) +#define IDirect3DSwapChain9Ex_GetRasterStatus(p,a) (p)->lpVtbl->GetRasterStatus(p,a) +#define IDirect3DSwapChain9Ex_GetDisplayMode(p,a) (p)->lpVtbl->GetDisplayMode(p,a) +#define IDirect3DSwapChain9Ex_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DSwapChain9Ex_GetPresentParameters(p,a) (p)->lpVtbl->GetPresentParameters(p,a) +/* IDirect3DSwapChain9Ex macros */ +#define IDirect3DSwapChain9Ex_GetLastPresentCount(p,a) (p)->lpVtbl->GetLastPresentCount(p,a) +#define IDirect3DSwapChain9Ex_GetPresentStats(p,a) (p)->lpVtbl->GetPresentStats(p,a) +#define IDirect3DSwapChain9Ex_GetDisplayModeEx(p,a,b) (p)->lpVtbl->GetDisplayModeEx(p,a,b) + +typedef struct IDirect3DTexture9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DTexture9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DTexture9 *This); + ULONG (WINAPI *Release)(IDirect3DTexture9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DTexture9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DTexture9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DTexture9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DTexture9 *This); + void (WINAPI *PreLoad)(IDirect3DTexture9 *This); + D3DRESOURCETYPE 
(WINAPI *GetType)(IDirect3DTexture9 *This); + /* IDirect3DBaseTexture9 */ + DWORD (WINAPI *SetLOD)(IDirect3DTexture9 *This, DWORD LODNew); + DWORD (WINAPI *GetLOD)(IDirect3DTexture9 *This); + DWORD (WINAPI *GetLevelCount)(IDirect3DTexture9 *This); + HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); + D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DTexture9 *This); + void (WINAPI *GenerateMipSubLevels)(IDirect3DTexture9 *This); + /* IDirect3DTexture9 */ + HRESULT (WINAPI *GetLevelDesc)(IDirect3DTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc); + HRESULT (WINAPI *GetSurfaceLevel)(IDirect3DTexture9 *This, UINT Level, IDirect3DSurface9 **ppSurfaceLevel); + HRESULT (WINAPI *LockRect)(IDirect3DTexture9 *This, UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags); + HRESULT (WINAPI *UnlockRect)(IDirect3DTexture9 *This, UINT Level); + HRESULT (WINAPI *AddDirtyRect)(IDirect3DTexture9 *This, const RECT *pDirtyRect); +} IDirect3DTexture9Vtbl; +struct IDirect3DTexture9 +{ + IDirect3DTexture9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DTexture9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DTexture9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DBaseTexture9 macros */ 
+#define IDirect3DTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) +#define IDirect3DTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) +#define IDirect3DTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) +#define IDirect3DTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) +#define IDirect3DTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) +#define IDirect3DTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) +/* IDirect3DTexture9 macros */ +#define IDirect3DTexture9_GetLevelDesc(p,a,b) (p)->lpVtbl->GetLevelDesc(p,a,b) +#define IDirect3DTexture9_GetSurfaceLevel(p,a,b) (p)->lpVtbl->GetSurfaceLevel(p,a,b) +#define IDirect3DTexture9_LockRect(p,a,b,c,d) (p)->lpVtbl->LockRect(p,a,b,c,d) +#define IDirect3DTexture9_UnlockRect(p,a) (p)->lpVtbl->UnlockRect(p,a) +#define IDirect3DTexture9_AddDirtyRect(p,a) (p)->lpVtbl->AddDirtyRect(p,a) + +typedef struct IDirect3DVertexBuffer9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DVertexBuffer9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DVertexBuffer9 *This); + ULONG (WINAPI *Release)(IDirect3DVertexBuffer9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DVertexBuffer9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DVertexBuffer9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DVertexBuffer9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DVertexBuffer9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DVertexBuffer9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DVertexBuffer9 *This); + void (WINAPI *PreLoad)(IDirect3DVertexBuffer9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DVertexBuffer9 *This); + /* IDirect3DVertexBuffer9 */ + HRESULT (WINAPI *Lock)(IDirect3DVertexBuffer9 *This, UINT OffsetToLock, UINT 
SizeToLock, void **ppbData, DWORD Flags); + HRESULT (WINAPI *Unlock)(IDirect3DVertexBuffer9 *This); + HRESULT (WINAPI *GetDesc)(IDirect3DVertexBuffer9 *This, D3DVERTEXBUFFER_DESC *pDesc); +} IDirect3DVertexBuffer9Vtbl; +struct IDirect3DVertexBuffer9 +{ + IDirect3DVertexBuffer9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DVertexBuffer9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DVertexBuffer9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DVertexBuffer9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DVertexBuffer9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DVertexBuffer9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DVertexBuffer9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DVertexBuffer9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DVertexBuffer9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DVertexBuffer9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DVertexBuffer9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DVertexBuffer9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DVertexBuffer9 macros */ +#define IDirect3DVertexBuffer9_Lock(p,a,b,c,d) (p)->lpVtbl->Lock(p,a,b,c,d) +#define IDirect3DVertexBuffer9_Unlock(p) (p)->lpVtbl->Unlock(p) +#define IDirect3DVertexBuffer9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) + +typedef struct IDirect3DVertexDeclaration9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DVertexDeclaration9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DVertexDeclaration9 *This); + ULONG (WINAPI *Release)(IDirect3DVertexDeclaration9 *This); + /* IDirect3DVertexDeclaration9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DVertexDeclaration9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *GetDeclaration)(IDirect3DVertexDeclaration9 *This, D3DVERTEXELEMENT9 *pElement, UINT 
*pNumElements); +} IDirect3DVertexDeclaration9Vtbl; +struct IDirect3DVertexDeclaration9 +{ + IDirect3DVertexDeclaration9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DVertexDeclaration9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DVertexDeclaration9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DVertexDeclaration9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DVertexDeclaration9 macros */ +#define IDirect3DVertexDeclaration9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DVertexDeclaration9_GetDeclaration(p,a,b) (p)->lpVtbl->GetDeclaration(p,a,b) + +typedef struct IDirect3DVertexShader9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DVertexShader9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DVertexShader9 *This); + ULONG (WINAPI *Release)(IDirect3DVertexShader9 *This); + /* IDirect3DVertexShader9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DVertexShader9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *GetFunction)(IDirect3DVertexShader9 *This, void *pData, UINT *pSizeOfData); +} IDirect3DVertexShader9Vtbl; +struct IDirect3DVertexShader9 +{ + IDirect3DVertexShader9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DVertexShader9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DVertexShader9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DVertexShader9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DVertexShader9 macros */ +#define IDirect3DVertexShader9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DVertexShader9_GetFunction(p,a,b) (p)->lpVtbl->GetFunction(p,a,b) + +typedef struct IDirect3DVolume9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DVolume9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DVolume9 *This); + ULONG (WINAPI *Release)(IDirect3DVolume9 *This); + /* IDirect3DVolume9 */ + HRESULT (WINAPI *GetDevice)(IDirect3DVolume9 *This, 
IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DVolume9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DVolume9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DVolume9 *This, REFGUID refguid); + HRESULT (WINAPI *GetContainer)(IDirect3DVolume9 *This, REFIID riid, void **ppContainer); + HRESULT (WINAPI *GetDesc)(IDirect3DVolume9 *This, D3DVOLUME_DESC *pDesc); + HRESULT (WINAPI *LockBox)(IDirect3DVolume9 *This, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags); + HRESULT (WINAPI *UnlockBox)(IDirect3DVolume9 *This); +} IDirect3DVolume9Vtbl; +struct IDirect3DVolume9 +{ + IDirect3DVolume9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DVolume9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DVolume9_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IDirect3DVolume9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DVolume9 macros */ +#define IDirect3DVolume9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DVolume9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DVolume9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DVolume9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DVolume9_GetContainer(p,a,b) (p)->lpVtbl->GetContainer(p,a,b) +#define IDirect3DVolume9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) +#define IDirect3DVolume9_LockBox(p,a,b,c) (p)->lpVtbl->LockBox(p,a,b,c) +#define IDirect3DVolume9_UnlockBox(p) (p)->lpVtbl->UnlockBox(p) + +typedef struct IDirect3DVolumeTexture9Vtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IDirect3DVolumeTexture9 *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IDirect3DVolumeTexture9 *This); + ULONG (WINAPI *Release)(IDirect3DVolumeTexture9 *This); + /* IDirect3DResource9 */ + HRESULT (WINAPI 
*GetDevice)(IDirect3DVolumeTexture9 *This, IDirect3DDevice9 **ppDevice); + HRESULT (WINAPI *SetPrivateData)(IDirect3DVolumeTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); + HRESULT (WINAPI *GetPrivateData)(IDirect3DVolumeTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); + HRESULT (WINAPI *FreePrivateData)(IDirect3DVolumeTexture9 *This, REFGUID refguid); + DWORD (WINAPI *SetPriority)(IDirect3DVolumeTexture9 *This, DWORD PriorityNew); + DWORD (WINAPI *GetPriority)(IDirect3DVolumeTexture9 *This); + void (WINAPI *PreLoad)(IDirect3DVolumeTexture9 *This); + D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DVolumeTexture9 *This); + /* IDirect3DBaseTexture9 */ + DWORD (WINAPI *SetLOD)(IDirect3DVolumeTexture9 *This, DWORD LODNew); + DWORD (WINAPI *GetLOD)(IDirect3DVolumeTexture9 *This); + DWORD (WINAPI *GetLevelCount)(IDirect3DVolumeTexture9 *This); + HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DVolumeTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); + D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DVolumeTexture9 *This); + void (WINAPI *GenerateMipSubLevels)(IDirect3DVolumeTexture9 *This); + /* IDirect3DVolumeTexture9 */ + HRESULT (WINAPI *GetLevelDesc)(IDirect3DVolumeTexture9 *This, UINT Level, D3DVOLUME_DESC *pDesc); + HRESULT (WINAPI *GetVolumeLevel)(IDirect3DVolumeTexture9 *This, UINT Level, IDirect3DVolume9 **ppVolumeLevel); + HRESULT (WINAPI *LockBox)(IDirect3DVolumeTexture9 *This, UINT Level, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags); + HRESULT (WINAPI *UnlockBox)(IDirect3DVolumeTexture9 *This, UINT Level); + HRESULT (WINAPI *AddDirtyBox)(IDirect3DVolumeTexture9 *This, const D3DBOX *pDirtyBox); +} IDirect3DVolumeTexture9Vtbl; +struct IDirect3DVolumeTexture9 +{ + IDirect3DVolumeTexture9Vtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IDirect3DVolumeTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IDirect3DVolumeTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) 
+#define IDirect3DVolumeTexture9_Release(p) (p)->lpVtbl->Release(p) +/* IDirect3DResource9 macros */ +#define IDirect3DVolumeTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) +#define IDirect3DVolumeTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) +#define IDirect3DVolumeTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) +#define IDirect3DVolumeTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) +#define IDirect3DVolumeTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) +#define IDirect3DVolumeTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) +#define IDirect3DVolumeTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) +#define IDirect3DVolumeTexture9_GetType(p) (p)->lpVtbl->GetType(p) +/* IDirect3DBaseTexture9 macros */ +#define IDirect3DVolumeTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) +#define IDirect3DVolumeTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) +#define IDirect3DVolumeTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) +#define IDirect3DVolumeTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) +#define IDirect3DVolumeTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) +#define IDirect3DVolumeTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) +/* IDirect3DVolumeTexture9 macros */ +#define IDirect3DVolumeTexture9_GetLevelDesc(p,a,b) (p)->lpVtbl->GetLevelDesc(p,a,b) +#define IDirect3DVolumeTexture9_GetVolumeLevel(p,a,b) (p)->lpVtbl->GetVolumeLevel(p,a,b) +#define IDirect3DVolumeTexture9_LockBox(p,a,b,c,d) (p)->lpVtbl->LockBox(p,a,b,c,d) +#define IDirect3DVolumeTexture9_UnlockBox(p,a) (p)->lpVtbl->UnlockBox(p,a) +#define IDirect3DVolumeTexture9_AddDirtyBox(p,a) (p)->lpVtbl->AddDirtyBox(p,a) + +#endif /* __cplusplus */ + +#ifdef _WIN32 + +IDirect3D9 *WINAPI +Direct3DCreate9( UINT SDKVersion ); + +HRESULT WINAPI +Direct3DCreate9Ex( UINT SDKVersion, + IDirect3D9Ex **ppD3D9 ); + +void *WINAPI +Direct3DShaderValidatorCreate9( void ); + +int WINAPI 
+D3DPERF_BeginEvent( D3DCOLOR color, + LPCWSTR name ); + +int WINAPI +D3DPERF_EndEvent( void ); + +DWORD WINAPI +D3DPERF_GetStatus( void ); + +void WINAPI +D3DPERF_SetOptions( DWORD options ); + +BOOL WINAPI +D3DPERF_QueryRepeatFrame( void ); + +void WINAPI +D3DPERF_SetMarker( D3DCOLOR color, + LPCWSTR name ); + +void WINAPI +D3DPERF_SetRegion( D3DCOLOR color, + LPCWSTR name ); + +void WINAPI +DebugSetMute( void ); + +#endif + +#endif /* _D3D9_H_ */ diff --git a/nine-native/include/D3D9/d3d9caps.h b/nine-native/include/D3D9/d3d9caps.h new file mode 100644 index 000000000..70f9919c5 --- /dev/null +++ b/nine-native/include/D3D9/d3d9caps.h @@ -0,0 +1,390 @@ +/* + * Copyright 2011 Joakim Sindholt + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef _D3D9CAPS_H_ +#define _D3D9CAPS_H_ + +#include "d3d9types.h" + +/* Caps flags */ +#define D3DCAPS_OVERLAY 0x00000800 +#define D3DCAPS_READ_SCANLINE 0x00020000 + +#define D3DCAPS2_FULLSCREENGAMMA 0x00020000 +#define D3DCAPS2_CANCALIBRATEGAMMA 0x00100000 +#define D3DCAPS2_RESERVED 0x02000000 +#define D3DCAPS2_CANMANAGERESOURCE 0x10000000 +#define D3DCAPS2_DYNAMICTEXTURES 0x20000000 +#define D3DCAPS2_CANAUTOGENMIPMAP 0x40000000 +#define D3DCAPS2_CANSHARERESOURCE 0x80000000 + +#define D3DCAPS3_ALPHA_FULLSCREEN_FLIP_OR_DISCARD 0x00000020 +#define D3DCAPS3_LINEAR_TO_SRGB_PRESENTATION 0x00000080 +#define D3DCAPS3_COPY_TO_VIDMEM 0x00000100 +#define D3DCAPS3_COPY_TO_SYSTEMMEM 0x00000200 +#define D3DCAPS3_DXVAHD 0x00000400 +#define D3DCAPS3_RESERVED 0x8000001F + +#define D3DPRESENT_INTERVAL_DEFAULT 0x00000000 +#define D3DPRESENT_INTERVAL_ONE 0x00000001 +#define D3DPRESENT_INTERVAL_TWO 0x00000002 +#define D3DPRESENT_INTERVAL_THREE 0x00000004 +#define D3DPRESENT_INTERVAL_FOUR 0x00000008 +#define D3DPRESENT_INTERVAL_IMMEDIATE 0x80000000 + +#define D3DCURSORCAPS_COLOR 0x00000001 +#define D3DCURSORCAPS_LOWRES 0x00000002 + +#define D3DDEVCAPS_EXECUTESYSTEMMEMORY 0x00000010 +#define D3DDEVCAPS_EXECUTEVIDEOMEMORY 0x00000020 +#define D3DDEVCAPS_TLVERTEXSYSTEMMEMORY 0x00000040 +#define D3DDEVCAPS_TLVERTEXVIDEOMEMORY 0x00000080 +#define D3DDEVCAPS_TEXTURESYSTEMMEMORY 0x00000100 +#define D3DDEVCAPS_TEXTUREVIDEOMEMORY 0x00000200 +#define D3DDEVCAPS_DRAWPRIMTLVERTEX 0x00000400 +#define D3DDEVCAPS_CANRENDERAFTERFLIP 0x00000800 +#define D3DDEVCAPS_TEXTURENONLOCALVIDMEM 0x00001000 +#define D3DDEVCAPS_DRAWPRIMITIVES2 0x00002000 +#define D3DDEVCAPS_SEPARATETEXTUREMEMORIES 0x00004000 +#define D3DDEVCAPS_DRAWPRIMITIVES2EX 0x00008000 +#define D3DDEVCAPS_HWTRANSFORMANDLIGHT 0x00010000 +#define D3DDEVCAPS_CANBLTSYSTONONLOCAL 0x00020000 +#define D3DDEVCAPS_HWRASTERIZATION 0x00080000 +#define D3DDEVCAPS_PUREDEVICE 0x00100000 +#define D3DDEVCAPS_QUINTICRTPATCHES 0x00200000 +#define 
D3DDEVCAPS_RTPATCHES 0x00400000 +#define D3DDEVCAPS_RTPATCHHANDLEZERO 0x00800000 +#define D3DDEVCAPS_NPATCHES 0x01000000 + +#define D3DPMISCCAPS_MASKZ 0x00000002 +#define D3DPMISCCAPS_CULLNONE 0x00000010 +#define D3DPMISCCAPS_CULLCW 0x00000020 +#define D3DPMISCCAPS_CULLCCW 0x00000040 +#define D3DPMISCCAPS_COLORWRITEENABLE 0x00000080 +#define D3DPMISCCAPS_CLIPPLANESCALEDPOINTS 0x00000100 +#define D3DPMISCCAPS_CLIPTLVERTS 0x00000200 +#define D3DPMISCCAPS_TSSARGTEMP 0x00000400 +#define D3DPMISCCAPS_BLENDOP 0x00000800 +#define D3DPMISCCAPS_NULLREFERENCE 0x00001000 +#define D3DPMISCCAPS_INDEPENDENTWRITEMASKS 0x00004000 +#define D3DPMISCCAPS_PERSTAGECONSTANT 0x00008000 +#define D3DPMISCCAPS_FOGANDSPECULARALPHA 0x00010000 +#define D3DPMISCCAPS_SEPARATEALPHABLEND 0x00020000 +#define D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS 0x00040000 +#define D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING 0x00080000 +#define D3DPMISCCAPS_FOGVERTEXCLAMPED 0x00100000 +#define D3DPMISCCAPS_POSTBLENDSRGBCONVERT 0x00200000 + +#define D3DPRASTERCAPS_DITHER 0x00000001 +#define D3DPRASTERCAPS_ZTEST 0x00000010 +#define D3DPRASTERCAPS_FOGVERTEX 0x00000080 +#define D3DPRASTERCAPS_FOGTABLE 0x00000100 +#define D3DPRASTERCAPS_MIPMAPLODBIAS 0x00002000 +#define D3DPRASTERCAPS_ZBUFFERLESSHSR 0x00008000 +#define D3DPRASTERCAPS_FOGRANGE 0x00010000 +#define D3DPRASTERCAPS_ANISOTROPY 0x00020000 +#define D3DPRASTERCAPS_WBUFFER 0x00040000 +#define D3DPRASTERCAPS_WFOG 0x00100000 +#define D3DPRASTERCAPS_ZFOG 0x00200000 +#define D3DPRASTERCAPS_COLORPERSPECTIVE 0x00400000 +#define D3DPRASTERCAPS_SCISSORTEST 0x01000000 +#define D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS 0x02000000 +#define D3DPRASTERCAPS_DEPTHBIAS 0x04000000 +#define D3DPRASTERCAPS_MULTISAMPLE_TOGGLE 0x08000000 + +#define D3DPCMPCAPS_NEVER 0x00000001 +#define D3DPCMPCAPS_LESS 0x00000002 +#define D3DPCMPCAPS_EQUAL 0x00000004 +#define D3DPCMPCAPS_LESSEQUAL 0x00000008 +#define D3DPCMPCAPS_GREATER 0x00000010 +#define D3DPCMPCAPS_NOTEQUAL 0x00000020 +#define 
D3DPCMPCAPS_GREATEREQUAL 0x00000040 +#define D3DPCMPCAPS_ALWAYS 0x00000080 + +#define D3DPBLENDCAPS_ZERO 0x00000001 +#define D3DPBLENDCAPS_ONE 0x00000002 +#define D3DPBLENDCAPS_SRCCOLOR 0x00000004 +#define D3DPBLENDCAPS_INVSRCCOLOR 0x00000008 +#define D3DPBLENDCAPS_SRCALPHA 0x00000010 +#define D3DPBLENDCAPS_INVSRCALPHA 0x00000020 +#define D3DPBLENDCAPS_DESTALPHA 0x00000040 +#define D3DPBLENDCAPS_INVDESTALPHA 0x00000080 +#define D3DPBLENDCAPS_DESTCOLOR 0x00000100 +#define D3DPBLENDCAPS_INVDESTCOLOR 0x00000200 +#define D3DPBLENDCAPS_SRCALPHASAT 0x00000400 +#define D3DPBLENDCAPS_BOTHSRCALPHA 0x00000800 +#define D3DPBLENDCAPS_BOTHINVSRCALPHA 0x00001000 +#define D3DPBLENDCAPS_BLENDFACTOR 0x00002000 +#ifndef D3D_DISABLE_9EX +# define D3DPBLENDCAPS_SRCCOLOR2 0x00004000 +# define D3DPBLENDCAPS_INVSRCCOLOR2 0x00008000 +#endif + +#define D3DPSHADECAPS_COLORGOURAUDRGB 0x00000008 +#define D3DPSHADECAPS_SPECULARGOURAUDRGB 0x00000200 +#define D3DPSHADECAPS_ALPHAGOURAUDBLEND 0x00004000 +#define D3DPSHADECAPS_FOGGOURAUD 0x00080000 + +#define D3DPTEXTURECAPS_PERSPECTIVE 0x00000001 +#define D3DPTEXTURECAPS_POW2 0x00000002 +#define D3DPTEXTURECAPS_ALPHA 0x00000004 +#define D3DPTEXTURECAPS_SQUAREONLY 0x00000020 +#define D3DPTEXTURECAPS_TEXREPEATNOTSCALEDBYSIZE 0x00000040 +#define D3DPTEXTURECAPS_ALPHAPALETTE 0x00000080 +#define D3DPTEXTURECAPS_NONPOW2CONDITIONAL 0x00000100 +#define D3DPTEXTURECAPS_PROJECTED 0x00000400 +#define D3DPTEXTURECAPS_CUBEMAP 0x00000800 +#define D3DPTEXTURECAPS_VOLUMEMAP 0x00002000 +#define D3DPTEXTURECAPS_MIPMAP 0x00004000 +#define D3DPTEXTURECAPS_MIPVOLUMEMAP 0x00008000 +#define D3DPTEXTURECAPS_MIPCUBEMAP 0x00010000 +#define D3DPTEXTURECAPS_CUBEMAP_POW2 0x00020000 +#define D3DPTEXTURECAPS_VOLUMEMAP_POW2 0x00040000 +#define D3DPTEXTURECAPS_NOPROJECTEDBUMPENV 0x00200000 + +#define D3DPTFILTERCAPS_MINFPOINT 0x00000100 +#define D3DPTFILTERCAPS_MINFLINEAR 0x00000200 +#define D3DPTFILTERCAPS_MINFANISOTROPIC 0x00000400 +#define D3DPTFILTERCAPS_MINFPYRAMIDALQUAD 
0x00000800 +#define D3DPTFILTERCAPS_MINFGAUSSIANQUAD 0x00001000 +#define D3DPTFILTERCAPS_MIPFPOINT 0x00010000 +#define D3DPTFILTERCAPS_MIPFLINEAR 0x00020000 +#define D3DPTFILTERCAPS_MAGFPOINT 0x01000000 +#define D3DPTFILTERCAPS_MAGFLINEAR 0x02000000 +#define D3DPTFILTERCAPS_MAGFANISOTROPIC 0x04000000 +#define D3DPTFILTERCAPS_MAGFPYRAMIDALQUAD 0x08000000 +#define D3DPTFILTERCAPS_MAGFGAUSSIANQUAD 0x10000000 +/* D3DPTADDRESSCAPS_*: one distinct bit per flag; presumably matched against D3DCAPS9.TextureAddressCaps / VolumeTextureAddressCaps (confirm at the call sites). */ +#define D3DPTADDRESSCAPS_WRAP 0x00000001 +#define D3DPTADDRESSCAPS_MIRROR 0x00000002 +#define D3DPTADDRESSCAPS_CLAMP 0x00000004 +#define D3DPTADDRESSCAPS_BORDER 0x00000008 +#define D3DPTADDRESSCAPS_INDEPENDENTUV 0x00000010 +#define D3DPTADDRESSCAPS_MIRRORONCE 0x00000020 +/* D3DLINECAPS_*: one distinct bit per flag; presumably for D3DCAPS9.LineCaps (confirm). */ +#define D3DLINECAPS_TEXTURE 0x00000001 +#define D3DLINECAPS_ZTEST 0x00000002 +#define D3DLINECAPS_BLEND 0x00000004 +#define D3DLINECAPS_ALPHACMP 0x00000008 +#define D3DLINECAPS_FOG 0x00000010 +#define D3DLINECAPS_ANTIALIAS 0x00000020 +/* D3DSTENCILCAPS_*: one distinct bit per flag; presumably for D3DCAPS9.StencilCaps (confirm). */ +#define D3DSTENCILCAPS_KEEP 0x00000001 +#define D3DSTENCILCAPS_ZERO 0x00000002 +#define D3DSTENCILCAPS_REPLACE 0x00000004 +#define D3DSTENCILCAPS_INCRSAT 0x00000008 +#define D3DSTENCILCAPS_DECRSAT 0x00000010 +#define D3DSTENCILCAPS_INVERT 0x00000020 +#define D3DSTENCILCAPS_INCR 0x00000040 +#define D3DSTENCILCAPS_DECR 0x00000080 +#define D3DSTENCILCAPS_TWOSIDED 0x00000100 +/* D3DFVFCAPS_*: TEXCOORDCOUNTMASK covers the low 16 bits; the other two entries are single bits above that mask. */ +#define D3DFVFCAPS_TEXCOORDCOUNTMASK 0x0000FFFF +#define D3DFVFCAPS_DONOTSTRIPELEMENTS 0x00080000 +#define D3DFVFCAPS_PSIZE 0x00100000 +/* D3DTEXOPCAPS_*: 26 consecutive single-bit flags (0x00000001 .. 0x02000000); presumably for D3DCAPS9.TextureOpCaps (confirm). */ +#define D3DTEXOPCAPS_DISABLE 0x00000001 +#define D3DTEXOPCAPS_SELECTARG1 0x00000002 +#define D3DTEXOPCAPS_SELECTARG2 0x00000004 +#define D3DTEXOPCAPS_MODULATE 0x00000008 +#define D3DTEXOPCAPS_MODULATE2X 0x00000010 +#define D3DTEXOPCAPS_MODULATE4X 0x00000020 +#define D3DTEXOPCAPS_ADD 0x00000040 +#define D3DTEXOPCAPS_ADDSIGNED 0x00000080 +#define D3DTEXOPCAPS_ADDSIGNED2X 0x00000100 +#define D3DTEXOPCAPS_SUBTRACT 0x00000200 +#define D3DTEXOPCAPS_ADDSMOOTH 0x00000400 +#define D3DTEXOPCAPS_BLENDDIFFUSEALPHA 0x00000800 +#define D3DTEXOPCAPS_BLENDTEXTUREALPHA
0x00001000 +#define D3DTEXOPCAPS_BLENDFACTORALPHA 0x00002000 +#define D3DTEXOPCAPS_BLENDTEXTUREALPHAPM 0x00004000 +#define D3DTEXOPCAPS_BLENDCURRENTALPHA 0x00008000 +#define D3DTEXOPCAPS_PREMODULATE 0x00010000 +#define D3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR 0x00020000 +#define D3DTEXOPCAPS_MODULATECOLOR_ADDALPHA 0x00040000 +#define D3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR 0x00080000 +#define D3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA 0x00100000 +#define D3DTEXOPCAPS_BUMPENVMAP 0x00200000 +#define D3DTEXOPCAPS_BUMPENVMAPLUMINANCE 0x00400000 +#define D3DTEXOPCAPS_DOTPRODUCT3 0x00800000 +#define D3DTEXOPCAPS_MULTIPLYADD 0x01000000 +#define D3DTEXOPCAPS_LERP 0x02000000 +/* D3DVTXPCAPS_*: single-bit flags; bits 0x00000004 and 0x00000080 are not assigned in this list. */ +#define D3DVTXPCAPS_TEXGEN 0x00000001 +#define D3DVTXPCAPS_MATERIALSOURCE7 0x00000002 +#define D3DVTXPCAPS_DIRECTIONALLIGHTS 0x00000008 +#define D3DVTXPCAPS_POSITIONALLIGHTS 0x00000010 +#define D3DVTXPCAPS_LOCALVIEWER 0x00000020 +#define D3DVTXPCAPS_TWEENING 0x00000040 +#define D3DVTXPCAPS_TEXGEN_SPHEREMAP 0x00000100 +#define D3DVTXPCAPS_NO_TEXGEN_NONLOCALVIEWER 0x00000200 +/* D3DDEVCAPS2_*: seven consecutive single-bit flags; presumably for D3DCAPS9.DevCaps2 (confirm). */ +#define D3DDEVCAPS2_STREAMOFFSET 0x00000001 +#define D3DDEVCAPS2_DMAPNPATCH 0x00000002 +#define D3DDEVCAPS2_ADAPTIVETESSRTPATCH 0x00000004 +#define D3DDEVCAPS2_ADAPTIVETESSNPATCH 0x00000008 +#define D3DDEVCAPS2_CAN_STRETCHRECT_FROM_TEXTURES 0x00000010 +#define D3DDEVCAPS2_PRESAMPLEDDMAPNPATCH 0x00000020 +#define D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET 0x00000040 +/* D3DDTCAPS_*: ten consecutive single-bit flags; presumably for D3DCAPS9.DeclTypes (confirm). */ +#define D3DDTCAPS_UBYTE4 0x00000001 +#define D3DDTCAPS_UBYTE4N 0x00000002 +#define D3DDTCAPS_SHORT2N 0x00000004 +#define D3DDTCAPS_SHORT4N 0x00000008 +#define D3DDTCAPS_USHORT2N 0x00000010 +#define D3DDTCAPS_USHORT4N 0x00000020 +#define D3DDTCAPS_UDEC3 0x00000040 +#define D3DDTCAPS_DEC3N 0x00000080 +#define D3DDTCAPS_FLOAT16_2 0x00000100 +#define D3DDTCAPS_FLOAT16_4 0x00000200 +/* D3DVS20_* / D3DPS20_* MIN/MAX pairs are plain numeric limits, not bit flags; their names line up with the INT members of the two shader-caps structs below. */ + +#define D3DVS20_MAX_DYNAMICFLOWCONTROLDEPTH 24 +#define D3DVS20_MIN_DYNAMICFLOWCONTROLDEPTH 0 +#define D3DVS20_MAX_NUMTEMPS 32 +#define D3DVS20_MIN_NUMTEMPS 12 +#define
D3DVS20_MAX_STATICFLOWCONTROLDEPTH 4 +#define D3DVS20_MIN_STATICFLOWCONTROLDEPTH 1 + +#define D3DVS20CAPS_PREDICATION (1 << 0) + +#define D3DPS20CAPS_ARBITRARYSWIZZLE (1 << 0) +#define D3DPS20CAPS_GRADIENTINSTRUCTIONS (1 << 1) +#define D3DPS20CAPS_PREDICATION (1 << 2) +#define D3DPS20CAPS_NODEPENDENTREADLIMIT (1 << 3) +#define D3DPS20CAPS_NOTEXINSTRUCTIONLIMIT (1 << 4) + +#define D3DPS20_MAX_DYNAMICFLOWCONTROLDEPTH 24 +#define D3DPS20_MIN_DYNAMICFLOWCONTROLDEPTH 0 +#define D3DPS20_MAX_NUMTEMPS 32 +#define D3DPS20_MIN_NUMTEMPS 12 +#define D3DPS20_MAX_STATICFLOWCONTROLDEPTH 4 +#define D3DPS20_MIN_STATICFLOWCONTROLDEPTH 0 +#define D3DPS20_MAX_NUMINSTRUCTIONSLOTS 512 +#define D3DPS20_MIN_NUMINSTRUCTIONSLOTS 96 + +#define D3DMIN30SHADERINSTRUCTIONS 512 +#define D3DMAX30SHADERINSTRUCTIONS 32768 + +/* Structs: the two shader-model 2.0 capability records, the D3DCAPS9 record that embeds them, and two smaller caps records */ +typedef struct _D3DVSHADERCAPS2_0 { + DWORD Caps; + INT DynamicFlowControlDepth; + INT NumTemps; + INT StaticFlowControlDepth; +} D3DVSHADERCAPS2_0, *PD3DVSHADERCAPS2_0, *LPD3DVSHADERCAPS2_0; + +typedef struct _D3DPSHADERCAPS2_0 { + DWORD Caps; + INT DynamicFlowControlDepth; + INT NumTemps; + INT StaticFlowControlDepth; + INT NumInstructionSlots; +} D3DPSHADERCAPS2_0, *PD3DPSHADERCAPS2_0, *LPD3DPSHADERCAPS2_0; +/* Capability record; embeds the two structs above as VS20Caps and PS20Caps. NOTE(review): presumably mirrors the Windows SDK layout member for member, so treat the member order as fixed - confirm before editing. */ +typedef struct _D3DCAPS9 { + D3DDEVTYPE DeviceType; + UINT AdapterOrdinal; + DWORD Caps; + DWORD Caps2; + DWORD Caps3; + DWORD PresentationIntervals; + DWORD CursorCaps; + DWORD DevCaps; + DWORD PrimitiveMiscCaps; + DWORD RasterCaps; + DWORD ZCmpCaps; + DWORD SrcBlendCaps; + DWORD DestBlendCaps; + DWORD AlphaCmpCaps; + DWORD ShadeCaps; + DWORD TextureCaps; + DWORD TextureFilterCaps; + DWORD CubeTextureFilterCaps; + DWORD VolumeTextureFilterCaps; + DWORD TextureAddressCaps; + DWORD VolumeTextureAddressCaps; + DWORD LineCaps; + DWORD MaxTextureWidth; + DWORD MaxTextureHeight; + DWORD MaxVolumeExtent; + DWORD MaxTextureRepeat; + DWORD MaxTextureAspectRatio; + DWORD MaxAnisotropy; + float MaxVertexW; + float GuardBandLeft; + float GuardBandTop; + float
GuardBandRight; + float GuardBandBottom; + float ExtentsAdjust; + DWORD StencilCaps; + DWORD FVFCaps; + DWORD TextureOpCaps; + DWORD MaxTextureBlendStages; + DWORD MaxSimultaneousTextures; + DWORD VertexProcessingCaps; + DWORD MaxActiveLights; + DWORD MaxUserClipPlanes; + DWORD MaxVertexBlendMatrices; + DWORD MaxVertexBlendMatrixIndex; + float MaxPointSize; + DWORD MaxPrimitiveCount; + DWORD MaxVertexIndex; + DWORD MaxStreams; + DWORD MaxStreamStride; + DWORD VertexShaderVersion; + DWORD MaxVertexShaderConst; + DWORD PixelShaderVersion; + float PixelShader1xMaxValue; + DWORD DevCaps2; + float MaxNpatchTessellationLevel; + DWORD Reserved5; + UINT MasterAdapterOrdinal; + UINT AdapterOrdinalInGroup; + UINT NumberOfAdaptersInGroup; + DWORD DeclTypes; + DWORD NumSimultaneousRTs; + DWORD StretchRectFilterCaps; + D3DVSHADERCAPS2_0 VS20Caps; + D3DPSHADERCAPS2_0 PS20Caps; + DWORD VertexTextureFilterCaps; + DWORD MaxVShaderInstructionsExecuted; + DWORD MaxPShaderInstructionsExecuted; + DWORD MaxVertexShader30InstructionSlots; + DWORD MaxPixelShader30InstructionSlots; +} D3DCAPS9, *PD3DCAPS9, *LPD3DCAPS9; + +typedef struct _D3DCONTENTPROTECTIONCAPS { + DWORD Caps; + GUID KeyExchangeType; + UINT BufferAlignmentStart; + UINT BlockAlignmentSize; + ULONGLONG ProtectedMemorySize; +} D3DCONTENTPROTECTIONCAPS, *PD3DCONTENTPROTECTIONCAPS, *LPD3DCONTENTPROTECTIONCAPS; + +typedef struct _D3DOVERLAYCAPS { + UINT Caps; + UINT MaxOverlayDisplayWidth; + UINT MaxOverlayDisplayHeight; +} D3DOVERLAYCAPS, *PD3DOVERLAYCAPS, *LPD3DOVERLAYCAPS; + +#endif /* _D3D9CAPS_H_ */ diff --git a/nine-native/include/D3D9/d3d9types.h b/nine-native/include/D3D9/d3d9types.h new file mode 100644 index 000000000..e18f29867 --- /dev/null +++ b/nine-native/include/D3D9/d3d9types.h @@ -0,0 +1,1817 @@ +/* + * Copyright 2011 Joakim Sindholt + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the
Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef _D3D9TYPES_H_ +#define _D3D9TYPES_H_ +/* Windows builds take the basic types from <windows.h>; every other platform gets the replacements declared below (up to the matching #endif), built on the fixed-width types of <stdint.h>. */ +#ifdef _WIN32 +#include <windows.h> +#else /* _WIN32 */ +#include <stdint.h> + +#ifndef NULL +#define NULL 0 +#endif + +/******************************************************** + * Windows types * + ********************************************************/ +/* Function macros */ +#define FAILED(x) ((HRESULT)(x) < 0) +#define SUCCEEDED(x) ((HRESULT)(x) >= 0) +/* Packs severity (bit 31), facility (from bit 16) and code into one HRESULT. The value is assembled in uint32_t and cast once, so the severity bit is never shifted into the sign bit of a signed type; the resulting bit pattern is unchanged. */ +#define MAKE_HRESULT(sev,fac,code) \ + ((HRESULT)( \ + ((uint32_t)(sev) << 31) | \ + ((uint32_t)(fac) << 16) | \ + (uint32_t)(code) \ + )) + +/* Windows errors */ +#define E_OUTOFMEMORY MAKE_HRESULT(1, 0x007, 14) +#define E_NOINTERFACE MAKE_HRESULT(1, 0x000, 0x4002) +#define E_POINTER MAKE_HRESULT(1, 0x000, 0x4003) +#define E_FAIL MAKE_HRESULT(1, 0x000, 0x4005) + +#define S_OK ((HRESULT)0) +#define S_FALSE ((HRESULT)1) + +/* WORD types */ +typedef uint8_t BYTE; +typedef uint16_t WORD; +typedef uint32_t DWORD; + +/* Renamed types */ +typedef int BOOL; +#ifndef FALSE +#define FALSE 0 +#define TRUE (!FALSE) +#endif + +typedef uint32_t UINT32; +typedef
uint64_t UINT64; +/* Note that ULONG and LONG are declared as unsigned int / int here, not as the platform's long. */ +typedef unsigned short USHORT; +typedef unsigned int UINT; +typedef unsigned int ULONG; +typedef unsigned long long ULONGLONG; + +typedef short SHORT; +typedef int INT; +typedef int LONG; +typedef long long LONGLONG; +typedef float FLOAT; + +/* Windows types */ +typedef void *HANDLE; +typedef int32_t HRESULT; +typedef HANDLE HWND; +typedef HANDLE HMONITOR; +typedef HANDLE HDC; + +/* Unions: LARGE_INTEGER overlays QuadPart with a LowPart/HighPart pair, reachable both anonymously and through the member u */ +typedef union { + struct { + DWORD LowPart; + LONG HighPart; + }; + + struct { + DWORD LowPart; + LONG HighPart; + } u; + + LONGLONG QuadPart; +} LARGE_INTEGER, *LPLARGE_INTEGER; + +/* Structs */ + +typedef struct _GUID { + DWORD Data1; + WORD Data2; + WORD Data3; + BYTE Data4[8]; +} GUID, IID, *LPGUID, *REFGUID, *REFIID; + +typedef struct _LUID { + DWORD LowPart; + LONG HighPart; +} LUID, *LPLUID, *PLUID; + +typedef struct _PALETTEENTRY { + BYTE peRed; + BYTE peGreen; + BYTE peBlue; + BYTE peFlags; +} PALETTEENTRY, *LPPALETTEENTRY; + +typedef struct _POINT { + LONG x; + LONG y; +} POINT, *LPPOINT; + +typedef struct _RECT { + LONG left; + LONG top; + LONG right; + LONG bottom; +} RECT, *LPRECT; + +typedef struct _RGNDATAHEADER { + DWORD dwSize; + DWORD iType; + DWORD nCount; + DWORD nRgnSize; + RECT rcBound; +} RGNDATAHEADER, *LPRGNDATAHEADER; + +typedef struct _RGNDATA { + RGNDATAHEADER rdh; + char Buffer[1]; +} RGNDATA, *LPRGNDATA; +#endif /* _WIN32 */ +/* MAKEFOURCC packs four bytes into a DWORD with a in bits 0-7, b in bits 8-15, c in bits 16-23 and d in bits 24-31; skipped if something already defined it. */ +#ifndef MAKEFOURCC +#define MAKEFOURCC(a, b, c, d) \ + ( \ + (DWORD)(BYTE)(a) | \ + ((DWORD)(BYTE)(b) << 8) | \ + ((DWORD)(BYTE)(c) << 16) | \ + ((DWORD)(BYTE)(d) << 24) \ + ) +#endif /* MAKEFOURCC */ + + +#define D3DPRESENTFLAG_LOCKABLE_BACKBUFFER 0x00000001 +#define D3DPRESENTFLAG_DISCARD_DEPTHSTENCIL 0x00000002 +#define D3DPRESENTFLAG_DEVICECLIP 0x00000004 +#define D3DPRESENTFLAG_VIDEO 0x00000010 +#define D3DPRESENTFLAG_NOAUTOROTATE 0x00000020 +#define D3DPRESENTFLAG_UNPRUNEDMODE 0x00000040 +#define D3DPRESENTFLAG_OVERLAY_LIMITEDRGB 0x00000080 +#define D3DPRESENTFLAG_OVERLAY_YCbCr_BT709 0x00000100
+#define D3DPRESENTFLAG_OVERLAY_YCbCr_xvYCC 0x00000200 +#define D3DPRESENTFLAG_RESTRICTED_CONTENT 0x00000400 +#define D3DPRESENTFLAG_RESTRICT_SHARED_RESOURCE_DRIVER 0x00000800 +/* WINAPI is only defined here when nothing defined it earlier: ms_abi on x86-64 (excluding ILP32), stdcall on i386, and empty on every other target. */ +/* Windows calling convention */ +#ifndef WINAPI + #if defined(__x86_64__) && !defined(__ILP32__) + #define WINAPI __attribute__((ms_abi)) + #elif defined(__i386__) + #define WINAPI __attribute__((__stdcall__)) + #else /* neither amd64 nor i386 */ + #define WINAPI + #endif +#endif /* WINAPI */ + +/* Implementation caps */ +#define D3DPRESENT_BACK_BUFFERS_MAX 3 +#define D3DPRESENT_BACK_BUFFERS_MAX_EX 30 +/* Both helpers use facility 0x876; MAKE_D3DHRESULT passes severity 1 and MAKE_D3DSTATUS passes severity 0 to MAKE_HRESULT. */ +/* Functions */ +#define MAKE_D3DHRESULT(code) MAKE_HRESULT(1, 0x876, code) +#define MAKE_D3DSTATUS(code) MAKE_HRESULT(0, 0x876, code) + +/* SDK version */ +#define D3D_SDK_VERSION 32 + +/* Adapter */ +#define D3DADAPTER_DEFAULT 0 + +/******************************************************** + * Return codes * + ********************************************************/ +#define D3D_OK S_OK +#define D3DOK_NOAUTOGEN MAKE_D3DSTATUS(2159) +#define D3DERR_OUTOFVIDEOMEMORY MAKE_D3DHRESULT(380) +#define D3DERR_WASSTILLDRAWING MAKE_D3DHRESULT(540) +#define D3DERR_WRONGTEXTUREFORMAT MAKE_D3DHRESULT(2072) +#define D3DERR_UNSUPPORTEDCOLOROPERATION MAKE_D3DHRESULT(2073) +#define D3DERR_UNSUPPORTEDCOLORARG MAKE_D3DHRESULT(2074) +#define D3DERR_UNSUPPORTEDALPHAOPERATION MAKE_D3DHRESULT(2075) +#define D3DERR_UNSUPPORTEDALPHAARG MAKE_D3DHRESULT(2076) +#define D3DERR_TOOMANYOPERATIONS MAKE_D3DHRESULT(2077) +#define D3DERR_CONFLICTINGTEXTUREFILTER MAKE_D3DHRESULT(2078) +#define D3DERR_UNSUPPORTEDFACTORVALUE MAKE_D3DHRESULT(2079) +#define D3DERR_CONFLICTINGRENDERSTATE MAKE_D3DHRESULT(2081) +#define D3DERR_UNSUPPORTEDTEXTUREFILTER MAKE_D3DHRESULT(2082) +#define D3DERR_CONFLICTINGTEXTUREPALETTE MAKE_D3DHRESULT(2086) +#define D3DERR_DRIVERINTERNALERROR MAKE_D3DHRESULT(2087) +#define D3DERR_NOTFOUND MAKE_D3DHRESULT(2150) +#define D3DERR_MOREDATA MAKE_D3DHRESULT(2151) +#define D3DERR_DEVICELOST
MAKE_D3DHRESULT(2152) +#define D3DERR_DEVICENOTRESET MAKE_D3DHRESULT(2153) +#define D3DERR_NOTAVAILABLE MAKE_D3DHRESULT(2154) +#define D3DERR_INVALIDDEVICE MAKE_D3DHRESULT(2155) +#define D3DERR_INVALIDCALL MAKE_D3DHRESULT(2156) +#define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157) +#define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160) +#define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164) +#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168) +/* Everything from here on is a plain bit mask; the groups are keyed by the macro-name prefix. */ +/******************************************************** + * Bitmasks * + *******************************************************/ +/* IDirect3DDevice9::Clear */ +#define D3DCLEAR_TARGET 0x00000001 +#define D3DCLEAR_ZBUFFER 0x00000002 +#define D3DCLEAR_STENCIL 0x00000004 +/* D3DUSAGE_* flags; the entries wrapped in #ifndef D3D_DISABLE_9EX are left out when that macro is defined. */ +/* Usage */ +/* http://msdn.microsoft.com/en-us/library/ee416659(VS.85).aspx */ +#define D3DUSAGE_RENDERTARGET 0x00000001 +#define D3DUSAGE_DEPTHSTENCIL 0x00000002 +#define D3DUSAGE_WRITEONLY 0x00000008 +#define D3DUSAGE_SOFTWAREPROCESSING 0x00000010 +#define D3DUSAGE_DONOTCLIP 0x00000020 +#define D3DUSAGE_POINTS 0x00000040 +#define D3DUSAGE_RTPATCHES 0x00000080 +#define D3DUSAGE_NPATCHES 0x00000100 +#define D3DUSAGE_DYNAMIC 0x00000200 +#define D3DUSAGE_AUTOGENMIPMAP 0x00000400 +#ifndef D3D_DISABLE_9EX +#define D3DUSAGE_RESTRICTED_CONTENT 0x00000800 +#define D3DUSAGE_RESTRICT_SHARED_RESOURCE_DRIVER 0x00001000 +#define D3DUSAGE_RESTRICT_SHARED_RESOURCE 0x00002000 +#endif +#define D3DUSAGE_DMAP 0x00004000 +#define D3DUSAGE_QUERY_LEGACYBUMPMAP 0x00008000 +#define D3DUSAGE_QUERY_SRGBREAD 0x00010000 +#define D3DUSAGE_QUERY_FILTER 0x00020000 +#define D3DUSAGE_QUERY_SRGBWRITE 0x00040000 +#define D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING 0x00080000 +#define D3DUSAGE_QUERY_VERTEXTEXTURE 0x00100000 +#define D3DUSAGE_QUERY_WRAPANDMIP 0x00200000 +#ifndef D3D_DISABLE_9EX +#define D3DUSAGE_NONSECURE 0x00800000 +#define D3DUSAGE_TEXTAPI 0x10000000 +#endif + +/* Buffer locking */ +/* http://msdn.microsoft.com/en-us/library/ee416503(VS.85).aspx */ +#define D3DLOCK_READONLY 0x00000010
+#define D3DLOCK_NOSYSLOCK 0x00000800 +#define D3DLOCK_NOOVERWRITE 0x00001000 +#define D3DLOCK_DISCARD 0x00002000 +#define D3DLOCK_DONOTWAIT 0x00004000 +#define D3DLOCK_NO_DIRTY_UPDATE 0x00008000 + +/* FVF */ +/* http://msdn.microsoft.com/en-us/library/ee416490(VS.85).aspx */ +/* http://msdn.microsoft.com/en-us/library/ms791638.aspx */ +#define D3DFVF_XYZ 0x00000002 +#define D3DFVF_XYZRHW 0x00000004 +#define D3DFVF_XYZB1 0x00000006 +#define D3DFVF_XYZB2 0x00000008 +#define D3DFVF_XYZB3 0x0000000A +#define D3DFVF_XYZB4 0x0000000C +#define D3DFVF_XYZB5 0x0000000E +#define D3DFVF_XYZW 0x00004002 +#define D3DFVF_POSITION_MASK 0x0000400E + +#define D3DFVF_NORMAL 0x00000010 +#define D3DFVF_PSIZE 0x00000020 +#define D3DFVF_DIFFUSE 0x00000040 +#define D3DFVF_SPECULAR 0x00000080 +/* D3DFVF_TEXn is n shifted left by D3DFVF_TEXCOUNT_SHIFT (8); D3DFVF_TEXCOUNT_MASK covers that 4-bit field. */ +#define D3DFVF_TEX0 0x00000000 +#define D3DFVF_TEX1 0x00000100 +#define D3DFVF_TEX2 0x00000200 +#define D3DFVF_TEX3 0x00000300 +#define D3DFVF_TEX4 0x00000400 +#define D3DFVF_TEX5 0x00000500 +#define D3DFVF_TEX6 0x00000600 +#define D3DFVF_TEX7 0x00000700 +#define D3DFVF_TEX8 0x00000800 +#define D3DFVF_TEXCOUNT_MASK 0x00000F00 +#define D3DFVF_TEXCOUNT_SHIFT 8 +#define D3DFVF_TEXTUREFORMAT1 0x00000003 +#define D3DFVF_TEXTUREFORMAT2 0x00000000 +#define D3DFVF_TEXTUREFORMAT3 0x00000001 +#define D3DFVF_TEXTUREFORMAT4 0x00000002 +/* The next three macros repeat definitions made above with identical values, which C permits as a benign redefinition. */ +#define D3DFVF_POSITION_MASK 0x0000400E +#define D3DFVF_TEXCOUNT_MASK 0x00000F00 +#define D3DFVF_TEXCOUNT_SHIFT 8 + +#define D3DFVF_LASTBETA_UBYTE4 0x00001000 +#define D3DFVF_LASTBETA_D3DCOLOR 0x00008000 + +#define D3DFVF_RESERVED0 0x00000001 +#define D3DFVF_RESERVED2 0x00006000 +/* D3DTA_*: the values 0-6 fit inside D3DTA_SELECTMASK; COMPLEMENT and ALPHAREPLICATE are separate bits above that mask. */ +#define D3DTA_SELECTMASK 0x0000000f +#define D3DTA_DIFFUSE 0x00000000 +#define D3DTA_CURRENT 0x00000001 +#define D3DTA_TEXTURE 0x00000002 +#define D3DTA_TFACTOR 0x00000003 +#define D3DTA_SPECULAR 0x00000004 +#define D3DTA_TEMP 0x00000005 +#define D3DTA_CONSTANT 0x00000006 +#define D3DTA_COMPLEMENT 0x00000010 +#define D3DTA_ALPHAREPLICATE 0x00000020 + +#define D3DSPD_IUNKNOWN 0x00000001 + +#define
D3DPRESENT_DONOTWAIT 0x00000001 +#define D3DPRESENT_LINEAR_CONTENT 0x00000002 +#define D3DPRESENT_RATE_DEFAULT 0 + +#define D3DCREATE_FPU_PRESERVE 0x00000002 +#define D3DCREATE_MULTITHREADED 0x00000004 +#define D3DCREATE_PUREDEVICE 0x00000010 +#define D3DCREATE_SOFTWARE_VERTEXPROCESSING 0x00000020 +#define D3DCREATE_HARDWARE_VERTEXPROCESSING 0x00000040 +#define D3DCREATE_MIXED_VERTEXPROCESSING 0x00000080 +#define D3DCREATE_DISABLE_DRIVER_MANAGEMENT 0x00000100 +#define D3DCREATE_ADAPTERGROUP_DEVICE 0x00000200 +/* Unsigned literals on purpose: 2 << 30 does not fit in a signed int, so both flags are built from unsigned operands (0x40000000 and 0x80000000). */ +#define D3DSTREAMSOURCE_INDEXEDDATA (1U << 30) +#define D3DSTREAMSOURCE_INSTANCEDATA (2U << 30) + +/* D3DRS_COLORWRITEENABLE */ +#define D3DCOLORWRITEENABLE_RED (1L << 0) +#define D3DCOLORWRITEENABLE_GREEN (1L << 1) +#define D3DCOLORWRITEENABLE_BLUE (1L << 2) +#define D3DCOLORWRITEENABLE_ALPHA (1L << 3) + + +/******************************************************** + * Function macros * + *******************************************************/ + +/* Colors: four 8-bit channels packed as 0xAARRGGBB. Each channel is converted to D3DCOLOR before masking and shifting so that an alpha of 0x80 or more is never shifted inside a signed int. */ +#define D3DCOLOR_ARGB(a,r,g,b) \ + ((D3DCOLOR)( \ + (((D3DCOLOR)(a) & 0xFF) << 24) | \ + (((D3DCOLOR)(r) & 0xFF) << 16) | \ + (((D3DCOLOR)(g) & 0xFF) << 8) | \ + ((D3DCOLOR)(b) & 0xFF) \ + )) +/* The remaining colour helpers only reorder their arguments or supply 0xFF as the first (alpha) channel before forwarding to D3DCOLOR_ARGB. */ +#define D3DCOLOR_RGBA(r,g,b,a) D3DCOLOR_ARGB(a,r,g,b) +#define D3DCOLOR_XRGB(r,g,b) D3DCOLOR_ARGB(0xFF,r,g,b) +#define D3DCOLOR_AYUV(a,y,u,v) D3DCOLOR_ARGB(a,y,u,v) +#define D3DCOLOR_XYUV(y,u,v) D3DCOLOR_ARGB(0xFF,y,u,v) +/* Scales each channel by 255.0f and truncates it to DWORD before packing. */ +#define D3DCOLOR_COLORVALUE(r,g,b,a) \ + D3DCOLOR_RGBA( \ + (DWORD)((r) * 255.0f), \ + (DWORD)((g) * 255.0f), \ + (DWORD)((b) * 255.0f), \ + (DWORD)((a) * 255.0f) \ + ) + +/* Shaders */ +#define D3DDECL_END() { 0xFF, 0, D3DDECLTYPE_UNUSED, 0, 0, 0 } + +/***************************************************************************** + * Typedefs * + *****************************************************************************/ +typedef DWORD D3DCOLOR; + +/***************************************************************************** + * Enums * + *****************************************************************************/ +typedef
enum D3DDISPLAYROTATION { + D3DDISPLAYROTATION_IDENTITY = 1, + D3DDISPLAYROTATION_90 = 2, + D3DDISPLAYROTATION_180 = 3, + D3DDISPLAYROTATION_270 = 4 +} D3DDISPLAYROTATION; + +typedef enum D3DSCANLINEORDERING { + D3DSCANLINEORDERING_UNKNOWN = 0, + D3DSCANLINEORDERING_PROGRESSIVE = 1, + D3DSCANLINEORDERING_INTERLACED = 2 +} D3DSCANLINEORDERING; + +typedef enum _D3DAUTHENTICATEDCHANNELTYPE { + D3DAUTHENTICATEDCHANNEL_D3D9 = 1, + D3DAUTHENTICATEDCHANNEL_DRIVER_SOFTWARE = 2, + D3DAUTHENTICATEDCHANNEL_DRIVER_HARDWARE = 3 +} D3DAUTHENTICATEDCHANNELTYPE; + +typedef enum _D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE { + PROCESSIDTYPE_UNKNOWN = 0, + PROCESSIDTYPE_DWM = 1, + PROCESSIDTYPE_HANDLE = 2 +} D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE; + +typedef enum _D3DBACKBUFFER_TYPE { + D3DBACKBUFFER_TYPE_MONO = 0, + D3DBACKBUFFER_TYPE_LEFT = 1, + D3DBACKBUFFER_TYPE_RIGHT = 2 +} D3DBACKBUFFER_TYPE; + +typedef enum _D3DBASISTYPE { + D3DBASIS_BEZIER = 0, + D3DBASIS_BSPLINE = 1, + D3DBASIS_CATMULL_ROM = 2 +} D3DBASISTYPE; + +typedef enum _D3DBLEND { + D3DBLEND_ZERO = 1, + D3DBLEND_ONE = 2, + D3DBLEND_SRCCOLOR = 3, + D3DBLEND_INVSRCCOLOR = 4, + D3DBLEND_SRCALPHA = 5, + D3DBLEND_INVSRCALPHA = 6, + D3DBLEND_DESTALPHA = 7, + D3DBLEND_INVDESTALPHA = 8, + D3DBLEND_DESTCOLOR = 9, + D3DBLEND_INVDESTCOLOR = 10, + D3DBLEND_SRCALPHASAT = 11, + D3DBLEND_BOTHSRCALPHA = 12, + D3DBLEND_BOTHINVSRCALPHA = 13, + D3DBLEND_BLENDFACTOR = 14, + D3DBLEND_INVBLENDFACTOR = 15, + D3DBLEND_SRCCOLOR2 = 16, + D3DBLEND_INVSRCCOLOR2 = 17 +} D3DBLEND; + +typedef enum _D3DBLENDOP { + D3DBLENDOP_ADD = 1, + D3DBLENDOP_SUBTRACT = 2, + D3DBLENDOP_REVSUBTRACT = 3, + D3DBLENDOP_MIN = 4, + D3DBLENDOP_MAX = 5 +} D3DBLENDOP; + +typedef enum _D3DBUSTYPE { + D3DBUSTYPE_OTHER = 0x00000000, + D3DBUSTYPE_PCI = 0x00000001, + D3DBUSTYPE_PCIX = 0x00000002, + D3DBUSTYPE_PCIEXPRESS = 0x00000003, + D3DBUSTYPE_AGP = 0x00000004, + D3DBUSIMPL_MODIFIER_INSIDE_OF_CHIPSET = 0x00010000, +
D3DBUSIMPL_MODIFIER_TRACKS_ON_MOTHER_BOARD_TO_CHIP = 0x00020000, + D3DBUSIMPL_MODIFIER_TRACKS_ON_MOTHER_BOARD_TO_SOCKET = 0x00030000, + D3DBUSIMPL_MODIFIER_DAUGHTER_BOARD_CONNECTOR = 0x00040000, + D3DBUSIMPL_MODIFIER_DAUGHTER_BOARD_CONNECTOR_INSIDE_OF_NUAE = 0x00050000, + D3DBUSIMPL_MODIFIER_NON_STANDARD = 0x80000000 +} D3DBUSTYPE; + +typedef enum _D3DCMPFUNC { + D3DCMP_NEVER_ZERO = 0, //Needed to avoid warnings + D3DCMP_NEVER = 1, + D3DCMP_LESS = 2, + D3DCMP_EQUAL = 3, + D3DCMP_LESSEQUAL = 4, + D3DCMP_GREATER = 5, + D3DCMP_NOTEQUAL = 6, + D3DCMP_GREATEREQUAL = 7, + D3DCMP_ALWAYS = 8 +} D3DCMPFUNC; + +typedef enum _D3DCOMPOSERECTSOP{ + D3DCOMPOSERECTS_COPY = 1, + D3DCOMPOSERECTS_OR = 2, + D3DCOMPOSERECTS_AND = 3, + D3DCOMPOSERECTS_NEG = 4 +} D3DCOMPOSERECTSOP; + +typedef enum _D3DCUBEMAP_FACES { + D3DCUBEMAP_FACE_POSITIVE_X = 0, + D3DCUBEMAP_FACE_NEGATIVE_X = 1, + D3DCUBEMAP_FACE_POSITIVE_Y = 2, + D3DCUBEMAP_FACE_NEGATIVE_Y = 3, + D3DCUBEMAP_FACE_POSITIVE_Z = 4, + D3DCUBEMAP_FACE_NEGATIVE_Z = 5 +} D3DCUBEMAP_FACES; + +typedef enum _D3DCULL { + D3DCULL_NONE = 1, + D3DCULL_CW = 2, + D3DCULL_CCW = 3 +} D3DCULL; + +typedef enum _D3DDEBUGMONITORTOKENS { + D3DDMT_ENABLE = 0, + D3DDMT_DISABLE = 1 +} D3DDEBUGMONITORTOKENS; + +typedef enum _D3DDECLMETHOD { + D3DDECLMETHOD_DEFAULT = 0, + D3DDECLMETHOD_PARTIALU = 1, + D3DDECLMETHOD_PARTIALV = 2, + D3DDECLMETHOD_CROSSUV = 3, + D3DDECLMETHOD_UV = 4, + D3DDECLMETHOD_LOOKUP = 5, + D3DDECLMETHOD_LOOKUPPRESAMPLED = 6 +} D3DDECLMETHOD; + +typedef enum _D3DDECLTYPE { + D3DDECLTYPE_FLOAT1 = 0, + D3DDECLTYPE_FLOAT2 = 1, + D3DDECLTYPE_FLOAT3 = 2, + D3DDECLTYPE_FLOAT4 = 3, + D3DDECLTYPE_D3DCOLOR = 4, + D3DDECLTYPE_UBYTE4 = 5, + D3DDECLTYPE_SHORT2 = 6, + D3DDECLTYPE_SHORT4 = 7, + D3DDECLTYPE_UBYTE4N = 8, + D3DDECLTYPE_SHORT2N = 9, + D3DDECLTYPE_SHORT4N = 10, + D3DDECLTYPE_USHORT2N = 11, + D3DDECLTYPE_USHORT4N = 12, + D3DDECLTYPE_UDEC3 = 13, + D3DDECLTYPE_DEC3N = 14, + D3DDECLTYPE_FLOAT16_2 = 15, + D3DDECLTYPE_FLOAT16_4 = 16, +
D3DDECLTYPE_UNUSED = 17 +} D3DDECLTYPE; + +typedef enum _D3DDECLUSAGE { + D3DDECLUSAGE_POSITION = 0, + D3DDECLUSAGE_BLENDWEIGHT = 1, + D3DDECLUSAGE_BLENDINDICES = 2, + D3DDECLUSAGE_NORMAL = 3, + D3DDECLUSAGE_PSIZE = 4, + D3DDECLUSAGE_TEXCOORD = 5, + D3DDECLUSAGE_TANGENT = 6, + D3DDECLUSAGE_BINORMAL = 7, + D3DDECLUSAGE_TESSFACTOR = 8, + D3DDECLUSAGE_POSITIONT = 9, + D3DDECLUSAGE_COLOR = 10, + D3DDECLUSAGE_FOG = 11, + D3DDECLUSAGE_DEPTH = 12, + D3DDECLUSAGE_SAMPLE = 13 +} D3DDECLUSAGE; + +typedef enum _D3DDEGREETYPE { + D3DDEGREE_LINEAR = 1, + D3DDEGREE_QUADRATIC = 2, + D3DDEGREE_CUBIC = 3, + D3DDEGREE_QUINTIC = 5 +} D3DDEGREETYPE; + +typedef enum _D3DDEVTYPE { + D3DDEVTYPE_HAL = 1, + D3DDEVTYPE_REF = 2, + D3DDEVTYPE_SW = 3, + D3DDEVTYPE_NULLREF = 4 +} D3DDEVTYPE; + +typedef enum _D3DFILLMODE { + D3DFILL_SOLID_ZERO = 0, + D3DFILL_POINT = 1, + D3DFILL_WIREFRAME = 2, + D3DFILL_SOLID = 3 +} D3DFILLMODE; + +typedef enum _D3DFOGMODE { + D3DFOG_NONE = 0, + D3DFOG_EXP = 1, + D3DFOG_EXP2 = 2, + D3DFOG_LINEAR = 3 +} D3DFOGMODE; +/* D3DFORMAT mixes small integer codes with four-character codes built by MAKEFOURCC. */ +typedef enum _D3DFORMAT { + D3DFMT_UNKNOWN = 0, + D3DFMT_R8G8B8 = 20, + D3DFMT_A8R8G8B8 = 21, + D3DFMT_X8R8G8B8 = 22, + D3DFMT_R5G6B5 = 23, + D3DFMT_X1R5G5B5 = 24, + D3DFMT_A1R5G5B5 = 25, + D3DFMT_A4R4G4B4 = 26, + D3DFMT_R3G3B2 = 27, + D3DFMT_A8 = 28, + D3DFMT_A8R3G3B2 = 29, + D3DFMT_X4R4G4B4 = 30, + D3DFMT_A2B10G10R10 = 31, + D3DFMT_A8B8G8R8 = 32, + D3DFMT_X8B8G8R8 = 33, + D3DFMT_G16R16 = 34, + D3DFMT_A2R10G10B10 = 35, + D3DFMT_A16B16G16R16 = 36, + D3DFMT_A8P8 = 40, + D3DFMT_P8 = 41, + D3DFMT_L8 = 50, + D3DFMT_A8L8 = 51, + D3DFMT_A4L4 = 52, + D3DFMT_V8U8 = 60, + D3DFMT_L6V5U5 = 61, + D3DFMT_X8L8V8U8 = 62, + D3DFMT_Q8W8V8U8 = 63, + D3DFMT_V16U16 = 64, + D3DFMT_A2W10V10U10 = 67, + D3DFMT_UYVY = MAKEFOURCC('U', 'Y', 'V', 'Y'), + D3DFMT_R8G8_B8G8 = MAKEFOURCC('R', 'G', 'B', 'G'), + D3DFMT_YUY2 = MAKEFOURCC('Y', 'U', 'Y', '2'), + D3DFMT_G8R8_G8B8 = MAKEFOURCC('G', 'R', 'G', 'B'), + D3DFMT_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), + D3DFMT_DXT2 =
MAKEFOURCC('D', 'X', 'T', '2'), + D3DFMT_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), + D3DFMT_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), + D3DFMT_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), + D3DFMT_D16_LOCKABLE = 70, + D3DFMT_D32 = 71, + D3DFMT_D15S1 = 73, + D3DFMT_D24S8 = 75, + D3DFMT_D24X8 = 77, + D3DFMT_D24X4S4 = 79, + D3DFMT_D16 = 80, + D3DFMT_D32F_LOCKABLE = 82, + D3DFMT_D24FS8 = 83, + D3DFMT_D32_LOCKABLE = 84, + D3DFMT_S8_LOCKABLE = 85, + D3DFMT_L16 = 81, + D3DFMT_VERTEXDATA = 100, + D3DFMT_INDEX16 = 101, + D3DFMT_INDEX32 = 102, + D3DFMT_Q16W16V16U16 = 110, + D3DFMT_MULTI2_ARGB8 = MAKEFOURCC('M','E','T','1'), + D3DFMT_R16F = 111, + D3DFMT_G16R16F = 112, + D3DFMT_A16B16G16R16F = 113, + D3DFMT_R32F = 114, + D3DFMT_G32R32F = 115, + D3DFMT_A32B32G32R32F = 116, + D3DFMT_CxV8U8 = 117, + D3DFMT_A1 = 118, + D3DFMT_A2B10G10R10_XR_BIAS = 119, + D3DFMT_BINARYBUFFER = 199, + D3DFMT_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), + D3DFMT_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), + D3DFMT_ATOC = MAKEFOURCC('A', 'T', 'O', 'C'), + D3DFMT_DF16 = MAKEFOURCC('D', 'F', '1', '6'), + D3DFMT_DF24 = MAKEFOURCC('D', 'F', '2', '4'), + D3DFMT_INTZ = MAKEFOURCC('I', 'N', 'T', 'Z'), + D3DFMT_NULL = MAKEFOURCC('N', 'U', 'L', 'L'), + D3DFMT_NVDB = MAKEFOURCC('N', 'V', 'D', 'B'), + D3DFMT_NV11 = MAKEFOURCC('N', 'V', '1', '1'), + D3DFMT_NV12 = MAKEFOURCC('N', 'V', '1', '2'), + D3DFMT_RESZ = MAKEFOURCC('R', 'E', 'S', 'Z'), + D3DFMT_Y210 = MAKEFOURCC('Y', '2', '1', '0'), + D3DFMT_Y216 = MAKEFOURCC('Y', '2', '1', '6'), + D3DFMT_Y410 = MAKEFOURCC('Y', '4', '1', '0') +} D3DFORMAT; + +typedef enum _D3DLIGHTTYPE { + D3DLIGHT_POINT = 1, + D3DLIGHT_SPOT = 2, + D3DLIGHT_DIRECTIONAL = 3 +} D3DLIGHTTYPE; + +typedef enum _D3DMATERIALCOLORSOURCE { + D3DMCS_MATERIAL = 0, + D3DMCS_COLOR1 = 1, + D3DMCS_COLOR2 = 2 +} D3DMATERIALCOLORSOURCE; + +typedef enum _D3DMULTISAMPLE_TYPE { + D3DMULTISAMPLE_NONE = 0, + D3DMULTISAMPLE_NONMASKABLE = 1, + D3DMULTISAMPLE_2_SAMPLES = 2, + D3DMULTISAMPLE_3_SAMPLES = 3, + D3DMULTISAMPLE_4_SAMPLES = 4,
+ D3DMULTISAMPLE_5_SAMPLES = 5, + D3DMULTISAMPLE_6_SAMPLES = 6, + D3DMULTISAMPLE_7_SAMPLES = 7, + D3DMULTISAMPLE_8_SAMPLES = 8, + D3DMULTISAMPLE_9_SAMPLES = 9, + D3DMULTISAMPLE_10_SAMPLES = 10, + D3DMULTISAMPLE_11_SAMPLES = 11, + D3DMULTISAMPLE_12_SAMPLES = 12, + D3DMULTISAMPLE_13_SAMPLES = 13, + D3DMULTISAMPLE_14_SAMPLES = 14, + D3DMULTISAMPLE_15_SAMPLES = 15, + D3DMULTISAMPLE_16_SAMPLES = 16 +} D3DMULTISAMPLE_TYPE; + +typedef enum _D3DPATCHEDGESTYLE { + D3DPATCHEDGE_DISCRETE = 0, + D3DPATCHEDGE_CONTINUOUS = 1 +} D3DPATCHEDGESTYLE; + +typedef enum _D3DPOOL { + D3DPOOL_DEFAULT = 0, + D3DPOOL_MANAGED = 1, + D3DPOOL_SYSTEMMEM = 2, + D3DPOOL_SCRATCH = 3 +} D3DPOOL; + +typedef enum _D3DPRIMITIVETYPE { + D3DPT_POINTLIST = 1, + D3DPT_LINELIST = 2, + D3DPT_LINESTRIP = 3, + D3DPT_TRIANGLELIST = 4, + D3DPT_TRIANGLESTRIP = 5, + D3DPT_TRIANGLEFAN = 6 +} D3DPRIMITIVETYPE; + +typedef enum _D3DQUERYTYPE { + D3DQUERYTYPE_VCACHE = 4, + D3DQUERYTYPE_RESOURCEMANAGER = 5, + D3DQUERYTYPE_VERTEXSTATS = 6, + D3DQUERYTYPE_EVENT = 8, + D3DQUERYTYPE_OCCLUSION = 9, + D3DQUERYTYPE_TIMESTAMP = 10, + D3DQUERYTYPE_TIMESTAMPDISJOINT = 11, + D3DQUERYTYPE_TIMESTAMPFREQ = 12, + D3DQUERYTYPE_PIPELINETIMINGS = 13, + D3DQUERYTYPE_INTERFACETIMINGS = 14, + D3DQUERYTYPE_VERTEXTIMINGS = 15, + D3DQUERYTYPE_PIXELTIMINGS = 16, + D3DQUERYTYPE_BANDWIDTHTIMINGS = 17, + D3DQUERYTYPE_CACHEUTILIZATION = 18, + D3DQUERYTYPE_MEMORYPRESSURE = 19 +} D3DQUERYTYPE; + +#define D3DISSUE_BEGIN (1 << 1) +#define D3DISSUE_END (1 << 0) +#define D3DGETDATA_FLUSH (1 << 0) + + +typedef enum _D3DRENDERSTATETYPE { + D3DRS_ZENABLE = 7, + D3DRS_FILLMODE = 8, + D3DRS_SHADEMODE = 9, + D3DRS_ZWRITEENABLE = 14, + D3DRS_ALPHATESTENABLE = 15, + D3DRS_LASTPIXEL = 16, + D3DRS_SRCBLEND = 19, + D3DRS_DESTBLEND = 20, + D3DRS_CULLMODE = 22, + D3DRS_ZFUNC = 23, + D3DRS_ALPHAREF = 24, + D3DRS_ALPHAFUNC = 25, + D3DRS_DITHERENABLE = 26, + D3DRS_ALPHABLENDENABLE = 27, + D3DRS_FOGENABLE = 28, + D3DRS_SPECULARENABLE = 29, + D3DRS_FOGCOLOR = 34, +
D3DRS_FOGTABLEMODE = 35, + D3DRS_FOGSTART = 36, + D3DRS_FOGEND = 37, + D3DRS_FOGDENSITY = 38, + D3DRS_RANGEFOGENABLE = 48, + D3DRS_STENCILENABLE = 52, + D3DRS_STENCILFAIL = 53, + D3DRS_STENCILZFAIL = 54, + D3DRS_STENCILPASS = 55, + D3DRS_STENCILFUNC = 56, + D3DRS_STENCILREF = 57, + D3DRS_STENCILMASK = 58, + D3DRS_STENCILWRITEMASK = 59, + D3DRS_TEXTUREFACTOR = 60, + D3DRS_WRAP0 = 128, + D3DRS_WRAP1 = 129, + D3DRS_WRAP2 = 130, + D3DRS_WRAP3 = 131, + D3DRS_WRAP4 = 132, + D3DRS_WRAP5 = 133, + D3DRS_WRAP6 = 134, + D3DRS_WRAP7 = 135, + D3DRS_CLIPPING = 136, + D3DRS_LIGHTING = 137, + D3DRS_AMBIENT = 139, + D3DRS_FOGVERTEXMODE = 140, + D3DRS_COLORVERTEX = 141, + D3DRS_LOCALVIEWER = 142, + D3DRS_NORMALIZENORMALS = 143, + D3DRS_DIFFUSEMATERIALSOURCE = 145, + D3DRS_SPECULARMATERIALSOURCE = 146, + D3DRS_AMBIENTMATERIALSOURCE = 147, + D3DRS_EMISSIVEMATERIALSOURCE = 148, + D3DRS_VERTEXBLEND = 151, + D3DRS_CLIPPLANEENABLE = 152, + D3DRS_POINTSIZE = 154, + D3DRS_POINTSIZE_MIN = 155, + D3DRS_POINTSPRITEENABLE = 156, + D3DRS_POINTSCALEENABLE = 157, + D3DRS_POINTSCALE_A = 158, + D3DRS_POINTSCALE_B = 159, + D3DRS_POINTSCALE_C = 160, + D3DRS_MULTISAMPLEANTIALIAS = 161, + D3DRS_MULTISAMPLEMASK = 162, + D3DRS_PATCHEDGESTYLE = 163, + D3DRS_DEBUGMONITORTOKEN = 165, + D3DRS_POINTSIZE_MAX = 166, + D3DRS_INDEXEDVERTEXBLENDENABLE = 167, + D3DRS_COLORWRITEENABLE = 168, + D3DRS_TWEENFACTOR = 170, + D3DRS_BLENDOP = 171, + D3DRS_POSITIONDEGREE = 172, + D3DRS_NORMALDEGREE = 173, + D3DRS_SCISSORTESTENABLE = 174, + D3DRS_SLOPESCALEDEPTHBIAS = 175, + D3DRS_ANTIALIASEDLINEENABLE = 176, + D3DRS_MINTESSELLATIONLEVEL = 178, + D3DRS_MAXTESSELLATIONLEVEL = 179, + D3DRS_ADAPTIVETESS_X = 180, + D3DRS_ADAPTIVETESS_Y = 181, + D3DRS_ADAPTIVETESS_Z = 182, + D3DRS_ADAPTIVETESS_W = 183, + D3DRS_ENABLEADAPTIVETESSELLATION = 184, + D3DRS_TWOSIDEDSTENCILMODE = 185, + D3DRS_CCW_STENCILFAIL = 186, + D3DRS_CCW_STENCILZFAIL = 187, + D3DRS_CCW_STENCILPASS = 188, + D3DRS_CCW_STENCILFUNC = 189, + D3DRS_COLORWRITEENABLE1 =
190, + D3DRS_COLORWRITEENABLE2 = 191, + D3DRS_COLORWRITEENABLE3 = 192, + D3DRS_BLENDFACTOR = 193, + D3DRS_SRGBWRITEENABLE = 194, + D3DRS_DEPTHBIAS = 195, + D3DRS_WRAP8 = 198, + D3DRS_WRAP9 = 199, + D3DRS_WRAP10 = 200, + D3DRS_WRAP11 = 201, + D3DRS_WRAP12 = 202, + D3DRS_WRAP13 = 203, + D3DRS_WRAP14 = 204, + D3DRS_WRAP15 = 205, + D3DRS_SEPARATEALPHABLENDENABLE = 206, + D3DRS_SRCBLENDALPHA = 207, + D3DRS_DESTBLENDALPHA = 208, + D3DRS_BLENDOPALPHA = 209 +} D3DRENDERSTATETYPE; + +typedef enum _D3DRESOURCETYPE { + D3DRTYPE_SURFACE = 1, + D3DRTYPE_VOLUME = 2, + D3DRTYPE_TEXTURE = 3, + D3DRTYPE_VOLUMETEXTURE = 4, + D3DRTYPE_CUBETEXTURE = 5, + D3DRTYPE_VERTEXBUFFER = 6, + D3DRTYPE_INDEXBUFFER = 7 +} D3DRESOURCETYPE; +#define D3DRTYPECOUNT (D3DRTYPE_INDEXBUFFER+1) + +typedef enum _D3DSAMPLERSTATETYPE { + D3DSAMP_ADDRESSU = 1, + D3DSAMP_ADDRESSV = 2, + D3DSAMP_ADDRESSW = 3, + D3DSAMP_BORDERCOLOR = 4, + D3DSAMP_MAGFILTER = 5, + D3DSAMP_MINFILTER = 6, + D3DSAMP_MIPFILTER = 7, + D3DSAMP_MIPMAPLODBIAS = 8, + D3DSAMP_MAXMIPLEVEL = 9, + D3DSAMP_MAXANISOTROPY = 10, + D3DSAMP_SRGBTEXTURE = 11, + D3DSAMP_ELEMENTINDEX = 12, + D3DSAMP_DMAPOFFSET = 13 +} D3DSAMPLERSTATETYPE; + +typedef enum _D3DSAMPLER_TEXTURE_TYPE { + D3DSTT_UNKNOWN = 0<<27, + D3DSTT_1D = 1<<27, + D3DSTT_2D = 2<<27, + D3DSTT_CUBE = 3<<27, + D3DSTT_VOLUME = 4<<27 +} D3DSAMPLER_TEXTURE_TYPE; + +typedef enum _D3DSHADEMODE { + D3DSHADE_FLAT = 1, + D3DSHADE_GOURAUD = 2, + D3DSHADE_PHONG = 3 +} D3DSHADEMODE; + +typedef enum _D3DSHADER_ADDRESSMODE_TYPE { + D3DSHADER_ADDRMODE_ABSOLUTE = 0<<13, + D3DSHADER_ADDRMODE_RELATIVE = 1<<13 +} D3DSHADER_ADDRESSMODE_TYPE; + +typedef enum _D3DSHADER_COMPARISON { + D3DSPC_RESERVED0 = 0, + D3DSPC_GT = 1, + D3DSPC_EQ = 2, + D3DSPC_GE = 3, + D3DSPC_LT = 4, + D3DSPC_NE = 5, + D3DSPC_LE = 6, + D3DSPC_RESERVED1 = 7 +} D3DSHADER_COMPARISON; + +#define D3DDP_MAXTEXCOORD 8 + +#define D3DSI_OPCODE_MASK 0x0000FFFF +#define D3DSI_INSTLENGTH_MASK 0x0F000000 +#define D3DSI_INSTLENGTH_SHIFT 24 + +typedef
enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE { + D3DSIO_NOP = 0, + D3DSIO_MOV = 1, + D3DSIO_ADD = 2, + D3DSIO_SUB = 3, + D3DSIO_MAD = 4, + D3DSIO_MUL = 5, + D3DSIO_RCP = 6, + D3DSIO_RSQ = 7, + D3DSIO_DP3 = 8, + D3DSIO_DP4 = 9, + D3DSIO_MIN = 10, + D3DSIO_MAX = 11, + D3DSIO_SLT = 12, + D3DSIO_SGE = 13, + D3DSIO_EXP = 14, + D3DSIO_LOG = 15, + D3DSIO_LIT = 16, + D3DSIO_DST = 17, + D3DSIO_LRP = 18, + D3DSIO_FRC = 19, + D3DSIO_M4x4 = 20, + D3DSIO_M4x3 = 21, + D3DSIO_M3x4 = 22, + D3DSIO_M3x3 = 23, + D3DSIO_M3x2 = 24, + D3DSIO_CALL = 25, + D3DSIO_CALLNZ = 26, + D3DSIO_LOOP = 27, + D3DSIO_RET = 28, + D3DSIO_ENDLOOP = 29, + D3DSIO_LABEL = 30, + D3DSIO_DCL = 31, + D3DSIO_POW = 32, + D3DSIO_CRS = 33, + D3DSIO_SGN = 34, + D3DSIO_ABS = 35, + D3DSIO_NRM = 36, + D3DSIO_SINCOS = 37, + D3DSIO_REP = 38, + D3DSIO_ENDREP = 39, + D3DSIO_IF = 40, + D3DSIO_IFC = 41, + D3DSIO_ELSE = 42, + D3DSIO_ENDIF = 43, + D3DSIO_BREAK = 44, + D3DSIO_BREAKC = 45, + D3DSIO_MOVA = 46, + D3DSIO_DEFB = 47, + D3DSIO_DEFI = 48, + D3DSIO_TEXCOORD = 64, + D3DSIO_TEXKILL = 65, + D3DSIO_TEX = 66, + D3DSIO_TEXBEM = 67, + D3DSIO_TEXBEML = 68, + D3DSIO_TEXREG2AR = 69, + D3DSIO_TEXREG2GB = 70, + D3DSIO_TEXM3x2PAD = 71, + D3DSIO_TEXM3x2TEX = 72, + D3DSIO_TEXM3x3PAD = 73, + D3DSIO_TEXM3x3TEX = 74, + D3DSIO_RESERVED0 = 75, + D3DSIO_TEXM3x3SPEC = 76, + D3DSIO_TEXM3x3VSPEC = 77, + D3DSIO_EXPP = 78, + D3DSIO_LOGP = 79, + D3DSIO_CND = 80, + D3DSIO_DEF = 81, + D3DSIO_TEXREG2RGB = 82, + D3DSIO_TEXDP3TEX = 83, + D3DSIO_TEXM3x2DEPTH = 84, + D3DSIO_TEXDP3 = 85, + D3DSIO_TEXM3x3 = 86, + D3DSIO_TEXDEPTH = 87, + D3DSIO_CMP = 88, + D3DSIO_BEM = 89, + D3DSIO_DP2ADD = 90, + D3DSIO_DSX = 91, + D3DSIO_DSY = 92, + D3DSIO_TEXLDD = 93, + D3DSIO_SETP = 94, + D3DSIO_TEXLDL = 95, + D3DSIO_BREAKP = 96, + D3DSIO_PHASE = 0xFFFD, + D3DSIO_COMMENT = 0xFFFE, + D3DSIO_END = 0xFFFF +} D3DSHADER_INSTRUCTION_OPCODE_TYPE; + +#define D3DSI_COISSUE 0x40000000 + +#define D3DSP_DCL_USAGE_SHIFT 0 +#define D3DSP_DCL_USAGE_MASK 0x0000000f + +#define
D3DSP_DCL_USAGEINDEX_SHIFT 16 +#define D3DSP_DCL_USAGEINDEX_MASK 0x000f0000 + +#define D3DSP_TEXTURETYPE_SHIFT 27 +#define D3DSP_TEXTURETYPE_MASK 0x78000000 + +#define D3DSP_REGNUM_MASK 0x000007FF + +#define D3DSP_WRITEMASK_0 0x00010000 +#define D3DSP_WRITEMASK_1 0x00020000 +#define D3DSP_WRITEMASK_2 0x00040000 +#define D3DSP_WRITEMASK_3 0x00080000 +#define D3DSP_WRITEMASK_ALL 0x000F0000 + +#define D3DSP_DSTMOD_SHIFT 20 +#define D3DSP_DSTMOD_MASK (0xF << D3DSP_DSTMOD_SHIFT) + +typedef enum _D3DSHADER_PARAM_DSTMOD_TYPE { + D3DSPDM_NONE = 0 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_SATURATE = 1 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_PARTIALPRECISION = 2 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_MSAMPCENTROID = 4 << D3DSP_DSTMOD_SHIFT, + D3DSPDM_FORCE_DWORD = 0x7FFFFFFF +} D3DSHADER_PARAM_DSTMOD_TYPE; + +#define D3DSP_DSTSHIFT_SHIFT 24 +#define D3DSP_DSTSHIFT_MASK (0xF << D3DSP_DSTSHIFT_SHIFT) + +#define D3DSP_REGTYPE_SHIFT 28 +#define D3DSP_REGTYPE_SHIFT2 8 +#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) +#define D3DSP_REGTYPE_MASK2 0x00001800 + +typedef enum _D3DSHADER_MISCTYPE_OFFSETS { + D3DSMO_POSITION = 0, + D3DSMO_FACE = 1 +} D3DSHADER_MISCTYPE_OFFSETS; +/* Two values are shared on purpose in this list: ADDR and TEXTURE are both 3, TEXCRDOUT and OUTPUT are both 6. */ +typedef enum _D3DSHADER_PARAM_REGISTER_TYPE { + D3DSPR_TEMP = 0, + D3DSPR_INPUT = 1, + D3DSPR_CONST = 2, + D3DSPR_ADDR = 3, + D3DSPR_TEXTURE = 3, + D3DSPR_RASTOUT = 4, + D3DSPR_ATTROUT = 5, + D3DSPR_TEXCRDOUT = 6, + D3DSPR_OUTPUT = 6, + D3DSPR_CONSTINT = 7, + D3DSPR_COLOROUT = 8, + D3DSPR_DEPTHOUT = 9, + D3DSPR_SAMPLER = 10, + D3DSPR_CONST2 = 11, + D3DSPR_CONST3 = 12, + D3DSPR_CONST4 = 13, + D3DSPR_CONSTBOOL = 14, + D3DSPR_LOOP = 15, + D3DSPR_TEMPFLOAT16 = 16, + D3DSPR_MISCTYPE = 17, + D3DSPR_LABEL = 18, + D3DSPR_PREDICATE = 19 +} D3DSHADER_PARAM_REGISTER_TYPE; + +#define D3DSP_SWIZZLE_SHIFT 16 +#define D3DSP_SWIZZLE_MASK (0xFF << D3DSP_SWIZZLE_SHIFT) +/* Identity swizzle: the 2-bit selectors 0, 1, 2, 3 placed in four successive 2-bit fields starting at D3DSP_SWIZZLE_SHIFT. */ +#define D3DSP_NOSWIZZLE \ + ((0 << (D3DSP_SWIZZLE_SHIFT + 0)) | (1 << (D3DSP_SWIZZLE_SHIFT + 2)) | (2 << (D3DSP_SWIZZLE_SHIFT + 4)) | (3 << (D3DSP_SWIZZLE_SHIFT + 6))) +
+#define D3DSP_SRCMOD_SHIFT 24 +#define D3DSP_SRCMOD_MASK (0xF << D3DSP_SRCMOD_SHIFT) + +typedef enum _D3DSHADER_PARAM_SRCMOD_TYPE { + D3DSPSM_NONE = 0<<24, + D3DSPSM_NEG = 1<<24, + D3DSPSM_BIAS = 2<<24, + D3DSPSM_BIASNEG = 3<<24, + D3DSPSM_SIGN = 4<<24, + D3DSPSM_SIGNNEG = 5<<24, + D3DSPSM_COMP = 6<<24, + D3DSPSM_X2 = 7<<24, + D3DSPSM_X2NEG = 8<<24, + D3DSPSM_DZ = 9<<24, + D3DSPSM_DW = 10<<24, + D3DSPSM_ABS = 11<<24, + D3DSPSM_ABSNEG = 12<<24, + D3DSPSM_NOT = 13<<24 +} D3DSHADER_PARAM_SRCMOD_TYPE; + +#define D3DPS_VERSION(major, minor) (0xFFFF0000 | ((major) << 8) | (minor)) +#define D3DVS_VERSION(major, minor) (0xFFFE0000 | ((major) << 8) | (minor)) +#define D3DSHADER_VERSION_MAJOR(version) (((version) >> 8) & 0xFF) +#define D3DSHADER_VERSION_MINOR(version) (((version) >> 0) & 0xFF) + +#define D3DSI_COMMENTSIZE_SHIFT 16 +#define D3DSI_COMMENTSIZE_MASK (0x7FFF << D3DSI_COMMENTSIZE_SHIFT) + +typedef enum _D3DSTATEBLOCKTYPE { + D3DSBT_ALL = 1, + D3DSBT_PIXELSTATE = 2, + D3DSBT_VERTEXSTATE = 3 +} D3DSTATEBLOCKTYPE; + +typedef enum _D3DSTENCILOP { + D3DSTENCILOP_KEEP = 1, + D3DSTENCILOP_ZERO = 2, + D3DSTENCILOP_REPLACE = 3, + D3DSTENCILOP_INCRSAT = 4, + D3DSTENCILOP_DECRSAT = 5, + D3DSTENCILOP_INVERT = 6, + D3DSTENCILOP_INCR = 7, + D3DSTENCILOP_DECR = 8 +} D3DSTENCILOP; + +typedef enum _D3DSWAPEFFECT { + D3DSWAPEFFECT_DISCARD = 1, + D3DSWAPEFFECT_FLIP = 2, + D3DSWAPEFFECT_COPY = 3, + D3DSWAPEFFECT_OVERLAY = 4, + D3DSWAPEFFECT_FLIPEX = 5 +} D3DSWAPEFFECT; + +typedef enum _D3DTEXTUREADDRESS { + D3DTADDRESS_WRAP = 1, + D3DTADDRESS_MIRROR = 2, + D3DTADDRESS_CLAMP = 3, + D3DTADDRESS_BORDER = 4, + D3DTADDRESS_MIRRORONCE = 5 +} D3DTEXTUREADDRESS; + +typedef enum _D3DTEXTUREFILTERTYPE { + D3DTEXF_NONE = 0, + D3DTEXF_POINT = 1, + D3DTEXF_LINEAR = 2, + D3DTEXF_ANISOTROPIC = 3, + D3DTEXF_PYRAMIDALQUAD = 6, + D3DTEXF_GAUSSIANQUAD = 7, + D3DTEXF_CONVOLUTIONMONO = 8, + D3DTEXF_FORCE_DWORD = 0x7fffffff +} D3DTEXTUREFILTERTYPE; + +typedef enum _D3DTEXTUREOP { + D3DTOP_DISABLE = 1, + 
D3DTOP_SELECTARG1 = 2, + D3DTOP_SELECTARG2 = 3, + D3DTOP_MODULATE = 4, + D3DTOP_MODULATE2X = 5, + D3DTOP_MODULATE4X = 6, + D3DTOP_ADD = 7, + D3DTOP_ADDSIGNED = 8, + D3DTOP_ADDSIGNED2X = 9, + D3DTOP_SUBTRACT = 10, + D3DTOP_ADDSMOOTH = 11, + D3DTOP_BLENDDIFFUSEALPHA = 12, + D3DTOP_BLENDTEXTUREALPHA = 13, + D3DTOP_BLENDFACTORALPHA = 14, + D3DTOP_BLENDTEXTUREALPHAPM = 15, + D3DTOP_BLENDCURRENTALPHA = 16, + D3DTOP_PREMODULATE = 17, + D3DTOP_MODULATEALPHA_ADDCOLOR = 18, + D3DTOP_MODULATECOLOR_ADDALPHA = 19, + D3DTOP_MODULATEINVALPHA_ADDCOLOR = 20, + D3DTOP_MODULATEINVCOLOR_ADDALPHA = 21, + D3DTOP_BUMPENVMAP = 22, + D3DTOP_BUMPENVMAPLUMINANCE = 23, + D3DTOP_DOTPRODUCT3 = 24, + D3DTOP_MULTIPLYADD = 25, + D3DTOP_LERP = 26 +} D3DTEXTUREOP; + +typedef enum _D3DTEXTURESTAGESTATETYPE { + D3DTSS_COLOROP = 1, + D3DTSS_COLORARG1 = 2, + D3DTSS_COLORARG2 = 3, + D3DTSS_ALPHAOP = 4, + D3DTSS_ALPHAARG1 = 5, + D3DTSS_ALPHAARG2 = 6, + D3DTSS_BUMPENVMAT00 = 7, + D3DTSS_BUMPENVMAT01 = 8, + D3DTSS_BUMPENVMAT10 = 9, + D3DTSS_BUMPENVMAT11 = 10, + D3DTSS_TEXCOORDINDEX = 11, + D3DTSS_BUMPENVLSCALE = 22, + D3DTSS_BUMPENVLOFFSET = 23, + D3DTSS_TEXTURETRANSFORMFLAGS = 24, + D3DTSS_COLORARG0 = 26, + D3DTSS_ALPHAARG0 = 27, + D3DTSS_RESULTARG = 28, + D3DTSS_CONSTANT = 32 +} D3DTEXTURESTAGESTATETYPE; + +/* MSDN has this in d3d9caps.h, but it should be here */ +#define D3DTSS_TCI_PASSTHRU 0x00000 +#define D3DTSS_TCI_CAMERASPACENORMAL 0x10000 +#define D3DTSS_TCI_CAMERASPACEPOSITION 0x20000 +#define D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 0x30000 +#define D3DTSS_TCI_SPHEREMAP 0x40000 + +typedef enum _D3DTEXTURETRANSFORMFLAGS { + D3DTTFF_DISABLE = 0, + D3DTTFF_COUNT1 = 1, + D3DTTFF_COUNT2 = 2, + D3DTTFF_COUNT3 = 3, + D3DTTFF_COUNT4 = 4, + D3DTTFF_PROJECTED = 256 +} D3DTEXTURETRANSFORMFLAGS; + +typedef enum _D3DTRANSFORMSTATETYPE { + D3DTS_VIEW = 2, + D3DTS_PROJECTION = 3, + D3DTS_TEXTURE0 = 16, + D3DTS_TEXTURE1 = 17, + D3DTS_TEXTURE2 = 18, + D3DTS_TEXTURE3 = 19, + D3DTS_TEXTURE4 = 20, + D3DTS_TEXTURE5 = 
21, + D3DTS_TEXTURE6 = 22, + D3DTS_TEXTURE7 = 23 +} D3DTRANSFORMSTATETYPE; + +#define D3DDMAPSAMPLER 256 +#define D3DVERTEXTEXTURESAMPLER0 (D3DDMAPSAMPLER+1) +#define D3DVERTEXTEXTURESAMPLER1 (D3DDMAPSAMPLER+2) +#define D3DVERTEXTEXTURESAMPLER2 (D3DDMAPSAMPLER+3) +#define D3DVERTEXTEXTURESAMPLER3 (D3DDMAPSAMPLER+4) + +#define D3DTS_WORLD D3DTS_WORLDMATRIX(0) +#define D3DTS_WORLD1 D3DTS_WORLDMATRIX(1) +#define D3DTS_WORLD2 D3DTS_WORLDMATRIX(2) +#define D3DTS_WORLD3 D3DTS_WORLDMATRIX(3) +#define D3DTS_WORLDMATRIX(index) (D3DTRANSFORMSTATETYPE)((index) + 256) /* argument parenthesized so expressions like (a | b) or (c ? 1 : 2) expand as a unit */ + +typedef enum _D3DVERTEXBLENDFLAGS { + D3DVBF_DISABLE = 0, + D3DVBF_1WEIGHTS = 1, + D3DVBF_2WEIGHTS = 2, + D3DVBF_3WEIGHTS = 3, + D3DVBF_TWEENING = 255, + D3DVBF_0WEIGHTS = 256 +} D3DVERTEXBLENDFLAGS; + +typedef enum _D3DVS_ADDRESSMODE_TYPE { + D3DVS_ADDRMODE_ABSOLUTE = 0<<13, + D3DVS_ADDRMODE_RELATIVE = 1<<13 +} D3DVS_ADDRESSMODE_TYPE; + +typedef enum _D3DVS_RASTOUT_OFFSETS { + D3DSRO_POSITION = 0, + D3DSRO_FOG = 1, + D3DSRO_POINT_SIZE = 2 +} D3DVS_RASTOUT_OFFSETS; + +typedef enum _D3DZBUFFERTYPE { + D3DZB_FALSE = 0, + D3DZB_TRUE = 1, + D3DZB_USEW = 2 +} D3DZBUFFERTYPE; + +/***************************************************************************** + * Structs * + *****************************************************************************/ +typedef struct D3DDISPLAYMODEEX { + UINT Size; + UINT Width; + UINT Height; + UINT RefreshRate; + D3DFORMAT Format; + D3DSCANLINEORDERING ScanLineOrdering; +} D3DDISPLAYMODEEX, *PD3DDISPLAYMODEEX, *LPD3DDISPLAYMODEEX; + +typedef struct D3DDISPLAYMODEFILTER { + UINT Size; + D3DFORMAT Format; + D3DSCANLINEORDERING ScanLineOrdering; +} D3DDISPLAYMODEFILTER, *PD3DDISPLAYMODEFILTER, *LPD3DDISPLAYMODEFILTER; + +typedef struct _D3D_OMAC { + BYTE Omac[16]; +} D3D_OMAC, *PD3D_OMAC, *LPD3D_OMAC; + +typedef struct _D3DADAPTER_IDENTIFIER9 { + char Driver[512]; + char Description[512]; + char DeviceName[32]; + DWORD DriverVersionLowPart; + DWORD DriverVersionHighPart; + DWORD
VendorId; + DWORD DeviceId; + DWORD SubSysId; + DWORD Revision; + GUID DeviceIdentifier; + DWORD WHQLLevel; +} D3DADAPTER_IDENTIFIER9, *PD3DADAPTER_IDENTIFIER9, *LPD3DADAPTER_IDENTIFIER9; + +typedef struct _D3DAES_CTR_IV { + UINT64 IV; + UINT64 Count; +} D3DAES_CTR_IV, *PD3DAES_CTR_IV, *LPD3DAES_CTR_IV; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT { + D3D_OMAC omac; + GUID ConfigureType; + HANDLE hChannel; + UINT SequenceNumber; +} D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT, *PD3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION { + D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; + HANDLE DXVA2DecodeHandle; + HANDLE CryptoSessionHandle; + HANDLE DeviceHandle; +} D3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION, *PD3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE { + D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; + UINT StartSequenceQuery; + UINT StartSequenceConfigure; +} D3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE, *PD3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE, *LPD3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE; + +typedef struct _D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS { + union { + struct { + UINT ProtectionEnabled : 1; + UINT OverlayOrFullscreenRequired : 1; + UINT Reserved : 30; + }; + UINT Value; + }; +} D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS, *PD3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS, *LPD3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION { + D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; + D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS Protections; +} D3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION, *PD3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION, *LPD3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE { + 
D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; + D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE ProcessIdentiferType; + HANDLE ProcessHandle; + BOOL AllowAccess; +} D3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE, *PD3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION { + D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; + GUID EncryptionGuid; +} D3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION, *PD3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION, *LPD3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION; + +typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT { + D3D_OMAC omac; + GUID ConfigureType; + HANDLE hChannel; + UINT SequenceNumber; + HRESULT ReturnCode; +} D3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERY_INPUT { + GUID QueryType; + HANDLE hChannel; + UINT SequenceNumber; +} D3DAUTHENTICATEDCHANNEL_QUERY_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERY_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERY_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT { + D3D_OMAC omac; + GUID QueryType; + HANDLE hChannel; + UINT SequenceNumber; + HRESULT ReturnCode; +} D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + D3DAUTHENTICATEDCHANNELTYPE ChannelType; +} D3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; + HANDLE DXVA2DecodeHandle; +} D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT, 
*PD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + HANDLE DXVA2DecodeHandle; + HANDLE CryptoSessionHandle; + HANDLE DeviceHandle; +} D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + HANDLE DeviceHandle; +} D3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + UINT NumEncryptionGuids; +} D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; + UINT EncryptionGuidIndex; +} D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + UINT EncryptionGuidIndex; + GUID EncryptionGuid; +} D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + D3DBUSTYPE BusType; + BOOL 
bAccessibleInContiguousBlocks; + BOOL bAccessibleInNonContiguousBlocks; +} D3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; + HANDLE DeviceHandle; + HANDLE CryptoSessionHandle; +} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + HANDLE DeviceHandle; + HANDLE CryptoSessionHandle; + UINT NumOutputIDs; +} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; + HANDLE DeviceHandle; + HANDLE CryptoSessionHandle; + UINT OutputIDIndex; +} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + HANDLE DeviceHandle; + HANDLE CryptoSessionHandle; + UINT OutputIDIndex; + UINT64 OutputID; +} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS ProtectionFlags; +} D3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT; + +typedef struct 
_D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + UINT NumRestrictedSharedResourceProcesses; +} D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; + UINT ProcessIndex; +} D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + UINT ProcessIndex; + D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE ProcessIdentifer; + HANDLE ProcessHandle; +} D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + GUID EncryptionGuid; +} D3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT; + +typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT { + D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; + UINT NumUnrestrictedProtectedSharedResources; +} D3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT, 
*LPD3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT; + +typedef struct _D3DBOX { + UINT Left; + UINT Top; + UINT Right; + UINT Bottom; + UINT Front; + UINT Back; +} D3DBOX, *PD3DBOX, *LPD3DBOX; + +typedef struct _D3DCLIPSTATUS9 { + DWORD ClipUnion; + DWORD ClipIntersection; +} D3DCLIPSTATUS9, *PD3DCLIPSTATUS9, *LPD3DCLIPSTATUS9; + +typedef struct _D3DCOLORVALUE { + float r; + float g; + float b; + float a; +} D3DCOLORVALUE, *PD3DCOLORVALUE, *LPD3DCOLORVALUE; + +typedef struct _D3DCOMPOSERECTDESC { + USHORT X, Y; + USHORT Width, Height; +} D3DCOMPOSERECTDESC, *PD3DCOMPOSERECTDESC, *LPD3DCOMPOSERECTDESC; + +typedef struct _D3DCOMPOSERECTDESTINATION { + USHORT SrcRectIndex; + USHORT Reserved; + SHORT X, Y; +} D3DCOMPOSERECTDESTINATION, *PD3DCOMPOSERECTDESTINATION, *LPD3DCOMPOSERECTDESTINATION; + +typedef struct _D3DDEVICE_CREATION_PARAMETERS { + UINT AdapterOrdinal; + D3DDEVTYPE DeviceType; + HWND hFocusWindow; + DWORD BehaviorFlags; +} D3DDEVICE_CREATION_PARAMETERS, *PD3DDEVICE_CREATION_PARAMETERS, *LPD3DDEVICE_CREATION_PARAMETERS; + +typedef struct _D3DDEVINFO_D3D9BANDWIDTHTIMINGS { + FLOAT MaxBandwidthUtilized; + FLOAT FrontEndUploadMemoryUtilizedPercent; + FLOAT VertexRateUtilizedPercent; + FLOAT TriangleSetupRateUtilizedPercent; + FLOAT FillRateUtilizedPercent; +} D3DDEVINFO_D3D9BANDWIDTHTIMINGS, *PD3DDEVINFO_D3D9BANDWIDTHTIMINGS, *LPD3DDEVINFO_D3D9BANDWIDTHTIMINGS; + +typedef struct _D3DDEVINFO_D3D9CACHEUTILIZATION { + FLOAT TextureCacheHitRate; + FLOAT PostTransformVertexCacheHitRate; +} D3DDEVINFO_D3D9CACHEUTILIZATION, *PD3DDEVINFO_D3D9CACHEUTILIZATION, *LPD3DDEVINFO_D3D9CACHEUTILIZATION; + +typedef struct _D3DDEVINFO_D3D9INTERFACETIMINGS { + FLOAT WaitingForGPUToUseApplicationResourceTimePercent; + FLOAT WaitingForGPUToAcceptMoreCommandsTimePercent; + FLOAT WaitingForGPUToStayWithinLatencyTimePercent; + FLOAT WaitingForGPUExclusiveResourceTimePercent; + FLOAT WaitingForGPUOtherTimePercent; +} D3DDEVINFO_D3D9INTERFACETIMINGS, 
*PD3DDEVINFO_D3D9INTERFACETIMINGS, *LPD3DDEVINFO_D3D9INTERFACETIMINGS; + +typedef struct _D3DDEVINFO_D3D9PIPELINETIMINGS { + FLOAT VertexProcessingTimePercent; + FLOAT PixelProcessingTimePercent; + FLOAT OtherGPUProcessingTimePercent; + FLOAT GPUIdleTimePercent; +} D3DDEVINFO_D3D9PIPELINETIMINGS, *PD3DDEVINFO_D3D9PIPELINETIMINGS, *LPD3DDEVINFO_D3D9PIPELINETIMINGS; + +typedef struct _D3DDEVINFO_D3D9STAGETIMINGS { + FLOAT MemoryProcessingPercent; + FLOAT ComputationProcessingPercent; +} D3DDEVINFO_D3D9STAGETIMINGS, *PD3DDEVINFO_D3D9STAGETIMINGS, *LPD3DDEVINFO_D3D9STAGETIMINGS; + +typedef struct _D3DDEVINFO_D3DVERTEXSTATS { + DWORD NumRenderedTriangles; + DWORD NumExtraClippingTriangles; +} D3DDEVINFO_D3DVERTEXSTATS, *LPD3DDEVINFO_D3DVERTEXSTATS; + +typedef struct _D3DRESOURCESTATS { + BOOL bThrashing; + DWORD ApproxBytesDownloaded; + DWORD NumEvicts; + DWORD NumVidCreates; + DWORD LastPri; + DWORD NumUsed; + DWORD NumUsedInVidMem; + DWORD WorkingSet; + DWORD WorkingSetBytes; + DWORD TotalManaged; + DWORD TotalBytes; +} D3DRESOURCESTATS, *PD3DRESOURCESTATS, *LPD3DRESOURCESTATS; + +typedef struct _D3DDEVINFO_RESOURCEMANAGER { + D3DRESOURCESTATS stats[(D3DRTYPE_INDEXBUFFER+1)]; +} D3DDEVINFO_RESOURCEMANAGER, *LPD3DDEVINFO_RESOURCEMANAGER; + +typedef struct _D3DDEVINFO_VCACHE { + DWORD Pattern; + DWORD OptMethod; + DWORD CacheSize; + DWORD MagicNumber; +} D3DDEVINFO_VCACHE, *LPD3DDEVINFO_VCACHE; + +typedef struct _D3DDISPLAYMODE { + UINT Width; + UINT Height; + UINT RefreshRate; + D3DFORMAT Format; +} D3DDISPLAYMODE, *PD3DDISPLAYMODE, *LPD3DDISPLAYMODE; + +typedef struct _D3DENCRYPTED_BLOCK_INFO { + UINT NumEncryptedBytesAtBeginning; + UINT NumBytesInSkipPattern; + UINT NumBytesInEncryptPattern; +} D3DENCRYPTED_BLOCK_INFO, *PD3DENCRYPTED_BLOCK_INFO, *LPD3DENCRYPTED_BLOCK_INFO; + +typedef struct _D3DGAMMARAMP { + WORD red [256]; + WORD green[256]; + WORD blue [256]; +} D3DGAMMARAMP, *PD3DGAMMARAMP, *LPD3DGAMMARAMP; + +typedef struct _D3DINDEXBUFFER_DESC { + D3DFORMAT 
Format; + D3DRESOURCETYPE Type; + DWORD Usage; + D3DPOOL Pool; + UINT Size; +} D3DINDEXBUFFER_DESC, *PD3DINDEXBUFFER_DESC, *LPD3DINDEXBUFFER_DESC; + +typedef struct _D3DVECTOR { + float x; + float y; + float z; +} D3DVECTOR, *PD3DVECTOR, *LPD3DVECTOR; + +typedef struct _D3DLIGHT9 { + D3DLIGHTTYPE Type; + D3DCOLORVALUE Diffuse; + D3DCOLORVALUE Specular; + D3DCOLORVALUE Ambient; + D3DVECTOR Position; + D3DVECTOR Direction; + float Range; + float Falloff; + float Attenuation0; + float Attenuation1; + float Attenuation2; + float Theta; + float Phi; +} D3DLIGHT9, *PD3DLIGHT9, *LPD3DLIGHT9; + +typedef struct _D3DLOCKED_BOX { + INT RowPitch; + INT SlicePitch; + void* pBits; +} D3DLOCKED_BOX, *PD3DLOCKED_BOX, *LPD3DLOCKED_BOX; + +typedef struct _D3DLOCKED_RECT { + INT Pitch; + void* pBits; +} D3DLOCKED_RECT, *PD3DLOCKED_RECT, *LPD3DLOCKED_RECT; + +typedef struct _D3DMATERIAL9 { + D3DCOLORVALUE Diffuse; + D3DCOLORVALUE Ambient; + D3DCOLORVALUE Specular; + D3DCOLORVALUE Emissive; + float Power; +} D3DMATERIAL9, *PD3DMATERIAL9, *LPD3DMATERIAL9; + +typedef struct _D3DMATRIX { + union { + struct { + float _11, _12, _13, _14; + float _21, _22, _23, _24; + float _31, _32, _33, _34; + float _41, _42, _43, _44; + }; + float m[4][4]; + }; +} D3DMATRIX, *PD3DMATRIX, *LPD3DMATRIX; + +typedef struct _D3DMEMORYPRESSURE { + UINT64 BytesEvictedFromProcess; + UINT64 SizeOfInefficientAllocation; + DWORD LevelOfEfficiency; +} D3DMEMORYPRESSURE, *PD3DMEMORYPRESSURE, *LPD3DMEMORYPRESSURE; + +typedef struct _D3DPRESENTSTATS { + UINT PresentCount; + UINT PresentRefreshCount; + UINT SyncRefreshCount; + LARGE_INTEGER SyncQPCTime; + LARGE_INTEGER SyncGPUTime; +} D3DPRESENTSTATS, *PD3DPRESENTSTATS, *LPD3DPRESENTSTATS; + +typedef struct _D3DPRESENT_PARAMETERS_ { + UINT BackBufferWidth; + UINT BackBufferHeight; + D3DFORMAT BackBufferFormat; + UINT BackBufferCount; + D3DMULTISAMPLE_TYPE MultiSampleType; + DWORD MultiSampleQuality; + D3DSWAPEFFECT SwapEffect; + HWND hDeviceWindow; + BOOL Windowed; + 
BOOL EnableAutoDepthStencil; + D3DFORMAT AutoDepthStencilFormat; + DWORD Flags; + UINT FullScreen_RefreshRateInHz; + UINT PresentationInterval; +} D3DPRESENT_PARAMETERS, *PD3DPRESENT_PARAMETERS, *LPD3DPRESENT_PARAMETERS; + +typedef struct _D3DRANGE { + UINT Offset; + UINT Size; +} D3DRANGE, *PD3DRANGE, *LPD3DRANGE; + +typedef struct _D3DRASTER_STATUS { + BOOL InVBlank; + UINT ScanLine; +} D3DRASTER_STATUS, *PD3DRASTER_STATUS, *LPD3DRASTER_STATUS; + +typedef struct _D3DRECT { + LONG x1; + LONG y1; + LONG x2; + LONG y2; +} D3DRECT, *PD3DRECT, *LPD3DRECT; + +typedef struct _D3DRECTPATCH_INFO { + UINT StartVertexOffsetWidth; + UINT StartVertexOffsetHeight; + UINT Width; + UINT Height; + UINT Stride; + D3DBASISTYPE Basis; + D3DDEGREETYPE Degree; +} D3DRECTPATCH_INFO, *PD3DRECTPATCH_INFO, *LPD3DRECTPATCH_INFO; + +typedef struct _D3DSURFACE_DESC { + D3DFORMAT Format; + D3DRESOURCETYPE Type; + DWORD Usage; + D3DPOOL Pool; + D3DMULTISAMPLE_TYPE MultiSampleType; + DWORD MultiSampleQuality; + UINT Width; + UINT Height; +} D3DSURFACE_DESC, *PD3DSURFACE_DESC, *LPD3DSURFACE_DESC; + +typedef struct _D3DTRIPATCH_INFO { + UINT StartVertexOffset; + UINT NumVertices; + D3DBASISTYPE Basis; + D3DDEGREETYPE Degree; +} D3DTRIPATCH_INFO, *PD3DTRIPATCH_INFO, *LPD3DTRIPATCH_INFO; + +typedef struct _D3DVERTEXBUFFER_DESC { + D3DFORMAT Format; + D3DRESOURCETYPE Type; + DWORD Usage; + D3DPOOL Pool; + UINT Size; + DWORD FVF; +} D3DVERTEXBUFFER_DESC, *PD3DVERTEXBUFFER_DESC, *LPD3DVERTEXBUFFER_DESC; + +typedef struct _D3DVERTEXELEMENT9 { + WORD Stream; + WORD Offset; + BYTE Type; + BYTE Method; + BYTE Usage; + BYTE UsageIndex; +} D3DVERTEXELEMENT9, *LPD3DVERTEXELEMENT9; + +typedef struct _D3DVIEWPORT9 { + DWORD X; + DWORD Y; + DWORD Width; + DWORD Height; + float MinZ; + float MaxZ; +} D3DVIEWPORT9, *PD3DVIEWPORT9, *LPD3DVIEWPORT9; + +typedef struct _D3DVOLUME_DESC { + D3DFORMAT Format; + D3DRESOURCETYPE Type; + DWORD Usage; + D3DPOOL Pool; + UINT Width; + UINT Height; + UINT Depth; +} 
D3DVOLUME_DESC, *PD3DVOLUME_DESC, *LPD3DVOLUME_DESC; + +#ifndef _WIN32 +/* If _WIN32 isn't declared it means only internal header files are used. To + * avoid a conflict, IUnknown is declared here rather than in d3d9.h */ + +typedef struct IUnknown IUnknown, *PUNKNOWN, *LPUNKNOWN; + +#ifdef __cplusplus +extern "C" const GUID IID_IUnknown; + +struct IUnknown +{ + virtual HRESULT WINAPI QueryInterface(REFIID riid, void **ppvObject) = 0; + virtual ULONG WINAPI AddRef() = 0; + virtual ULONG WINAPI Release() = 0; +}; +#else /* __cplusplus */ +extern const GUID IID_IUnknown; + +typedef struct IUnknownVtbl +{ + /* IUnknown */ + HRESULT (WINAPI *QueryInterface)(IUnknown *This, REFIID riid, void **ppvObject); + ULONG (WINAPI *AddRef)(IUnknown *This); + ULONG (WINAPI *Release)(IUnknown *This); +} IUnknownVtbl; + +struct IUnknown +{ + IUnknownVtbl *lpVtbl; +}; + +/* IUnknown macros */ +#define IUnknown_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) +#define IUnknown_AddRef(p) (p)->lpVtbl->AddRef(p) +#define IUnknown_Release(p) (p)->lpVtbl->Release(p) +#endif /* __cplusplus */ +#endif /* _WIN32 */ + +#endif /* _D3D9TYPES_H_ */ diff --git a/nine-native/include/nine_sdl.h b/nine-native/include/nine_sdl.h new file mode 100644 index 000000000..294ad6040 --- /dev/null +++ b/nine-native/include/nine_sdl.h @@ -0,0 +1,13 @@ + +#ifdef __cplusplus +extern "C" { +#endif + +struct SDL_Window; +struct IDirect3D9Ex* Direct3DCreate9Ex_SDL(struct SDL_Window *win); +struct IDirect3D9* Direct3DCreate9_SDL(struct SDL_Window *win); + +#ifdef __cplusplus +} +#endif + diff --git a/nine-native/src/dri3.c b/nine-native/src/dri3.c new file mode 100644 index 000000000..06cfeba4f --- /dev/null +++ b/nine-native/src/dri3.c @@ -0,0 +1,788 @@ +/* + * Copyright © 2014 Axel Davy + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including 
without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "dri3.h" + + +#ifdef _DEBUG +#define TRACE(...) fprintf(stderr, __VA_ARGS__) +#define ERR(...) fprintf(stderr, __VA_ARGS__) +#else +#define TRACE(...) +#define ERR(...) 
fprintf(stderr, __VA_ARGS__) +#endif + + +// --------------------------------- dlls/winex11.drv/dri3.c -------------------------------------------- + +BOOL +DRI3CheckExtension(Display *dpy, int major, int minor) +{ + xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); + xcb_dri3_query_version_cookie_t dri3_cookie; + xcb_dri3_query_version_reply_t *dri3_reply; + xcb_generic_error_t *error; + const xcb_query_extension_reply_t *extension; + int fd; + + xcb_prefetch_extension_data(xcb_connection, &xcb_dri3_id); + + extension = xcb_get_extension_data(xcb_connection, &xcb_dri3_id); + if (!(extension && extension->present)) { + ERR("DRI3 extension is not present\n"); + return FALSE; + } + + dri3_cookie = xcb_dri3_query_version(xcb_connection, major, minor); + + dri3_reply = xcb_dri3_query_version_reply(xcb_connection, dri3_cookie, &error); + if (!dri3_reply) { + free(error); + ERR("Issue getting requested version of DRI3: %d,%d\n", major, minor); + return FALSE; + } + + if (!DRI3Open(dpy, DefaultScreen(dpy), &fd)) { + ERR("DRI3 advertised, but not working\n"); + return FALSE; + } + close(fd); + + TRACE("DRI3 version %d,%d found. 
%d %d requested\n", major, minor, (int)dri3_reply->major_version, (int)dri3_reply->minor_version); + free(dri3_reply); + + return TRUE; +} + +BOOL +PRESENTCheckExtension(Display *dpy, int major, int minor) +{ + xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); + xcb_present_query_version_cookie_t present_cookie; + xcb_present_query_version_reply_t *present_reply; + xcb_generic_error_t *error; + const xcb_query_extension_reply_t *extension; + + xcb_prefetch_extension_data(xcb_connection, &xcb_present_id); + + extension = xcb_get_extension_data(xcb_connection, &xcb_present_id); + if (!(extension && extension->present)) { + ERR("PRESENT extension is not present\n"); + return FALSE; + } + + present_cookie = xcb_present_query_version(xcb_connection, major, minor); + + present_reply = xcb_present_query_version_reply(xcb_connection, present_cookie, &error); + if (!present_reply) { + free(error); + ERR("Issue getting requested version of PRESENT: %d,%d\n", major, minor); + return FALSE; + } + + TRACE("PRESENT version %d,%d found. 
%d %d requested\n", major, minor, (int)present_reply->major_version, (int)present_reply->minor_version); + free(present_reply); + + return TRUE; +} + +BOOL +DRI3Open(Display *dpy, int screen, int *device_fd) +{ + xcb_dri3_open_cookie_t cookie; + xcb_dri3_open_reply_t *reply; + xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); + int fd; + Window root = RootWindow(dpy, screen); + + cookie = xcb_dri3_open(xcb_connection, root, 0); + + reply = xcb_dri3_open_reply(xcb_connection, cookie, NULL); + if (!reply) + return FALSE; + + if (reply->nfd != 1) { + free(reply); + return FALSE; + } + + fd = xcb_dri3_open_reply_fds(xcb_connection, reply)[0]; + fcntl(fd, F_SETFD, FD_CLOEXEC); + + *device_fd = fd; + + return TRUE; +} + +BOOL +DRI3PixmapFromDmaBuf(Display *dpy, int screen, int fd, int width, int height, int stride, int depth, int bpp, Pixmap *pixmap) +{ + xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); + Window root = RootWindow(dpy, screen); + xcb_void_cookie_t cookie; + xcb_generic_error_t *error; + + cookie = xcb_dri3_pixmap_from_buffer_checked(xcb_connection, + (*pixmap = xcb_generate_id(xcb_connection)), + root, + 0, + width, height, stride, + depth, bpp, fd); + error = xcb_request_check(xcb_connection, cookie); /* performs a flush */ + if (error) { + ERR("Error using DRI3 to convert a DmaBufFd to pixmap\n"); + return FALSE; + } + return TRUE; +} + +BOOL +DRI3DmaBufFromPixmap(Display *dpy, Pixmap pixmap, int *fd, int *width, int *height, int *stride, int *depth, int *bpp) +{ + xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); + xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie; + xcb_dri3_buffer_from_pixmap_reply_t *bp_reply; + + bp_cookie = xcb_dri3_buffer_from_pixmap(xcb_connection, pixmap); + bp_reply = xcb_dri3_buffer_from_pixmap_reply(xcb_connection, bp_cookie, NULL); + if (!bp_reply) + return FALSE; + *fd = xcb_dri3_buffer_from_pixmap_reply_fds(xcb_connection, bp_reply)[0]; + *width = bp_reply->width; + *height = bp_reply->height; + 
*stride = bp_reply->stride; + *depth = bp_reply->depth; + *bpp = bp_reply->depth; + return TRUE; +} + +struct PRESENTPriv { + xcb_connection_t *xcb_connection; + xcb_connection_t *xcb_connection_bis; /* to avoid libxcb thread bugs, use a different connection to present pixmaps */ + XID window; + uint64_t last_msc; + uint64_t last_target; + uint32_t last_serial_given; + xcb_special_event_t *special_event; + PRESENTPixmapPriv *first_present_priv; + int pixmap_present_pending; + BOOL notify_with_serial_pending; + pthread_mutex_t mutex_present; /* protect readind/writing present_priv things */ + pthread_mutex_t mutex_xcb_wait; + BOOL xcb_wait; +}; + +struct PRESENTPixmapPriv { + PRESENTpriv *present_priv; + Pixmap pixmap; + BOOL released; + unsigned int width; + unsigned int height; + unsigned int depth; + BOOL present_complete_pending; + uint32_t serial; + BOOL last_present_was_flip; + PRESENTPixmapPriv *next; +}; + +static PRESENTPixmapPriv *PRESENTFindPixmapPriv(PRESENTpriv *present_priv, uint32_t serial) +{ + PRESENTPixmapPriv *current = present_priv->first_present_priv; + + while (current) { + if (current->serial == serial) + return current; + current = current->next; + } + return NULL; +} + +static void PRESENThandle_events(PRESENTpriv *present_priv, xcb_present_generic_event_t *ge) +{ + PRESENTPixmapPriv *present_pixmap_priv = NULL; + + switch (ge->evtype) { + case XCB_PRESENT_COMPLETE_NOTIFY: { + xcb_present_complete_notify_event_t *ce = (void *) ge; + if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC) { + if (ce->serial) + present_priv->notify_with_serial_pending = FALSE; + free(ce); + return; + } + present_pixmap_priv = PRESENTFindPixmapPriv(present_priv, ce->serial); + if (!present_pixmap_priv || ce->kind != XCB_PRESENT_COMPLETE_KIND_PIXMAP) { + ERR("FATAL ERROR: PRESENT handling failed\n"); + free(ce); + return; + } + present_pixmap_priv->present_complete_pending = FALSE; + switch (ce->mode) { + case XCB_PRESENT_COMPLETE_MODE_FLIP: + 
present_pixmap_priv->last_present_was_flip = TRUE; + break; + case XCB_PRESENT_COMPLETE_MODE_COPY: + present_pixmap_priv->last_present_was_flip = FALSE; + break; + } + present_priv->pixmap_present_pending--; + present_priv->last_msc = ce->msc; + break; + } + case XCB_PRESENT_EVENT_IDLE_NOTIFY: { + xcb_present_idle_notify_event_t *ie = (void *) ge; + present_pixmap_priv = PRESENTFindPixmapPriv(present_priv, ie->serial); + if (!present_pixmap_priv || present_pixmap_priv->pixmap != ie->pixmap) { + ERR("FATAL ERROR: PRESENT handling failed\n"); + free(ie); + return; + } + present_pixmap_priv->released = TRUE; + break; + } + } + free(ge); +} + +static void PRESENTflush_events(PRESENTpriv *present_priv, BOOL assert_no_other_thread_waiting) +{ + xcb_generic_event_t *ev; + + if ((present_priv->xcb_wait && !assert_no_other_thread_waiting) || /* don't steal events to someone waiting */ + !present_priv->special_event) + return; + + while ((ev = xcb_poll_for_special_event(present_priv->xcb_connection, present_priv->special_event)) != NULL) { + PRESENThandle_events(present_priv, (void *) ev); + } +} + +static BOOL PRESENTwait_events(PRESENTpriv *present_priv, BOOL allow_other_threads) +{ + xcb_generic_event_t *ev; + + if (allow_other_threads) { + present_priv->xcb_wait = TRUE; + pthread_mutex_lock(&present_priv->mutex_xcb_wait); + pthread_mutex_unlock(&present_priv->mutex_present); + } + ev = xcb_wait_for_special_event(present_priv->xcb_connection, present_priv->special_event); + if (allow_other_threads) { + pthread_mutex_unlock(&present_priv->mutex_xcb_wait); + pthread_mutex_lock(&present_priv->mutex_present); + present_priv->xcb_wait = FALSE; + } + if (!ev) { + ERR("FATAL error: xcb had an error\n"); + return FALSE; + } + + PRESENThandle_events(present_priv, (void *) ev); + return TRUE; +} + +static struct xcb_connection_t * +create_xcb_connection(Display *dpy) +{ + int screen_num = DefaultScreen(dpy); + xcb_connection_t *ret; + xcb_xfixes_query_version_cookie_t cookie; + 
xcb_xfixes_query_version_reply_t *rep; + + ret = xcb_connect(DisplayString(dpy), &screen_num); + cookie = xcb_xfixes_query_version_unchecked(ret, XCB_XFIXES_MAJOR_VERSION, XCB_XFIXES_MINOR_VERSION); + rep = xcb_xfixes_query_version_reply(ret, cookie, NULL); + if (rep) + free(rep); + return ret; +} + +BOOL +PRESENTInit(Display *dpy, PRESENTpriv **present_priv) +{ + *present_priv = (PRESENTpriv *) calloc(1, sizeof(PRESENTpriv)); + if (!*present_priv) { + return FALSE; + } + (*present_priv)->xcb_connection = create_xcb_connection(dpy); + (*present_priv)->xcb_connection_bis = create_xcb_connection(dpy); + pthread_mutex_init(&(*present_priv)->mutex_present, NULL); + pthread_mutex_init(&(*present_priv)->mutex_xcb_wait, NULL); + return TRUE; +} + +static void PRESENTForceReleases(PRESENTpriv *present_priv) +{ + PRESENTPixmapPriv *current = NULL; + + if (!present_priv->window) + return; + + /* There should be no other thread listening for events here. + * This can happen when hDestWindowOverride changes without reset. + * This case should never happen, but can happen in theory.*/ + if (present_priv->xcb_wait) { + xcb_present_notify_msc(present_priv->xcb_connection, present_priv->window, 0, 0, 0, 0); + xcb_flush(present_priv->xcb_connection); + pthread_mutex_lock(&present_priv->mutex_xcb_wait); + pthread_mutex_unlock(&present_priv->mutex_xcb_wait); + /* the problem here is that we don't have access to the event the other thread got. + * It is either presented event, idle event or notify event. + */ + while (present_priv->pixmap_present_pending >= 2) + PRESENTwait_events(present_priv, FALSE); + PRESENTflush_events(present_priv, TRUE); + /* Remaining events to come can be a pair of present/idle, + * or an idle, or nothing. 
To be sure we are after all pixmaps + * have been presented, add an event to the queue that can only + * be after the present event, then if we receive an event more, + * we are sure all pixmaps were presented */ + present_priv->notify_with_serial_pending = TRUE; + xcb_present_notify_msc(present_priv->xcb_connection, present_priv->window, 1, present_priv->last_target + 5, 0, 0); + xcb_flush(present_priv->xcb_connection); + while (present_priv->notify_with_serial_pending) + PRESENTwait_events(present_priv, FALSE); + /* Now we are sure we are not expecting any new event */ + } else { + while (present_priv->pixmap_present_pending) /* wait all sent pixmaps are presented */ + PRESENTwait_events(present_priv, FALSE); + PRESENTflush_events(present_priv, TRUE); /* may be remaining idle event */ + /* Since idle events are send with the complete events when it is not flips, + * we are not expecting any new event here */ + } + + current = present_priv->first_present_priv; + while (current) { + if (!current->released) { + if (!current->last_present_was_flip && !present_priv->xcb_wait) { + ERR("ERROR: a pixmap seems not released by PRESENT for no reason. Code bug.\n"); + } else { + /* Present the same pixmap with a non-valid part to force the copy mode and the releases */ + xcb_xfixes_region_t valid, update; + xcb_rectangle_t rect_update; + rect_update.x = 0; + rect_update.y = 0; + rect_update.width = 8; + rect_update.height = 1; + valid = xcb_generate_id(present_priv->xcb_connection); + update = xcb_generate_id(present_priv->xcb_connection); + xcb_xfixes_create_region(present_priv->xcb_connection, valid, 1, &rect_update); + xcb_xfixes_create_region(present_priv->xcb_connection, update, 1, &rect_update); + /* here we know the pixmap has been presented. 
Thus if it is on screen, + * the following request can only make it released by the server if it is not */ + xcb_present_pixmap(present_priv->xcb_connection, present_priv->window, + current->pixmap, 0, valid, update, 0, 0, None, None, + None, XCB_PRESENT_OPTION_COPY | XCB_PRESENT_OPTION_ASYNC, 0, 0, 0, 0, NULL); + xcb_flush(present_priv->xcb_connection); + PRESENTwait_events(present_priv, FALSE); /* by assumption this can only be idle event */ + PRESENTflush_events(present_priv, TRUE); /* Shoudln't be needed */ + } + } + current = current->next; + } + /* Now all pixmaps are released (possibility if xcb_wait is true that one is not aware yet), + * and we don't expect any new Present event to come from Xserver */ +} + +static void PRESENTFreeXcbQueue(PRESENTpriv *present_priv) +{ + if (present_priv->window) { + xcb_unregister_for_special_event(present_priv->xcb_connection, present_priv->special_event); + present_priv->last_msc = 0; + present_priv->last_target = 0; + present_priv->special_event = NULL; + } +} + +static BOOL PRESENTPrivChangeWindow(PRESENTpriv *present_priv, XID window) +{ + xcb_void_cookie_t cookie; + xcb_generic_error_t *error; + xcb_present_event_t eid; + + PRESENTForceReleases(present_priv); + PRESENTFreeXcbQueue(present_priv); + present_priv->window = window; + + if (window) { + cookie = xcb_present_select_input_checked(present_priv->xcb_connection, + (eid = xcb_generate_id(present_priv->xcb_connection)), + window, + XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY| + XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); + present_priv->special_event = xcb_register_for_special_xge(present_priv->xcb_connection, + &xcb_present_id, + eid, NULL); + error = xcb_request_check(present_priv->xcb_connection, cookie); /* performs a flush */ + if (error || !present_priv->special_event) { + ERR("FAILED to use the X PRESENT extension. 
Was the destination a window ?\n"); + if (present_priv->special_event) + xcb_unregister_for_special_event(present_priv->xcb_connection, present_priv->special_event); + present_priv->special_event = NULL; + present_priv->window = 0; + } + } + return (present_priv->window != 0); +} + +/* Destroy the content, except the link and the struct mem */ +static void +PRESENTDestroyPixmapContent(Display *dpy, PRESENTPixmapPriv *present_pixmap) +{ + XFreePixmap(dpy, present_pixmap->pixmap); +} + +void +PRESENTDestroy(Display *dpy, PRESENTpriv *present_priv) +{ + PRESENTPixmapPriv *current = NULL; + + pthread_mutex_lock(&present_priv->mutex_present); + + PRESENTForceReleases(present_priv); + + current = present_priv->first_present_priv; + while (current) { + PRESENTPixmapPriv *next = current->next; + PRESENTDestroyPixmapContent(dpy, current); + free(current); + current = next; + } + + PRESENTFreeXcbQueue(present_priv); + + xcb_disconnect(present_priv->xcb_connection); + xcb_disconnect(present_priv->xcb_connection_bis); + pthread_mutex_unlock(&present_priv->mutex_present); + pthread_mutex_destroy(&present_priv->mutex_present); + pthread_mutex_destroy(&present_priv->mutex_xcb_wait); + + free(present_priv); +} + +BOOL +PRESENTPixmapInit(PRESENTpriv *present_priv, Pixmap pixmap, PRESENTPixmapPriv **present_pixmap_priv) +{ + xcb_get_geometry_cookie_t cookie; + xcb_get_geometry_reply_t *reply; + + cookie = xcb_get_geometry(present_priv->xcb_connection, pixmap); + reply = xcb_get_geometry_reply(present_priv->xcb_connection, cookie, NULL); + + if (!reply) + return FALSE; + + *present_pixmap_priv = (PRESENTPixmapPriv *) calloc(1, sizeof(PRESENTPixmapPriv)); + if (!*present_pixmap_priv) { + free(reply); + return FALSE; + } + pthread_mutex_lock(&present_priv->mutex_present); + + (*present_pixmap_priv)->released = TRUE; + (*present_pixmap_priv)->pixmap = pixmap; + (*present_pixmap_priv)->present_priv = present_priv; + (*present_pixmap_priv)->next = present_priv->first_present_priv; + 
(*present_pixmap_priv)->width = reply->width; + (*present_pixmap_priv)->height = reply->height; + (*present_pixmap_priv)->depth = reply->depth; + free(reply); + + present_priv->last_serial_given++; + (*present_pixmap_priv)->serial = present_priv->last_serial_given; + present_priv->first_present_priv = *present_pixmap_priv; + + pthread_mutex_unlock(&present_priv->mutex_present); + return TRUE; +} + +BOOL +PRESENTTryFreePixmap(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv) +{ + PRESENTpriv *present_priv = present_pixmap_priv->present_priv; + PRESENTPixmapPriv *current; + + pthread_mutex_lock(&present_priv->mutex_present); + + if (!present_pixmap_priv->released || present_pixmap_priv->present_complete_pending) { + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + + if (present_priv->first_present_priv == present_pixmap_priv) { + present_priv->first_present_priv = present_pixmap_priv->next; + goto free_priv; + } + + current = present_priv->first_present_priv; + while (current->next != present_pixmap_priv) + current = current->next; + current->next = present_pixmap_priv->next; +free_priv: + PRESENTDestroyPixmapContent(dpy, present_pixmap_priv); + free(present_pixmap_priv); + pthread_mutex_unlock(&present_priv->mutex_present); + return TRUE; +} + +BOOL +PRESENTHelperCopyFront(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv) +{ + PRESENTpriv *present_priv = present_pixmap_priv->present_priv; + xcb_void_cookie_t cookie; + xcb_generic_error_t *error; + + uint32_t v = 0; + xcb_gcontext_t gc; + + pthread_mutex_lock(&present_priv->mutex_present); + + if (!present_priv->window) { + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + + xcb_create_gc(present_priv->xcb_connection, + (gc = xcb_generate_id(present_priv->xcb_connection)), + present_priv->window, + XCB_GC_GRAPHICS_EXPOSURES, + &v); + cookie = xcb_copy_area_checked(present_priv->xcb_connection, + present_priv->window, + present_pixmap_priv->pixmap, + gc, + 0, 0, 
0, 0, + present_pixmap_priv->width, + present_pixmap_priv->height); + error = xcb_request_check(present_priv->xcb_connection, cookie); + xcb_free_gc(present_priv->xcb_connection, gc); + pthread_mutex_unlock(&present_priv->mutex_present); + return (error != NULL); +} + +BOOL +PRESENTPixmap(Display *dpy, XID window, + PRESENTPixmapPriv *present_pixmap_priv, D3DPRESENT_PARAMETERS *pPresentationParameters, + const RECT *pSourceRect, const RECT *pDestRect, const RGNDATA *pDirtyRegion) +{ + PRESENTpriv *present_priv = present_pixmap_priv->present_priv; + xcb_void_cookie_t cookie; + xcb_generic_error_t *error; + int64_t target_msc, presentationInterval; + xcb_xfixes_region_t valid, update; + int16_t x_off, y_off; + uint32_t options = XCB_PRESENT_OPTION_NONE; + + pthread_mutex_lock(&present_priv->mutex_present); + + if (window != present_priv->window) + PRESENTPrivChangeWindow(present_priv, window); + + if (!window) { + ERR("ERROR: Try to Present a pixmap on a NULL window\n"); + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + + PRESENTflush_events(present_priv, FALSE); + if (!present_pixmap_priv->released || present_pixmap_priv->present_complete_pending) { + ERR("FATAL ERROR: Trying to Present a pixmap not released\n"); + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + target_msc = present_priv->last_msc; + switch(pPresentationParameters->PresentationInterval) { + case D3DPRESENT_INTERVAL_DEFAULT: + case D3DPRESENT_INTERVAL_ONE: + presentationInterval = 1; + break; + case D3DPRESENT_INTERVAL_TWO: + presentationInterval = 2; + break; + case D3DPRESENT_INTERVAL_THREE: + presentationInterval = 3; + break; + case D3DPRESENT_INTERVAL_FOUR: + presentationInterval = 4; + break; + case D3DPRESENT_INTERVAL_IMMEDIATE: + default: + presentationInterval = 0; + options |= XCB_PRESENT_OPTION_ASYNC; + break; + } + target_msc += presentationInterval * (present_priv->pixmap_present_pending + 1); + + /* Note: PRESENT defines some way to do 
partial copy: + * presentproto: + * 'x-off' and 'y-off' define the location in the window where + * the 0,0 location of the pixmap will be presented. valid-area + * and update-area are relative to the pixmap. + */ + if (!pSourceRect && !pDestRect && !pDirtyRegion) { + valid = 0; + update = 0; + x_off = 0; + y_off = 0; + } else { + xcb_rectangle_t rect_update; + xcb_rectangle_t *rect_updates; + int i; + + rect_update.x = 0; + rect_update.y = 0; + rect_update.width = present_pixmap_priv->width; + rect_update.height = present_pixmap_priv->height; + x_off = 0; + y_off = 0; + if (pSourceRect) { + x_off = -pSourceRect->left; + y_off = -pSourceRect->top; + rect_update.x = pSourceRect->left; + rect_update.y = pSourceRect->top; + rect_update.width = pSourceRect->right - pSourceRect->left; + rect_update.height = pSourceRect->bottom - pSourceRect->top; + } + if (pDestRect) { + x_off += pDestRect->left; + y_off += pDestRect->top; + rect_update.width = pDestRect->right - pDestRect->left; + rect_update.height = pDestRect->bottom - pDestRect->top; + /* Note: the size of pDestRect and pSourceRect are supposed to be the same size + * because the driver would have done things to assure that. 
*/ + } + valid = xcb_generate_id(present_priv->xcb_connection_bis); + update = xcb_generate_id(present_priv->xcb_connection_bis); + xcb_xfixes_create_region(present_priv->xcb_connection_bis, valid, 1, &rect_update); + if (pDirtyRegion && pDirtyRegion->rdh.nCount) { + rect_updates = (void *) calloc(pDirtyRegion->rdh.nCount, sizeof(xcb_rectangle_t)); + for (i = 0; i < pDirtyRegion->rdh.nCount; i++) + { + RECT rc; + memcpy(&rc, pDirtyRegion->Buffer + i * sizeof(RECT), sizeof(RECT)); + rect_update.x = rc.left; + rect_update.y = rc.top; + rect_update.width = rc.right - rc.left; + rect_update.height = rc.bottom - rc.top; + memcpy(rect_updates + i * sizeof(xcb_rectangle_t), &rect_update, sizeof(xcb_rectangle_t)); + } + xcb_xfixes_create_region(present_priv->xcb_connection_bis, update, pDirtyRegion->rdh.nCount, rect_updates); + free(rect_updates); + } else + xcb_xfixes_create_region(present_priv->xcb_connection_bis, update, 1, &rect_update); + } + if (pPresentationParameters->SwapEffect == D3DSWAPEFFECT_COPY) + options |= XCB_PRESENT_OPTION_COPY; + cookie = xcb_present_pixmap_checked(present_priv->xcb_connection_bis, + window, + present_pixmap_priv->pixmap, + present_pixmap_priv->serial, + valid, update, x_off, y_off, + None, None, None, options, + target_msc, 0, 0, 0, NULL); + error = xcb_request_check(present_priv->xcb_connection_bis, cookie); /* performs a flush */ + + if (update) + xcb_xfixes_destroy_region(present_priv->xcb_connection_bis, update); + if (valid) + xcb_xfixes_destroy_region(present_priv->xcb_connection_bis, valid); + + if (error) { + xcb_get_geometry_cookie_t cookie_geom; + xcb_get_geometry_reply_t *reply; + + cookie_geom = xcb_get_geometry(present_priv->xcb_connection_bis, window); + reply = xcb_get_geometry_reply(present_priv->xcb_connection_bis, cookie_geom, NULL); + + ERR("Error using PRESENT. Here some debug info\n"); + if (!reply) { + ERR("Error querying window info. 
Perhaps it doesn't exist anymore\n"); + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + ERR("Pixmap: width=%d, height=%d, depth=%d\n", + present_pixmap_priv->width, present_pixmap_priv->height, + present_pixmap_priv->depth); + ERR("Window: width=%d, height=%d, depth=%d, x=%d, y=%d\n", + (int) reply->width, (int) reply->height, + (int) reply->depth, (int) reply->x, (int) reply->y); + ERR("Present parameter: PresentationInterval=%d, BackBufferCount=%d, Pending presentations=%d\n", + pPresentationParameters->PresentationInterval, + pPresentationParameters->BackBufferCount, + present_priv->pixmap_present_pending + ); + if (present_pixmap_priv->depth != reply->depth) + ERR("Depths are different. PRESENT needs the pixmap and the window have same depth\n"); + free(reply); + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + present_priv->last_target = target_msc; + present_priv->pixmap_present_pending++; + present_pixmap_priv->present_complete_pending = TRUE; + present_pixmap_priv->released = FALSE; + pthread_mutex_unlock(&present_priv->mutex_present); + return TRUE; +} + +BOOL +PRESENTWaitPixmapReleased(PRESENTPixmapPriv *present_pixmap_priv) +{ + PRESENTpriv *present_priv = present_pixmap_priv->present_priv; + + pthread_mutex_lock(&present_priv->mutex_present); + + PRESENTflush_events(present_priv, FALSE); + + while (!present_pixmap_priv->released || present_pixmap_priv->present_complete_pending) { + /* Note: following if should not happen because we'll never + * use two PRESENTWaitPixmapReleased in parallels on same window. 
+ * However it would make it work in that case */ + if (present_priv->xcb_wait) { /* we allow only one thread to dispatch events */ + pthread_mutex_lock(&present_priv->mutex_xcb_wait); + /* here the other thread got an event but hasn't treated it yet */ + pthread_mutex_unlock(&present_priv->mutex_xcb_wait); + pthread_mutex_unlock(&present_priv->mutex_present); + struct timespec duration = { 0, 10 * 1000*1000 }; + nanosleep(&duration,NULL); /* Let it treat the event */ + pthread_mutex_lock(&present_priv->mutex_present); + } else if (!PRESENTwait_events(present_priv, TRUE)) { + pthread_mutex_unlock(&present_priv->mutex_present); + return FALSE; + } + } + pthread_mutex_unlock(&present_priv->mutex_present); + return TRUE; +} diff --git a/nine-native/src/dri3.h b/nine-native/src/dri3.h new file mode 100644 index 000000000..ee2264bf5 --- /dev/null +++ b/nine-native/src/dri3.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2014 Axel Davy + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XNINE_DRI3_H +#define __XNINE_DRI3_H + +#include +#include +#include +#include +#include +#include + +BOOL +DRI3CheckExtension(Display *dpy, int major, int minor); + +BOOL +PRESENTCheckExtension(Display *dpy, int major, int minor); + +BOOL +DRI3Open(Display *dpy, int screen, int *device_fd); + +BOOL +DRI3PixmapFromDmaBuf(Display *dpy, int screen, int fd, int width, int height, int stride, int depth, int bpp, Pixmap *pixmap); + +BOOL +DRI3DmaBufFromPixmap(Display *dpy, Pixmap pixmap, int *fd, int *width, int *height, int *stride, int *depth, int *bpp); + +typedef struct PRESENTPriv PRESENTpriv; +typedef struct PRESENTPixmapPriv PRESENTPixmapPriv; + +BOOL +PRESENTInit(Display *dpy, PRESENTpriv **present_priv); + +/* will clean properly and free all PRESENTPixmapPriv associated to PRESENTpriv. + * PRESENTPixmapPriv should not be freed by something else. + * If never a PRESENTPixmapPriv has to be destroyed, + * please destroy the current PRESENTpriv and create a new one. 
+ * This will take care than all pixmaps are released */ +void +PRESENTDestroy(Display *dpy, PRESENTpriv *present_priv); + +BOOL +PRESENTPixmapInit(PRESENTpriv *present_priv, Pixmap pixmap, PRESENTPixmapPriv **present_pixmap_priv); + +BOOL +PRESENTTryFreePixmap(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv); + +BOOL +PRESENTHelperCopyFront(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv); + +BOOL +PRESENTPixmap(Display *dpy, XID window, + PRESENTPixmapPriv *present_pixmap_priv, D3DPRESENT_PARAMETERS *pPresentationParameters, + const RECT *pSourceRect, const RECT *pDestRect, const RGNDATA *pDirtyRegion); + +BOOL +PRESENTWaitPixmapReleased(PRESENTPixmapPriv *present_pixmap_priv); + +#endif /* __XNINE_DRI3_H */ diff --git a/nine-native/src/nine_sdl.c b/nine-native/src/nine_sdl.c new file mode 100644 index 000000000..873628935 --- /dev/null +++ b/nine-native/src/nine_sdl.c @@ -0,0 +1,1251 @@ +// ---------------------------------------- +// nine_sdl + +#include "nine_sdl.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "dri3.h" + +// ----------- utils funcs / stubs ------------------------ +#define TRACE(...) +#define FIXME(...) fprintf(stderr, __VA_ARGS__) +#define WARN(...) fprintf(stderr, __VA_ARGS__) +#define ERR(...) 
fprintf(stderr, __VA_ARGS__) + +static inline BOOL IsEqualGUID(const GUID* a, const GUID* b) +{ + return memcmp(a,b,sizeof(GUID)) == 0; +} +static const char* debugstr_guid(const GUID* id) +{ + return "GUID"; +} + +static inline LONG WINAPI InterlockedIncrement( LONG volatile *dest ) +{ + return __sync_add_and_fetch(dest, 1);; +} +static inline LONG WINAPI InterlockedDecrement( LONG volatile *dest ) +{ + return __sync_sub_and_fetch(dest, 1);; +} + + +// --------------------------------------------------------------------------------------- + +static const D3DFORMAT ConvertFromSDL(Uint32 format) +{ + switch (format) + { + case SDL_PIXELFORMAT_ARGB4444: return D3DFMT_A4R4G4B4; + case SDL_PIXELFORMAT_RGB332: return D3DFMT_R3G3B2; + case SDL_PIXELFORMAT_ARGB1555: return D3DFMT_A1R5G5B5; + case SDL_PIXELFORMAT_RGB555: return D3DFMT_X1R5G5B5; + case SDL_PIXELFORMAT_RGB565: return D3DFMT_R5G6B5; + case SDL_PIXELFORMAT_RGB24: return D3DFMT_R8G8B8; + case SDL_PIXELFORMAT_RGB888: return D3DFMT_X8R8G8B8; + case SDL_PIXELFORMAT_ARGB8888: return D3DFMT_A8R8G8B8; + case SDL_PIXELFORMAT_ARGB2101010: return D3DFMT_A2R10G10B10; + default: + case SDL_PIXELFORMAT_UNKNOWN: return D3DFMT_UNKNOWN; + } +} + + +static const Uint32 ConvertToSDL(D3DFORMAT format) +{ + switch (format) + { + case D3DFMT_A4R4G4B4: return SDL_PIXELFORMAT_ARGB4444; + case D3DFMT_R3G3B2: return SDL_PIXELFORMAT_RGB332; + case D3DFMT_A1R5G5B5: return SDL_PIXELFORMAT_ARGB1555; + case D3DFMT_X1R5G5B5: return SDL_PIXELFORMAT_RGB555; + case D3DFMT_R5G6B5: return SDL_PIXELFORMAT_RGB565; + case D3DFMT_R8G8B8: return SDL_PIXELFORMAT_RGB24; + case D3DFMT_X8R8G8B8: return SDL_PIXELFORMAT_RGB888; + case D3DFMT_A8R8G8B8: return SDL_PIXELFORMAT_ARGB8888; + case D3DFMT_A2R10G10B10: return SDL_PIXELFORMAT_ARGB2101010; + default: + case D3DFMT_UNKNOWN: return SDL_PIXELFORMAT_UNKNOWN; + } +} + + +// ---- dlls/winex11.drv/d3dadapter.c ---------------------------------------------------------------- + +#include +#include +#include 
+#include + +const GUID IID_IDirect3D9Ex = { 0x02177241, 0x69FC, 0x400C, {0x8F, 0xF1, 0x93, 0xA4, 0x4D, 0xF6, 0x86, 0x1D}}; +const GUID IID_IDirect3D9 = { 0x81BDCBCA, 0x64D4, 0x426D, {0xAE, 0x8D, 0xAD, 0x1, 0x47, 0xF4, 0x27, 0x5C}}; +const GUID IID_ID3DPresent = { 0x77D60E80, 0xF1E6, 0x11DF, { 0x9E, 0x39, 0x95, 0x0C, 0xDF, 0xD7, 0x20, 0x85 } }; +const GUID IID_ID3DPresentGroup = { 0xB9C3016E, 0xF32A, 0x11DF, { 0x9C, 0x18, 0x92, 0xEA, 0xDE, 0xD7, 0x20, 0x85 } }; +const GUID IID_IUnknown = { 0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 } }; + + +struct DRI3Present +{ + /* COM vtable */ + void *vtable; + /* IUnknown reference count */ + LONG refs; + + D3DPRESENT_PARAMETERS params; + PRESENTpriv *present_priv; + + SDL_Window* sdl_win; + Display* x11_display; + Window x11_window; +}; + +struct D3DWindowBuffer +{ + PRESENTPixmapPriv *present_pixmap_priv; +}; + + +static ULONG WINAPI +DRI3Present_AddRef( struct DRI3Present *This ) +{ + ULONG refs = InterlockedIncrement(&This->refs); + TRACE("%p increasing refcount to %u.\n", This, refs); + return refs; +} + +static ULONG WINAPI +DRI3Present_Release( struct DRI3Present *This ) +{ + ULONG refs = InterlockedDecrement(&This->refs); + TRACE("%p decreasing refcount to %u.\n", This, refs); + if (refs == 0) { + /* dtor */ + SDL_SetWindowFullscreen(This->sdl_win, FALSE); + PRESENTDestroy(This->x11_display, This->present_priv); + free(This); + } + return refs; +} + +static HRESULT WINAPI +DRI3Present_QueryInterface( struct DRI3Present *This, + REFIID riid, + void **ppvObject ) +{ + if (!ppvObject) { return E_POINTER; } + + if (IsEqualGUID(&IID_ID3DPresent, riid) || + IsEqualGUID(&IID_IUnknown, riid)) { + *ppvObject = This; + DRI3Present_AddRef(This); + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + *ppvObject = NULL; + + return E_NOINTERFACE; +} + +static void +DRI3Present_ChangePresentParameters( struct DRI3Present *This, + D3DPRESENT_PARAMETERS 
*params, + BOOL first_time); + +/* Applies new presentation parameters. In windowed mode it leaves fullscreen and forwards the parameters; otherwise it builds an SDL display mode from pFullscreenDisplayMode, switches the SDL window to fullscreen, then forwards the parameters. Returns D3DERR_INVALIDCALL on a NULL argument or an SDL failure, D3D_OK otherwise. */ +static HRESULT WINAPI +DRI3Present_SetPresentParameters( struct DRI3Present *This, + D3DPRESENT_PARAMETERS *pPresentationParameters, + D3DDISPLAYMODEEX *pFullscreenDisplayMode ) +{ + if (!pPresentationParameters) { + WARN("pPresentationParameters is NULL.\n"); + return D3DERR_INVALIDCALL; + } + + if (pPresentationParameters->Windowed) { + SDL_SetWindowFullscreen(This->sdl_win, FALSE); + DRI3Present_ChangePresentParameters(This, pPresentationParameters, FALSE); + + } + else { + if (!pFullscreenDisplayMode) { + WARN("pFullscreenDisplayMode is NULL.\n"); + return D3DERR_INVALIDCALL; + } + + SDL_DisplayMode target; + SDL_DisplayMode closest; + memset(&target, 0, sizeof(target)); + memset(&closest, 0, sizeof(closest)); + + // msdn: "When switching to full-screen mode, + // Direct3D will try to find a desktop format that matches the back buffer format, + // so that back buffer and front buffer formats will be identical (to eliminate the need for color conversion)." + Uint32 preferred_format_for_backbuffer = ConvertToSDL(pPresentationParameters->BackBufferFormat); + + /* w is the width and h the height; these two assignments were previously swapped */ + target.w = pFullscreenDisplayMode->Width; + target.h = pFullscreenDisplayMode->Height; + target.refresh_rate = pFullscreenDisplayMode->RefreshRate; + if (preferred_format_for_backbuffer != SDL_PIXELFORMAT_UNKNOWN) + target.format = preferred_format_for_backbuffer; + else + target.format = ConvertToSDL(pFullscreenDisplayMode->Format); + + SDL_DisplayMode* mode = NULL; + if (FALSE) { + /* + * this doesn't seem to be a good idea: + * it returns different mode even when the request mode exits and works... 
+ */ + int Adapter = 0; + mode = SDL_GetClosestDisplayMode(Adapter, &target, &closest); + if (!mode) { + WARN("Could not find requested fullscreen display mode (%dx%d %dHz, format = %d).\n", pFullscreenDisplayMode->Width, pFullscreenDisplayMode->Height, pFullscreenDisplayMode->RefreshRate, pFullscreenDisplayMode->Format); + } + } + else { + mode = ⌖ + } + + int err = SDL_SetWindowDisplayMode(This->sdl_win, mode); + if (err < 0) { + WARN("SDL_SetWindowDisplayMode returned an error: %s\n", SDL_GetError()); + return D3DERR_INVALIDCALL; + } + + err = SDL_SetWindowFullscreen(This->sdl_win, SDL_WINDOW_FULLSCREEN); + if (err < 0) { + WARN("SDL_SetWindowFullscreen returned an error: %s\n", SDL_GetError()); + return D3DERR_INVALIDCALL; + } + + DRI3Present_ChangePresentParameters(This, pPresentationParameters, FALSE); + } + return D3D_OK; +} + +static HRESULT WINAPI +DRI3Present_D3DWindowBufferFromDmaBuf( struct DRI3Present *This, + int dmaBufFd, + int width, + int height, + int stride, + int depth, + int bpp, + struct D3DWindowBuffer **out) +{ + Pixmap pixmap; + + if (!DRI3PixmapFromDmaBuf(This->x11_display, DefaultScreen(This->x11_display), + dmaBufFd, width, height, stride, depth, + bpp, &pixmap )) + return D3DERR_DRIVERINTERNALERROR; + + *out = calloc(1, sizeof(struct D3DWindowBuffer)); + PRESENTPixmapInit(This->present_priv, pixmap, &((*out)->present_pixmap_priv)); + return D3D_OK; +} + +static HRESULT WINAPI +DRI3Present_DestroyD3DWindowBuffer( struct DRI3Present *This, + struct D3DWindowBuffer *buffer ) +{ + /* the pixmap is managed by the PRESENT backend. 
+ * But if it can delete it right away, we may have + * better performance */ + PRESENTTryFreePixmap(This->x11_display, buffer->present_pixmap_priv); + free(buffer); + return D3D_OK; +} + +static HRESULT WINAPI +DRI3Present_WaitBufferReleased( struct DRI3Present *This, + struct D3DWindowBuffer *buffer) +{ + (void) This; + PRESENTWaitPixmapReleased(buffer->present_pixmap_priv); + return D3D_OK; +} + +static HRESULT WINAPI +DRI3Present_FrontBufferCopy( struct DRI3Present *This, + struct D3DWindowBuffer *buffer ) +{ + /* TODO: use dc_rect */ + if (PRESENTHelperCopyFront(This->x11_display, buffer->present_pixmap_priv)) + return D3D_OK; + else + return D3DERR_DRIVERINTERNALERROR; +} + +static HRESULT WINAPI +DRI3Present_PresentBuffer( struct DRI3Present *This, + struct D3DWindowBuffer *buffer, + HWND hWndOverride, + const RECT *pSourceRect, + const RECT *pDestRect, + const RGNDATA *pDirtyRegion, + DWORD Flags ) +{ +/* TODO? + + if (d3d->dc_rect.top != 0 && + d3d->dc_rect.left != 0) { + if (!pDestRect) + pDestRect = (const RECT *) &(d3d->dc_rect); + else { + dest_translate.top = pDestRect->top + d3d->dc_rect.top; + dest_translate.left = pDestRect->left + d3d->dc_rect.left; + dest_translate.bottom = pDestRect->bottom + d3d->dc_rect.bottom; + dest_translate.right = pDestRect->right + d3d->dc_rect.right; + pDestRect = (const RECT *) &dest_translate; + } + } +*/ + + if (!PRESENTPixmap(This->x11_display, This->x11_window, buffer->present_pixmap_priv, + &This->params, pSourceRect, pDestRect, pDirtyRegion)) + return D3DERR_DRIVERINTERNALERROR; + + return D3D_OK; +} + +static HRESULT WINAPI +DRI3Present_GetRasterStatus( struct DRI3Present *This, + D3DRASTER_STATUS *pRasterStatus ) +{ + FIXME("(%p, %p), stub!\n", This, pRasterStatus); + return D3DERR_INVALIDCALL; +} + +static HRESULT WINAPI +DRI3Present_GetDisplayMode( struct DRI3Present *This, + D3DDISPLAYMODEEX *pMode, + D3DDISPLAYROTATION *pRotation ) +{ + int Adapter = 0; + SDL_DisplayMode mode; + int err = 
SDL_GetDesktopDisplayMode(Adapter, &mode);
+    if (err < 0) {
+        /* name the call that actually failed */
+        WARN("SDL_GetDesktopDisplayMode returned an error: %s\n", SDL_GetError());
+        return D3DERR_INVALIDCALL;
+    }
+
+    /* Report the desktop mode of display 0, always progressive and unrotated. */
+    pMode->Width = mode.w;
+    pMode->Height = mode.h;
+    pMode->RefreshRate = mode.refresh_rate;
+    pMode->Format = ConvertFromSDL(mode.format);
+    pMode->ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE;
+
+    *pRotation = D3DDISPLAYROTATION_IDENTITY;
+
+    return D3D_OK;
+}
+
+/* Not implemented: logs the call and fails with D3DERR_INVALIDCALL. */
+static HRESULT WINAPI
+DRI3Present_GetPresentStats( struct DRI3Present *This,
+                             D3DPRESENTSTATS *pStats )
+{
+    FIXME("(%p, %p), stub!\n", This, pStats);
+    return D3DERR_INVALIDCALL;
+}
+
+/* Placeholder: always reports the cursor at (0, 0). */
+static HRESULT WINAPI
+DRI3Present_GetCursorPos( struct DRI3Present *This,
+                          POINT *pPoint )
+{
+    // FIXME faked
+    BOOL ok;
+    if (!pPoint)
+        return D3DERR_INVALIDCALL;
+    ok = TRUE;
+    memset(pPoint,0,sizeof(POINT));
+    return ok ? S_OK : D3DERR_DRIVERINTERNALERROR;
+}
+
+/* Placeholder: validates the pointer and otherwise does nothing. */
+static HRESULT WINAPI
+DRI3Present_SetCursorPos( struct DRI3Present *This,
+                          POINT *pPoint )
+{
+    // FIXME faked
+    if (!pPoint)
+        return D3DERR_INVALIDCALL;
+    return S_OK;
+}
+
+/* Placeholder: accepts any cursor and reports success without applying it. */
+static HRESULT WINAPI
+DRI3Present_SetCursor( struct DRI3Present *This,
+                       void *pBitmap,
+                       POINT *pHotspot,
+                       BOOL bShow )
+{
+    // FIXME faked
+    return D3D_OK;
+}
+
+/* Placeholder: accepts any gamma ramp and reports success without applying it. */
+static HRESULT WINAPI
+DRI3Present_SetGammaRamp( struct DRI3Present *This,
+                          const D3DGAMMARAMP *pRamp,
+                          HWND hWndOverride )
+{
+    // FIXME faked
+    return D3D_OK;
+}
+
+/*
+ * Reports the size and bit depth of the SDL window owned by this backend;
+ * hWnd is not consulted. The depth falls back to 24 when SDL cannot report
+ * the window's pixel format.
+ */
+static HRESULT WINAPI
+DRI3Present_GetWindowInfo( struct DRI3Present *This,
+                           HWND hWnd,
+                           int *width, int *height, int *depth )
+{
+    int w,h;
+    SDL_GetWindowSize(This->sdl_win, &w, &h);
+    Uint32 format = SDL_GetWindowPixelFormat(This->sdl_win);
+
+    *width = w;
+    *height = h;
+    *depth = format != SDL_PIXELFORMAT_UNKNOWN ?
SDL_BITSPERPIXEL(format) : 24; + return D3D_OK; +} + +/*----------*/ + + +static ID3DPresentVtbl DRI3Present_vtable = { + (void *)DRI3Present_QueryInterface, + (void *)DRI3Present_AddRef, + (void *)DRI3Present_Release, + (void *)DRI3Present_SetPresentParameters, + (void *)DRI3Present_D3DWindowBufferFromDmaBuf, + (void *)DRI3Present_DestroyD3DWindowBuffer, + (void *)DRI3Present_WaitBufferReleased, + (void *)DRI3Present_FrontBufferCopy, + (void *)DRI3Present_PresentBuffer, + (void *)DRI3Present_GetRasterStatus, + (void *)DRI3Present_GetDisplayMode, + (void *)DRI3Present_GetPresentStats, + (void *)DRI3Present_GetCursorPos, + (void *)DRI3Present_SetCursorPos, + (void *)DRI3Present_SetCursor, + (void *)DRI3Present_SetGammaRamp, + (void *)DRI3Present_GetWindowInfo +}; + +static void +DRI3Present_ChangePresentParameters( struct DRI3Present *This, + D3DPRESENT_PARAMETERS *params, + BOOL first_time) +{ + (void) first_time; /* will be used to manage screen res if windowed mode change */ + + if (params->hDeviceWindow && params->hDeviceWindow != This->sdl_win) { + WARN("Changing hDeviceWindow not supported\n"); + } + + int w,h; + SDL_GetWindowSize(This->sdl_win, &w, &h); + params->BackBufferWidth = w; + params->BackBufferHeight = h; + + This->params = *params; +} + +static HRESULT +DRI3Present_new( SDL_Window* sdl_win, + D3DPRESENT_PARAMETERS *params, + struct DRI3Present **out ) +{ + struct DRI3Present *This; + + if (!sdl_win) { + ERR("No SDL_Window specified for presentation backend.\n"); + return D3DERR_INVALIDCALL; + } + + SDL_SysWMinfo info; + SDL_VERSION(&info.version); + SDL_bool Ok = SDL_GetWindowWMInfo(sdl_win, &info); + if (!Ok) { + ERR("Invalid SDL_Window specified for presentation backend.\n"); + return D3DERR_INVALIDCALL; + } + + This = calloc(1, sizeof(struct DRI3Present)); + if (!This) { + return E_OUTOFMEMORY; + } + + This->vtable = &DRI3Present_vtable; + This->refs = 1; + This->sdl_win = sdl_win; + This->x11_display = info.info.x11.display; + This->x11_window 
= info.info.x11.window; + + DRI3Present_ChangePresentParameters(This, params, TRUE); + + PRESENTInit(info.info.x11.display, &(This->present_priv)); + + *out = This; + + return D3D_OK; +} + +struct DRI3PresentGroup +{ + /* COM vtable */ + void *vtable; + /* IUnknown reference count */ + LONG refs; + + struct DRI3Present **present_backends; + unsigned npresent_backends; +}; + +static ULONG WINAPI +DRI3PresentGroup_AddRef( struct DRI3PresentGroup *This ) +{ + ULONG refs = InterlockedIncrement(&This->refs); + TRACE("%p increasing refcount to %u.\n", This, refs); + return refs; +} + +static ULONG WINAPI +DRI3PresentGroup_Release( struct DRI3PresentGroup *This ) +{ + ULONG refs = InterlockedDecrement(&This->refs); + TRACE("%p decreasing refcount to %u.\n", This, refs); + if (refs == 0) { + unsigned i; + if (This->present_backends) { + for (i = 0; i < This->npresent_backends; ++i) { + if (This->present_backends[i]) + DRI3Present_Release(This->present_backends[i]); + } + free(This->present_backends); + } + free(This); + } + return refs; +} + +static HRESULT WINAPI +DRI3PresentGroup_QueryInterface( struct DRI3PresentGroup *This, + REFIID riid, + void **ppvObject ) +{ + if (!ppvObject) { return E_POINTER; } + if (IsEqualGUID(&IID_ID3DPresentGroup, riid) || + IsEqualGUID(&IID_IUnknown, riid)) { + *ppvObject = This; + DRI3PresentGroup_AddRef(This); + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + *ppvObject = NULL; + + return E_NOINTERFACE; +} + +static UINT WINAPI +DRI3PresentGroup_GetMultiheadCount( struct DRI3PresentGroup *This ) +{ + FIXME("(%p), stub!\n", This); + return 1; +} + +static HRESULT WINAPI +DRI3PresentGroup_GetPresent( struct DRI3PresentGroup *This, + UINT Index, + ID3DPresent **ppPresent ) +{ + if (Index >= DRI3PresentGroup_GetMultiheadCount(This)) { + ERR("Index >= MultiHeadCount\n"); + return D3DERR_INVALIDCALL; + } + DRI3Present_AddRef(This->present_backends[Index]); + *ppPresent = (ID3DPresent 
*)This->present_backends[Index];
+
+    return D3D_OK;
+}
+
+/* Not implemented: logs the call and fails with D3DERR_INVALIDCALL. */
+static HRESULT WINAPI
+DRI3PresentGroup_CreateAdditionalPresent( struct DRI3PresentGroup *This,
+                                          D3DPRESENT_PARAMETERS *pPresentationParameters,
+                                          ID3DPresent **ppPresent )
+{
+    FIXME("(%p, %p, %p), stub!\n", This, pPresentationParameters, ppPresent);
+    return D3DERR_INVALIDCALL;
+}
+
+/* Reports interface version 1.0. */
+static void WINAPI
+DRI3PresentGroup_GetVersion( struct DRI3PresentGroup *This,
+                             int *major,
+                             int *minor)
+{
+    *major = 1;
+    *minor = 0;
+}
+
+static ID3DPresentGroupVtbl DRI3PresentGroup_vtable = {
+    (void *)DRI3PresentGroup_QueryInterface,
+    (void *)DRI3PresentGroup_AddRef,
+    (void *)DRI3PresentGroup_Release,
+    (void *)DRI3PresentGroup_GetMultiheadCount,
+    (void *)DRI3PresentGroup_GetPresent,
+    (void *)DRI3PresentGroup_CreateAdditionalPresent,
+    (void *)DRI3PresentGroup_GetVersion
+};
+
+/*
+ * Allocates a DRI3PresentGroup holding one DRI3Present backend per entry of
+ * params (nparams entries), all bound to the same SDL window.
+ *
+ * On success *group receives the new object with a reference count of 1.
+ * On failure the partially built group is released and an error HRESULT is
+ * returned; *group is left untouched.
+ */
+static HRESULT
+dri3_create_present_group( SDL_Window* win,
+                           UINT adapter,
+                           D3DPRESENT_PARAMETERS *params,
+                           unsigned nparams,
+                           ID3DPresentGroup **group )
+{
+    struct DRI3PresentGroup *This = calloc(1, sizeof(struct DRI3PresentGroup));
+    HRESULT hr;
+    unsigned i;
+
+    if (!This) {
+        ERR("Out of memory.\n");
+        return E_OUTOFMEMORY;
+    }
+
+    This->vtable = &DRI3PresentGroup_vtable;
+    This->refs = 1;
+    This->npresent_backends = nparams;
+    This->present_backends = calloc(This->npresent_backends, sizeof(struct DRI3Present *));
+
+    if (!This->present_backends) {
+        DRI3PresentGroup_Release(This);
+        ERR("Out of memory.\n");
+        return E_OUTOFMEMORY;
+    }
+
+    /* adapter is not read after this point */
+    if (nparams != 1) { adapter = 0; }
+    for (i = 0; i < This->npresent_backends; ++i) {
+        /* create an ID3DPresent for it */
+        hr = DRI3Present_new(win, &params[i],
+                             &This->present_backends[i]);
+        if (FAILED(hr)) {
+            /* Release() skips the slots that are still NULL */
+            DRI3PresentGroup_Release(This);
+            return hr;
+        }
+    }
+
+    *group = (ID3DPresentGroup *)This;
+    TRACE("Returning %p\n", *group);
+
+    return D3D_OK;
+}
+
+
+
+// --------- dlls/d3d9/d3dadapter9.c ----------------------------------------------------------------
+
+struct d3dadapter9
+{
+    /*
COM vtable */ + void *vtable; + /* IUnknown reference count */ + LONG refs; + + /* simple test, one adapter */ + ID3DAdapter9 *adapter; + + /* true if it implements IDirect3D9Ex */ + BOOL ex; +}; + +/* convenience wrapper for calls into ID3D9Adapter */ +#define ADAPTER_PROC(name, ...) \ + ID3DAdapter9_##name(This->adapter, ## __VA_ARGS__) + +static HRESULT WINAPI +d3dadapter9_CheckDeviceFormat( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3DFORMAT CheckFormat ); + +static ULONG WINAPI +d3dadapter9_AddRef( struct d3dadapter9 *This ) +{ + ULONG refs = InterlockedIncrement(&This->refs); + TRACE("%p increasing refcount to %u.\n", This, refs); + return refs; +} + +static ULONG WINAPI +d3dadapter9_Release( struct d3dadapter9 *This ) +{ + ULONG refs = InterlockedDecrement(&This->refs); + TRACE("%p decreasing refcount to %u.\n", This, refs); + if (refs == 0) { + /* dtor */ + if (This->adapter){ + ID3DAdapter9_Release(This->adapter); + } + + free(This); + } + return refs; +} + +static HRESULT WINAPI +d3dadapter9_QueryInterface( struct d3dadapter9 *This, + REFIID riid, + void **ppvObject ) +{ + if (!ppvObject) { return E_POINTER; } + if ((IsEqualGUID(&IID_IDirect3D9Ex, riid) && This->ex) || + IsEqualGUID(&IID_IDirect3D9, riid) || + IsEqualGUID(&IID_IUnknown, riid)) { + *ppvObject = This; + d3dadapter9_AddRef(This); + return S_OK; + } + + WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); + *ppvObject = NULL; + + return E_NOINTERFACE; +} + +static HRESULT WINAPI +d3dadapter9_RegisterSoftwareDevice( struct d3dadapter9 *This, + void *pInitializeFunction ) +{ + FIXME("(%p, %p), stub!\n", This, pInitializeFunction); + return D3DERR_INVALIDCALL; +} + +static UINT WINAPI +d3dadapter9_GetAdapterCount( struct d3dadapter9 *This ) +{ + return This ? 
1 : 0; +} + +static HRESULT WINAPI +d3dadapter9_GetAdapterIdentifier( struct d3dadapter9 *This, + UINT Adapter, + DWORD Flags, + D3DADAPTER_IDENTIFIER9 *pIdentifier ) +{ + HRESULT hr; + + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + + hr = ADAPTER_PROC(GetAdapterIdentifier, Flags, pIdentifier); + return hr; +} + +static UINT WINAPI +d3dadapter9_GetAdapterModeCount( struct d3dadapter9 *This, + UINT Adapter, + D3DFORMAT Format ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { + WARN("Adapter %u does not exist.\n", Adapter); + return 0; + } + if (FAILED(d3dadapter9_CheckDeviceFormat(This, Adapter, D3DDEVTYPE_HAL, + Format, D3DUSAGE_RENDERTARGET, + D3DRTYPE_SURFACE, Format))) { + WARN("DeviceFormat not available.\n"); + return 0; + } + + int NumMatchingModes = 0; + int NumModes = SDL_GetNumDisplayModes(Adapter); + int i; + for (i=0;i= d3dadapter9_GetAdapterCount(This)) { + WARN("Adapter %u does not exist.\n", Adapter); + return D3DERR_INVALIDCALL; + } + if (!pMode) { + WARN("pMode is NULL.\n"); + return D3DERR_INVALIDCALL; + } + + hr = d3dadapter9_CheckDeviceFormat(This, Adapter, D3DDEVTYPE_HAL, + Format, D3DUSAGE_RENDERTARGET, + D3DRTYPE_SURFACE, Format); + if (FAILED(hr)) { + TRACE("DeviceFormat not available.\n"); + return hr; + } + + int IndexMatchingModes = 0; + int NumModes = SDL_GetNumDisplayModes(Adapter); + int i; + for (i=0;iWidth = mode.w; + pMode->Height = mode.h; + pMode->RefreshRate = mode.refresh_rate; + pMode->Format = Format; + + return D3D_OK; + } + IndexMatchingModes ++; + } + + WARN("invalid mode for format %d on adapter %d: %d\n", Format, Adapter, ModeIndex); + return D3DERR_INVALIDCALL; +} + +static HRESULT WINAPI +d3dadapter9_GetAdapterDisplayMode( struct d3dadapter9 *This, + UINT Adapter, + D3DDISPLAYMODE *pMode ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { + WARN("Adapter %u does not exist.\n", Adapter); + return D3DERR_INVALIDCALL; + } + + SDL_DisplayMode mode; + int err = 
SDL_GetCurrentDisplayMode(Adapter, &mode); + if (err < 0) { + WARN("SDL_GetCurrentDisplayMode returned an error: %s\n", SDL_GetError()); + return D3DERR_INVALIDCALL; + } + + pMode->Width = mode.w; + pMode->Height = mode.h; + pMode->RefreshRate = mode.refresh_rate; + pMode->Format = ConvertFromSDL(mode.format); + + return D3D_OK; +} + +static HRESULT WINAPI +d3dadapter9_CheckDeviceType( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DevType, + D3DFORMAT AdapterFormat, + D3DFORMAT BackBufferFormat, + BOOL bWindowed ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + return ADAPTER_PROC(CheckDeviceType, + DevType, AdapterFormat, BackBufferFormat, bWindowed); +} + +static HRESULT WINAPI +d3dadapter9_CheckDeviceFormat( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + DWORD Usage, + D3DRESOURCETYPE RType, + D3DFORMAT CheckFormat ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + return ADAPTER_PROC(CheckDeviceFormat, + DeviceType, AdapterFormat, Usage, RType, CheckFormat); +} + +static HRESULT WINAPI +d3dadapter9_CheckDeviceMultiSampleType( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SurfaceFormat, + BOOL Windowed, + D3DMULTISAMPLE_TYPE MultiSampleType, + DWORD *pQualityLevels ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + return ADAPTER_PROC(CheckDeviceMultiSampleType, DeviceType, SurfaceFormat, + Windowed, MultiSampleType, pQualityLevels); +} + +static HRESULT WINAPI +d3dadapter9_CheckDepthStencilMatch( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT AdapterFormat, + D3DFORMAT RenderTargetFormat, + D3DFORMAT DepthStencilFormat ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + return ADAPTER_PROC(CheckDepthStencilMatch, DeviceType, AdapterFormat, + RenderTargetFormat, DepthStencilFormat); +} + 
+static HRESULT WINAPI +d3dadapter9_CheckDeviceFormatConversion( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DFORMAT SourceFormat, + D3DFORMAT TargetFormat ) +{ + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + return ADAPTER_PROC(CheckDeviceFormatConversion, + DeviceType, SourceFormat, TargetFormat); +} + +static HRESULT WINAPI +d3dadapter9_GetDeviceCaps( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + D3DCAPS9 *pCaps ) +{ + HRESULT hr; + + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } + + hr = ADAPTER_PROC(GetDeviceCaps, DeviceType, pCaps); + if (FAILED(hr)) { return hr; } + + pCaps->MasterAdapterOrdinal = 0; + pCaps->AdapterOrdinalInGroup = 0; + pCaps->NumberOfAdaptersInGroup = 1; + + return hr; +} + +static HMONITOR WINAPI +d3dadapter9_GetAdapterMonitor( struct d3dadapter9 *This, + UINT Adapter ) +{ + // FIXME faked + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return (HMONITOR)0; } + return (HMONITOR)0;; +} + +static HRESULT WINAPI +d3dadapter9_CreateDeviceEx( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS *pPresentationParameters, + D3DDISPLAYMODEEX *pFullscreenDisplayMode, + IDirect3DDevice9Ex **ppReturnedDeviceInterface ); + +static HRESULT WINAPI +d3dadapter9_CreateDevice( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS *pPresentationParameters, + IDirect3DDevice9 **ppReturnedDeviceInterface ) +{ + HRESULT hr; + hr = d3dadapter9_CreateDeviceEx(This, Adapter, DeviceType, hFocusWindow, + BehaviorFlags, pPresentationParameters, + NULL, + (IDirect3DDevice9Ex **)ppReturnedDeviceInterface); + if (FAILED(hr)) + return hr; + return D3D_OK; +} + +static UINT WINAPI +d3dadapter9_GetAdapterModeCountEx( struct d3dadapter9 *This, + UINT Adapter, + const D3DDISPLAYMODEFILTER 
*pFilter ) +{ + return 1; +} + +static HRESULT WINAPI +d3dadapter9_EnumAdapterModesEx( struct d3dadapter9 *This, + UINT Adapter, + const D3DDISPLAYMODEFILTER *pFilter, + UINT Mode, + D3DDISPLAYMODEEX *pMode ) +{ + FIXME("(%p, %u, %p, %u, %p), stub!\n", This, Adapter, pFilter, Mode, pMode); + return D3DERR_INVALIDCALL; +} + +static HRESULT WINAPI +d3dadapter9_GetAdapterDisplayModeEx( struct d3dadapter9 *This, + UINT Adapter, + D3DDISPLAYMODEEX *pMode, + D3DDISPLAYROTATION *pRotation ) +{ + FIXME("(%p, %u, %p, %p), stub!\n", This, Adapter, pMode, pRotation); + return D3DERR_INVALIDCALL; +} + +static HRESULT WINAPI +d3dadapter9_CreateDeviceEx( struct d3dadapter9 *This, + UINT Adapter, + D3DDEVTYPE DeviceType, + HWND hFocusWindow, + DWORD BehaviorFlags, + D3DPRESENT_PARAMETERS *pPresentationParameters, + D3DDISPLAYMODEEX *pFullscreenDisplayMode, + IDirect3DDevice9Ex **ppReturnedDeviceInterface ) +{ + ID3DPresentGroup *present; + HRESULT hr; + SDL_Window* sdl_win = (SDL_Window*)hFocusWindow; + + if (Adapter >= d3dadapter9_GetAdapterCount(This)) { + WARN("Adapter %u does not exist.\n", Adapter); + return D3DERR_INVALIDCALL; + } + + if (!sdl_win) { + ERR("no SDL_Window specified\n"); + return D3DERR_INVALIDCALL; + } + + { + UINT nparams = 1; + UINT ordinal = 0; + hr = dri3_create_present_group(sdl_win, ordinal, pPresentationParameters, + nparams, &present); + } + + if (FAILED(hr)) { + WARN("Failed to create PresentGroup.\n"); + return hr; + } + + if (This->ex) { + hr = ADAPTER_PROC(CreateDeviceEx, Adapter, DeviceType, hFocusWindow, + BehaviorFlags, pPresentationParameters, + pFullscreenDisplayMode, + (IDirect3D9Ex *)This, present, + ppReturnedDeviceInterface); + } else { /* CreateDevice on non-ex */ + hr = ADAPTER_PROC(CreateDevice, Adapter, DeviceType, hFocusWindow, + BehaviorFlags, pPresentationParameters, + (IDirect3D9 *)This, present, + (IDirect3DDevice9 **)ppReturnedDeviceInterface); + } + if (FAILED(hr)) { + WARN("ADAPTER_PROC failed.\n"); + 
ID3DPresentGroup_Release(present); + } + + return hr; +} + +static HRESULT WINAPI +d3dadapter9_GetAdapterLUID( struct d3dadapter9 *This, + UINT Adapter, + LUID *pLUID ) +{ + FIXME("(%p, %u, %p), stub!\n", This, Adapter, pLUID); + return D3DERR_INVALIDCALL; +} + +static IDirect3D9ExVtbl d3dadapter9_vtable = { + (void *)d3dadapter9_QueryInterface, + (void *)d3dadapter9_AddRef, + (void *)d3dadapter9_Release, + (void *)d3dadapter9_RegisterSoftwareDevice, + (void *)d3dadapter9_GetAdapterCount, + (void *)d3dadapter9_GetAdapterIdentifier, + (void *)d3dadapter9_GetAdapterModeCount, + (void *)d3dadapter9_EnumAdapterModes, + (void *)d3dadapter9_GetAdapterDisplayMode, + (void *)d3dadapter9_CheckDeviceType, + (void *)d3dadapter9_CheckDeviceFormat, + (void *)d3dadapter9_CheckDeviceMultiSampleType, + (void *)d3dadapter9_CheckDepthStencilMatch, + (void *)d3dadapter9_CheckDeviceFormatConversion, + (void *)d3dadapter9_GetDeviceCaps, + (void *)d3dadapter9_GetAdapterMonitor, + (void *)d3dadapter9_CreateDevice, + (void *)d3dadapter9_GetAdapterModeCountEx, + (void *)d3dadapter9_EnumAdapterModesEx, + (void *)d3dadapter9_GetAdapterDisplayModeEx, + (void *)d3dadapter9_CreateDeviceEx, + (void *)d3dadapter9_GetAdapterLUID +}; + +HRESULT +d3dadapter9_new( BOOL ex, Display *dpy, + IDirect3D9Ex **ppOut ) +{ + static void * WINAPI (*pD3DAdapter9GetProc)(const char *) = NULL; + static BOOL StaticInitDone = FALSE; + + // load dynamic library and retrieve "D3DAdapter9GetProc" symbol. 
+    if (!StaticInitDone) {
+        /* StaticInitDone is set only after the symbol lookup below succeeds.
+         * Setting it up front made every call that followed a failed first
+         * attempt skip this block and call through a NULL pD3DAdapter9GetProc. */
+
+        if (!PRESENTCheckExtension(dpy, 1, 0)) {
+            ERR("Unable to query PRESENT.\n");
+            return D3DERR_NOTAVAILABLE;
+        }
+
+        if (!DRI3CheckExtension(dpy, 1, 0)) {
+            ERR("Unable to query DRI3.\n");
+            return D3DERR_NOTAVAILABLE;
+        }
+
+
+        void * handle = NULL;
+
+        /* D3D_MODULE_PATH may name either the .so itself or the directory
+         * that contains d3dadapter9.so. */
+        const char *path = getenv("D3D_MODULE_PATH");
+        if (path) {
+            /* extremely basic path parsing attempt */
+            const char *dot = strrchr(path, '.');
+            if (dot) {
+                if (!strcmp(dot, ".so")) {
+                    handle = dlopen(path, RTLD_LOCAL | RTLD_NOW);
+                    if (!handle)
+                        ERR("Failed to load d3d9 lib '%s': %s\n", path, dlerror());
+                }
+            } else {
+                char str[4096];
+                snprintf(str, sizeof(str), "%s/d3dadapter9.so", path);
+                handle = dlopen(str, RTLD_LOCAL | RTLD_NOW);
+                /* only report a failure when dlopen actually failed */
+                if (!handle)
+                    ERR("Failed to load d3d9 lib '%s': %s\n", str, dlerror());
+            }
+        } else {
+            const char *paths[] = {
+                "/usr/lib32/d3d/d3dadapter9.so",
+                "/usr/lib/d3d/d3dadapter9.so",
+                "/usr/lib/x86_64-linux-gnu/d3d/d3dadapter9.so",
+            };
+            /* first path that loads wins */
+            for (unsigned i = 0; !handle && i < sizeof(paths) / sizeof(paths[0]); i++)
+                handle = dlopen(paths[i], RTLD_LOCAL | RTLD_NOW);
+            if (!handle)
+                ERR("Failed to load d3d9 lib: %s\n", dlerror());
+        }
+        if (!handle) {
+            return D3DERR_NOTAVAILABLE;
+        }
+
+        pD3DAdapter9GetProc = dlsym(handle, "D3DAdapter9GetProc");
+        if (!pD3DAdapter9GetProc) {
+            ERR("Failed to load d3d9 lib symbols\n");
+            /* the handle is local; drop it so a later retry does not leak it */
+            dlclose(handle);
+            return D3DERR_NOTAVAILABLE;
+        }
+
+        StaticInitDone = TRUE;
+    }
+
+    /* initialised so the failure message below never prints an indeterminate value */
+    int fd = -1;
+    if (!DRI3Open(dpy, DefaultScreen(dpy), &fd)) {
+        ERR("DRI3Open failed (fd=%d)\n", fd);
+        return D3DERR_NOTAVAILABLE;
+    }
+
+    /* NOTE(review): fd is not closed on the error returns below - confirm who owns it. */
+    const struct D3DAdapter9DRM *d3d9_drm = pD3DAdapter9GetProc(D3DADAPTER9DRM_NAME);
+    if (!d3d9_drm || d3d9_drm->major_version != D3DADAPTER9DRM_MAJOR)
+    {
+        ERR("Your display driver doesn't support native D3D9 adapters.\n");
+        return D3DERR_NOTAVAILABLE;
+    }
+
+    ID3DAdapter9* adapter = NULL;
+    HRESULT hr = d3d9_drm->create_adapter(fd, &adapter);
+    if (FAILED(hr)) {
+        ERR("Unable to create ID3DAdapter9 (fd=%d)\n", fd);
+        return hr;
+    }
+
+    struct d3dadapter9 *This = calloc(1,
sizeof(struct d3dadapter9)); + if (!This) { + ERR("Out of memory.\n"); + return E_OUTOFMEMORY; + } + + This->vtable = &d3dadapter9_vtable; + This->refs = 1; + This->ex = ex; + This->adapter = adapter; + + *ppOut = (IDirect3D9Ex *)This; + FIXME("\033[1;32m\nNative Direct3D 9 is active.\033[0m\n"); + return D3D_OK; +} + + +// -------------------------------------------------------------------- + +static IDirect3D9Ex* Direct3DCreate9Ex_SDL_common(BOOL ex, SDL_Window *win ) +{ + SDL_SysWMinfo info; + SDL_VERSION(&info.version); + SDL_bool Ok = SDL_GetWindowWMInfo(win, &info); + if (!Ok) { + ERR("SDL_GetWindowWMInfo failed."); + return NULL; + } + + IDirect3D9Ex *pD3D9Ex = NULL; + HRESULT hr = d3dadapter9_new( ex, info.info.x11.display, &pD3D9Ex ); + if (FAILED(hr)) { + return NULL; + } + + return pD3D9Ex; +} + +IDirect3D9Ex* SDL_Direct3DCreate9Ex(SDL_Window *win) +{ + return Direct3DCreate9Ex_SDL_common(TRUE, win); +} + + +IDirect3D9* Direct3DCreate9_SDL(SDL_Window *win) +{ + return (IDirect3D9*)Direct3DCreate9Ex_SDL_common(FALSE, win); +} + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ab8f64b21..0bbaca48f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,10 @@ if (STORM_ENABLE_STEAM) add_definitions(-DSTORM_ENABLE_STEAM=1) endif() +if (STORM_MESA_NINE) + add_definitions(-DSTORM_MESA_NINE=1) +endif() + if(STORM_ENABLE_CRASH_REPORTS) add_definitions(-DSTORM_ENABLE_CRASH_REPORTS=1) endif() diff --git a/src/apps/engine/CMakeLists.txt b/src/apps/engine/CMakeLists.txt index da2f17fff..249b5939e 100644 --- a/src/apps/engine/CMakeLists.txt +++ b/src/apps/engine/CMakeLists.txt @@ -16,6 +16,10 @@ else() set(SYSTEM_DEPS "ffi") endif() +if (STORM_MESA_NINE) + set(SYSTEM_DEPS ${SYSTEM_DEPS} "nine-native") +endif() + STORM_SETUP( TARGET_NAME engine TYPE executable @@ -37,4 +41,4 @@ STORM_SETUP( # system ${SYSTEM_DEPS} -) \ No newline at end of file +) diff --git a/src/libs/ball_splash/src/ball_splash_defines.h 
b/src/libs/ball_splash/src/ball_splash_defines.h index 08faf3442..cc3ebdc08 100644 --- a/src/libs/ball_splash/src/ball_splash_defines.h +++ b/src/libs/ball_splash/src/ball_splash_defines.h @@ -3,7 +3,9 @@ #include "rands.h" #include +#ifndef STORM_MESA_NINE #include +#endif #include #define BALLSPLASH_TRACE_INFORMATION diff --git a/src/libs/core/src/token.h b/src/libs/core/src/token.h index b001fe617..6cdef33f8 100644 --- a/src/libs/core/src/token.h +++ b/src/libs/core/src/token.h @@ -1,6 +1,8 @@ #pragma once +#ifndef STORM_MESA_NINE #include +#endif #include #include diff --git a/src/libs/input/src/sdl_input.cpp b/src/libs/input/src/sdl_input.cpp index cfd677d86..83327c1f5 100644 --- a/src/libs/input/src/sdl_input.cpp +++ b/src/libs/input/src/sdl_input.cpp @@ -3,7 +3,9 @@ #include #include #include +#ifndef STORM_MESA_NINE #include +#endif #include namespace storm @@ -293,7 +295,7 @@ SDLInput::SDLInput() keyStates_ = SDL_GetKeyboardState(nullptr); #ifndef _WIN32 // since SDL 2.0.18 breaks WINAPI mouse api - SDL_SetRelativeMouseMode(SDL_TRUE); + //SDL_SetRelativeMouseMode(SDL_TRUE); #endif SDL_AddEventWatch(&SDLEventHandler, this); OpenController(); diff --git a/src/libs/math/CMakeLists.txt b/src/libs/math/CMakeLists.txt index 6d7a8f636..be0605c1a 100644 --- a/src/libs/math/CMakeLists.txt +++ b/src/libs/math/CMakeLists.txt @@ -1,4 +1,4 @@ STORM_SETUP( TARGET_NAME math TYPE library -) \ No newline at end of file +) diff --git a/src/libs/renderer/CMakeLists.txt b/src/libs/renderer/CMakeLists.txt index 3a16f7055..9f932b0d2 100644 --- a/src/libs/renderer/CMakeLists.txt +++ b/src/libs/renderer/CMakeLists.txt @@ -1,5 +1,7 @@ if (WIN32) set(SYSTEM_DEPS "legacy_stdio_definitions") +elseif (STORM_MESA_NINE) +set(SYSTEM_DEPS "") else() set(SYSTEM_DEPS "${DXVK_NATIVE_D3D9_LIB}") endif() @@ -8,4 +10,4 @@ STORM_SETUP( TARGET_NAME renderer TYPE storm_module DEPENDENCIES core directx util ${SYSTEM_DEPS} -) \ No newline at end of file +) diff --git 
a/src/libs/renderer/include/dx9render.h b/src/libs/renderer/include/dx9render.h index 656f370ca..2e9d2567a 100644 --- a/src/libs/renderer/include/dx9render.h +++ b/src/libs/renderer/include/dx9render.h @@ -176,7 +176,7 @@ class VDX9RENDER : public SERVICE ~VDX9RENDER() override{}; // DX9Render: Init/Release - virtual bool InitDevice(bool windowed, HWND hwnd, int32_t width, int32_t height) = 0; + virtual bool InitDevice(bool windowed, void *hwnd, int32_t width, int32_t height) = 0; virtual bool ReleaseDevice() = 0; // DX9Render: Animation diff --git a/src/libs/renderer/src/s_device.cpp b/src/libs/renderer/src/s_device.cpp index 778312fa0..ee15173e5 100644 --- a/src/libs/renderer/src/s_device.cpp +++ b/src/libs/renderer/src/s_device.cpp @@ -13,6 +13,9 @@ #include #include +#ifdef STORM_MESA_NINE +#include "nine_sdl.h" +#endif #include @@ -556,7 +559,7 @@ bool DX9RENDER::Init() videoAdapterIndex = ini->GetInt(nullptr, "adapter", std::numeric_limits::max()); // stencil_format = D3DFMT_D24S8; - if (!InitDevice(bWindow, static_cast(core.GetWindow()->OSHandle()), screen_size.x, screen_size.y)) + if (!InitDevice(bWindow, core.GetWindow()->OSHandle(), screen_size.x, screen_size.y)) return false; #ifdef _WIN32 // Effects @@ -691,7 +694,7 @@ DX9RENDER::~DX9RENDER() ReleaseDevice(); } -bool DX9RENDER::InitDevice(bool windowed, HWND _hwnd, int32_t width, int32_t height) +bool DX9RENDER::InitDevice(bool windowed, void *_hwnd, int32_t width, int32_t height) { // GUARD(DX9RENDER::InitDevice) @@ -702,9 +705,13 @@ bool DX9RENDER::InitDevice(bool windowed, HWND _hwnd, int32_t width, int32_t hei screen_size.y = height; bWindow = windowed; - hwnd = _hwnd; + hwnd = static_cast(_hwnd); core.Trace("Initializing DirectX 9"); +#ifdef STORM_MESA_NINE + d3d = Direct3DCreate9_SDL(static_cast(_hwnd)); +#else d3d = Direct3DCreate9(D3D_SDK_VERSION); +#endif if (d3d == nullptr) { // MessageBox(hwnd, "Direct3DCreate9 error", "InitDevice::Direct3DCreate9", MB_OK); diff --git 
a/src/libs/renderer/src/s_device.h b/src/libs/renderer/src/s_device.h index 177559dc5..b4e202b6d 100644 --- a/src/libs/renderer/src/s_device.h +++ b/src/libs/renderer/src/s_device.h @@ -105,7 +105,7 @@ class DX9RENDER : public VDX9RENDER ~DX9RENDER() override; // DX9Render: Init/Release - bool InitDevice(bool windowed, HWND hwnd, int32_t width, int32_t height) override; + bool InitDevice(bool windowed, void *hwnd, int32_t width, int32_t height) override; bool ReleaseDevice() override; // DX9Render: Animation diff --git a/src/libs/renderer/src/storm_d3dx9.cpp b/src/libs/renderer/src/storm_d3dx9.cpp index 5ed9bdb54..df57f646e 100644 --- a/src/libs/renderer/src/storm_d3dx9.cpp +++ b/src/libs/renderer/src/storm_d3dx9.cpp @@ -8,6 +8,11 @@ #define WARN(...) fprintf(stdout, __VA_ARGS__) +#ifdef STORM_MESA_NINE // IMPROVE Nine Native, taken from dxvk-native/include/native/windows/windows_base.h +typedef int32_t HRESULT; +#define E_NOTIMPL ((HRESULT)0x80004001) +#endif + ///////////////////////// Parts from WINE source code for d3dx9 licensed under GPLv2 ///////////////////////// #define D3DX_FILTER_NONE 0x00000001 diff --git a/src/libs/renderer/src/technique.cpp b/src/libs/renderer/src/technique.cpp index 9ff0b9db6..4a20b95f9 100644 --- a/src/libs/renderer/src/technique.cpp +++ b/src/libs/renderer/src/technique.cpp @@ -10,6 +10,16 @@ #include #include +#ifdef STORM_MESA_NINE // IMPROVE Nine Native + taken from dxvk-native/include/native/directx/d3d8types.h +#define D3DWRAP_U 1 +#define D3DWRAP_V 2 +#define D3DWRAP_W 4 +#define D3DWRAPCOORD_0 1 +#define D3DWRAPCOORD_1 2 +#define D3DWRAPCOORD_2 4 +#define D3DWRAPCOORD_3 8 +#endif + #define USE_FX // Will load techniques from fx files #ifdef USE_FX diff --git a/src/libs/sink_effect/src/sink_splash_defines.h b/src/libs/sink_effect/src/sink_splash_defines.h index b0054b6b1..d71d15f0b 100644 --- a/src/libs/sink_effect/src/sink_splash_defines.h +++ b/src/libs/sink_effect/src/sink_splash_defines.h @@ -1,6 +1,8 @@ #pragma once 
+#ifndef STORM_MESA_NINE #include "windows.h" +#endif #include "d3d9types.h" #define SINK_SPLASH_TRACE_INFORMATION From ccf74625536c363e13ae340abd445435b2004757 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Fri, 29 Apr 2022 23:07:36 +0300 Subject: [PATCH 03/27] [arm] add sse2neon.h from https://github.com/DLTcollab/sse2neon/blob/6a56e5a3ffa0d0848e4e8e35752434b2ec9e3219/sse2neon.h --- src/libs/math/include/sse2neon.h | 8801 ++++++++++++++++++++++++++++++ 1 file changed, 8801 insertions(+) create mode 100644 src/libs/math/include/sse2neon.h diff --git a/src/libs/math/include/sse2neon.h b/src/libs/math/include/sse2neon.h new file mode 100644 index 000000000..490c0a45a --- /dev/null +++ b/src/libs/math/include/sse2neon.h @@ -0,0 +1,8801 @@ +#ifndef SSE2NEON_H +#define SSE2NEON_H + +// This header file provides a simple API translation layer +// between SSE intrinsics to their corresponding Arm/Aarch64 NEON versions +// +// This header file does not yet translate all of the SSE intrinsics. +// +// Contributors to this work are: +// John W. Ratcliff +// Brandon Rowlett +// Ken Fast +// Eric van Beurden +// Alexander Potylitsin +// Hasindu Gamaarachchi +// Jim Huang +// Mark Cheng +// Malcolm James MacLeod +// Devin Hussey (easyaspi314) +// Sebastian Pop +// Developer Ecosystem Engineering +// Danila Kutenin +// François Turban (JishinMaster) +// Pei-Hsuan Hung +// Yang-Hao Yuan +// Syoyo Fujita +// Brecht Van Lommel + +/* + * sse2neon is freely redistributable under the MIT License. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Tunable configurations */ + +/* Enable precise implementation of math operations + * This would slow down the computation a bit, but gives consistent result with + * x86 SSE. (e.g. 
would solve a hole or NaN pixel in the rendering result) + */ +/* _mm_min|max_ps|ss|pd|sd */ +#ifndef SSE2NEON_PRECISE_MINMAX +#define SSE2NEON_PRECISE_MINMAX (0) +#endif +/* _mm_rcp_ps and _mm_div_ps */ +#ifndef SSE2NEON_PRECISE_DIV +#define SSE2NEON_PRECISE_DIV (0) +#endif +/* _mm_sqrt_ps and _mm_rsqrt_ps */ +#ifndef SSE2NEON_PRECISE_SQRT +#define SSE2NEON_PRECISE_SQRT (0) +#endif +/* _mm_dp_pd */ +#ifndef SSE2NEON_PRECISE_DP +#define SSE2NEON_PRECISE_DP (0) +#endif + +/* compiler specific definitions */ +#if defined(__GNUC__) || defined(__clang__) +#pragma push_macro("FORCE_INLINE") +#pragma push_macro("ALIGN_STRUCT") +#define FORCE_INLINE static inline __attribute__((always_inline)) +#define ALIGN_STRUCT(x) __attribute__((aligned(x))) +#define _sse2neon_likely(x) __builtin_expect(!!(x), 1) +#define _sse2neon_unlikely(x) __builtin_expect(!!(x), 0) +#else /* non-GNU / non-clang compilers */ +#warning "Macro name collisions may happen with unsupported compiler." +#ifndef FORCE_INLINE +#define FORCE_INLINE static inline +#endif +#ifndef ALIGN_STRUCT +#define ALIGN_STRUCT(x) __declspec(align(x)) +#endif +#define _sse2neon_likely(x) (x) +#define _sse2neon_unlikely(x) (x) +#endif + +/* C language does not allow initializing a variable with a function call. */ +#ifdef __cplusplus +#define _sse2neon_const static const +#else +#define _sse2neon_const const +#endif + +#include +#include + +/* Architecture-specific build options */ +/* FIXME: #pragma GCC push_options is only available on GCC */ +#if defined(__GNUC__) +#if defined(__arm__) && __ARM_ARCH == 7 +/* According to ARM C Language Extensions Architecture specification, + * __ARM_NEON is defined to a value indicating the Advanced SIMD (NEON) + * architecture supported. + */ +#if !defined(__ARM_NEON) || !defined(__ARM_NEON__) +#error "You must enable NEON instructions (e.g. -mfpu=neon) to use SSE2NEON." 
+#endif +#if !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("fpu=neon") +#endif +#elif defined(__aarch64__) +#if !defined(__clang__) +#pragma GCC push_options +#pragma GCC target("+simd") +#endif +#elif __ARM_ARCH == 8 +#if !defined(__ARM_NEON) || !defined(__ARM_NEON__) +#error \ + "You must enable NEON instructions (e.g. -mfpu=neon-fp-armv8) to use SSE2NEON." +#endif +#if !defined(__clang__) +#pragma GCC push_options +#endif +#else +#error "Unsupported target. Must be either ARMv7-A+NEON or ARMv8-A." +#endif +#endif + +#include <arm_neon.h> +#if !defined(__aarch64__) && (__ARM_ARCH == 8) +#if defined __has_include && __has_include(<arm_acle.h>) +#include <arm_acle.h> +#endif +#endif + +/* Rounding functions require either Aarch64 instructions or libm fallback */ +#if !defined(__aarch64__) +#include <math.h> +#endif + +/* "__has_builtin" can be used to query support for built-in functions + * provided by gcc/clang and other compilers that support it. + */ +#ifndef __has_builtin /* GCC prior to 10 or non-clang compilers */ +/* Compatibility with gcc <= 9 */ +#if defined(__GNUC__) && (__GNUC__ <= 9) +#define __has_builtin(x) HAS##x +#define HAS__builtin_popcount 1 +#define HAS__builtin_popcountll 1 +#else +#define __has_builtin(x) 0 +#endif +#endif + +/** + * MACRO for shuffle parameter for _mm_shuffle_ps(). + * Argument fp3 is a digit[0123] that represents the fp from argument "b" + * of mm_shuffle_ps that will be placed in fp3 of result. fp2 is the same + * for fp2 in result. fp1 is a digit[0123] that represents the fp from + * argument "a" of mm_shuffle_ps that will be placed in fp1 of result. + * fp0 is the same for fp0 of result. + */ +#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \ + (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) + +/* Rounding mode macros. 
*/ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_NEG_INF 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_ZERO 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 +#define _MM_FROUND_NO_EXC 0x08 +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) +#define _MM_ROUND_NEAREST 0x0000 +#define _MM_ROUND_DOWN 0x2000 +#define _MM_ROUND_UP 0x4000 +#define _MM_ROUND_TOWARD_ZERO 0x6000 +/* Flush zero mode macros. */ +#define _MM_FLUSH_ZERO_MASK 0x8000 +#define _MM_FLUSH_ZERO_ON 0x8000 +#define _MM_FLUSH_ZERO_OFF 0x0000 +/* Denormals are zeros mode macros. */ +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#define _MM_DENORMALS_ZERO_ON 0x0040 +#define _MM_DENORMALS_ZERO_OFF 0x0000 + +/* indicate immediate constant argument in a given range */ +#define __constrange(a, b) const + +/* A few intrinsics accept traditional data types like ints or floats, but + * most operate on data types that are specific to SSE. + * If a vector type ends in d, it contains doubles, and if it does not have + * a suffix, it contains floats. An integer vector type can contain any type + * of integer, from chars to shorts to unsigned long longs. + */ +typedef int64x1_t __m64; +typedef float32x4_t __m128; /* 128-bit vector containing 4 floats */ +// On ARM 32-bit architecture, the float64x2_t is not supported. +// The data type __m128d should be represented in a different way for related +// intrinsic conversion. 
+#if defined(__aarch64__) +typedef float64x2_t __m128d; /* 128-bit vector containing 2 doubles */ +#else +typedef float32x4_t __m128d; +#endif +typedef int64x2_t __m128i; /* 128-bit vector containing integers */ + +// __int64 is defined in the Intrinsics Guide which maps to different datatype +// in different data model +#if !(defined(_WIN32) || defined(_WIN64) || defined(__int64)) +#if (defined(__x86_64__) || defined(__i386__)) +#define __int64 long long +#else +#define __int64 int64_t +#endif +#endif + +/* type-safe casting between types */ + +#define vreinterpretq_m128_f16(x) vreinterpretq_f32_f16(x) +#define vreinterpretq_m128_f32(x) (x) +#define vreinterpretq_m128_f64(x) vreinterpretq_f32_f64(x) + +#define vreinterpretq_m128_u8(x) vreinterpretq_f32_u8(x) +#define vreinterpretq_m128_u16(x) vreinterpretq_f32_u16(x) +#define vreinterpretq_m128_u32(x) vreinterpretq_f32_u32(x) +#define vreinterpretq_m128_u64(x) vreinterpretq_f32_u64(x) + +#define vreinterpretq_m128_s8(x) vreinterpretq_f32_s8(x) +#define vreinterpretq_m128_s16(x) vreinterpretq_f32_s16(x) +#define vreinterpretq_m128_s32(x) vreinterpretq_f32_s32(x) +#define vreinterpretq_m128_s64(x) vreinterpretq_f32_s64(x) + +#define vreinterpretq_f16_m128(x) vreinterpretq_f16_f32(x) +#define vreinterpretq_f32_m128(x) (x) +#define vreinterpretq_f64_m128(x) vreinterpretq_f64_f32(x) + +#define vreinterpretq_u8_m128(x) vreinterpretq_u8_f32(x) +#define vreinterpretq_u16_m128(x) vreinterpretq_u16_f32(x) +#define vreinterpretq_u32_m128(x) vreinterpretq_u32_f32(x) +#define vreinterpretq_u64_m128(x) vreinterpretq_u64_f32(x) + +#define vreinterpretq_s8_m128(x) vreinterpretq_s8_f32(x) +#define vreinterpretq_s16_m128(x) vreinterpretq_s16_f32(x) +#define vreinterpretq_s32_m128(x) vreinterpretq_s32_f32(x) +#define vreinterpretq_s64_m128(x) vreinterpretq_s64_f32(x) + +#define vreinterpretq_m128i_s8(x) vreinterpretq_s64_s8(x) +#define vreinterpretq_m128i_s16(x) vreinterpretq_s64_s16(x) +#define vreinterpretq_m128i_s32(x) 
vreinterpretq_s64_s32(x) +#define vreinterpretq_m128i_s64(x) (x) + +#define vreinterpretq_m128i_u8(x) vreinterpretq_s64_u8(x) +#define vreinterpretq_m128i_u16(x) vreinterpretq_s64_u16(x) +#define vreinterpretq_m128i_u32(x) vreinterpretq_s64_u32(x) +#define vreinterpretq_m128i_u64(x) vreinterpretq_s64_u64(x) + +#define vreinterpretq_f32_m128i(x) vreinterpretq_f32_s64(x) +#define vreinterpretq_f64_m128i(x) vreinterpretq_f64_s64(x) + +#define vreinterpretq_s8_m128i(x) vreinterpretq_s8_s64(x) +#define vreinterpretq_s16_m128i(x) vreinterpretq_s16_s64(x) +#define vreinterpretq_s32_m128i(x) vreinterpretq_s32_s64(x) +#define vreinterpretq_s64_m128i(x) (x) + +#define vreinterpretq_u8_m128i(x) vreinterpretq_u8_s64(x) +#define vreinterpretq_u16_m128i(x) vreinterpretq_u16_s64(x) +#define vreinterpretq_u32_m128i(x) vreinterpretq_u32_s64(x) +#define vreinterpretq_u64_m128i(x) vreinterpretq_u64_s64(x) + +#define vreinterpret_m64_s8(x) vreinterpret_s64_s8(x) +#define vreinterpret_m64_s16(x) vreinterpret_s64_s16(x) +#define vreinterpret_m64_s32(x) vreinterpret_s64_s32(x) +#define vreinterpret_m64_s64(x) (x) + +#define vreinterpret_m64_u8(x) vreinterpret_s64_u8(x) +#define vreinterpret_m64_u16(x) vreinterpret_s64_u16(x) +#define vreinterpret_m64_u32(x) vreinterpret_s64_u32(x) +#define vreinterpret_m64_u64(x) vreinterpret_s64_u64(x) + +#define vreinterpret_m64_f16(x) vreinterpret_s64_f16(x) +#define vreinterpret_m64_f32(x) vreinterpret_s64_f32(x) +#define vreinterpret_m64_f64(x) vreinterpret_s64_f64(x) + +#define vreinterpret_u8_m64(x) vreinterpret_u8_s64(x) +#define vreinterpret_u16_m64(x) vreinterpret_u16_s64(x) +#define vreinterpret_u32_m64(x) vreinterpret_u32_s64(x) +#define vreinterpret_u64_m64(x) vreinterpret_u64_s64(x) + +#define vreinterpret_s8_m64(x) vreinterpret_s8_s64(x) +#define vreinterpret_s16_m64(x) vreinterpret_s16_s64(x) +#define vreinterpret_s32_m64(x) vreinterpret_s32_s64(x) +#define vreinterpret_s64_m64(x) (x) + +#define vreinterpret_f32_m64(x) 
vreinterpret_f32_s64(x) + +#if defined(__aarch64__) +#define vreinterpretq_m128d_s32(x) vreinterpretq_f64_s32(x) +#define vreinterpretq_m128d_s64(x) vreinterpretq_f64_s64(x) + +#define vreinterpretq_m128d_u64(x) vreinterpretq_f64_u64(x) + +#define vreinterpretq_m128d_f32(x) vreinterpretq_f64_f32(x) +#define vreinterpretq_m128d_f64(x) (x) + +#define vreinterpretq_s64_m128d(x) vreinterpretq_s64_f64(x) + +#define vreinterpretq_u32_m128d(x) vreinterpretq_u32_f64(x) +#define vreinterpretq_u64_m128d(x) vreinterpretq_u64_f64(x) + +#define vreinterpretq_f64_m128d(x) (x) +#define vreinterpretq_f32_m128d(x) vreinterpretq_f32_f64(x) +#else +#define vreinterpretq_m128d_s32(x) vreinterpretq_f32_s32(x) +#define vreinterpretq_m128d_s64(x) vreinterpretq_f32_s64(x) + +#define vreinterpretq_m128d_u32(x) vreinterpretq_f32_u32(x) +#define vreinterpretq_m128d_u64(x) vreinterpretq_f32_u64(x) + +#define vreinterpretq_m128d_f32(x) (x) + +#define vreinterpretq_s64_m128d(x) vreinterpretq_s64_f32(x) + +#define vreinterpretq_u32_m128d(x) vreinterpretq_u32_f32(x) +#define vreinterpretq_u64_m128d(x) vreinterpretq_u64_f32(x) + +#define vreinterpretq_f32_m128d(x) (x) +#endif + +// A struct is defined in this header file called 'SIMDVec' which can be used +// by applications which attempt to access the contents of an __m128 struct +// directly. It is important to note that accessing the __m128 struct directly +// is bad coding practice by Microsoft: @see: +// https://docs.microsoft.com/en-us/cpp/cpp/m128 +// +// However, some legacy source code may try to access the contents of an __m128 +// struct directly so the developer can use the SIMDVec as an alias for it. Any +// casting must be done manually by the developer, as you cannot cast or +// otherwise alias the base NEON data type for intrinsic operations. +// +// union intended to allow direct access to an __m128 variable using the names +// that the MSVC compiler provides. 
This union should really only be used when +// trying to access the members of the vector as integer values. GCC/clang +// allow native access to the float members through a simple array access +// operator (in C since 4.6, in C++ since 4.8). +// +// Ideally direct accesses to SIMD vectors should not be used since it can cause +// a performance hit. If it really is needed however, the original __m128 +// variable can be aliased with a pointer to this union and used to access +// individual components. The use of this union should be hidden behind a macro +// that is used throughout the codebase to access the members instead of always +// declaring this type of variable. +typedef union ALIGN_STRUCT(16) SIMDVec { + float m128_f32[4]; // as floats - DON'T USE. Added for convenience. + int8_t m128_i8[16]; // as signed 8-bit integers. + int16_t m128_i16[8]; // as signed 16-bit integers. + int32_t m128_i32[4]; // as signed 32-bit integers. + int64_t m128_i64[2]; // as signed 64-bit integers. + uint8_t m128_u8[16]; // as unsigned 8-bit integers. + uint16_t m128_u16[8]; // as unsigned 16-bit integers. + uint32_t m128_u32[4]; // as unsigned 32-bit integers. + uint64_t m128_u64[2]; // as unsigned 64-bit integers. 
+} SIMDVec; + +// casting using SIMDVec +#define vreinterpretq_nth_u64_m128i(x, n) (((SIMDVec *) &x)->m128_u64[n]) +#define vreinterpretq_nth_u32_m128i(x, n) (((SIMDVec *) &x)->m128_u32[n]) +#define vreinterpretq_nth_u8_m128i(x, n) (((SIMDVec *) &x)->m128_u8[n]) + +/* SSE macros */ +#define _MM_GET_FLUSH_ZERO_MODE _sse2neon_mm_get_flush_zero_mode +#define _MM_SET_FLUSH_ZERO_MODE _sse2neon_mm_set_flush_zero_mode +#define _MM_GET_DENORMALS_ZERO_MODE _sse2neon_mm_get_denormals_zero_mode +#define _MM_SET_DENORMALS_ZERO_MODE _sse2neon_mm_set_denormals_zero_mode + +// Function declaration +// SSE +FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE(); +FORCE_INLINE __m128 _mm_move_ss(__m128, __m128); +FORCE_INLINE __m128 _mm_or_ps(__m128, __m128); +FORCE_INLINE __m128 _mm_set_ps1(float); +FORCE_INLINE __m128 _mm_setzero_ps(void); +// SSE2 +FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i); +FORCE_INLINE __m128i _mm_castps_si128(__m128); +FORCE_INLINE __m128i _mm_cmpeq_epi32(__m128i, __m128i); +FORCE_INLINE __m128i _mm_cvtps_epi32(__m128); +FORCE_INLINE __m128d _mm_move_sd(__m128d, __m128d); +FORCE_INLINE __m128i _mm_or_si128(__m128i, __m128i); +FORCE_INLINE __m128i _mm_set_epi32(int, int, int, int); +FORCE_INLINE __m128i _mm_set_epi64x(int64_t, int64_t); +FORCE_INLINE __m128d _mm_set_pd(double, double); +FORCE_INLINE __m128i _mm_set1_epi32(int); +FORCE_INLINE __m128i _mm_setzero_si128(); +// SSE4.1 +FORCE_INLINE __m128d _mm_ceil_pd(__m128d); +FORCE_INLINE __m128 _mm_ceil_ps(__m128); +FORCE_INLINE __m128d _mm_floor_pd(__m128d); +FORCE_INLINE __m128 _mm_floor_ps(__m128); +FORCE_INLINE __m128d _mm_round_pd(__m128d, int); +FORCE_INLINE __m128 _mm_round_ps(__m128, int); +// SSE4.2 +FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t, uint8_t); + +/* Backwards compatibility for compilers with lack of specific type support */ + +// Older gcc does not define vld1q_u8_x4 type +#if defined(__GNUC__) && !defined(__clang__) && \ + ((__GNUC__ <= 10 && defined(__arm__)) || \ + (__GNUC__ == 10 
&& __GNUC_MINOR__ < 3 && defined(__aarch64__)) || \ + (__GNUC__ <= 9 && defined(__aarch64__))) +FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) +{ + uint8x16x4_t ret; + ret.val[0] = vld1q_u8(p + 0); + ret.val[1] = vld1q_u8(p + 16); + ret.val[2] = vld1q_u8(p + 32); + ret.val[3] = vld1q_u8(p + 48); + return ret; +} +#else +// Wraps vld1q_u8_x4 +FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) +{ + return vld1q_u8_x4(p); +} +#endif + +/* Function Naming Conventions + * The naming convention of SSE intrinsics is straightforward. A generic SSE + * intrinsic function is given as follows: + * _mm_<name>_<data_type> + * + * The parts of this format are given as follows: + * 1. <name> describes the operation performed by the intrinsic + * 2. <data_type> identifies the data type of the function's primary arguments + * + * This last part, <data_type>, is a little complicated. It identifies the + * content of the input values, and can be set to any of the following values: + * + ps - vectors contain floats (ps stands for packed single-precision) + * + pd - vectors contain doubles (pd stands for packed double-precision) + * + epi8/epi16/epi32/epi64 - vectors contain 8-bit/16-bit/32-bit/64-bit + * signed integers + * + epu8/epu16/epu32/epu64 - vectors contain 8-bit/16-bit/32-bit/64-bit + * unsigned integers + * + si128 - unspecified 128-bit vector or 256-bit vector + * + m128/m128i/m128d - identifies input vector types when they are different + * than the type of the returned vector + * + * For example, _mm_setzero_ps. The _mm implies that the function returns + * a 128-bit vector. The _ps at the end implies that the argument vectors + * contain floats. + * + * A complete example: Byte Shuffle - pshufb (_mm_shuffle_epi8) + * // Set packed 16-bit integers. 
128 bits, 8 short, per 16 bits + * __m128i v_in = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); + * // Set packed 8-bit integers + * // 128 bits, 16 chars, per 8 bits + * __m128i v_perm = _mm_setr_epi8(1, 0, 2, 3, 8, 9, 10, 11, + * 4, 5, 12, 13, 6, 7, 14, 15); + * // Shuffle packed 8-bit integers + * __m128i v_out = _mm_shuffle_epi8(v_in, v_perm); // pshufb + * + * Data (Number, Binary, Byte Index): + +------+------+-------------+------+------+-------------+ + | 1 | 2 | 3 | 4 | Number + +------+------+------+------+------+------+------+------+ + | 0000 | 0001 | 0000 | 0010 | 0000 | 0011 | 0000 | 0100 | Binary + +------+------+------+------+------+------+------+------+ + | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | Index + +------+------+------+------+------+------+------+------+ + + +------+------+------+------+------+------+------+------+ + | 5 | 6 | 7 | 8 | Number + +------+------+------+------+------+------+------+------+ + | 0000 | 0101 | 0000 | 0110 | 0000 | 0111 | 0000 | 1000 | Binary + +------+------+------+------+------+------+------+------+ + | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | Index + +------+------+------+------+------+------+------+------+ + * Index (Byte Index): + +------+------+------+------+------+------+------+------+ + | 1 | 0 | 2 | 3 | 8 | 9 | 10 | 11 | + +------+------+------+------+------+------+------+------+ + + +------+------+------+------+------+------+------+------+ + | 4 | 5 | 12 | 13 | 6 | 7 | 14 | 15 | + +------+------+------+------+------+------+------+------+ + * Result: + +------+------+------+------+------+------+------+------+ + | 1 | 0 | 2 | 3 | 8 | 9 | 10 | 11 | Index + +------+------+------+------+------+------+------+------+ + | 0001 | 0000 | 0000 | 0010 | 0000 | 0101 | 0000 | 0110 | Binary + +------+------+------+------+------+------+------+------+ + | 256 | 2 | 5 | 6 | Number + +------+------+------+------+------+------+------+------+ + + +------+------+------+------+------+------+------+------+ + | 4 | 5 | 12 | 13 | 6 | 7 | 14 | 15 | 
Index + +------+------+------+------+------+------+------+------+ + | 0000 | 0011 | 0000 | 0111 | 0000 | 0100 | 0000 | 1000 | Binary + +------+------+------+------+------+------+------+------+ + | 3 | 7 | 4 | 8 | Number + +------+------+------+------+------+------+-------------+ + */ + +/* Constants for use with _mm_prefetch. */ +enum _mm_hint { + _MM_HINT_NTA = 0, /* load data to L1 and L2 cache, mark it as NTA */ + _MM_HINT_T0 = 1, /* load data to L1 and L2 cache */ + _MM_HINT_T1 = 2, /* load data to L2 cache only */ + _MM_HINT_T2 = 3, /* load data to L2 cache only, mark it as NTA */ + _MM_HINT_ENTA = 4, /* exclusive version of _MM_HINT_NTA */ + _MM_HINT_ET0 = 5, /* exclusive version of _MM_HINT_T0 */ + _MM_HINT_ET1 = 6, /* exclusive version of _MM_HINT_T1 */ + _MM_HINT_ET2 = 7 /* exclusive version of _MM_HINT_T2 */ +}; + +// The bit field mapping to the FPCR(floating-point control register) +typedef struct { + uint16_t res0; + uint8_t res1 : 6; + uint8_t bit22 : 1; + uint8_t bit23 : 1; + uint8_t bit24 : 1; + uint8_t res2 : 7; +#if defined(__aarch64__) + uint32_t res3; +#endif +} fpcr_bitfield; + +// Takes the upper 64 bits of a and places it in the low end of the result +// Takes the lower 64 bits of b and places it into the high end of the result. +FORCE_INLINE __m128 _mm_shuffle_ps_1032(__m128 a, __m128 b) +{ + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a32, b10)); +} + +// takes the lower two 32-bit values from a and swaps them and places in high +// end of result takes the higher two 32 bit values from b and swaps them and +// places in low end of result. 
+FORCE_INLINE __m128 _mm_shuffle_ps_2301(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32x2_t b23 = vrev64_f32(vget_high_f32(vreinterpretq_f32_m128(b))); + return vreinterpretq_m128_f32(vcombine_f32(a01, b23)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0321(__m128 a, __m128 b) +{ + float32x2_t a21 = vget_high_f32( + vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); + float32x2_t b03 = vget_low_f32( + vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); + return vreinterpretq_m128_f32(vcombine_f32(a21, b03)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2103(__m128 a, __m128 b) +{ + float32x2_t a03 = vget_low_f32( + vextq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 3)); + float32x2_t b21 = vget_high_f32( + vextq_f32(vreinterpretq_f32_m128(b), vreinterpretq_f32_m128(b), 3)); + return vreinterpretq_m128_f32(vcombine_f32(a03, b21)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1010(__m128 a, __m128 b) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a10, b10)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1001(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a01, b10)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0101(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32x2_t b01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(b))); + return vreinterpretq_m128_f32(vcombine_f32(a01, b01)); +} + +// keeps the low 64 bits of b in the low and puts the high 64 bits of a in the +// high +FORCE_INLINE __m128 _mm_shuffle_ps_3210(__m128 a, __m128 b) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t b32 = 
vget_high_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a10, b32)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0011(__m128 a, __m128 b) +{ + float32x2_t a11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 1); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + return vreinterpretq_m128_f32(vcombine_f32(a11, b00)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_0022(__m128 a, __m128 b) +{ + float32x2_t a22 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + return vreinterpretq_m128_f32(vcombine_f32(a22, b00)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2200(__m128 a, __m128 b) +{ + float32x2_t a00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(a)), 0); + float32x2_t b22 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(b)), 0); + return vreinterpretq_m128_f32(vcombine_f32(a00, b22)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_3202(__m128 a, __m128 b) +{ + float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + float32x2_t a22 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 0); + float32x2_t a02 = vset_lane_f32(a0, a22, 1); /* TODO: use vzip ?*/ + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32(vcombine_f32(a02, b32)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_1133(__m128 a, __m128 b) +{ + float32x2_t a33 = + vdup_lane_f32(vget_high_f32(vreinterpretq_f32_m128(a)), 1); + float32x2_t b11 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 1); + return vreinterpretq_m128_f32(vcombine_f32(a33, b11)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2010(__m128 a, __m128 b) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32_t b2 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 2); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + float32x2_t b20 = vset_lane_f32(b2, b00, 1); + return 
vreinterpretq_m128_f32(vcombine_f32(a10, b20)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2001(__m128 a, __m128 b) +{ + float32x2_t a01 = vrev64_f32(vget_low_f32(vreinterpretq_f32_m128(a))); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + float32x2_t b20 = vset_lane_f32(b2, b00, 1); + return vreinterpretq_m128_f32(vcombine_f32(a01, b20)); +} + +FORCE_INLINE __m128 _mm_shuffle_ps_2032(__m128 a, __m128 b) +{ + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32_t b2 = vgetq_lane_f32(b, 2); + float32x2_t b00 = vdup_lane_f32(vget_low_f32(vreinterpretq_f32_m128(b)), 0); + float32x2_t b20 = vset_lane_f32(b2, b00, 1); + return vreinterpretq_m128_f32(vcombine_f32(a32, b20)); +} + +// Kahan summation for accurate summation of floating-point numbers. +// http://blog.zachbjornson.com/2019/08/11/fast-float-summation.html +FORCE_INLINE void _sse2neon_kadd_f32(float *sum, float *c, float y) +{ + y -= *c; + float t = *sum + y; + *c = (t - *sum) - y; + *sum = t; +} + +#if defined(__ARM_FEATURE_CRYPTO) && \ + (defined(__aarch64__) || __has_builtin(__builtin_arm_crypto_vmullp64)) +// Wraps vmull_p64 +FORCE_INLINE uint64x2_t _sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b) +{ + poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0); + poly64_t b = vget_lane_p64(vreinterpret_p64_u64(_b), 0); + return vreinterpretq_u64_p128(vmull_p64(a, b)); +} +#else // ARMv7 polyfill +// ARMv7/some A64 lacks vmull_p64, but it has vmull_p8. +// +// vmull_p8 calculates 8 8-bit->16-bit polynomial multiplies, but we need a +// 64-bit->128-bit polynomial multiply. +// +// It needs some work and is somewhat slow, but it is still faster than all +// known scalar methods. 
+// +// Algorithm adapted to C from +// https://www.workofard.com/2017/07/ghash-for-low-end-cores/, which is adapted +// from "Fast Software Polynomial Multiplication on ARM Processors Using the +// NEON Engine" by Danilo Camara, Conrado Gouvea, Julio Lopez and Ricardo Dahab +// (https://hal.inria.fr/hal-01506572) +static uint64x2_t _sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b) +{ + poly8x8_t a = vreinterpret_p8_u64(_a); + poly8x8_t b = vreinterpret_p8_u64(_b); + + // Masks + uint8x16_t k48_32 = vcombine_u8(vcreate_u8(0x0000ffffffffffff), + vcreate_u8(0x00000000ffffffff)); + uint8x16_t k16_00 = vcombine_u8(vcreate_u8(0x000000000000ffff), + vcreate_u8(0x0000000000000000)); + + // Do the multiplies, rotating with vext to get all combinations + uint8x16_t d = vreinterpretq_u8_p16(vmull_p8(a, b)); // D = A0 * B0 + uint8x16_t e = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 1))); // E = A0 * B1 + uint8x16_t f = + vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 1), b)); // F = A1 * B0 + uint8x16_t g = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 2))); // G = A0 * B2 + uint8x16_t h = + vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 2), b)); // H = A2 * B0 + uint8x16_t i = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 3))); // I = A0 * B3 + uint8x16_t j = + vreinterpretq_u8_p16(vmull_p8(vext_p8(a, a, 3), b)); // J = A3 * B0 + uint8x16_t k = + vreinterpretq_u8_p16(vmull_p8(a, vext_p8(b, b, 4))); // L = A0 * B4 + + // Add cross products + uint8x16_t l = veorq_u8(e, f); // L = E + F + uint8x16_t m = veorq_u8(g, h); // M = G + H + uint8x16_t n = veorq_u8(i, j); // N = I + J + + // Interleave. Using vzip1 and vzip2 prevents Clang from emitting TBL + // instructions. 
+#if defined(__aarch64__) + uint8x16_t lm_p0 = vreinterpretq_u8_u64( + vzip1q_u64(vreinterpretq_u64_u8(l), vreinterpretq_u64_u8(m))); + uint8x16_t lm_p1 = vreinterpretq_u8_u64( + vzip2q_u64(vreinterpretq_u64_u8(l), vreinterpretq_u64_u8(m))); + uint8x16_t nk_p0 = vreinterpretq_u8_u64( + vzip1q_u64(vreinterpretq_u64_u8(n), vreinterpretq_u64_u8(k))); + uint8x16_t nk_p1 = vreinterpretq_u8_u64( + vzip2q_u64(vreinterpretq_u64_u8(n), vreinterpretq_u64_u8(k))); +#else + uint8x16_t lm_p0 = vcombine_u8(vget_low_u8(l), vget_low_u8(m)); + uint8x16_t lm_p1 = vcombine_u8(vget_high_u8(l), vget_high_u8(m)); + uint8x16_t nk_p0 = vcombine_u8(vget_low_u8(n), vget_low_u8(k)); + uint8x16_t nk_p1 = vcombine_u8(vget_high_u8(n), vget_high_u8(k)); +#endif + // t0 = (L) (P0 + P1) << 8 + // t1 = (M) (P2 + P3) << 16 + uint8x16_t t0t1_tmp = veorq_u8(lm_p0, lm_p1); + uint8x16_t t0t1_h = vandq_u8(lm_p1, k48_32); + uint8x16_t t0t1_l = veorq_u8(t0t1_tmp, t0t1_h); + + // t2 = (N) (P4 + P5) << 24 + // t3 = (K) (P6 + P7) << 32 + uint8x16_t t2t3_tmp = veorq_u8(nk_p0, nk_p1); + uint8x16_t t2t3_h = vandq_u8(nk_p1, k16_00); + uint8x16_t t2t3_l = veorq_u8(t2t3_tmp, t2t3_h); + + // De-interleave +#if defined(__aarch64__) + uint8x16_t t0 = vreinterpretq_u8_u64( + vuzp1q_u64(vreinterpretq_u64_u8(t0t1_l), vreinterpretq_u64_u8(t0t1_h))); + uint8x16_t t1 = vreinterpretq_u8_u64( + vuzp2q_u64(vreinterpretq_u64_u8(t0t1_l), vreinterpretq_u64_u8(t0t1_h))); + uint8x16_t t2 = vreinterpretq_u8_u64( + vuzp1q_u64(vreinterpretq_u64_u8(t2t3_l), vreinterpretq_u64_u8(t2t3_h))); + uint8x16_t t3 = vreinterpretq_u8_u64( + vuzp2q_u64(vreinterpretq_u64_u8(t2t3_l), vreinterpretq_u64_u8(t2t3_h))); +#else + uint8x16_t t1 = vcombine_u8(vget_high_u8(t0t1_l), vget_high_u8(t0t1_h)); + uint8x16_t t0 = vcombine_u8(vget_low_u8(t0t1_l), vget_low_u8(t0t1_h)); + uint8x16_t t3 = vcombine_u8(vget_high_u8(t2t3_l), vget_high_u8(t2t3_h)); + uint8x16_t t2 = vcombine_u8(vget_low_u8(t2t3_l), vget_low_u8(t2t3_h)); +#endif + // Shift the cross products 
+    uint8x16_t t0_shift = vextq_u8(t0, t0, 15);  // t0 << 8
+    uint8x16_t t1_shift = vextq_u8(t1, t1, 14);  // t1 << 16
+    uint8x16_t t2_shift = vextq_u8(t2, t2, 13);  // t2 << 24
+    uint8x16_t t3_shift = vextq_u8(t3, t3, 12);  // t3 << 32
+
+    // Accumulate the products
+    uint8x16_t cross1 = veorq_u8(t0_shift, t1_shift);
+    uint8x16_t cross2 = veorq_u8(t2_shift, t3_shift);
+    uint8x16_t mix = veorq_u8(d, cross1);
+    uint8x16_t r = veorq_u8(mix, cross2);
+    return vreinterpretq_u64_u8(r);
+}
+#endif  // ARMv7 polyfill
+
+// Generic 32-bit lane shuffle: every 2-bit field of imm names the source lane
+// of a that is copied into the corresponding result lane.
+//
+// C equivalent:
+//   __m128i _mm_shuffle_epi32_default(__m128i a,
+//                                     __constrange(0, 255) int imm) {
+//       __m128i ret;
+//       ret[0] = a[imm & 0x3];     ret[1] = a[(imm >> 2) & 0x3];
+//       ret[2] = a[(imm >> 4) & 0x03];  ret[3] = a[(imm >> 6) & 0x03];
+//       return ret;
+//   }
+//
+// Written as a GNU statement expression: the result is seeded by broadcasting
+// the lane chosen for position 0 (vmovq_n_s32), after which lanes 1..3 are
+// overwritten one at a time with vsetq_lane_s32.
+// NOTE(review): imm is used as the lane index of vgetq_lane_s32, so it
+// presumably has to be a compile-time constant - confirm against callers.
+#define _mm_shuffle_epi32_default(a, imm) \
+    __extension__({ \
+        int32x4_t ret; \
+        ret = vmovq_n_s32( \
+            vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm) & (0x3))); \
+        ret = vsetq_lane_s32( \
+            vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 2) & 0x3), \
+            ret, 1); \
+        ret = vsetq_lane_s32( \
+            vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 4) & 0x3), \
+            ret, 2); \
+        ret = vsetq_lane_s32( \
+            vgetq_lane_s32(vreinterpretq_s32_m128i(a), ((imm) >> 6) & 0x3), \
+            ret, 3); \
+        vreinterpretq_m128i_s32(ret); \
+    })
+
+// Swaps the two 64-bit halves of a:
+// takes the upper 64 bits of a and places them in the low end of the result,
+// takes the lower 64 bits of a and places them in the high end of the result.
+// With 32-bit lanes a = {a0, a1, a2, a3} the result is {a2, a3, a0, a1}.
+FORCE_INLINE __m128i _mm_shuffle_epi_1032(__m128i a)
+{
+    int32x2_t a32 = vget_high_s32(vreinterpretq_s32_m128i(a));
+    int32x2_t a10 = vget_low_s32(vreinterpretq_s32_m128i(a));
+    return vreinterpretq_m128i_s32(vcombine_s32(a32, a10));
+}
+
+// takes the lower two 32-bit values from a and swaps them and places in low end
+// of result takes the higher two 32 bit values from a and swaps them and places
+// in high end of result.
+FORCE_INLINE __m128i _mm_shuffle_epi_2301(__m128i a)
+{
+    // {a0, a1, a2, a3} -> {a1, a0, a3, a2}
+    const int32x4_t lanes = vreinterpretq_s32_m128i(a);
+    const int32x2_t low_swapped = vrev64_s32(vget_low_s32(lanes));
+    const int32x2_t high_swapped = vrev64_s32(vget_high_s32(lanes));
+    return vreinterpretq_m128i_s32(vcombine_s32(low_swapped, high_swapped));
+}
+
+// Rotates the lanes down by one position: the least significant 32 bits move
+// into the most significant 32 bits and everything else shifts down.
+// {a0, a1, a2, a3} -> {a1, a2, a3, a0}
+FORCE_INLINE __m128i _mm_shuffle_epi_0321(__m128i a)
+{
+    const int32x4_t lanes = vreinterpretq_s32_m128i(a);
+    const int32x4_t rotated = vextq_s32(lanes, lanes, 1);
+    return vreinterpretq_m128i_s32(rotated);
+}
+
+// Rotates the lanes up by one position: the most significant 32 bits move
+// into the least significant 32 bits and everything else shifts up.
+// {a0, a1, a2, a3} -> {a3, a0, a1, a2}
+FORCE_INLINE __m128i _mm_shuffle_epi_2103(__m128i a)
+{
+    const int32x4_t lanes = vreinterpretq_s32_m128i(a);
+    const int32x4_t rotated = vextq_s32(lanes, lanes, 3);
+    return vreinterpretq_m128i_s32(rotated);
+}
+
+// Duplicates the lower 64 bits of a into both halves of the result.
+// {a0, a1, a2, a3} -> {a0, a1, a0, a1}
+FORCE_INLINE __m128i _mm_shuffle_epi_1010(__m128i a)
+{
+    const int32x2_t low_half = vget_low_s32(vreinterpretq_s32_m128i(a));
+    return vreinterpretq_m128i_s32(vcombine_s32(low_half, low_half));
+}
+
+// Lower result half: the lower 64 bits of a with elements 0 and 1 swapped.
+// Upper result half: the lower 64 bits of a, unchanged.
+// {a0, a1, a2, a3} -> {a1, a0, a0, a1}
+FORCE_INLINE __m128i _mm_shuffle_epi_1001(__m128i a)
+{
+    const int32x2_t low_half = vget_low_s32(vreinterpretq_s32_m128i(a));
+    const int32x2_t low_swapped = vrev64_s32(low_half);
+    return vreinterpretq_m128i_s32(vcombine_s32(low_swapped, low_half));
+}
+
+// Both result halves receive the lower 64 bits of a with elements 0 and 1
+// swapped.
+// {a0, a1, a2, a3} -> {a1, a0, a1, a0}
+FORCE_INLINE __m128i _mm_shuffle_epi_0101(__m128i a)
+{
+    const int32x2_t low_swapped =
+        vrev64_s32(vget_low_s32(vreinterpretq_s32_m128i(a)));
+    return vreinterpretq_m128i_s32(vcombine_s32(low_swapped, low_swapped));
+}
+
+// {a0, a1, a2, a3} -> {a1, a1, a2, a2}
+FORCE_INLINE __m128i _mm_shuffle_epi_2211(__m128i a)
+{
+    const int32x4_t lanes = vreinterpretq_s32_m128i(a);
+    const int32x2_t lane1_twice = vdup_lane_s32(vget_low_s32(lanes), 1);
+    const int32x2_t lane2_twice = vdup_lane_s32(vget_high_s32(lanes), 0);
+    return vreinterpretq_m128i_s32(vcombine_s32(lane1_twice, lane2_twice));
+}
+
+// {a0, a1, a2, a3} -> {a2, a2, a1, a0}
+FORCE_INLINE __m128i _mm_shuffle_epi_0122(__m128i a)
+{
+    const int32x4_t lanes = vreinterpretq_s32_m128i(a);
+    const int32x2_t lane2_twice = vdup_lane_s32(vget_high_s32(lanes), 0);
+    const int32x2_t low_swapped = vrev64_s32(vget_low_s32(lanes));
+    return vreinterpretq_m128i_s32(vcombine_s32(lane2_twice, low_swapped));
+}
+
+// {a0, a1, a2, a3} -> {a2, a3, a3, a3}
+FORCE_INLINE __m128i _mm_shuffle_epi_3332(__m128i a)
+{
+    const int32x2_t high_half = vget_high_s32(vreinterpretq_s32_m128i(a));
+    const int32x2_t lane3_twice = vdup_lane_s32(high_half, 1);
+    return vreinterpretq_m128i_s32(vcombine_s32(high_half, lane3_twice));
+}
+
+// Broadcasts 32-bit lane imm of a to all four lanes of the result.
+// AArch64 duplicates the lane directly; the fallback reads the lane out and
+// re-broadcasts the scalar.
+// FORCE_INLINE __m128i _mm_shuffle_epi32_splat(__m128i a, __constrange(0,255)
+// int imm)
+#if defined(__aarch64__)
+#define _mm_shuffle_epi32_splat(a, imm) \
+    __extension__({ \
+        vreinterpretq_m128i_s32( \
+            vdupq_laneq_s32(vreinterpretq_s32_m128i(a), (imm))); \
+    })
+#else
+#define _mm_shuffle_epi32_splat(a, imm) \
+    __extension__({ \
+        vreinterpretq_m128i_s32( \
+            vdupq_n_s32(vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm)))); \
+    })
+#endif
+
+// NEON does not support a general purpose permute intrinsic
+// Selects four specific single-precision, floating-point values from a and b,
+// based on the mask i.
+//
+// C equivalent:
+//   __m128 _mm_shuffle_ps_default(__m128 a, __m128 b,
+//                                 __constrange(0, 255) int imm) {
+//       __m128 ret;
+//       ret[0] = a[imm & 0x3];   ret[1] = a[(imm >> 2) & 0x3];
+//       ret[2] = b[(imm >> 4) & 0x03];  ret[3] = b[(imm >> 6) & 0x03];
+//       return ret;
+//   }
+//
+// The two low result lanes are picked from a and the two high result lanes
+// from b. The macro is a GNU statement expression: lane 0 is seeded with a
+// broadcast (vmovq_n_f32) and lanes 1..3 are then written individually with
+// vsetq_lane_f32.
+// NOTE(review): imm ends up as the lane index of vgetq_lane_f32, so it
+// presumably must be a compile-time constant - confirm.
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/5f0858x0(v=vs.100).aspx
+#define _mm_shuffle_ps_default(a, b, imm) \
+    __extension__({ \
+        float32x4_t ret; \
+        ret = vmovq_n_f32( \
+            vgetq_lane_f32(vreinterpretq_f32_m128(a), (imm) & (0x3))); \
+        ret = vsetq_lane_f32( \
+            vgetq_lane_f32(vreinterpretq_f32_m128(a), ((imm) >> 2) & 0x3), \
+            ret, 1); \
+        ret = vsetq_lane_f32( \
+            vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 4) & 0x3), \
+            ret, 2); \
+        ret = vsetq_lane_f32( \
+            vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm) >> 6) & 0x3), \
+            ret, 3); \
+        vreinterpretq_m128_f32(ret); \
+    })
+
+// Shuffles the lower 4 signed or unsigned 16-bit integers in a as specified
+// by imm.
+// Lanes 0..3 of the result are each replaced by the low-half lane selected by
+// the matching 2-bit field of imm; lanes 4..7 keep their value because ret
+// starts out as a copy of a and only lanes 0..3 are written.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/y41dkk37(v=vs.100)
+// FORCE_INLINE __m128i _mm_shufflelo_epi16_function(__m128i a,
+//                                                   __constrange(0,255) int
+//                                                   imm)
+#define _mm_shufflelo_epi16_function(a, imm) \
+    __extension__({ \
+        int16x8_t ret = vreinterpretq_s16_m128i(a); \
+        int16x4_t lowBits = vget_low_s16(ret); \
+        ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm) & (0x3)), ret, 0); \
+        ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 2) & 0x3), ret, \
+                             1); \
+        ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 4) & 0x3), ret, \
+                             2); \
+        ret = vsetq_lane_s16(vget_lane_s16(lowBits, ((imm) >> 6) & 0x3), ret, \
+                             3); \
+        vreinterpretq_m128i_s16(ret); \
+    })
+
+// Shuffles the upper 4 signed or unsigned 16-bit integers in a as specified
+// by imm.
+// https://msdn.microsoft.com/en-us/library/13ywktbs(v=vs.100).aspx
+// Lanes 4..7 of the result are each replaced by the high-half lane selected
+// by the matching 2-bit field of imm; lanes 0..3 keep their value because ret
+// starts out as a copy of a and only lanes 4..7 are written.
+// FORCE_INLINE __m128i _mm_shufflehi_epi16_function(__m128i a,
+//                                                   __constrange(0,255) int
+//                                                   imm)
+#define _mm_shufflehi_epi16_function(a, imm) \
+    __extension__({ \
+        int16x8_t ret = vreinterpretq_s16_m128i(a); \
+        int16x4_t highBits = vget_high_s16(ret); \
+        ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm) & (0x3)), ret, 4); \
+        ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 2) & 0x3), ret, \
+                             5); \
+        ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 4) & 0x3), ret, \
+                             6); \
+        ret = vsetq_lane_s16(vget_lane_s16(highBits, ((imm) >> 6) & 0x3), ret, \
+                             7); \
+        vreinterpretq_m128i_s16(ret); \
+    })
+
+/* MMX */
+
+// _mm_empty is a no-op on ARM: the body is intentionally empty.
+FORCE_INLINE void _mm_empty(void) {}
+
+/* SSE */
+
+// Adds the four single-precision, floating-point values of a and b.
+//
+//   r0 := a0 + b0
+//   r1 := a1 + b1
+//   r2 := a2 + b2
+//   r3 := a3 + b3
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/c9848chc(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
+{
+    return vreinterpretq_m128_f32(
+        vaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b)));
+}
+
+// Adds the scalar single-precision floating point values of a and b.
+//
+//   r0 := a0 + b0
+//   r1 := a1
+//   r2 := a2
+//   r3 := a3
+//
+// https://msdn.microsoft.com/en-us/library/be94x2y6(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_add_ss(__m128 a, __m128 b)
+{
+    // Build {b0, 0, 0, 0} so that the packed add only changes lane 0.
+    float32_t b0 = vgetq_lane_f32(vreinterpretq_f32_m128(b), 0);
+    float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
+    // the upper values in the result must be the remnants of <a>.
+    // a goes through vreinterpretq_f32_m128 like in every other wrapper
+    // instead of being handed to the NEON intrinsic as a raw __m128.
+    return vreinterpretq_m128_f32(
+        vaddq_f32(vreinterpretq_f32_m128(a), value));
+}
+
+// Computes the bitwise AND of the four single-precision, floating-point values
+// of a and b.
+//
+//   r0 := a0 & b0
+//   r1 := a1 & b1
+//   r2 := a2 & b2
+//   r3 := a3 & b3
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/73ck1xc5(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
+{
+    // The operands are viewed as 32-bit integers so the AND works on the raw
+    // bit patterns.
+    const int32x4_t lhs_bits = vreinterpretq_s32_m128(a);
+    const int32x4_t rhs_bits = vreinterpretq_s32_m128(b);
+    return vreinterpretq_m128_s32(vandq_s32(lhs_bits, rhs_bits));
+}
+
+// Computes the bitwise AND-NOT of the four single-precision, floating-point
+// values of a and b.
+//
+//   r0 := ~a0 & b0
+//   r1 := ~a1 & b1
+//   r2 := ~a2 & b2
+//   r3 := ~a3 & b3
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/68h7wd02(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b)
+{
+    const int32x4_t inverted_operand = vreinterpretq_s32_m128(a);
+    const int32x4_t kept_operand = vreinterpretq_s32_m128(b);
+    // *NOTE* argument swap: vbicq_s32(x, y) clears the bits of y in x, so b
+    // goes first and a second.
+    return vreinterpretq_m128_s32(vbicq_s32(kept_operand, inverted_operand));
+}
+
+// Average packed unsigned 16-bit integers in a and b, and store the results in
+// dst.
+//
+//   FOR j := 0 to 3
+//     i := j*16
+//     dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu16
+FORCE_INLINE __m64 _mm_avg_pu16(__m64 a, __m64 b)
+{
+    const uint16x4_t lhs = vreinterpret_u16_m64(a);
+    const uint16x4_t rhs = vreinterpret_u16_m64(b);
+    return vreinterpret_m64_u16(vrhadd_u16(lhs, rhs));
+}
+
+// Average packed unsigned 8-bit integers in a and b, and store the results in
+// dst.
+//
+//   FOR j := 0 to 7
+//     i := j*8
+//     dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu8
+FORCE_INLINE __m64 _mm_avg_pu8(__m64 a, __m64 b)
+{
+    const uint8x8_t lhs = vreinterpret_u8_m64(a);
+    const uint8x8_t rhs = vreinterpret_u8_m64(b);
+    return vreinterpret_m64_u8(vrhadd_u8(lhs, rhs));
+}
+
+// Compares the four lanes of a and b for equality; the per-lane mask produced
+// by vceqq_f32 is returned reinterpreted as __m128.
+// https://msdn.microsoft.com/en-us/library/vstudio/36aectz5(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_cmpeq_ps(__m128 a, __m128 b)
+{
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    return vreinterpretq_m128_u32(vceqq_f32(lhs, rhs));
+}
+
+// Compares for equality.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/k423z28e(v=vs.100)
+// Scalar form: the packed comparison runs on all lanes and its result is
+// merged with a through _mm_move_ss.
+FORCE_INLINE __m128 _mm_cmpeq_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpeq_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for greater than or equal (all four lanes).
+// https://msdn.microsoft.com/en-us/library/vstudio/fs813y2t(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_cmpge_ps(__m128 a, __m128 b)
+{
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    return vreinterpretq_m128_u32(vcgeq_f32(lhs, rhs));
+}
+
+// Compares for greater than or equal (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/kesh3ddc(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpge_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpge_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for greater than.
+//
+//   r0 := (a0 > b0) ? 0xffffffff : 0x0
+//   r1 := (a1 > b1) ? 0xffffffff : 0x0
+//   r2 := (a2 > b2) ? 0xffffffff : 0x0
+//   r3 := (a3 > b3) ? 0xffffffff : 0x0
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/11dy102s(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b)
+{
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    return vreinterpretq_m128_u32(vcgtq_f32(lhs, rhs));
+}
+
+// Compares for greater than (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/1xyyyy9e(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpgt_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpgt_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for less than or equal.
+//
+//   r0 := (a0 <= b0) ? 0xffffffff : 0x0
+//   r1 := (a1 <= b1) ? 0xffffffff : 0x0
+//   r2 := (a2 <= b2) ? 0xffffffff : 0x0
+//   r3 := (a3 <= b3) ? 0xffffffff : 0x0
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/1s75w83z(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_cmple_ps(__m128 a, __m128 b)
+{
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    return vreinterpretq_m128_u32(vcleq_f32(lhs, rhs));
+}
+
+// Compares for less than or equal.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/a7x0hbhw(v=vs.100)
+// Scalar form: the packed comparison runs on all lanes and its result is
+// merged with a through _mm_move_ss.
+FORCE_INLINE __m128 _mm_cmple_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmple_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for less than (all four lanes).
+// https://msdn.microsoft.com/en-us/library/vstudio/f330yhc8(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_cmplt_ps(__m128 a, __m128 b)
+{
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    return vreinterpretq_m128_u32(vcltq_f32(lhs, rhs));
+}
+
+// Compares for less than (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/fy94wye7(v=vs.100)
+FORCE_INLINE __m128 _mm_cmplt_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmplt_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for inequality: the equality mask is computed and then inverted
+// bitwise.
+// https://msdn.microsoft.com/en-us/library/sf44thbx(v=vs.100).aspx
+FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b)
+{
+    const uint32x4_t equal_mask =
+        vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vreinterpretq_m128_u32(vmvnq_u32(equal_mask));
+}
+
+// Compares for inequality (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/ekya8fh4(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpneq_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpneq_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for not greater than or equal: the ">=" mask is computed and then
+// inverted bitwise.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/wsexys62(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpnge_ps(__m128 a, __m128 b)
+{
+    const uint32x4_t ge_mask =
+        vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vreinterpretq_m128_u32(vmvnq_u32(ge_mask));
+}
+
+// Compares for not greater than or equal (scalar form, merged via
+// _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/fk2y80s8(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpnge_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpnge_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for not greater than.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/d0xh7w0s(v=vs.100)
+// The ">" mask is computed and then inverted bitwise.
+FORCE_INLINE __m128 _mm_cmpngt_ps(__m128 a, __m128 b)
+{
+    const uint32x4_t gt_mask =
+        vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vreinterpretq_m128_u32(vmvnq_u32(gt_mask));
+}
+
+// Compares for not greater than (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/z7x9ydwh(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpngt_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpngt_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for not less than or equal: the "<=" mask is computed and then
+// inverted bitwise.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/6a330kxw(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpnle_ps(__m128 a, __m128 b)
+{
+    const uint32x4_t le_mask =
+        vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vreinterpretq_m128_u32(vmvnq_u32(le_mask));
+}
+
+// Compares for not less than or equal (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/z7x9ydwh(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpnle_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpnle_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for not less than: the "<" mask is computed and then inverted
+// bitwise.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/4686bbdw(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpnlt_ps(__m128 a, __m128 b)
+{
+    const uint32x4_t lt_mask =
+        vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vreinterpretq_m128_u32(vmvnq_u32(lt_mask));
+}
+
+// Compares for not less than (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/56b9z2wf(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpnlt_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpnlt_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares the four 32-bit floats in a and b to check if any values are NaN.
+// Ordered compare between each value returns true for "orderable" and false for
+// "not orderable" (NaN).
+// https://msdn.microsoft.com/en-us/library/vstudio/0h9w00fx(v=vs.100).aspx see
+// also:
+// http://stackoverflow.com/questions/8627331/what-does-ordered-unordered-comparison-mean
+// http://stackoverflow.com/questions/29349621/neon-isnanval-intrinsics
+FORCE_INLINE __m128 _mm_cmpord_ps(__m128 a, __m128 b)
+{
+    // NEON has no built-in ordered compare. A lane is not NaN exactly when it
+    // compares equal to itself, so "a == a" and "b == b" are evaluated
+    // separately and ANDed together.
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    const uint32x4_t lhs_not_nan = vceqq_f32(lhs, lhs);
+    const uint32x4_t rhs_not_nan = vceqq_f32(rhs, rhs);
+    return vreinterpretq_m128_u32(vandq_u32(lhs_not_nan, rhs_not_nan));
+}
+
+// Compares for ordered (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/343t62da(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpord_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpord_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares for unordered: the bitwise inverse of the "both operands equal
+// themselves" mask.
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/khy6fk1t(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpunord_ps(__m128 a, __m128 b)
+{
+    const float32x4_t lhs = vreinterpretq_f32_m128(a);
+    const float32x4_t rhs = vreinterpretq_f32_m128(b);
+    const uint32x4_t lhs_not_nan = vceqq_f32(lhs, lhs);
+    const uint32x4_t rhs_not_nan = vceqq_f32(rhs, rhs);
+    const uint32x4_t both_ordered = vandq_u32(lhs_not_nan, rhs_not_nan);
+    return vreinterpretq_m128_u32(vmvnq_u32(both_ordered));
+}
+
+// Compares for unordered (scalar form, merged via _mm_move_ss).
+// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/2as2387b(v=vs.100)
+FORCE_INLINE __m128 _mm_cmpunord_ss(__m128 a, __m128 b)
+{
+    const __m128 packed_mask = _mm_cmpunord_ps(a, b);
+    return _mm_move_ss(a, packed_mask);
+}
+
+// Compares the lower single-precision floating point scalar values of a and b
+// using an equality operation. Returns bit 0 of the lane-0 comparison mask.
+// https://msdn.microsoft.com/en-us/library/93yx2h2b(v=vs.100).aspx
+FORCE_INLINE int _mm_comieq_ss(__m128 a, __m128 b)
+{
+    const uint32x4_t eq_mask =
+        vceqq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vgetq_lane_u32(eq_mask, 0) & 0x1;
+}
+
+// Compares the lower single-precision floating point scalar values of a and b
+// using a greater than or equal operation. Returns bit 0 of the lane-0
+// comparison mask.
+// https://msdn.microsoft.com/en-us/library/8t80des6(v=vs.100).aspx
+FORCE_INLINE int _mm_comige_ss(__m128 a, __m128 b)
+{
+    const uint32x4_t ge_mask =
+        vcgeq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vgetq_lane_u32(ge_mask, 0) & 0x1;
+}
+
+// Compares the lower single-precision floating point scalar values of a and b
+// using a greater than operation. Returns bit 0 of the lane-0 comparison mask.
+// https://msdn.microsoft.com/en-us/library/b0738e0t(v=vs.100).aspx
+FORCE_INLINE int _mm_comigt_ss(__m128 a, __m128 b)
+{
+    const uint32x4_t gt_mask =
+        vcgtq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vgetq_lane_u32(gt_mask, 0) & 0x1;
+}
+
+// Compares the lower single-precision floating point scalar values of a and b
+// using a less than or equal operation. Returns bit 0 of the lane-0 comparison
+// mask.
+// https://msdn.microsoft.com/en-us/library/1w4t7c57(v=vs.90).aspx
+FORCE_INLINE int _mm_comile_ss(__m128 a, __m128 b)
+{
+    const uint32x4_t le_mask =
+        vcleq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vgetq_lane_u32(le_mask, 0) & 0x1;
+}
+
+// Compares the lower single-precision floating point scalar values of a and b
+// using a less than operation.
+// https://msdn.microsoft.com/en-us/library/2kwe606b(v=vs.90).aspx
+// Important note!! The documentation on MSDN is incorrect! If either of the
+// values is a NAN the docs say you will get a one, but in fact, it will return
+// a zero!!
+FORCE_INLINE int _mm_comilt_ss(__m128 a, __m128 b)
+{
+    // Only lane 0 of the packed "<" mask is inspected; its lowest bit is the
+    // return value.
+    const uint32x4_t lt_mask =
+        vcltq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b));
+    return vgetq_lane_u32(lt_mask, 0) & 0x1;
+}
+
+// Compares the lower single-precision floating point scalar values of a and b
+// using an inequality operation. Implemented as the logical negation of
+// _mm_comieq_ss.
+// https://msdn.microsoft.com/en-us/library/bafh5e0a(v=vs.90).aspx
+FORCE_INLINE int _mm_comineq_ss(__m128 a, __m128 b)
+{
+    return _mm_comieq_ss(a, b) ? 0 : 1;
+}
+
+// Convert packed signed 32-bit integers in b to packed single-precision
+// (32-bit) floating-point elements, store the results in the lower 2 elements
+// of dst, and copy the upper 2 packed elements from a to the upper elements of
+// dst.
+//
+//   dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+//   dst[63:32] := Convert_Int32_To_FP32(b[63:32])
+//   dst[95:64] := a[95:64]
+//   dst[127:96] := a[127:96]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_pi2ps
+FORCE_INLINE __m128 _mm_cvt_pi2ps(__m128 a, __m64 b)
+{
+    const float32x2_t converted_low = vcvt_f32_s32(vreinterpret_s32_m64(b));
+    const float32x2_t kept_high = vget_high_f32(vreinterpretq_f32_m128(a));
+    return vreinterpretq_m128_f32(vcombine_f32(converted_low, kept_high));
+}
+
+// Convert packed single-precision (32-bit) floating-point elements in a to
+// packed 32-bit integers, and store the results in dst.
+//
+//   FOR j := 0 to 1
+//     i := 32*j
+//     dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ps2pi
+FORCE_INLINE __m64 _mm_cvt_ps2pi(__m128 a)
+{
+#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING)
+    // Round all four lanes first, convert them, then keep the low pair.
+    const float32x4_t rounded = vrndiq_f32(vreinterpretq_f32_m128(a));
+    const int32x4_t as_int = vcvtnq_s32_f32(rounded);
+    return vreinterpret_m64_s32(vget_low_s32(as_int));
+#else
+    // Fallback: round through _mm_round_ps, then convert only the low pair.
+    const __m128 rounded = _mm_round_ps(a, _MM_FROUND_CUR_DIRECTION);
+    const float32x2_t low_pair = vget_low_f32(vreinterpretq_f32_m128(rounded));
+    return vreinterpret_m64_s32(vcvt_s32_f32(low_pair));
+#endif
+}
+
+// Convert the signed 32-bit integer b to a single-precision (32-bit)
+// floating-point element, store the result in the lower element of dst, and
+// copy the upper 3 packed elements from a to the upper elements of dst.
+//
+//   dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+//   dst[127:32] := a[127:32]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_si2ss
+FORCE_INLINE __m128 _mm_cvt_si2ss(__m128 a, int b)
+{
+    const float32x4_t with_new_lane0 =
+        vsetq_lane_f32((float) b, vreinterpretq_f32_m128(a), 0);
+    return vreinterpretq_m128_f32(with_new_lane0);
+}
+
+// Convert the lower single-precision (32-bit) floating-point element in a to a
+// 32-bit integer, and store the result in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ss2si
+FORCE_INLINE int _mm_cvt_ss2si(__m128 a)
+{
+#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING)
+    const float32x4_t rounded = vrndiq_f32(vreinterpretq_f32_m128(a));
+    const int32x4_t as_int = vcvtnq_s32_f32(rounded);
+    return vgetq_lane_s32(as_int, 0);
+#else
+    const __m128 rounded = _mm_round_ps(a, _MM_FROUND_CUR_DIRECTION);
+    const float32_t lane0 = vgetq_lane_f32(vreinterpretq_f32_m128(rounded), 0);
+    return (int32_t) lane0;
+#endif
+}
+
+// Convert packed 16-bit integers in a to packed single-precision (32-bit)
+// floating-point elements, and store the results in dst.
+//
+//   FOR j := 0 to 3
+//     i := j*16
+//     m := j*32
+//     dst[m+31:m] := Convert_Int16_To_FP32(a[i+15:i])
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi16_ps
+FORCE_INLINE __m128 _mm_cvtpi16_ps(__m64 a)
+{
+    // Widen the four signed 16-bit lanes to 32 bits, then convert to float.
+    const int32x4_t widened = vmovl_s16(vreinterpret_s16_m64(a));
+    return vreinterpretq_m128_f32(vcvtq_f32_s32(widened));
+}
+
+// Convert packed 32-bit integers in b to packed single-precision (32-bit)
+// floating-point elements, store the results in the lower 2 elements of dst,
+// and copy the upper 2 packed elements from a to the upper elements of dst.
+//
+//   dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+//   dst[63:32] := Convert_Int32_To_FP32(b[63:32])
+//   dst[95:64] := a[95:64]
+//   dst[127:96] := a[127:96]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_ps
+FORCE_INLINE __m128 _mm_cvtpi32_ps(__m128 a, __m64 b)
+{
+    const float32x2_t converted_low = vcvt_f32_s32(vreinterpret_s32_m64(b));
+    const float32x2_t kept_high = vget_high_f32(vreinterpretq_f32_m128(a));
+    return vreinterpretq_m128_f32(vcombine_f32(converted_low, kept_high));
+}
+
+// Convert packed signed 32-bit integers in a to packed single-precision
+// (32-bit) floating-point elements, store the results in the lower 2 elements
+// of dst, then convert the packed signed 32-bit integers in b to
+// single-precision (32-bit) floating-point element, and store the results in
+// the upper 2 elements of dst.
+//
+//   dst[31:0] := Convert_Int32_To_FP32(a[31:0])
+//   dst[63:32] := Convert_Int32_To_FP32(a[63:32])
+//   dst[95:64] := Convert_Int32_To_FP32(b[31:0])
+//   dst[127:96] := Convert_Int32_To_FP32(b[63:32])
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32x2_ps
+FORCE_INLINE __m128 _mm_cvtpi32x2_ps(__m64 a, __m64 b)
+{
+    // Join both integer pairs into one vector and convert all lanes at once.
+    const int32x4_t joined =
+        vcombine_s32(vreinterpret_s32_m64(a), vreinterpret_s32_m64(b));
+    return vreinterpretq_m128_f32(vcvtq_f32_s32(joined));
+}
+
+// Convert the lower packed 8-bit integers in a to packed single-precision
+// (32-bit) floating-point elements, and store the results in dst.
+//
+//   FOR j := 0 to 3
+//     i := j*8
+//     m := j*32
+//     dst[m+31:m] := Convert_Int8_To_FP32(a[i+7:i])
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi8_ps
+FORCE_INLINE __m128 _mm_cvtpi8_ps(__m64 a)
+{
+    // Widen 8 -> 16 bits, keep the low four lanes, widen 16 -> 32 bits, then
+    // convert to float.
+    const int16x8_t widened16 = vmovl_s8(vreinterpret_s8_m64(a));
+    const int32x4_t widened32 = vmovl_s16(vget_low_s16(widened16));
+    return vreinterpretq_m128_f32(vcvtq_f32_s32(widened32));
+}
+
+// Convert packed single-precision (32-bit) floating-point elements in a to
+// packed 16-bit integers, and store the results in dst. Note: this intrinsic
+// will generate 0x7FFF, rather than 0x8000, for input values between 0x7FFF and
+// 0x7FFFFFFF.
+//
+//   FOR j := 0 to 3
+//     i := 16*j
+//     k := 32*j
+//     IF a[k+31:k] >= FP32(0x7FFF) && a[k+31:k] <= FP32(0x7FFFFFFF)
+//       dst[i+15:i] := 0x7FFF
+//     ELSE
+//       dst[i+15:i] := Convert_FP32_To_Int16(a[k+31:k])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi16
+FORCE_INLINE __m64 _mm_cvtps_pi16(__m128 a)
+{
+    const __m128 lowest16 = _mm_set_ps1((float) INT16_MIN);
+    const __m128 highest16 = _mm_set_ps1((float) INT16_MAX);
+    const __m128 highest32 = _mm_set_ps1((float) INT32_MAX);
+
+    // Lanes in [INT16_MAX, INT32_MAX] saturate to INT16_MAX.
+    const __m128i saturate_high = _mm_castps_si128(
+        _mm_and_ps(_mm_cmpge_ps(a, highest16), _mm_cmple_ps(a, highest32)));
+    // Lanes strictly between INT16_MIN and INT16_MAX are converted normally.
+    const __m128i in_range = _mm_castps_si128(
+        _mm_and_ps(_mm_cmpgt_ps(a, lowest16), _mm_cmplt_ps(a, highest16)));
+    // Every lane matched by neither mask receives INT16_MIN.
+    const __m128i saturate_low = _mm_cmpeq_epi32(
+        _mm_or_si128(saturate_high, in_range), _mm_setzero_si128());
+
+    __m128i high_part = _mm_and_si128(saturate_high, _mm_set1_epi32(INT16_MAX));
+    __m128i low_part = _mm_and_si128(saturate_low, _mm_set1_epi32(INT16_MIN));
+    __m128i converted = _mm_and_si128(in_range, _mm_cvtps_epi32(a));
+
+    // The three masks are disjoint, so OR-ing the parts selects one per lane.
+    __m128i merged = _mm_or_si128(_mm_or_si128(high_part, low_part), converted);
+    return vreinterpret_m64_s16(vmovn_s32(vreinterpretq_s32_m128i(merged)));
+}
+
+// Convert packed single-precision (32-bit) floating-point elements in a to
+// packed 32-bit integers, and store the results in dst.
+//
+//   FOR j := 0 to 1
+//     i := 32*j
+//     dst[i+31:i] := Convert_FP32_To_Int32(a[i+31:i])
+//   ENDFOR
+//
+// Alias of _mm_cvt_ps2pi.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi32
+#define _mm_cvtps_pi32(a) _mm_cvt_ps2pi(a)
+
+// Convert packed single-precision (32-bit) floating-point elements in a to
+// packed 8-bit integers, and store the results in lower 4 elements of dst.
+// Note: this intrinsic will generate 0x7F, rather than 0x80, for input values
+// between 0x7F and 0x7FFFFFFF.
+//
+//   FOR j := 0 to 3
+//     i := 8*j
+//     k := 32*j
+//     IF a[k+31:k] >= FP32(0x7F) && a[k+31:k] <= FP32(0x7FFFFFFF)
+//       dst[i+7:i] := 0x7F
+//     ELSE
+//       dst[i+7:i] := Convert_FP32_To_Int8(a[k+31:k])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi8
+FORCE_INLINE __m64 _mm_cvtps_pi8(__m128 a)
+{
+    const __m128 lowest8 = _mm_set_ps1((float) INT8_MIN);
+    const __m128 highest8 = _mm_set_ps1((float) INT8_MAX);
+    const __m128 highest32 = _mm_set_ps1((float) INT32_MAX);
+
+    // Lanes in [INT8_MAX, INT32_MAX] saturate to INT8_MAX.
+    const __m128i saturate_high = _mm_castps_si128(
+        _mm_and_ps(_mm_cmpge_ps(a, highest8), _mm_cmple_ps(a, highest32)));
+    // Lanes strictly between INT8_MIN and INT8_MAX are converted normally.
+    const __m128i in_range = _mm_castps_si128(
+        _mm_and_ps(_mm_cmpgt_ps(a, lowest8), _mm_cmplt_ps(a, highest8)));
+    // Every lane matched by neither mask receives INT8_MIN.
+    const __m128i saturate_low = _mm_cmpeq_epi32(
+        _mm_or_si128(saturate_high, in_range), _mm_setzero_si128());
+
+    __m128i high_part = _mm_and_si128(saturate_high, _mm_set1_epi32(INT8_MAX));
+    __m128i low_part = _mm_and_si128(saturate_low, _mm_set1_epi32(INT8_MIN));
+    __m128i converted = _mm_and_si128(in_range, _mm_cvtps_epi32(a));
+    __m128i merged = _mm_or_si128(_mm_or_si128(high_part, low_part), converted);
+
+    // Narrow 32 -> 16 -> 8 bits; the 16-bit vector is duplicated only to fill
+    // the 128-bit input of the second narrowing step.
+    int16x4_t narrowed16 = vmovn_s32(vreinterpretq_s32_m128i(merged));
+    int8x8_t narrowed8 = vmovn_s16(vcombine_s16(narrowed16, narrowed16));
+
+    // Keep the first four bytes and zero the upper four.
+    static const uint32_t bitMask[2] = {0xFFFFFFFF, 0};
+    int8x8_t keep_low4 = vreinterpret_s8_u32(vld1_u32(bitMask));
+    int8x8_t low4_only = vand_s8(keep_low4, narrowed8);
+
+    return vreinterpret_m64_s8(vorr_s8(low4_only, vdup_n_s8(0)));
+}
+
+// Convert packed unsigned 16-bit integers in a to packed single-precision
+// (32-bit) floating-point elements, and store the results in dst.
+//
+//   FOR j := 0 to 3
+//     i := j*16
+//     m := j*32
+//     dst[m+31:m] := Convert_UInt16_To_FP32(a[i+15:i])
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu16_ps
+FORCE_INLINE __m128 _mm_cvtpu16_ps(__m64 a)
+{
+    // Zero-extend the four 16-bit lanes to 32 bits, then convert to float.
+    const uint32x4_t widened = vmovl_u16(vreinterpret_u16_m64(a));
+    return vreinterpretq_m128_f32(vcvtq_f32_u32(widened));
+}
+
+// Convert the lower packed unsigned 8-bit integers in a to packed
+// single-precision (32-bit) floating-point elements, and store the results in
+// dst.
+//
+//   FOR j := 0 to 3
+//     i := j*8
+//     m := j*32
+//     dst[m+31:m] := Convert_UInt8_To_FP32(a[i+7:i])
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu8_ps
+FORCE_INLINE __m128 _mm_cvtpu8_ps(__m64 a)
+{
+    // Zero-extend 8 -> 16 bits, keep the low four lanes, zero-extend
+    // 16 -> 32 bits, then convert to float.
+    const uint16x8_t widened16 = vmovl_u8(vreinterpret_u8_m64(a));
+    const uint32x4_t widened32 = vmovl_u16(vget_low_u16(widened16));
+    return vreinterpretq_m128_f32(vcvtq_f32_u32(widened32));
+}
+
+// Convert the signed 32-bit integer b to a single-precision (32-bit)
+// floating-point element, store the result in the lower element of dst, and
+// copy the upper 3 packed elements from a to the upper elements of dst.
+//
+//   dst[31:0] := Convert_Int32_To_FP32(b[31:0])
+//   dst[127:32] := a[127:32]
+//
+// Alias of _mm_cvt_si2ss.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_ss
+#define _mm_cvtsi32_ss(a, b) _mm_cvt_si2ss(a, b)
+
+// Convert the signed 64-bit integer b to a single-precision (32-bit)
+// floating-point element, store the result in the lower element of dst, and
+// copy the upper 3 packed elements from a to the upper elements of dst.
+//
+//   dst[31:0] := Convert_Int64_To_FP32(b[63:0])
+//   dst[127:32] := a[127:32]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss
+FORCE_INLINE __m128 _mm_cvtsi64_ss(__m128 a, int64_t b)
+{
+    // Only lane 0 is replaced; the other lanes of a pass through.
+    const float32x4_t with_new_lane0 =
+        vsetq_lane_f32((float) b, vreinterpretq_f32_m128(a), 0);
+    return vreinterpretq_m128_f32(with_new_lane0);
+}
+
+// Copy the lower single-precision (32-bit) floating-point element of a to dst.
+//
+//   dst[31:0] := a[31:0]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32
+FORCE_INLINE float _mm_cvtss_f32(__m128 a)
+{
+    const float32x4_t lanes = vreinterpretq_f32_m128(a);
+    return vgetq_lane_f32(lanes, 0);
+}
+
+// Convert the lower single-precision (32-bit) floating-point element in a to a
+// 32-bit integer, and store the result in dst.
+//
+//   dst[31:0] := Convert_FP32_To_Int32(a[31:0])
+//
+// Alias of _mm_cvt_ss2si.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si32
+#define _mm_cvtss_si32(a) _mm_cvt_ss2si(a)
+
+// Convert the lower single-precision (32-bit) floating-point element in a to a
+// 64-bit integer, and store the result in dst.
+//
+//   dst[63:0] := Convert_FP32_To_Int64(a[31:0])
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64
+FORCE_INLINE int64_t _mm_cvtss_si64(__m128 a)
+{
+#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING)
+    // Round in vector form first, then convert lane 0 with a C cast.
+    const float32x4_t rounded = vrndiq_f32(vreinterpretq_f32_m128(a));
+    return (int64_t) vgetq_lane_f32(rounded, 0);
+#else
+    const __m128 rounded = _mm_round_ps(a, _MM_FROUND_CUR_DIRECTION);
+    const float32_t lane0 = vgetq_lane_f32(vreinterpretq_f32_m128(rounded), 0);
+    return (int64_t) lane0;
+#endif
+}
+
+// Convert packed single-precision (32-bit) floating-point elements in a to
+// packed 32-bit integers with truncation, and store the results in dst.
+// +// FOR j := 0 to 1 +// i := 32*j +// dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ps2pi +FORCE_INLINE __m64 _mm_cvtt_ps2pi(__m128 a) +{ + return vreinterpret_m64_s32( + vget_low_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)))); +} + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 32-bit integer with truncation, and store the result in dst. +// +// dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ss2si +FORCE_INLINE int _mm_cvtt_ss2si(__m128 a) +{ + return vgetq_lane_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a)), 0); +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed 32-bit integers with truncation, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// dst[i+31:i] := Convert_FP32_To_Int32_Truncate(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_pi32 +#define _mm_cvttps_pi32(a) _mm_cvtt_ps2pi(a) + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 32-bit integer with truncation, and store the result in dst. +// +// dst[31:0] := Convert_FP32_To_Int32_Truncate(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si32 +#define _mm_cvttss_si32(a) _mm_cvtt_ss2si(a) + +// Convert the lower single-precision (32-bit) floating-point element in a to a +// 64-bit integer with truncation, and store the result in dst. +// +// dst[63:0] := Convert_FP32_To_Int64_Truncate(a[31:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64 +FORCE_INLINE int64_t _mm_cvttss_si64(__m128 a) +{ + return (int64_t) vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); +} + +// Divides the four single-precision, floating-point values of a and b. 
+// +// r0 := a0 / b0 +// r1 := a1 / b1 +// r2 := a2 / b2 +// r3 := a3 / b3 +// +// https://msdn.microsoft.com/en-us/library/edaw8147(v=vs.100).aspx +FORCE_INLINE __m128 _mm_div_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) && !SSE2NEON_PRECISE_DIV + return vreinterpretq_m128_f32( + vdivq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(b)); + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(b))); +#if SSE2NEON_PRECISE_DIV + // Additional Netwon-Raphson iteration for accuracy + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(b))); +#endif + return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(a), recip)); +#endif +} + +// Divides the scalar single-precision floating point value of a by b. +// https://msdn.microsoft.com/en-us/library/4y73xa49(v=vs.100).aspx +FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) +{ + float32_t value = + vgetq_lane_f32(vreinterpretq_f32_m128(_mm_div_ps(a, b)), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); +} + +// Extract a 16-bit integer from a, selected with imm8, and store the result in +// the lower element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_pi16 +#define _mm_extract_pi16(a, imm) \ + (int32_t) vget_lane_u16(vreinterpret_u16_m64(a), (imm)) + +// Free aligned memory that was allocated with _mm_malloc. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free +FORCE_INLINE void _mm_free(void *addr) +{ + free(addr); +} + +// Macro: Get the flush zero bits from the MXCSR control and status register. 
+// The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or +// _MM_FLUSH_ZERO_OFF +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_FLUSH_ZERO_MODE +FORCE_INLINE unsigned int _sse2neon_mm_get_flush_zero_mode() +{ + union { + fpcr_bitfield field; +#if defined(__aarch64__) + uint64_t value; +#else + uint32_t value; +#endif + } r; + +#if defined(__aarch64__) + __asm__ __volatile__("mrs %0, FPCR" : "=r"(r.value)); /* read */ +#else + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ +#endif + + return r.field.bit24 ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF; +} + +// Macro: Get the rounding mode bits from the MXCSR control and status register. +// The rounding mode may contain any of the following flags: _MM_ROUND_NEAREST, +// _MM_ROUND_DOWN, _MM_ROUND_UP, _MM_ROUND_TOWARD_ZERO +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_ROUNDING_MODE +FORCE_INLINE unsigned int _MM_GET_ROUNDING_MODE() +{ + union { + fpcr_bitfield field; +#if defined(__aarch64__) + uint64_t value; +#else + uint32_t value; +#endif + } r; + +#if defined(__aarch64__) + __asm__ __volatile__("mrs %0, FPCR" : "=r"(r.value)); /* read */ +#else + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ +#endif + + if (r.field.bit22) { + return r.field.bit23 ? _MM_ROUND_TOWARD_ZERO : _MM_ROUND_UP; + } else { + return r.field.bit23 ? _MM_ROUND_DOWN : _MM_ROUND_NEAREST; + } +} + +// Copy a to dst, and insert the 16-bit integer i into dst at the location +// specified by imm8. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_pi16 +#define _mm_insert_pi16(a, b, imm) \ + __extension__({ \ + vreinterpret_m64_s16( \ + vset_lane_s16((b), vreinterpret_s16_m64(a), (imm))); \ + }) + +// Loads four single-precision, floating-point values. 
+// https://msdn.microsoft.com/en-us/library/vstudio/zzd50xxt(v=vs.100).aspx +FORCE_INLINE __m128 _mm_load_ps(const float *p) +{ + return vreinterpretq_m128_f32(vld1q_f32(p)); +} + +// Load a single-precision (32-bit) floating-point element from memory into all +// elements of dst. +// +// dst[31:0] := MEM[mem_addr+31:mem_addr] +// dst[63:32] := MEM[mem_addr+31:mem_addr] +// dst[95:64] := MEM[mem_addr+31:mem_addr] +// dst[127:96] := MEM[mem_addr+31:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps1 +#define _mm_load_ps1 _mm_load1_ps + +// Loads an single - precision, floating - point value into the low word and +// clears the upper three words. +// https://msdn.microsoft.com/en-us/library/548bb9h4%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_load_ss(const float *p) +{ + return vreinterpretq_m128_f32(vsetq_lane_f32(*p, vdupq_n_f32(0), 0)); +} + +// Loads a single single-precision, floating-point value, copying it into all +// four words +// https://msdn.microsoft.com/en-us/library/vstudio/5cdkf716(v=vs.100).aspx +FORCE_INLINE __m128 _mm_load1_ps(const float *p) +{ + return vreinterpretq_m128_f32(vld1q_dup_f32(p)); +} + +// Sets the upper two single-precision, floating-point values with 64 +// bits of data loaded from the address p; the lower two values are passed +// through from a. +// +// r0 := a0 +// r1 := a1 +// r2 := *p0 +// r3 := *p1 +// +// https://msdn.microsoft.com/en-us/library/w92wta0x(v%3dvs.100).aspx +FORCE_INLINE __m128 _mm_loadh_pi(__m128 a, __m64 const *p) +{ + return vreinterpretq_m128_f32( + vcombine_f32(vget_low_f32(a), vld1_f32((const float32_t *) p))); +} + +// Sets the lower two single-precision, floating-point values with 64 +// bits of data loaded from the address p; the upper two values are passed +// through from a. 
+// +// Return Value +// r0 := *p0 +// r1 := *p1 +// r2 := a2 +// r3 := a3 +// +// https://msdn.microsoft.com/en-us/library/s57cyak2(v=vs.100).aspx +FORCE_INLINE __m128 _mm_loadl_pi(__m128 a, __m64 const *p) +{ + return vreinterpretq_m128_f32( + vcombine_f32(vld1_f32((const float32_t *) p), vget_high_f32(a))); +} + +// Load 4 single-precision (32-bit) floating-point elements from memory into dst +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// dst[31:0] := MEM[mem_addr+127:mem_addr+96] +// dst[63:32] := MEM[mem_addr+95:mem_addr+64] +// dst[95:64] := MEM[mem_addr+63:mem_addr+32] +// dst[127:96] := MEM[mem_addr+31:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_ps +FORCE_INLINE __m128 _mm_loadr_ps(const float *p) +{ + float32x4_t v = vrev64q_f32(vld1q_f32(p)); + return vreinterpretq_m128_f32(vextq_f32(v, v, 2)); +} + +// Loads four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/x1b16s7z%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_loadu_ps(const float *p) +{ + // for neon, alignment doesn't matter, so _mm_load_ps and _mm_loadu_ps are + // equivalent for neon + return vreinterpretq_m128_f32(vld1q_f32(p)); +} + +// Load unaligned 16-bit integer from memory into the first element of dst. +// +// dst[15:0] := MEM[mem_addr+15:mem_addr] +// dst[MAX:16] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si16 +FORCE_INLINE __m128i _mm_loadu_si16(const void *p) +{ + return vreinterpretq_m128i_s16( + vsetq_lane_s16(*(const int16_t *) p, vdupq_n_s16(0), 0)); +} + +// Load unaligned 64-bit integer from memory into the first element of dst. 
+// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[MAX:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64 +FORCE_INLINE __m128i _mm_loadu_si64(const void *p) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vld1_s64((const int64_t *) p), vdup_n_s64(0))); +} + +// Allocate aligned blocks of memory. +// https://software.intel.com/en-us/ +// cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks +FORCE_INLINE void *_mm_malloc(size_t size, size_t align) +{ + void *ptr; + if (align == 1) + return malloc(size); + if (align == 2 || (sizeof(void *) == 8 && align == 4)) + align = sizeof(void *); + if (!posix_memalign(&ptr, align, size)) + return ptr; + return NULL; +} + +// Conditionally store 8-bit integer elements from a into memory using mask +// (elements are not stored when the highest bit is not set in the corresponding +// element) and a non-temporal memory hint. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmove_si64 +FORCE_INLINE void _mm_maskmove_si64(__m64 a, __m64 mask, char *mem_addr) +{ + int8x8_t shr_mask = vshr_n_s8(vreinterpret_s8_m64(mask), 7); + __m128 b = _mm_load_ps((const float *) mem_addr); + int8x8_t masked = + vbsl_s8(vreinterpret_u8_s8(shr_mask), vreinterpret_s8_m64(a), + vreinterpret_s8_u64(vget_low_u64(vreinterpretq_u64_m128(b)))); + vst1_s8((int8_t *) mem_addr, masked); +} + +// Conditionally store 8-bit integer elements from a into memory using mask +// (elements are not stored when the highest bit is not set in the corresponding +// element) and a non-temporal memory hint. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_maskmovq +#define _m_maskmovq(a, mask, mem_addr) _mm_maskmove_si64(a, mask, mem_addr) + +// Compare packed signed 16-bit integers in a and b, and store packed maximum +// values in dst. 
+// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := MAX(a[i+15:i], b[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pi16 +FORCE_INLINE __m64 _mm_max_pi16(__m64 a, __m64 b) +{ + return vreinterpret_m64_s16( + vmax_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); +} + +// Computes the maximums of the four single-precision, floating-point values of +// a and b. +// https://msdn.microsoft.com/en-us/library/vstudio/ff5d607a(v=vs.100).aspx +FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b) +{ +#if SSE2NEON_PRECISE_MINMAX + float32x4_t _a = vreinterpretq_f32_m128(a); + float32x4_t _b = vreinterpretq_f32_m128(b); + return vreinterpretq_m128_f32(vbslq_f32(vcgtq_f32(_a, _b), _a, _b)); +#else + return vreinterpretq_m128_f32( + vmaxq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#endif +} + +// Compare packed unsigned 8-bit integers in a and b, and store packed maximum +// values in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := MAX(a[i+7:i], b[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pu8 +FORCE_INLINE __m64 _mm_max_pu8(__m64 a, __m64 b) +{ + return vreinterpret_m64_u8( + vmax_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); +} + +// Computes the maximum of the two lower scalar single-precision floating point +// values of a and b. +// https://msdn.microsoft.com/en-us/library/s6db5esz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_max_ss(__m128 a, __m128 b) +{ + float32_t value = vgetq_lane_f32(_mm_max_ps(a, b), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); +} + +// Compare packed signed 16-bit integers in a and b, and store packed minimum +// values in dst. 
+// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := MIN(a[i+15:i], b[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pi16 +FORCE_INLINE __m64 _mm_min_pi16(__m64 a, __m64 b) +{ + return vreinterpret_m64_s16( + vmin_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); +} + +// Computes the minima of the four single-precision, floating-point values of a +// and b. +// https://msdn.microsoft.com/en-us/library/vstudio/wh13kadz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b) +{ +#if SSE2NEON_PRECISE_MINMAX + float32x4_t _a = vreinterpretq_f32_m128(a); + float32x4_t _b = vreinterpretq_f32_m128(b); + return vreinterpretq_m128_f32(vbslq_f32(vcltq_f32(_a, _b), _a, _b)); +#else + return vreinterpretq_m128_f32( + vminq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#endif +} + +// Compare packed unsigned 8-bit integers in a and b, and store packed minimum +// values in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := MIN(a[i+7:i], b[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pu8 +FORCE_INLINE __m64 _mm_min_pu8(__m64 a, __m64 b) +{ + return vreinterpret_m64_u8( + vmin_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))); +} + +// Computes the minimum of the two lower scalar single-precision floating point +// values of a and b. 
+// https://msdn.microsoft.com/en-us/library/0a9y7xaa(v=vs.100).aspx +FORCE_INLINE __m128 _mm_min_ss(__m128 a, __m128 b) +{ + float32_t value = vgetq_lane_f32(_mm_min_ps(a, b), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(a), 0)); +} + +// Sets the low word to the single-precision, floating-point value of b +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/35hdzazd(v=vs.100) +FORCE_INLINE __m128 _mm_move_ss(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vsetq_lane_f32(vgetq_lane_f32(vreinterpretq_f32_m128(b), 0), + vreinterpretq_f32_m128(a), 0)); +} + +// Moves the upper two values of B into the lower two values of A. +// +// r3 := a3 +// r2 := a2 +// r1 := b3 +// r0 := b2 +FORCE_INLINE __m128 _mm_movehl_ps(__m128 __A, __m128 __B) +{ + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(__A)); + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(__B)); + return vreinterpretq_m128_f32(vcombine_f32(b32, a32)); +} + +// Moves the lower two values of B into the upper two values of A. +// +// r3 := b1 +// r2 := b0 +// r1 := a1 +// r0 := a0 +FORCE_INLINE __m128 _mm_movelh_ps(__m128 __A, __m128 __B) +{ + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(__A)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(__B)); + return vreinterpretq_m128_f32(vcombine_f32(a10, b10)); +} + +// Create mask from the most significant bit of each 8-bit element in a, and +// store the result in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pi8 +FORCE_INLINE int _mm_movemask_pi8(__m64 a) +{ + uint8x8_t input = vreinterpret_u8_m64(a); +#if defined(__aarch64__) + static const int8x8_t shift = {0, 1, 2, 3, 4, 5, 6, 7}; + uint8x8_t tmp = vshr_n_u8(input, 7); + return vaddv_u8(vshl_u8(tmp, shift)); +#else + // Refer the implementation of `_mm_movemask_epi8` + uint16x4_t high_bits = vreinterpret_u16_u8(vshr_n_u8(input, 7)); + uint32x2_t paired16 = + vreinterpret_u32_u16(vsra_n_u16(high_bits, high_bits, 7)); + uint8x8_t paired32 = + vreinterpret_u8_u32(vsra_n_u32(paired16, paired16, 14)); + return vget_lane_u8(paired32, 0) | ((int) vget_lane_u8(paired32, 4) << 4); +#endif +} + +// NEON does not provide this method +// Creates a 4-bit mask from the most significant bits of the four +// single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/4490ys29(v=vs.100).aspx +FORCE_INLINE int _mm_movemask_ps(__m128 a) +{ + uint32x4_t input = vreinterpretq_u32_m128(a); +#if defined(__aarch64__) + static const int32x4_t shift = {0, 1, 2, 3}; + uint32x4_t tmp = vshrq_n_u32(input, 31); + return vaddvq_u32(vshlq_u32(tmp, shift)); +#else + // Uses the exact same method as _mm_movemask_epi8, see that for details. + // Shift out everything but the sign bits with a 32-bit unsigned shift + // right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(input, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = + vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. + return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); +#endif +} + +// Multiplies the four single-precision, floating-point values of a and b. 
+// +// r0 := a0 * b0 +// r1 := a1 * b1 +// r2 := a2 * b2 +// r3 := a3 * b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/22kbk6t9(v=vs.100).aspx +FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vmulq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Multiply the lower single-precision (32-bit) floating-point element in a and +// b, store the result in the lower element of dst, and copy the upper 3 packed +// elements from a to the upper elements of dst. +// +// dst[31:0] := a[31:0] * b[31:0] +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ss +FORCE_INLINE __m128 _mm_mul_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_mul_ps(a, b)); +} + +// Multiply the packed unsigned 16-bit integers in a and b, producing +// intermediate 32-bit integers, and store the high 16 bits of the intermediate +// integers in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_pu16 +FORCE_INLINE __m64 _mm_mulhi_pu16(__m64 a, __m64 b) +{ + return vreinterpret_m64_u16(vshrn_n_u32( + vmull_u16(vreinterpret_u16_m64(a), vreinterpret_u16_m64(b)), 16)); +} + +// Computes the bitwise OR of the four single-precision, floating-point values +// of a and b. +// https://msdn.microsoft.com/en-us/library/vstudio/7ctdsyy0(v=vs.100).aspx +FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_s32( + vorrq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b))); +} + +// Average packed unsigned 8-bit integers in a and b, and store the results in +// dst. +// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := (a[i+7:i] + b[i+7:i] + 1) >> 1 +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgb +#define _m_pavgb(a, b) _mm_avg_pu8(a, b) + +// Average packed unsigned 16-bit integers in a and b, and store the results in +// dst. 
//
//   FOR j := 0 to 3
//      i := j*16
//      dst[i+15:i] := (a[i+15:i] + b[i+15:i] + 1) >> 1
//   ENDFOR
//
// Alias: forwards to _mm_avg_pu16.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgw
#define _m_pavgw(a, b) _mm_avg_pu16(a, b)

// Extract a 16-bit integer from a, selected with imm8, and store the result in
// the lower element of dst.
// Alias: forwards to _mm_extract_pi16.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pextrw
#define _m_pextrw(a, imm) _mm_extract_pi16(a, imm)

// Copy a to dst, and insert the 16-bit integer i into dst at the location
// specified by imm8.
// Alias: forwards to _mm_insert_pi16.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pinsrw
#define _m_pinsrw(a, i, imm) _mm_insert_pi16(a, i, imm)

// Compare packed signed 16-bit integers in a and b, and store packed maximum
// values in dst.
// Alias: forwards to _mm_max_pi16.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxsw
#define _m_pmaxsw(a, b) _mm_max_pi16(a, b)

// Compare packed unsigned 8-bit integers in a and b, and store packed maximum
// values in dst.
// Alias: forwards to _mm_max_pu8.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxub
#define _m_pmaxub(a, b) _mm_max_pu8(a, b)

// Compare packed signed 16-bit integers in a and b, and store packed minimum
// values in dst.
// Alias: forwards to _mm_min_pi16.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminsw
#define _m_pminsw(a, b) _mm_min_pi16(a, b)

// Compare packed unsigned 8-bit integers in a and b, and store packed minimum
// values in dst.
// Alias: forwards to _mm_min_pu8.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminub
#define _m_pminub(a, b) _mm_min_pu8(a, b)

// Create mask from the most significant bit of each 8-bit element in a, and
// store the result in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmovmskb +#define _m_pmovmskb(a) _mm_movemask_pi8(a) + +// Multiply the packed unsigned 16-bit integers in a and b, producing +// intermediate 32-bit integers, and store the high 16 bits of the intermediate +// integers in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw +#define _m_pmulhuw(a, b) _mm_mulhi_pu16(a, b) + +// Loads one cache line of data from address p to a location closer to the +// processor. https://msdn.microsoft.com/en-us/library/84szxsww(v=vs.100).aspx +FORCE_INLINE void _mm_prefetch(const void *p, int i) +{ + (void) i; + __builtin_prefetch(p); +} + +// Compute the absolute differences of packed unsigned 8-bit integers in a and +// b, then horizontally sum each consecutive 8 differences to produce four +// unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low +// 16 bits of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=m_psadbw +#define _m_psadbw(a, b) _mm_sad_pu8(a, b) + +// Shuffle 16-bit integers in a using the control in imm8, and store the results +// in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pshufw +#define _m_pshufw(a, imm) _mm_shuffle_pi16(a, imm) + +// Compute the approximate reciprocal of packed single-precision (32-bit) +// floating-point elements in a, and store the results in dst. The maximum +// relative error for this approximation is less than 1.5*2^-12. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps +FORCE_INLINE __m128 _mm_rcp_ps(__m128 in) +{ + float32x4_t recip = vrecpeq_f32(vreinterpretq_f32_m128(in)); + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); +#if SSE2NEON_PRECISE_DIV + // Additional Netwon-Raphson iteration for accuracy + recip = vmulq_f32(recip, vrecpsq_f32(recip, vreinterpretq_f32_m128(in))); +#endif + return vreinterpretq_m128_f32(recip); +} + +// Compute the approximate reciprocal of the lower single-precision (32-bit) +// floating-point element in a, store the result in the lower element of dst, +// and copy the upper 3 packed elements from a to the upper elements of dst. The +// maximum relative error for this approximation is less than 1.5*2^-12. +// +// dst[31:0] := (1.0 / a[31:0]) +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss +FORCE_INLINE __m128 _mm_rcp_ss(__m128 a) +{ + return _mm_move_ss(a, _mm_rcp_ps(a)); +} + +// Computes the approximations of the reciprocal square roots of the four +// single-precision floating point values of in. +// The current precision is 1% error. +// https://msdn.microsoft.com/en-us/library/22hfsh53(v=vs.100).aspx +FORCE_INLINE __m128 _mm_rsqrt_ps(__m128 in) +{ + float32x4_t out = vrsqrteq_f32(vreinterpretq_f32_m128(in)); +#if SSE2NEON_PRECISE_SQRT + // Additional Netwon-Raphson iteration for accuracy + out = vmulq_f32( + out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out)); + out = vmulq_f32( + out, vrsqrtsq_f32(vmulq_f32(vreinterpretq_f32_m128(in), out), out)); +#endif + return vreinterpretq_m128_f32(out); +} + +// Compute the approximate reciprocal square root of the lower single-precision +// (32-bit) floating-point element in a, store the result in the lower element +// of dst, and copy the upper 3 packed elements from a to the upper elements of +// dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss +FORCE_INLINE __m128 _mm_rsqrt_ss(__m128 in) +{ + return vsetq_lane_f32(vgetq_lane_f32(_mm_rsqrt_ps(in), 0), in, 0); +} + +// Compute the absolute differences of packed unsigned 8-bit integers in a and +// b, then horizontally sum each consecutive 8 differences to produce four +// unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low +// 16 bits of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_pu8 +FORCE_INLINE __m64 _mm_sad_pu8(__m64 a, __m64 b) +{ + uint64x1_t t = vpaddl_u32(vpaddl_u16( + vpaddl_u8(vabd_u8(vreinterpret_u8_m64(a), vreinterpret_u8_m64(b))))); + return vreinterpret_m64_u16( + vset_lane_u16(vget_lane_u64(t, 0), vdup_n_u16(0), 0)); +} + +// Macro: Set the flush zero bits of the MXCSR control and status register to +// the value in unsigned 32-bit integer a. The flush zero may contain any of the +// following flags: _MM_FLUSH_ZERO_ON or _MM_FLUSH_ZERO_OFF +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_FLUSH_ZERO_MODE +FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag) +{ + // AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting, + // regardless of the value of the FZ bit. + union { + fpcr_bitfield field; +#if defined(__aarch64__) + uint64_t value; +#else + uint32_t value; +#endif + } r; + +#if defined(__aarch64__) + __asm__ __volatile__("mrs %0, FPCR" : "=r"(r.value)); /* read */ +#else + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ +#endif + + r.field.bit24 = (flag & _MM_FLUSH_ZERO_MASK) == _MM_FLUSH_ZERO_ON; + +#if defined(__aarch64__) + __asm__ __volatile__("msr FPCR, %0" ::"r"(r)); /* write */ +#else + __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ +#endif +} + +// Sets the four single-precision, floating-point values to the four inputs. 
+// https://msdn.microsoft.com/en-us/library/vstudio/afh0zf75(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set_ps(float w, float z, float y, float x) +{ + float ALIGN_STRUCT(16) data[4] = {x, y, z, w}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +} + +// Sets the four single-precision, floating-point values to w. +// https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set_ps1(float _w) +{ + return vreinterpretq_m128_f32(vdupq_n_f32(_w)); +} + +// Macro: Set the rounding mode bits of the MXCSR control and status register to +// the value in unsigned 32-bit integer a. The rounding mode may contain any of +// the following flags: _MM_ROUND_NEAREST, _MM_ROUND_DOWN, _MM_ROUND_UP, +// _MM_ROUND_TOWARD_ZERO +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_ROUNDING_MODE +FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) +{ + union { + fpcr_bitfield field; +#if defined(__aarch64__) + uint64_t value; +#else + uint32_t value; +#endif + } r; + +#if defined(__aarch64__) + __asm__ __volatile__("mrs %0, FPCR" : "=r"(r.value)); /* read */ +#else + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ +#endif + + switch (rounding) { + case _MM_ROUND_TOWARD_ZERO: + r.field.bit22 = 1; + r.field.bit23 = 1; + break; + case _MM_ROUND_DOWN: + r.field.bit22 = 0; + r.field.bit23 = 1; + break; + case _MM_ROUND_UP: + r.field.bit22 = 1; + r.field.bit23 = 0; + break; + default: //_MM_ROUND_NEAREST + r.field.bit22 = 0; + r.field.bit23 = 0; + } + +#if defined(__aarch64__) + __asm__ __volatile__("msr FPCR, %0" ::"r"(r)); /* write */ +#else + __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ +#endif +} + +// Copy single-precision (32-bit) floating-point element a to the lower element +// of dst, and zero the upper 3 elements. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss +FORCE_INLINE __m128 _mm_set_ss(float a) +{ + float ALIGN_STRUCT(16) data[4] = {a, 0, 0, 0}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +} + +// Sets the four single-precision, floating-point values to w. +// +// r0 := r1 := r2 := r3 := w +// +// https://msdn.microsoft.com/en-us/library/vstudio/2x1se8ha(v=vs.100).aspx +FORCE_INLINE __m128 _mm_set1_ps(float _w) +{ + return vreinterpretq_m128_f32(vdupq_n_f32(_w)); +} + +// FIXME: _mm_setcsr() implementation supports changing the rounding mode only. +FORCE_INLINE void _mm_setcsr(unsigned int a) +{ + _MM_SET_ROUNDING_MODE(a); +} + +// FIXME: _mm_getcsr() implementation supports reading the rounding mode only. +FORCE_INLINE unsigned int _mm_getcsr() +{ + return _MM_GET_ROUNDING_MODE(); +} + +// Sets the four single-precision, floating-point values to the four inputs in +// reverse order. +// https://msdn.microsoft.com/en-us/library/vstudio/d2172ct3(v=vs.100).aspx +FORCE_INLINE __m128 _mm_setr_ps(float w, float z, float y, float x) +{ + float ALIGN_STRUCT(16) data[4] = {w, z, y, x}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +} + +// Clears the four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/tk1t2tbz(v=vs.100).aspx +FORCE_INLINE __m128 _mm_setzero_ps(void) +{ + return vreinterpretq_m128_f32(vdupq_n_f32(0)); +} + +// Shuffle 16-bit integers in a using the control in imm8, and store the results +// in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi16 +#if __has_builtin(__builtin_shufflevector) +#define _mm_shuffle_pi16(a, imm) \ + __extension__({ \ + vreinterpret_m64_s16(__builtin_shufflevector( \ + vreinterpret_s16_m64(a), vreinterpret_s16_m64(a), (imm & 0x3), \ + ((imm >> 2) & 0x3), ((imm >> 4) & 0x3), ((imm >> 6) & 0x3))); \ + }) +#else +#define _mm_shuffle_pi16(a, imm) \ + __extension__({ \ + int16x4_t ret; \ + ret = \ + vmov_n_s16(vget_lane_s16(vreinterpret_s16_m64(a), (imm) & (0x3))); \ + ret = vset_lane_s16( \ + vget_lane_s16(vreinterpret_s16_m64(a), ((imm) >> 2) & 0x3), ret, \ + 1); \ + ret = vset_lane_s16( \ + vget_lane_s16(vreinterpret_s16_m64(a), ((imm) >> 4) & 0x3), ret, \ + 2); \ + ret = vset_lane_s16( \ + vget_lane_s16(vreinterpret_s16_m64(a), ((imm) >> 6) & 0x3), ret, \ + 3); \ + vreinterpret_m64_s16(ret); \ + }) +#endif + +// Guarantees that every preceding store is globally visible before any +// subsequent store. +// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx +FORCE_INLINE void _mm_sfence(void) +{ + __sync_synchronize(); +} + +// FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) +// int imm) +#if __has_builtin(__builtin_shufflevector) +#define _mm_shuffle_ps(a, b, imm) \ + __extension__({ \ + float32x4_t _input1 = vreinterpretq_f32_m128(a); \ + float32x4_t _input2 = vreinterpretq_f32_m128(b); \ + float32x4_t _shuf = __builtin_shufflevector( \ + _input1, _input2, (imm) & (0x3), ((imm) >> 2) & 0x3, \ + (((imm) >> 4) & 0x3) + 4, (((imm) >> 6) & 0x3) + 4); \ + vreinterpretq_m128_f32(_shuf); \ + }) +#else // generic +#define _mm_shuffle_ps(a, b, imm) \ + __extension__({ \ + __m128 ret; \ + switch (imm) { \ + case _MM_SHUFFLE(1, 0, 3, 2): \ + ret = _mm_shuffle_ps_1032((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 3, 0, 1): \ + ret = _mm_shuffle_ps_2301((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 3, 2, 1): \ + ret = _mm_shuffle_ps_0321((a), (b)); \ + break; \ + 
case _MM_SHUFFLE(2, 1, 0, 3): \ + ret = _mm_shuffle_ps_2103((a), (b)); \ + break; \ + case _MM_SHUFFLE(1, 0, 1, 0): \ + ret = _mm_movelh_ps((a), (b)); \ + break; \ + case _MM_SHUFFLE(1, 0, 0, 1): \ + ret = _mm_shuffle_ps_1001((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 1, 0, 1): \ + ret = _mm_shuffle_ps_0101((a), (b)); \ + break; \ + case _MM_SHUFFLE(3, 2, 1, 0): \ + ret = _mm_shuffle_ps_3210((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 0, 1, 1): \ + ret = _mm_shuffle_ps_0011((a), (b)); \ + break; \ + case _MM_SHUFFLE(0, 0, 2, 2): \ + ret = _mm_shuffle_ps_0022((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 2, 0, 0): \ + ret = _mm_shuffle_ps_2200((a), (b)); \ + break; \ + case _MM_SHUFFLE(3, 2, 0, 2): \ + ret = _mm_shuffle_ps_3202((a), (b)); \ + break; \ + case _MM_SHUFFLE(3, 2, 3, 2): \ + ret = _mm_movehl_ps((b), (a)); \ + break; \ + case _MM_SHUFFLE(1, 1, 3, 3): \ + ret = _mm_shuffle_ps_1133((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 0, 1, 0): \ + ret = _mm_shuffle_ps_2010((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 0, 0, 1): \ + ret = _mm_shuffle_ps_2001((a), (b)); \ + break; \ + case _MM_SHUFFLE(2, 0, 3, 2): \ + ret = _mm_shuffle_ps_2032((a), (b)); \ + break; \ + default: \ + ret = _mm_shuffle_ps_default((a), (b), (imm)); \ + break; \ + } \ + ret; \ + }) +#endif + +// Computes the approximations of square roots of the four single-precision, +// floating-point values of a. First computes reciprocal square roots and then +// reciprocals of the four values. +// +// r0 := sqrt(a0) +// r1 := sqrt(a1) +// r2 := sqrt(a2) +// r3 := sqrt(a3) +// +// https://msdn.microsoft.com/en-us/library/vstudio/8z67bwwk(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sqrt_ps(__m128 in) +{ +#if SSE2NEON_PRECISE_SQRT + float32x4_t recip = vrsqrteq_f32(vreinterpretq_f32_m128(in)); + + // Test for vrsqrteq_f32(0) -> positive infinity case. + // Change to zero, so that s * 1/sqrt(s) result is zero too. 
+ const uint32x4_t pos_inf = vdupq_n_u32(0x7F800000); + const uint32x4_t div_by_zero = + vceqq_u32(pos_inf, vreinterpretq_u32_f32(recip)); + recip = vreinterpretq_f32_u32( + vandq_u32(vmvnq_u32(div_by_zero), vreinterpretq_u32_f32(recip))); + + // Additional Netwon-Raphson iteration for accuracy + recip = vmulq_f32( + vrsqrtsq_f32(vmulq_f32(recip, recip), vreinterpretq_f32_m128(in)), + recip); + recip = vmulq_f32( + vrsqrtsq_f32(vmulq_f32(recip, recip), vreinterpretq_f32_m128(in)), + recip); + + // sqrt(s) = s * 1/sqrt(s) + return vreinterpretq_m128_f32(vmulq_f32(vreinterpretq_f32_m128(in), recip)); +#elif defined(__aarch64__) + return vreinterpretq_m128_f32(vsqrtq_f32(vreinterpretq_f32_m128(in))); +#else + float32x4_t recipsq = vrsqrteq_f32(vreinterpretq_f32_m128(in)); + float32x4_t sq = vrecpeq_f32(recipsq); + return vreinterpretq_m128_f32(sq); +#endif +} + +// Computes the approximation of the square root of the scalar single-precision +// floating point value of in. +// https://msdn.microsoft.com/en-us/library/ahfsc22d(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sqrt_ss(__m128 in) +{ + float32_t value = + vgetq_lane_f32(vreinterpretq_f32_m128(_mm_sqrt_ps(in)), 0); + return vreinterpretq_m128_f32( + vsetq_lane_f32(value, vreinterpretq_f32_m128(in), 0)); +} + +// Stores four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/vstudio/s3h4ay6y(v=vs.100).aspx +FORCE_INLINE void _mm_store_ps(float *p, __m128 a) +{ + vst1q_f32(p, vreinterpretq_f32_m128(a)); +} + +// Store the lower single-precision (32-bit) floating-point element from a into +// 4 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. 
+// +// MEM[mem_addr+31:mem_addr] := a[31:0] +// MEM[mem_addr+63:mem_addr+32] := a[31:0] +// MEM[mem_addr+95:mem_addr+64] := a[31:0] +// MEM[mem_addr+127:mem_addr+96] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps1 +FORCE_INLINE void _mm_store_ps1(float *p, __m128 a) +{ + float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + vst1q_f32(p, vdupq_n_f32(a0)); +} + +// Stores the lower single - precision, floating - point value. +// https://msdn.microsoft.com/en-us/library/tzz10fbx(v=vs.100).aspx +FORCE_INLINE void _mm_store_ss(float *p, __m128 a) +{ + vst1q_lane_f32(p, vreinterpretq_f32_m128(a), 0); +} + +// Store the lower single-precision (32-bit) floating-point element from a into +// 4 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// +// MEM[mem_addr+31:mem_addr] := a[31:0] +// MEM[mem_addr+63:mem_addr+32] := a[31:0] +// MEM[mem_addr+95:mem_addr+64] := a[31:0] +// MEM[mem_addr+127:mem_addr+96] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps +#define _mm_store1_ps _mm_store_ps1 + +// Stores the upper two single-precision, floating-point values of a to the +// address p. +// +// *p0 := a2 +// *p1 := a3 +// +// https://msdn.microsoft.com/en-us/library/a7525fs8(v%3dvs.90).aspx +FORCE_INLINE void _mm_storeh_pi(__m64 *p, __m128 a) +{ + *p = vreinterpret_m64_f32(vget_high_f32(a)); +} + +// Stores the lower two single-precision floating point values of a to the +// address p. +// +// *p0 := a0 +// *p1 := a1 +// +// https://msdn.microsoft.com/en-us/library/h54t98ks(v=vs.90).aspx +FORCE_INLINE void _mm_storel_pi(__m64 *p, __m128 a) +{ + *p = vreinterpret_m64_f32(vget_low_f32(a)); +} + +// Store 4 single-precision (32-bit) floating-point elements from a into memory +// in reverse order. 
mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// MEM[mem_addr+31:mem_addr] := a[127:96] +// MEM[mem_addr+63:mem_addr+32] := a[95:64] +// MEM[mem_addr+95:mem_addr+64] := a[63:32] +// MEM[mem_addr+127:mem_addr+96] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps +FORCE_INLINE void _mm_storer_ps(float *p, __m128 a) +{ + float32x4_t tmp = vrev64q_f32(vreinterpretq_f32_m128(a)); + float32x4_t rev = vextq_f32(tmp, tmp, 2); + vst1q_f32(p, rev); +} + +// Stores four single-precision, floating-point values. +// https://msdn.microsoft.com/en-us/library/44e30x22(v=vs.100).aspx +FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a) +{ + vst1q_f32(p, vreinterpretq_f32_m128(a)); +} + +// Stores 16-bits of integer data a at the address p. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si16 +FORCE_INLINE void _mm_storeu_si16(void *p, __m128i a) +{ + vst1q_lane_s16((int16_t *) p, vreinterpretq_s16_m128i(a), 0); +} + +// Stores 64-bits of integer data a at the address p. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si64 +FORCE_INLINE void _mm_storeu_si64(void *p, __m128i a) +{ + vst1q_lane_s64((int64_t *) p, vreinterpretq_s64_m128i(a), 0); +} + +// Store 64-bits of integer data from a into memory using a non-temporal memory +// hint. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pi +FORCE_INLINE void _mm_stream_pi(__m64 *p, __m64 a) +{ + vst1_s64((int64_t *) p, vreinterpret_s64_m64(a)); +} + +// Store 128-bits (composed of 4 packed single-precision (32-bit) floating- +// point elements) from a into memory using a non-temporal memory hint. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_ps +FORCE_INLINE void _mm_stream_ps(float *p, __m128 a) +{ +#if __has_builtin(__builtin_nontemporal_store) + __builtin_nontemporal_store(a, (float32x4_t *) p); +#else + vst1q_f32(p, vreinterpretq_f32_m128(a)); +#endif +} + +// Subtracts the four single-precision, floating-point values of a and b. +// +// r0 := a0 - b0 +// r1 := a1 - b1 +// r2 := a2 - b2 +// r3 := a3 - b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/1zad2k61(v=vs.100).aspx +FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_f32( + vsubq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +} + +// Subtract the lower single-precision (32-bit) floating-point element in b from +// the lower single-precision (32-bit) floating-point element in a, store the +// result in the lower element of dst, and copy the upper 3 packed elements from +// a to the upper elements of dst. +// +// dst[31:0] := a[31:0] - b[31:0] +// dst[127:32] := a[127:32] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ss +FORCE_INLINE __m128 _mm_sub_ss(__m128 a, __m128 b) +{ + return _mm_move_ss(a, _mm_sub_ps(a, b)); +} + +// Macro: Transpose the 4x4 matrix formed by the 4 rows of single-precision +// (32-bit) floating-point elements in row0, row1, row2, and row3, and store the +// transposed matrix in these vectors (row0 now contains column 0, etc.). 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=MM_TRANSPOSE4_PS
+//
+// Implementation: vtrnq_f32 transposes 2x2 blocks of (row0,row1) and of
+// (row2,row3); the low halves of the .val[0]/.val[1] results are then combined
+// into new rows 0/1 and the high halves into new rows 2/3. The arguments must
+// be assignable lvalues because the results are written back into them.
+#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
+    do { \
+        float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \
+        float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \
+        row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \
+                            vget_low_f32(ROW23.val[0])); \
+        row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \
+                            vget_low_f32(ROW23.val[1])); \
+        row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \
+                            vget_high_f32(ROW23.val[0])); \
+        row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \
+                            vget_high_f32(ROW23.val[1])); \
+    } while (0)
+
+// According to the documentation, these intrinsics behave the same as the
+// non-'u' versions, so they are simply aliased here.
+// NOTE(review): on x86 the ucomi forms differ from comi in when they signal
+// invalid-operation for QNaN operands; confirm that difference is irrelevant
+// for callers of this header.
+#define _mm_ucomieq_ss _mm_comieq_ss
+#define _mm_ucomige_ss _mm_comige_ss
+#define _mm_ucomigt_ss _mm_comigt_ss
+#define _mm_ucomile_ss _mm_comile_ss
+#define _mm_ucomilt_ss _mm_comilt_ss
+#define _mm_ucomineq_ss _mm_comineq_ss
+
+// Return vector of type __m128i with undefined elements.
+// The -Wuninitialized diagnostic is suppressed around the body on GCC/Clang
+// because returning an uninitialized value is the whole point here.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_undefined_si128
+FORCE_INLINE __m128i _mm_undefined_si128(void)
+{
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#endif
+    // Intentionally left uninitialized.
+    __m128i a;
+    return a;
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+// Return vector of type __m128 with undefined elements.
+// Same pattern as _mm_undefined_si128: the value is deliberately
+// uninitialized and the matching warning is silenced on GCC/Clang.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps
+FORCE_INLINE __m128 _mm_undefined_ps(void)
+{
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wuninitialized"
+#endif
+    // Intentionally left uninitialized.
+    __m128 a;
+    return a;
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+}
+
+// Selects and interleaves the upper two single-precision, floating-point values
+// from a and b.
+// +// r0 := a2 +// r1 := b2 +// r2 := a3 +// r3 := b3 +// +// https://msdn.microsoft.com/en-us/library/skccxx7d%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_unpackhi_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vzip2q_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x2_t a1 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32x2_t b1 = vget_high_f32(vreinterpretq_f32_m128(b)); + float32x2x2_t result = vzip_f32(a1, b1); + return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); +#endif +} + +// Selects and interleaves the lower two single-precision, floating-point values +// from a and b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// +// https://msdn.microsoft.com/en-us/library/25st103b%28v=vs.90%29.aspx +FORCE_INLINE __m128 _mm_unpacklo_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vzip1q_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x2_t a1 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t b1 = vget_low_f32(vreinterpretq_f32_m128(b)); + float32x2x2_t result = vzip_f32(a1, b1); + return vreinterpretq_m128_f32(vcombine_f32(result.val[0], result.val[1])); +#endif +} + +// Computes bitwise EXOR (exclusive-or) of the four single-precision, +// floating-point values of a and b. +// https://msdn.microsoft.com/en-us/library/ss6k3wk8(v=vs.100).aspx +FORCE_INLINE __m128 _mm_xor_ps(__m128 a, __m128 b) +{ + return vreinterpretq_m128_s32( + veorq_s32(vreinterpretq_s32_m128(a), vreinterpretq_s32_m128(b))); +} + +/* SSE2 */ + +// Adds the 8 signed or unsigned 16-bit integers in a to the 8 signed or +// unsigned 16-bit integers in b. 
+// https://msdn.microsoft.com/en-us/library/fceha5k4(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_add_epi16(__m128i a, __m128i b)
+{
+    const int16x8_t lhs = vreinterpretq_s16_m128i(a);
+    const int16x8_t rhs = vreinterpretq_s16_m128i(b);
+    return vreinterpretq_m128i_s16(vaddq_s16(lhs, rhs));
+}
+
+// Lane-wise addition of the 4 (signed or unsigned) 32-bit integers in a and
+// b.
+//
+//   r0 := a0 + b0
+//   r1 := a1 + b1
+//   r2 := a2 + b2
+//   r3 := a3 + b3
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b)
+{
+    const int32x4_t lhs = vreinterpretq_s32_m128i(a);
+    const int32x4_t rhs = vreinterpretq_s32_m128i(b);
+    return vreinterpretq_m128i_s32(vaddq_s32(lhs, rhs));
+}
+
+// Lane-wise addition of the 2 (signed or unsigned) 64-bit integers in a and
+// b.
+// https://msdn.microsoft.com/en-us/library/vstudio/09xs4fkk(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_add_epi64(__m128i a, __m128i b)
+{
+    const int64x2_t lhs = vreinterpretq_s64_m128i(a);
+    const int64x2_t rhs = vreinterpretq_s64_m128i(b);
+    return vreinterpretq_m128i_s64(vaddq_s64(lhs, rhs));
+}
+
+// Lane-wise addition of the 16 (signed or unsigned) 8-bit integers in a and
+// b.
+// https://technet.microsoft.com/en-us/subscriptions/yc7tcyzs(v=vs.90)
+FORCE_INLINE __m128i _mm_add_epi8(__m128i a, __m128i b)
+{
+    const int8x16_t lhs = vreinterpretq_s8_m128i(a);
+    const int8x16_t rhs = vreinterpretq_s8_m128i(b);
+    return vreinterpretq_m128i_s8(vaddq_s8(lhs, rhs));
+}
+
+// Add packed double-precision (64-bit) floating-point elements in a and b, and
+// store the results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd +FORCE_INLINE __m128d _mm_add_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vaddq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] + db[0]; + c[1] = da[1] + db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Add the lower double-precision (64-bit) floating-point element in a and b, +// store the result in the lower element of dst, and copy the upper element from +// a to the upper element of dst. +// +// dst[63:0] := a[63:0] + b[63:0] +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd +FORCE_INLINE __m128d _mm_add_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_add_pd(a, b)); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] + db[0]; + c[1] = da[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Add 64-bit integers a and b, and store the result in dst. +// +// dst[63:0] := a[63:0] + b[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_si64 +FORCE_INLINE __m64 _mm_add_si64(__m64 a, __m64 b) +{ + return vreinterpret_m64_s64( + vadd_s64(vreinterpret_s64_m64(a), vreinterpret_s64_m64(b))); +} + +// Adds the 8 signed 16-bit integers in a to the 8 signed 16-bit integers in b +// and saturates. +// +// r0 := SignedSaturate(a0 + b0) +// r1 := SignedSaturate(a1 + b1) +// ... +// r7 := SignedSaturate(a7 + b7) +// +// https://msdn.microsoft.com/en-us/library/1a306ef8(v=vs.100).aspx +FORCE_INLINE __m128i _mm_adds_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vqaddq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Add packed signed 8-bit integers in a and b using saturation, and store the +// results in dst. 
+//
+//   FOR j := 0 to 15
+//     i := j*8
+//     dst[i+7:i] := Saturate8( a[i+7:i] + b[i+7:i] )
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8
+FORCE_INLINE __m128i _mm_adds_epi8(__m128i a, __m128i b)
+{
+    const int8x16_t lhs = vreinterpretq_s8_m128i(a);
+    const int8x16_t rhs = vreinterpretq_s8_m128i(b);
+    return vreinterpretq_m128i_s8(vqaddq_s8(lhs, rhs));
+}
+
+// Saturating lane-wise addition of the packed unsigned 16-bit integers in a
+// and b.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16
+FORCE_INLINE __m128i _mm_adds_epu16(__m128i a, __m128i b)
+{
+    const uint16x8_t lhs = vreinterpretq_u16_m128i(a);
+    const uint16x8_t rhs = vreinterpretq_u16_m128i(b);
+    return vreinterpretq_m128i_u16(vqaddq_u16(lhs, rhs));
+}
+
+// Saturating lane-wise addition of the 16 unsigned 8-bit integers in a and b.
+// https://msdn.microsoft.com/en-us/library/9hahyddy(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_adds_epu8(__m128i a, __m128i b)
+{
+    const uint8x16_t lhs = vreinterpretq_u8_m128i(a);
+    const uint8x16_t rhs = vreinterpretq_u8_m128i(b);
+    return vreinterpretq_m128i_u8(vqaddq_u8(lhs, rhs));
+}
+
+// Bitwise AND of the packed double-precision (64-bit) elements of a and b; the
+// lanes are handled as raw 64-bit integers.
+//
+//   FOR j := 0 to 1
+//     i := j*64
+//     dst[i+63:i] := a[i+63:i] AND b[i+63:i]
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd
+FORCE_INLINE __m128d _mm_and_pd(__m128d a, __m128d b)
+{
+    const int64x2_t lhs_bits = vreinterpretq_s64_m128d(a);
+    const int64x2_t rhs_bits = vreinterpretq_s64_m128d(b);
+    return vreinterpretq_m128d_s64(vandq_s64(lhs_bits, rhs_bits));
+}
+
+// Computes the bitwise AND of the 128-bit value in a and the 128-bit value in
+// b.
+//
+//   r := a & b
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/6d1txsa8(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_and_si128(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_s32(
+        vandq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
+}
+
+// Compute the bitwise NOT of packed double-precision (64-bit) floating-point
+// elements in a and then AND with b, and store the results in dst.
+//
+//   FOR j := 0 to 1
+//     i := j*64
+//     dst[i+63:i] := ((NOT a[i+63:i]) AND b[i+63:i])
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd
+FORCE_INLINE __m128d _mm_andnot_pd(__m128d a, __m128d b)
+{
+    // *NOTE* argument swap: vbicq(x, y) computes x & ~y, so b goes first.
+    return vreinterpretq_m128d_s64(
+        vbicq_s64(vreinterpretq_s64_m128d(b), vreinterpretq_s64_m128d(a)));
+}
+
+// Computes the bitwise AND of the 128-bit value in b and the bitwise NOT of the
+// 128-bit value in a.
+//
+//   r := (~a) & b
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/1beaceh8(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_andnot_si128(__m128i a, __m128i b)
+{
+    // *NOTE* argument swap: vbicq(x, y) computes x & ~y, so b goes first.
+    return vreinterpretq_m128i_s32(
+        vbicq_s32(vreinterpretq_s32_m128i(b), vreinterpretq_s32_m128i(a)));
+}
+
+// Computes the average of the 8 unsigned 16-bit integers in a and the 8
+// unsigned 16-bit integers in b and rounds.
+//
+//   r0 := (a0 + b0) / 2
+//   r1 := (a1 + b1) / 2
+//   ...
+//   r7 := (a7 + b7) / 2
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/y13ca3c8(v=vs.90).aspx
+FORCE_INLINE __m128i _mm_avg_epu16(__m128i a, __m128i b)
+{
+    // Convert the result with vreinterpretq_m128i_u16, exactly as
+    // _mm_avg_epu8 does with its u8 counterpart, rather than with a raw
+    // vector-to-vector C cast that depends on lax vector conversions.
+    return vreinterpretq_m128i_u16(
+        vrhaddq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b)));
+}
+
+// Computes the average of the 16 unsigned 8-bit integers in a and the 16
+// unsigned 8-bit integers in b and rounds.
+//
+//   r0 := (a0 + b0) / 2
+//   r1 := (a1 + b1) / 2
+//   ...
+// r15 := (a15 + b15) / 2 +// +// https://msdn.microsoft.com/en-us/library/vstudio/8zwh554a(v%3dvs.90).aspx +FORCE_INLINE __m128i _mm_avg_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vrhaddq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Shift a left by imm8 bytes while shifting in zeros, and store the results in +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128 +#define _mm_bslli_si128(a, imm) _mm_slli_si128(a, imm) + +// Shift a right by imm8 bytes while shifting in zeros, and store the results in +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128 +#define _mm_bsrli_si128(a, imm) _mm_srli_si128(a, imm) + +// Cast vector of type __m128d to type __m128. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps +FORCE_INLINE __m128 _mm_castpd_ps(__m128d a) +{ + return vreinterpretq_m128_s64(vreinterpretq_s64_m128d(a)); +} + +// Cast vector of type __m128d to type __m128i. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128 +FORCE_INLINE __m128i _mm_castpd_si128(__m128d a) +{ + return vreinterpretq_m128i_s64(vreinterpretq_s64_m128d(a)); +} + +// Cast vector of type __m128 to type __m128d. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd +FORCE_INLINE __m128d _mm_castps_pd(__m128 a) +{ + return vreinterpretq_m128d_s32(vreinterpretq_s32_m128(a)); +} + +// Applies a type cast to reinterpret four 32-bit floating point values passed +// in as a 128-bit parameter as packed 32-bit integers. 
+// https://msdn.microsoft.com/en-us/library/bb514099.aspx +FORCE_INLINE __m128i _mm_castps_si128(__m128 a) +{ + return vreinterpretq_m128i_s32(vreinterpretq_s32_m128(a)); +} + +// Cast vector of type __m128i to type __m128d. This intrinsic is only used for +// compilation and does not generate any instructions, thus it has zero latency. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd +FORCE_INLINE __m128d _mm_castsi128_pd(__m128i a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vreinterpretq_f64_m128i(a)); +#else + return vreinterpretq_m128d_f32(vreinterpretq_f32_m128i(a)); +#endif +} + +// Applies a type cast to reinterpret four 32-bit integers passed in as a +// 128-bit parameter as packed 32-bit floating point values. +// https://msdn.microsoft.com/en-us/library/bb514029.aspx +FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) +{ + return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a)); +} + +// Cache line containing p is flushed and invalidated from all caches in the +// coherency domain. : +// https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx +FORCE_INLINE void _mm_clflush(void const *p) +{ + (void) p; + // no corollary for Neon? +} + +// Compares the 8 signed or unsigned 16-bit integers in a and the 8 signed or +// unsigned 16-bit integers in b for equality. +// https://msdn.microsoft.com/en-us/library/2ay060te(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cmpeq_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u16( + vceqq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Compare packed 32-bit integers in a and b for equality, and store the results +// in dst +FORCE_INLINE __m128i _mm_cmpeq_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u32( + vceqq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Compares the 16 signed or unsigned 8-bit integers in a and the 16 signed or +// unsigned 8-bit integers in b for equality. 
+// https://msdn.microsoft.com/en-us/library/windows/desktop/bz5xk21a(v=vs.90).aspx +FORCE_INLINE __m128i _mm_cmpeq_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vceqq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for equality, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd +FORCE_INLINE __m128d _mm_cmpeq_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64( + vceqq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + // (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) + uint32x4_t cmp = + vceqq_u32(vreinterpretq_u32_m128d(a), vreinterpretq_u32_m128d(b)); + uint32x4_t swapped = vrev64q_u32(cmp); + return vreinterpretq_m128d_u32(vandq_u32(cmp, swapped)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for equality, store the result in the lower element of dst, and copy the +// upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd +FORCE_INLINE __m128d _mm_cmpeq_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_cmpeq_pd(a, b)); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for greater-than-or-equal, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd +FORCE_INLINE __m128d _mm_cmpge_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64( + vcgeq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = (*(double *) &a0) >= (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = (*(double *) &a1) >= (*(double *) &b1) ? ~UINT64_C(0) : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for greater-than-or-equal, store the result in the lower element of dst, +// and copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd +FORCE_INLINE __m128d _mm_cmpge_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_cmpge_pd(a, b)); +#else + // expand "_mm_cmpge_pd()" to reduce unnecessary operations + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = (*(double *) &a0) >= (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = a1; + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compares the 8 signed 16-bit integers in a and the 8 signed 16-bit integers +// in b for greater than. +// +// r0 := (a0 > b0) ? 0xffff : 0x0 +// r1 := (a1 > b1) ? 0xffff : 0x0 +// ... +// r7 := (a7 > b7) ? 
0xffff : 0x0
+//
+// https://technet.microsoft.com/en-us/library/xd43yfsa(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u16(
+        vcgtq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)));
+}
+
+// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers
+// in b for greater than.
+// https://msdn.microsoft.com/en-us/library/vstudio/1s9f2z0y(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u32(
+        vcgtq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
+}
+
+// Compares the 16 signed 8-bit integers in a and the 16 signed 8-bit integers
+// in b for greater than.
+//
+//   r0 := (a0 > b0) ? 0xff : 0x0
+//   r1 := (a1 > b1) ? 0xff : 0x0
+//   ...
+//   r15 := (a15 > b15) ? 0xff : 0x0
+//
+// https://msdn.microsoft.com/zh-tw/library/wf45zt2b(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u8(
+        vcgtq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
+}
+
+// Compare packed double-precision (64-bit) floating-point elements in a and b
+// for greater-than, and store the results in dst. Each result lane is all
+// ones when the comparison holds and all zeros otherwise.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd
+FORCE_INLINE __m128d _mm_cmpgt_pd(__m128d a, __m128d b)
+{
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_u64(
+        vcgtq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)));
+#else
+    // Non-AArch64 path: extract each 64-bit lane as an integer, reinterpret
+    // it as a double for the scalar compare and assemble the mask manually.
+    uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a));
+    uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a));
+    uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b));
+    uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b));
+    uint64_t d[2];
+    d[0] = (*(double *) &a0) > (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0);
+    d[1] = (*(double *) &a1) > (*(double *) &b1) ? ~UINT64_C(0) : UINT64_C(0);
+
+    return vreinterpretq_m128d_u64(vld1q_u64(d));
+#endif
+}
+
+// Compare the lower double-precision (64-bit) floating-point elements in a and
+// b for greater-than, store the result in the lower element of dst, and copy
+// the upper element from a to the upper element of dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd
+FORCE_INLINE __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
+{
+#if defined(__aarch64__)
+    return _mm_move_sd(a, _mm_cmpgt_pd(a, b));
+#else
+    // expand "_mm_cmpgt_pd()" to reduce unnecessary operations: only lane 0
+    // is compared, lane 1 of a is passed through unchanged.
+    uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a));
+    uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a));
+    uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b));
+    uint64_t d[2];
+    d[0] = (*(double *) &a0) > (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0);
+    d[1] = a1;
+
+    return vreinterpretq_m128d_u64(vld1q_u64(d));
+#endif
+}
+
+// Compare packed double-precision (64-bit) floating-point elements in a and b
+// for less-than-or-equal, and store the results in dst. Each result lane is
+// all ones when the comparison holds and all zeros otherwise.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd
+FORCE_INLINE __m128d _mm_cmple_pd(__m128d a, __m128d b)
+{
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_u64(
+        vcleq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)));
+#else
+    // Non-AArch64 path: scalar double compares on the extracted lanes.
+    uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a));
+    uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a));
+    uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b));
+    uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b));
+    uint64_t d[2];
+    d[0] = (*(double *) &a0) <= (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0);
+    d[1] = (*(double *) &a1) <= (*(double *) &b1) ? ~UINT64_C(0) : UINT64_C(0);
+
+    return vreinterpretq_m128d_u64(vld1q_u64(d));
+#endif
+}
+
+// Compare the lower double-precision (64-bit) floating-point elements in a and
+// b for less-than-or-equal, store the result in the lower element of dst, and
+// copy the upper element from a to the upper element of dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd
+FORCE_INLINE __m128d _mm_cmple_sd(__m128d a, __m128d b)
+{
+#if defined(__aarch64__)
+    return _mm_move_sd(a, _mm_cmple_pd(a, b));
+#else
+    // expand "_mm_cmple_pd()" to reduce unnecessary operations: only lane 0
+    // is compared, lane 1 of a is passed through unchanged.
+    uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a));
+    uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a));
+    uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b));
+    uint64_t d[2];
+    d[0] = (*(double *) &a0) <= (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0);
+    d[1] = a1;
+
+    return vreinterpretq_m128d_u64(vld1q_u64(d));
+#endif
+}
+
+// Compares the 8 signed 16-bit integers in a and the 8 signed 16-bit integers
+// in b for less than.
+//
+//   r0 := (a0 < b0) ? 0xffff : 0x0
+//   r1 := (a1 < b1) ? 0xffff : 0x0
+//   ...
+//   r7 := (a7 < b7) ? 0xffff : 0x0
+//
+// https://technet.microsoft.com/en-us/library/t863edb2(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_cmplt_epi16(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u16(
+        vcltq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)));
+}
+
+
+// Compares the 4 signed 32-bit integers in a and the 4 signed 32-bit integers
+// in b for less than.
+// https://msdn.microsoft.com/en-us/library/vstudio/4ak0bf5d(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u32(
+        vcltq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
+}
+
+// Compares the 16 signed 8-bit integers in a and the 16 signed 8-bit integers
+// in b for lesser than.
+// https://msdn.microsoft.com/en-us/library/windows/desktop/9s46csht(v=vs.90).aspx +FORCE_INLINE __m128i _mm_cmplt_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vcltq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for less-than, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd +FORCE_INLINE __m128d _mm_cmplt_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64( + vcltq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = (*(double *) &a0) < (*(double *) &b0) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = (*(double *) &a1) < (*(double *) &b1) ? ~UINT64_C(0) : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for less-than, store the result in the lower element of dst, and copy the +// upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd +FORCE_INLINE __m128d _mm_cmplt_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_cmplt_pd(a, b)); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = (*(double *) &a0) < (*(double *) &b0) ? 
~UINT64_C(0) : UINT64_C(0); + d[1] = a1; + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for not-equal, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd +FORCE_INLINE __m128d _mm_cmpneq_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_s32(vmvnq_s32(vreinterpretq_s32_u64( + vceqq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))))); +#else + // Compare as doubles so NaN != x is true and +0.0 != -0.0 is false. + const double *da = (const double *) &a, *db = (const double *) &b; + uint64_t d[2] = {da[0] != db[0] ? ~UINT64_C(0) : UINT64_C(0), + da[1] != db[1] ? ~UINT64_C(0) : UINT64_C(0)}; + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for not-equal, store the result in the lower element of dst, and copy the +// upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd +FORCE_INLINE __m128d _mm_cmpneq_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_cmpneq_pd(a, b)); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for not-greater-than-or-equal, and store the results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd +FORCE_INLINE __m128d _mm_cmpnge_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64(veorq_u64( + vcgeq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)), + vdupq_n_u64(UINT64_MAX))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = + !((*(double *) &a0) >= (*(double *) &b0)) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = + !((*(double *) &a1) >= (*(double *) &b1)) ? ~UINT64_C(0) : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for not-greater-than-or-equal, store the result in the lower element of +// dst, and copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd +FORCE_INLINE __m128d _mm_cmpnge_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_cmpnge_pd(a, b)); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for not-greater-than, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd +FORCE_INLINE __m128d _mm_cmpngt_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64(veorq_u64( + vcgtq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)), + vdupq_n_u64(UINT64_MAX))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = + !((*(double *) &a0) > (*(double *) &b0)) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = + !((*(double *) &a1) > (*(double *) &b1)) ? ~UINT64_C(0) : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for not-greater-than, store the result in the lower element of dst, and +// copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd +FORCE_INLINE __m128d _mm_cmpngt_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_cmpngt_pd(a, b)); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for not-less-than-or-equal, and store the results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd +FORCE_INLINE __m128d _mm_cmpnle_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64(veorq_u64( + vcleq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)), + vdupq_n_u64(UINT64_MAX))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = + !((*(double *) &a0) <= (*(double *) &b0)) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = + !((*(double *) &a1) <= (*(double *) &b1)) ? ~UINT64_C(0) : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for not-less-than-or-equal, store the result in the lower element of dst, +// and copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd +FORCE_INLINE __m128d _mm_cmpnle_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_cmpnle_pd(a, b)); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// for not-less-than, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd +FORCE_INLINE __m128d _mm_cmpnlt_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_u64(veorq_u64( + vcltq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)), + vdupq_n_u64(UINT64_MAX))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = + !((*(double *) &a0) < (*(double *) &b0)) ? ~UINT64_C(0) : UINT64_C(0); + d[1] = + !((*(double *) &a1) < (*(double *) &b1)) ? ~UINT64_C(0) : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b for not-less-than, store the result in the lower element of dst, and copy +// the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd +FORCE_INLINE __m128d _mm_cmpnlt_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_cmpnlt_pd(a, b)); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// to see if neither is NaN, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd +FORCE_INLINE __m128d _mm_cmpord_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + // Excluding NaNs, any two floating point numbers can be compared. 
+ uint64x2_t not_nan_a = + vceqq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(a)); + uint64x2_t not_nan_b = + vceqq_f64(vreinterpretq_f64_m128d(b), vreinterpretq_f64_m128d(b)); + return vreinterpretq_m128d_u64(vandq_u64(not_nan_a, not_nan_b)); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = ((*(double *) &a0) == (*(double *) &a0) && + (*(double *) &b0) == (*(double *) &b0)) + ? ~UINT64_C(0) + : UINT64_C(0); + d[1] = ((*(double *) &a1) == (*(double *) &a1) && + (*(double *) &b1) == (*(double *) &b1)) + ? ~UINT64_C(0) + : UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b to see if neither is NaN, store the result in the lower element of dst, and +// copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd +FORCE_INLINE __m128d _mm_cmpord_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_cmpord_pd(a, b)); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t d[2]; + d[0] = ((*(double *) &a0) == (*(double *) &a0) && + (*(double *) &b0) == (*(double *) &b0)) + ? ~UINT64_C(0) + : UINT64_C(0); + d[1] = a1; + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b +// to see if either is NaN, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd +FORCE_INLINE __m128d _mm_cmpunord_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + // Two NaNs are not equal in comparison operation. + uint64x2_t not_nan_a = + vceqq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(a)); + uint64x2_t not_nan_b = + vceqq_f64(vreinterpretq_f64_m128d(b), vreinterpretq_f64_m128d(b)); + return vreinterpretq_m128d_s32( + vmvnq_s32(vreinterpretq_s32_u64(vandq_u64(not_nan_a, not_nan_b)))); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = ((*(double *) &a0) == (*(double *) &a0) && + (*(double *) &b0) == (*(double *) &b0)) + ? UINT64_C(0) + : ~UINT64_C(0); + d[1] = ((*(double *) &a1) == (*(double *) &a1) && + (*(double *) &b1) == (*(double *) &b1)) + ? UINT64_C(0) + : ~UINT64_C(0); + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b to see if either is NaN, store the result in the lower element of dst, and +// copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd +FORCE_INLINE __m128d _mm_cmpunord_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_cmpunord_pd(a, b)); +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t d[2]; + d[0] = ((*(double *) &a0) == (*(double *) &a0) && + (*(double *) &b0) == (*(double *) &b0)) + ? 
UINT64_C(0) + : ~UINT64_C(0); + d[1] = a1; + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point element in a and b +// for greater-than-or-equal, and return the boolean result (0 or 1). +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd +FORCE_INLINE int _mm_comige_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vgetq_lane_u64(vcgeq_f64(a, b), 0) & 0x1; +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + + return (*(double *) &a0 >= *(double *) &b0); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point element in a and b +// for greater-than, and return the boolean result (0 or 1). +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd +FORCE_INLINE int _mm_comigt_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vgetq_lane_u64(vcgtq_f64(a, b), 0) & 0x1; +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + + return (*(double *) &a0 > *(double *) &b0); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point element in a and b +// for less-than-or-equal, and return the boolean result (0 or 1). +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd +FORCE_INLINE int _mm_comile_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vgetq_lane_u64(vcleq_f64(a, b), 0) & 0x1; +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + + return (*(double *) &a0 <= *(double *) &b0); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point element in a and b +// for less-than, and return the boolean result (0 or 1). 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd +FORCE_INLINE int _mm_comilt_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vgetq_lane_u64(vcltq_f64(a, b), 0) & 0x1; +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + + return (*(double *) &a0 < *(double *) &b0); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point element in a and b +// for equality, and return the boolean result (0 or 1). +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd +FORCE_INLINE int _mm_comieq_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vgetq_lane_u64(vceqq_f64(a, b), 0) & 0x1; +#else + // Compare as real doubles. A lane-wise integer test cannot detect NaN + // (x == x always holds for integers), treats +0.0 and -0.0 as different, + // and bit 0 of the 64-bit lane only reflects the low 32 bits. + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + double lhs = *(double *) &a0; + double rhs = *(double *) &b0; + + // Yields 0 when either operand is NaN, matching the AArch64 branch. + return (lhs == rhs); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point element in a and b +// for not-equal, and return the boolean result (0 or 1). +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd +FORCE_INLINE int _mm_comineq_sd(__m128d a, __m128d b) +{ + return !_mm_comieq_sd(a, b); +} + +// Convert packed signed 32-bit integers in a to packed double-precision +// (64-bit) floating-point elements, and store the results in dst.
+// +// FOR j := 0 to 1 +// i := j*32 +// m := j*64 +// dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd +FORCE_INLINE __m128d _mm_cvtepi32_pd(__m128i a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcvtq_f64_s64(vmovl_s32(vget_low_s32(vreinterpretq_s32_m128i(a))))); +#else + double a0 = (double) vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0); + double a1 = (double) vgetq_lane_s32(vreinterpretq_s32_m128i(a), 1); + return _mm_set_pd(a1, a0); +#endif +} + +// Converts the four signed 32-bit integer values of a to single-precision, +// floating-point values +// https://msdn.microsoft.com/en-us/library/vstudio/36bwxcx5(v=vs.100).aspx +FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) +{ + return vreinterpretq_m128_f32(vcvtq_f32_s32(vreinterpretq_s32_m128i(a))); +} + +// Convert packed double-precision (64-bit) floating-point elements in a to +// packed 32-bit integers, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// k := 64*j +// dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32 +FORCE_INLINE __m128i _mm_cvtpd_epi32(__m128d a) +{ + __m128d rnd = _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); + double d0 = ((double *) &rnd)[0]; + double d1 = ((double *) &rnd)[1]; + return _mm_set_epi32(0, 0, (int32_t) d1, (int32_t) d0); +} + +// Convert packed double-precision (64-bit) floating-point elements in a to +// packed 32-bit integers, and store the results in dst. 
+// +// FOR j := 0 to 1 +// i := 32*j +// k := 64*j +// dst[i+31:i] := Convert_FP64_To_Int32(a[k+63:k]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_pi32 +FORCE_INLINE __m64 _mm_cvtpd_pi32(__m128d a) +{ + __m128d rnd = _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); + double d0 = ((double *) &rnd)[0]; + double d1 = ((double *) &rnd)[1]; + int32_t ALIGN_STRUCT(16) data[2] = {(int32_t) d0, (int32_t) d1}; + return vreinterpret_m64_s32(vld1_s32(data)); +} + +// Convert packed double-precision (64-bit) floating-point elements in a to +// packed single-precision (32-bit) floating-point elements, and store the +// results in dst. +// +// FOR j := 0 to 1 +// i := 32*j +// k := 64*j +// dst[i+31:i] := Convert_FP64_To_FP32(a[k+63:k]) +// ENDFOR +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps +FORCE_INLINE __m128 _mm_cvtpd_ps(__m128d a) +{ +#if defined(__aarch64__) + float32x2_t tmp = vcvt_f32_f64(vreinterpretq_f64_m128d(a)); + return vreinterpretq_m128_f32(vcombine_f32(tmp, vdup_n_f32(0))); +#else + float a0 = (float) ((double *) &a)[0]; + float a1 = (float) ((double *) &a)[1]; + return _mm_set_ps(0, 0, a1, a0); +#endif +} + +// Convert packed signed 32-bit integers in a to packed double-precision +// (64-bit) floating-point elements, and store the results in dst.
+// +// FOR j := 0 to 1 +// i := j*32 +// m := j*64 +// dst[m+63:m] := Convert_Int32_To_FP64(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_pd +FORCE_INLINE __m128d _mm_cvtpi32_pd(__m64 a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcvtq_f64_s64(vmovl_s32(vreinterpret_s32_m64(a)))); +#else + double a0 = (double) vget_lane_s32(vreinterpret_s32_m64(a), 0); + double a1 = (double) vget_lane_s32(vreinterpret_s32_m64(a), 1); + return _mm_set_pd(a1, a0); +#endif +} + +// Converts the four single-precision, floating-point values of a to signed +// 32-bit integer values. +// +// r0 := (int) a0 +// r1 := (int) a1 +// r2 := (int) a2 +// r3 := (int) a3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/xdc42k5e(v=vs.100).aspx +// *NOTE*. The default rounding mode on SSE is 'round to even', which ARMv7-A +// does not support! It is supported on ARMv8-A however. +FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) +{ +#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) + switch (_MM_GET_ROUNDING_MODE()) { + case _MM_ROUND_NEAREST: + return vreinterpretq_m128i_s32(vcvtnq_s32_f32(a)); + case _MM_ROUND_DOWN: + return vreinterpretq_m128i_s32(vcvtmq_s32_f32(a)); + case _MM_ROUND_UP: + return vreinterpretq_m128i_s32(vcvtpq_s32_f32(a)); + default: // _MM_ROUND_TOWARD_ZERO + return vreinterpretq_m128i_s32(vcvtq_s32_f32(a)); + } +#else + float *f = (float *) &a; + switch (_MM_GET_ROUNDING_MODE()) { + case _MM_ROUND_NEAREST: { + uint32x4_t signmask = vdupq_n_u32(0x80000000); + float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a), + vdupq_n_f32(0.5f)); /* +/- 0.5 */ + int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32( + vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/ + int32x4_t r_trunc = vcvtq_s32_f32( + vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */ + int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32( + vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 
31)); /* 1 or 0 */ + int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone), + vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */ + float32x4_t delta = vsubq_f32( + vreinterpretq_f32_m128(a), + vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */ + uint32x4_t is_delta_half = + vceqq_f32(delta, half); /* delta == +/- 0.5 */ + return vreinterpretq_m128i_s32( + vbslq_s32(is_delta_half, r_even, r_normal)); + } + case _MM_ROUND_DOWN: + return _mm_set_epi32(floorf(f[3]), floorf(f[2]), floorf(f[1]), + floorf(f[0])); + case _MM_ROUND_UP: + return _mm_set_epi32(ceilf(f[3]), ceilf(f[2]), ceilf(f[1]), + ceilf(f[0])); + default: // _MM_ROUND_TOWARD_ZERO + return _mm_set_epi32((int32_t) f[3], (int32_t) f[2], (int32_t) f[1], + (int32_t) f[0]); + } +#endif +} + +// Convert packed single-precision (32-bit) floating-point elements in a to +// packed double-precision (64-bit) floating-point elements, and store the +// results in dst. +// +// FOR j := 0 to 1 +// i := 64*j +// k := 32*j +// dst[i+63:i] := Convert_FP32_To_FP64(a[k+31:k]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd +FORCE_INLINE __m128d _mm_cvtps_pd(__m128 a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcvt_f64_f32(vget_low_f32(vreinterpretq_f32_m128(a)))); +#else + double a0 = (double) vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + double a1 = (double) vgetq_lane_f32(vreinterpretq_f32_m128(a), 1); + return _mm_set_pd(a1, a0); +#endif +} + +// Copy the lower double-precision (64-bit) floating-point element of a to dst. 
+// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64 +FORCE_INLINE double _mm_cvtsd_f64(__m128d a) +{ +#if defined(__aarch64__) + return (double) vgetq_lane_f64(vreinterpretq_f64_m128d(a), 0); +#else + return ((double *) &a)[0]; +#endif +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 32-bit integer, and store the result in dst. +// +// dst[31:0] := Convert_FP64_To_Int32(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32 +FORCE_INLINE int32_t _mm_cvtsd_si32(__m128d a) +{ +#if defined(__aarch64__) + return (int32_t) vgetq_lane_f64(vrndiq_f64(vreinterpretq_f64_m128d(a)), 0); +#else + __m128d rnd = _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); + double ret = ((double *) &rnd)[0]; + return (int32_t) ret; +#endif +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 64-bit integer, and store the result in dst. +// +// dst[63:0] := Convert_FP64_To_Int64(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64 +FORCE_INLINE int64_t _mm_cvtsd_si64(__m128d a) +{ +#if defined(__aarch64__) + return (int64_t) vgetq_lane_f64(vrndiq_f64(vreinterpretq_f64_m128d(a)), 0); +#else + __m128d rnd = _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); + double ret = ((double *) &rnd)[0]; + return (int64_t) ret; +#endif +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 64-bit integer, and store the result in dst. 
+// +// dst[63:0] := Convert_FP64_To_Int64(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x +#define _mm_cvtsd_si64x _mm_cvtsd_si64 + +// Convert the lower double-precision (64-bit) floating-point element in b to a +// single-precision (32-bit) floating-point element, store the result in the +// lower element of dst, and copy the upper 3 packed elements from a to the +// upper elements of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss +FORCE_INLINE __m128 _mm_cvtsd_ss(__m128 a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32(vsetq_lane_f32( + vget_lane_f32(vcvt_f32_f64(vreinterpretq_f64_m128d(b)), 0), + vreinterpretq_f32_m128(a), 0)); +#else + return vreinterpretq_m128_f32(vsetq_lane_f32((float) ((double *) &b)[0], + vreinterpretq_f32_m128(a), 0)); +#endif +} + +// Copy the lower 32-bit integer in a to dst. +// +// dst[31:0] := a[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32 +FORCE_INLINE int _mm_cvtsi128_si32(__m128i a) +{ + return vgetq_lane_s32(vreinterpretq_s32_m128i(a), 0); +} + +// Copy the lower 64-bit integer in a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64 +FORCE_INLINE int64_t _mm_cvtsi128_si64(__m128i a) +{ + return vgetq_lane_s64(vreinterpretq_s64_m128i(a), 0); +} + +// Copy the lower 64-bit integer in a to dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x +#define _mm_cvtsi128_si64x(a) _mm_cvtsi128_si64(a) + +// Convert the signed 32-bit integer b to a double-precision (64-bit) +// floating-point element, store the result in the lower element of dst, and +// copy the upper element from a to the upper element of dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd +FORCE_INLINE __m128d _mm_cvtsi32_sd(__m128d a, int32_t b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vsetq_lane_f64((double) b, vreinterpretq_f64_m128d(a), 0)); +#else + double bf = (double) b; + return vreinterpretq_m128d_s64( + vsetq_lane_s64(*(int64_t *) &bf, vreinterpretq_s64_m128d(a), 0)); +#endif +} + +// Copy the lower 64-bit integer in a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x +#define _mm_cvtsi128_si64x(a) _mm_cvtsi128_si64(a) + +// Moves 32-bit integer a to the least significant 32 bits of an __m128 object, +// zero extending the upper bits. +// +// r0 := a +// r1 := 0x0 +// r2 := 0x0 +// r3 := 0x0 +// +// https://msdn.microsoft.com/en-us/library/ct3539ha%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_cvtsi32_si128(int a) +{ + return vreinterpretq_m128i_s32(vsetq_lane_s32(a, vdupq_n_s32(0), 0)); +} + +// Convert the signed 64-bit integer b to a double-precision (64-bit) +// floating-point element, store the result in the lower element of dst, and +// copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd +FORCE_INLINE __m128d _mm_cvtsi64_sd(__m128d a, int64_t b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vsetq_lane_f64((double) b, vreinterpretq_f64_m128d(a), 0)); +#else + double bf = (double) b; + return vreinterpretq_m128d_s64( + vsetq_lane_s64(*(int64_t *) &bf, vreinterpretq_s64_m128d(a), 0)); +#endif +} + +// Moves 64-bit integer a to the least significant 64 bits of an __m128 object, +// zero extending the upper bits. 
+// +// r0 := a +// r1 := 0x0 +FORCE_INLINE __m128i _mm_cvtsi64_si128(int64_t a) +{ + return vreinterpretq_m128i_s64(vsetq_lane_s64(a, vdupq_n_s64(0), 0)); +} + +// Copy 64-bit integer a to the lower element of dst, and zero the upper +// element. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128 +#define _mm_cvtsi64x_si128(a) _mm_cvtsi64_si128(a) + +// Convert the signed 64-bit integer b to a double-precision (64-bit) +// floating-point element, store the result in the lower element of dst, and +// copy the upper element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd +#define _mm_cvtsi64x_sd(a, b) _mm_cvtsi64_sd(a, b) + +// Convert the lower single-precision (32-bit) floating-point element in b to a +// double-precision (64-bit) floating-point element, store the result in the +// lower element of dst, and copy the upper element from a to the upper element +// of dst. +// +// dst[63:0] := Convert_FP32_To_FP64(b[31:0]) +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd +FORCE_INLINE __m128d _mm_cvtss_sd(__m128d a, __m128 b) +{ + double d = (double) vgetq_lane_f32(vreinterpretq_f32_m128(b), 0); +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vsetq_lane_f64(d, vreinterpretq_f64_m128d(a), 0)); +#else + return vreinterpretq_m128d_s64( + vsetq_lane_s64(*(int64_t *) &d, vreinterpretq_s64_m128d(a), 0)); +#endif +} + +// Convert packed double-precision (64-bit) floating-point elements in a to +// packed 32-bit integers with truncation, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32 +FORCE_INLINE __m128i _mm_cvttpd_epi32(__m128d a) +{ + double a0 = ((double *) &a)[0]; + double a1 = ((double *) &a)[1]; + return _mm_set_epi32(0, 0, (int32_t) a1, (int32_t) a0); +} + +// Convert packed double-precision (64-bit) floating-point elements in a to +// packed 32-bit integers with truncation, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_pi32 +FORCE_INLINE __m64 _mm_cvttpd_pi32(__m128d a) +{ + double a0 = ((double *) &a)[0]; + double a1 = ((double *) &a)[1]; + int32_t ALIGN_STRUCT(16) data[2] = {(int32_t) a0, (int32_t) a1}; + return vreinterpret_m64_s32(vld1_s32(data)); +} + +// Converts the four single-precision, floating-point values of a to signed +// 32-bit integer values using truncate. +// https://msdn.microsoft.com/en-us/library/vstudio/1h005y6x(v=vs.100).aspx +FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a) +{ + return vreinterpretq_m128i_s32(vcvtq_s32_f32(vreinterpretq_f32_m128(a))); +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 32-bit integer with truncation, and store the result in dst. +// +// dst[31:0] := Convert_FP64_To_Int32_Truncate(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32 +FORCE_INLINE int32_t _mm_cvttsd_si32(__m128d a) +{ + double ret = *((double *) &a); + return (int32_t) ret; +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 64-bit integer with truncation, and store the result in dst.
+// +// dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64 +FORCE_INLINE int64_t _mm_cvttsd_si64(__m128d a) +{ +#if defined(__aarch64__) + return vgetq_lane_s64(vcvtq_s64_f64(vreinterpretq_f64_m128d(a)), 0); +#else + double ret = *((double *) &a); + return (int64_t) ret; +#endif +} + +// Convert the lower double-precision (64-bit) floating-point element in a to a +// 64-bit integer with truncation, and store the result in dst. +// +// dst[63:0] := Convert_FP64_To_Int64_Truncate(a[63:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x +#define _mm_cvttsd_si64x(a) _mm_cvttsd_si64(a) + +// Divide packed double-precision (64-bit) floating-point elements in a by +// packed elements in b, and store the results in dst. +// +// FOR j := 0 to 1 +// i := 64*j +// dst[i+63:i] := a[i+63:i] / b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd +FORCE_INLINE __m128d _mm_div_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vdivq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] / db[0]; + c[1] = da[1] / db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Divide the lower double-precision (64-bit) floating-point element in a by the +// lower double-precision (64-bit) floating-point element in b, store the result +// in the lower element of dst, and copy the upper element from a to the upper +// element of dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd +FORCE_INLINE __m128d _mm_div_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + float64x2_t tmp = + vdivq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b)); + return vreinterpretq_m128d_f64( + vsetq_lane_f64(vgetq_lane_f64(vreinterpretq_f64_m128d(a), 1), tmp, 1)); +#else + return _mm_move_sd(a, _mm_div_pd(a, b)); +#endif +} + +// Extracts the selected signed or unsigned 16-bit integer from a and zero +// extends. +// https://msdn.microsoft.com/en-us/library/6dceta0c(v=vs.100).aspx +// FORCE_INLINE int _mm_extract_epi16(__m128i a, __constrange(0,8) int imm) +#define _mm_extract_epi16(a, imm) \ + vgetq_lane_u16(vreinterpretq_u16_m128i(a), (imm)) + +// Inserts the least significant 16 bits of b into the selected 16-bit integer +// of a. +// https://msdn.microsoft.com/en-us/library/kaze8hz1%28v=vs.100%29.aspx +// FORCE_INLINE __m128i _mm_insert_epi16(__m128i a, int b, +// __constrange(0,8) int imm) +#define _mm_insert_epi16(a, b, imm) \ + __extension__({ \ + vreinterpretq_m128i_s16( \ + vsetq_lane_s16((b), vreinterpretq_s16_m128i(a), (imm))); \ + }) + +// Loads two double-precision from 16-byte aligned memory, floating-point +// values. +// +// dst[127:0] := MEM[mem_addr+127:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd +FORCE_INLINE __m128d _mm_load_pd(const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vld1q_f64(p)); +#else + const float *fp = (const float *) p; + float ALIGN_STRUCT(16) data[4] = {fp[0], fp[1], fp[2], fp[3]}; + return vreinterpretq_m128d_f32(vld1q_f32(data)); +#endif +} + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. 
+// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1 +#define _mm_load_pd1 _mm_load1_pd + +// Load a double-precision (64-bit) floating-point element from memory into the +// lower of dst, and zero the upper element. mem_addr does not need to be +// aligned on any particular boundary. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd +FORCE_INLINE __m128d _mm_load_sd(const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vsetq_lane_f64(*p, vdupq_n_f64(0), 0)); +#else + const float *fp = (const float *) p; + float ALIGN_STRUCT(16) data[4] = {fp[0], fp[1], 0, 0}; + return vreinterpretq_m128d_f32(vld1q_f32(data)); +#endif +} + +// Loads 128-bit value. : +// https://msdn.microsoft.com/en-us/library/atzzad1h(v=vs.80).aspx +FORCE_INLINE __m128i _mm_load_si128(const __m128i *p) +{ + return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p)); +} + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd +FORCE_INLINE __m128d _mm_load1_pd(const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vld1q_dup_f64(p)); +#else + return vreinterpretq_m128d_s64(vdupq_n_s64(*(const int64_t *) p)); +#endif +} + +// Load a double-precision (64-bit) floating-point element from memory into the +// upper element of dst, and copy the lower element from a to dst. mem_addr does +// not need to be aligned on any particular boundary. 
+// +// dst[63:0] := a[63:0] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd +FORCE_INLINE __m128d _mm_loadh_pd(__m128d a, const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcombine_f64(vget_low_f64(vreinterpretq_f64_m128d(a)), vld1_f64(p))); +#else + return vreinterpretq_m128d_f32(vcombine_f32( + vget_low_f32(vreinterpretq_f32_m128d(a)), vld1_f32((const float *) p))); +#endif +} + +// Load 64-bit integer from memory into the first element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64 +FORCE_INLINE __m128i _mm_loadl_epi64(__m128i const *p) +{ + /* Load the lower 64 bits of the value pointed to by p into the + * lower 64 bits of the result, zeroing the upper 64 bits of the result. + */ + return vreinterpretq_m128i_s32( + vcombine_s32(vld1_s32((int32_t const *) p), vcreate_s32(0))); +} + +// Load a double-precision (64-bit) floating-point element from memory into the +// lower element of dst, and copy the upper element from a to dst. mem_addr does +// not need to be aligned on any particular boundary. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd +FORCE_INLINE __m128d _mm_loadl_pd(__m128d a, const double *p) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vcombine_f64(vld1_f64(p), vget_high_f64(vreinterpretq_f64_m128d(a)))); +#else + return vreinterpretq_m128d_f32( + vcombine_f32(vld1_f32((const float *) p), + vget_high_f32(vreinterpretq_f32_m128d(a)))); +#endif +} + +// Load 2 double-precision (64-bit) floating-point elements from memory into dst +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. 
+// +// dst[63:0] := MEM[mem_addr+127:mem_addr+64] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd +FORCE_INLINE __m128d _mm_loadr_pd(const double *p) +{ +#if defined(__aarch64__) + float64x2_t v = vld1q_f64(p); + return vreinterpretq_m128d_f64(vextq_f64(v, v, 1)); +#else + int64x2_t v = vld1q_s64((const int64_t *) p); + return vreinterpretq_m128d_s64(vextq_s64(v, v, 1)); +#endif +} + +// Loads two double-precision from unaligned memory, floating-point values. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd +FORCE_INLINE __m128d _mm_loadu_pd(const double *p) +{ + return _mm_load_pd(p); +} + +// Loads 128-bit value. : +// https://msdn.microsoft.com/zh-cn/library/f4k12ae8(v=vs.90).aspx +FORCE_INLINE __m128i _mm_loadu_si128(const __m128i *p) +{ + return vreinterpretq_m128i_s32(vld1q_s32((const int32_t *) p)); +} + +// Load unaligned 32-bit integer from memory into the first element of dst. +// +// dst[31:0] := MEM[mem_addr+31:mem_addr] +// dst[MAX:32] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si32 +FORCE_INLINE __m128i _mm_loadu_si32(const void *p) +{ + return vreinterpretq_m128i_s32( + vsetq_lane_s32(*(const int32_t *) p, vdupq_n_s32(0), 0)); +} + +// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit +// integers from b. 
+// +// r0 := (a0 * b0) + (a1 * b1) +// r1 := (a2 * b2) + (a3 * b3) +// r2 := (a4 * b4) + (a5 * b5) +// r3 := (a6 * b6) + (a7 * b7) +// https://msdn.microsoft.com/en-us/library/yht36sa6(v=vs.90).aspx +FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b) +{ + int32x4_t low = vmull_s16(vget_low_s16(vreinterpretq_s16_m128i(a)), + vget_low_s16(vreinterpretq_s16_m128i(b))); + int32x4_t high = vmull_s16(vget_high_s16(vreinterpretq_s16_m128i(a)), + vget_high_s16(vreinterpretq_s16_m128i(b))); + + int32x2_t low_sum = vpadd_s32(vget_low_s32(low), vget_high_s32(low)); + int32x2_t high_sum = vpadd_s32(vget_low_s32(high), vget_high_s32(high)); + + return vreinterpretq_m128i_s32(vcombine_s32(low_sum, high_sum)); +} + +// Conditionally store 8-bit integer elements from a into memory using mask +// (elements are not stored when the highest bit is not set in the corresponding +// element) and a non-temporal memory hint. mem_addr does not need to be aligned +// on any particular boundary. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128 +FORCE_INLINE void _mm_maskmoveu_si128(__m128i a, __m128i mask, char *mem_addr) +{ + int8x16_t shr_mask = vshrq_n_s8(vreinterpretq_s8_m128i(mask), 7); + __m128 b = _mm_load_ps((const float *) mem_addr); + int8x16_t masked = + vbslq_s8(vreinterpretq_u8_s8(shr_mask), vreinterpretq_s8_m128i(a), + vreinterpretq_s8_m128(b)); + vst1q_s8((int8_t *) mem_addr, masked); +} + +// Computes the pairwise maxima of the 8 signed 16-bit integers from a and the 8 +// signed 16-bit integers from b. +// https://msdn.microsoft.com/en-us/LIBRary/3x060h7c(v=vs.100).aspx +FORCE_INLINE __m128i _mm_max_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vmaxq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Computes the pairwise maxima of the 16 unsigned 8-bit integers from a and the +// 16 unsigned 8-bit integers from b. 
+// https://msdn.microsoft.com/en-us/library/st6634za(v=vs.100).aspx +FORCE_INLINE __m128i _mm_max_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vmaxq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b, +// and store packed maximum values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd +FORCE_INLINE __m128d _mm_max_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) +#if SSE2NEON_PRECISE_MINMAX + float64x2_t _a = vreinterpretq_f64_m128d(a); + float64x2_t _b = vreinterpretq_f64_m128d(b); + return vreinterpretq_m128d_f64(vbslq_f64(vcgtq_f64(_a, _b), _a, _b)); +#else + return vreinterpretq_m128d_f64( + vmaxq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#endif +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = (*(double *) &a0) > (*(double *) &b0) ? a0 : b0; + d[1] = (*(double *) &a1) > (*(double *) &b1) ? a1 : b1; + + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b, store the maximum value in the lower element of dst, and copy the upper +// element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd +FORCE_INLINE __m128d _mm_max_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_max_pd(a, b)); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2] = {da[0] > db[0] ? 
da[0] : db[0], da[1]}; + return vreinterpretq_m128d_f32(vld1q_f32((float32_t *) c)); +#endif +} + +// Computes the pairwise minima of the 8 signed 16-bit integers from a and the 8 +// signed 16-bit integers from b. +// https://msdn.microsoft.com/en-us/library/vstudio/6te997ew(v=vs.100).aspx +FORCE_INLINE __m128i _mm_min_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vminq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Computes the pairwise minima of the 16 unsigned 8-bit integers from a and the +// 16 unsigned 8-bit integers from b. +// https://msdn.microsoft.com/ko-kr/library/17k8cf58(v=vs.100).aspxx +FORCE_INLINE __m128i _mm_min_epu8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vminq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); +} + +// Compare packed double-precision (64-bit) floating-point elements in a and b, +// and store packed minimum values in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd +FORCE_INLINE __m128d _mm_min_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) +#if SSE2NEON_PRECISE_MINMAX + float64x2_t _a = vreinterpretq_f64_m128d(a); + float64x2_t _b = vreinterpretq_f64_m128d(b); + return vreinterpretq_m128d_f64(vbslq_f64(vcltq_f64(_a, _b), _a, _b)); +#else + return vreinterpretq_m128d_f64( + vminq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#endif +#else + uint64_t a0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(a)); + uint64_t a1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(a)); + uint64_t b0 = (uint64_t) vget_low_u64(vreinterpretq_u64_m128d(b)); + uint64_t b1 = (uint64_t) vget_high_u64(vreinterpretq_u64_m128d(b)); + uint64_t d[2]; + d[0] = (*(double *) &a0) < (*(double *) &b0) ? a0 : b0; + d[1] = (*(double *) &a1) < (*(double *) &b1) ? 
a1 : b1; + return vreinterpretq_m128d_u64(vld1q_u64(d)); +#endif +} + +// Compare the lower double-precision (64-bit) floating-point elements in a and +// b, store the minimum value in the lower element of dst, and copy the upper +// element from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd +FORCE_INLINE __m128d _mm_min_sd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return _mm_move_sd(a, _mm_min_pd(a, b)); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2] = {da[0] < db[0] ? da[0] : db[0], da[1]}; + return vreinterpretq_m128d_f32(vld1q_f32((float32_t *) c)); +#endif +} + +// Copy the lower 64-bit integer in a to the lower element of dst, and zero the +// upper element. +// +// dst[63:0] := a[63:0] +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64 +FORCE_INLINE __m128i _mm_move_epi64(__m128i a) +{ + return vreinterpretq_m128i_s64( + vsetq_lane_s64(0, vreinterpretq_s64_m128i(a), 1)); +} + +// Move the lower double-precision (64-bit) floating-point element from b to the +// lower element of dst, and copy the upper element from a to the upper element +// of dst. +// +// dst[63:0] := b[63:0] +// dst[127:64] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd +FORCE_INLINE __m128d _mm_move_sd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_f32( + vcombine_f32(vget_low_f32(vreinterpretq_f32_m128d(b)), + vget_high_f32(vreinterpretq_f32_m128d(a)))); +} + +// NEON does not provide a version of this function. +// Creates a 16-bit mask from the most significant bits of the 16 signed or +// unsigned 8-bit integers in a and zero extends the upper bits. +// https://msdn.microsoft.com/en-us/library/vstudio/s090c8fk(v=vs.100).aspx +FORCE_INLINE int _mm_movemask_epi8(__m128i a) +{ + // Use increasingly wide shifts+adds to collect the sign bits + // together. 
+ // Since the widening shifts would be rather confusing to follow in little + // endian, everything will be illustrated in big endian order instead. This + // has a different result - the bits would actually be reversed on a big + // endian machine. + + // Starting input (only half the elements are shown): + // 89 ff 1d c0 00 10 99 33 + uint8x16_t input = vreinterpretq_u8_m128i(a); + + // Shift out everything but the sign bits with an unsigned shift right. + // + // Bytes of the vector:: + // 89 ff 1d c0 00 10 99 33 + // \ \ \ \ \ \ \ \ high_bits = (uint16x4_t)(input >> 7) + // | | | | | | | | + // 01 01 00 01 00 00 01 00 + // + // Bits of first important lane(s): + // 10001001 (89) + // \______ + // | + // 00000001 (01) + uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7)); + + // Merge the even lanes together with a 16-bit unsigned shift right + add. + // 'xx' represents garbage data which will be ignored in the final result. + // In the important bytes, the add functions like a binary OR. + // + // 01 01 00 01 00 00 01 00 + // \_ | \_ | \_ | \_ | paired16 = (uint32x4_t)(input + (input >> 7)) + // \| \| \| \| + // xx 03 xx 01 xx 00 xx 02 + // + // 00000001 00000001 (01 01) + // \_______ | + // \| + // xxxxxxxx xxxxxx11 (xx 03) + uint32x4_t paired16 = + vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7)); + + // Repeat with a wider 32-bit shift + add. + // xx 03 xx 01 xx 00 xx 02 + // \____ | \____ | paired32 = (uint64x1_t)(paired16 + (paired16 >> + // 14)) + // \| \| + // xx xx xx 0d xx xx xx 02 + // + // 00000011 00000001 (03 01) + // \\_____ || + // '----.\|| + // xxxxxxxx xxxx1101 (xx 0d) + uint64x2_t paired32 = + vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14)); + + // Last, an even wider 64-bit shift + add to get our result in the low 8 bit + // lanes. 
xx xx xx 0d xx xx xx 02 + // \_________ | paired64 = (uint8x8_t)(paired32 + (paired32 >> + // 28)) + // \| + // xx xx xx xx xx xx xx d2 + // + // 00001101 00000010 (0d 02) + // \ \___ | | + // '---. \| | + // xxxxxxxx 11010010 (xx d2) + uint8x16_t paired64 = + vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28)); + + // Extract the low 8 bits from each 64-bit lane with 2 8-bit extracts. + // xx xx xx xx xx xx xx d2 + // || return paired64[0] + // d2 + // Note: Little endian would return the correct value 4b (01001011) instead. + return vgetq_lane_u8(paired64, 0) | ((int) vgetq_lane_u8(paired64, 8) << 8); +} + +// Set each bit of mask dst based on the most significant bit of the +// corresponding packed double-precision (64-bit) floating-point element in a. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd +FORCE_INLINE int _mm_movemask_pd(__m128d a) +{ + uint64x2_t input = vreinterpretq_u64_m128d(a); + uint64x2_t high_bits = vshrq_n_u64(input, 63); + return vgetq_lane_u64(high_bits, 0) | (vgetq_lane_u64(high_bits, 1) << 1); +} + +// Copy the lower 64-bit integer in a to dst. +// +// dst[63:0] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi64_pi64 +FORCE_INLINE __m64 _mm_movepi64_pi64(__m128i a) +{ + return vreinterpret_m64_s64(vget_low_s64(vreinterpretq_s64_m128i(a))); +} + +// Copy the 64-bit integer a to the lower element of dst, and zero the upper +// element. +// +// dst[63:0] := a[63:0] +// dst[127:64] := 0 +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movpi64_epi64 +FORCE_INLINE __m128i _mm_movpi64_epi64(__m64 a) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vreinterpret_s64_m64(a), vdup_n_s64(0))); +} + +// Multiply the low unsigned 32-bit integers from each packed 64-bit element in +// a and b, and store the unsigned 64-bit results in dst. 
+// +// r0 := (a0 & 0xFFFFFFFF) * (b0 & 0xFFFFFFFF) +// r1 := (a2 & 0xFFFFFFFF) * (b2 & 0xFFFFFFFF) +FORCE_INLINE __m128i _mm_mul_epu32(__m128i a, __m128i b) +{ + // vmull_u32 upcasts instead of masking, so we downcast. + uint32x2_t a_lo = vmovn_u64(vreinterpretq_u64_m128i(a)); + uint32x2_t b_lo = vmovn_u64(vreinterpretq_u64_m128i(b)); + return vreinterpretq_m128i_u64(vmull_u32(a_lo, b_lo)); +} + +// Multiply packed double-precision (64-bit) floating-point elements in a and b, +// and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd +FORCE_INLINE __m128d _mm_mul_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vmulq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] * db[0]; + c[1] = da[1] * db[1]; + // Convert the raw NEON load to __m128d explicitly, like the other fallbacks. + return vreinterpretq_m128d_f32(vld1q_f32((float32_t *) c)); +#endif +} + +// Multiply the lower double-precision (64-bit) floating-point element in a and +// b, store the result in the lower element of dst, and copy the upper element +// from a to the upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd +FORCE_INLINE __m128d _mm_mul_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_mul_pd(a, b)); +} + +// Multiply the low unsigned 32-bit integers from a and b, and store the +// unsigned 64-bit result in dst. +// +// dst[63:0] := a[31:0] * b[31:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_su32 +FORCE_INLINE __m64 _mm_mul_su32(__m64 a, __m64 b) +{ + return vreinterpret_m64_u64(vget_low_u64( + vmull_u32(vreinterpret_u32_m64(a), vreinterpret_u32_m64(b)))); +} + +// Multiplies the 8 signed 16-bit integers from a by the 8 signed 16-bit +// integers from b. +// +// r0 := (a0 * b0)[31:16] +// r1 := (a1 * b1)[31:16] +// ...
+// r7 := (a7 * b7)[31:16] +// +// https://msdn.microsoft.com/en-us/library/vstudio/59hddw1d(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mulhi_epi16(__m128i a, __m128i b) +{ + /* FIXME: issue with large values because of result saturation */ + // int16x8_t ret = vqdmulhq_s16(vreinterpretq_s16_m128i(a), + // vreinterpretq_s16_m128i(b)); /* =2*a*b */ return + // vreinterpretq_m128i_s16(vshrq_n_s16(ret, 1)); + int16x4_t a3210 = vget_low_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b3210 = vget_low_s16(vreinterpretq_s16_m128i(b)); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + int16x4_t a7654 = vget_high_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b7654 = vget_high_s16(vreinterpretq_s16_m128i(b)); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t r = + vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + return vreinterpretq_m128i_u16(r.val[1]); +} + +// Multiply the packed unsigned 16-bit integers in a and b, producing +// intermediate 32-bit integers, and store the high 16 bits of the intermediate +// integers in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16 +FORCE_INLINE __m128i _mm_mulhi_epu16(__m128i a, __m128i b) +{ + uint16x4_t a3210 = vget_low_u16(vreinterpretq_u16_m128i(a)); + uint16x4_t b3210 = vget_low_u16(vreinterpretq_u16_m128i(b)); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); +#if defined(__aarch64__) + uint32x4_t ab7654 = + vmull_high_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b)); + uint16x8_t r = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), + vreinterpretq_u16_u32(ab7654)); + return vreinterpretq_m128i_u16(r); +#else + uint16x4_t a7654 = vget_high_u16(vreinterpretq_u16_m128i(a)); + uint16x4_t b7654 = vget_high_u16(vreinterpretq_u16_m128i(b)); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); + uint16x8x2_t r = + vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + return vreinterpretq_m128i_u16(r.val[1]); +#endif +} + +// Multiplies the 8 signed or unsigned 16-bit integers from a by the 8 signed or +// unsigned 16-bit integers from b. +// +// r0 := (a0 * b0)[15:0] +// r1 := (a1 * b1)[15:0] +// ... +// r7 := (a7 * b7)[15:0] +// +// https://msdn.microsoft.com/en-us/library/vstudio/9ks1472s(v=vs.100).aspx +FORCE_INLINE __m128i _mm_mullo_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vmulq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Compute the bitwise OR of packed double-precision (64-bit) floating-point +// elements in a and b, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_or_pd +FORCE_INLINE __m128d _mm_or_pd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_s64( + vorrq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); +} + +// Computes the bitwise OR of the 128-bit value in a and the 128-bit value in b. 
+// +// r := a | b +// +// https://msdn.microsoft.com/en-us/library/vstudio/ew8ty0db(v=vs.100).aspx +FORCE_INLINE __m128i _mm_or_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vorrq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Packs the 16 signed 16-bit integers from a and b into 8-bit integers and +// saturates. +// https://msdn.microsoft.com/en-us/library/k4y4f7w5%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_packs_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vcombine_s8(vqmovn_s16(vreinterpretq_s16_m128i(a)), + vqmovn_s16(vreinterpretq_s16_m128i(b)))); +} + +// Packs the 8 signed 32-bit integers from a and b into signed 16-bit integers +// and saturates. +// +// r0 := SignedSaturate(a0) +// r1 := SignedSaturate(a1) +// r2 := SignedSaturate(a2) +// r3 := SignedSaturate(a3) +// r4 := SignedSaturate(b0) +// r5 := SignedSaturate(b1) +// r6 := SignedSaturate(b2) +// r7 := SignedSaturate(b3) +// +// https://msdn.microsoft.com/en-us/library/393t56f9%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_packs_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vcombine_s16(vqmovn_s32(vreinterpretq_s32_m128i(a)), + vqmovn_s32(vreinterpretq_s32_m128i(b)))); +} + +// Packs the 16 signed 16 - bit integers from a and b into 8 - bit unsigned +// integers and saturates. +// +// r0 := UnsignedSaturate(a0) +// r1 := UnsignedSaturate(a1) +// ... +// r7 := UnsignedSaturate(a7) +// r8 := UnsignedSaturate(b0) +// r9 := UnsignedSaturate(b1) +// ... +// r15 := UnsignedSaturate(b7) +// +// https://msdn.microsoft.com/en-us/library/07ad1wx4(v=vs.100).aspx +FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) +{ + return vreinterpretq_m128i_u8( + vcombine_u8(vqmovun_s16(vreinterpretq_s16_m128i(a)), + vqmovun_s16(vreinterpretq_s16_m128i(b)))); +} + +// Pause the processor. This is typically used in spin-wait loops and depending +// on the x86 processor typical values are in the 40-100 cycle range. 
The +// 'yield' instruction isn't a good fit because it's effectively a nop on most +// Arm cores. Experience with several databases has shown an 'isb' is +// a reasonable approximation. +FORCE_INLINE void _mm_pause(void) +{ + __asm__ __volatile__("isb\n"); +} + +// Compute the absolute differences of packed unsigned 8-bit integers in a and +// b, then horizontally sum each consecutive 8 differences to produce two +// unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low +// 16 bits of 64-bit elements in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8 +FORCE_INLINE __m128i _mm_sad_epu8(__m128i a, __m128i b) +{ + uint16x8_t t = vpaddlq_u8(vabdq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b))); + return vreinterpretq_m128i_u64(vpaddlq_u32(vpaddlq_u16(t))); +} + +// Sets the 8 signed 16-bit integer values. +// https://msdn.microsoft.com/en-au/library/3e0fek84(v=vs.90).aspx +FORCE_INLINE __m128i _mm_set_epi16(short i7, + short i6, + short i5, + short i4, + short i3, + short i2, + short i1, + short i0) +{ + int16_t ALIGN_STRUCT(16) data[8] = {i0, i1, i2, i3, i4, i5, i6, i7}; + return vreinterpretq_m128i_s16(vld1q_s16(data)); +} + +// Sets the 4 signed 32-bit integer values. +// https://msdn.microsoft.com/en-us/library/vstudio/019beekt(v=vs.100).aspx +FORCE_INLINE __m128i _mm_set_epi32(int i3, int i2, int i1, int i0) +{ + int32_t ALIGN_STRUCT(16) data[4] = {i0, i1, i2, i3}; + return vreinterpretq_m128i_s32(vld1q_s32(data)); +} + +// Returns the __m128i structure with its two 64-bit integer values +// initialized to the values of the two 64-bit integers passed in. +// https://msdn.microsoft.com/en-us/library/dk2sdw0h(v=vs.120).aspx +FORCE_INLINE __m128i _mm_set_epi64(__m64 i1, __m64 i2) +{ + return _mm_set_epi64x((int64_t) i1, (int64_t) i2); +} + +// Returns the __m128i structure with its two 64-bit integer values +// initialized to the values of the two 64-bit integers passed in.
+// https://msdn.microsoft.com/en-us/library/dk2sdw0h(v=vs.120).aspx +FORCE_INLINE __m128i _mm_set_epi64x(int64_t i1, int64_t i2) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vcreate_s64(i2), vcreate_s64(i1))); +} + +// Sets the 16 signed 8-bit integer values. +// The last argument (b0) is stored in the lowest byte lane, b15 in the highest. +// https://msdn.microsoft.com/en-us/library/x0cx8zd3(v=vs.90).aspx +FORCE_INLINE __m128i _mm_set_epi8(signed char b15, + signed char b14, + signed char b13, + signed char b12, + signed char b11, + signed char b10, + signed char b9, + signed char b8, + signed char b7, + signed char b6, + signed char b5, + signed char b4, + signed char b3, + signed char b2, + signed char b1, + signed char b0) +{ + int8_t ALIGN_STRUCT(16) + data[16] = {(int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, + (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, + (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, + (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; + return vreinterpretq_m128i_s8(vld1q_s8(data)); +} + +// Set packed double-precision (64-bit) floating-point elements in dst with the +// supplied values. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd +FORCE_INLINE __m128d _mm_set_pd(double e1, double e0) +{ + double ALIGN_STRUCT(16) data[2] = {e0, e1}; +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vld1q_f64((float64_t *) data)); +#else + return vreinterpretq_m128d_f32(vld1q_f32((float32_t *) data)); +#endif +} + +// Broadcast double-precision (64-bit) floating-point value a to all elements of +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1 +#define _mm_set_pd1 _mm_set1_pd + +// Copy double-precision (64-bit) floating-point element a to the lower element +// of dst, and zero the upper element. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd +FORCE_INLINE __m128d _mm_set_sd(double a) +{ + return _mm_set_pd(0, a); +} + +// Sets the 8 signed 16-bit integer values to w.
+// +// r0 := w +// r1 := w +// ... +// r7 := w +// +// https://msdn.microsoft.com/en-us/library/k0ya3x0e(v=vs.90).aspx +FORCE_INLINE __m128i _mm_set1_epi16(short w) +{ + return vreinterpretq_m128i_s16(vdupq_n_s16(w)); +} + +// Sets the 4 signed 32-bit integer values to _i. +// +// r0 := _i +// r1 := _i +// r2 := _i +// r3 := _i +// +// https://msdn.microsoft.com/en-us/library/vstudio/h4xscxat(v=vs.100).aspx +FORCE_INLINE __m128i _mm_set1_epi32(int _i) +{ + return vreinterpretq_m128i_s32(vdupq_n_s32(_i)); +} + +// Sets the 2 signed 64-bit integer values to _i. +// https://docs.microsoft.com/en-us/previous-versions/visualstudio/visual-studio-2010/whtfzhzk(v=vs.100) +FORCE_INLINE __m128i _mm_set1_epi64(__m64 _i) +{ + return vreinterpretq_m128i_s64(vdupq_n_s64((int64_t) _i)); +} + +// Sets the 2 signed 64-bit integer values to _i. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x +FORCE_INLINE __m128i _mm_set1_epi64x(int64_t _i) +{ + return vreinterpretq_m128i_s64(vdupq_n_s64(_i)); +} + +// Sets the 16 signed 8-bit integer values to w. +// +// r0 := w +// r1 := w +// ... +// r15 := w +// +// https://msdn.microsoft.com/en-us/library/6e14xhyf(v=vs.100).aspx +FORCE_INLINE __m128i _mm_set1_epi8(signed char w) +{ + return vreinterpretq_m128i_s8(vdupq_n_s8(w)); +} + +// Broadcast double-precision (64-bit) floating-point value d to all elements of +// dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd +FORCE_INLINE __m128d _mm_set1_pd(double d) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vdupq_n_f64(d)); +#else + return vreinterpretq_m128d_s64(vdupq_n_s64(*(int64_t *) &d)); +#endif +} + +// Sets the 8 signed 16-bit integer values in reverse order. +// +// Return Value +// r0 := w0 +// r1 := w1 +// ...
+// r7 := w7 +FORCE_INLINE __m128i _mm_setr_epi16(short w0, + short w1, + short w2, + short w3, + short w4, + short w5, + short w6, + short w7) +{ + int16_t ALIGN_STRUCT(16) data[8] = {w0, w1, w2, w3, w4, w5, w6, w7}; + return vreinterpretq_m128i_s16(vld1q_s16(data)); +} + +// Sets the 4 signed 32-bit integer values in reverse order. +// NOTE: despite the parameter names, the first argument (i3) is stored in the +// lowest 32-bit lane and the last argument (i0) in the highest. +// https://technet.microsoft.com/en-us/library/security/27yb3ee5(v=vs.90).aspx +FORCE_INLINE __m128i _mm_setr_epi32(int i3, int i2, int i1, int i0) +{ + int32_t ALIGN_STRUCT(16) data[4] = {i3, i2, i1, i0}; + return vreinterpretq_m128i_s32(vld1q_s32(data)); +} + +// Set packed 64-bit integers in dst with the supplied values in reverse order. +// The first argument (e1) becomes the lower 64 bits of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi64 +FORCE_INLINE __m128i _mm_setr_epi64(__m64 e1, __m64 e0) +{ + return vreinterpretq_m128i_s64( + vcombine_s64(vreinterpret_s64_m64(e1), vreinterpret_s64_m64(e0))); +} + +// Sets the 16 signed 8-bit integer values in reverse order. +// https://msdn.microsoft.com/en-us/library/2khb9c7k(v=vs.90).aspx +FORCE_INLINE __m128i _mm_setr_epi8(signed char b0, + signed char b1, + signed char b2, + signed char b3, + signed char b4, + signed char b5, + signed char b6, + signed char b7, + signed char b8, + signed char b9, + signed char b10, + signed char b11, + signed char b12, + signed char b13, + signed char b14, + signed char b15) +{ + int8_t ALIGN_STRUCT(16) + data[16] = {(int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, + (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, + (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, + (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; + return vreinterpretq_m128i_s8(vld1q_s8(data)); +} + +// Set packed double-precision (64-bit) floating-point elements in dst with the +// supplied values in reverse order.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd
+FORCE_INLINE __m128d _mm_setr_pd(double e1, double e0)
+{
+    return _mm_set_pd(e0, e1);
+}
+
+// Return vector of type __m128d with all elements set to zero.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd
+FORCE_INLINE __m128d _mm_setzero_pd(void)
+{
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_f64(vdupq_n_f64(0));
+#else
+    return vreinterpretq_m128d_f32(vdupq_n_f32(0));
+#endif
+}
+
+// Sets the 128-bit value to zero
+// https://msdn.microsoft.com/en-us/library/vstudio/ys7dw0kh(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_setzero_si128(void)
+{
+    return vreinterpretq_m128i_s32(vdupq_n_s32(0));
+}
+
+// Shuffles the 4 signed or unsigned 32-bit integers in a as specified by imm.
+// https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx
+// FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a,
+//                                        __constrange(0,255) int imm)
+#if __has_builtin(__builtin_shufflevector)
+#define _mm_shuffle_epi32(a, imm) \
+    __extension__({ \
+        int32x4_t _input = vreinterpretq_s32_m128i(a); \
+        int32x4_t _shuf = __builtin_shufflevector( \
+            _input, _input, (imm) & (0x3), ((imm) >> 2) & 0x3, \
+            ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); \
+        vreinterpretq_m128i_s32(_shuf); \
+    })
+#else // generic
+#define _mm_shuffle_epi32(a, imm) \
+    __extension__({ \
+        __m128i ret; \
+        switch (imm) { \
+        case _MM_SHUFFLE(1, 0, 3, 2): \
+            ret = _mm_shuffle_epi_1032((a)); \
+            break; \
+        case _MM_SHUFFLE(2, 3, 0, 1): \
+            ret = _mm_shuffle_epi_2301((a)); \
+            break; \
+        case _MM_SHUFFLE(0, 3, 2, 1): \
+            ret = _mm_shuffle_epi_0321((a)); \
+            break; \
+        case _MM_SHUFFLE(2, 1, 0, 3): \
+            ret = _mm_shuffle_epi_2103((a)); \
+            break; \
+        case _MM_SHUFFLE(1, 0, 1, 0): \
+            ret = _mm_shuffle_epi_1010((a)); \
+            break; \
+        case _MM_SHUFFLE(1, 0, 0, 1): \
+            ret = _mm_shuffle_epi_1001((a)); \
+            break; \
+        case _MM_SHUFFLE(0, 1, 0, 1): \
+            ret = _mm_shuffle_epi_0101((a)); \
+            break; \
+        case _MM_SHUFFLE(2, 2, 1, 1): \
+            ret = _mm_shuffle_epi_2211((a)); \
+            break; \
+        case _MM_SHUFFLE(0, 1, 2, 2): \
+            ret = _mm_shuffle_epi_0122((a)); \
+            break; \
+        case _MM_SHUFFLE(3, 3, 3, 2): \
+            ret = _mm_shuffle_epi_3332((a)); \
+            break; \
+        case _MM_SHUFFLE(0, 0, 0, 0): \
+            ret = _mm_shuffle_epi32_splat((a), 0); \
+            break; \
+        case _MM_SHUFFLE(1, 1, 1, 1): \
+            ret = _mm_shuffle_epi32_splat((a), 1); \
+            break; \
+        case _MM_SHUFFLE(2, 2, 2, 2): \
+            ret = _mm_shuffle_epi32_splat((a), 2); \
+            break; \
+        case _MM_SHUFFLE(3, 3, 3, 3): \
+            ret = _mm_shuffle_epi32_splat((a), 3); \
+            break; \
+        default: \
+            ret = _mm_shuffle_epi32_default((a), (imm)); \
+            break; \
+        } \
+        ret; \
+    })
+#endif
+
+// Shuffle double-precision (64-bit) floating-point elements using the control
+// in imm8, and store the results in dst.
+//
+//   dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
+//   dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
+//
+// imm8 is parenthesised at every use so that an expression argument
+// (e.g. `x | y`) is masked as a whole instead of being re-associated by
+// operator precedence inside the expansion.
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd
+#if __has_builtin(__builtin_shufflevector)
+#define _mm_shuffle_pd(a, b, imm8) \
+    vreinterpretq_m128d_s64(__builtin_shufflevector( \
+        vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b), \
+        (imm8) & 0x1, (((imm8) & 0x2) >> 1) + 2))
+#else
+#define _mm_shuffle_pd(a, b, imm8) \
+    _mm_castsi128_pd(_mm_set_epi64x( \
+        vgetq_lane_s64(vreinterpretq_s64_m128d(b), ((imm8) & 0x2) >> 1), \
+        vgetq_lane_s64(vreinterpretq_s64_m128d(a), (imm8) & 0x1)))
+#endif
+
+// FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a,
+//                                          __constrange(0,255) int imm)
+#if __has_builtin(__builtin_shufflevector)
+#define _mm_shufflehi_epi16(a, imm) \
+    __extension__({ \
+        int16x8_t _input = vreinterpretq_s16_m128i(a); \
+        int16x8_t _shuf = __builtin_shufflevector( \
+            _input, _input, 0, 1, 2, 3, ((imm) & (0x3)) + 4, \
+            (((imm) >> 2) & 0x3) + 4, (((imm) >> 4) & 0x3) + 4, \
+            (((imm) >> 6) & 0x3) + 4); \
+        vreinterpretq_m128i_s16(_shuf); \
+    })
+#else // generic
+#define _mm_shufflehi_epi16(a, imm) _mm_shufflehi_epi16_function((a), (imm))
+#endif
+
+// FORCE_INLINE __m128i _mm_shufflelo_epi16(__m128i a,
+//                                          __constrange(0,255) int imm)
+#if __has_builtin(__builtin_shufflevector)
+#define _mm_shufflelo_epi16(a, imm) \
+    __extension__({ \
+        int16x8_t _input = vreinterpretq_s16_m128i(a); \
+        int16x8_t _shuf = __builtin_shufflevector( \
+            _input, _input, ((imm) & (0x3)), (((imm) >> 2) & 0x3), \
+            (((imm) >> 4) & 0x3), (((imm) >> 6) & 0x3), 4, 5, 6, 7); \
+        vreinterpretq_m128i_s16(_shuf); \
+    })
+#else // generic
+#define _mm_shufflelo_epi16(a, imm) _mm_shufflelo_epi16_function((a), (imm))
+#endif
+
+// Shift packed 16-bit integers in a left by count while shifting in zeros, and
+// store the results in dst.
+//
+//   FOR j := 0 to 7
+//     i := j*16
+//     IF count[63:0] > 15
+//       dst[i+15:i] := 0
+//     ELSE
+//       dst[i+15:i] := ZeroExtend16(a[i+15:i] << count[63:0])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16
+FORCE_INLINE __m128i _mm_sll_epi16(__m128i a, __m128i count)
+{
+    uint64_t c = vreinterpretq_nth_u64_m128i(count, 0);
+    if (_sse2neon_unlikely(c & ~15))
+        return _mm_setzero_si128();
+
+    int16x8_t vc = vdupq_n_s16((int16_t) c);
+    return vreinterpretq_m128i_s16(vshlq_s16(vreinterpretq_s16_m128i(a), vc));
+}
+
+// Shift packed 32-bit integers in a left by count while shifting in zeros, and
+// store the results in dst.
+// +// FOR j := 0 to 3 +// i := j*32 +// IF count[63:0] > 31 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := ZeroExtend32(a[i+31:i] << count[63:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32 +FORCE_INLINE __m128i _mm_sll_epi32(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (_sse2neon_unlikely(c & ~31)) + return _mm_setzero_si128(); + + int32x4_t vc = vdupq_n_s32((int32_t) c); + return vreinterpretq_m128i_s32(vshlq_s32(vreinterpretq_s32_m128i(a), vc)); +} + +// Shift packed 64-bit integers in a left by count while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// IF count[63:0] > 63 +// dst[i+63:i] := 0 +// ELSE +// dst[i+63:i] := ZeroExtend64(a[i+63:i] << count[63:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64 +FORCE_INLINE __m128i _mm_sll_epi64(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (_sse2neon_unlikely(c & ~63)) + return _mm_setzero_si128(); + + int64x2_t vc = vdupq_n_s64((int64_t) c); + return vreinterpretq_m128i_s64(vshlq_s64(vreinterpretq_s64_m128i(a), vc)); +} + +// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// IF imm8[7:0] > 15 +// dst[i+15:i] := 0 +// ELSE +// dst[i+15:i] := ZeroExtend16(a[i+15:i] << imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16 +FORCE_INLINE __m128i _mm_slli_epi16(__m128i a, int imm) +{ + if (_sse2neon_unlikely(imm & ~15)) + return _mm_setzero_si128(); + return vreinterpretq_m128i_s16( + vshlq_s16(vreinterpretq_s16_m128i(a), vdupq_n_s16(imm))); +} + +// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and +// store the results in dst. 
+// +// FOR j := 0 to 3 +// i := j*32 +// IF imm8[7:0] > 31 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := ZeroExtend32(a[i+31:i] << imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32 +FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, int imm) +{ + if (_sse2neon_unlikely(imm & ~31)) + return _mm_setzero_si128(); + return vreinterpretq_m128i_s32( + vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(imm))); +} + +// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// IF imm8[7:0] > 63 +// dst[i+63:i] := 0 +// ELSE +// dst[i+63:i] := ZeroExtend64(a[i+63:i] << imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64 +FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm) +{ + if (_sse2neon_unlikely(imm & ~63)) + return _mm_setzero_si128(); + return vreinterpretq_m128i_s64( + vshlq_s64(vreinterpretq_s64_m128i(a), vdupq_n_s64(imm))); +} + +// Shift a left by imm8 bytes while shifting in zeros, and store the results in +// dst. +// +// tmp := imm8[7:0] +// IF tmp > 15 +// tmp := 16 +// FI +// dst[127:0] := a[127:0] << (tmp*8) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128 +FORCE_INLINE __m128i _mm_slli_si128(__m128i a, int imm) +{ + if (_sse2neon_unlikely(imm & ~15)) + return _mm_setzero_si128(); + uint8x16_t tmp[2] = {vdupq_n_u8(0), vreinterpretq_u8_m128i(a)}; + return vreinterpretq_m128i_u8( + vld1q_u8(((uint8_t const *) tmp) + (16 - imm))); +} + +// Compute the square root of packed double-precision (64-bit) floating-point +// elements in a, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd
+FORCE_INLINE __m128d _mm_sqrt_pd(__m128d a)
+{
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_f64(vsqrtq_f64(vreinterpretq_f64_m128d(a)));
+#else
+    double a0 = sqrt(((double *) &a)[0]);
+    double a1 = sqrt(((double *) &a)[1]);
+    return _mm_set_pd(a1, a0);
+#endif
+}
+
+// Compute the square root of the lower double-precision (64-bit) floating-point
+// element in b, store the result in the lower element of dst, and copy the
+// upper element from a to the upper element of dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd
+FORCE_INLINE __m128d _mm_sqrt_sd(__m128d a, __m128d b)
+{
+#if defined(__aarch64__)
+    return _mm_move_sd(a, _mm_sqrt_pd(b));
+#else
+    return _mm_set_pd(((double *) &a)[1], sqrt(((double *) &b)[0]));
+#endif
+}
+
+// Shift packed 16-bit integers in a right by count while shifting in sign bits,
+// and store the results in dst.
+//
+//   FOR j := 0 to 7
+//     i := j*16
+//     IF count[63:0] > 15
+//       dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+//     ELSE
+//       dst[i+15:i] := SignExtend16(a[i+15:i] >> count[63:0])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16
+FORCE_INLINE __m128i _mm_sra_epi16(__m128i a, __m128i count)
+{
+    // Only the low 64-bit lane of count is consulted.
+    int64_t c = vgetq_lane_s64(vreinterpretq_s64_m128i(count), 0);
+    // Any bit outside 0..15 set: every lane becomes a copy of its sign bit.
+    if (_sse2neon_unlikely(c & ~15))
+        return _mm_cmplt_epi16(a, _mm_setzero_si128());
+    // A negative per-lane shift amount makes vshlq_s16 shift right.
+    return vreinterpretq_m128i_s16(
+        vshlq_s16(vreinterpretq_s16_m128i(a), vdupq_n_s16((int16_t) -c)));
+}
+
+// Shift packed 32-bit integers in a right by count while shifting in sign bits,
+// and store the results in dst.
+//
+//   FOR j := 0 to 3
+//     i := j*32
+//     IF count[63:0] > 31
+//       dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+//     ELSE
+//       dst[i+31:i] := SignExtend32(a[i+31:i] >> count[63:0])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32
+FORCE_INLINE __m128i _mm_sra_epi32(__m128i a, __m128i count)
+{
+    // Only the low 64-bit lane of count is consulted.
+    int64_t c = vgetq_lane_s64(vreinterpretq_s64_m128i(count), 0);
+    // Any bit outside 0..31 set: every lane becomes a copy of its sign bit.
+    if (_sse2neon_unlikely(c & ~31))
+        return _mm_cmplt_epi32(a, _mm_setzero_si128());
+    // A negative per-lane shift amount makes vshlq_s32 shift right.
+    return vreinterpretq_m128i_s32(
+        vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32((int32_t) -c)));
+}
+
+// Shift packed 16-bit integers in a right by imm8 while shifting in sign
+// bits, and store the results in dst.
+//
+//   FOR j := 0 to 7
+//     i := j*16
+//     IF imm8[7:0] > 15
+//       dst[i+15:i] := (a[i+15] ? 0xFFFF : 0x0)
+//     ELSE
+//       dst[i+15:i] := SignExtend16(a[i+15:i] >> imm8[7:0])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16
+FORCE_INLINE __m128i _mm_srai_epi16(__m128i a, int imm)
+{
+    // Out-of-range counts are clamped to 15, which fills each lane with its
+    // sign bit.
+    const int count = (imm & ~15) ? 15 : imm;
+    return vreinterpretq_m128i_s16(
+        vshlq_s16(vreinterpretq_s16_m128i(a), vdupq_n_s16((int16_t) -count)));
+}
+
+// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits,
+// and store the results in dst.
+//
+//   FOR j := 0 to 3
+//     i := j*32
+//     IF imm8[7:0] > 31
+//       dst[i+31:i] := (a[i+31] ? 0xFFFFFFFF : 0x0)
+//     ELSE
+//       dst[i+31:i] := SignExtend32(a[i+31:i] >> imm8[7:0])
+//     FI
+//   ENDFOR
+//
+// Both macro arguments are parenthesised at every use so that expression
+// arguments (e.g. `n + 1`) are negated / assigned as a whole.
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32
+// FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm)
+#define _mm_srai_epi32(a, imm) \
+    __extension__({ \
+        __m128i ret; \
+        if (_sse2neon_unlikely((imm) == 0)) { \
+            ret = (a); \
+        } else if (_sse2neon_likely(0 < (imm) && (imm) < 32)) { \
+            ret = vreinterpretq_m128i_s32( \
+                vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(-(imm)))); \
+        } else { \
+            ret = vreinterpretq_m128i_s32( \
+                vshrq_n_s32(vreinterpretq_s32_m128i(a), 31)); \
+        } \
+        ret; \
+    })
+
+// Shift packed 16-bit integers in a right by count while shifting in zeros, and
+// store the results in dst.
+//
+//   FOR j := 0 to 7
+//     i := j*16
+//     IF count[63:0] > 15
+//       dst[i+15:i] := 0
+//     ELSE
+//       dst[i+15:i] := ZeroExtend16(a[i+15:i] >> count[63:0])
+//     FI
+//   ENDFOR
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16
+FORCE_INLINE __m128i _mm_srl_epi16(__m128i a, __m128i count)
+{
+    uint64_t c = vreinterpretq_nth_u64_m128i(count, 0);
+    if (_sse2neon_unlikely(c & ~15))
+        return _mm_setzero_si128();
+
+    int16x8_t vc = vdupq_n_s16(-(int16_t) c);
+    return vreinterpretq_m128i_u16(vshlq_u16(vreinterpretq_u16_m128i(a), vc));
+}
+
+// Shift packed 32-bit integers in a right by count while shifting in zeros, and
+// store the results in dst.
+// +// FOR j := 0 to 3 +// i := j*32 +// IF count[63:0] > 31 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := ZeroExtend32(a[i+31:i] >> count[63:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32 +FORCE_INLINE __m128i _mm_srl_epi32(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (_sse2neon_unlikely(c & ~31)) + return _mm_setzero_si128(); + + int32x4_t vc = vdupq_n_s32(-(int32_t) c); + return vreinterpretq_m128i_u32(vshlq_u32(vreinterpretq_u32_m128i(a), vc)); +} + +// Shift packed 64-bit integers in a right by count while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// IF count[63:0] > 63 +// dst[i+63:i] := 0 +// ELSE +// dst[i+63:i] := ZeroExtend64(a[i+63:i] >> count[63:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64 +FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) +{ + uint64_t c = vreinterpretq_nth_u64_m128i(count, 0); + if (_sse2neon_unlikely(c & ~63)) + return _mm_setzero_si128(); + + int64x2_t vc = vdupq_n_s64(-(int64_t) c); + return vreinterpretq_m128i_u64(vshlq_u64(vreinterpretq_u64_m128i(a), vc)); +} + +// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and +// store the results in dst. 
+// +// FOR j := 0 to 7 +// i := j*16 +// IF imm8[7:0] > 15 +// dst[i+15:i] := 0 +// ELSE +// dst[i+15:i] := ZeroExtend16(a[i+15:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16 +#define _mm_srli_epi16(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (_sse2neon_unlikely((imm) & ~15)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + ret = vreinterpretq_m128i_u16( \ + vshlq_u16(vreinterpretq_u16_m128i(a), vdupq_n_s16(-(imm)))); \ + } \ + ret; \ + }) + +// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 3 +// i := j*32 +// IF imm8[7:0] > 31 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := ZeroExtend32(a[i+31:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32 +// FORCE_INLINE __m128i _mm_srli_epi32(__m128i a, __constrange(0,255) int imm) +#define _mm_srli_epi32(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (_sse2neon_unlikely((imm) & ~31)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + ret = vreinterpretq_m128i_u32( \ + vshlq_u32(vreinterpretq_u32_m128i(a), vdupq_n_s32(-(imm)))); \ + } \ + ret; \ + }) + +// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and +// store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// IF imm8[7:0] > 63 +// dst[i+63:i] := 0 +// ELSE +// dst[i+63:i] := ZeroExtend64(a[i+63:i] >> imm8[7:0]) +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64 +#define _mm_srli_epi64(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (_sse2neon_unlikely((imm) & ~63)) { \ + ret = _mm_setzero_si128(); \ + } else { \ + ret = vreinterpretq_m128i_u64( \ + vshlq_u64(vreinterpretq_u64_m128i(a), vdupq_n_s64(-(imm)))); \ + } \ + ret; \ + }) + +// Shift a right by imm8 bytes while shifting in zeros, and store the results in +// dst. 
+// +// tmp := imm8[7:0] +// IF tmp > 15 +// tmp := 16 +// FI +// dst[127:0] := a[127:0] >> (tmp*8) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128 +FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm) +{ + if (_sse2neon_unlikely(imm & ~15)) + return _mm_setzero_si128(); + uint8x16_t tmp[2] = {vreinterpretq_u8_m128i(a), vdupq_n_u8(0)}; + return vreinterpretq_m128i_u8(vld1q_u8(((uint8_t const *) tmp) + imm)); +} + +// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point +// elements) from a into memory. mem_addr must be aligned on a 16-byte boundary +// or a general-protection exception may be generated. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd +FORCE_INLINE void _mm_store_pd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1q_f64((float64_t *) mem_addr, vreinterpretq_f64_m128d(a)); +#else + vst1q_f32((float32_t *) mem_addr, vreinterpretq_f32_m128d(a)); +#endif +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// 2 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1 +FORCE_INLINE void _mm_store_pd1(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + float64x1_t a_low = vget_low_f64(vreinterpretq_f64_m128d(a)); + vst1q_f64((float64_t *) mem_addr, + vreinterpretq_f64_m128d(vcombine_f64(a_low, a_low))); +#else + float32x2_t a_low = vget_low_f32(vreinterpretq_f32_m128d(a)); + vst1q_f32((float32_t *) mem_addr, + vreinterpretq_f32_m128d(vcombine_f32(a_low, a_low))); +#endif +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// memory. mem_addr does not need to be aligned on any particular boundary. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_store_sd +FORCE_INLINE void _mm_store_sd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1_f64((float64_t *) mem_addr, vget_low_f64(vreinterpretq_f64_m128d(a))); +#else + vst1_u64((uint64_t *) mem_addr, vget_low_u64(vreinterpretq_u64_m128d(a))); +#endif +} + +// Stores four 32-bit integer values as (as a __m128i value) at the address p. +// https://msdn.microsoft.com/en-us/library/vstudio/edk11s13(v=vs.100).aspx +FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a) +{ + vst1q_s32((int32_t *) p, vreinterpretq_s32_m128i(a)); +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// 2 contiguous elements in memory. mem_addr must be aligned on a 16-byte +// boundary or a general-protection exception may be generated. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=9,526,5601&text=_mm_store1_pd +#define _mm_store1_pd _mm_store_pd1 + +// Store the upper double-precision (64-bit) floating-point element from a into +// memory. +// +// MEM[mem_addr+63:mem_addr] := a[127:64] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd +FORCE_INLINE void _mm_storeh_pd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1_f64((float64_t *) mem_addr, vget_high_f64(vreinterpretq_f64_m128d(a))); +#else + vst1_f32((float32_t *) mem_addr, vget_high_f32(vreinterpretq_f32_m128d(a))); +#endif +} + +// Reads the lower 64 bits of b and stores them into the lower 64 bits of a. +// https://msdn.microsoft.com/en-us/library/hhwf428f%28v=vs.90%29.aspx +FORCE_INLINE void _mm_storel_epi64(__m128i *a, __m128i b) +{ + vst1_u64((uint64_t *) a, vget_low_u64(vreinterpretq_u64_m128i(b))); +} + +// Store the lower double-precision (64-bit) floating-point element from a into +// memory. 
+// +// MEM[mem_addr+63:mem_addr] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd +FORCE_INLINE void _mm_storel_pd(double *mem_addr, __m128d a) +{ +#if defined(__aarch64__) + vst1_f64((float64_t *) mem_addr, vget_low_f64(vreinterpretq_f64_m128d(a))); +#else + vst1_f32((float32_t *) mem_addr, vget_low_f32(vreinterpretq_f32_m128d(a))); +#endif +} + +// Store 2 double-precision (64-bit) floating-point elements from a into memory +// in reverse order. mem_addr must be aligned on a 16-byte boundary or a +// general-protection exception may be generated. +// +// MEM[mem_addr+63:mem_addr] := a[127:64] +// MEM[mem_addr+127:mem_addr+64] := a[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd +FORCE_INLINE void _mm_storer_pd(double *mem_addr, __m128d a) +{ + float32x4_t f = vreinterpretq_f32_m128d(a); + _mm_store_pd(mem_addr, vreinterpretq_m128d_f32(vextq_f32(f, f, 2))); +} + +// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point +// elements) from a into memory. mem_addr does not need to be aligned on any +// particular boundary. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd +FORCE_INLINE void _mm_storeu_pd(double *mem_addr, __m128d a) +{ + _mm_store_pd(mem_addr, a); +} + +// Stores 128-bits of integer data a at the address p. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128 +FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a) +{ + vst1q_s32((int32_t *) p, vreinterpretq_s32_m128i(a)); +} + +// Stores 32-bits of integer data a at the address p. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si32
+FORCE_INLINE void _mm_storeu_si32(void *p, __m128i a)
+{
+    vst1q_lane_s32((int32_t *) p, vreinterpretq_s32_m128i(a), 0);
+}
+
+// Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point
+// elements) from a into memory using a non-temporal memory hint. mem_addr must
+// be aligned on a 16-byte boundary or a general-protection exception may be
+// generated.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd
+FORCE_INLINE void _mm_stream_pd(double *p, __m128d a)
+{
+#if __has_builtin(__builtin_nontemporal_store)
+    // The destination pointee type must match the type of the stored value;
+    // casting to __m128d * keeps that true for whatever vector type __m128d
+    // maps to on the target, instead of hard-coding float32x4_t.
+    __builtin_nontemporal_store(a, (__m128d *) p);
+#elif defined(__aarch64__)
+    vst1q_f64(p, vreinterpretq_f64_m128d(a));
+#else
+    vst1q_s64((int64_t *) p, vreinterpretq_s64_m128d(a));
+#endif
+}
+
+// Stores the data in a to the address p without polluting the caches. If the
+// cache line containing address p is already in the cache, the cache will be
+// updated.
+// https://msdn.microsoft.com/en-us/library/ba08y07y%28v=vs.90%29.aspx
+FORCE_INLINE void _mm_stream_si128(__m128i *p, __m128i a)
+{
+#if __has_builtin(__builtin_nontemporal_store)
+    __builtin_nontemporal_store(a, p);
+#else
+    vst1q_s64((int64_t *) p, vreinterpretq_s64_m128i(a));
+#endif
+}
+
+// Store 32-bit integer a into memory using a non-temporal hint to minimize
+// cache pollution. If the cache line containing address mem_addr is already in
+// the cache, the cache will be updated.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32
+FORCE_INLINE void _mm_stream_si32(int *p, int a)
+{
+    vst1q_lane_s32((int32_t *) p, vdupq_n_s32(a), 0);
+}
+
+// Store 64-bit integer a into memory using a non-temporal hint to minimize
+// cache pollution. If the cache line containing address mem_addr is already in
+// the cache, the cache will be updated.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64 +FORCE_INLINE void _mm_stream_si64(__int64 *p, __int64 a) +{ + vst1_s64((int64_t *) p, vdup_n_s64((int64_t) a)); +} + +// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and +// store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16 +FORCE_INLINE __m128i _mm_sub_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Subtracts the 4 signed or unsigned 32-bit integers of b from the 4 signed or +// unsigned 32-bit integers of a. +// +// r0 := a0 - b0 +// r1 := a1 - b1 +// r2 := a2 - b2 +// r3 := a3 - b3 +// +// https://msdn.microsoft.com/en-us/library/vstudio/fhh866h0(v=vs.100).aspx +FORCE_INLINE __m128i _mm_sub_epi32(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + vsubq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +// Subtract 2 packed 64-bit integers in b from 2 packed 64-bit integers in a, +// and store the results in dst. +// r0 := a0 - b0 +// r1 := a1 - b1 +FORCE_INLINE __m128i _mm_sub_epi64(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s64( + vsubq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); +} + +// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and +// store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8 +FORCE_INLINE __m128i _mm_sub_epi8(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s8( + vsubq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +} + +// Subtract packed double-precision (64-bit) floating-point elements in b from +// packed double-precision (64-bit) floating-point elements in a, and store the +// results in dst. 
+// +// FOR j := 0 to 1 +// i := j*64 +// dst[i+63:i] := a[i+63:i] - b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_pd +FORCE_INLINE __m128d _mm_sub_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vsubq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[2]; + c[0] = da[0] - db[0]; + c[1] = da[1] - db[1]; + return vld1q_f32((float32_t *) c); +#endif +} + +// Subtract the lower double-precision (64-bit) floating-point element in b from +// the lower double-precision (64-bit) floating-point element in a, store the +// result in the lower element of dst, and copy the upper element from a to the +// upper element of dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd +FORCE_INLINE __m128d _mm_sub_sd(__m128d a, __m128d b) +{ + return _mm_move_sd(a, _mm_sub_pd(a, b)); +} + +// Subtract 64-bit integer b from 64-bit integer a, and store the result in dst. +// +// dst[63:0] := a[63:0] - b[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_si64 +FORCE_INLINE __m64 _mm_sub_si64(__m64 a, __m64 b) +{ + return vreinterpret_m64_s64( + vsub_s64(vreinterpret_s64_m64(a), vreinterpret_s64_m64(b))); +} + +// Subtracts the 8 signed 16-bit integers of b from the 8 signed 16-bit integers +// of a and saturates. +// +// r0 := SignedSaturate(a0 - b0) +// r1 := SignedSaturate(a1 - b1) +// ... +// r7 := SignedSaturate(a7 - b7) +// +// https://technet.microsoft.com/en-us/subscriptions/3247z5b8(v=vs.90) +FORCE_INLINE __m128i _mm_subs_epi16(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s16( + vqsubq_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +} + +// Subtracts the 16 signed 8-bit integers of b from the 16 signed 8-bit integers +// of a and saturates. 
+//
+//   r0 := SignedSaturate(a0 - b0)
+//   r1 := SignedSaturate(a1 - b1)
+//   ...
+//   r15 := SignedSaturate(a15 - b15)
+//
+// https://technet.microsoft.com/en-us/subscriptions/by7kzks1(v=vs.90)
+FORCE_INLINE __m128i _mm_subs_epi8(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_s8(
+        vqsubq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
+}
+
+// Subtracts the 8 unsigned 16-bit integers of b from the 8 unsigned 16-bit
+// integers of a and saturates.
+// https://technet.microsoft.com/en-us/subscriptions/index/f44y0s19(v=vs.90).aspx
+FORCE_INLINE __m128i _mm_subs_epu16(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u16(
+        vqsubq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b)));
+}
+
+// Subtracts the 16 unsigned 8-bit integers of b from the 16 unsigned 8-bit
+// integers of a and saturates.
+//
+//   r0 := UnsignedSaturate(a0 - b0)
+//   r1 := UnsignedSaturate(a1 - b1)
+//   ...
+//   r15 := UnsignedSaturate(a15 - b15)
+//
+// https://technet.microsoft.com/en-us/subscriptions/yadkxc18(v=vs.90)
+FORCE_INLINE __m128i _mm_subs_epu8(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u8(
+        vqsubq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b)));
+}
+
+// The _mm_ucomi*_sd comparisons are plain aliases of the corresponding
+// _mm_comi*_sd implementations.
+#define _mm_ucomieq_sd _mm_comieq_sd
+#define _mm_ucomige_sd _mm_comige_sd
+#define _mm_ucomigt_sd _mm_comigt_sd
+#define _mm_ucomile_sd _mm_comile_sd
+#define _mm_ucomilt_sd _mm_comilt_sd
+#define _mm_ucomineq_sd _mm_comineq_sd
+
+// Return vector of type __m128d with undefined elements.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd +FORCE_INLINE __m128d _mm_undefined_pd(void) +{ +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wuninitialized" +#endif + __m128d a; + return a; +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif +} + +// Interleaves the upper 4 signed or unsigned 16-bit integers in a with the +// upper 4 signed or unsigned 16-bit integers in b. +// +// r0 := a4 +// r1 := b4 +// r2 := a5 +// r3 := b5 +// r4 := a6 +// r5 := b6 +// r6 := a7 +// r7 := b7 +// +// https://msdn.microsoft.com/en-us/library/03196cz7(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpackhi_epi16(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s16( + vzip2q_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +#else + int16x4_t a1 = vget_high_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b1 = vget_high_s16(vreinterpretq_s16_m128i(b)); + int16x4x2_t result = vzip_s16(a1, b1); + return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); +#endif +} + +// Interleaves the upper 2 signed or unsigned 32-bit integers in a with the +// upper 2 signed or unsigned 32-bit integers in b. +// https://msdn.microsoft.com/en-us/library/65sa7cbs(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpackhi_epi32(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s32( + vzip2q_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +#else + int32x2_t a1 = vget_high_s32(vreinterpretq_s32_m128i(a)); + int32x2_t b1 = vget_high_s32(vreinterpretq_s32_m128i(b)); + int32x2x2_t result = vzip_s32(a1, b1); + return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); +#endif +} + +// Interleaves the upper signed or unsigned 64-bit integer in a with the +// upper signed or unsigned 64-bit integer in b. 
+// +// r0 := a1 +// r1 := b1 +FORCE_INLINE __m128i _mm_unpackhi_epi64(__m128i a, __m128i b) +{ + int64x1_t a_h = vget_high_s64(vreinterpretq_s64_m128i(a)); + int64x1_t b_h = vget_high_s64(vreinterpretq_s64_m128i(b)); + return vreinterpretq_m128i_s64(vcombine_s64(a_h, b_h)); +} + +// Interleaves the upper 8 signed or unsigned 8-bit integers in a with the upper +// 8 signed or unsigned 8-bit integers in b. +// +// r0 := a8 +// r1 := b8 +// r2 := a9 +// r3 := b9 +// ... +// r14 := a15 +// r15 := b15 +// +// https://msdn.microsoft.com/en-us/library/t5h7783k(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpackhi_epi8(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s8( + vzip2q_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +#else + int8x8_t a1 = + vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(a))); + int8x8_t b1 = + vreinterpret_s8_s16(vget_high_s16(vreinterpretq_s16_m128i(b))); + int8x8x2_t result = vzip_s8(a1, b1); + return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); +#endif +} + +// Unpack and interleave double-precision (64-bit) floating-point elements from +// the high half of a and b, and store the results in dst. +// +// DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) { +// dst[63:0] := src1[127:64] +// dst[127:64] := src2[127:64] +// RETURN dst[127:0] +// } +// dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd +FORCE_INLINE __m128d _mm_unpackhi_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vzip2q_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + return vreinterpretq_m128d_s64( + vcombine_s64(vget_high_s64(vreinterpretq_s64_m128d(a)), + vget_high_s64(vreinterpretq_s64_m128d(b)))); +#endif +} + +// Interleaves the lower 4 signed or unsigned 16-bit integers in a with the +// lower 4 signed or unsigned 16-bit integers in b. 
+// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// r4 := a2 +// r5 := b2 +// r6 := a3 +// r7 := b3 +// +// https://msdn.microsoft.com/en-us/library/btxb17bw%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_unpacklo_epi16(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s16( + vzip1q_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b))); +#else + int16x4_t a1 = vget_low_s16(vreinterpretq_s16_m128i(a)); + int16x4_t b1 = vget_low_s16(vreinterpretq_s16_m128i(b)); + int16x4x2_t result = vzip_s16(a1, b1); + return vreinterpretq_m128i_s16(vcombine_s16(result.val[0], result.val[1])); +#endif +} + +// Interleaves the lower 2 signed or unsigned 32 - bit integers in a with the +// lower 2 signed or unsigned 32 - bit integers in b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// +// https://msdn.microsoft.com/en-us/library/x8atst9d(v=vs.100).aspx +FORCE_INLINE __m128i _mm_unpacklo_epi32(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s32( + vzip1q_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +#else + int32x2_t a1 = vget_low_s32(vreinterpretq_s32_m128i(a)); + int32x2_t b1 = vget_low_s32(vreinterpretq_s32_m128i(b)); + int32x2x2_t result = vzip_s32(a1, b1); + return vreinterpretq_m128i_s32(vcombine_s32(result.val[0], result.val[1])); +#endif +} + +FORCE_INLINE __m128i _mm_unpacklo_epi64(__m128i a, __m128i b) +{ + int64x1_t a_l = vget_low_s64(vreinterpretq_s64_m128i(a)); + int64x1_t b_l = vget_low_s64(vreinterpretq_s64_m128i(b)); + return vreinterpretq_m128i_s64(vcombine_s64(a_l, b_l)); +} + +// Interleaves the lower 8 signed or unsigned 8-bit integers in a with the lower +// 8 signed or unsigned 8-bit integers in b. +// +// r0 := a0 +// r1 := b0 +// r2 := a1 +// r3 := b1 +// ... 
+// r14 := a7 +// r15 := b7 +// +// https://msdn.microsoft.com/en-us/library/xf7k860c%28v=vs.90%29.aspx +FORCE_INLINE __m128i _mm_unpacklo_epi8(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_s8( + vzip1q_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b))); +#else + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(a))); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(vreinterpretq_s16_m128i(b))); + int8x8x2_t result = vzip_s8(a1, b1); + return vreinterpretq_m128i_s8(vcombine_s8(result.val[0], result.val[1])); +#endif +} + +// Unpack and interleave double-precision (64-bit) floating-point elements from +// the low half of a and b, and store the results in dst. +// +// DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) { +// dst[63:0] := src1[63:0] +// dst[127:64] := src2[63:0] +// RETURN dst[127:0] +// } +// dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0]) +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd +FORCE_INLINE __m128d _mm_unpacklo_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vzip1q_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + return vreinterpretq_m128d_s64( + vcombine_s64(vget_low_s64(vreinterpretq_s64_m128d(a)), + vget_low_s64(vreinterpretq_s64_m128d(b)))); +#endif +} + +// Compute the bitwise XOR of packed double-precision (64-bit) floating-point +// elements in a and b, and store the results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// dst[i+63:i] := a[i+63:i] XOR b[i+63:i] +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd +FORCE_INLINE __m128d _mm_xor_pd(__m128d a, __m128d b) +{ + return vreinterpretq_m128d_s64( + veorq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b))); +} + +// Computes the bitwise XOR of the 128-bit value in a and the 128-bit value in +// b. 
https://msdn.microsoft.com/en-us/library/fzt08www(v=vs.100).aspx +FORCE_INLINE __m128i _mm_xor_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_s32( + veorq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b))); +} + +/* SSE3 */ + +// Alternatively add and subtract packed double-precision (64-bit) +// floating-point elements in a to/from packed elements in b, and store the +// results in dst. +// +// FOR j := 0 to 1 +// i := j*64 +// IF ((j & 1) == 0) +// dst[i+63:i] := a[i+63:i] - b[i+63:i] +// ELSE +// dst[i+63:i] := a[i+63:i] + b[i+63:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd +FORCE_INLINE __m128d _mm_addsub_pd(__m128d a, __m128d b) +{ + _sse2neon_const __m128d mask = _mm_set_pd(1.0f, -1.0f); +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vfmaq_f64(vreinterpretq_f64_m128d(a), + vreinterpretq_f64_m128d(b), + vreinterpretq_f64_m128d(mask))); +#else + return _mm_add_pd(_mm_mul_pd(b, mask), a); +#endif +} + +// Alternatively add and subtract packed single-precision (32-bit) +// floating-point elements in a to/from packed elements in b, and store the +// results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=addsub_ps +FORCE_INLINE __m128 _mm_addsub_ps(__m128 a, __m128 b) +{ + _sse2neon_const __m128 mask = _mm_setr_ps(-1.0f, 1.0f, -1.0f, 1.0f); +#if defined(__aarch64__) || defined(__ARM_FEATURE_FMA) /* VFPv4+ */ + return vreinterpretq_m128_f32(vfmaq_f32(vreinterpretq_f32_m128(a), + vreinterpretq_f32_m128(mask), + vreinterpretq_f32_m128(b))); +#else + return _mm_add_ps(_mm_mul_ps(b, mask), a); +#endif +} + +// Horizontally add adjacent pairs of double-precision (64-bit) floating-point +// elements in a and b, and pack the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd +FORCE_INLINE __m128d _mm_hadd_pd(__m128d a, __m128d b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vpaddq_f64(vreinterpretq_f64_m128d(a), vreinterpretq_f64_m128d(b))); +#else + double *da = (double *) &a; + double *db = (double *) &b; + double c[] = {da[0] + da[1], db[0] + db[1]}; + return vreinterpretq_m128d_u64(vld1q_u64((uint64_t *) c)); +#endif +} + +// Computes pairwise add of each argument as single-precision, floating-point +// values a and b. +// https://msdn.microsoft.com/en-us/library/yd9wecaa.aspx +FORCE_INLINE __m128 _mm_hadd_ps(__m128 a, __m128 b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vpaddq_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(b))); +#else + float32x2_t a10 = vget_low_f32(vreinterpretq_f32_m128(a)); + float32x2_t a32 = vget_high_f32(vreinterpretq_f32_m128(a)); + float32x2_t b10 = vget_low_f32(vreinterpretq_f32_m128(b)); + float32x2_t b32 = vget_high_f32(vreinterpretq_f32_m128(b)); + return vreinterpretq_m128_f32( + vcombine_f32(vpadd_f32(a10, a32), vpadd_f32(b10, b32))); +#endif +} + +// Horizontally subtract adjacent pairs of double-precision (64-bit) +// floating-point elements in a and b, and pack the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd +FORCE_INLINE __m128d _mm_hsub_pd(__m128d _a, __m128d _b) +{ +#if defined(__aarch64__) + float64x2_t a = vreinterpretq_f64_m128d(_a); + float64x2_t b = vreinterpretq_f64_m128d(_b); + return vreinterpretq_m128d_f64( + vsubq_f64(vuzp1q_f64(a, b), vuzp2q_f64(a, b))); +#else + double *da = (double *) &_a; + double *db = (double *) &_b; + double c[] = {da[0] - da[1], db[0] - db[1]}; + return vreinterpretq_m128d_u64(vld1q_u64((uint64_t *) c)); +#endif +} + +// Horizontally subtract adjacent pairs of single-precision (32-bit) +// floating-point elements in a and b, and pack the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps +FORCE_INLINE __m128 _mm_hsub_ps(__m128 _a, __m128 _b) +{ + float32x4_t a = vreinterpretq_f32_m128(_a); + float32x4_t b = vreinterpretq_f32_m128(_b); +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vsubq_f32(vuzp1q_f32(a, b), vuzp2q_f32(a, b))); +#else + float32x4x2_t c = vuzpq_f32(a, b); + return vreinterpretq_m128_f32(vsubq_f32(c.val[0], c.val[1])); +#endif +} + +// Load 128-bits of integer data from unaligned memory into dst. This intrinsic +// may perform better than _mm_loadu_si128 when the data crosses a cache line +// boundary. +// +// dst[127:0] := MEM[mem_addr+127:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128 +#define _mm_lddqu_si128 _mm_loadu_si128 + +// Load a double-precision (64-bit) floating-point element from memory into both +// elements of dst. +// +// dst[63:0] := MEM[mem_addr+63:mem_addr] +// dst[127:64] := MEM[mem_addr+63:mem_addr] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd +#define _mm_loaddup_pd _mm_load1_pd + +// Duplicate the low double-precision (64-bit) floating-point element from a, +// and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd +FORCE_INLINE __m128d _mm_movedup_pd(__m128d a) +{ +#if defined(__aarch64__) + return vreinterpretq_m128d_f64( + vdupq_laneq_f64(vreinterpretq_f64_m128d(a), 0)); +#else + return vreinterpretq_m128d_u64( + vdupq_n_u64(vgetq_lane_u64(vreinterpretq_u64_m128d(a), 0))); +#endif +} + +// Duplicate odd-indexed single-precision (32-bit) floating-point elements +// from a, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps +FORCE_INLINE __m128 _mm_movehdup_ps(__m128 a) +{ +#if __has_builtin(__builtin_shufflevector) + return vreinterpretq_m128_f32(__builtin_shufflevector( + vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 1, 1, 3, 3)); +#else + float32_t a1 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 1); + float32_t a3 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 3); + float ALIGN_STRUCT(16) data[4] = {a1, a1, a3, a3}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +#endif +} + +// Duplicate even-indexed single-precision (32-bit) floating-point elements +// from a, and store the results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps +FORCE_INLINE __m128 _mm_moveldup_ps(__m128 a) +{ +#if __has_builtin(__builtin_shufflevector) + return vreinterpretq_m128_f32(__builtin_shufflevector( + vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 0, 0, 2, 2)); +#else + float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); + float32_t a2 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 2); + float ALIGN_STRUCT(16) data[4] = {a0, a0, a2, a2}; + return vreinterpretq_m128_f32(vld1q_f32(data)); +#endif +} + +/* SSSE3 */ + +// Compute the absolute value of packed signed 16-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// dst[i+15:i] := ABS(a[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16 +FORCE_INLINE __m128i _mm_abs_epi16(__m128i a) +{ + return vreinterpretq_m128i_s16(vabsq_s16(vreinterpretq_s16_m128i(a))); +} + +// Compute the absolute value of packed signed 32-bit integers in a, and store +// the unsigned results in dst. 
+// +// FOR j := 0 to 3 +// i := j*32 +// dst[i+31:i] := ABS(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32 +FORCE_INLINE __m128i _mm_abs_epi32(__m128i a) +{ + return vreinterpretq_m128i_s32(vabsq_s32(vreinterpretq_s32_m128i(a))); +} + +// Compute the absolute value of packed signed 8-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 15 +// i := j*8 +// dst[i+7:i] := ABS(a[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8 +FORCE_INLINE __m128i _mm_abs_epi8(__m128i a) +{ + return vreinterpretq_m128i_s8(vabsq_s8(vreinterpretq_s8_m128i(a))); +} + +// Compute the absolute value of packed signed 16-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 3 +// i := j*16 +// dst[i+15:i] := ABS(a[i+15:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi16 +FORCE_INLINE __m64 _mm_abs_pi16(__m64 a) +{ + return vreinterpret_m64_s16(vabs_s16(vreinterpret_s16_m64(a))); +} + +// Compute the absolute value of packed signed 32-bit integers in a, and store +// the unsigned results in dst. +// +// FOR j := 0 to 1 +// i := j*32 +// dst[i+31:i] := ABS(a[i+31:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi32 +FORCE_INLINE __m64 _mm_abs_pi32(__m64 a) +{ + return vreinterpret_m64_s32(vabs_s32(vreinterpret_s32_m64(a))); +} + +// Compute the absolute value of packed signed 8-bit integers in a, and store +// the unsigned results in dst. 
+// +// FOR j := 0 to 7 +// i := j*8 +// dst[i+7:i] := ABS(a[i+7:i]) +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi8 +FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) +{ + return vreinterpret_m64_s8(vabs_s8(vreinterpret_s8_m64(a))); +} + +// Concatenate 16-byte blocks in a and b into a 32-byte temporary result, shift +// the result right by imm8 bytes, and store the low 16 bytes in dst. +// +// tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8) +// dst[127:0] := tmp[127:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8 +FORCE_INLINE __m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm) +{ + if (_sse2neon_unlikely(imm & ~31)) + return _mm_setzero_si128(); + int idx; + uint8x16_t tmp[2]; + if (imm >= 16) { + idx = imm - 16; + tmp[0] = vreinterpretq_u8_m128i(a); + tmp[1] = vdupq_n_u8(0); + } else { + idx = imm; + tmp[0] = vreinterpretq_u8_m128i(b); + tmp[1] = vreinterpretq_u8_m128i(a); + } + return vreinterpretq_m128i_u8(vld1q_u8(((uint8_t const *) tmp) + idx)); +} + +// Concatenate 8-byte blocks in a and b into a 16-byte temporary result, shift +// the result right by imm8 bytes, and store the low 8 bytes in dst. 
+// +// tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8) +// dst[63:0] := tmp[63:0] +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_pi8 +#define _mm_alignr_pi8(a, b, imm) \ + __extension__({ \ + __m64 ret; \ + if (_sse2neon_unlikely((imm) >= 16)) { \ + ret = vreinterpret_m64_s8(vdup_n_s8(0)); \ + } else { \ + uint8x8_t tmp_low, tmp_high; \ + if ((imm) >= 8) { \ + const int idx = (imm) -8; \ + tmp_low = vreinterpret_u8_m64(a); \ + tmp_high = vdup_n_u8(0); \ + ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \ + } else { \ + const int idx = (imm); \ + tmp_low = vreinterpret_u8_m64(b); \ + tmp_high = vreinterpret_u8_m64(a); \ + ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \ + } \ + } \ + ret; \ + }) + +// Computes pairwise add of each argument as a 16-bit signed or unsigned integer +// values a and b. +FORCE_INLINE __m128i _mm_hadd_epi16(__m128i _a, __m128i _b) +{ + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); +#if defined(__aarch64__) + return vreinterpretq_m128i_s16(vpaddq_s16(a, b)); +#else + return vreinterpretq_m128i_s16( + vcombine_s16(vpadd_s16(vget_low_s16(a), vget_high_s16(a)), + vpadd_s16(vget_low_s16(b), vget_high_s16(b)))); +#endif +} + +// Computes pairwise add of each argument as a 32-bit signed or unsigned integer +// values a and b. +FORCE_INLINE __m128i _mm_hadd_epi32(__m128i _a, __m128i _b) +{ + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + return vreinterpretq_m128i_s32( + vcombine_s32(vpadd_s32(vget_low_s32(a), vget_high_s32(a)), + vpadd_s32(vget_low_s32(b), vget_high_s32(b)))); +} + +// Horizontally add adjacent pairs of 16-bit integers in a and b, and pack the +// signed 16-bit results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi16 +FORCE_INLINE __m64 _mm_hadd_pi16(__m64 a, __m64 b) +{ + return vreinterpret_m64_s16( + vpadd_s16(vreinterpret_s16_m64(a), vreinterpret_s16_m64(b))); +} + +// Horizontally add adjacent pairs of 32-bit integers in a and b, and pack the +// signed 32-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi32 +FORCE_INLINE __m64 _mm_hadd_pi32(__m64 a, __m64 b) +{ + return vreinterpret_m64_s32( + vpadd_s32(vreinterpret_s32_m64(a), vreinterpret_s32_m64(b))); +} + +// Horizontally add adjacent pairs of signed 16-bit integers in a and b using +// saturation, and pack the signed 16-bit results in dst. +FORCE_INLINE __m128i _mm_hadds_epi16(__m128i _a, __m128i _b) +{ +#if defined(__aarch64__) + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + return vreinterpretq_m128i_s16( + vqaddq_s16(vuzp1q_s16(a, b), vuzp2q_s16(a, b))); +#else + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + // De-interleave using vshrn/vmovn + // [a0|a2|a4|a6|b0|b2|b4|b6] + // [a1|a3|a5|a7|b1|b3|b5|b7] + int16x8_t ab0246 = vcombine_s16(vmovn_s32(a), vmovn_s32(b)); + int16x8_t ab1357 = vcombine_s16(vshrn_n_s32(a, 16), vshrn_n_s32(b, 16)); + // Saturated add + return vreinterpretq_m128i_s16(vqaddq_s16(ab0246, ab1357)); +#endif +} + +// Horizontally add adjacent pairs of signed 16-bit integers in a and b using +// saturation, and pack the signed 16-bit results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_pi16 +FORCE_INLINE __m64 _mm_hadds_pi16(__m64 _a, __m64 _b) +{ + int16x4_t a = vreinterpret_s16_m64(_a); + int16x4_t b = vreinterpret_s16_m64(_b); +#if defined(__aarch64__) + return vreinterpret_m64_s16(vqadd_s16(vuzp1_s16(a, b), vuzp2_s16(a, b))); +#else + int16x4x2_t res = vuzp_s16(a, b); + return vreinterpret_m64_s16(vqadd_s16(res.val[0], res.val[1])); +#endif +} + +// Horizontally subtract adjacent pairs of 16-bit integers in a and b, and pack +// the signed 16-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16 +FORCE_INLINE __m128i _mm_hsub_epi16(__m128i _a, __m128i _b) +{ + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); +#if defined(__aarch64__) + return vreinterpretq_m128i_s16( + vsubq_s16(vuzp1q_s16(a, b), vuzp2q_s16(a, b))); +#else + int16x8x2_t c = vuzpq_s16(a, b); + return vreinterpretq_m128i_s16(vsubq_s16(c.val[0], c.val[1])); +#endif +} + +// Horizontally subtract adjacent pairs of 32-bit integers in a and b, and pack +// the signed 32-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32 +FORCE_INLINE __m128i _mm_hsub_epi32(__m128i _a, __m128i _b) +{ + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); +#if defined(__aarch64__) + return vreinterpretq_m128i_s32( + vsubq_s32(vuzp1q_s32(a, b), vuzp2q_s32(a, b))); +#else + int32x4x2_t c = vuzpq_s32(a, b); + return vreinterpretq_m128i_s32(vsubq_s32(c.val[0], c.val[1])); +#endif +} + +// Horizontally subtract adjacent pairs of 16-bit integers in a and b, and pack +// the signed 16-bit results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi16 +FORCE_INLINE __m64 _mm_hsub_pi16(__m64 _a, __m64 _b) +{ + int16x4_t a = vreinterpret_s16_m64(_a); + int16x4_t b = vreinterpret_s16_m64(_b); +#if defined(__aarch64__) + return vreinterpret_m64_s16(vsub_s16(vuzp1_s16(a, b), vuzp2_s16(a, b))); +#else + int16x4x2_t c = vuzp_s16(a, b); + return vreinterpret_m64_s16(vsub_s16(c.val[0], c.val[1])); +#endif +} + +// Horizontally subtract adjacent pairs of 32-bit integers in a and b, and pack +// the signed 32-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi32 +FORCE_INLINE __m64 _mm_hsub_pi32(__m64 _a, __m64 _b) +{ + int32x2_t a = vreinterpret_s32_m64(_a); + int32x2_t b = vreinterpret_s32_m64(_b); +#if defined(__aarch64__) + return vreinterpret_m64_s32(vsub_s32(vuzp1_s32(a, b), vuzp2_s32(a, b))); +#else + int32x2x2_t c = vuzp_s32(a, b); + return vreinterpret_m64_s32(vsub_s32(c.val[0], c.val[1])); +#endif +} + +// Horizontally subtract adjacent pairs of signed 16-bit integers in a and b +// using saturation, and pack the signed 16-bit results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16 +FORCE_INLINE __m128i _mm_hsubs_epi16(__m128i _a, __m128i _b) +{ + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); +#if defined(__aarch64__) + return vreinterpretq_m128i_s16( + vqsubq_s16(vuzp1q_s16(a, b), vuzp2q_s16(a, b))); +#else + int16x8x2_t c = vuzpq_s16(a, b); + return vreinterpretq_m128i_s16(vqsubq_s16(c.val[0], c.val[1])); +#endif +} + +// Horizontally subtract adjacent pairs of signed 16-bit integers in a and b +// using saturation, and pack the signed 16-bit results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_pi16 +FORCE_INLINE __m64 _mm_hsubs_pi16(__m64 _a, __m64 _b) +{ + int16x4_t a = vreinterpret_s16_m64(_a); + int16x4_t b = vreinterpret_s16_m64(_b); +#if defined(__aarch64__) + return vreinterpret_m64_s16(vqsub_s16(vuzp1_s16(a, b), vuzp2_s16(a, b))); +#else + int16x4x2_t c = vuzp_s16(a, b); + return vreinterpret_m64_s16(vqsub_s16(c.val[0], c.val[1])); +#endif +} + +// Vertically multiply each unsigned 8-bit integer from a with the corresponding +// signed 8-bit integer from b, producing intermediate signed 16-bit integers. +// Horizontally add adjacent pairs of intermediate signed 16-bit integers, +// and pack the saturated results in dst. +// +// FOR j := 0 to 7 +// i := j*16 +// dst[i+15:i] := Saturate_To_Int16( a[i+15:i+8]*b[i+15:i+8] + +// a[i+7:i]*b[i+7:i] ) +// ENDFOR +FORCE_INLINE __m128i _mm_maddubs_epi16(__m128i _a, __m128i _b) +{ +#if defined(__aarch64__) + uint8x16_t a = vreinterpretq_u8_m128i(_a); + int8x16_t b = vreinterpretq_s8_m128i(_b); + int16x8_t tl = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a))), + vmovl_s8(vget_low_s8(b))); + int16x8_t th = vmulq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a))), + vmovl_s8(vget_high_s8(b))); + return vreinterpretq_m128i_s16( + vqaddq_s16(vuzp1q_s16(tl, th), vuzp2q_s16(tl, th))); +#else + // This would be much simpler if x86 would choose to zero extend OR sign + // extend, not both. This could probably be optimized better. + uint16x8_t a = vreinterpretq_u16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + + // Zero extend a + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a, vdupq_n_u16(0xff00))); + + // Sign extend by shifting left then shifting right. 
+ int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b, 8); + + // multiply + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + // saturated add + return vreinterpretq_m128i_s16(vqaddq_s16(prod1, prod2)); +#endif +} + +// Vertically multiply each unsigned 8-bit integer from a with the corresponding +// signed 8-bit integer from b, producing intermediate signed 16-bit integers. +// Horizontally add adjacent pairs of intermediate signed 16-bit integers, and +// pack the saturated results in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_pi16 +FORCE_INLINE __m64 _mm_maddubs_pi16(__m64 _a, __m64 _b) +{ + uint16x4_t a = vreinterpret_u16_m64(_a); + int16x4_t b = vreinterpret_s16_m64(_b); + + // Zero extend a + int16x4_t a_odd = vreinterpret_s16_u16(vshr_n_u16(a, 8)); + int16x4_t a_even = vreinterpret_s16_u16(vand_u16(a, vdup_n_u16(0xff))); + + // Sign extend by shifting left then shifting right. + int16x4_t b_even = vshr_n_s16(vshl_n_s16(b, 8), 8); + int16x4_t b_odd = vshr_n_s16(b, 8); + + // multiply + int16x4_t prod1 = vmul_s16(a_even, b_even); + int16x4_t prod2 = vmul_s16(a_odd, b_odd); + + // saturated add + return vreinterpret_m64_s16(vqadd_s16(prod1, prod2)); +} + +// Multiply packed signed 16-bit integers in a and b, producing intermediate +// signed 32-bit integers. Shift right by 15 bits while rounding up, and store +// the packed 16-bit integers in dst. +// +// r0 := Round(((int32_t)a0 * (int32_t)b0) >> 15) +// r1 := Round(((int32_t)a1 * (int32_t)b1) >> 15) +// r2 := Round(((int32_t)a2 * (int32_t)b2) >> 15) +// ... 
+// r7 := Round(((int32_t)a7 * (int32_t)b7) >> 15) +FORCE_INLINE __m128i _mm_mulhrs_epi16(__m128i a, __m128i b) +{ + // Has issues due to saturation + // return vreinterpretq_m128i_s16(vqrdmulhq_s16(a, b)); + + // Multiply + int32x4_t mul_lo = vmull_s16(vget_low_s16(vreinterpretq_s16_m128i(a)), + vget_low_s16(vreinterpretq_s16_m128i(b))); + int32x4_t mul_hi = vmull_s16(vget_high_s16(vreinterpretq_s16_m128i(a)), + vget_high_s16(vreinterpretq_s16_m128i(b))); + + // Rounding narrowing shift right + // narrow = (int16_t)((mul + 16384) >> 15); + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + // Join together + return vreinterpretq_m128i_s16(vcombine_s16(narrow_lo, narrow_hi)); +} + +// Multiply packed signed 16-bit integers in a and b, producing intermediate +// signed 32-bit integers. Truncate each intermediate integer to the 18 most +// significant bits, round by adding 1, and store bits [16:1] to dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_pi16 +FORCE_INLINE __m64 _mm_mulhrs_pi16(__m64 a, __m64 b) +{ + int32x4_t mul_extend = + vmull_s16((vreinterpret_s16_m64(a)), (vreinterpret_s16_m64(b))); + + // Rounding narrowing shift right + return vreinterpret_m64_s16(vrshrn_n_s32(mul_extend, 15)); +} + +// Shuffle packed 8-bit integers in a according to shuffle control mask in the +// corresponding 8-bit element of b, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8 +FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b) +{ + int8x16_t tbl = vreinterpretq_s8_m128i(a); // input a + uint8x16_t idx = vreinterpretq_u8_m128i(b); // input b + uint8x16_t idx_masked = + vandq_u8(idx, vdupq_n_u8(0x8F)); // avoid using meaningless bits +#if defined(__aarch64__) + return vreinterpretq_m128i_s8(vqtbl1q_s8(tbl, idx_masked)); +#elif defined(__GNUC__) + int8x16_t ret; + // %e and %f represent the even and odd D registers + // respectively. + __asm__ __volatile__( + "vtbl.8 %e[ret], {%e[tbl], %f[tbl]}, %e[idx]\n" + "vtbl.8 %f[ret], {%e[tbl], %f[tbl]}, %f[idx]\n" + : [ret] "=&w"(ret) + : [tbl] "w"(tbl), [idx] "w"(idx_masked)); + return vreinterpretq_m128i_s8(ret); +#else + // use this line if testing on aarch64 + int8x8x2_t a_split = {vget_low_s8(tbl), vget_high_s8(tbl)}; + return vreinterpretq_m128i_s8( + vcombine_s8(vtbl2_s8(a_split, vget_low_u8(idx_masked)), + vtbl2_s8(a_split, vget_high_u8(idx_masked)))); +#endif +} + +// Shuffle packed 8-bit integers in a according to shuffle control mask in the +// corresponding 8-bit element of b, and store the results in dst. +// +// FOR j := 0 to 7 +// i := j*8 +// IF b[i+7] == 1 +// dst[i+7:i] := 0 +// ELSE +// index[2:0] := b[i+2:i] +// dst[i+7:i] := a[index*8+7:index*8] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi8 +FORCE_INLINE __m64 _mm_shuffle_pi8(__m64 a, __m64 b) +{ + const int8x8_t controlMask = + vand_s8(vreinterpret_s8_m64(b), vdup_n_s8((int8_t) (0x1 << 7 | 0x07))); + int8x8_t res = vtbl1_s8(vreinterpret_s8_m64(a), controlMask); + return vreinterpret_m64_s8(res); +} + +// Negate packed 16-bit integers in a when the corresponding signed +// 16-bit integer in b is negative, and store the results in dst. +// Element in dst are zeroed out when the corresponding element +// in b is zero. 
+// +// for i in 0..7 +// if b[i] < 0 +// r[i] := -a[i] +// else if b[i] == 0 +// r[i] := 0 +// else +// r[i] := a[i] +// fi +// done +FORCE_INLINE __m128i _mm_sign_epi16(__m128i _a, __m128i _b) +{ + int16x8_t a = vreinterpretq_s16_m128i(_a); + int16x8_t b = vreinterpretq_s16_m128i(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFF : 0 + uint16x8_t ltMask = vreinterpretq_u16_s16(vshrq_n_s16(b, 15)); + // (b == 0) ? 0xFFFF : 0 +#if defined(__aarch64__) + int16x8_t zeroMask = vreinterpretq_s16_u16(vceqzq_s16(b)); +#else + int16x8_t zeroMask = vreinterpretq_s16_u16(vceqq_s16(b, vdupq_n_s16(0))); +#endif + + // bitwise select either a or negative 'a' (vnegq_s16(a) equals to negative + // 'a') based on ltMask + int16x8_t masked = vbslq_s16(ltMask, vnegq_s16(a), a); + // res = masked & (~zeroMask) + int16x8_t res = vbicq_s16(masked, zeroMask); + return vreinterpretq_m128i_s16(res); +} + +// Negate packed 32-bit integers in a when the corresponding signed +// 32-bit integer in b is negative, and store the results in dst. +// Element in dst are zeroed out when the corresponding element +// in b is zero. +// +// for i in 0..3 +// if b[i] < 0 +// r[i] := -a[i] +// else if b[i] == 0 +// r[i] := 0 +// else +// r[i] := a[i] +// fi +// done +FORCE_INLINE __m128i _mm_sign_epi32(__m128i _a, __m128i _b) +{ + int32x4_t a = vreinterpretq_s32_m128i(_a); + int32x4_t b = vreinterpretq_s32_m128i(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFFFFFF : 0 + uint32x4_t ltMask = vreinterpretq_u32_s32(vshrq_n_s32(b, 31)); + + // (b == 0) ? 
0xFFFFFFFF : 0 +#if defined(__aarch64__) + int32x4_t zeroMask = vreinterpretq_s32_u32(vceqzq_s32(b)); +#else + int32x4_t zeroMask = vreinterpretq_s32_u32(vceqq_s32(b, vdupq_n_s32(0))); +#endif + + // bitwise select either a or negative 'a' (vnegq_s32(a) equals to negative + // 'a') based on ltMask + int32x4_t masked = vbslq_s32(ltMask, vnegq_s32(a), a); + // res = masked & (~zeroMask) + int32x4_t res = vbicq_s32(masked, zeroMask); + return vreinterpretq_m128i_s32(res); +} + +// Negate packed 8-bit integers in a when the corresponding signed +// 8-bit integer in b is negative, and store the results in dst. +// Element in dst are zeroed out when the corresponding element +// in b is zero. +// +// for i in 0..15 +// if b[i] < 0 +// r[i] := -a[i] +// else if b[i] == 0 +// r[i] := 0 +// else +// r[i] := a[i] +// fi +// done +FORCE_INLINE __m128i _mm_sign_epi8(__m128i _a, __m128i _b) +{ + int8x16_t a = vreinterpretq_s8_m128i(_a); + int8x16_t b = vreinterpretq_s8_m128i(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFF : 0 + uint8x16_t ltMask = vreinterpretq_u8_s8(vshrq_n_s8(b, 7)); + + // (b == 0) ? 0xFF : 0 +#if defined(__aarch64__) + int8x16_t zeroMask = vreinterpretq_s8_u8(vceqzq_s8(b)); +#else + int8x16_t zeroMask = vreinterpretq_s8_u8(vceqq_s8(b, vdupq_n_s8(0))); +#endif + + // bitwise select either a or negative 'a' (vnegq_s8(a) return negative 'a') + // based on ltMask + int8x16_t masked = vbslq_s8(ltMask, vnegq_s8(a), a); + // res = masked & (~zeroMask) + int8x16_t res = vbicq_s8(masked, zeroMask); + + return vreinterpretq_m128i_s8(res); +} + +// Negate packed 16-bit integers in a when the corresponding signed 16-bit +// integer in b is negative, and store the results in dst. Element in dst are +// zeroed out when the corresponding element in b is zero. 
+// +// FOR j := 0 to 3 +// i := j*16 +// IF b[i+15:i] < 0 +// dst[i+15:i] := -(a[i+15:i]) +// ELSE IF b[i+15:i] == 0 +// dst[i+15:i] := 0 +// ELSE +// dst[i+15:i] := a[i+15:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi16 +FORCE_INLINE __m64 _mm_sign_pi16(__m64 _a, __m64 _b) +{ + int16x4_t a = vreinterpret_s16_m64(_a); + int16x4_t b = vreinterpret_s16_m64(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFF : 0 + uint16x4_t ltMask = vreinterpret_u16_s16(vshr_n_s16(b, 15)); + + // (b == 0) ? 0xFFFF : 0 +#if defined(__aarch64__) + int16x4_t zeroMask = vreinterpret_s16_u16(vceqz_s16(b)); +#else + int16x4_t zeroMask = vreinterpret_s16_u16(vceq_s16(b, vdup_n_s16(0))); +#endif + + // bitwise select either a or negative 'a' (vneg_s16(a) return negative 'a') + // based on ltMask + int16x4_t masked = vbsl_s16(ltMask, vneg_s16(a), a); + // res = masked & (~zeroMask) + int16x4_t res = vbic_s16(masked, zeroMask); + + return vreinterpret_m64_s16(res); +} + +// Negate packed 32-bit integers in a when the corresponding signed 32-bit +// integer in b is negative, and store the results in dst. Element in dst are +// zeroed out when the corresponding element in b is zero. +// +// FOR j := 0 to 1 +// i := j*32 +// IF b[i+31:i] < 0 +// dst[i+31:i] := -(a[i+31:i]) +// ELSE IF b[i+31:i] == 0 +// dst[i+31:i] := 0 +// ELSE +// dst[i+31:i] := a[i+31:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi32 +FORCE_INLINE __m64 _mm_sign_pi32(__m64 _a, __m64 _b) +{ + int32x2_t a = vreinterpret_s32_m64(_a); + int32x2_t b = vreinterpret_s32_m64(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFFFFFFFF : 0 + uint32x2_t ltMask = vreinterpret_u32_s32(vshr_n_s32(b, 31)); + + // (b == 0) ? 
0xFFFFFFFF : 0 +#if defined(__aarch64__) + int32x2_t zeroMask = vreinterpret_s32_u32(vceqz_s32(b)); +#else + int32x2_t zeroMask = vreinterpret_s32_u32(vceq_s32(b, vdup_n_s32(0))); +#endif + + // bitwise select either a or negative 'a' (vneg_s32(a) return negative 'a') + // based on ltMask + int32x2_t masked = vbsl_s32(ltMask, vneg_s32(a), a); + // res = masked & (~zeroMask) + int32x2_t res = vbic_s32(masked, zeroMask); + + return vreinterpret_m64_s32(res); +} + +// Negate packed 8-bit integers in a when the corresponding signed 8-bit integer +// in b is negative, and store the results in dst. Element in dst are zeroed out +// when the corresponding element in b is zero. +// +// FOR j := 0 to 7 +// i := j*8 +// IF b[i+7:i] < 0 +// dst[i+7:i] := -(a[i+7:i]) +// ELSE IF b[i+7:i] == 0 +// dst[i+7:i] := 0 +// ELSE +// dst[i+7:i] := a[i+7:i] +// FI +// ENDFOR +// +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi8 +FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) +{ + int8x8_t a = vreinterpret_s8_m64(_a); + int8x8_t b = vreinterpret_s8_m64(_b); + + // signed shift right: faster than vclt + // (b < 0) ? 0xFF : 0 + uint8x8_t ltMask = vreinterpret_u8_s8(vshr_n_s8(b, 7)); + + // (b == 0) ? 0xFF : 0 +#if defined(__aarch64__) + int8x8_t zeroMask = vreinterpret_s8_u8(vceqz_s8(b)); +#else + int8x8_t zeroMask = vreinterpret_s8_u8(vceq_s8(b, vdup_n_s8(0))); +#endif + + // bitwise select either a or negative 'a' (vneg_s8(a) return negative 'a') + // based on ltMask + int8x8_t masked = vbsl_s8(ltMask, vneg_s8(a), a); + // res = masked & (~zeroMask) + int8x8_t res = vbic_s8(masked, zeroMask); + + return vreinterpret_m64_s8(res); +} + +/* SSE4.1 */ + +// Blend packed 16-bit integers from a and b using control mask imm8, and store +// the results in dst. 
+//
+//   FOR j := 0 to 7
+//       i := j*16
+//       IF imm8[j]
+//           dst[i+15:i] := b[i+15:i]
+//       ELSE
+//           dst[i+15:i] := a[i+15:i]
+//       FI
+//   ENDFOR
+// FORCE_INLINE __m128i _mm_blend_epi16(__m128i a, __m128i b,
+//                                      __constrange(0,255) int imm)
+//
+// Implemented as a macro: bit j of imm is expanded into an all-ones or
+// all-zeros 16-bit lane, and the resulting vector drives a bitwise select.
+#define _mm_blend_epi16(a, b, imm)                                           \
+    __extension__({                                                          \
+        const uint16_t _mask[8] = {((imm) & (1 << 0)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 1)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 2)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 3)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 4)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 5)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 6)) ? (uint16_t) -1 : 0x0, \
+                                   ((imm) & (1 << 7)) ? (uint16_t) -1 : 0x0}; \
+        uint16x8_t _mask_vec = vld1q_u16(_mask);                             \
+        uint16x8_t _a = vreinterpretq_u16_m128i(a);                          \
+        uint16x8_t _b = vreinterpretq_u16_m128i(b);                          \
+        vreinterpretq_m128i_u16(vbslq_u16(_mask_vec, _b, _a));               \
+    })
+
+// Blend packed double-precision (64-bit) floating-point elements from a and b
+// using control mask imm8, and store the results in dst. Bit j of imm picks
+// lane j from b when set and from a when clear.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd
+#define _mm_blend_pd(a, b, imm)                                \
+    __extension__({                                            \
+        const uint64_t _mask[2] = {                            \
+            ((imm) & (1 << 0)) ? ~UINT64_C(0) : UINT64_C(0),   \
+            ((imm) & (1 << 1)) ? ~UINT64_C(0) : UINT64_C(0)};  \
+        uint64x2_t _mask_vec = vld1q_u64(_mask);               \
+        uint64x2_t _a = vreinterpretq_u64_m128d(a);            \
+        uint64x2_t _b = vreinterpretq_u64_m128d(b);            \
+        vreinterpretq_m128d_u64(vbslq_u64(_mask_vec, _b, _a)); \
+    })
+
+// Blend packed single-precision (32-bit) floating-point elements from a and b
+// using mask, and store the results in dst. Bit j of imm8 (j = 0..3) picks
+// lane j from b when set and from a when clear.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps
+FORCE_INLINE __m128 _mm_blend_ps(__m128 _a, __m128 _b, const char imm8)
+{
+    // One all-ones or all-zeros word per lane, derived from the low four bits
+    // of imm8.
+    const uint32_t ALIGN_STRUCT(16) lane_bits[4] = {
+        (imm8 & (1 << 0)) ? UINT32_MAX : 0,
+        (imm8 & (1 << 1)) ? UINT32_MAX : 0,
+        (imm8 & (1 << 2)) ? UINT32_MAX : 0,
+        (imm8 & (1 << 3)) ? UINT32_MAX : 0,
+    };
+    uint32x4_t take_b = vld1q_u32(lane_bits);
+    float32x4_t from_a = vreinterpretq_f32_m128(_a);
+    float32x4_t from_b = vreinterpretq_f32_m128(_b);
+    return vreinterpretq_m128_f32(vbslq_f32(take_b, from_b, from_a));
+}
+
+// Blend packed 8-bit integers from a and b using mask, and store the results in
+// dst. Only the most significant bit of each mask byte is consulted.
+//
+//   FOR j := 0 to 15
+//       i := j*8
+//       IF mask[i+7]
+//           dst[i+7:i] := b[i+7:i]
+//       ELSE
+//           dst[i+7:i] := a[i+7:i]
+//       FI
+//   ENDFOR
+FORCE_INLINE __m128i _mm_blendv_epi8(__m128i _a, __m128i _b, __m128i _mask)
+{
+    // A signed shift right by 7 replicates each byte's sign bit across the
+    // byte, turning the mask into full-lane select bits.
+    int8x16_t sign_spread = vshrq_n_s8(vreinterpretq_s8_m128i(_mask), 7);
+    uint8x16_t take_b = vreinterpretq_u8_s8(sign_spread);
+    return vreinterpretq_m128i_u8(vbslq_u8(take_b, vreinterpretq_u8_m128i(_b),
+                                           vreinterpretq_u8_m128i(_a)));
+}
+
+// Blend packed double-precision (64-bit) floating-point elements from a and b
+// using mask, and store the results in dst. The sign bit of each 64-bit mask
+// element decides whether the lane comes from b (set) or a (clear).
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd
+FORCE_INLINE __m128d _mm_blendv_pd(__m128d _a, __m128d _b, __m128d _mask)
+{
+    // Spread each lane's sign bit over all 64 bits.
+    uint64x2_t take_b =
+        vreinterpretq_u64_s64(vshrq_n_s64(vreinterpretq_s64_m128d(_mask), 63));
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_f64(vbslq_f64(
+        take_b, vreinterpretq_f64_m128d(_b), vreinterpretq_f64_m128d(_a)));
+#else
+    // No float64x2_t select here: do the same bitwise select on 64-bit
+    // integer lanes.
+    return vreinterpretq_m128d_u64(vbslq_u64(
+        take_b, vreinterpretq_u64_m128d(_b), vreinterpretq_u64_m128d(_a)));
+#endif
+}
+
+// Blend packed single-precision (32-bit) floating-point elements from a and b
+// using mask, and store the results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps
+FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask)
+{
+    // A signed shift right by 31 spreads each lane's sign bit across the lane,
+    // giving an all-ones select word wherever the mask element is negative.
+    int32x4_t sign_spread = vshrq_n_s32(vreinterpretq_s32_m128(_mask), 31);
+    uint32x4_t take_b = vreinterpretq_u32_s32(sign_spread);
+    return vreinterpretq_m128_f32(vbslq_f32(
+        take_b, vreinterpretq_f32_m128(_b), vreinterpretq_f32_m128(_a)));
+}
+
+// Round the packed double-precision (64-bit) floating-point elements in a up
+// to an integer value, and store the results as packed double-precision
+// floating-point elements in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd
+FORCE_INLINE __m128d _mm_ceil_pd(__m128d a)
+{
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_f64(vrndpq_f64(vreinterpretq_f64_m128d(a)));
+#else
+    // Scalar fallback: round each lane with ceil() and rebuild the vector
+    // (_mm_set_pd takes the upper element first).
+    double *lanes = (double *) &a;
+    double upper = ceil(lanes[1]);
+    double lower = ceil(lanes[0]);
+    return _mm_set_pd(upper, lower);
+#endif
+}
+
+// Round the packed single-precision (32-bit) floating-point elements in a up to
+// an integer value, and store the results as packed single-precision
+// floating-point elements in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps
+FORCE_INLINE __m128 _mm_ceil_ps(__m128 a)
+{
+#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING)
+    return vreinterpretq_m128_f32(vrndpq_f32(vreinterpretq_f32_m128(a)));
+#else
+    // Scalar fallback: round each lane with ceilf() and rebuild the vector
+    // (_mm_set_ps takes the highest element first).
+    float *lanes = (float *) &a;
+    float r3 = ceilf(lanes[3]);
+    float r2 = ceilf(lanes[2]);
+    float r1 = ceilf(lanes[1]);
+    float r0 = ceilf(lanes[0]);
+    return _mm_set_ps(r3, r2, r1, r0);
+#endif
+}
+
+// Round the lower double-precision (64-bit) floating-point element in b up to
+// an integer value, store the result as a double-precision floating-point
+// element in the lower element of dst, and copy the upper element from a to the
+// upper element of dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd
+FORCE_INLINE __m128d _mm_ceil_sd(__m128d a, __m128d b)
+{
+    // Round all of b, then keep only its lower element on top of a.
+    __m128d rounded = _mm_ceil_pd(b);
+    return _mm_move_sd(a, rounded);
+}
+
+// Round the lower single-precision (32-bit) floating-point element in b up to
+// an integer value, store the result as a single-precision floating-point
+// element in the lower element of dst, and copy the upper 3 packed elements
+// from a to the upper elements of dst.
+//
+//   dst[31:0] := CEIL(b[31:0])
+//   dst[127:32] := a[127:32]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss
+FORCE_INLINE __m128 _mm_ceil_ss(__m128 a, __m128 b)
+{
+    // Round all of b, then keep only its lowest element on top of a.
+    __m128 rounded = _mm_ceil_ps(b);
+    return _mm_move_ss(a, rounded);
+}
+
+// Compare packed 64-bit integers in a and b for equality, and store the results
+// in dst
+FORCE_INLINE __m128i _mm_cmpeq_epi64(__m128i a, __m128i b)
+{
+#if defined(__aarch64__)
+    uint64x2_t lhs = vreinterpretq_u64_m128i(a);
+    uint64x2_t rhs = vreinterpretq_u64_m128i(b);
+    return vreinterpretq_m128i_u64(vceqq_u64(lhs, rhs));
+#else
+    // ARMv7 lacks vceqq_u64, so compare the 32-bit halves instead:
+    // (a == b) -> (a_lo == b_lo) && (a_hi == b_hi)
+    uint32x4_t half_eq =
+        vceqq_u32(vreinterpretq_u32_m128i(a), vreinterpretq_u32_m128i(b));
+    // Swapping the two words of each 64-bit lane and ANDing leaves a lane
+    // all-ones only when both of its halves compared equal.
+    return vreinterpretq_m128i_u32(vandq_u32(half_eq, vrev64q_u32(half_eq)));
+#endif
+}
+
+// Converts the four signed 16-bit integers in the lower 64 bits to four signed
+// 32-bit integers.
+FORCE_INLINE __m128i _mm_cvtepi16_epi32(__m128i a)
+{
+    int16x4_t low_half = vget_low_s16(vreinterpretq_s16_m128i(a));
+    return vreinterpretq_m128i_s32(vmovl_s16(low_half));
+}
+
+// Converts the two signed 16-bit integers in the lower 32 bits to two signed
+// 64-bit integers.
+FORCE_INLINE __m128i _mm_cvtepi16_epi64(__m128i a)
+{
+    // Sign-extend in two widening steps: 16 -> 32 -> 64 bits.
+    int16x4_t lo16 =
+        vget_low_s16(vreinterpretq_s16_m128i(a)); /* xxxx xxxx xxxx 0B0A */
+    int32x2_t lo32 = vget_low_s32(vmovl_s16(lo16)); /* 000x 000x 000B 000A */
+    return vreinterpretq_m128i_s64(vmovl_s32(lo32)); /* 0000 000B 0000 000A */
+}
+
+// Converts the two signed 32-bit integers in the lower 64 bits to two signed
+// 64-bit integers.
+FORCE_INLINE __m128i _mm_cvtepi32_epi64(__m128i a)
+{
+    int32x2_t low_half = vget_low_s32(vreinterpretq_s32_m128i(a));
+    return vreinterpretq_m128i_s64(vmovl_s32(low_half));
+}
+
+// Converts the eight signed 8-bit integers in the lower 64 bits to eight
+// signed 16-bit integers.
+FORCE_INLINE __m128i _mm_cvtepi8_epi16(__m128i a)
+{
+    int8x8_t lo8 =
+        vget_low_s8(vreinterpretq_s8_m128i(a)); /* xxxx xxxx HGFE DCBA */
+    return vreinterpretq_m128i_s16(vmovl_s8(lo8)); /* 0H0G 0F0E 0D0C 0B0A */
+}
+
+// Converts the four signed 8-bit integers in the lower 32 bits to four
+// signed 32-bit integers.
+FORCE_INLINE __m128i _mm_cvtepi8_epi32(__m128i a)
+{
+    // Sign-extend in two widening steps: 8 -> 16 -> 32 bits.
+    int8x8_t lo8 =
+        vget_low_s8(vreinterpretq_s8_m128i(a)); /* xxxx xxxx xxxx DCBA */
+    int16x4_t lo16 = vget_low_s16(vmovl_s8(lo8)); /* 0x0x 0x0x 0D0C 0B0A */
+    return vreinterpretq_m128i_s32(vmovl_s16(lo16)); /* 000D 000C 000B 000A */
+}
+
+// Converts the two signed 8-bit integers in the lower 16 bits to two
+// signed 64-bit integers.
+FORCE_INLINE __m128i _mm_cvtepi8_epi64(__m128i a)
+{
+    // Sign-extend in three widening steps: 8 -> 16 -> 32 -> 64 bits.
+    int8x8_t lo8 =
+        vget_low_s8(vreinterpretq_s8_m128i(a)); /* xxxx xxxx xxxx xxBA */
+    int16x4_t lo16 = vget_low_s16(vmovl_s8(lo8)); /* 0x0x 0x0x 0x0x 0B0A */
+    int32x2_t lo32 = vget_low_s32(vmovl_s16(lo16)); /* 000x 000x 000B 000A */
+    return vreinterpretq_m128i_s64(vmovl_s32(lo32)); /* 0000 000B 0000 000A */
+}
+
+// Converts the four unsigned 16-bit integers in the lower 64 bits to four
+// unsigned 32-bit integers.
+FORCE_INLINE __m128i _mm_cvtepu16_epi32(__m128i a)
+{
+    uint16x4_t low_half = vget_low_u16(vreinterpretq_u16_m128i(a));
+    return vreinterpretq_m128i_u32(vmovl_u16(low_half));
+}
+
+// Converts the two unsigned 16-bit integers in the lower 32 bits to two
+// unsigned 64-bit integers.
+FORCE_INLINE __m128i _mm_cvtepu16_epi64(__m128i a)
+{
+    // Zero-extend in two widening steps: 16 -> 32 -> 64 bits.
+    uint16x4_t lo16 =
+        vget_low_u16(vreinterpretq_u16_m128i(a)); /* xxxx xxxx xxxx 0B0A */
+    uint32x2_t lo32 = vget_low_u32(vmovl_u16(lo16)); /* 000x 000x 000B 000A */
+    return vreinterpretq_m128i_u64(vmovl_u32(lo32)); /* 0000 000B 0000 000A */
+}
+
+// Converts the two unsigned 32-bit integers in the lower 64 bits to two
+// unsigned 64-bit integers.
+FORCE_INLINE __m128i _mm_cvtepu32_epi64(__m128i a)
+{
+    uint32x2_t low_half = vget_low_u32(vreinterpretq_u32_m128i(a));
+    return vreinterpretq_m128i_u64(vmovl_u32(low_half));
+}
+
+// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers,
+// and store the results in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16
+FORCE_INLINE __m128i _mm_cvtepu8_epi16(__m128i a)
+{
+    uint8x8_t lo8 =
+        vget_low_u8(vreinterpretq_u8_m128i(a)); /* xxxx xxxx HGFE DCBA */
+    return vreinterpretq_m128i_u16(vmovl_u8(lo8)); /* 0H0G 0F0E 0D0C 0B0A */
+}
+
+// Converts the four unsigned 8-bit integers in the lower 32 bits to four
+// unsigned 32-bit integers.
+// https://msdn.microsoft.com/en-us/library/bb531467%28v=vs.100%29.aspx
+FORCE_INLINE __m128i _mm_cvtepu8_epi32(__m128i a)
+{
+    // Zero-extend in two widening steps: 8 -> 16 -> 32 bits.
+    uint8x8_t lo8 =
+        vget_low_u8(vreinterpretq_u8_m128i(a)); /* xxxx xxxx xxxx DCBA */
+    uint16x4_t lo16 = vget_low_u16(vmovl_u8(lo8)); /* 0x0x 0x0x 0D0C 0B0A */
+    return vreinterpretq_m128i_u32(vmovl_u16(lo16)); /* 000D 000C 000B 000A */
+}
+
+// Converts the two unsigned 8-bit integers in the lower 16 bits to two
+// unsigned 64-bit integers.
+FORCE_INLINE __m128i _mm_cvtepu8_epi64(__m128i a)
+{
+    // Zero-extend in three widening steps: 8 -> 16 -> 32 -> 64 bits.
+    uint8x16_t u8x16 = vreinterpretq_u8_m128i(a);      /* xxxx xxxx xxxx xxBA */
+    uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16));   /* 0x0x 0x0x 0x0x 0B0A */
+    uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */
+    uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */
+    return vreinterpretq_m128i_u64(u64x2);
+}
+
+// Conditionally multiply the packed double-precision (64-bit) floating-point
+// elements in a and b, sum the two products, and conditionally store the sum
+// in dst.
+//
+// Bits 4 and 5 of imm select which of the two products take part in the sum
+// (an unselected product contributes 0). Bits 0 and 1 of imm select which of
+// the two lanes of dst receive the sum; unselected lanes are zero.
+//
+// When SSE2NEON_PRECISE_DP is non-zero the unselected products are never
+// computed; otherwise both products are computed and then masked.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd
+FORCE_INLINE __m128d _mm_dp_pd(__m128d a, __m128d b, const int imm)
+{
+    // Generate mask value from constant immediate bit value
+    // (all-ones when the bit is set, zero otherwise).
+    const int64_t bit0Mask = imm & 0x01 ? UINT64_MAX : 0;
+    const int64_t bit1Mask = imm & 0x02 ? UINT64_MAX : 0;
+#if !SSE2NEON_PRECISE_DP
+    const int64_t bit4Mask = imm & 0x10 ? UINT64_MAX : 0;
+    const int64_t bit5Mask = imm & 0x20 ? UINT64_MAX : 0;
+#endif
+    // Conditional multiplication
+#if !SSE2NEON_PRECISE_DP
+    // Multiply both lanes, then AND away the products that were not selected.
+    __m128d mul = _mm_mul_pd(a, b);
+    const __m128d mulMask =
+        _mm_castsi128_pd(_mm_set_epi64x(bit5Mask, bit4Mask));
+    __m128d tmp = _mm_and_pd(mul, mulMask);
+#else
+    // Only compute the selected products; the others are exactly 0.
+#if defined(__aarch64__)
+    double d0 = (imm & 0x10) ? vgetq_lane_f64(vreinterpretq_f64_m128d(a), 0) *
+                                   vgetq_lane_f64(vreinterpretq_f64_m128d(b), 0)
+                             : 0;
+    double d1 = (imm & 0x20) ? vgetq_lane_f64(vreinterpretq_f64_m128d(a), 1) *
+                                   vgetq_lane_f64(vreinterpretq_f64_m128d(b), 1)
+                             : 0;
+#else
+    double d0 = (imm & 0x10) ? ((double *) &a)[0] * ((double *) &b)[0] : 0;
+    double d1 = (imm & 0x20) ? ((double *) &a)[1] * ((double *) &b)[1] : 0;
+#endif
+    __m128d tmp = _mm_set_pd(d1, d0);
+#endif
+    // Sum the products
+#if defined(__aarch64__)
+    double sum = vpaddd_f64(vreinterpretq_f64_m128d(tmp));
+#else
+    double sum = *((double *) &tmp) + *(((double *) &tmp) + 1);
+#endif
+    // Conditionally store the sum: broadcast it to both lanes, then keep only
+    // the lanes selected by bits 0 and 1 of imm.
+    const __m128d sumMask =
+        _mm_castsi128_pd(_mm_set_epi64x(bit1Mask, bit0Mask));
+    __m128d res = _mm_and_pd(_mm_set_pd1(sum), sumMask);
+    return res;
+}
+
+// Conditionally multiply the packed single-precision (32-bit) floating-point
+// elements in a and b using the high 4 bits in imm8, sum the four products,
+// and conditionally store the sum in dst using the low 4 bits of imm.
+//
+// Bit 4+j of imm includes the product of lane j in the sum; bit j of imm
+// writes the sum to lane j of dst (other lanes are zero).
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps
+FORCE_INLINE __m128 _mm_dp_ps(__m128 a, __m128 b, const int imm)
+{
+#if defined(__aarch64__)
+    /* shortcuts */
+    // imm == 0xFF: all four products summed and broadcast to every lane.
+    if (imm == 0xFF) {
+        return _mm_set1_ps(vaddvq_f32(_mm_mul_ps(a, b)));
+    }
+    // imm == 0x7F: lane 3's product is dropped (set to 0) before the
+    // horizontal add; the sum is still broadcast to every lane.
+    if (imm == 0x7F) {
+        float32x4_t m = _mm_mul_ps(a, b);
+        m[3] = 0;
+        return _mm_set1_ps(vaddvq_f32(m));
+    }
+#endif
+
+    // General path: s is the running sum, c the compensation term maintained
+    // by _sse2neon_kadd_f32.
+    float s = 0, c = 0;
+    float32x4_t f32a = vreinterpretq_f32_m128(a);
+    float32x4_t f32b = vreinterpretq_f32_m128(b);
+
+    /* To improve the accuracy of floating-point summation, Kahan algorithm
+     * is used for each operation.
+     */
+    if (imm & (1 << 4))
+        _sse2neon_kadd_f32(&s, &c, f32a[0] * f32b[0]);
+    if (imm & (1 << 5))
+        _sse2neon_kadd_f32(&s, &c, f32a[1] * f32b[1]);
+    if (imm & (1 << 6))
+        _sse2neon_kadd_f32(&s, &c, f32a[2] * f32b[2]);
+    if (imm & (1 << 7))
+        _sse2neon_kadd_f32(&s, &c, f32a[3] * f32b[3]);
+    // Fold the accumulated compensation back into the sum.
+    s += c;
+
+    // Store the sum only in the lanes selected by the low four bits of imm.
+    float32x4_t res = {
+        (imm & 0x1) ? s : 0,
+        (imm & 0x2) ? s : 0,
+        (imm & 0x4) ? s : 0,
+        (imm & 0x8) ? s : 0,
+    };
+    return vreinterpretq_m128_f32(res);
+}
+
+// Extracts the selected signed or unsigned 32-bit integer from a and zero
+// extends.
+// FORCE_INLINE int _mm_extract_epi32(__m128i a, __constrange(0,4) int imm)
+// Macro form: imm is passed straight to the lane-read intrinsic as the lane
+// index.
+#define _mm_extract_epi32(a, imm) \
+    vgetq_lane_s32(vreinterpretq_s32_m128i(a), (imm))
+
+// Extracts the selected signed or unsigned 64-bit integer from a and zero
+// extends.
+// FORCE_INLINE __int64 _mm_extract_epi64(__m128i a, __constrange(0,2) int imm)
+#define _mm_extract_epi64(a, imm) \
+    vgetq_lane_s64(vreinterpretq_s64_m128i(a), (imm))
+
+// Extracts the selected signed or unsigned 8-bit integer from a and zero
+// extends. The lane is read as an unsigned byte.
+// FORCE_INLINE int _mm_extract_epi8(__m128i a, __constrange(0,16) int imm)
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8
+#define _mm_extract_epi8(a, imm) vgetq_lane_u8(vreinterpretq_u8_m128i(a), (imm))
+
+// Extracts the selected single-precision (32-bit) floating-point from a.
+// The lane is returned as its 32-bit integer bit pattern, not converted.
+// FORCE_INLINE int _mm_extract_ps(__m128 a, __constrange(0,4) int imm)
+#define _mm_extract_ps(a, imm) vgetq_lane_s32(vreinterpretq_s32_m128(a), (imm))
+
+// Round the packed double-precision (64-bit) floating-point elements in a down
+// to an integer value, and store the results as packed double-precision
+// floating-point elements in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd
+FORCE_INLINE __m128d _mm_floor_pd(__m128d a)
+{
+#if defined(__aarch64__)
+    return vreinterpretq_m128d_f64(vrndmq_f64(vreinterpretq_f64_m128d(a)));
+#else
+    // Scalar fallback: round each lane with floor() and rebuild the vector
+    // (_mm_set_pd takes the upper element first).
+    double *lanes = (double *) &a;
+    double upper = floor(lanes[1]);
+    double lower = floor(lanes[0]);
+    return _mm_set_pd(upper, lower);
+#endif
+}
+
+// Round the packed single-precision (32-bit) floating-point elements in a down
+// to an integer value, and store the results as packed single-precision
+// floating-point elements in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps
+FORCE_INLINE __m128 _mm_floor_ps(__m128 a)
+{
+#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING)
+    return vreinterpretq_m128_f32(vrndmq_f32(vreinterpretq_f32_m128(a)));
+#else
+    // Scalar fallback: round each lane with floorf() and rebuild the vector
+    // (_mm_set_ps takes the highest element first).
+    float *lanes = (float *) &a;
+    float r3 = floorf(lanes[3]);
+    float r2 = floorf(lanes[2]);
+    float r1 = floorf(lanes[1]);
+    float r0 = floorf(lanes[0]);
+    return _mm_set_ps(r3, r2, r1, r0);
+#endif
+}
+
+// Round the lower double-precision (64-bit) floating-point element in b down to
+// an integer value, store the result as a double-precision floating-point
+// element in the lower element of dst, and copy the upper element from a to the
+// upper element of dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd
+FORCE_INLINE __m128d _mm_floor_sd(__m128d a, __m128d b)
+{
+    // Round all of b, then keep only its lower element on top of a.
+    __m128d rounded = _mm_floor_pd(b);
+    return _mm_move_sd(a, rounded);
+}
+
+// Round the lower single-precision (32-bit) floating-point element in b down to
+// an integer value, store the result as a single-precision floating-point
+// element in the lower element of dst, and copy the upper 3 packed elements
+// from a to the upper elements of dst.
+//
+//   dst[31:0] := FLOOR(b[31:0])
+//   dst[127:32] := a[127:32]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss
+FORCE_INLINE __m128 _mm_floor_ss(__m128 a, __m128 b)
+{
+    // Round all of b, then keep only its lowest element on top of a.
+    __m128 rounded = _mm_floor_ps(b);
+    return _mm_move_ss(a, rounded);
+}
+
+// Inserts the least significant 32 bits of b into the selected 32-bit integer
+// of a.
+// FORCE_INLINE __m128i _mm_insert_epi32(__m128i a, int b,
+//                                       __constrange(0,4) int imm)
+// Macro form: imm is passed straight to the lane-write intrinsic as the lane
+// index.
+#define _mm_insert_epi32(a, b, imm)                                  \
+    __extension__({                                                  \
+        vreinterpretq_m128i_s32(                                     \
+            vsetq_lane_s32((b), vreinterpretq_s32_m128i(a), (imm))); \
+    })
+
+// Inserts the least significant 64 bits of b into the selected 64-bit integer
+// of a.
+// FORCE_INLINE __m128i _mm_insert_epi64(__m128i a, __int64 b,
+//                                       __constrange(0,2) int imm)
+#define _mm_insert_epi64(a, b, imm)                                  \
+    __extension__({                                                  \
+        vreinterpretq_m128i_s64(                                     \
+            vsetq_lane_s64((b), vreinterpretq_s64_m128i(a), (imm))); \
+    })
+
+// Inserts the least significant 8 bits of b into the selected 8-bit integer
+// of a.
+// FORCE_INLINE __m128i _mm_insert_epi8(__m128i a, int b,
+//                                      __constrange(0,16) int imm)
+#define _mm_insert_epi8(a, b, imm)                                 \
+    __extension__({                                                \
+        vreinterpretq_m128i_s8(                                    \
+            vsetq_lane_s8((b), vreinterpretq_s8_m128i(a), (imm))); \
+    })
+
+// Copy a to tmp, then insert a single-precision (32-bit) floating-point
+// element from b into tmp using the control in imm8. Store tmp to dst using
+// the mask in imm8 (elements are zeroed out when the corresponding bit is set).
+//
+// As implemented below:
+//   imm8[7:6] - lane of b to read
+//   imm8[5:4] - lane of a to overwrite
+//   imm8[3:0] - lanes of the result to force to zero
+//
+// Every use of imm8 is parenthesised and b goes through the same
+// reinterpret helper as a, so compound argument expressions (for example
+// `X | Y`) expand with the intended precedence.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=insert_ps
+#define _mm_insert_ps(a, b, imm8)                                              \
+    __extension__({                                                            \
+        float32x4_t tmp1 = vsetq_lane_f32(                                     \
+            vgetq_lane_f32(vreinterpretq_f32_m128(b), ((imm8) >> 6) & 0x3),    \
+            vreinterpretq_f32_m128(a), 0);                                     \
+        float32x4_t tmp2 =                                                     \
+            vsetq_lane_f32(vgetq_lane_f32(tmp1, 0), vreinterpretq_f32_m128(a), \
+                           (((imm8) >> 4) & 0x3));                             \
+        const uint32_t data[4] = {((imm8) & (1 << 0)) ? UINT32_MAX : 0,        \
+                                  ((imm8) & (1 << 1)) ? UINT32_MAX : 0,        \
+                                  ((imm8) & (1 << 2)) ? UINT32_MAX : 0,        \
+                                  ((imm8) & (1 << 3)) ? UINT32_MAX : 0};       \
+        uint32x4_t mask = vld1q_u32(data);                                     \
+        float32x4_t all_zeros = vdupq_n_f32(0);                                \
+                                                                               \
+        vreinterpretq_m128_f32(                                                \
+            vbslq_f32(mask, all_zeros, vreinterpretq_f32_m128(tmp2)));         \
+    })
+
+// epi versions of min/max
+// Computes the pairwise maximums of the four signed 32-bit integer values of a
+// and b.
+//
+// A 128-bit parameter that can be defined with the following equations:
+//   r0 := (a0 > b0) ? a0 : b0
+//   r1 := (a1 > b1) ? a1 : b1
+//   r2 := (a2 > b2) ? a2 : b2
+//   r3 := (a3 > b3) ? a3 : b3
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/bb514055(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_max_epi32(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_s32(
+        vmaxq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
+}
+
+// Compare packed signed 8-bit integers in a and b, and store packed maximum
+// values in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8
+FORCE_INLINE __m128i _mm_max_epi8(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_s8(
+        vmaxq_s8(vreinterpretq_s8_m128i(a), vreinterpretq_s8_m128i(b)));
+}
+
+// Compare packed unsigned 16-bit integers in a and b, and store packed maximum
+// values in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16
+FORCE_INLINE __m128i _mm_max_epu16(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u16(
+        vmaxq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b)));
+}
+
+// Compare packed unsigned 32-bit integers in a and b, and store packed maximum
+// values in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32
+FORCE_INLINE __m128i _mm_max_epu32(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_u32(
+        vmaxq_u32(vreinterpretq_u32_m128i(a), vreinterpretq_u32_m128i(b)));
+}
+
+// Computes the pairwise minima of the four signed 32-bit integer values of a
+// and b.
+//
+// A 128-bit parameter that can be defined with the following equations:
+//   r0 := (a0 < b0) ? a0 : b0
+//   r1 := (a1 < b1) ? a1 : b1
+//   r2 := (a2 < b2) ? a2 : b2
+//   r3 := (a3 < b3) ? a3 : b3
+//
+// https://msdn.microsoft.com/en-us/library/vstudio/bb531476(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_min_epi32(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_s32(
+        vminq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
+}
+
+// Compare packed signed 8-bit integers in a and b, and store packed minimum
+// values in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8
+FORCE_INLINE __m128i _mm_min_epi8(__m128i a, __m128i b)
+{
+    int8x16_t lhs = vreinterpretq_s8_m128i(a);
+    int8x16_t rhs = vreinterpretq_s8_m128i(b);
+    return vreinterpretq_m128i_s8(vminq_s8(lhs, rhs));
+}
+
+// Compare packed unsigned 16-bit integers in a and b, and store packed minimum
+// values in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16
+FORCE_INLINE __m128i _mm_min_epu16(__m128i a, __m128i b)
+{
+    uint16x8_t lhs = vreinterpretq_u16_m128i(a);
+    uint16x8_t rhs = vreinterpretq_u16_m128i(b);
+    return vreinterpretq_m128i_u16(vminq_u16(lhs, rhs));
+}
+
+// Compare packed unsigned 32-bit integers in a and b, and store packed minimum
+// values in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32
+FORCE_INLINE __m128i _mm_min_epu32(__m128i a, __m128i b)
+{
+    uint32x4_t lhs = vreinterpretq_u32_m128i(a);
+    uint32x4_t rhs = vreinterpretq_u32_m128i(b);
+    return vreinterpretq_m128i_u32(vminq_u32(lhs, rhs));
+}
+
+// Horizontally compute the minimum amongst the packed unsigned 16-bit integers
+// in a, store the minimum and index in dst, and zero the remaining bits in dst.
+//
+//   index[2:0] := 0
+//   min[15:0] := a[15:0]
+//   FOR j := 0 to 7
+//       i := j*16
+//       IF a[i+15:i] < min[15:0]
+//           index[2:0] := j
+//           min[15:0] := a[i+15:i]
+//       FI
+//   ENDFOR
+//   dst[15:0] := min[15:0]
+//   dst[18:16] := index[2:0]
+//   dst[127:19] := 0
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16
+FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a)
+{
+    uint16_t smallest;
+    uint16_t position = 0;
+
+    // Step 1: horizontal minimum over the eight lanes.
+#if defined(__aarch64__)
+    smallest = vminvq_u16(vreinterpretq_u16_m128i(a));
+#else
+    // Fold the high half onto the low half, then reduce pairwise twice so the
+    // overall minimum ends up in lane 0.
+    uint16x4_t folded = vmin_u16(vget_low_u16(vreinterpretq_u16_m128i(a)),
+                                 vget_high_u16(vreinterpretq_u16_m128i(a)));
+    folded = vpmin_u16(folded, folded);
+    folded = vpmin_u16(folded, folded);
+    smallest = vget_lane_u16(folded, 0);
+#endif
+
+    // Step 2: locate the first lane holding that minimum. Lane 0 is tested,
+    // then the vector is shifted right by one 16-bit element so the next
+    // lane moves into position 0.
+    for (int lane = 0; lane < 8; lane++) {
+        if (vgetq_lane_u16(vreinterpretq_u16_m128i(a), 0) == smallest) {
+            position = (uint16_t) lane;
+            break;
+        }
+        a = _mm_srli_si128(a, 2);
+    }
+
+    // Step 3: build the result - minimum in lane 0, index in lane 1, every
+    // other lane zero.
+    uint16x8_t packed = vreinterpretq_u16_m128i(_mm_setzero_si128());
+    packed = vsetq_lane_u16(smallest, packed, 0);
+    packed = vsetq_lane_u16(position, packed, 1);
+    return vreinterpretq_m128i_u16(packed);
+}
+
+// Compute the sum of absolute differences (SADs) of quadruplets of unsigned
+// 8-bit integers in a compared to those in b, and store the 16-bit results in
+// dst. Eight SADs are performed using one quadruplet from b and eight
+// quadruplets from a. One quadruplet is selected from b starting at on the
+// offset specified in imm8. Eight quadruplets are formed from sequential 8-bit
+// integers selected from a starting at the offset specified in imm8.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8
+//
+// Bit 2 of imm selects the starting 32-bit word of a (0 or 1); bits 1:0 of
+// imm select which 32-bit word of b supplies the quadruplet.
+FORCE_INLINE __m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, const int imm)
+{
+    uint8x16_t _a, _b;
+
+    // Select the window of a: either a unchanged, or a rotated by one 32-bit
+    // word so that its second word becomes the first.
+    switch (imm & 0x4) {
+    case 0:
+        // do nothing
+        _a = vreinterpretq_u8_m128i(a);
+        break;
+    case 4:
+        _a = vreinterpretq_u8_u32(vextq_u32(vreinterpretq_u32_m128i(a),
+                                            vreinterpretq_u32_m128i(a), 1));
+        break;
+    default:
+        // (imm & 0x4) can only be 0 or 4.
+#if defined(__GNUC__) || defined(__clang__)
+        __builtin_unreachable();
+#endif
+        break;
+    }
+
+    // Broadcast the selected 32-bit quadruplet of b to all four words, so the
+    // low 8 bytes of _b hold the quadruplet twice.
+    switch (imm & 0x3) {
+    case 0:
+        _b = vreinterpretq_u8_u32(
+            vdupq_n_u32(vgetq_lane_u32(vreinterpretq_u32_m128i(b), 0)));
+        break;
+    case 1:
+        _b = vreinterpretq_u8_u32(
+            vdupq_n_u32(vgetq_lane_u32(vreinterpretq_u32_m128i(b), 1)));
+        break;
+    case 2:
+        _b = vreinterpretq_u8_u32(
+            vdupq_n_u32(vgetq_lane_u32(vreinterpretq_u32_m128i(b), 2)));
+        break;
+    case 3:
+        _b = vreinterpretq_u8_u32(
+            vdupq_n_u32(vgetq_lane_u32(vreinterpretq_u32_m128i(b), 3)));
+        break;
+    default:
+        // (imm & 0x3) is always in 0..3.
+#if defined(__GNUC__) || defined(__clang__)
+        __builtin_unreachable();
+#endif
+        break;
+    }
+
+    // cXY holds the per-byte absolute differences feeding SAD X (lanes 0-3)
+    // and SAD Y (lanes 4-7). Each step widens the byte difference to 16 bits,
+    // takes its absolute value, then rotates _a by one byte so the next pair
+    // of windows lines up with the quadruplet.
+    int16x8_t c04, c15, c26, c37;
+    uint8x8_t low_b = vget_low_u8(_b);
+    c04 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b)));
+    _a = vextq_u8(_a, _a, 1);
+    c15 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b)));
+    _a = vextq_u8(_a, _a, 1);
+    c26 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b)));
+    _a = vextq_u8(_a, _a, 1);
+    c37 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b)));
+#if defined(__aarch64__)
+    // First pairwise add: each SAD is reduced from four terms to two.
+    // |0|4|2|6|
+    c04 = vpaddq_s16(c04, c26);
+    // |1|5|3|7|
+    c15 = vpaddq_s16(c15, c37);
+
+    // Interleave the 32-bit pairs of the two vectors so that the final
+    // pairwise add produces the eight sums in order 0..7.
+    int32x4_t trn1_c =
+        vtrn1q_s32(vreinterpretq_s32_s16(c04), vreinterpretq_s32_s16(c15));
+    int32x4_t trn2_c =
+        vtrn2q_s32(vreinterpretq_s32_s16(c04), vreinterpretq_s32_s16(c15));
+    return vreinterpretq_m128i_s16(vpaddq_s16(vreinterpretq_s16_s32(trn1_c),
+                                              vreinterpretq_s16_s32(trn2_c)));
+#else
+    // ARMv7: same reduction using 64-bit pairwise adds; cXY here holds the
+    // partial sums of SAD X and SAD Y.
+    int16x4_t c01, c23, c45, c67;
+    c01 = vpadd_s16(vget_low_s16(c04), vget_low_s16(c15));
+    c23 = vpadd_s16(vget_low_s16(c26), vget_low_s16(c37));
+    c45 = vpadd_s16(vget_high_s16(c04), vget_high_s16(c15));
+    c67 = vpadd_s16(vget_high_s16(c26), vget_high_s16(c37));
+
+    return vreinterpretq_m128i_s16(
+        vcombine_s16(vpadd_s16(c01, c23), vpadd_s16(c45, c67)));
+#endif
+}
+
+// Multiply the low signed 32-bit integers from each packed 64-bit element in
+// a and b, and store the signed 64-bit results in dst.
+//
+//   r0 :=  (int64_t)(int32_t)a0 * (int64_t)(int32_t)b0
+//   r1 :=  (int64_t)(int32_t)a2 * (int64_t)(int32_t)b2
+FORCE_INLINE __m128i _mm_mul_epi32(__m128i a, __m128i b)
+{
+    // vmull_s32 upcasts instead of masking, so we downcast.
+    // (vmovn_s64 keeps the low 32 bits of each 64-bit element.)
+    int32x2_t a_lo = vmovn_s64(vreinterpretq_s64_m128i(a));
+    int32x2_t b_lo = vmovn_s64(vreinterpretq_s64_m128i(b));
+    return vreinterpretq_m128i_s64(vmull_s32(a_lo, b_lo));
+}
+
+// Multiplies the 4 signed or unsigned 32-bit integers from a by the 4 signed or
+// unsigned 32-bit integers from b.
+// https://msdn.microsoft.com/en-us/library/vstudio/bb531409(v=vs.100).aspx
+FORCE_INLINE __m128i _mm_mullo_epi32(__m128i a, __m128i b)
+{
+    return vreinterpretq_m128i_s32(
+        vmulq_s32(vreinterpretq_s32_m128i(a), vreinterpretq_s32_m128i(b)));
+}
+
+// Packs the 8 signed 32-bit integers from a and b into unsigned 16-bit
+// integers and saturates.
+//
+//   r0 := UnsignedSaturate(a0)
+//   r1 := UnsignedSaturate(a1)
+//   r2 := UnsignedSaturate(a2)
+//   r3 := UnsignedSaturate(a3)
+//   r4 := UnsignedSaturate(b0)
+//   r5 := UnsignedSaturate(b1)
+//   r6 := UnsignedSaturate(b2)
+//   r7 := UnsignedSaturate(b3)
+FORCE_INLINE __m128i _mm_packus_epi32(__m128i a, __m128i b)
+{
+    // vqmovun_s32 narrows signed 32-bit lanes to unsigned 16-bit with
+    // saturation; a supplies the low half of the result, b the high half.
+    return vreinterpretq_m128i_u16(
+        vcombine_u16(vqmovun_s32(vreinterpretq_s32_m128i(a)),
+                     vqmovun_s32(vreinterpretq_s32_m128i(b))));
+}
+
+// Round the packed double-precision (64-bit) floating-point elements in a using
+// the rounding parameter, and store the results as packed double-precision
+// floating-point elements in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd
+FORCE_INLINE __m128d _mm_round_pd(__m128d a, int rounding)
+{
+#if defined(__aarch64__)
+    // AArch64: each explicit rounding mode is handled by one case below.
+    switch (rounding) {
+    case (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC):
+        return vreinterpretq_m128d_f64(vrndnq_f64(vreinterpretq_f64_m128d(a)));
+    case (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC):
+        return _mm_floor_pd(a);
+    case (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC):
+        return _mm_ceil_pd(a);
+    case (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC):
+        return vreinterpretq_m128d_f64(vrndq_f64(vreinterpretq_f64_m128d(a)));
+    default:  //_MM_FROUND_CUR_DIRECTION
+        return vreinterpretq_m128d_f64(vrndiq_f64(vreinterpretq_f64_m128d(a)));
+    }
+#else
+    // ARMv7 fallback: operate on the two lanes as scalars.
+    double *v_double = (double *) &a;
+
+    // Round to nearest, ties to even - either requested explicitly or because
+    // the current rounding mode is "nearest".
+    if (rounding == (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) ||
+        (rounding == _MM_FROUND_CUR_DIRECTION &&
+         _MM_GET_ROUNDING_MODE() == _MM_ROUND_NEAREST)) {
+        double res[2], tmp;
+        for (int i = 0; i < 2; i++) {
+            // Work on the magnitude; the sign is restored at the end.
+            tmp = (v_double[i] < 0) ? -v_double[i] : v_double[i];
+            double roundDown = floor(tmp);  // Round down value
+            double roundUp = ceil(tmp);     // Round up value
+            double diffDown = tmp - roundDown;
+            double diffUp = roundUp - tmp;
+            if (diffDown < diffUp) {
+                /* If it's closer to the round down value, then use it */
+                res[i] = roundDown;
+            } else if (diffDown > diffUp) {
+                /* If it's closer to the round up value, then use it */
+                res[i] = roundUp;
+            } else {
+                /* If it's equidistant between round up and round down value,
+                 * pick the one which is an even number */
+                double half = roundDown / 2;
+                if (half != floor(half)) {
+                    /* If the round down value is odd, return the round up value
+                     */
+                    res[i] = roundUp;
+                } else {
+                    /* If the round up value is odd, return the round down value
+                     */
+                    res[i] = roundDown;
+                }
+            }
+            res[i] = (v_double[i] < 0) ? -res[i] : res[i];
+        }
+        return _mm_set_pd(res[1], res[0]);
+    } else if (rounding == (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) ||
+               (rounding == _MM_FROUND_CUR_DIRECTION &&
+                _MM_GET_ROUNDING_MODE() == _MM_ROUND_DOWN)) {
+        return _mm_floor_pd(a);
+    } else if (rounding == (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) ||
+               (rounding == _MM_FROUND_CUR_DIRECTION &&
+                _MM_GET_ROUNDING_MODE() == _MM_ROUND_UP)) {
+        return _mm_ceil_pd(a);
+    }
+    // Everything else truncates toward zero: floor for positive values, ceil
+    // otherwise.
+    return _mm_set_pd(v_double[1] > 0 ? floor(v_double[1]) : ceil(v_double[1]),
+                      v_double[0] > 0 ? floor(v_double[0]) : ceil(v_double[0]));
+#endif
+}
+
+// Round the packed single-precision (32-bit) floating-point elements in a using
+// the rounding parameter, and store the results as packed single-precision
+// floating-point elements in dst.
+// software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps
+FORCE_INLINE __m128 _mm_round_ps(__m128 a, int rounding)
+{
+#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING)
+    // Directed-rounding instructions available: one case per explicit mode.
+    switch (rounding) {
+    case (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC):
+        return vreinterpretq_m128_f32(vrndnq_f32(vreinterpretq_f32_m128(a)));
+    case (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC):
+        return _mm_floor_ps(a);
+    case (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC):
+        return _mm_ceil_ps(a);
+    case (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC):
+        return vreinterpretq_m128_f32(vrndq_f32(vreinterpretq_f32_m128(a)));
+    default:  //_MM_FROUND_CUR_DIRECTION
+        return vreinterpretq_m128_f32(vrndiq_f32(vreinterpretq_f32_m128(a)));
+    }
+#else
+    // Fallback without directed-rounding instructions.
+    float *v_float = (float *) &a;
+
+    // Round to nearest, ties to even - either requested explicitly or because
+    // the current rounding mode is "nearest". Done with vector operations via
+    // a float -> int32 -> float round trip.
+    if (rounding == (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) ||
+        (rounding == _MM_FROUND_CUR_DIRECTION &&
+         _MM_GET_ROUNDING_MODE() == _MM_ROUND_NEAREST)) {
+        uint32x4_t signmask = vdupq_n_u32(0x80000000);
+        // Copy the sign bit of a onto 0.5 so that half has a's sign.
+        float32x4_t half = vbslq_f32(signmask, vreinterpretq_f32_m128(a),
+                                     vdupq_n_f32(0.5f)); /* +/- 0.5 */
+        int32x4_t r_normal = vcvtq_s32_f32(vaddq_f32(
+            vreinterpretq_f32_m128(a), half)); /* round to integer: [a + 0.5]*/
+        int32x4_t r_trunc = vcvtq_s32_f32(
+            vreinterpretq_f32_m128(a)); /* truncate to integer: [a] */
+        int32x4_t plusone = vreinterpretq_s32_u32(vshrq_n_u32(
+            vreinterpretq_u32_s32(vnegq_s32(r_trunc)), 31)); /* 1 or 0 */
+        int32x4_t r_even = vbicq_s32(vaddq_s32(r_trunc, plusone),
+                                     vdupq_n_s32(1)); /* ([a] + {0,1}) & ~1 */
+        float32x4_t delta = vsubq_f32(
+            vreinterpretq_f32_m128(a),
+            vcvtq_f32_s32(r_trunc)); /* compute delta: delta = (a - [a]) */
+        uint32x4_t is_delta_half =
+            vceqq_f32(delta, half); /* delta == +/- 0.5 */
+        // Exact ties take the even candidate, everything else the ordinary
+        // rounded value.
+        return vreinterpretq_m128_f32(
+            vcvtq_f32_s32(vbslq_s32(is_delta_half, r_even, r_normal)));
+    } else if (rounding == (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) ||
+               (rounding == _MM_FROUND_CUR_DIRECTION &&
+                _MM_GET_ROUNDING_MODE() == _MM_ROUND_DOWN)) {
+        return _mm_floor_ps(a);
+    } else if (rounding == (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) ||
+               (rounding == _MM_FROUND_CUR_DIRECTION &&
+                _MM_GET_ROUNDING_MODE() == _MM_ROUND_UP)) {
+        return _mm_ceil_ps(a);
+    }
+    // Everything else truncates toward zero: floorf for positive values,
+    // ceilf otherwise.
+    return _mm_set_ps(v_float[3] > 0 ? floorf(v_float[3]) : ceilf(v_float[3]),
+                      v_float[2] > 0 ? floorf(v_float[2]) : ceilf(v_float[2]),
+                      v_float[1] > 0 ? floorf(v_float[1]) : ceilf(v_float[1]),
+                      v_float[0] > 0 ? floorf(v_float[0]) : ceilf(v_float[0]));
+#endif
+}
+
+// Round the lower double-precision (64-bit) floating-point element in b using
+// the rounding parameter, store the result as a double-precision floating-point
+// element in the lower element of dst, and copy the upper element from a to the
+// upper element of dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd
+FORCE_INLINE __m128d _mm_round_sd(__m128d a, __m128d b, int rounding)
+{
+    return _mm_move_sd(a, _mm_round_pd(b, rounding));
+}
+
+// Round the lower single-precision (32-bit) floating-point element in b using
+// the rounding parameter, store the result as a single-precision floating-point
+// element in the lower element of dst, and copy the upper 3 packed elements
+// from a to the upper elements of dst. Rounding is done according to the
+// rounding[3:0] parameter, which can be one of:
+//   (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
+//       round to nearest, and suppress exceptions
+//   (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+//       round down, and suppress exceptions
+//   (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
+//       round up, and suppress exceptions
+//   (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
+//       truncate, and suppress exceptions
+//   _MM_FROUND_CUR_DIRECTION
+//       use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss
+FORCE_INLINE __m128 _mm_round_ss(__m128 a, __m128 b, int rounding)
+{
+    return _mm_move_ss(a, _mm_round_ps(b, rounding));
+}
+
+// Load 128-bits of integer data from memory into dst using a non-temporal
+// memory hint. mem_addr must be aligned on a 16-byte boundary or a
+// general-protection exception may be generated.
+//
+//   dst[127:0] := MEM[mem_addr+127:mem_addr]
+//
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_load_si128
+FORCE_INLINE __m128i _mm_stream_load_si128(__m128i *p)
+{
+    // Guard on the same builtin that is called in this branch, so the check
+    // and the use cannot disagree.
+#if __has_builtin(__builtin_nontemporal_load)
+    return __builtin_nontemporal_load(p);
+#else
+    // No non-temporal load builtin: fall back to a plain 128-bit NEON load.
+    return vreinterpretq_m128i_s64(vld1q_s64((int64_t *) p));
+#endif
+}
+
+// Compute the bitwise NOT of a and then AND with a 128-bit vector containing
+// all 1's, and return 1 if the result is zero, otherwise return 0.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones
+FORCE_INLINE int _mm_test_all_ones(__m128i a)
+{
+    return (uint64_t) (vgetq_lane_s64(a, 0) & vgetq_lane_s64(a, 1)) ==
+           ~(uint64_t) 0;
+}
+
+// Compute the bitwise AND of 128 bits (representing integer data) in a and
+// mask, and return 1 if the result is zero, otherwise return 0.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros
+FORCE_INLINE int _mm_test_all_zeros(__m128i a, __m128i mask)
+{
+    int64x2_t a_and_mask =
+        vandq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(mask));
+    return !(vgetq_lane_s64(a_and_mask, 0) | vgetq_lane_s64(a_and_mask, 1));
+}
+
+// Compute the bitwise AND of 128 bits (representing integer data) in a and
+// mask, and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute
+// the bitwise NOT of a and then AND with mask, and set CF to 1 if the result is
+// zero, otherwise set CF to 0. Return 1 if both the ZF and CF values are zero,
+// otherwise return 0.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros
+FORCE_INLINE int _mm_test_mix_ones_zeros(__m128i a, __m128i mask)
+{
+    // Bits selected by mask that are set in a (a & mask). ZF is 0 exactly
+    // when this is non-zero.
+    uint64x2_t ones =
+        vandq_u64(vreinterpretq_u64_m128i(mask), vreinterpretq_u64_m128i(a));
+    // Bits selected by mask that are clear in a (~a & mask). CF is 0 exactly
+    // when this is non-zero.
+    uint64x2_t zeros =
+        vbicq_u64(vreinterpretq_u64_m128i(mask), vreinterpretq_u64_m128i(a));
+    // The two sets are disjoint, so ANDing them together is always zero; each
+    // one has to be tested for emptiness on its own.
+    int zf_clear = (vgetq_lane_u64(ones, 0) | vgetq_lane_u64(ones, 1)) != 0;
+    int cf_clear = (vgetq_lane_u64(zeros, 0) | vgetq_lane_u64(zeros, 1)) != 0;
+    return zf_clear && cf_clear;
+}
+
+// Compute the bitwise AND of 128 bits (representing integer data) in a and b,
+// and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the
+// bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero,
+// otherwise set CF to 0. Return the CF value.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128 +FORCE_INLINE int _mm_testc_si128(__m128i a, __m128i b) +{ + int64x2_t s64 = + vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_m128i(a))), + vreinterpretq_s64_m128i(b)); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); +} + +// Compute the bitwise AND of 128 bits (representing integer data) in a and b, +// and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the +// bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, +// otherwise set CF to 0. Return 1 if both the ZF and CF values are zero, +// otherwise return 0. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128 +#define _mm_testnzc_si128(a, b) _mm_test_mix_ones_zeros(a, b) + +// Compute the bitwise AND of 128 bits (representing integer data) in a and b, +// and set ZF to 1 if the result is zero, otherwise set ZF to 0. Compute the +// bitwise NOT of a and then AND with b, and set CF to 1 if the result is zero, +// otherwise set CF to 0. Return the ZF value. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128 +FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b) +{ + int64x2_t s64 = + vandq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b)); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); +} + +/* SSE4.2 */ + +// Compares the 2 signed 64-bit integers in a and the 2 signed 64-bit integers +// in b for greater than. +FORCE_INLINE __m128i _mm_cmpgt_epi64(__m128i a, __m128i b) +{ +#if defined(__aarch64__) + return vreinterpretq_m128i_u64( + vcgtq_s64(vreinterpretq_s64_m128i(a), vreinterpretq_s64_m128i(b))); +#else + return vreinterpretq_m128i_s64(vshrq_n_s64( + vqsubq_s64(vreinterpretq_s64_m128i(b), vreinterpretq_s64_m128i(a)), + 63)); +#endif +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 16-bit integer v. 
+// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100) +FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#elif (__ARM_ARCH == 8) && defined(__ARM_FEATURE_CRC32) + crc = __crc32ch(crc, v); +#else + crc = _mm_crc32_u8(crc, v & 0xff); + crc = _mm_crc32_u8(crc, (v >> 8) & 0xff); +#endif + return crc; +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 32-bit integer v. +// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100) +FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#elif (__ARM_ARCH == 8) && defined(__ARM_FEATURE_CRC32) + crc = __crc32cw(crc, v); +#else + crc = _mm_crc32_u16(crc, v & 0xffff); + crc = _mm_crc32_u16(crc, (v >> 16) & 0xffff); +#endif + return crc; +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 64-bit integer v. +// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100) +FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#else + crc = _mm_crc32_u32((uint32_t) (crc), v & 0xffffffff); + crc = _mm_crc32_u32((uint32_t) (crc), (v >> 32) & 0xffffffff); +#endif + return crc; +} + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 8-bit integer v. 
+// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100) +FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#elif (__ARM_ARCH == 8) && defined(__ARM_FEATURE_CRC32) + crc = __crc32cb(crc, v); +#else + crc ^= v; + for (int bit = 0; bit < 8; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } +#endif + return crc; +} + +/* AES */ + +#if !defined(__ARM_FEATURE_CRYPTO) +/* clang-format off */ +#define SSE2NEON_AES_DATA(w) \ + { \ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), \ + w(0xc5), w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), \ + w(0xab), w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), \ + w(0x59), w(0x47), w(0xf0), w(0xad), w(0xd4), w(0xa2), w(0xaf), \ + w(0x9c), w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), \ + w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), \ + w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), w(0x04), \ + w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), \ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), \ + w(0x75), w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), \ + w(0x5a), w(0xa0), w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), \ + w(0xe3), w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), \ + w(0x20), w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), \ + w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf), w(0xd0), w(0xef), \ + w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), \ + w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8), \ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), \ + w(0xf5), w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), \ + w(0xf3), w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), \ + w(0x97), w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), \ + w(0x64), 
w(0x5d), w(0x19), w(0x73), w(0x60), w(0x81), w(0x4f), \ + w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), \ + w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), \ + w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), \ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), \ + w(0x79), w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), \ + w(0x4e), w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), \ + w(0x7a), w(0xae), w(0x08), w(0xba), w(0x78), w(0x25), w(0x2e), \ + w(0x1c), w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), \ + w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), \ + w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), w(0x61), \ + w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), \ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), \ + w(0x94), w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), \ + w(0x28), w(0xdf), w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), \ + w(0xe6), w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), \ + w(0xb0), w(0x54), w(0xbb), w(0x16) \ + } +/* clang-format on */ + +/* X Macro trick. See https://en.wikipedia.org/wiki/X_Macro */ +#define SSE2NEON_AES_H0(x) (x) +static const uint8_t SSE2NEON_sbox[256] = SSE2NEON_AES_DATA(SSE2NEON_AES_H0); +#undef SSE2NEON_AES_H0 + +// In the absence of crypto extensions, implement aesenc using regular neon +// intrinsics instead. See: +// https://www.workofard.com/2017/01/accelerated-aes-for-the-arm64-linux-kernel/ +// https://www.workofard.com/2017/07/ghash-for-low-end-cores/ and +// https://github.com/ColinIanKing/linux-next-mirror/blob/b5f466091e130caaf0735976648f72bd5e09aa84/crypto/aegis128-neon-inner.c#L52 +// for more information Reproduced with permission of the author. 
+FORCE_INLINE __m128i _mm_aesenc_si128(__m128i EncBlock, __m128i RoundKey) +{ +#if defined(__aarch64__) + static const uint8_t shift_rows[] = {0x0, 0x5, 0xa, 0xf, 0x4, 0x9, + 0xe, 0x3, 0x8, 0xd, 0x2, 0x7, + 0xc, 0x1, 0x6, 0xb}; + static const uint8_t ror32by8[] = {0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4, + 0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc}; + + uint8x16_t v; + uint8x16_t w = vreinterpretq_u8_m128i(EncBlock); + + // shift rows + w = vqtbl1q_u8(w, vld1q_u8(shift_rows)); + + // sub bytes + v = vqtbl4q_u8(_sse2neon_vld1q_u8_x4(SSE2NEON_sbox), w); + v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x40), w - 0x40); + v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0x80), w - 0x80); + v = vqtbx4q_u8(v, _sse2neon_vld1q_u8_x4(SSE2NEON_sbox + 0xc0), w - 0xc0); + + // mix columns + w = (v << 1) ^ (uint8x16_t) (((int8x16_t) v >> 7) & 0x1b); + w ^= (uint8x16_t) vrev32q_u16((uint16x8_t) v); + w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8)); + + // add round key + return vreinterpretq_m128i_u8(w) ^ RoundKey; + +#else /* ARMv7-A NEON implementation */ +#define SSE2NEON_AES_B2W(b0, b1, b2, b3) \ + (((uint32_t) (b3) << 24) | ((uint32_t) (b2) << 16) | \ + ((uint32_t) (b1) << 8) | (uint32_t) (b0)) +#define SSE2NEON_AES_F2(x) ((x << 1) ^ (((x >> 7) & 1) * 0x011b /* WPOLY */)) +#define SSE2NEON_AES_F3(x) (SSE2NEON_AES_F2(x) ^ x) +#define SSE2NEON_AES_U0(p) \ + SSE2NEON_AES_B2W(SSE2NEON_AES_F2(p), p, p, SSE2NEON_AES_F3(p)) +#define SSE2NEON_AES_U1(p) \ + SSE2NEON_AES_B2W(SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p, p) +#define SSE2NEON_AES_U2(p) \ + SSE2NEON_AES_B2W(p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p), p) +#define SSE2NEON_AES_U3(p) \ + SSE2NEON_AES_B2W(p, p, SSE2NEON_AES_F3(p), SSE2NEON_AES_F2(p)) + static const uint32_t ALIGN_STRUCT(16) aes_table[4][256] = { + SSE2NEON_AES_DATA(SSE2NEON_AES_U0), + SSE2NEON_AES_DATA(SSE2NEON_AES_U1), + SSE2NEON_AES_DATA(SSE2NEON_AES_U2), + SSE2NEON_AES_DATA(SSE2NEON_AES_U3), + }; +#undef SSE2NEON_AES_B2W +#undef SSE2NEON_AES_F2 
+#undef SSE2NEON_AES_F3 +#undef SSE2NEON_AES_U0 +#undef SSE2NEON_AES_U1 +#undef SSE2NEON_AES_U2 +#undef SSE2NEON_AES_U3 + + uint32_t x0 = _mm_cvtsi128_si32(EncBlock); + uint32_t x1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0x55)); + uint32_t x2 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0xAA)); + uint32_t x3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(EncBlock, 0xFF)); + + __m128i out = _mm_set_epi32( + (aes_table[0][x3 & 0xff] ^ aes_table[1][(x0 >> 8) & 0xff] ^ + aes_table[2][(x1 >> 16) & 0xff] ^ aes_table[3][x2 >> 24]), + (aes_table[0][x2 & 0xff] ^ aes_table[1][(x3 >> 8) & 0xff] ^ + aes_table[2][(x0 >> 16) & 0xff] ^ aes_table[3][x1 >> 24]), + (aes_table[0][x1 & 0xff] ^ aes_table[1][(x2 >> 8) & 0xff] ^ + aes_table[2][(x3 >> 16) & 0xff] ^ aes_table[3][x0 >> 24]), + (aes_table[0][x0 & 0xff] ^ aes_table[1][(x1 >> 8) & 0xff] ^ + aes_table[2][(x2 >> 16) & 0xff] ^ aes_table[3][x3 >> 24])); + + return _mm_xor_si128(out, RoundKey); +#endif +} + +// Perform the last round of an AES encryption flow on data (state) in a using +// the round key in RoundKey, and store the result in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128 +FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) +{ + /* FIXME: optimized for NEON */ + uint8_t v[4][4] = { + {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 0)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 5)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 10)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 15)]}, + {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 4)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 9)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 14)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 3)]}, + {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 8)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 13)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 2)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 7)]}, + {SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 12)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 1)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 6)], + SSE2NEON_sbox[vreinterpretq_nth_u8_m128i(a, 11)]}, + }; + for (int i = 0; i < 16; i++) + vreinterpretq_nth_u8_m128i(a, i) = + v[i / 4][i % 4] ^ vreinterpretq_nth_u8_m128i(RoundKey, i); + return a; +} + +// Emits the Advanced Encryption Standard (AES) instruction aeskeygenassist. +// This instruction generates a round key for AES encryption. See +// https://kazakov.life/2017/11/01/cryptocurrency-mining-on-ios-devices/ +// for details. 
+// +// https://msdn.microsoft.com/en-us/library/cc714138(v=vs.120).aspx +FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i key, const int rcon) +{ + uint32_t X1 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0x55)); + uint32_t X3 = _mm_cvtsi128_si32(_mm_shuffle_epi32(key, 0xFF)); + for (int i = 0; i < 4; ++i) { + ((uint8_t *) &X1)[i] = SSE2NEON_sbox[((uint8_t *) &X1)[i]]; + ((uint8_t *) &X3)[i] = SSE2NEON_sbox[((uint8_t *) &X3)[i]]; + } + return _mm_set_epi32(((X3 >> 8) | (X3 << 24)) ^ rcon, X3, + ((X1 >> 8) | (X1 << 24)) ^ rcon, X1); +} +#undef SSE2NEON_AES_DATA + +#else /* __ARM_FEATURE_CRYPTO */ +// Implements equivalent of 'aesenc' by combining AESE (with an empty key) and +// AESMC and then manually applying the real key as an xor operation. This +// unfortunately means an additional xor op; the compiler should be able to +// optimize this away for repeated calls however. See +// https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a +// for more details. 
+FORCE_INLINE __m128i _mm_aesenc_si128(__m128i a, __m128i b) +{ + return vreinterpretq_m128i_u8( + vaesmcq_u8(vaeseq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0))) ^ + vreinterpretq_u8_m128i(b)); +} + +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128 +FORCE_INLINE __m128i _mm_aesenclast_si128(__m128i a, __m128i RoundKey) +{ + return _mm_xor_si128(vreinterpretq_m128i_u8(vaeseq_u8( + vreinterpretq_u8_m128i(a), vdupq_n_u8(0))), + RoundKey); +} + +FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) +{ + // AESE does ShiftRows and SubBytes on A + uint8x16_t u8 = vaeseq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0)); + + uint8x16_t dest = { + // Undo ShiftRows step from AESE and extract X1 and X3 + u8[0x4], u8[0x1], u8[0xE], u8[0xB], // SubBytes(X1) + u8[0x1], u8[0xE], u8[0xB], u8[0x4], // ROT(SubBytes(X1)) + u8[0xC], u8[0x9], u8[0x6], u8[0x3], // SubBytes(X3) + u8[0x9], u8[0x6], u8[0x3], u8[0xC], // ROT(SubBytes(X3)) + }; + uint32x4_t r = {0, (unsigned) rcon, 0, (unsigned) rcon}; + return vreinterpretq_m128i_u8(dest) ^ vreinterpretq_m128i_u32(r); +} +#endif + +/* Others */ + +// Perform a carry-less multiplication of two 64-bit integers, selected from a +// and b according to imm8, and store the results in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128 +FORCE_INLINE __m128i _mm_clmulepi64_si128(__m128i _a, __m128i _b, const int imm) +{ + uint64x2_t a = vreinterpretq_u64_m128i(_a); + uint64x2_t b = vreinterpretq_u64_m128i(_b); + switch (imm & 0x11) { + case 0x00: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_low_u64(a), vget_low_u64(b))); + case 0x01: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_high_u64(a), vget_low_u64(b))); + case 0x10: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_low_u64(a), vget_high_u64(b))); + case 0x11: + return vreinterpretq_m128i_u64( + _sse2neon_vmull_p64(vget_high_u64(a), vget_high_u64(b))); + default: + abort(); + } +} + +FORCE_INLINE unsigned int _sse2neon_mm_get_denormals_zero_mode() +{ + union { + fpcr_bitfield field; +#if defined(__aarch64__) + uint64_t value; +#else + uint32_t value; +#endif + } r; + +#if defined(__aarch64__) + __asm__ __volatile__("mrs %0, FPCR" : "=r"(r.value)); /* read */ +#else + __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */ +#endif + + return r.field.bit24 ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF; +} + +// Count the number of bits set to 1 in unsigned 32-bit integer a, and +// return that count in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u32
+FORCE_INLINE int _mm_popcnt_u32(unsigned int a)
+{
+#if defined(__aarch64__)
+#if __has_builtin(__builtin_popcount)
+    return __builtin_popcount(a);
+#else
+    return (int) vaddlv_u8(vcnt_u8(vcreate_u8((uint64_t) a)));
+#endif
+#else
+    // Widen a to 64 bits (upper half zero) instead of loading 8 bytes from
+    // the address of a 4-byte object, count the set bits of every byte, then
+    // pairwise-add the byte counts up to 32-bit lanes.
+    uint8x8_t count8x8_val = vcnt_u8(vcreate_u8((uint64_t) a));
+    uint16x4_t count16x4_val = vpaddl_u8(count8x8_val);
+    uint32x2_t count32x2_val = vpaddl_u16(count16x4_val);
+
+    // Lane 0 sums the four bytes of a; lane 1 only covers the zero padding.
+    // Extract the lane directly rather than storing both lanes (8 bytes) into
+    // a 4-byte local.
+    return (int) vget_lane_u32(count32x2_val, 0);
+#endif
+}
+
+// Count the number of bits set to 1 in unsigned 64-bit integer a, and
+// return that count in dst.
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_u64
+FORCE_INLINE int64_t _mm_popcnt_u64(uint64_t a)
+{
+#if defined(__aarch64__)
+#if __has_builtin(__builtin_popcountll)
+    return __builtin_popcountll(a);
+#else
+    return (int64_t) vaddlv_u8(vcnt_u8(vcreate_u8(a)));
+#endif
+#else
+    uint64_t count = 0;
+    uint8x8_t input_val, count8x8_val;
+    uint16x4_t count16x4_val;
+    uint32x2_t count32x2_val;
+    uint64x1_t count64x1_val;
+
+    input_val = vld1_u8((uint8_t *) &a);
+    count8x8_val = vcnt_u8(input_val);
+    count16x4_val = vpaddl_u8(count8x8_val);
+    count32x2_val = vpaddl_u16(count16x4_val);
+    count64x1_val = vpaddl_u32(count32x2_val);
+    vst1_u64(&count, count64x1_val);
+    return count;
+#endif
+}
+
+FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag)
+{
+    // AArch32 Advanced SIMD arithmetic always uses the Flush-to-zero setting,
+    // regardless of the value of the FZ bit.
+    // View the floating-point control register both as a raw integer (value)
+    // and as individual bits (field).
+    union {
+        fpcr_bitfield field;
+#if defined(__aarch64__)
+        uint64_t value;
+#else
+        uint32_t value;
+#endif
+    } r;
+
+#if defined(__aarch64__)
+    __asm__ __volatile__("mrs %0, FPCR" : "=r"(r.value)); /* read */
+#else
+    __asm__ __volatile__("vmrs %0, FPSCR" : "=r"(r.value)); /* read */
+#endif
+
+    // Bit 24 is set only when flag requests _MM_DENORMALS_ZERO_ON.
+    r.field.bit24 = (flag & _MM_DENORMALS_ZERO_MASK) == _MM_DENORMALS_ZERO_ON;
+
+    // Write back the integer member, mirroring the read above; the asm
+    // operand must be the scalar register image, not the union object.
+#if defined(__aarch64__)
+    __asm__ __volatile__("msr FPCR, %0" ::"r"(r.value)); /* write */
+#else
+    __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r.value)); /* write */
+#endif
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma pop_macro("ALIGN_STRUCT")
+#pragma pop_macro("FORCE_INLINE")
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC pop_options
+#endif
+
+#endif
From 73a11e0b510e43cb0b727917ca501c6f3d9f0798 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Thu, 24 Nov 2022 09:18:18 +0300 Subject: [PATCH 04/27] arm: add arm support in *.cpp, *.h, *.hpp --- src/libs/location/src/character.cpp | 2 +- src/libs/math/include/math3d.h | 9 +++++++++ src/libs/math/include/math_inlines.h | 10 +++++++++- src/libs/util/include/platform/platform.hpp | 13 +++++++++++++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/libs/location/src/character.cpp b/src/libs/location/src/character.cpp index e85a15275..9da903022 100644 --- a/src/libs/location/src/character.cpp +++ b/src/libs/location/src/character.cpp @@ -5206,7 +5206,7 @@ void Character::FindNearCharacters(MESSAGE &message) // Find the surrounding characters auto *const location = GetLocation(); auto fndCharacter = location->supervisor.FindCharacters(this, rad, viewAng, planeDist, ax, isSort); - auto n = fndCharacter.size(); + int32_t n = fndCharacter.size(); if (fndCharacter.empty()) { num->Set(0); diff --git a/src/libs/math/include/math3d.h b/src/libs/math/include/math3d.h index fef4d18ab..5b358f4e5 100644 --- a/src/libs/math/include/math3d.h +++ b/src/libs/math/include/math3d.h @@ -8,7 +8,16 @@ #include +#include
"platform/platform.hpp" + +#include +#if defined(ARCH_X86) || defined(ARCH_X64) #include // espkk # remove inline asm # 30/Dec/2017 +#elif defined(ARCH_ARM) || defined(ARCH_ARM64) +#include "sse2neon.h" +#else +#error Add your platform here +#endif //#define inline __forceinline diff --git a/src/libs/math/include/math_inlines.h b/src/libs/math/include/math_inlines.h index 8e410a938..90f693d44 100644 --- a/src/libs/math/include/math_inlines.h +++ b/src/libs/math/include/math_inlines.h @@ -3,7 +3,15 @@ #include #include +#include "platform/platform.hpp" + +#if defined(ARCH_X86) || defined(ARCH_X64) #include +#elif defined(ARCH_ARM) || defined(ARCH_ARM64) +#include "sse2neon.h" +#else +#error Add your platform here +#endif #include "c_vector.h" @@ -137,4 +145,4 @@ inline bool IntersectLines2D(const CVECTOR &v1, const CVECTOR &v2, const CVECTOR inline int ftoi(float f) { return _mm_cvt_ss2si(_mm_load_ss(&f)); -} \ No newline at end of file +} diff --git a/src/libs/util/include/platform/platform.hpp b/src/libs/util/include/platform/platform.hpp index 77cb8a318..3d8bd97ae 100644 --- a/src/libs/util/include/platform/platform.hpp +++ b/src/libs/util/include/platform/platform.hpp @@ -1,4 +1,17 @@ #pragma once + +#if defined(_M_IX86) || defined(__i386__) || defined(_X86_) +#define ARCH_X86 +#elif defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) +#define ARCH_X64 +#elif defined(_M_ARM) || defined(__arm__) +#define ARCH_ARM +#elif defined (_M_ARM64) || defined(__aarch64__) +#define ARCH_ARM64 +#else +#error Unsupported architecture +#endif + #ifdef _WIN32 #define PATH_SEP '\\' From f429173b7953442dda6cf98a0dc5baac6216b789 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Thu, 24 Nov 2022 09:21:43 +0300 Subject: [PATCH 05/27] arm: add armhf support to cmake file --- CMakeLists.txt | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f8b04647e..06b14c819 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -19,6 +19,18 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug set(STORM_WATERMARK_FILE ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/watermark.hpp CACHE FILEPATH "Include file containing build revision, etc." FORCE)
+message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64)$")  # 64-bit names first: "arm64" also starts with "arm"
+    set(PROJECT_PLATFORM_ARM64 TRUE)
+elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")  # anchored: any remaining arm* name is treated as 32-bit ARM
+    set(PROJECT_PLATFORM_ARM TRUE)
+endif()
+
+if (PROJECT_PLATFORM_ARM)  # 32-bit ARM only: these flags are not added for PROJECT_PLATFORM_ARM64
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -fsigned-char")
+endif()
+
### Set up third-party dependencies set(ENV{CONAN_REVISIONS_ENABLED} 1) conan_add_remote(NAME bincrafters @@ -68,8 +80,8 @@ else() # for DXVK Native include(ExternalProject) ExternalProject_Add(dxvk-native - GIT_REPOSITORY https://github.com/Joshua-Ashton/dxvk-native - GIT_TAG a2dc99c407340432d4ba5bfa29efa685c27942ea + GIT_REPOSITORY https://github.com/IonAgorria/dxvk-native + GIT_TAG 2e91e784a86bc1355100518cd90eee48433551ac GIT_SHALLOW ON BUILD_ALWAYS OFF CONFIGURE_HANDLED_BY_BUILD ON From 803f797e957f986a84b6b2577838b2a29606e1a0 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Thu, 24 Nov 2022 21:38:06 +0300 Subject: [PATCH 06/27] nine-native: improve d3d9types.h to support Perimeter game --- nine-native/include/D3D9/d3d9types.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nine-native/include/D3D9/d3d9types.h b/nine-native/include/D3D9/d3d9types.h index e18f29867..2550c8df2 100644 --- a/nine-native/include/D3D9/d3d9types.h +++ b/nine-native/include/D3D9/d3d9types.h @@ -48,6 +48,8 @@ /* Windows errors */ #define E_OUTOFMEMORY MAKE_HRESULT(1, 0x007, 14) +#define E_INVALIDARG MAKE_HRESULT(1, 0x007, 0x0057) +#define E_NOTIMPL MAKE_HRESULT(1, 0x000, 0x4001) #define E_NOINTERFACE MAKE_HRESULT(1, 0x000, 0x4002) #define E_POINTER MAKE_HRESULT(1, 0x000, 0x4003) #define E_FAIL MAKE_HRESULT(1, 0x000, 0x4005)
@@ -197,6 +199,7 @@ typedef struct _RGNDATA { /* Adapter */ #define D3DADAPTER_DEFAULT 0 +#define D3DSGR_NO_CALIBRATION 0x00000000 /******************************************************** * Return codes * @@ -309,6 +312,10 @@ typedef struct _RGNDATA { #define D3DFVF_TEXTUREFORMAT2 0x00000000 #define D3DFVF_TEXTUREFORMAT3 0x00000001 #define D3DFVF_TEXTUREFORMAT4 0x00000002 +#define D3DFVF_TEXCOORDSIZE1(CoordIndex) (D3DFVF_TEXTUREFORMAT1 << (CoordIndex*2 + 16)) +#define D3DFVF_TEXCOORDSIZE2(CoordIndex) (D3DFVF_TEXTUREFORMAT2) +#define D3DFVF_TEXCOORDSIZE3(CoordIndex) (D3DFVF_TEXTUREFORMAT3 << (CoordIndex*2 + 16)) +#define D3DFVF_TEXCOORDSIZE4(CoordIndex) (D3DFVF_TEXTUREFORMAT4 << (CoordIndex*2 + 16)) #define D3DFVF_POSITION_MASK 0x0000400E #define D3DFVF_TEXCOUNT_MASK 0x00000F00 From f0bdb5f1c9bbea3f010560aa4b56ca6959426a0a Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Sun, 4 Dec 2022 13:38:21 +0300 Subject: [PATCH 07/27] conan: use find_package to add SDL2 dependency in Linux (without conan) part 2 --- CMakeLists.txt | 2 +- src/apps/engine/CMakeLists.txt | 2 +- src/libs/core/CMakeLists.txt | 2 +- src/libs/input/CMakeLists.txt | 2 +- src/libs/window/CMakeLists.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06b14c819..1dd0236ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,7 +61,7 @@ endif() if (WIN32) message("Using Windows D3D9 API") - set(SDL_LIBRARIES "sdl") + set(SDL2_LIBRARIES "sdl") elseif (STORM_MESA_NINE) # for Gallium Nine message("Using Gallium Nine for native D3D9 API") diff --git a/src/apps/engine/CMakeLists.txt b/src/apps/engine/CMakeLists.txt index 249b5939e..db911fe9a 100644 --- a/src/apps/engine/CMakeLists.txt +++ b/src/apps/engine/CMakeLists.txt @@ -36,7 +36,7 @@ STORM_SETUP( # external mimalloc sentry-native - ${SDL_LIBRARIES} + ${SDL2_LIBRARIES} zlib # system diff --git a/src/libs/core/CMakeLists.txt b/src/libs/core/CMakeLists.txt index afd4561a1..5f2b3e783 100644 --- 
a/src/libs/core/CMakeLists.txt +++ b/src/libs/core/CMakeLists.txt @@ -1,5 +1,5 @@ STORM_SETUP( TARGET_NAME core TYPE library - DEPENDENCIES diagnostics math shared_headers steam_api fast_float ${SDL_LIBRARIES} window + DEPENDENCIES diagnostics math shared_headers steam_api fast_float ${SDL2_LIBRARIES} window ) diff --git a/src/libs/input/CMakeLists.txt b/src/libs/input/CMakeLists.txt index f33902846..f11f1e9e3 100644 --- a/src/libs/input/CMakeLists.txt +++ b/src/libs/input/CMakeLists.txt @@ -1,5 +1,5 @@ STORM_SETUP( TARGET_NAME input TYPE library - DEPENDENCIES ${SDL_LIBRARIES} util + DEPENDENCIES ${SDL2_LIBRARIES} util ) diff --git a/src/libs/window/CMakeLists.txt b/src/libs/window/CMakeLists.txt index e81319628..d026a7e38 100644 --- a/src/libs/window/CMakeLists.txt +++ b/src/libs/window/CMakeLists.txt @@ -1,5 +1,5 @@ STORM_SETUP( TARGET_NAME window TYPE library - DEPENDENCIES ${SDL_LIBRARIES} + DEPENDENCIES ${SDL2_LIBRARIES} ) From 1a1f42adfa09104e8251d5aca7db12d6cf8ff2af Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Sun, 8 May 2022 10:43:04 +0300 Subject: [PATCH 08/27] [arm] Add -fsigned-char to avoid differences between x86 and arm This fixes "reading" NO-BREAK SPACE chars (0xC2 0xA0) on arm. 
Example char: https://github.com/storm-devs/sd-teho-public/blob/f916551822b6b4f1c4c3c77c312379e3d3aec23e/program/seadogs.c#L1073 Where read fails on arm: https://github.com/storm-devs/storm-engine/blob/c729a650c7a45b46b72010935803c2ca5bed297a/src/libs/core/src/token.cpp#L917 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1dd0236ac..11bfdec57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,7 @@ conan_cmake_run(CONANFILE conanfile.py if (NOT WIN32) find_package(SDL2 REQUIRED) message(STATUS "SDL2_LIBRARIES="${SDL2_LIBRARIES}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${SDL2_INCLUDE_DIRS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${SDL2_INCLUDE_DIRS} -fsigned-char") message(STATUS "CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}) endif() From 993ee43521d147823a87d792443073b9d9b88295 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Wed, 14 Dec 2022 01:24:24 +0300 Subject: [PATCH 09/27] nine: don't use Vulkan window if defined STORM_MESA_NINE --- src/libs/window/src/sdl_window.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libs/window/src/sdl_window.cpp b/src/libs/window/src/sdl_window.cpp index d4b8207c9..429ad3422 100644 --- a/src/libs/window/src/sdl_window.cpp +++ b/src/libs/window/src/sdl_window.cpp @@ -8,7 +8,7 @@ SDLWindow::SDLWindow(int width, int height, int preferred_display, bool fullscre : fullscreen_(fullscreen) { uint32_t flags = (fullscreen ? 
SDL_WINDOW_FULLSCREEN : 0) | SDL_WINDOW_HIDDEN; -#ifndef _WIN32 // DXVK-Native +#if !defined(_WIN32) && !defined(STORM_MESA_NINE) // DXVK-Native flags |= SDL_WINDOW_VULKAN; #endif window_ = std::unique_ptr>( From 046b49887187f36dfbd44ca5df54a497d6f5f337 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Sun, 18 Dec 2022 22:01:38 +0300 Subject: [PATCH 10/27] gcc: fix unions in math3d/color.h and math3d/plane.h Also remove unused class DColor --- src/libs/math/include/math3d.h | 2 - src/libs/math/include/math3d/color.h | 158 ++++----------------------- src/libs/math/include/math3d/plane.h | 38 ++----- 3 files changed, 27 insertions(+), 171 deletions(-) diff --git a/src/libs/math/include/math3d.h b/src/libs/math/include/math3d.h index 5b358f4e5..53302ec6c 100644 --- a/src/libs/math/include/math3d.h +++ b/src/libs/math/include/math3d.h @@ -9,8 +9,6 @@ #include #include "platform/platform.hpp" - -#include #if defined(ARCH_X86) || defined(ARCH_X64) #include // espkk # remove inline asm # 30/Dec/2017 #elif defined(ARCH_ARM) || defined(ARCH_ARM64) diff --git a/src/libs/math/include/math3d/color.h b/src/libs/math/include/math3d/color.h index 1beba3783..20fe2b460 100644 --- a/src/libs/math/include/math3d/color.h +++ b/src/libs/math/include/math3d/color.h @@ -18,49 +18,14 @@ class Color { public: - union { - struct - { - union { - struct - { - // Red - float r; - // Green - float g; - // Blue - float b; - }; - - union { - struct - { - // rgb in vector - Vector c; - }; - - struct - { - // rgb in vector - Vector color; - }; - }; - }; - - union { - // Transparency - float a; - // Transparency - float alpha; - }; - }; - - struct - { - // Vector4 representation - Vector4 v4; - }; - }; + // Red + float r; + // Green + float g; + // Blue + float b; + // Transparency + float a; // ----------------------------------------------------------- // Constructors @@ -80,8 +45,6 @@ class Color Color(const Vector4 &v); // Unpack Color(uint32_t c); - // Unpack - Color(int32_t c); // Copy 
constructor Color(const Color &c); @@ -216,44 +179,6 @@ class Color static unsigned short Make4444(uint32_t color); }; -// Integer color representation -class DColor -{ - public: - union { - struct - { - // Blue - unsigned char b; - // Green - unsigned char g; - // Red - unsigned char r; - // Transparency - unsigned char a; - }; - - union { - // Packed color - uint32_t c; - // Packed color - uint32_t color; - }; - }; - - // ----------------------------------------------------------- - // Operators - // ----------------------------------------------------------- - public: - // Assign - DColor &operator=(uint32_t color); - // Assign - DColor &operator=(int32_t color); - - // Get int32_t - operator uint32_t() const; -}; - // =========================================================== // Constructors // =========================================================== @@ -315,13 +240,10 @@ inline Color::Color(const Vector4 &v) // Unpack inline Color::Color(uint32_t c) { - *this = c; -} - -// Unpack -inline Color::Color(int32_t c) -{ - *this = static_cast(c); + r = static_cast(c >> 16) * (1.0f / 255.0f); + g = static_cast(c >> 8) * (1.0f / 255.0f); + b = static_cast(c >> 0) * (1.0f / 255.0f); + a = static_cast(c >> 24) * (1.0f / 255.0f); } // Copy constructor @@ -1213,7 +1135,13 @@ inline Color &Color::SwapRB() // Get packed color as uint32_t inline uint32_t Color::GetDword() const { - /* int32_t l;*/ + uint32_t t = (static_cast(fftoi(r * 255.0f)) << 16) + + (static_cast(fftoi(g * 255.0f)) << 8) + + (static_cast(fftoi(b * 255.0f)) << 0) + + (static_cast(fftoi(a * 255.0f)) << 24); + + return t; +/* DColor color; const auto k = 255.0f; @@ -1223,35 +1151,7 @@ inline uint32_t Color::GetDword() const color.a = static_cast(fftoi(a * k)); return color.c; - - /*_asm - { - mov eax, this - fld [eax]this.r - fld k - fmul - fistp l - mov ebx, l - mov color.r, bl - fld [eax]this.g - fld k - fmul - fistp l - mov ebx, l - mov color.g, bl - fld [eax]this.b - fld k - fmul - fistp l - mov ebx, 
l - mov color.b, bl - fld [eax]this.a - fld k - fmul - fistp l - mov ebx, l - mov color.a, bl - };*/ +*/ } // Converting A8R8G8B8 to R5G6B5 @@ -1300,22 +1200,4 @@ inline unsigned short Color::Make4444(uint32_t color) return static_cast(r | g | b | a); } -// =========================================================== -// DColor -// =========================================================== - -// Assign -inline DColor &DColor::operator=(uint32_t color) -{ - c = color; - return *this; -} - -// Assign -inline DColor &DColor::operator=(int32_t color) -{ - c = static_cast(color); - return *this; -} - #pragma pack(pop) diff --git a/src/libs/math/include/math3d/plane.h b/src/libs/math/include/math3d/plane.h index ff87741f1..e14b535d2 100644 --- a/src/libs/math/include/math3d/plane.h +++ b/src/libs/math/include/math3d/plane.h @@ -14,34 +14,10 @@ class Plane { public: - union { - struct - { - // Normal - Vector n; - }; - - struct - { - // Normal - Vector normal; - }; - - struct - { - // Normal - Vector N; - }; - }; - - union { - // Distance from center - float d; - // Distance from center - float dist; - // Distance from center - float D; - }; + // Normal + Vector N; + // Distance from center + float D; // ----------------------------------------------------------- // Constructors @@ -146,11 +122,11 @@ inline float operator*(const Plane &plane, const Vector &point) // Normalize inline Plane &Plane::Normalize() { - const auto d = normal.Normalize(); + const auto d = N.Normalize(); if (d != 0.0f) - dist /= d; + D /= d; else - dist = 0.0f; + D = 0.0f; return *this; } From 2d0f2d3a5a868d11b485e52c0e1702016ed86f7e Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Sun, 18 Dec 2022 23:23:08 +0300 Subject: [PATCH 11/27] gcc: fix unions in matrix.h and math3d/matrix.h --- src/libs/location/src/location_camera.cpp | 42 ++- src/libs/math/include/math3d/matrix.h | 260 ++++++++++++------ src/libs/math/include/matrix.h | 20 -- .../particles/src/system/emitters/base.cpp | 19 +- 
src/libs/sea/src/env_map.cpp | 4 +- src/libs/sea_cameras/src/ship_camera.cpp | 2 +- 6 files changed, 222 insertions(+), 125 deletions(-) diff --git a/src/libs/location/src/location_camera.cpp b/src/libs/location/src/location_camera.cpp index ace14faca..c92d5167f 100644 --- a/src/libs/location/src/location_camera.cpp +++ b/src/libs/location/src/location_camera.cpp @@ -764,10 +764,20 @@ bool LocationCamera::LoadCameraTrack(const char *pcTrackFile, float fTrackTime) Matrix view; m_track.GetPoint(0.f, pos, ang); ang.GetMatrix(view); - view.vx = -view.vx; - view.vz = -view.vz; + //view.vx = -view.vx; + view.m[0][0] = -view.m[0][0]; + view.m[0][1] = -view.m[0][1]; + view.m[0][2] = -view.m[0][2]; + //view.vz = -view.vz; + view.m[2][0] = -view.m[2][0]; + view.m[2][1] = -view.m[2][1]; + view.m[2][2] = -view.m[2][2]; view.Inverse(); - view.pos = view * -pos; + //view.pos = view * -pos; + pos = view * -pos; + view.m[3][0] = pos.x; + view.m[3][1] = pos.y; + view.m[3][2] = pos.z; rs->SetView(*(CMatrix *)&view); rs->SetPerspective(cameraPerspective); @@ -786,8 +796,14 @@ void LocationCamera::TurnOffTrackCamera() Matrix view; m_track.GetPoint(0.99999f, pos, ang); ang.GetMatrix(view); - view.vx = -view.vx; - view.vz = -view.vz; + //view.vx = -view.vx; + view.m[0][0] = -view.m[0][0]; + view.m[0][1] = -view.m[0][1]; + view.m[0][2] = -view.m[0][2]; + //view.vz = -view.vz; + view.m[2][0] = -view.m[2][0]; + view.m[2][1] = -view.m[2][1]; + view.m[2][2] = -view.m[2][2]; view.Inverse(); oldPos = *(CVECTOR *)&pos; @@ -811,10 +827,20 @@ void LocationCamera::ProcessTrackCamera() Matrix view; m_track.GetPoint(fTrackTime / m_fTrackMaxTime, pos, ang); ang.GetMatrix(view); - view.vx = -view.vx; - view.vz = -view.vz; + //view.vx = -view.vx; + view.m[0][0] = -view.m[0][0]; + view.m[0][1] = -view.m[0][1]; + view.m[0][2] = -view.m[0][2]; + //view.vz = -view.vz; + view.m[2][0] = -view.m[2][0]; + view.m[2][1] = -view.m[2][1]; + view.m[2][2] = -view.m[2][2]; view.Inverse(); - view.pos = view * -pos; + 
//view.pos = view * -pos; + pos = view * -pos; + view.m[3][0] = pos.x; + view.m[3][1] = pos.y; + view.m[3][2] = pos.z; rs->SetView(*(CMatrix *)&view); rs->SetPerspective(cameraPerspective); } diff --git a/src/libs/math/include/math3d/matrix.h b/src/libs/math/include/math3d/matrix.h index 4b0d5c99c..336cfa379 100644 --- a/src/libs/math/include/math3d/matrix.h +++ b/src/libs/math/include/math3d/matrix.h @@ -36,26 +36,6 @@ class Matrix alignas(16) float matrix[16]; // espkk # remove inline asm # 30/Dec/2017 // Two-dimensional array float m[4][4]; - - struct - { - // X direction - Vector vx; - // Weight value of X - float wx; - // Y direction - Vector vy; - // Weight value of Y - float wy; - // Z direction - Vector vz; - // Weight value of Z - float wz; - // Position - Vector pos; - // Added weight value - float w; - }; }; // ----------------------------------------------------------- @@ -101,6 +81,8 @@ class Matrix public: // Set identity matrix Matrix &SetIdentity(); + // Set 3x3 identity matrix + Matrix &SetIdentity3X3(); // Set Matrix Matrix &Set(const Matrix &matrix); @@ -314,21 +296,30 @@ inline Matrix &Matrix::operator=(const Matrix &mtx) // Assign a number to the position inline Matrix &Matrix::operator=(float f) { - pos = f; + //pos = f; + m[3][0] = f; + m[3][1] = f; + m[3][2] = f; return *this; } // Assign a number to the position inline Matrix &Matrix::operator=(double d) { - pos = d; + //pos = d; + m[3][0] = static_cast(d); + m[3][1] = static_cast(d); + m[3][2] = static_cast(d); return *this; } // Assign a vector to the position inline Matrix &Matrix::operator=(const Vector &v) { - pos = v; + //pos = v; + m[3][0] = v.x; + m[3][1] = v.y; + m[3][2] = v.z; return *this; } @@ -414,6 +405,21 @@ inline Matrix &Matrix::SetIdentity() return *this; } +inline Matrix &Matrix::SetIdentity3X3() +{ + m[0][0] = 1.f; + m[0][1] = 0; + m[0][2] = 0; + m[1][0] = 0; + m[1][1] = 1.f; + m[1][2] = 0; + m[2][0] = 0; + m[2][1] = 0; + m[2][2] = 1.f; + + return *this; +} + // Set 
Matrix inline Matrix &Matrix::Set(const Matrix &matrix) { @@ -799,7 +805,10 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) if (lookTo.Normalize() == 0.0f) { // Putting a position for a non-rotated matrix - pos = -lookFrom; + //pos = -lookFrom; + m[3][0] = -lookFrom.x; + m[3][1] = -lookFrom.y; + m[3][2] = -lookFrom.z; return false; } // Directing the vector up in the desired direction @@ -825,12 +834,19 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) else { // Setting a position for a non-rotated matrix - pos = -lookFrom; + //pos = -lookFrom; + m[3][0] = -lookFrom.x; + m[3][1] = -lookFrom.y; + m[3][2] = -lookFrom.z; return false; } // set the position // pos = -MulNormalByInverse (lookFrom); - pos = -MulNormal(lookFrom); + //pos = -MulNormal(lookFrom); + auto p = -MulNormal(lookFrom); + m[3][0] = p.x; + m[3][1] = p.y; + m[3][2] = p.z; return true; } @@ -840,22 +856,35 @@ inline bool Matrix::BuildOrient(Vector zAxisDirection, Vector upVector) // Normalize the direction vector z if (zAxisDirection.Normalize() < 1e-37f || upVector.Normalize() < 1e-37f) { - vx = Vector(1.0f, 0.0f, 0.0f); - vy = Vector(0.0f, 1.0f, 0.0f); - vz = Vector(0.0f, 0.0f, 1.0f); + //vx = Vector(1.0f, 0.0f, 0.0f); + //vy = Vector(0.0f, 1.0f, 0.0f); + //vz = Vector(0.0f, 0.0f, 1.0f); + SetIdentity3X3(); return false; } // calculate - vx = zAxisDirection ^ upVector; - if (vx.Normalize() == 0.0f) + //vx = zAxisDirection ^ upVector; + auto vec = zAxisDirection ^ upVector; + if (vec.Normalize() == 0.0f) { - vx = Vector(1.0f, 0.0f, 0.0f); - vy = Vector(0.0f, 1.0f, 0.0f); - vz = Vector(0.0f, 0.0f, 1.0f); + //vx = Vector(1.0f, 0.0f, 0.0f); + //vy = Vector(0.0f, 1.0f, 0.0f); + //vz = Vector(0.0f, 0.0f, 1.0f); + SetIdentity3X3(); return false; } - vy = zAxisDirection ^ vx; - vz = zAxisDirection; + m[0][0] = vec.x; // vx: stored only after Normalize(), as the old in-place vx was + m[0][1] = vec.y; + m[0][2] = vec.z; + //vy = zAxisDirection ^ vx; + vec = zAxisDirection ^ vec; + m[1][0] = vec.x; + m[1][1] 
= vec.y; + m[1][2] = vec.z; + //vz = zAxisDirection; + m[2][0] = zAxisDirection.x; + m[2][1] = zAxisDirection.y; + m[2][2] = zAxisDirection.z; return true; } @@ -867,37 +896,68 @@ inline bool Matrix::BuildOriented(Vector position, Vector lookTo, Vector upVecto // Normalize the direction vector z if (lookTo.Normalize() == 0.0f || upVector.Normalize() == 0.0f) { - vx = Vector(1.0f, 0.0f, 0.0f); - wx = 0.0f; - vy = Vector(0.0f, 1.0f, 0.0f); - wy = 0.0f; - vz = Vector(0.0f, 0.0f, 1.0f); - wz = 0.0f; - pos = position; - w = 1.0f; + //vx = Vector(1.0f, 0.0f, 0.0f); + //wx = 0.0f; + //vy = Vector(0.0f, 1.0f, 0.0f); + //wy = 0.0f; + //vz = Vector(0.0f, 0.0f, 1.0f); + //wz = 0.0f; + SetIdentity3X3(); + m[0][3] = 0.0f; // wx + m[1][3] = 0.0f; // wy + m[2][3] = 0.0f; // wz + //pos = position; + //w = 1.0f; + m[3][0] = position.x; + m[3][1] = position.y; + m[3][2] = position.z; + m[3][3] = 1.0f; return false; } // calculate - vx = lookTo ^ upVector; - wx = 0.0f; - if (vx.Normalize() == 0.0f) + //vx = lookTo ^ upVector; + auto vec = lookTo ^ upVector; + m[0][3] = 0.0f; + if (vec.Normalize() == 0.0f) { - vx = Vector(1.0f, 0.0f, 0.0f); - wx = 0.0f; - vy = Vector(0.0f, 1.0f, 0.0f); - wy = 0.0f; - vz = Vector(0.0f, 0.0f, 1.0f); - wz = 0.0f; - pos = position; - w = 1.0f; + //vx = Vector(1.0f, 0.0f, 0.0f); + //wx = 0.0f; + //vy = Vector(0.0f, 1.0f, 0.0f); + //wy = 0.0f; + //vz = Vector(0.0f, 0.0f, 1.0f); + //wz = 0.0f; + SetIdentity3X3(); + m[0][3] = 0.0f; // wx + m[1][3] = 0.0f; // wy + m[2][3] = 0.0f; // wz + //pos = position; + //w = 1.0f; + m[3][0] = position.x; + m[3][1] = position.y; + m[3][2] = position.z; + m[3][3] = 1.0f; return false; } - vy = lookTo ^ vx; - wy = 0.0f; - vz = lookTo; - wz = 0.0f; - pos = position; - w = 1.0f; + m[0][0] = vec.x; // vx: stored only after Normalize(), as the old in-place vx was + m[0][1] = vec.y; + m[0][2] = vec.z; + //vy = lookTo ^ vx; + vec = lookTo ^ vec; + m[1][0] = vec.x; + m[1][1] = vec.y; + m[1][2] = vec.z; + m[1][3] = 0.0f; + //vz = lookTo; + m[2][0] = lookTo.x; + m[2][1] = lookTo.y; + m[2][2] = 
lookTo.z; + m[2][3] = 0.0f; + //pos = position; + //w = 1.0f; + m[3][0] = position.x; + m[3][1] = position.y; + m[3][2] = position.z; + m[3][3] = 1.0f; return true; } @@ -975,18 +1035,24 @@ inline Matrix &Matrix::Rotate(const Vector &ang) // Move inline Matrix &Matrix::Move(float dX, float dY, float dZ) { - pos.x += dX; - pos.y += dY; - pos.z += dZ; + //pos.x += dX; + //pos.y += dY; + //pos.z += dZ; + m[3][0] += dX; + m[3][1] += dY; + m[3][2] += dZ; return *this; } // Move inline Matrix &Matrix::Move(const Vector &pos) { - this->pos.x += pos.x; - this->pos.y += pos.y; - this->pos.z += pos.z; + //this->pos.x += pos.x; + //this->pos.y += pos.y; + //this->pos.z += pos.z; + m[3][0] += pos.x; + m[3][1] += pos.y; + m[3][2] += pos.z; return *this; } @@ -1054,7 +1120,13 @@ inline Matrix &Matrix::Scale3x3(const Vector &scale) // Calculating the inverse matrix inline Matrix &Matrix::Inverse() { - pos = Vector(-(pos | vx), -(pos | vy), -(pos | vz)); + //pos = Vector(-(pos | vx), -(pos | vy), -(pos | vz)); + float px = -(m[3][0] * m[0][0] + m[3][1] * m[0][1] + m[3][2] * m[0][2]); //-(pos | vx) + float py = -(m[3][0] * m[1][0] + m[3][1] * m[1][1] + m[3][2] * m[1][2]); //-(pos | vy) + float pz = -(m[3][0] * m[2][0] + m[3][1] * m[2][1] + m[3][2] * m[2][2]); //-(pos | vz) + m[3][0] = px; + m[3][1] = py; + m[3][2] = pz; Transposition3X3(); return *this; } @@ -1062,16 +1134,19 @@ inline Matrix &Matrix::Inverse() // Calculating an inverse matrix from another inline Matrix &Matrix::Inverse(const Matrix &mtx) { - pos = Vector(-(mtx.pos | mtx.vx), -(mtx.pos | mtx.vy), -(mtx.pos | mtx.vz)); - matrix[0] = mtx.matrix[0]; - matrix[1] = mtx.matrix[4]; - matrix[2] = mtx.matrix[8]; - matrix[4] = mtx.matrix[1]; - matrix[5] = mtx.matrix[5]; - matrix[6] = mtx.matrix[9]; - matrix[8] = mtx.matrix[2]; - matrix[9] = mtx.matrix[6]; - matrix[10] = mtx.matrix[10]; + //pos = Vector(-(mtx.pos | mtx.vx), -(mtx.pos | mtx.vy), -(mtx.pos | mtx.vz)); + m[3][0] = -(mtx.m[3][0] * mtx.m[0][0] + mtx.m[3][1] * 
mtx.m[0][1] + mtx.m[3][2] * mtx.m[0][2]); //-(mtx.pos | mtx.vx) + m[3][1] = -(mtx.m[3][0] * mtx.m[1][0] + mtx.m[3][1] * mtx.m[1][1] + mtx.m[3][2] * mtx.m[1][2]); //-(mtx.pos | mtx.vy) + m[3][2] = -(mtx.m[3][0] * mtx.m[2][0] + mtx.m[3][1] * mtx.m[2][1] + mtx.m[3][2] * mtx.m[2][2]); //-(mtx.pos | mtx.vz) + m[0][0] = mtx.m[0][0]; + m[0][1] = mtx.m[1][0]; + m[0][2] = mtx.m[2][0]; + m[1][0] = mtx.m[0][1]; + m[1][1] = mtx.m[1][1]; + m[1][2] = mtx.m[2][1]; + m[2][0] = mtx.m[0][2]; + m[2][1] = mtx.m[1][2]; + m[2][2] = mtx.m[2][2]; /* _asm { @@ -1133,7 +1208,12 @@ inline Matrix &Matrix::InverseWhithScale() matrix[i] = 0.0f; } // Position + //pos = -(MulNormal(pos)); + auto pos = Vector(m[3][0], m[3][1], m[3][2]); pos = -(MulNormal(pos)); + m[3][0] = pos.x; + m[3][1] = pos.y; + m[3][2] = pos.z; return *this; } @@ -1341,7 +1421,7 @@ inline Vector Matrix::MulNormalByInverse(const Vector &v) const // Get camera position from camera matrix inline Vector Matrix::GetCamPos() const { - return -MulNormalByInverse(pos); + return -MulNormalByInverse(Vector(m[3][0], m[3][1], m[3][2])); } // Identity matrix or not @@ -1387,13 +1467,13 @@ inline bool Matrix::IsIdentity() const inline bool Matrix::IsScale() const { const auto eps = 1e-4f; - if (fabsf(~vx - 1.0f) > eps) + if (fabsf(m[0][0] * m[0][0] + m[0][1] * m[0][1] + m[0][2] * m[0][2] - 1.0f) > eps) //~vx return true; - if (fabsf(~vy - 1.0f) > eps) + if (fabsf(m[1][0] * m[1][0] + m[1][1] * m[1][1] + m[1][2] * m[1][2] - 1.0f) > eps) //~vy return true; - if (fabsf(~vz - 1.0f) > eps) + if (fabsf(m[2][0] * m[2][0] + m[2][1] * m[2][1] + m[2][2] * m[2][2] - 1.0f) > eps) //~vz return true; - if (fabsf(w - 1.0f) > eps) + if (fabsf(m[3][3] - 1.0f) > eps) return true; return false; } @@ -1444,24 +1524,24 @@ inline void Matrix::Projection(Vector4 *dstArray, Vector *srcArray, int32_t num, // Get angles from unscaled rotation matrix inline void Matrix::GetAngles(float &ax, float &ay, float &az) const { - if (vz.y < 1.0f) + if (m[2][1] < 1.0f) 
//vz.y { - if (vz.y > -1.0f) + if (m[2][1] > -1.0f) //vz.y { - ax = static_cast(asin(-vz.y)); - ay = static_cast(atan2(vz.x, vz.z)); - az = static_cast(atan2(vx.y, vy.y)); + ax = static_cast(asin(-m[2][1])); //-vz.y + ay = static_cast(atan2(m[2][0], m[2][2])); //vz.x, vz.z + az = static_cast(atan2(m[0][1], m[1][1])); //vx.y, vy.y return; } ax = 3.141592654f * 0.5f; ay = 0.0f; - az = static_cast(atan2(vx.z, vx.x)); + az = static_cast(atan2(m[0][2], m[0][0])); //vx.z, vx.x } else { ax = -3.141592654f * 0.5f; ay = 0.0f; - az = static_cast(-atan2(vx.z, vx.x)); + az = static_cast(-atan2(m[0][2], m[0][0])); //vx.z, vx.x } } diff --git a/src/libs/math/include/matrix.h b/src/libs/math/include/matrix.h index c598ad7f9..41ac2303f 100644 --- a/src/libs/math/include/matrix.h +++ b/src/libs/math/include/matrix.h @@ -34,26 +34,6 @@ class CMatrix union { alignas(16) float matrix[16]; // espkk # remove inline asm # 30/Dec/2017 float m[4][4]; - - struct - { - // X direction - CVECTOR vx; - // Weight value on X - float wx; - // Y direction - CVECTOR vy; - // Weight value on Y - float wy; - // Z direction - CVECTOR vz; - // Weight value on Z - float wz; - // Position - CVECTOR pos; - // Added weight value - float w; - }; }; //----------------------------------------------------------- diff --git a/src/libs/particles/src/system/emitters/base.cpp b/src/libs/particles/src/system/emitters/base.cpp index e2812955a..1fbb76e24 100644 --- a/src/libs/particles/src/system/emitters/base.cpp +++ b/src/libs/particles/src/system/emitters/base.cpp @@ -48,7 +48,10 @@ void BaseEmitter::BornParticles(float DeltaTime) BlendMatrix(matWorldTransform, matWorldTransformOld, matWorldTransformNew, MatrixBlend); const auto TransformPos = Position * matWorldTransform; - matWorldTransform.pos = TransformPos; + //matWorldTransform.pos = TransformPos; + matWorldTransform.m[3][0] = TransformPos.x; + matWorldTransform.m[3][1] = TransformPos.y; + matWorldTransform.m[3][2] = TransformPos.z; MatrixBlend += 
MatrixBlendInc; const auto DeltaTimeDiv = DeltaTime / INTERPOLATION_STEPS; @@ -82,7 +85,8 @@ void BaseEmitter::BornParticles(float DeltaTime) { auto ParticlePos = GetNewParticlePosition(DeltaTime); GetEmissionDirection(matTransform); - auto VelDir = matTransform.vy; + //auto VelDir = matTransform.vy; + auto VelDir = Vector(matTransform.m[1][0], matTransform.m[1][1], matTransform.m[1][2]); switch (ParticleTypes[n].Type) { case BILLBOARD_PARTICLE: @@ -278,8 +282,10 @@ void BaseEmitter::BlendMatrix(Matrix &result, const Matrix &mat1, const Matrix & { const Quaternion qRot1(mat1); const Quaternion qRot2(mat2); - const auto vPos1 = mat1.pos; - const auto vPos2 = mat2.pos; + //const auto vPos1 = mat1.pos; + const auto vPos1 = Vector(mat1.m[3][0], mat1.m[3][1], mat1.m[3][2]); + //const auto vPos2 = mat2.pos; + const auto vPos2 = Vector(mat2.m[3][0], mat2.m[3][1], mat2.m[3][2]); Quaternion qBlend; qBlend.SLerp(qRot1, qRot2, BlendK); @@ -288,7 +294,10 @@ void BaseEmitter::BlendMatrix(Matrix &result, const Matrix &mat1, const Matrix & vBlend.Lerp(vPos1, vPos2, BlendK); qBlend.GetMatrix(result); - result.pos = vBlend; + //result.pos = vBlend; + result.m[3][0] = vBlend.x; + result.m[3][1] = vBlend.y; + result.m[3][2] = vBlend.z; } const char *BaseEmitter::GetName() diff --git a/src/libs/sea/src/env_map.cpp b/src/libs/sea/src/env_map.cpp index 8b27d6567..d4de69c20 100644 --- a/src/libs/sea/src/env_map.cpp +++ b/src/libs/sea/src/env_map.cpp @@ -200,7 +200,9 @@ bool SEA::EnvMap_Render2() CMatrix mTex; mTex.BuildScale(CVECTOR(0.5f, -0.5f, 0.5f)); - mTex.pos = CVECTOR(0.5f, 0.5f, 0.5f); + mTex.m[3][0] = 0.5f; //pos.x + mTex.m[3][1] = 0.5f; //pos.y + mTex.m[3][2] = 0.5f; //pos.z CMatrix mProj = rs->GetProjection(); mProj = mProj * mTex; diff --git a/src/libs/sea_cameras/src/ship_camera.cpp b/src/libs/sea_cameras/src/ship_camera.cpp index 32abc3916..4dbecd0d4 100644 --- a/src/libs/sea_cameras/src/ship_camera.cpp +++ b/src/libs/sea_cameras/src/ship_camera.cpp @@ -125,7 +125,7 @@ void 
SHIP_CAMERA::Move(float fDeltaTime) const auto *modelMtx = GetAIObj()->GetMatrix(); auto boxSize = GetAIObj()->GetBoxsize(); // Recalculate box size: (box size + immersion) * hand-fitted scale - boxSize.y += modelMtx->pos.y; + boxSize.y += modelMtx->m[3][1]; //pos.y boxSize *= CVECTOR(SCMR_BOXSCALE_X * 0.5f, SCMR_BOXSCALE_Y * 0.5f, SCMR_BOXSCALE_Z * 0.5f); // Project real height (with masts) const auto realBoxSize = GetAIObj()->GetRealBoxsize(); From 80a408bb6cba74de1fedd9620d0b48cb3fb2bdc1 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Sun, 18 Dec 2022 23:24:30 +0300 Subject: [PATCH 12/27] gcc: fix unions in math3d/vector4.h --- src/libs/math/include/math3d/vector4.h | 65 +++++++++++++++----------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/src/libs/math/include/math3d/vector4.h b/src/libs/math/include/math3d/vector4.h index c9a427e34..921953106 100644 --- a/src/libs/math/include/math3d/vector4.h +++ b/src/libs/math/include/math3d/vector4.h @@ -17,24 +17,12 @@ struct Vector4 union { struct { - union { - struct - { - // The X component - float x; - // The Y component - float y; - // The Z component - float z; - }; - - struct - { - // Vector representation - Vector v; - }; - }; - + // The X component + float x; + // The Y component + float y; + // The Z component + float z; // Weight component float w; }; @@ -106,8 +94,11 @@ inline Vector4::Vector4() } // Fill with number -inline Vector4::Vector4(float f) : v(f) +inline Vector4::Vector4(float f) { + x = f; + y = f; + z = f; w = f; } @@ -118,33 +109,47 @@ inline Vector4::Vector4(double d) } // Fill 3 components, 1 -inline Vector4::Vector4(float x, float y, float z) : v(x, y, z) +inline Vector4::Vector4(float x, float y, float z) { + this->x = x; + this->y = y; + this->z = z; w = 1.0f; } // Fill all components -inline Vector4::Vector4(float x, float y, float z, float w) : v(x, y, z) +inline Vector4::Vector4(float x, float y, float z, float w) { + this->x = x; + this->y = y; + this->z = z; 
this->w = w; } // Fill 3 components, 1 -inline Vector4::Vector4(const float f[3]) : v(f[0], f[1], f[2]) +inline Vector4::Vector4(const float f[3]) { + x = f[0]; + y = f[1]; + z = f[2]; w = 1.0f; } // Fill 3 components, 1 inline Vector4::Vector4(const double d[3]) - : v(static_cast(d[0]), static_cast(d[1]), static_cast(d[2])) { + x = static_cast(d[0]); + y = static_cast(d[1]); + z = static_cast(d[2]); w = 1.0f; } // Fill 3 components, 1 -inline Vector4::Vector4(const Vector &vc) : v(vc) +inline Vector4::Vector4(const Vector &vc) { + x = vc.x; + y = vc.y; + z = vc.z; w = 1.0f; } @@ -181,7 +186,9 @@ inline Vector4 Vector4::operator-() const // Assign inline Vector4 &Vector4::operator=(float f) { - v = f; + x = f; + y = f; + z = f; w = 1.0f; return *this; } @@ -189,7 +196,9 @@ inline Vector4 &Vector4::operator=(float f) // Assign inline Vector4 &Vector4::operator=(double d) { - v = d; + x = static_cast(d); + y = static_cast(d); + z = static_cast(d); w = 1.0f; return *this; } @@ -197,7 +206,9 @@ inline Vector4 &Vector4::operator=(double d) // Assign inline Vector4 &Vector4::operator=(const Vector &v) { - this->v = v; + x = v.x; + y = v.y; + z = v.z; w = 1.0f; return *this; } From 640c70b24c4c2126447546f95079fb1f29937a7e Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 19 Dec 2022 11:35:46 +0300 Subject: [PATCH 13/27] gcc: fix unions in math3d/sphere.h --- .../src/spyglass/spyglass.cpp | 4 +- src/libs/math/include/math3d/sphere.h | 75 +++++++------------ 2 files changed, 28 insertions(+), 51 deletions(-) diff --git a/src/libs/battle_interface/src/spyglass/spyglass.cpp b/src/libs/battle_interface/src/spyglass/spyglass.cpp index 11fff1457..5fa762164 100644 --- a/src/libs/battle_interface/src/spyglass/spyglass.cpp +++ b/src/libs/battle_interface/src/spyglass/spyglass.cpp @@ -487,7 +487,9 @@ void ISPYGLASS::FindNewTargetShip() continue; Sphere sph; const auto &vec = pSD->pShip->GetPos(); - sph.pos = {vec.x, vec.y, vec.z}; + sph.x = vec.x; + sph.y = vec.y; + sph.z = 
vec.z; sph.r = 40.f; if (sph.Intersection(vsrc, vdst)) { diff --git a/src/libs/math/include/math3d/sphere.h b/src/libs/math/include/math3d/sphere.h index 873ade967..b7e7a5bd2 100644 --- a/src/libs/math/include/math3d/sphere.h +++ b/src/libs/math/include/math3d/sphere.h @@ -18,45 +18,18 @@ class Sphere union { struct { - union { - struct - { - // X Position - float x; - // Y position - float y; - // Z Position - float z; - }; - - union { - struct - { - // Position - Vector p; - }; - - struct - { - // Position - Vector pos; - }; - }; - }; - - union { - // Radius - float r; - // Radius - float radius; - }; + // X Position + float x; + // Y position + float y; + // Z Position + float z; + // Radius + float r; }; - struct - { - // Vector4 representation - Vector4 v4; - }; + // Vector4 representation + Vector4 v4; }; // ----------------------------------------------------------- @@ -112,7 +85,7 @@ inline Sphere::Sphere(const Sphere &s) // Point in sphere inline bool Sphere::Intersection(const Vector &p) { - return ~(pos - p) <= radius * radius; + return ~Vector(x - p.x, y - p.y, z - p.z) <= r * r; } // Check intersection of line and sphere @@ -123,14 +96,14 @@ inline bool Sphere::Intersection(const Vector &src, const Vector &dst) if (len > 1e-10f) { float dist; - if (!Intersection(src, dir, pos, r, &dist)) + if (!Intersection(src, dir, Vector(x, y, z), r, &dist)) return false; if (dist >= 0.0f) { return (dist <= len); } dir = -dir; - if (!Intersection(dst, dir, pos, r, &dist)) + if (!Intersection(dst, dir, Vector(x, y, z), r, &dist)) return false; if (dist >= 0.0f) { @@ -138,25 +111,27 @@ inline bool Sphere::Intersection(const Vector &src, const Vector &dst) } return false; } - return ~(pos - src) <= radius * radius; + return ~Vector(x - src.x, y - src.y, z - src.z) <= r * r; } // Check ray and sphere intersection inline bool Sphere::Intersection(const Vector &orig, const Vector &normdir, float *res) { - return Intersection(orig, normdir, pos, r, res); + return 
Intersection(orig, normdir, Vector(x, y, z), r, res); } // Check sphere and sphere intersection inline bool Sphere::Intersection(const Sphere &sph) { - return (~(p - sph.p) <= (r + sph.r) * (r + sph.r)); + return (~Vector(x - sph.x, y - sph.y, z - sph.z) <= (r + sph.r) * (r + sph.r)); } // Set sphere in a point with 0 radius inline void Sphere::Reset(const Vector &p) { - pos = p; + x = p.x; + y = p.y; + z = p.z; r = 0.0f; } @@ -164,9 +139,9 @@ inline void Sphere::Reset(const Vector &p) inline void Sphere::AddPoint(const Vector &p) { // Vector from point to center - const float dx = pos.x - p.x; - const float dy = pos.y - p.y; - const float dz = pos.z - p.z; + const float dx = x - p.x; + const float dy = y - p.y; + const float dz = z - p.z; float len = dx * dx + dy * dy + dz * dz; if (len <= r * r) return; @@ -175,9 +150,9 @@ inline void Sphere::AddPoint(const Vector &p) r = (len + r) * 0.5f; // New position len = r / len; - pos.x = p.x + dx * len; - pos.y = p.y + dy * len; - pos.z = p.z + dz * len; + x = p.x + dx * len; + y = p.y + dy * len; + z = p.z + dz * len; } // Check ray and sphere intersection From 675b51d013f496507375a4c20e8030af464e5787 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 19 Dec 2022 11:38:55 +0300 Subject: [PATCH 14/27] gcc: add name cv to 2nd struct in SAILGEOMETRY from sailone.h --- src/libs/rigging/src/sail.cpp | 6 +- src/libs/rigging/src/sailone.cpp | 104 +++++++++++++++---------------- src/libs/rigging/src/sailone.h | 2 +- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/libs/rigging/src/sail.cpp b/src/libs/rigging/src/sail.cpp index bfac76bb4..96fa635af 100644 --- a/src/libs/rigging/src/sail.cpp +++ b/src/libs/rigging/src/sail.cpp @@ -801,8 +801,8 @@ uint64_t SAIL::ProcessMessage(MESSAGE &message) if (so->ss.eSailType != SAIL_TREANGLE) { CVECTOR tmpv; - tmpv = so->sgeo.dVv + so->sgeo.ddVv * static_cast(SAIL_ROW_MAX); - tmpv = so->sgeo.dVv + so->sgeo.ddVv * static_cast(SAIL_ROW_MAX); + tmpv = so->sgeo.cv.dVv 
+ so->sgeo.cv.ddVv * static_cast(SAIL_ROW_MAX); + tmpv = so->sgeo.cv.dVv + so->sgeo.cv.ddVv * static_cast(SAIL_ROW_MAX); tmpv = (so->sailtrope.rrs[0]->r1) * (!tmpv); tmpv = *pos - tmpv; @@ -834,7 +834,7 @@ uint64_t SAIL::ProcessMessage(MESSAGE &message) if (so->ss.eSailType != SAIL_TREANGLE) { CVECTOR tmpv; - tmpv = so->sgeo.dVv + so->sgeo.ddVv * static_cast(SAIL_ROW_MAX); + tmpv = so->sgeo.cv.dVv + so->sgeo.cv.ddVv * static_cast(SAIL_ROW_MAX); tmpv = (so->sailtrope.rrs[1]->r1) * (!tmpv); tmpv = *pos - tmpv; diff --git a/src/libs/rigging/src/sailone.cpp b/src/libs/rigging/src/sailone.cpp index ea7b1ba08..530d747a0 100644 --- a/src/libs/rigging/src/sailone.cpp +++ b/src/libs/rigging/src/sailone.cpp @@ -461,14 +461,14 @@ void SAILONE::FillVertex(SAILVERTEX *pv) auto tmpRow = 1.f / static_cast((SAIL_ROW_MAX - 1)); auto pStart = ss.hardPoints[0]; - auto dVStart = sgeo.dVv; - auto dVhStart = sgeo.dVh; + auto dVStart = sgeo.cv.dVv; + auto dVhStart = sgeo.cv.dVh; if (ss.eSailType == SAIL_TREANGLE) { dStart = (ss.hardPoints[1] - pStart) * tmpRow; // define the center of divergence of normals - dnorm = ss.boundSphere.c - sgeo.normL * ss.boundSphere.r; + dnorm = ss.boundSphere.c - sgeo.cv.normL * ss.boundSphere.r; // enumeration of sections along the sail and looping through them // =============================================================== for (idx = ix = 0; ix < SAIL_ROW_MAX; ix++) @@ -488,7 +488,7 @@ void SAILONE::FillVertex(SAILVERTEX *pv) pcur += dV; } pStart += dStart; - dVhStart += sgeo.ddVh; + dVhStart += sgeo.cv.ddVh; dVStart += dVhStart; } @@ -499,9 +499,9 @@ void SAILONE::FillVertex(SAILVERTEX *pv) else { dStart = (ss.hardPoints[1] - pStart) * tmpCol; - dnorm = ss.boundSphere.c - sgeo.dnormL * ss.boundSphere.r; + dnorm = ss.boundSphere.c - sgeo.cv.dnormL * ss.boundSphere.r; if (ss.eSailType == SAIL_TRAPECIDAL) - dnorm = ss.boundSphere.c - sgeo.normL * ss.boundSphere.r; + dnorm = ss.boundSphere.c - sgeo.cv.normL * ss.boundSphere.r; // enumeration of sections 
along the sail and looping through them // =============================================================== for (idx = ix = 0; ix < SAIL_COL_MAX; ix++) @@ -509,8 +509,8 @@ void SAILONE::FillVertex(SAILVERTEX *pv) // set the coordinates of the starting point and their increments at each step pcur = pStart; dV = dVStart; - ddV = sgeo.ddVv; - dddV = sgeo.dddVv; + ddV = sgeo.cv.ddVv; + dddV = sgeo.cv.dddVv; // sail calculation along the section line // ||||||||||||||||||||||||||||||||||| for (iy = 0; iy < SAIL_ROW_MAX; iy++, idx++) @@ -524,7 +524,7 @@ void SAILONE::FillVertex(SAILVERTEX *pv) pcur += dV; } pStart += dStart; - dVhStart += sgeo.ddVh; + dVhStart += sgeo.cv.ddVh; dVStart += dVhStart; } @@ -632,7 +632,7 @@ bool SAILONE::SetSail() int hpq; // there is no rope tension on the sail by default - sgeo.dopV = CVECTOR(0.f, 0.f, 0.f); + sgeo.cv.dopV = CVECTOR(0.f, 0.f, 0.f); // Set sail turn and roll resolution ss.maxAngle = PI / 6.f; // the maximum sail turn angle is 30 degrees. @@ -766,12 +766,12 @@ void SAILONE::GoTWave(SAILVERTEX *pv) { int iy, ix, idx; - auto k = (sailWind.x * sgeo.normL.x + sailWind.y * sgeo.normL.y + sailWind.z * sgeo.normL.z); + auto k = (sailWind.x * sgeo.cv.normL.x + sailWind.y * sgeo.cv.normL.y + sailWind.z * sgeo.cv.normL.z); CVECTOR CenterFlex; if (k < 0.f) - CenterFlex = k * sgeo.normL * ss.fDeepH; + CenterFlex = k * sgeo.cv.normL * ss.fDeepH; else - CenterFlex = k * sgeo.normL * ss.fDeepZ; + CenterFlex = k * sgeo.cv.normL * ss.fDeepZ; auto pStart = ss.hardPoints[0]; auto pStartDelta = (ss.hardPoints[1] - ss.hardPoints[0]) / static_cast((SAIL_ROW_MAX - 1)); @@ -786,7 +786,7 @@ void SAILONE::GoTWave(SAILVERTEX *pv) // Set rope point if (sailtrope.pnttie[0]) { - pStart += WindAmplitude * sgeo.normL * pp->WindVect[VertIdx] * static_cast(SAIL_ROW_MAX); + pStart += WindAmplitude * sgeo.cv.normL * pp->WindVect[VertIdx] * static_cast(SAIL_ROW_MAX); *sailtrope.pPos[0] = pStart; } @@ -796,8 +796,8 @@ void SAILONE::GoTWave(SAILVERTEX *pv) for (ix 
= 0; ix < SAIL_ROW_MAX;) { pcur = pStart; - dV = sgeo.dVv + static_cast(ix) * (sgeo.dVh + static_cast(ix + 1) * 0.5f * sgeo.ddVh); - WindAdd = (pp->WindVect[VertIdx] * WindAmplitude * static_cast(ix)) * sgeo.normL + CenterFlex; + dV = sgeo.cv.dVv + static_cast(ix) * (sgeo.cv.dVh + static_cast(ix + 1) * 0.5f * sgeo.cv.ddVh); + WindAdd = (pp->WindVect[VertIdx] * WindAmplitude * static_cast(ix)) * sgeo.cv.normL + CenterFlex; dV += WindAdd; if (sailtrope.pnttie[2]) ddV = -WindAdd * 1.8f / static_cast((SAIL_ROW_MAX)); @@ -916,15 +916,15 @@ void SAILONE::GoVWave(SAILVERTEX *pv) auto dVH = (ss.hardPoints[1] - ss.hardPoints[3]) / static_cast((SAIL_ROW_MAX + 1)) * ss.fDeepVh; //*(1.f-fWindBase); - auto ddVH = sgeo.ddVh + dVH * 2.f / static_cast((SAIL_COL_MAX - 1)); - dVH = sgeo.dVh - dVH; + auto ddVH = sgeo.cv.ddVh + dVH * 2.f / static_cast((SAIL_COL_MAX - 1)); + dVH = sgeo.cv.dVh - dVH; auto StartPoint = ss.hardPoints[0]; auto StartDelta = (ss.hardPoints[1] - ss.hardPoints[0]) / static_cast((SAIL_COL_MAX - 1)); if (m_dwCol == 7) StartDelta *= 2.f; else if (m_dwCol == 4) StartDelta *= 4.f; - dddV = sgeo.dddVv; + dddV = sgeo.cv.dddVv; if (sroll) SailDownVect.y -= SumWind * sroll->delta; @@ -950,8 +950,8 @@ void SAILONE::GoVWave(SAILVERTEX *pv) // set the coordinates of the starting point and their increments at each step pcur = StartPoint; - dV = sgeo.dVv + static_cast(ix) * (dVH + static_cast(ix) * 0.5f * ddVH) + WindAdd + SailDownVect; - ddV = sgeo.ddVv - WindAdd * (2.f - k) / static_cast(SAIL_ROW_MAX) - + dV = sgeo.cv.dVv + static_cast(ix) * (dVH + static_cast(ix) * 0.5f * ddVH) + WindAdd + SailDownVect; + ddV = sgeo.cv.ddVv - WindAdd * (2.f - k) / static_cast(SAIL_ROW_MAX) - SailDownVect * 2.f / static_cast(SAIL_ROW_MAX); idx = ix * SAIL_ROW_MAX; @@ -1037,11 +1037,11 @@ void SAILONE::SetGeometry() p01 = ss.hardPoints[1] - ss.hardPoints[0]; p13 = ss.hardPoints[2] - ss.hardPoints[1]; - pG->normL = !(p13 ^ p01); - pG->dVv = p13 * tmpRow; + pG->cv.normL = !(p13 ^ p01); + 
pG->cv.dVv = p13 * tmpRow; - pG->dVh = p13 * (-ss.fDeepVh * tmpRow); - pG->ddVh = pG->dVh * (-2.f / static_cast(SAIL_ROW_MAX)); + pG->cv.dVh = p13 * (-ss.fDeepVh * tmpRow); + pG->cv.ddVh = pG->cv.dVh * (-2.f / static_cast(SAIL_ROW_MAX)); } else { @@ -1053,27 +1053,27 @@ void SAILONE::SetGeometry() p01 = ss.hardPoints[1] - ss.hardPoints[0]; p23 = ss.hardPoints[3] - ss.hardPoints[2]; - pG->normL = !(p02 ^ p01); - pG->normR = !(p13 ^ p01); + pG->cv.normL = !(p02 ^ p01); + pG->cv.normR = !(p13 ^ p01); normLD = !(p02 ^ p23); - pG->dnormL = (normLD - pG->normL) * tmpRow; - pG->dnormR = (!(p13 ^ p23) - pG->normR) * tmpRow; + pG->cv.dnormL = (normLD - pG->cv.normL) * tmpRow; + pG->cv.dnormR = (!(p13 ^ p23) - pG->cv.normR) * tmpRow; - pG->dddVv = CVECTOR(0.f, 0.f, 0.f); - pG->dVv = pG->normL * ss.fDeepZ * (1.f - ss.holeCount * pp->fSHoleFlexDepend) + pG->dopV; - pG->ddVv = pG->dVv * (-2.f / static_cast(SAIL_ROW_MAX)); - pG->dVv += p02 * tmpRow; + pG->cv.dddVv = CVECTOR(0.f, 0.f, 0.f); + pG->cv.dVv = pG->cv.normL * ss.fDeepZ * (1.f - ss.holeCount * pp->fSHoleFlexDepend) + pG->cv.dopV; + pG->cv.ddVv = pG->cv.dVv * (-2.f / static_cast(SAIL_ROW_MAX)); + pG->cv.dVv += p02 * tmpRow; - pG->dVh = (normLD * ss.fDeepH - p13 * ss.fDeepVh + pG->normR * ss.fDeepVz) * tmpRow * + pG->cv.dVh = (normLD * ss.fDeepH - p13 * ss.fDeepVh + pG->cv.normR * ss.fDeepVz) * tmpRow * (1.f - ss.holeCount * pp->fSHoleFlexDepend); - pG->ddVh = pG->dVh * (-2.f * tmpCol); - pG->dVh += (p13 - p02) * tmpCol * tmpRow; + pG->cv.ddVh = pG->cv.dVh * (-2.f * tmpCol); + pG->cv.dVh += (p13 - p02) * tmpCol * tmpRow; } // Calculate sphere radius ss.boundSphere.r = sqrtf(~(ss.hardPoints[0] - ss.boundSphere.c)); if (sroll != nullptr) - sgeo.dopV = sroll->oldgeo.dopV; + sgeo.cv.dopV = sroll->oldgeo.cv.dopV; } void SAILONE::SetRolling(bool bRoll) @@ -1163,15 +1163,15 @@ void SAILONE::DoRollingStep(uint32_t Delta_Time) } } - sgeo.dddVv = sroll->oldgeo.dddVv * delta; - sgeo.ddVh = sroll->oldgeo.ddVh * delta; - sgeo.ddVv = 
sroll->oldgeo.ddVv * delta; - sgeo.dVh = sroll->oldgeo.dVh * delta; - sgeo.dVv = sroll->oldgeo.dVv * delta; - sgeo.normL = sroll->oldgeo.normL * delta; - sgeo.normR = sroll->oldgeo.normR * delta; - sgeo.dnormL = sroll->oldgeo.dnormL * delta; - sgeo.dnormR = sroll->oldgeo.dnormR * delta; + sgeo.cv.dddVv = sroll->oldgeo.cv.dddVv * delta; + sgeo.cv.ddVh = sroll->oldgeo.cv.ddVh * delta; + sgeo.cv.ddVv = sroll->oldgeo.cv.ddVv * delta; + sgeo.cv.dVh = sroll->oldgeo.cv.dVh * delta; + sgeo.cv.dVv = sroll->oldgeo.cv.dVv * delta; + sgeo.cv.normL = sroll->oldgeo.cv.normL * delta; + sgeo.cv.normR = sroll->oldgeo.cv.normR * delta; + sgeo.cv.dnormL = sroll->oldgeo.cv.dnormL * delta; + sgeo.cv.dnormR = sroll->oldgeo.cv.dnormR * delta; sroll->delta = delta; } @@ -1460,12 +1460,12 @@ void SAILONE::DoSRollSail(SAILVERTEX *pv) auto idx = 0; CVECTOR dv1, dv2, dv3, dv4; - dv1 = sgeo.normL * (.5f * dz); + dv1 = sgeo.cv.normL * (.5f * dz); dv1.y -= dy; dv2 = CVECTOR(0.f, -dy * 1.2f, 0.f); - dv3 = sgeo.normL * (-1.f * dz); + dv3 = sgeo.cv.normL * (-1.f * dz); dv3.y -= dy; - dv4 = sgeo.normL * (-.5f * dz); + dv4 = sgeo.cv.normL * (-.5f * dz); for (i = 0; i < static_cast(m_dwCol); i++) { windVal = 1.f - pp->WindVect[VertIdx] * pp->ROLL_Z_DELTA; @@ -1504,12 +1504,12 @@ void SAILONE::DoTRollSail(SAILVERTEX *pv) } CVECTOR dv1, dv2, dv3, dv4; - dv1 = sgeo.normL * (.5f * dx); + dv1 = sgeo.cv.normL * (.5f * dx); dv1.y -= dy; dv2 = CVECTOR(0.f, -dy * 1.2f, 0.f); - dv3 = sgeo.normL * (-1.f * dx); + dv3 = sgeo.cv.normL * (-1.f * dx); dv3.y -= dy; - dv4 = sgeo.normL * (-.5f * dx); + dv4 = sgeo.cv.normL * (-.5f * dx); auto idx = 6; pv[idx++].pos = pcur; diff --git a/src/libs/rigging/src/sailone.h b/src/libs/rigging/src/sailone.h index 7c213445e..eef11a08a 100644 --- a/src/libs/rigging/src/sailone.h +++ b/src/libs/rigging/src/sailone.h @@ -89,7 +89,7 @@ struct SAILGEOMETRY CVECTOR normL, dnormL; // Vectors for leftside sail normals CVECTOR normR, dnormR; // Vectors for rightside sail normals CVECTOR 
dopV; // Vector for rope tie; - }; + } cv; }; }; From 902693fe391d6cfe92d98ecf90b16438369270cb Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 19 Dec 2022 14:37:35 +0300 Subject: [PATCH 15/27] gcc: fix few errors with static_cast and #include --- src/libs/battle_interface/src/timer/timer.cpp | 6 ++++-- src/libs/worldmap/src/wdm_interface_object.cpp | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/libs/battle_interface/src/timer/timer.cpp b/src/libs/battle_interface/src/timer/timer.cpp index 3d3c0c1ee..c4e8fc79f 100644 --- a/src/libs/battle_interface/src/timer/timer.cpp +++ b/src/libs/battle_interface/src/timer/timer.cpp @@ -83,8 +83,10 @@ bool BITimer::ReadAndCreate() BIUtils::ReadRectFromAttr(AttributesPointer, "timerforeuv", rForeUV, rForeUV); // read texture & color - auto *pcBackTexture = AttributesPointer ? AttributesPointer->GetAttribute("timerbacktexture") : nullptr; - auto *pcForeTexture = AttributesPointer ? AttributesPointer->GetAttribute("timerforetexture") : nullptr; + auto *pcBackTexture = + AttributesPointer ? static_cast(AttributesPointer->GetAttribute("timerbacktexture")) : nullptr; + auto *pcForeTexture = + AttributesPointer ? static_cast(AttributesPointer->GetAttribute("timerforetexture")) : nullptr; auto dwColorBack = AttributesPointer ? 
AttributesPointer->GetAttributeAsDword("timerbackcolor", 0xFFFFFFFF) : 0xFFFFFFFF; auto dwColorFore = diff --git a/src/libs/worldmap/src/wdm_interface_object.cpp b/src/libs/worldmap/src/wdm_interface_object.cpp index 5260b3999..a63a71189 100644 --- a/src/libs/worldmap/src/wdm_interface_object.cpp +++ b/src/libs/worldmap/src/wdm_interface_object.cpp @@ -8,7 +8,8 @@ // //============================================================================================ -#include +#include +#include #include "wdm_interface_object.h" #include "wdm_objects.h" From cb0ba0dbce6c219fdcc62a0c1876d8e07695310d Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 19 Dec 2022 18:58:51 +0300 Subject: [PATCH 16/27] gcc: replace "using struct" with "typedef struct" in types3d.h This fixes gcc build. --- src/libs/math/include/types3d.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/libs/math/include/types3d.h b/src/libs/math/include/types3d.h index 2dd869f2d..e23b04039 100644 --- a/src/libs/math/include/types3d.h +++ b/src/libs/math/include/types3d.h @@ -9,58 +9,58 @@ #define SQR(x) ((x) * (x)) -using VECTOR = struct +typedef struct { float x; float y; float z; -}; +} VECTOR; -using VECTOR4 = struct +typedef struct { float x; float y; float z; float w; -}; +} VECTOR4; -using PLANE = struct +typedef struct { float Nx; float Ny; float Nz; float D; -}; +} PLANE; -using CHECK_SPHERE = struct +typedef struct { float cx; float cy; float cz; float R; -}; +} CHECK_SPHERE; -using COLOR = struct +typedef struct { float r; float g; float b; -}; +} COLOR; -using COLORA = struct +typedef struct { float r; float g; float b; float a; -}; +} COLORA; -using MATRIX = struct +typedef struct { union { float matrix[16]; float m[4][4]; }; -}; +} MATRIX; #pragma pack(pop) From 00eef035a7a12aa0124435d4d48afc61a38fe05a Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 19 Dec 2022 20:39:15 +0300 Subject: [PATCH 17/27] arm: update sse2neon.h with 
version from 2022.12.11 Taken from: https://github.com/DLTcollab/sse2neon/blob/5b6ee3ee06c4dbd1dcf656b63af2f505d5f4d609/sse2neon.h --- src/libs/math/include/sse2neon.h | 1308 ++++++++++++++++++++++++++---- 1 file changed, 1154 insertions(+), 154 deletions(-) diff --git a/src/libs/math/include/sse2neon.h b/src/libs/math/include/sse2neon.h index 490c0a45a..382214aa1 100644 --- a/src/libs/math/include/sse2neon.h +++ b/src/libs/math/include/sse2neon.h @@ -25,6 +25,9 @@ // Yang-Hao Yuan // Syoyo Fujita // Brecht Van Lommel +// Jonathan Hue +// Cuda Chen +// Aymen Qader /* * sse2neon is freely redistributable under the MIT License. @@ -101,6 +104,51 @@ #include #include +#if defined(_WIN32) +/* Definitions for _mm_{malloc,free} are provided by + * from both MinGW-w64 and MSVC. + */ +#define SSE2NEON_ALLOC_DEFINED +#endif + +/* If using MSVC */ +#ifdef _MSC_VER +#include +#if (defined(_M_AMD64) || defined(__x86_64__)) || \ + (defined(_M_ARM) || defined(__arm__)) +#define SSE2NEON_HAS_BITSCAN64 +#endif +#endif + +/* Compiler barrier */ +#define SSE2NEON_BARRIER() \ + do { \ + __asm__ __volatile__("" ::: "memory"); \ + (void) 0; \ + } while (0) + +/* Memory barriers + * __atomic_thread_fence does not include a compiler barrier; instead, + * the barrier is part of __atomic_load/__atomic_store's "volatile-like" + * semantics. 
+ */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) +#include +#endif + +FORCE_INLINE void _sse2neon_smp_mb(void) +{ + SSE2NEON_BARRIER(); +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(__STDC_NO_ATOMICS__) + atomic_thread_fence(memory_order_seq_cst); +#elif defined(__GNUC__) || defined(__clang__) + __atomic_thread_fence(__ATOMIC_SEQ_CST); +#else + /* FIXME: MSVC support */ +#endif +} + /* Architecture-specific build options */ /* FIXME: #pragma GCC push_options is only available on GCC */ #if defined(__GNUC__) @@ -141,11 +189,31 @@ #endif #endif +/* Apple Silicon cache lines are double of what is commonly used by Intel, AMD + * and other Arm microarchtectures use. + * From sysctl -a on Apple M1: + * hw.cachelinesize: 128 + */ +#if defined(__APPLE__) && (defined(__aarch64__) || defined(__arm64__)) +#define SSE2NEON_CACHELINE_SIZE 128 +#else +#define SSE2NEON_CACHELINE_SIZE 64 +#endif + /* Rounding functions require either Aarch64 instructions or libm failback */ #if !defined(__aarch64__) #include #endif +/* On ARMv7, some registers, such as PMUSERENR and PMCCNTR, are read-only + * or even not accessible in user mode. + * To write or access to these registers in user mode, + * we have to perform syscall instead. + */ +#if !defined(__aarch64__) +#include +#endif + /* "__has_builtin" can be used to query support for built-in functions * provided by gcc/clang and other compilers that support it. 
*/ @@ -155,6 +223,16 @@ #define __has_builtin(x) HAS##x #define HAS__builtin_popcount 1 #define HAS__builtin_popcountll 1 + +// __builtin_shuffle introduced in GCC 4.7.0 +#if (__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7)) +#define HAS__builtin_shuffle 1 +#else +#define HAS__builtin_shuffle 0 +#endif + +#define HAS__builtin_shufflevector 0 +#define HAS__builtin_nontemporal_store 0 #else #define __has_builtin(x) 0 #endif @@ -171,6 +249,26 @@ #define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) +#if __has_builtin(__builtin_shufflevector) +#define _sse2neon_shuffle(type, a, b, ...) \ + __builtin_shufflevector(a, b, __VA_ARGS__) +#elif __has_builtin(__builtin_shuffle) +#define _sse2neon_shuffle(type, a, b, ...) \ + __extension__({ \ + type tmp = {__VA_ARGS__}; \ + __builtin_shuffle(a, b, tmp); \ + }) +#endif + +#ifdef _sse2neon_shuffle +#define vshuffle_s16(a, b, ...) _sse2neon_shuffle(int16x4_t, a, b, __VA_ARGS__) +#define vshuffleq_s16(a, b, ...) _sse2neon_shuffle(int16x8_t, a, b, __VA_ARGS__) +#define vshuffle_s32(a, b, ...) _sse2neon_shuffle(int32x2_t, a, b, __VA_ARGS__) +#define vshuffleq_s32(a, b, ...) _sse2neon_shuffle(int32x4_t, a, b, __VA_ARGS__) +#define vshuffle_s64(a, b, ...) _sse2neon_shuffle(int64x1_t, a, b, __VA_ARGS__) +#define vshuffleq_s64(a, b, ...) _sse2neon_shuffle(int64x2_t, a, b, __VA_ARGS__) +#endif + /* Rounding mode macros. 
*/ #define _MM_FROUND_TO_NEAREST_INT 0x00 #define _MM_FROUND_TO_NEG_INF 0x01 @@ -420,7 +518,7 @@ FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t, uint8_t); // Older gcc does not define vld1q_u8_x4 type #if defined(__GNUC__) && !defined(__clang__) && \ - ((__GNUC__ <= 10 && defined(__arm__)) || \ + ((__GNUC__ <= 12 && defined(__arm__)) || \ (__GNUC__ == 10 && __GNUC_MINOR__ < 3 && defined(__aarch64__)) || \ (__GNUC__ <= 9 && defined(__aarch64__))) FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) @@ -440,6 +538,57 @@ FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) } #endif +#if !defined(__aarch64__) +/* emulate vaddv u8 variant */ +FORCE_INLINE uint8_t _sse2neon_vaddv_u8(uint8x8_t v8) +{ + const uint64x1_t v1 = vpaddl_u32(vpaddl_u16(vpaddl_u8(v8))); + return vget_lane_u8(vreinterpret_u8_u64(v1), 0); +} +#else +// Wraps vaddv_u8 +FORCE_INLINE uint8_t _sse2neon_vaddv_u8(uint8x8_t v8) +{ + return vaddv_u8(v8); +} +#endif + +#if !defined(__aarch64__) +/* emulate vaddvq u8 variant */ +FORCE_INLINE uint8_t _sse2neon_vaddvq_u8(uint8x16_t a) +{ + uint8x8_t tmp = vpadd_u8(vget_low_u8(a), vget_high_u8(a)); + uint8_t res = 0; + for (int i = 0; i < 8; ++i) + res += tmp[i]; + return res; +} +#else +// Wraps vaddvq_u8 +FORCE_INLINE uint8_t _sse2neon_vaddvq_u8(uint8x16_t a) +{ + return vaddvq_u8(a); +} +#endif + +#if !defined(__aarch64__) +/* emulate vaddvq u16 variant */ +FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a) +{ + uint32x4_t m = vpaddlq_u16(a); + uint64x2_t n = vpaddlq_u32(m); + uint64x1_t o = vget_low_u64(n) + vget_high_u64(n); + + return vget_lane_u32((uint32x2_t) o, 0); +} +#else +// Wraps vaddvq_u16 +FORCE_INLINE uint16_t _sse2neon_vaddvq_u16(uint16x8_t a) +{ + return vaddvq_u16(a); +} +#endif + /* Function Naming Conventions * The naming convention of SSE intrinsics is straightforward. 
A generic SSE * intrinsic function is given as follows: @@ -517,16 +666,12 @@ FORCE_INLINE uint8x16x4_t _sse2neon_vld1q_u8_x4(const uint8_t *p) +------+------+------+------+------+------+-------------+ */ -/* Constants for use with _mm_prefetch. */ +/* Constants for use with _mm_prefetch. */ enum _mm_hint { - _MM_HINT_NTA = 0, /* load data to L1 and L2 cache, mark it as NTA */ - _MM_HINT_T0 = 1, /* load data to L1 and L2 cache */ - _MM_HINT_T1 = 2, /* load data to L2 cache only */ - _MM_HINT_T2 = 3, /* load data to L2 cache only, mark it as NTA */ - _MM_HINT_ENTA = 4, /* exclusive version of _MM_HINT_NTA */ - _MM_HINT_ET0 = 5, /* exclusive version of _MM_HINT_T0 */ - _MM_HINT_ET1 = 6, /* exclusive version of _MM_HINT_T1 */ - _MM_HINT_ET2 = 7 /* exclusive version of _MM_HINT_T2 */ + _MM_HINT_NTA = 0, /* load data to L1 and L2 cache, mark it as NTA */ + _MM_HINT_T0 = 1, /* load data to L1 and L2 cache */ + _MM_HINT_T1 = 2, /* load data to L2 cache only */ + _MM_HINT_T2 = 3, /* load data to L2 cache only, mark it as NTA */ }; // The bit field mapping to the FPCR(floating-point control register) @@ -1507,20 +1652,8 @@ FORCE_INLINE __m128 _mm_cvtpi8_ps(__m64 a) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi16 FORCE_INLINE __m64 _mm_cvtps_pi16(__m128 a) { - const __m128 i16Min = _mm_set_ps1((float) INT16_MIN); - const __m128 i16Max = _mm_set_ps1((float) INT16_MAX); - const __m128 i32Max = _mm_set_ps1((float) INT32_MAX); - const __m128i maxMask = _mm_castps_si128( - _mm_and_ps(_mm_cmpge_ps(a, i16Max), _mm_cmple_ps(a, i32Max))); - const __m128i betweenMask = _mm_castps_si128( - _mm_and_ps(_mm_cmpgt_ps(a, i16Min), _mm_cmplt_ps(a, i16Max))); - const __m128i minMask = _mm_cmpeq_epi32(_mm_or_si128(maxMask, betweenMask), - _mm_setzero_si128()); - __m128i max = _mm_and_si128(maxMask, _mm_set1_epi32(INT16_MAX)); - __m128i min = _mm_and_si128(minMask, _mm_set1_epi32(INT16_MIN)); - __m128i cvt = _mm_and_si128(betweenMask, _mm_cvtps_epi32(a)); - 
__m128i res32 = _mm_or_si128(_mm_or_si128(max, min), cvt); - return vreinterpret_m64_s16(vmovn_s32(vreinterpretq_s32_m128i(res32))); + return vreinterpret_m64_s16( + vqmovn_s32(vreinterpretq_s32_m128i(_mm_cvtps_epi32(a)))); } // Convert packed single-precision (32-bit) floating-point elements in a to @@ -1552,25 +1685,8 @@ FORCE_INLINE __m64 _mm_cvtps_pi16(__m128 a) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi8 FORCE_INLINE __m64 _mm_cvtps_pi8(__m128 a) { - const __m128 i8Min = _mm_set_ps1((float) INT8_MIN); - const __m128 i8Max = _mm_set_ps1((float) INT8_MAX); - const __m128 i32Max = _mm_set_ps1((float) INT32_MAX); - const __m128i maxMask = _mm_castps_si128( - _mm_and_ps(_mm_cmpge_ps(a, i8Max), _mm_cmple_ps(a, i32Max))); - const __m128i betweenMask = _mm_castps_si128( - _mm_and_ps(_mm_cmpgt_ps(a, i8Min), _mm_cmplt_ps(a, i8Max))); - const __m128i minMask = _mm_cmpeq_epi32(_mm_or_si128(maxMask, betweenMask), - _mm_setzero_si128()); - __m128i max = _mm_and_si128(maxMask, _mm_set1_epi32(INT8_MAX)); - __m128i min = _mm_and_si128(minMask, _mm_set1_epi32(INT8_MIN)); - __m128i cvt = _mm_and_si128(betweenMask, _mm_cvtps_epi32(a)); - __m128i res32 = _mm_or_si128(_mm_or_si128(max, min), cvt); - int16x4_t res16 = vmovn_s32(vreinterpretq_s32_m128i(res32)); - int8x8_t res8 = vmovn_s16(vcombine_s16(res16, res16)); - static const uint32_t bitMask[2] = {0xFFFFFFFF, 0}; - int8x8_t mask = vreinterpret_s8_u32(vld1_u32(bitMask)); - - return vreinterpret_m64_s8(vorr_s8(vand_s8(mask, res8), vdup_n_s8(0))); + return vreinterpret_m64_s8(vqmovn_s16( + vcombine_s16(vreinterpret_s16_m64(_mm_cvtps_pi16(a)), vdup_n_s16(0)))); } // Convert packed unsigned 16-bit integers in a to packed single-precision @@ -1763,10 +1879,12 @@ FORCE_INLINE __m128 _mm_div_ss(__m128 a, __m128 b) // Free aligned memory that was allocated with _mm_malloc. 
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free +#if !defined(SSE2NEON_ALLOC_DEFINED) FORCE_INLINE void _mm_free(void *addr) { free(addr); } +#endif // Macro: Get the flush zero bits from the MXCSR control and status register. // The flush zero may contain any of the following flags: _MM_FLUSH_ZERO_ON or @@ -1948,6 +2066,7 @@ FORCE_INLINE __m128i _mm_loadu_si64(const void *p) // Allocate aligned blocks of memory. // https://software.intel.com/en-us/ // cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks +#if !defined(SSE2NEON_ALLOC_DEFINED) FORCE_INLINE void *_mm_malloc(size_t size, size_t align) { void *ptr; @@ -1959,6 +2078,7 @@ FORCE_INLINE void *_mm_malloc(size_t size, size_t align) return ptr; return NULL; } +#endif // Conditionally store 8-bit integer elements from a into memory using mask // (elements are not stored when the highest bit is not set in the corresponding @@ -2279,12 +2399,25 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw #define _m_pmulhuw(a, b) _mm_mulhi_pu16(a, b) -// Loads one cache line of data from address p to a location closer to the -// processor. https://msdn.microsoft.com/en-us/library/84szxsww(v=vs.100).aspx -FORCE_INLINE void _mm_prefetch(const void *p, int i) +// Fetch the line of data from memory that contains address p to a location in +// the cache heirarchy specified by the locality hint i. 
+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_prefetch +FORCE_INLINE void _mm_prefetch(char const *p, int i) { - (void) i; - __builtin_prefetch(p); + switch (i) { + case _MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case _MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case _MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case _MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + } } // Compute the absolute differences of packed unsigned 8-bit integers in a and @@ -2467,8 +2600,7 @@ FORCE_INLINE void _MM_SET_ROUNDING_MODE(int rounding) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss FORCE_INLINE __m128 _mm_set_ss(float a) { - float ALIGN_STRUCT(16) data[4] = {a, 0, 0, 0}; - return vreinterpretq_m128_f32(vld1q_f32(data)); + return vreinterpretq_m128_f32(vsetq_lane_f32(a, vdupq_n_f32(0), 0)); } // Sets the four single-precision, floating-point values to w. @@ -2512,10 +2644,10 @@ FORCE_INLINE __m128 _mm_setzero_ps(void) // Shuffle 16-bit integers in a using the control in imm8, and store the results // in dst. // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi16 -#if __has_builtin(__builtin_shufflevector) +#ifdef _sse2neon_shuffle #define _mm_shuffle_pi16(a, imm) \ __extension__({ \ - vreinterpret_m64_s16(__builtin_shufflevector( \ + vreinterpret_m64_s16(vshuffle_s16( \ vreinterpret_s16_m64(a), vreinterpret_s16_m64(a), (imm & 0x3), \ ((imm >> 2) & 0x3), ((imm >> 4) & 0x3), ((imm >> 6) & 0x3))); \ }) @@ -2538,25 +2670,48 @@ FORCE_INLINE __m128 _mm_setzero_ps(void) }) #endif -// Guarantees that every preceding store is globally visible before any -// subsequent store. -// https://msdn.microsoft.com/en-us/library/5h2w73d1%28v=vs.90%29.aspx +// Perform a serializing operation on all store-to-memory instructions that were +// issued prior to this instruction. 
Guarantees that every store instruction +// that precedes, in program order, is globally visible before any store +// instruction which follows the fence in program order. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sfence FORCE_INLINE void _mm_sfence(void) { - __sync_synchronize(); + _sse2neon_smp_mb(); +} + +// Perform a serializing operation on all load-from-memory and store-to-memory +// instructions that were issued prior to this instruction. Guarantees that +// every memory access that precedes, in program order, the memory fence +// instruction is globally visible before any memory instruction which follows +// the fence in program order. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mfence +FORCE_INLINE void _mm_mfence(void) +{ + _sse2neon_smp_mb(); +} + +// Perform a serializing operation on all load-from-memory instructions that +// were issued prior to this instruction. Guarantees that every load instruction +// that precedes, in program order, is globally visible before any load +// instruction which follows the fence in program order. 
+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lfence +FORCE_INLINE void _mm_lfence(void) +{ + _sse2neon_smp_mb(); } // FORCE_INLINE __m128 _mm_shuffle_ps(__m128 a, __m128 b, __constrange(0,255) // int imm) -#if __has_builtin(__builtin_shufflevector) -#define _mm_shuffle_ps(a, b, imm) \ - __extension__({ \ - float32x4_t _input1 = vreinterpretq_f32_m128(a); \ - float32x4_t _input2 = vreinterpretq_f32_m128(b); \ - float32x4_t _shuf = __builtin_shufflevector( \ - _input1, _input2, (imm) & (0x3), ((imm) >> 2) & 0x3, \ - (((imm) >> 4) & 0x3) + 4, (((imm) >> 6) & 0x3) + 4); \ - vreinterpretq_m128_f32(_shuf); \ +#ifdef _sse2neon_shuffle +#define _mm_shuffle_ps(a, b, imm) \ + __extension__({ \ + float32x4_t _input1 = vreinterpretq_f32_m128(a); \ + float32x4_t _input2 = vreinterpretq_f32_m128(b); \ + float32x4_t _shuf = \ + vshuffleq_s32(_input1, _input2, (imm) & (0x3), ((imm) >> 2) & 0x3, \ + (((imm) >> 4) & 0x3) + 4, (((imm) >> 6) & 0x3) + 4); \ + vreinterpretq_m128_f32(_shuf); \ }) #else // generic #define _mm_shuffle_ps(a, b, imm) \ @@ -3228,13 +3383,29 @@ FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a) return vreinterpretq_m128_s32(vreinterpretq_s32_m128i(a)); } -// Cache line containing p is flushed and invalidated from all caches in the -// coherency domain. : -// https://msdn.microsoft.com/en-us/library/ba08y07y(v=vs.100).aspx +// Invalidate and flush the cache line that contains p from all levels of the +// cache hierarchy. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clflush +#if defined(__APPLE__) +#include +#endif FORCE_INLINE void _mm_clflush(void const *p) { (void) p; - // no corollary for Neon? + + /* sys_icache_invalidate is supported since macOS 10.5. + * However, it does not work on non-jailbroken iOS devices, although the + * compilation is successful. 
+ */ +#if defined(__APPLE__) + sys_icache_invalidate((void *) (uintptr_t) p, SSE2NEON_CACHELINE_SIZE); +#elif defined(__GNUC__) || defined(__clang__) + uintptr_t ptr = (uintptr_t) p; + __builtin___clear_cache((char *) ptr, + (char *) ptr + SSE2NEON_CACHELINE_SIZE); +#else + /* FIXME: MSVC support */ +#endif } // Compares the 8 signed or unsigned 16-bit integers in a and the 8 signed or @@ -3927,10 +4098,18 @@ FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32 FORCE_INLINE __m128i _mm_cvtpd_epi32(__m128d a) { +// vrnd32xq_f64 not supported on clang +#if defined(__ARM_FEATURE_FRINT) && !defined(__clang__) + float64x2_t rounded = vrnd32xq_f64(vreinterpretq_f64_m128d(a)); + int64x2_t integers = vcvtq_s64_f64(rounded); + return vreinterpretq_m128i_s32( + vcombine_s32(vmovn_s64(integers), vdup_n_s32(0))); +#else __m128d rnd = _mm_round_pd(a, _MM_FROUND_CUR_DIRECTION); double d0 = ((double *) &rnd)[0]; double d1 = ((double *) &rnd)[1]; return _mm_set_epi32(0, 0, (int32_t) d1, (int32_t) d0); +#endif } // Convert packed double-precision (64-bit) floating-point elements in a to @@ -4011,7 +4190,9 @@ FORCE_INLINE __m128d _mm_cvtpi32_pd(__m64 a) // does not support! It is supported on ARMv8-A however. 
FORCE_INLINE __m128i _mm_cvtps_epi32(__m128 a) { -#if defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) +#if defined(__ARM_FEATURE_FRINT) + return vreinterpretq_m128i_s32(vcvtq_s32_f32(vrnd32xq_f32(a))); +#elif defined(__aarch64__) || defined(__ARM_FEATURE_DIRECTED_ROUNDING) switch (_MM_GET_ROUNDING_MODE()) { case _MM_ROUND_NEAREST: return vreinterpretq_m128i_s32(vcvtnq_s32_f32(a)); @@ -4572,6 +4753,12 @@ FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b) { int32x4_t low = vmull_s16(vget_low_s16(vreinterpretq_s16_m128i(a)), vget_low_s16(vreinterpretq_s16_m128i(b))); +#if defined(__aarch64__) + int32x4_t high = + vmull_high_s16(vreinterpretq_s16_m128i(a), vreinterpretq_s16_m128i(b)); + + return vreinterpretq_m128i_s32(vpaddq_s32(low, high)); +#else int32x4_t high = vmull_s16(vget_high_s16(vreinterpretq_s16_m128i(a)), vget_high_s16(vreinterpretq_s16_m128i(b))); @@ -4579,6 +4766,7 @@ FORCE_INLINE __m128i _mm_madd_epi16(__m128i a, __m128i b) int32x2_t high_sum = vpadd_s32(vget_low_s32(high), vget_high_s32(high)); return vreinterpretq_m128i_s32(vcombine_s32(low_sum, high_sum)); +#endif } // Conditionally store 8-bit integer elements from a into memory using mask @@ -5161,7 +5349,11 @@ FORCE_INLINE __m128d _mm_set_pd(double e1, double e0) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd FORCE_INLINE __m128d _mm_set_sd(double a) { +#if defined(__aarch64__) + return vreinterpretq_m128d_f64(vsetq_lane_f64(a, vdupq_n_f64(0), 0)); +#else return _mm_set_pd(0, a); +#endif } // Sets the 8 signed 16-bit integer values to w. 
@@ -5321,14 +5513,14 @@ FORCE_INLINE __m128i _mm_setzero_si128(void) // https://msdn.microsoft.com/en-us/library/56f67xbk%28v=vs.90%29.aspx // FORCE_INLINE __m128i _mm_shuffle_epi32(__m128i a, // __constrange(0,255) int imm) -#if __has_builtin(__builtin_shufflevector) -#define _mm_shuffle_epi32(a, imm) \ - __extension__({ \ - int32x4_t _input = vreinterpretq_s32_m128i(a); \ - int32x4_t _shuf = __builtin_shufflevector( \ - _input, _input, (imm) & (0x3), ((imm) >> 2) & 0x3, \ - ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); \ - vreinterpretq_m128i_s32(_shuf); \ +#ifdef _sse2neon_shuffle +#define _mm_shuffle_epi32(a, imm) \ + __extension__({ \ + int32x4_t _input = vreinterpretq_s32_m128i(a); \ + int32x4_t _shuf = \ + vshuffleq_s32(_input, _input, (imm) & (0x3), ((imm) >> 2) & 0x3, \ + ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3); \ + vreinterpretq_m128i_s32(_shuf); \ }) #else // generic #define _mm_shuffle_epi32(a, imm) \ @@ -5392,11 +5584,11 @@ FORCE_INLINE __m128i _mm_setzero_si128(void) // dst[127:64] := (imm8[1] == 0) ? 
b[63:0] : b[127:64] // // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd -#if __has_builtin(__builtin_shufflevector) -#define _mm_shuffle_pd(a, b, imm8) \ - vreinterpretq_m128d_s64(__builtin_shufflevector( \ - vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b), imm8 & 0x1, \ - ((imm8 & 0x2) >> 1) + 2)) +#ifdef _sse2neon_shuffle +#define _mm_shuffle_pd(a, b, imm8) \ + vreinterpretq_m128d_s64( \ + vshuffleq_s64(vreinterpretq_s64_m128d(a), vreinterpretq_s64_m128d(b), \ + imm8 & 0x1, ((imm8 & 0x2) >> 1) + 2)) #else #define _mm_shuffle_pd(a, b, imm8) \ _mm_castsi128_pd(_mm_set_epi64x( \ @@ -5406,15 +5598,15 @@ FORCE_INLINE __m128i _mm_setzero_si128(void) // FORCE_INLINE __m128i _mm_shufflehi_epi16(__m128i a, // __constrange(0,255) int imm) -#if __has_builtin(__builtin_shufflevector) -#define _mm_shufflehi_epi16(a, imm) \ - __extension__({ \ - int16x8_t _input = vreinterpretq_s16_m128i(a); \ - int16x8_t _shuf = __builtin_shufflevector( \ - _input, _input, 0, 1, 2, 3, ((imm) & (0x3)) + 4, \ - (((imm) >> 2) & 0x3) + 4, (((imm) >> 4) & 0x3) + 4, \ - (((imm) >> 6) & 0x3) + 4); \ - vreinterpretq_m128i_s16(_shuf); \ +#ifdef _sse2neon_shuffle +#define _mm_shufflehi_epi16(a, imm) \ + __extension__({ \ + int16x8_t _input = vreinterpretq_s16_m128i(a); \ + int16x8_t _shuf = \ + vshuffleq_s16(_input, _input, 0, 1, 2, 3, ((imm) & (0x3)) + 4, \ + (((imm) >> 2) & 0x3) + 4, (((imm) >> 4) & 0x3) + 4, \ + (((imm) >> 6) & 0x3) + 4); \ + vreinterpretq_m128i_s16(_shuf); \ }) #else // generic #define _mm_shufflehi_epi16(a, imm) _mm_shufflehi_epi16_function((a), (imm)) @@ -5422,11 +5614,11 @@ FORCE_INLINE __m128i _mm_setzero_si128(void) // FORCE_INLINE __m128i _mm_shufflelo_epi16(__m128i a, // __constrange(0,255) int imm) -#if __has_builtin(__builtin_shufflevector) +#ifdef _sse2neon_shuffle #define _mm_shufflelo_epi16(a, imm) \ __extension__({ \ int16x8_t _input = vreinterpretq_s16_m128i(a); \ - int16x8_t _shuf = __builtin_shufflevector( \ + int16x8_t 
_shuf = vshuffleq_s16( \ _input, _input, ((imm) & (0x3)), (((imm) >> 2) & 0x3), \ (((imm) >> 4) & 0x3), (((imm) >> 6) & 0x3), 4, 5, 6, 7); \ vreinterpretq_m128i_s16(_shuf); \ @@ -5577,14 +5769,18 @@ FORCE_INLINE __m128i _mm_slli_epi64(__m128i a, int imm) // dst[127:0] := a[127:0] << (tmp*8) // // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128 -FORCE_INLINE __m128i _mm_slli_si128(__m128i a, int imm) -{ - if (_sse2neon_unlikely(imm & ~15)) - return _mm_setzero_si128(); - uint8x16_t tmp[2] = {vdupq_n_u8(0), vreinterpretq_u8_m128i(a)}; - return vreinterpretq_m128i_u8( - vld1q_u8(((uint8_t const *) tmp) + (16 - imm))); -} +#define _mm_slli_si128(a, imm) \ + __extension__({ \ + int8x16_t ret; \ + if (_sse2neon_unlikely(imm == 0)) \ + ret = vreinterpretq_s8_m128i(a); \ + else if (_sse2neon_unlikely((imm) & ~15)) \ + ret = vdupq_n_s8(0); \ + else \ + ret = vextq_s8(vdupq_n_s8(0), vreinterpretq_s8_m128i(a), \ + ((imm <= 0 || imm > 15) ? 0 : (16 - imm))); \ + vreinterpretq_m128i_s8(ret); \ + }) // Compute the square root of packed double-precision (64-bit) floating-point // elements in a, and store the results in dst. 
@@ -5688,19 +5884,19 @@ FORCE_INLINE __m128i _mm_srai_epi16(__m128i a, int imm) // // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32 // FORCE_INLINE __m128i _mm_srai_epi32(__m128i a, __constrange(0,255) int imm) -#define _mm_srai_epi32(a, imm) \ - __extension__({ \ - __m128i ret; \ - if (_sse2neon_unlikely((imm) == 0)) { \ - ret = a; \ - } else if (_sse2neon_likely(0 < (imm) && (imm) < 32)) { \ - ret = vreinterpretq_m128i_s32( \ - vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(-imm))); \ - } else { \ - ret = vreinterpretq_m128i_s32( \ - vshrq_n_s32(vreinterpretq_s32_m128i(a), 31)); \ - } \ - ret; \ +#define _mm_srai_epi32(a, imm) \ + __extension__({ \ + __m128i ret; \ + if (_sse2neon_unlikely((imm) == 0)) { \ + ret = a; \ + } else if (_sse2neon_likely(0 < (imm) && (imm) < 32)) { \ + ret = vreinterpretq_m128i_s32( \ + vshlq_s32(vreinterpretq_s32_m128i(a), vdupq_n_s32(-(imm)))); \ + } else { \ + ret = vreinterpretq_m128i_s32( \ + vshrq_n_s32(vreinterpretq_s32_m128i(a), 31)); \ + } \ + ret; \ }) // Shift packed 16-bit integers in a right by count while shifting in zeros, and @@ -5858,13 +6054,16 @@ FORCE_INLINE __m128i _mm_srl_epi64(__m128i a, __m128i count) // dst[127:0] := a[127:0] >> (tmp*8) // // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128 -FORCE_INLINE __m128i _mm_srli_si128(__m128i a, int imm) -{ - if (_sse2neon_unlikely(imm & ~15)) - return _mm_setzero_si128(); - uint8x16_t tmp[2] = {vreinterpretq_u8_m128i(a), vdupq_n_u8(0)}; - return vreinterpretq_m128i_u8(vld1q_u8(((uint8_t const *) tmp) + imm)); -} +#define _mm_srli_si128(a, imm) \ + __extension__({ \ + int8x16_t ret; \ + if (_sse2neon_unlikely((imm) & ~15)) \ + ret = vdupq_n_s8(0); \ + else \ + ret = vextq_s8(vreinterpretq_s8_m128i(a), vdupq_n_s8(0), \ + (imm > 15 ? 0 : imm)); \ + vreinterpretq_m128i_s8(ret); \ + }) // Store 128-bits (composed of 2 packed double-precision (64-bit) floating-point // elements) from a into memory. 
mem_addr must be aligned on a 16-byte boundary @@ -6586,8 +6785,11 @@ FORCE_INLINE __m128d _mm_movedup_pd(__m128d a) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps FORCE_INLINE __m128 _mm_movehdup_ps(__m128 a) { -#if __has_builtin(__builtin_shufflevector) - return vreinterpretq_m128_f32(__builtin_shufflevector( +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vtrn2q_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a))); +#elif defined(_sse2neon_shuffle) + return vreinterpretq_m128_f32(vshuffleq_s32( vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 1, 1, 3, 3)); #else float32_t a1 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 1); @@ -6602,8 +6804,11 @@ FORCE_INLINE __m128 _mm_movehdup_ps(__m128 a) // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps FORCE_INLINE __m128 _mm_moveldup_ps(__m128 a) { -#if __has_builtin(__builtin_shufflevector) - return vreinterpretq_m128_f32(__builtin_shufflevector( +#if defined(__aarch64__) + return vreinterpretq_m128_f32( + vtrn1q_f32(vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a))); +#elif defined(_sse2neon_shuffle) + return vreinterpretq_m128_f32(vshuffleq_s32( vreinterpretq_f32_m128(a), vreinterpretq_f32_m128(a), 0, 0, 2, 2)); #else float32_t a0 = vgetq_lane_f32(vreinterpretq_f32_m128(a), 0); @@ -6706,23 +6911,20 @@ FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) // dst[127:0] := tmp[127:0] // // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8 -FORCE_INLINE __m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm) -{ - if (_sse2neon_unlikely(imm & ~31)) - return _mm_setzero_si128(); - int idx; - uint8x16_t tmp[2]; - if (imm >= 16) { - idx = imm - 16; - tmp[0] = vreinterpretq_u8_m128i(a); - tmp[1] = vdupq_n_u8(0); - } else { - idx = imm; - tmp[0] = vreinterpretq_u8_m128i(b); - tmp[1] = vreinterpretq_u8_m128i(a); - } - return vreinterpretq_m128i_u8(vld1q_u8(((uint8_t const *) tmp) + idx)); -} +#define 
_mm_alignr_epi8(a, b, imm) \ + __extension__({ \ + uint8x16_t _a = vreinterpretq_u8_m128i(a); \ + uint8x16_t _b = vreinterpretq_u8_m128i(b); \ + __m128i ret; \ + if (_sse2neon_unlikely((imm) & ~31)) \ + ret = vreinterpretq_m128i_u8(vdupq_n_u8(0)); \ + else if (imm >= 16) \ + ret = _mm_srli_si128(a, imm >= 16 ? imm - 16 : 0); \ + else \ + ret = \ + vreinterpretq_m128i_u8(vextq_u8(_b, _a, imm < 16 ? imm : 0)); \ + ret; \ + }) // Concatenate 8-byte blocks in a and b into a 16-byte temporary result, shift // the result right by imm8 bytes, and store the low 8 bytes in dst. @@ -6774,9 +6976,13 @@ FORCE_INLINE __m128i _mm_hadd_epi32(__m128i _a, __m128i _b) { int32x4_t a = vreinterpretq_s32_m128i(_a); int32x4_t b = vreinterpretq_s32_m128i(_b); +#if defined(__aarch64__) + return vreinterpretq_m128i_s32(vpaddq_s32(a, b)); +#else return vreinterpretq_m128i_s32( vcombine_s32(vpadd_s32(vget_low_s32(a), vget_high_s32(a)), vpadd_s32(vget_low_s32(b), vget_high_s32(b)))); +#endif } // Horizontally add adjacent pairs of 16-bit integers in a and b, and pack the @@ -7949,10 +8155,17 @@ FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) { __m128i dst; uint16_t min, idx = 0; - // Find the minimum value #if defined(__aarch64__) + // Find the minimum value min = vminvq_u16(vreinterpretq_u16_m128i(a)); + + // Get the index of the minimum value + static const uint16_t idxv[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint16x8_t minv = vdupq_n_u16(min); + uint16x8_t cmeq = vceqq_u16(minv, vreinterpretq_u16_m128i(a)); + idx = vminvq_u16(vornq_u16(vld1q_u16(idxv), cmeq)); #else + // Find the minimum value __m64 tmp; tmp = vreinterpret_m64_u16( vmin_u16(vget_low_u16(vreinterpretq_u16_m128i(a)), @@ -7962,7 +8175,6 @@ FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) tmp = vreinterpret_m64_u16( vpmin_u16(vreinterpret_u16_m64(tmp), vreinterpret_u16_m64(tmp))); min = vget_lane_u16(vreinterpret_u16_m64(tmp), 0); -#endif // Get the index of the minimum value int i; for (i = 0; i < 8; i++) { @@ -7972,6 +8184,7 @@ 
FORCE_INLINE __m128i _mm_minpos_epu16(__m128i a) } a = _mm_srli_si128(a, 2); } +#endif // Generate result dst = _mm_setzero_si128(); dst = vreinterpretq_m128i_u16( @@ -8034,13 +8247,13 @@ FORCE_INLINE __m128i _mm_mpsadbw_epu8(__m128i a, __m128i b, const int imm) int16x8_t c04, c15, c26, c37; uint8x8_t low_b = vget_low_u8(_b); - c04 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b))); - _a = vextq_u8(_a, _a, 1); - c15 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b))); - _a = vextq_u8(_a, _a, 1); - c26 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b))); - _a = vextq_u8(_a, _a, 1); - c37 = vabsq_s16(vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(_a), low_b))); + c04 = vreinterpretq_s16_u16(vabdl_u8(vget_low_u8(_a), low_b)); + uint8x16_t _a_1 = vextq_u8(_a, _a, 1); + c15 = vreinterpretq_s16_u16(vabdl_u8(vget_low_u8(_a_1), low_b)); + uint8x16_t _a_2 = vextq_u8(_a, _a, 2); + c26 = vreinterpretq_s16_u16(vabdl_u8(vget_low_u8(_a_2), low_b)); + uint8x16_t _a_3 = vextq_u8(_a, _a, 3); + c37 = vreinterpretq_s16_u16(vabdl_u8(vget_low_u8(_a_3), low_b)); #if defined(__aarch64__) // |0|4|2|6| c04 = vpaddq_s16(c04, c26); @@ -8322,8 +8535,7 @@ FORCE_INLINE int _mm_test_mix_ones_zeros(__m128i a, __m128i mask) FORCE_INLINE int _mm_testc_si128(__m128i a, __m128i b) { int64x2_t s64 = - vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_m128i(a))), - vreinterpretq_s64_m128i(b)); + vbicq_s64(vreinterpretq_s64_m128i(b), vreinterpretq_s64_m128i(a)); return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); } @@ -8349,6 +8561,756 @@ FORCE_INLINE int _mm_testz_si128(__m128i a, __m128i b) /* SSE4.2 */ +const static uint16_t _sse2neon_cmpestr_mask16b[8] ALIGN_STRUCT(16) = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, +}; +const static uint8_t _sse2neon_cmpestr_mask8b[16] ALIGN_STRUCT(16) = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, +}; + +/* specify the source data format 
*/ +#define _SIDD_UBYTE_OPS 0x00 /* unsigned 8-bit characters */ +#define _SIDD_UWORD_OPS 0x01 /* unsigned 16-bit characters */ +#define _SIDD_SBYTE_OPS 0x02 /* signed 8-bit characters */ +#define _SIDD_SWORD_OPS 0x03 /* signed 16-bit characters */ + +/* specify the comparison operation */ +#define _SIDD_CMP_EQUAL_ANY 0x00 /* compare equal any: strchr */ +#define _SIDD_CMP_RANGES 0x04 /* compare ranges */ +#define _SIDD_CMP_EQUAL_EACH 0x08 /* compare equal each: strcmp */ +#define _SIDD_CMP_EQUAL_ORDERED 0x0C /* compare equal ordered */ + +/* specify the polarity */ +#define _SIDD_POSITIVE_POLARITY 0x00 +#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 +#define _SIDD_NEGATIVE_POLARITY 0x10 /* negate results */ +#define _SIDD_MASKED_NEGATIVE_POLARITY \ + 0x30 /* negate results only before end of string */ + +/* specify the output selection in _mm_cmpXstri */ +#define _SIDD_LEAST_SIGNIFICANT 0x00 +#define _SIDD_MOST_SIGNIFICANT 0x40 + +/* specify the output selection in _mm_cmpXstrm */ +#define _SIDD_BIT_MASK 0x00 +#define _SIDD_UNIT_MASK 0x40 + +/* Pattern Matching for C macros. + * https://github.com/pfultz2/Cloak/wiki/C-Preprocessor-tricks,-tips,-and-idioms + */ + +/* catenate */ +#define SSE2NEON_PRIMITIVE_CAT(a, ...) a##__VA_ARGS__ +#define SSE2NEON_CAT(a, b) SSE2NEON_PRIMITIVE_CAT(a, b) + +#define SSE2NEON_IIF(c) SSE2NEON_PRIMITIVE_CAT(SSE2NEON_IIF_, c) +/* run the 2nd parameter */ +#define SSE2NEON_IIF_0(t, ...) __VA_ARGS__ +/* run the 1st parameter */ +#define SSE2NEON_IIF_1(t, ...) 
t + +#define SSE2NEON_COMPL(b) SSE2NEON_PRIMITIVE_CAT(SSE2NEON_COMPL_, b) +#define SSE2NEON_COMPL_0 1 +#define SSE2NEON_COMPL_1 0 + +#define SSE2NEON_DEC(x) SSE2NEON_PRIMITIVE_CAT(SSE2NEON_DEC_, x) +#define SSE2NEON_DEC_1 0 +#define SSE2NEON_DEC_2 1 +#define SSE2NEON_DEC_3 2 +#define SSE2NEON_DEC_4 3 +#define SSE2NEON_DEC_5 4 +#define SSE2NEON_DEC_6 5 +#define SSE2NEON_DEC_7 6 +#define SSE2NEON_DEC_8 7 +#define SSE2NEON_DEC_9 8 +#define SSE2NEON_DEC_10 9 +#define SSE2NEON_DEC_11 10 +#define SSE2NEON_DEC_12 11 +#define SSE2NEON_DEC_13 12 +#define SSE2NEON_DEC_14 13 +#define SSE2NEON_DEC_15 14 +#define SSE2NEON_DEC_16 15 + +/* detection */ +#define SSE2NEON_CHECK_N(x, n, ...) n +#define SSE2NEON_CHECK(...) SSE2NEON_CHECK_N(__VA_ARGS__, 0, ) +#define SSE2NEON_PROBE(x) x, 1, + +#define SSE2NEON_NOT(x) SSE2NEON_CHECK(SSE2NEON_PRIMITIVE_CAT(SSE2NEON_NOT_, x)) +#define SSE2NEON_NOT_0 SSE2NEON_PROBE(~) + +#define SSE2NEON_BOOL(x) SSE2NEON_COMPL(SSE2NEON_NOT(x)) +#define SSE2NEON_IF(c) SSE2NEON_IIF(SSE2NEON_BOOL(c)) + +#define SSE2NEON_EAT(...) +#define SSE2NEON_EXPAND(...) __VA_ARGS__ +#define SSE2NEON_WHEN(c) SSE2NEON_IF(c)(SSE2NEON_EXPAND, SSE2NEON_EAT) + +/* recursion */ +/* deferred expression */ +#define SSE2NEON_EMPTY() +#define SSE2NEON_DEFER(id) id SSE2NEON_EMPTY() +#define SSE2NEON_OBSTRUCT(...) __VA_ARGS__ SSE2NEON_DEFER(SSE2NEON_EMPTY)() +#define SSE2NEON_EXPAND(...) __VA_ARGS__ + +#define SSE2NEON_EVAL(...) \ + SSE2NEON_EVAL1(SSE2NEON_EVAL1(SSE2NEON_EVAL1(__VA_ARGS__))) +#define SSE2NEON_EVAL1(...) \ + SSE2NEON_EVAL2(SSE2NEON_EVAL2(SSE2NEON_EVAL2(__VA_ARGS__))) +#define SSE2NEON_EVAL2(...) \ + SSE2NEON_EVAL3(SSE2NEON_EVAL3(SSE2NEON_EVAL3(__VA_ARGS__))) +#define SSE2NEON_EVAL3(...) __VA_ARGS__ + +#define SSE2NEON_REPEAT(count, macro, ...) 
\ + SSE2NEON_WHEN(count) \ + (SSE2NEON_OBSTRUCT(SSE2NEON_REPEAT_INDIRECT)()( \ + SSE2NEON_DEC(count), macro, \ + __VA_ARGS__) SSE2NEON_OBSTRUCT(macro)(SSE2NEON_DEC(count), \ + __VA_ARGS__)) +#define SSE2NEON_REPEAT_INDIRECT() SSE2NEON_REPEAT + +#define SSE2NEON_SIZE_OF_byte 8 +#define SSE2NEON_NUMBER_OF_LANES_byte 16 +#define SSE2NEON_SIZE_OF_word 16 +#define SSE2NEON_NUMBER_OF_LANES_word 8 + +#define SSE2NEON_COMPARE_EQUAL_THEN_FILL_LANE(i, type) \ + mtx[i] = vreinterpretq_m128i_##type(vceqq_##type( \ + vdupq_n_##type(vgetq_lane_##type(vreinterpretq_##type##_m128i(b), i)), \ + vreinterpretq_##type##_m128i(a))); + +#define SSE2NEON_FILL_LANE(i, type) \ + vec_b[i] = \ + vdupq_n_##type(vgetq_lane_##type(vreinterpretq_##type##_m128i(b), i)); + +#define PCMPSTR_RANGES(a, b, mtx, data_type_prefix, type_prefix, size, \ + number_of_lanes, byte_or_word) \ + do { \ + SSE2NEON_CAT( \ + data_type_prefix, \ + SSE2NEON_CAT(size, \ + SSE2NEON_CAT(x, SSE2NEON_CAT(number_of_lanes, _t)))) \ + vec_b[number_of_lanes]; \ + __m128i mask = SSE2NEON_IIF(byte_or_word)( \ + vreinterpretq_m128i_u16(vdupq_n_u16(0xff)), \ + vreinterpretq_m128i_u32(vdupq_n_u32(0xffff))); \ + SSE2NEON_EVAL(SSE2NEON_REPEAT(number_of_lanes, SSE2NEON_FILL_LANE, \ + SSE2NEON_CAT(type_prefix, size))) \ + for (int i = 0; i < number_of_lanes; i++) { \ + mtx[i] = SSE2NEON_CAT(vreinterpretq_m128i_u, \ + size)(SSE2NEON_CAT(vbslq_u, size)( \ + SSE2NEON_CAT(vreinterpretq_u, \ + SSE2NEON_CAT(size, _m128i))(mask), \ + SSE2NEON_CAT(vcgeq_, SSE2NEON_CAT(type_prefix, size))( \ + vec_b[i], \ + SSE2NEON_CAT( \ + vreinterpretq_, \ + SSE2NEON_CAT(type_prefix, \ + SSE2NEON_CAT(size, _m128i(a))))), \ + SSE2NEON_CAT(vcleq_, SSE2NEON_CAT(type_prefix, size))( \ + vec_b[i], \ + SSE2NEON_CAT( \ + vreinterpretq_, \ + SSE2NEON_CAT(type_prefix, \ + SSE2NEON_CAT(size, _m128i(a))))))); \ + } \ + } while (0) + +#define PCMPSTR_EQ(a, b, mtx, size, number_of_lanes) \ + do { \ + SSE2NEON_EVAL(SSE2NEON_REPEAT(number_of_lanes, \ + 
SSE2NEON_COMPARE_EQUAL_THEN_FILL_LANE, \ + SSE2NEON_CAT(u, size))) \ + } while (0) + +#define SSE2NEON_CMP_EQUAL_ANY_IMPL(type) \ + static int _sse2neon_cmp_##type##_equal_any(__m128i a, int la, __m128i b, \ + int lb) \ + { \ + __m128i mtx[16]; \ + PCMPSTR_EQ(a, b, mtx, SSE2NEON_CAT(SSE2NEON_SIZE_OF_, type), \ + SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, type)); \ + return SSE2NEON_CAT( \ + _sse2neon_aggregate_equal_any_, \ + SSE2NEON_CAT( \ + SSE2NEON_CAT(SSE2NEON_SIZE_OF_, type), \ + SSE2NEON_CAT(x, SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, \ + type))))(la, lb, mtx); \ + } + +#define SSE2NEON_CMP_RANGES_IMPL(type, data_type, us, byte_or_word) \ + static int _sse2neon_cmp_##us##type##_ranges(__m128i a, int la, __m128i b, \ + int lb) \ + { \ + __m128i mtx[16]; \ + PCMPSTR_RANGES( \ + a, b, mtx, data_type, us, SSE2NEON_CAT(SSE2NEON_SIZE_OF_, type), \ + SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, type), byte_or_word); \ + return SSE2NEON_CAT( \ + _sse2neon_aggregate_ranges_, \ + SSE2NEON_CAT( \ + SSE2NEON_CAT(SSE2NEON_SIZE_OF_, type), \ + SSE2NEON_CAT(x, SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, \ + type))))(la, lb, mtx); \ + } + +#define SSE2NEON_CMP_EQUAL_ORDERED_IMPL(type) \ + static int _sse2neon_cmp_##type##_equal_ordered(__m128i a, int la, \ + __m128i b, int lb) \ + { \ + __m128i mtx[16]; \ + PCMPSTR_EQ(a, b, mtx, SSE2NEON_CAT(SSE2NEON_SIZE_OF_, type), \ + SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, type)); \ + return SSE2NEON_CAT( \ + _sse2neon_aggregate_equal_ordered_, \ + SSE2NEON_CAT( \ + SSE2NEON_CAT(SSE2NEON_SIZE_OF_, type), \ + SSE2NEON_CAT(x, \ + SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, type))))( \ + SSE2NEON_CAT(SSE2NEON_NUMBER_OF_LANES_, type), la, lb, mtx); \ + } + +static int _sse2neon_aggregate_equal_any_8x16(int la, int lb, __m128i mtx[16]) +{ + int res = 0; + int m = (1 << la) - 1; + uint8x8_t vec_mask = vld1_u8(_sse2neon_cmpestr_mask8b); + uint8x8_t t_lo = vtst_u8(vdup_n_u8(m & 0xff), vec_mask); + uint8x8_t t_hi = vtst_u8(vdup_n_u8(m >> 8), vec_mask); + 
uint8x16_t vec = vcombine_u8(t_lo, t_hi); + for (int j = 0; j < lb; j++) { + mtx[j] = vreinterpretq_m128i_u8( + vandq_u8(vec, vreinterpretq_u8_m128i(mtx[j]))); + mtx[j] = vreinterpretq_m128i_u8( + vshrq_n_u8(vreinterpretq_u8_m128i(mtx[j]), 7)); + int tmp = _sse2neon_vaddvq_u8(vreinterpretq_u8_m128i(mtx[j])) ? 1 : 0; + res |= (tmp << j); + } + return res; +} + +static int _sse2neon_aggregate_equal_any_16x8(int la, int lb, __m128i mtx[16]) +{ + int res = 0; + int m = (1 << la) - 1; + uint16x8_t vec = + vtstq_u16(vdupq_n_u16(m), vld1q_u16(_sse2neon_cmpestr_mask16b)); + for (int j = 0; j < lb; j++) { + mtx[j] = vreinterpretq_m128i_u16( + vandq_u16(vec, vreinterpretq_u16_m128i(mtx[j]))); + mtx[j] = vreinterpretq_m128i_u16( + vshrq_n_u16(vreinterpretq_u16_m128i(mtx[j]), 15)); + int tmp = _sse2neon_vaddvq_u16(vreinterpretq_u16_m128i(mtx[j])) ? 1 : 0; + res |= (tmp << j); + } + return res; +} + +/* clang-format off */ +#define SSE2NEON_GENERATE_CMP_EQUAL_ANY(prefix) \ + prefix##IMPL(byte) \ + prefix##IMPL(word) +/* clang-format on */ + +SSE2NEON_GENERATE_CMP_EQUAL_ANY(SSE2NEON_CMP_EQUAL_ANY_) + +static int _sse2neon_aggregate_ranges_16x8(int la, int lb, __m128i mtx[16]) +{ + int res = 0; + int m = (1 << la) - 1; + uint16x8_t vec = + vtstq_u16(vdupq_n_u16(m), vld1q_u16(_sse2neon_cmpestr_mask16b)); + for (int j = 0; j < lb; j++) { + mtx[j] = vreinterpretq_m128i_u16( + vandq_u16(vec, vreinterpretq_u16_m128i(mtx[j]))); + mtx[j] = vreinterpretq_m128i_u16( + vshrq_n_u16(vreinterpretq_u16_m128i(mtx[j]), 15)); + __m128i tmp = vreinterpretq_m128i_u32( + vshrq_n_u32(vreinterpretq_u32_m128i(mtx[j]), 16)); + uint32x4_t vec_res = vandq_u32(vreinterpretq_u32_m128i(mtx[j]), + vreinterpretq_u32_m128i(tmp)); +#if defined(__aarch64__) + int t = vaddvq_u32(vec_res) ? 
1 : 0; +#else + uint64x2_t sumh = vpaddlq_u32(vec_res); + int t = vgetq_lane_u64(sumh, 0) + vgetq_lane_u64(sumh, 1); +#endif + res |= (t << j); + } + return res; +} + +static int _sse2neon_aggregate_ranges_8x16(int la, int lb, __m128i mtx[16]) +{ + int res = 0; + int m = (1 << la) - 1; + uint8x8_t vec_mask = vld1_u8(_sse2neon_cmpestr_mask8b); + uint8x8_t t_lo = vtst_u8(vdup_n_u8(m & 0xff), vec_mask); + uint8x8_t t_hi = vtst_u8(vdup_n_u8(m >> 8), vec_mask); + uint8x16_t vec = vcombine_u8(t_lo, t_hi); + for (int j = 0; j < lb; j++) { + mtx[j] = vreinterpretq_m128i_u8( + vandq_u8(vec, vreinterpretq_u8_m128i(mtx[j]))); + mtx[j] = vreinterpretq_m128i_u8( + vshrq_n_u8(vreinterpretq_u8_m128i(mtx[j]), 7)); + __m128i tmp = vreinterpretq_m128i_u16( + vshrq_n_u16(vreinterpretq_u16_m128i(mtx[j]), 8)); + uint16x8_t vec_res = vandq_u16(vreinterpretq_u16_m128i(mtx[j]), + vreinterpretq_u16_m128i(tmp)); + int t = _sse2neon_vaddvq_u16(vec_res) ? 1 : 0; + res |= (t << j); + } + return res; +} + +#define SSE2NEON_CMP_RANGES_IS_BYTE 1 +#define SSE2NEON_CMP_RANGES_IS_WORD 0 + +/* clang-format off */ +#define SSE2NEON_GENERATE_CMP_RANGES(prefix) \ + prefix##IMPL(byte, uint, u, prefix##IS_BYTE) \ + prefix##IMPL(byte, int, s, prefix##IS_BYTE) \ + prefix##IMPL(word, uint, u, prefix##IS_WORD) \ + prefix##IMPL(word, int, s, prefix##IS_WORD) +/* clang-format on */ + +SSE2NEON_GENERATE_CMP_RANGES(SSE2NEON_CMP_RANGES_) + +#undef SSE2NEON_CMP_RANGES_IS_BYTE +#undef SSE2NEON_CMP_RANGES_IS_WORD + +static int _sse2neon_cmp_byte_equal_each(__m128i a, int la, __m128i b, int lb) +{ + uint8x16_t mtx = + vceqq_u8(vreinterpretq_u8_m128i(a), vreinterpretq_u8_m128i(b)); + int m0 = (la < lb) ? 
0 : ((1 << la) - (1 << lb)); + int m1 = 0x10000 - (1 << la); + int tb = 0x10000 - (1 << lb); + uint8x8_t vec_mask, vec0_lo, vec0_hi, vec1_lo, vec1_hi; + uint8x8_t tmp_lo, tmp_hi, res_lo, res_hi; + vec_mask = vld1_u8(_sse2neon_cmpestr_mask8b); + vec0_lo = vtst_u8(vdup_n_u8(m0), vec_mask); + vec0_hi = vtst_u8(vdup_n_u8(m0 >> 8), vec_mask); + vec1_lo = vtst_u8(vdup_n_u8(m1), vec_mask); + vec1_hi = vtst_u8(vdup_n_u8(m1 >> 8), vec_mask); + tmp_lo = vtst_u8(vdup_n_u8(tb), vec_mask); + tmp_hi = vtst_u8(vdup_n_u8(tb >> 8), vec_mask); + + res_lo = vbsl_u8(vec0_lo, vdup_n_u8(0), vget_low_u8(mtx)); + res_hi = vbsl_u8(vec0_hi, vdup_n_u8(0), vget_high_u8(mtx)); + res_lo = vbsl_u8(vec1_lo, tmp_lo, res_lo); + res_hi = vbsl_u8(vec1_hi, tmp_hi, res_hi); + res_lo = vand_u8(res_lo, vec_mask); + res_hi = vand_u8(res_hi, vec_mask); + + int res = _sse2neon_vaddv_u8(res_lo) + (_sse2neon_vaddv_u8(res_hi) << 8); + return res; +} + +static int _sse2neon_cmp_word_equal_each(__m128i a, int la, __m128i b, int lb) +{ + uint16x8_t mtx = + vceqq_u16(vreinterpretq_u16_m128i(a), vreinterpretq_u16_m128i(b)); + int m0 = (la < lb) ? 
0 : ((1 << la) - (1 << lb)); + int m1 = 0x100 - (1 << la); + int tb = 0x100 - (1 << lb); + uint16x8_t vec_mask = vld1q_u16(_sse2neon_cmpestr_mask16b); + uint16x8_t vec0 = vtstq_u16(vdupq_n_u16(m0), vec_mask); + uint16x8_t vec1 = vtstq_u16(vdupq_n_u16(m1), vec_mask); + uint16x8_t tmp = vtstq_u16(vdupq_n_u16(tb), vec_mask); + mtx = vbslq_u16(vec0, vdupq_n_u16(0), mtx); + mtx = vbslq_u16(vec1, tmp, mtx); + mtx = vandq_u16(mtx, vec_mask); + return _sse2neon_vaddvq_u16(mtx); +} + +#define SSE2NEON_AGGREGATE_EQUAL_ORDER_IS_UBYTE 1 +#define SSE2NEON_AGGREGATE_EQUAL_ORDER_IS_UWORD 0 + +#define SSE2NEON_AGGREGATE_EQUAL_ORDER_IMPL(size, number_of_lanes, data_type) \ + static int _sse2neon_aggregate_equal_ordered_##size##x##number_of_lanes( \ + int bound, int la, int lb, __m128i mtx[16]) \ + { \ + int res = 0; \ + int m1 = SSE2NEON_IIF(data_type)(0x10000, 0x100) - (1 << la); \ + uint##size##x8_t vec_mask = SSE2NEON_IIF(data_type)( \ + vld1_u##size(_sse2neon_cmpestr_mask##size##b), \ + vld1q_u##size(_sse2neon_cmpestr_mask##size##b)); \ + uint##size##x##number_of_lanes##_t vec1 = SSE2NEON_IIF(data_type)( \ + vcombine_u##size(vtst_u##size(vdup_n_u##size(m1), vec_mask), \ + vtst_u##size(vdup_n_u##size(m1 >> 8), vec_mask)), \ + vtstq_u##size(vdupq_n_u##size(m1), vec_mask)); \ + uint##size##x##number_of_lanes##_t vec_minusone = vdupq_n_u##size(-1); \ + uint##size##x##number_of_lanes##_t vec_zero = vdupq_n_u##size(0); \ + for (int j = 0; j < lb; j++) { \ + mtx[j] = vreinterpretq_m128i_u##size(vbslq_u##size( \ + vec1, vec_minusone, vreinterpretq_u##size##_m128i(mtx[j]))); \ + } \ + for (int j = lb; j < bound; j++) { \ + mtx[j] = vreinterpretq_m128i_u##size( \ + vbslq_u##size(vec1, vec_minusone, vec_zero)); \ + } \ + unsigned SSE2NEON_IIF(data_type)(char, short) *ptr = \ + (unsigned SSE2NEON_IIF(data_type)(char, short) *) mtx; \ + for (int i = 0; i < bound; i++) { \ + int val = 1; \ + for (int j = 0, k = i; j < bound - i && k < bound; j++, k++) \ + val &= ptr[k * bound + j]; \ + res 
+= val << i; \ + } \ + return res; \ + } + +/* clang-format off */ +#define SSE2NEON_GENERATE_AGGREGATE_EQUAL_ORDER(prefix) \ + prefix##IMPL(8, 16, prefix##IS_UBYTE) \ + prefix##IMPL(16, 8, prefix##IS_UWORD) +/* clang-format on */ + +SSE2NEON_GENERATE_AGGREGATE_EQUAL_ORDER(SSE2NEON_AGGREGATE_EQUAL_ORDER_) + +#undef SSE2NEON_AGGREGATE_EQUAL_ORDER_IS_UBYTE +#undef SSE2NEON_AGGREGATE_EQUAL_ORDER_IS_UWORD + +/* clang-format off */ +#define SSE2NEON_GENERATE_CMP_EQUAL_ORDERED(prefix) \ + prefix##IMPL(byte) \ + prefix##IMPL(word) +/* clang-format on */ + +SSE2NEON_GENERATE_CMP_EQUAL_ORDERED(SSE2NEON_CMP_EQUAL_ORDERED_) + +#define SSE2NEON_CMPESTR_LIST \ + _(CMP_UBYTE_EQUAL_ANY, cmp_byte_equal_any) \ + _(CMP_UWORD_EQUAL_ANY, cmp_word_equal_any) \ + _(CMP_SBYTE_EQUAL_ANY, cmp_byte_equal_any) \ + _(CMP_SWORD_EQUAL_ANY, cmp_word_equal_any) \ + _(CMP_UBYTE_RANGES, cmp_ubyte_ranges) \ + _(CMP_UWORD_RANGES, cmp_uword_ranges) \ + _(CMP_SBYTE_RANGES, cmp_sbyte_ranges) \ + _(CMP_SWORD_RANGES, cmp_sword_ranges) \ + _(CMP_UBYTE_EQUAL_EACH, cmp_byte_equal_each) \ + _(CMP_UWORD_EQUAL_EACH, cmp_word_equal_each) \ + _(CMP_SBYTE_EQUAL_EACH, cmp_byte_equal_each) \ + _(CMP_SWORD_EQUAL_EACH, cmp_word_equal_each) \ + _(CMP_UBYTE_EQUAL_ORDERED, cmp_byte_equal_ordered) \ + _(CMP_UWORD_EQUAL_ORDERED, cmp_word_equal_ordered) \ + _(CMP_SBYTE_EQUAL_ORDERED, cmp_byte_equal_ordered) \ + _(CMP_SWORD_EQUAL_ORDERED, cmp_word_equal_ordered) + +enum { +#define _(name, func_suffix) name, + SSE2NEON_CMPESTR_LIST +#undef _ +}; +typedef int (*cmpestr_func_t)(__m128i a, int la, __m128i b, int lb); +static cmpestr_func_t _sse2neon_cmpfunc_table[] = { +#define _(name, func_suffix) _sse2neon_##func_suffix, + SSE2NEON_CMPESTR_LIST +#undef _ +}; + +FORCE_INLINE int _sse2neon_sido_negative(int res, int lb, int imm8, int bound) +{ + switch (imm8 & 0x30) { + case _SIDD_NEGATIVE_POLARITY: + res ^= 0xffffffff; + break; + case _SIDD_MASKED_NEGATIVE_POLARITY: + res ^= (1 << lb) - 1; + break; + default: + break; + } + + 
return res & ((bound == 8) ? 0xFF : 0xFFFF); +} + +FORCE_INLINE int _sse2neon_clz(unsigned int x) +{ +#if _MSC_VER + DWORD cnt = 0; + if (_BitScanForward(&cnt, x)) + return cnt; + return 32; +#else + return x != 0 ? __builtin_clz(x) : 32; +#endif +} + +FORCE_INLINE int _sse2neon_ctz(unsigned int x) +{ +#if _MSC_VER + DWORD cnt = 0; + if (_BitScanReverse(&cnt, x)) + return 31 - cnt; + return 32; +#else + return x != 0 ? __builtin_ctz(x) : 32; +#endif +} + +FORCE_INLINE int _sse2neon_ctzll(unsigned long long x) +{ +#if _MSC_VER + unsigned long cnt; +#ifdef defined(SSE2NEON_HAS_BITSCAN64) + (defined(_M_AMD64) || defined(__x86_64__)) + if((_BitScanForward64(&cnt, x)) + return (int)(cnt); +#else + if (_BitScanForward(&cnt, (unsigned long) (x))) + return (int) cnt; + if (_BitScanForward(&cnt, (unsigned long) (x >> 32))) + return (int) (cnt + 32); +#endif + return 64; +#else + return x != 0 ? __builtin_ctzll(x) : 64; +#endif +} + +#define SSE2NEON_MIN(x, y) (x) < (y) ? (x) : (y) + +#define SSE2NEON_CMPSTR_SET_UPPER(var, imm) \ + const int var = (imm & 0x01) ? 8 : 16 + +#define SSE2NEON_CMPESTRX_LEN_PAIR(a, b, la, lb) \ + int tmp1 = la ^ (la >> 31); \ + la = tmp1 - (la >> 31); \ + int tmp2 = lb ^ (lb >> 31); \ + lb = tmp2 - (lb >> 31); \ + la = SSE2NEON_MIN(la, bound); \ + lb = SSE2NEON_MIN(lb, bound) + +// Compare all pairs of character in string a and b, +// then aggregate the result. +// As the only difference of PCMPESTR* and PCMPISTR* is the way to calculate the +// length of string, we use SSE2NEON_CMP{I,E}STRX_GET_LEN to get the length of +// string a and b. +#define SSE2NEON_COMP_AGG(a, b, la, lb, imm8, IE) \ + SSE2NEON_CMPSTR_SET_UPPER(bound, imm8); \ + SSE2NEON_##IE##_LEN_PAIR(a, b, la, lb); \ + int r2 = (_sse2neon_cmpfunc_table[imm8 & 0x0f])(a, la, b, lb); \ + r2 = _sse2neon_sido_negative(r2, lb, imm8, bound) + +#define SSE2NEON_CMPSTR_GENERATE_INDEX(r2, bound, imm8) \ + return (r2 == 0) ? bound \ + : ((imm8 & 0x40) ? 
(31 - _sse2neon_clz(r2)) \ + : _sse2neon_ctz(r2)) + +#define SSE2NEON_CMPSTR_GENERATE_MASK(dst) \ + __m128i dst = vreinterpretq_m128i_u8(vdupq_n_u8(0)); \ + if (imm8 & 0x40) { \ + if (bound == 8) { \ + uint16x8_t tmp = vtstq_u16(vdupq_n_u16(r2), \ + vld1q_u16(_sse2neon_cmpestr_mask16b)); \ + dst = vreinterpretq_m128i_u16(vbslq_u16( \ + tmp, vdupq_n_u16(-1), vreinterpretq_u16_m128i(dst))); \ + } else { \ + uint8x16_t vec_r2 = \ + vcombine_u8(vdup_n_u8(r2), vdup_n_u8(r2 >> 8)); \ + uint8x16_t tmp = \ + vtstq_u8(vec_r2, vld1q_u8(_sse2neon_cmpestr_mask8b)); \ + dst = vreinterpretq_m128i_u8( \ + vbslq_u8(tmp, vdupq_n_u8(-1), vreinterpretq_u8_m128i(dst))); \ + } \ + } else { \ + if (bound == 16) { \ + dst = vreinterpretq_m128i_u16( \ + vsetq_lane_u16(r2 & 0xffff, vreinterpretq_u16_m128i(dst), 0)); \ + } else { \ + dst = vreinterpretq_m128i_u8( \ + vsetq_lane_u8(r2 & 0xff, vreinterpretq_u8_m128i(dst), 0)); \ + } \ + } \ + return dst + +// Compare packed strings in a and b with lengths la and lb using the control +// in imm8, and returns 1 if b did not contain a null character and the +// resulting mask was zero, and 0 otherwise. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestra +FORCE_INLINE int _mm_cmpestra(__m128i a, + int la, + __m128i b, + int lb, + const int imm8) +{ + int lb_cpy = lb; + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPESTRX); + return !r2 & (lb_cpy > bound); +} + +// Compare packed strings in a and b with lengths la and lb using the control in +// imm8, and returns 1 if the resulting mask was non-zero, and 0 otherwise. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrc +FORCE_INLINE int _mm_cmpestrc(__m128i a, + int la, + __m128i b, + int lb, + const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPESTRX); + return r2 != 0; +} + +// Compare packed strings in a and b with lengths la and lb using the control +// in imm8, and store the generated index in dst. 
+// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri +FORCE_INLINE int _mm_cmpestri(__m128i a, + int la, + __m128i b, + int lb, + const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPESTRX); + SSE2NEON_CMPSTR_GENERATE_INDEX(r2, bound, imm8); +} + +// Compare packed strings in a and b with lengths la and lb using the control +// in imm8, and store the generated mask in dst. +// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm +FORCE_INLINE __m128i +_mm_cmpestrm(__m128i a, int la, __m128i b, int lb, const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPESTRX); + SSE2NEON_CMPSTR_GENERATE_MASK(dst); +} + +// Compare packed strings in a and b with lengths la and lb using the control in +// imm8, and returns bit 0 of the resulting bit mask. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestro +FORCE_INLINE int _mm_cmpestro(__m128i a, + int la, + __m128i b, + int lb, + const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPESTRX); + return r2 & 1; +} + +// Compare packed strings in a and b with lengths la and lb using the control in +// imm8, and returns 1 if any character in a was null, and 0 otherwise. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrs +FORCE_INLINE int _mm_cmpestrs(__m128i a, + int la, + __m128i b, + int lb, + const int imm8) +{ + SSE2NEON_CMPSTR_SET_UPPER(bound, imm8); + return la <= (bound - 1); +} + +// Compare packed strings in a and b with lengths la and lb using the control in +// imm8, and returns 1 if any character in b was null, and 0 otherwise. 
+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrz +FORCE_INLINE int _mm_cmpestrz(__m128i a, + int la, + __m128i b, + int lb, + const int imm8) +{ + SSE2NEON_CMPSTR_SET_UPPER(bound, imm8); + return lb <= (bound - 1); +} + +#define SSE2NEON_CMPISTRX_LENGTH(str, len, imm8) \ + do { \ + if (imm8 & 0x01) { \ + uint16x8_t equal_mask_##str = \ + vceqq_u16(vreinterpretq_u16_m128i(str), vdupq_n_u16(0)); \ + uint8x8_t res_##str = vshrn_n_u16(equal_mask_##str, 4); \ + uint64_t matches_##str = \ + vget_lane_u64(vreinterpret_u64_u8(res_##str), 0); \ + len = _sse2neon_ctzll(matches_##str) >> 3; \ + } else { \ + uint16x8_t equal_mask_##str = vreinterpretq_u16_u8( \ + vceqq_u8(vreinterpretq_u8_m128i(str), vdupq_n_u8(0))); \ + uint8x8_t res_##str = vshrn_n_u16(equal_mask_##str, 4); \ + uint64_t matches_##str = \ + vget_lane_u64(vreinterpret_u64_u8(res_##str), 0); \ + len = _sse2neon_ctzll(matches_##str) >> 2; \ + } \ + } while (0) + +#define SSE2NEON_CMPISTRX_LEN_PAIR(a, b, la, lb) \ + int la, lb; \ + do { \ + SSE2NEON_CMPISTRX_LENGTH(a, la, imm8); \ + SSE2NEON_CMPISTRX_LENGTH(b, lb, imm8); \ + } while (0) + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and returns 1 if b did not contain a null character and the resulting +// mask was zero, and 0 otherwise. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistra +FORCE_INLINE int _mm_cmpistra(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPISTRX); + return !r2 & (lb >= bound); +} + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and returns 1 if the resulting mask was non-zero, and 0 otherwise. 
+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrc +FORCE_INLINE int _mm_cmpistrc(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPISTRX); + return r2 != 0; +} + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and store the generated index in dst. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistri +FORCE_INLINE int _mm_cmpistri(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPISTRX); + SSE2NEON_CMPSTR_GENERATE_INDEX(r2, bound, imm8); +} + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and store the generated mask in dst. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrm +FORCE_INLINE __m128i _mm_cmpistrm(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPISTRX); + SSE2NEON_CMPSTR_GENERATE_MASK(dst); +} + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and returns bit 0 of the resulting bit mask. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistro +FORCE_INLINE int _mm_cmpistro(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_COMP_AGG(a, b, la, lb, imm8, CMPISTRX); + return r2 & 1; +} + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and returns 1 if any character in a was null, and 0 otherwise. 
+// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrs +FORCE_INLINE int _mm_cmpistrs(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_CMPSTR_SET_UPPER(bound, imm8); + int la; + SSE2NEON_CMPISTRX_LENGTH(a, la, imm8); + return la <= (bound - 1); +} + +// Compare packed strings with implicit lengths in a and b using the control in +// imm8, and returns 1 if any character in b was null, and 0 otherwise. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrz +FORCE_INLINE int _mm_cmpistrz(__m128i a, __m128i b, const int imm8) +{ + SSE2NEON_CMPSTR_SET_UPPER(bound, imm8); + int lb; + SSE2NEON_CMPISTRX_LENGTH(b, lb, imm8); + return lb <= (bound - 1); +} + // Compares the 2 signed 64-bit integers in a and the 2 signed 64-bit integers // in b for greater than. FORCE_INLINE __m128i _mm_cmpgt_epi64(__m128i a, __m128i b) @@ -8789,6 +9751,44 @@ FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) #endif } +// Return the current 64-bit value of the processor's time-stamp counter. +// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=rdtsc + +FORCE_INLINE uint64_t _rdtsc(void) +{ +#if defined(__aarch64__) + uint64_t val; + + /* According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the + * system counter is at least 56 bits wide; from Armv8.6, the counter + * must be 64 bits wide. So the system counter could be less than 64 + * bits wide and it is attributed with the flag 'cap_user_time_short' + * is true. + */ + __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(val)); + + return val; +#else + uint32_t pmccntr, pmuseren, pmcntenset; + // Read the user mode Performance Monitoring Unit (PMU) + // User Enable Register (PMUSERENR) access permissions. + __asm__ __volatile__("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren)); + if (pmuseren & 1) { // Allows reading PMUSERENR for user mode code. 
+ __asm__ __volatile__("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset)); + if (pmcntenset & 0x80000000UL) { // Is it counting? + __asm__ __volatile__("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr)); + // The counter is set up to count every 64th cycle + return (uint64_t) (pmccntr) << 6; + } + } + + // Fallback to syscall as we can't enable PMUSERENR in user mode. + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t) (tv.tv_sec) * 1000000 + tv.tv_usec; +#endif +} + #if defined(__GNUC__) || defined(__clang__) #pragma pop_macro("ALIGN_STRUCT") #pragma pop_macro("FORCE_INLINE") From 74a46e39662b841124832854ed8bd221f0db8125 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 19 Dec 2022 20:41:15 +0300 Subject: [PATCH 18/27] arm: rename _rdtsc to storm_rdtsc In order not to overlap the _rdtsc function from sse2neon.h --- src/libs/renderer/src/s_device.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libs/renderer/src/s_device.cpp b/src/libs/renderer/src/s_device.cpp index ee15173e5..e8775f7ed 100644 --- a/src/libs/renderer/src/s_device.cpp +++ b/src/libs/renderer/src/s_device.cpp @@ -236,7 +236,7 @@ bool DX9RENDER_SCRIPT_LIBRIARY::Init() return true; } -uint64_t _rdtsc; +uint64_t storm_rdtsc; uint32_t dwTotalSize = 0; uint32_t dwSplashTime = 0; bool bSplash = false; @@ -2380,11 +2380,11 @@ void DX9RENDER::RenderAnimation(int32_t ib, void *src, int32_t numVrts, int32_t } // Copy verteces uint8_t *ptr; - RDTSC_B(_rdtsc); + RDTSC_B(storm_rdtsc); if (CHECKD3DERR(aniVBuffer->Lock(0, size, (void **)&ptr, 0)) == true) return; dwNumLV++; - RDTSC_E(_rdtsc); + RDTSC_E(storm_rdtsc); memcpy(ptr, src, size); CHECKD3DERR(aniVBuffer->Unlock()); } From 1f7bd573b3cc819dda34a7da702b2ef55d22e9a6 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Tue, 20 Dec 2022 00:46:43 +0300 Subject: [PATCH 19/27] arm: simple fix for aligning asm vldr 
https://developer.arm.com/documentation/ddi0406/c/Application-Level-Architecture/Instruction-Details/Alphabetical-list-of-instructions/VLDR --- src/libs/core/src/compiler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/libs/core/src/compiler.cpp b/src/libs/core/src/compiler.cpp index 3ffe29702..561766a3a 100644 --- a/src/libs/core/src/compiler.cpp +++ b/src/libs/core/src/compiler.cpp @@ -3845,6 +3845,7 @@ bool COMPILER::BC_Execute(uint32_t function_code, DATA *&pVReturnResult, const c S_TOKEN_TYPE Token_type; FuncInfo fi; const VarInfo *real_var; + float float_var; DATA *pV; DATA *pVResult; // DATA ExpressionResult; // while compile expression not ready, each function have its own register @@ -5386,7 +5387,8 @@ bool COMPILER::BC_Execute(uint32_t function_code, DATA *&pVReturnResult, const c pV->Set(*((int32_t *)&pRunCodeBase[TLR_DataOffset])); break; case FLOAT_NUMBER: - pV->Set(*((float *)&pRunCodeBase[TLR_DataOffset])); + float_var = *((float *)&pRunCodeBase[TLR_DataOffset]); + pV->Set(float_var); break; case STRING: pV->Set((char *)&pRunCodeBase[TLR_DataOffset + 4]); // 4 - string length From cb67796d4c63fe6f7d8400ea3c1b6d63ddfa250c Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Thu, 22 Dec 2022 15:10:58 +0300 Subject: [PATCH 20/27] nine: add armhf and aarch64 search paths for d3dadapter9.so --- nine-native/src/nine_sdl.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/nine-native/src/nine_sdl.c b/nine-native/src/nine_sdl.c index 873628935..1f6f1633a 100644 --- a/nine-native/src/nine_sdl.c +++ b/nine-native/src/nine_sdl.c @@ -1159,15 +1159,20 @@ d3dadapter9_new( BOOL ex, Display *dpy, ERR("Failed to load d3d9 lib '%s': %s\n", str, dlerror()); } } else { - const char *paths[] = { - "/usr/lib32/d3d/d3dadapter9.so", - "/usr/lib/d3d/d3dadapter9.so", - "/usr/lib/x86_64-linux-gnu/d3d/d3dadapter9.so", - }; - for (unsigned i = 0; !handle && i < 3; i++) - handle = dlopen(paths[i], RTLD_LOCAL | 
RTLD_NOW); - if (!handle) - ERR("Failed to load d3d9 lib: %s\n", dlerror()); + const char *paths[] = { + "/usr/lib32/d3d/d3dadapter9.so", + "/usr/lib/d3d/d3dadapter9.so", + "/usr/local/lib/x86_64-linux-gnu/d3d/d3dadapter9.so", + "/usr/lib/x86_64-linux-gnu/d3d/d3dadapter9.so", + "/usr/local/lib/arm-linux-gnueabihf/d3d/d3dadapter9.so", + "/usr/lib/arm-linux-gnueabihf/d3d/d3dadapter9.so", + "/usr/local/lib/aarch64-linux-gnu/d3d/d3dadapter9.so", + "/usr/lib/aarch64-linux-gnu/d3d/d3dadapter9.so", + }; + for (unsigned i = 0; !handle && i < 8; i++) + handle = dlopen(paths[i], RTLD_LOCAL | RTLD_NOW); + if (!handle) + ERR("Failed to load d3d9 lib: %s\n", dlerror()); } if (!handle) { return D3DERR_NOTAVAILABLE; From d8072609807454f230604641ea3b311a7e8ebacb Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 20 Feb 2023 11:50:14 +0300 Subject: [PATCH 21/27] Revert "gcc: fix unions in matrix.h and math3d/matrix.h" This reverts commit 1c818346a6825a7e5b2562340e3b342541991b19. --- src/libs/location/src/location_camera.cpp | 42 +-- src/libs/math/include/math3d/matrix.h | 260 ++++++------------ src/libs/math/include/matrix.h | 20 ++ .../particles/src/system/emitters/base.cpp | 19 +- src/libs/sea/src/env_map.cpp | 4 +- src/libs/sea_cameras/src/ship_camera.cpp | 2 +- 6 files changed, 125 insertions(+), 222 deletions(-) diff --git a/src/libs/location/src/location_camera.cpp b/src/libs/location/src/location_camera.cpp index c92d5167f..ace14faca 100644 --- a/src/libs/location/src/location_camera.cpp +++ b/src/libs/location/src/location_camera.cpp @@ -764,20 +764,10 @@ bool LocationCamera::LoadCameraTrack(const char *pcTrackFile, float fTrackTime) Matrix view; m_track.GetPoint(0.f, pos, ang); ang.GetMatrix(view); - //view.vx = -view.vx; - view.m[0][0] = -view.m[0][0]; - view.m[0][1] = -view.m[0][1]; - view.m[0][2] = -view.m[0][2]; - //view.vz = -view.vz; - view.m[2][0] = -view.m[2][0]; - view.m[2][1] = -view.m[2][1]; - view.m[2][2] = -view.m[2][2]; + view.vx = -view.vx; + view.vz 
= -view.vz; view.Inverse(); - //view.pos = view * -pos; - pos = view * -pos; - view.m[3][0] = pos.x; - view.m[3][1] = pos.y; - view.m[3][2] = pos.z; + view.pos = view * -pos; rs->SetView(*(CMatrix *)&view); rs->SetPerspective(cameraPerspective); @@ -796,14 +786,8 @@ void LocationCamera::TurnOffTrackCamera() Matrix view; m_track.GetPoint(0.99999f, pos, ang); ang.GetMatrix(view); - //view.vx = -view.vx; - view.m[0][0] = -view.m[0][0]; - view.m[0][1] = -view.m[0][1]; - view.m[0][2] = -view.m[0][2]; - //view.vz = -view.vz; - view.m[2][0] = -view.m[2][0]; - view.m[2][1] = -view.m[2][1]; - view.m[2][2] = -view.m[2][2]; + view.vx = -view.vx; + view.vz = -view.vz; view.Inverse(); oldPos = *(CVECTOR *)&pos; @@ -827,20 +811,10 @@ void LocationCamera::ProcessTrackCamera() Matrix view; m_track.GetPoint(fTrackTime / m_fTrackMaxTime, pos, ang); ang.GetMatrix(view); - //view.vx = -view.vx; - view.m[0][0] = -view.m[0][0]; - view.m[0][1] = -view.m[0][1]; - view.m[0][2] = -view.m[0][2]; - //view.vz = -view.vz; - view.m[2][0] = -view.m[2][0]; - view.m[2][1] = -view.m[2][1]; - view.m[2][2] = -view.m[2][2]; + view.vx = -view.vx; + view.vz = -view.vz; view.Inverse(); - //view.pos = view * -pos; - pos = view * -pos; - view.m[3][0] = pos.x; - view.m[3][1] = pos.y; - view.m[3][2] = pos.z; + view.pos = view * -pos; rs->SetView(*(CMatrix *)&view); rs->SetPerspective(cameraPerspective); } diff --git a/src/libs/math/include/math3d/matrix.h b/src/libs/math/include/math3d/matrix.h index 336cfa379..4b0d5c99c 100644 --- a/src/libs/math/include/math3d/matrix.h +++ b/src/libs/math/include/math3d/matrix.h @@ -36,6 +36,26 @@ class Matrix alignas(16) float matrix[16]; // espkk # remove inline asm # 30/Dec/2017 // Two-dimensional array float m[4][4]; + + struct + { + // X direction + Vector vx; + // Weight value of X + float wx; + // Y direction + Vector vy; + // Weight value of Y + float wy; + // Z direction + Vector vz; + // Weight value of Z + float wz; + // Position + Vector pos; + // Added weight 
value + float w; + }; }; // ----------------------------------------------------------- @@ -81,8 +101,6 @@ class Matrix public: // Set identity matrix Matrix &SetIdentity(); - // Set 3x3 identity matrix - Matrix &SetIdentity3X3(); // Set Matrix Matrix &Set(const Matrix &matrix); @@ -296,30 +314,21 @@ inline Matrix &Matrix::operator=(const Matrix &mtx) // Assign a number to the position inline Matrix &Matrix::operator=(float f) { - //pos = f; - m[3][0] = f; - m[3][1] = f; - m[3][2] = f; + pos = f; return *this; } // Assign a number to the position inline Matrix &Matrix::operator=(double d) { - //pos = d; - m[3][0] = static_cast(d); - m[3][1] = static_cast(d); - m[3][2] = static_cast(d); + pos = d; return *this; } // Assign a vector to the position inline Matrix &Matrix::operator=(const Vector &v) { - //pos = v; - m[3][0] = v.x; - m[3][1] = v.y; - m[3][2] = v.z; + pos = v; return *this; } @@ -405,21 +414,6 @@ inline Matrix &Matrix::SetIdentity() return *this; } -inline Matrix &Matrix::SetIdentity3X3() -{ - m[0][0] = 1.f; - m[0][1] = 0; - m[0][2] = 0; - m[1][0] = 0; - m[1][1] = 1.f; - m[1][2] = 0; - m[2][0] = 0; - m[2][1] = 0; - m[2][2] = 1.f; - - return *this; -} - // Set Matrix inline Matrix &Matrix::Set(const Matrix &matrix) { @@ -805,10 +799,7 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) if (lookTo.Normalize() == 0.0f) { // Putting a position for a non-rotated matrix - //pos = -lookFrom; - m[3][0] = -lookFrom.x; - m[3][1] = -lookFrom.y; - m[3][2] = -lookFrom.z; + pos = -lookFrom; return false; } // Directing the vector up in the desired direction @@ -834,19 +825,12 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) else { // Setting a position for a non-rotated matrix - //pos = -lookFrom; - m[3][0] = -lookFrom.x; - m[3][1] = -lookFrom.y; - m[3][2] = -lookFrom.z; + pos = -lookFrom; return false; } // set the position // pos = -MulNormalByInverse (lookFrom); - //pos = -MulNormal(lookFrom); - auto p 
= -MulNormal(lookFrom); - m[3][0] = p.x; - m[3][1] = p.y; - m[3][2] = p.z; + pos = -MulNormal(lookFrom); return true; } @@ -856,35 +840,22 @@ inline bool Matrix::BuildOrient(Vector zAxisDirection, Vector upVector) // Normalize the direction vector z if (zAxisDirection.Normalize() < 1e-37f || upVector.Normalize() < 1e-37f) { - //vx = Vector(1.0f, 0.0f, 0.0f); - //vy = Vector(0.0f, 1.0f, 0.0f); - //vz = Vector(0.0f, 0.0f, 1.0f); - SetIdentity3X3(); + vx = Vector(1.0f, 0.0f, 0.0f); + vy = Vector(0.0f, 1.0f, 0.0f); + vz = Vector(0.0f, 0.0f, 1.0f); return false; } // calculate - //vx = zAxisDirection ^ upVector; - auto vec = zAxisDirection ^ upVector; - m[0][0] = vec.x; - m[0][1] = vec.y; - m[0][2] = vec.z; - if (vec.Normalize() == 0.0f) + vx = zAxisDirection ^ upVector; + if (vx.Normalize() == 0.0f) { - //vx = Vector(1.0f, 0.0f, 0.0f); - //vy = Vector(0.0f, 1.0f, 0.0f); - //vz = Vector(0.0f, 0.0f, 1.0f); - SetIdentity3X3(); + vx = Vector(1.0f, 0.0f, 0.0f); + vy = Vector(0.0f, 1.0f, 0.0f); + vz = Vector(0.0f, 0.0f, 1.0f); return false; } - //vy = zAxisDirection ^ vx; - vec = zAxisDirection ^ vec; - m[1][0] = vec.x; - m[1][1] = vec.y; - m[1][2] = vec.z; - //vz = zAxisDirection; - m[2][0] = zAxisDirection.x; - m[2][1] = zAxisDirection.y; - m[2][2] = zAxisDirection.z; + vy = zAxisDirection ^ vx; + vz = zAxisDirection; return true; } @@ -896,68 +867,37 @@ inline bool Matrix::BuildOriented(Vector position, Vector lookTo, Vector upVecto // Normalize the direction vector z if (lookTo.Normalize() == 0.0f || upVector.Normalize() == 0.0f) { - //vx = Vector(1.0f, 0.0f, 0.0f); - //wx = 0.0f; - //vy = Vector(0.0f, 1.0f, 0.0f); - //wy = 0.0f; - //vz = Vector(0.0f, 0.0f, 1.0f); - //wz = 0.0f; - SetIdentity3X3(); - m[0][3] = 0.0f; // wx - m[1][3] = 0.0f; // wy - m[2][3] = 0.0f; // wz - //pos = position; - //w = 1.0f; - m[3][0] = position.x; - m[3][1] = position.y; - m[3][2] = position.z; - m[3][3] = 1.0f; + vx = Vector(1.0f, 0.0f, 0.0f); + wx = 0.0f; + vy = Vector(0.0f, 1.0f, 0.0f); + 
wy = 0.0f; + vz = Vector(0.0f, 0.0f, 1.0f); + wz = 0.0f; + pos = position; + w = 1.0f; return false; } // calculate - //vx = lookTo ^ upVector; - auto vec = lookTo ^ upVector; - m[0][0] = vec.x; - m[0][1] = vec.y; - m[0][2] = vec.z; - m[0][3] = 0.0f; - if (vec.Normalize() == 0.0f) + vx = lookTo ^ upVector; + wx = 0.0f; + if (vx.Normalize() == 0.0f) { - //vx = Vector(1.0f, 0.0f, 0.0f); - //wx = 0.0f; - //vy = Vector(0.0f, 1.0f, 0.0f); - //wy = 0.0f; - //vz = Vector(0.0f, 0.0f, 1.0f); - //wz = 0.0f; - SetIdentity3X3(); - m[0][3] = 0.0f; // wx - m[1][3] = 0.0f; // wy - m[2][3] = 0.0f; // wz - //pos = position; - //w = 1.0f; - m[3][0] = position.x; - m[3][1] = position.y; - m[3][2] = position.z; - m[3][3] = 1.0f; + vx = Vector(1.0f, 0.0f, 0.0f); + wx = 0.0f; + vy = Vector(0.0f, 1.0f, 0.0f); + wy = 0.0f; + vz = Vector(0.0f, 0.0f, 1.0f); + wz = 0.0f; + pos = position; + w = 1.0f; return false; } - //vy = lookTo ^ vx; - vec = lookTo ^ vec; - m[1][0] = vec.x; - m[1][1] = vec.y; - m[1][2] = vec.z; - m[1][3] = 0.0f; - //vz = lookTo; - m[2][0] = lookTo.x; - m[2][1] = lookTo.y; - m[2][2] = lookTo.z; - m[2][3] = 0.0f; - //pos = position; - //w = 1.0f; - m[3][0] = position.x; - m[3][1] = position.y; - m[3][2] = position.z; - m[3][3] = 1.0f; + vy = lookTo ^ vx; + wy = 0.0f; + vz = lookTo; + wz = 0.0f; + pos = position; + w = 1.0f; return true; } @@ -1035,24 +975,18 @@ inline Matrix &Matrix::Rotate(const Vector &ang) // Move inline Matrix &Matrix::Move(float dX, float dY, float dZ) { - //pos.x += dX; - //pos.y += dY; - //pos.z += dZ; - m[3][0] += dX; - m[3][1] += dY; - m[3][2] += dZ; + pos.x += dX; + pos.y += dY; + pos.z += dZ; return *this; } // Move inline Matrix &Matrix::Move(const Vector &pos) { - //this->pos.x += pos.x; - //this->pos.y += pos.y; - //this->pos.z += pos.z; - m[3][0] += pos.x; - m[3][1] += pos.y; - m[3][2] += pos.z; + this->pos.x += pos.x; + this->pos.y += pos.y; + this->pos.z += pos.z; return *this; } @@ -1120,13 +1054,7 @@ inline Matrix &Matrix::Scale3x3(const 
Vector &scale) // Calculating the inverse matrix inline Matrix &Matrix::Inverse() { - //pos = Vector(-(pos | vx), -(pos | vy), -(pos | vz)); - float px = -(m[3][0] * m[0][0] + m[3][1] * m[0][1] + m[3][2] * m[0][2]); //-(pos | vx) - float py = -(m[3][0] * m[1][0] + m[3][1] * m[1][1] + m[3][2] * m[1][2]); //-(pos | vy) - float pz = -(m[3][0] * m[2][0] + m[3][1] * m[2][1] + m[3][2] * m[2][2]); //-(pos | vz) - m[3][0] = px; - m[3][1] = py; - m[3][2] = pz; + pos = Vector(-(pos | vx), -(pos | vy), -(pos | vz)); Transposition3X3(); return *this; } @@ -1134,19 +1062,16 @@ inline Matrix &Matrix::Inverse() // Calculating an inverse matrix from another inline Matrix &Matrix::Inverse(const Matrix &mtx) { - //pos = Vector(-(mtx.pos | mtx.vx), -(mtx.pos | mtx.vy), -(mtx.pos | mtx.vz)); - m[3][0] = -(mtx.m[3][0] * mtx.m[0][0] + mtx.m[3][1] * mtx.m[0][1] + mtx.m[3][2] * mtx.m[0][2]); //-(mtx.pos | mtx.vx) - m[3][1] = -(mtx.m[3][0] * mtx.m[1][0] + mtx.m[3][1] * mtx.m[1][1] + mtx.m[3][2] * mtx.m[1][2]); //-(mtx.pos | mtx.vy) - m[3][2] = -(mtx.m[3][0] * mtx.m[2][0] + mtx.m[3][1] * mtx.m[2][1] + mtx.m[3][2] * mtx.m[2][2]); //-(mtx.pos | mtx.vz) - m[0][0] = mtx.m[0][0]; - m[0][1] = mtx.m[1][0]; - m[0][2] = mtx.m[2][0]; - m[1][0] = mtx.m[0][1]; - m[1][1] = mtx.m[1][1]; - m[1][2] = mtx.m[2][1]; - m[2][0] = mtx.m[0][2]; - m[2][1] = mtx.m[1][2]; - m[2][2] = mtx.m[2][2]; + pos = Vector(-(mtx.pos | mtx.vx), -(mtx.pos | mtx.vy), -(mtx.pos | mtx.vz)); + matrix[0] = mtx.matrix[0]; + matrix[1] = mtx.matrix[4]; + matrix[2] = mtx.matrix[8]; + matrix[4] = mtx.matrix[1]; + matrix[5] = mtx.matrix[5]; + matrix[6] = mtx.matrix[9]; + matrix[8] = mtx.matrix[2]; + matrix[9] = mtx.matrix[6]; + matrix[10] = mtx.matrix[10]; /* _asm { @@ -1208,12 +1133,7 @@ inline Matrix &Matrix::InverseWhithScale() matrix[i] = 0.0f; } // Position - //pos = -(MulNormal(pos)); - auto pos = Vector(m[3][0], m[3][1], m[3][2]); pos = -(MulNormal(pos)); - m[3][0] = pos.x; - m[3][1] = pos.y; - m[3][2] = pos.z; return *this; } @@ 
-1421,7 +1341,7 @@ inline Vector Matrix::MulNormalByInverse(const Vector &v) const // Get camera position from camera matrix inline Vector Matrix::GetCamPos() const { - return -MulNormalByInverse(Vector(m[3][0], m[3][1], m[3][2])); + return -MulNormalByInverse(pos); } // Identity matrix or not @@ -1467,13 +1387,13 @@ inline bool Matrix::IsIdentity() const inline bool Matrix::IsScale() const { const auto eps = 1e-4f; - if (fabsf(m[0][0] * m[0][0] + m[0][1] * m[0][1] + m[0][2] * m[0][2] - 1.0f) > eps) //~vx + if (fabsf(~vx - 1.0f) > eps) return true; - if (fabsf(m[1][0] * m[1][0] + m[1][1] * m[1][1] + m[1][2] * m[1][2] - 1.0f) > eps) //~vy + if (fabsf(~vy - 1.0f) > eps) return true; - if (fabsf(m[2][0] * m[2][0] + m[2][1] * m[2][1] + m[2][2] * m[2][2] - 1.0f) > eps) //~vz + if (fabsf(~vz - 1.0f) > eps) return true; - if (fabsf(m[3][3] - 1.0f) > eps) + if (fabsf(w - 1.0f) > eps) return true; return false; } @@ -1524,24 +1444,24 @@ inline void Matrix::Projection(Vector4 *dstArray, Vector *srcArray, int32_t num, // Get angles from unscaled rotation matrix inline void Matrix::GetAngles(float &ax, float &ay, float &az) const { - if (m[2][1] < 1.0f) //vz.y + if (vz.y < 1.0f) { - if (m[2][1] > -1.0f) //vz.y + if (vz.y > -1.0f) { - ax = static_cast(asin(-m[2][1])); //-vz.y - ay = static_cast(atan2(m[2][0], m[2][2])); //vz.x, vz.z - az = static_cast(atan2(m[0][1], m[1][1])); //vx.y, vy.y + ax = static_cast(asin(-vz.y)); + ay = static_cast(atan2(vz.x, vz.z)); + az = static_cast(atan2(vx.y, vy.y)); return; } ax = 3.141592654f * 0.5f; ay = 0.0f; - az = static_cast(atan2(m[0][2], m[0][0])); //vx.z, vx.x + az = static_cast(atan2(vx.z, vx.x)); } else { ax = -3.141592654f * 0.5f; ay = 0.0f; - az = static_cast(-atan2(m[0][2], m[0][0])); //vx.z, vx.x + az = static_cast(-atan2(vx.z, vx.x)); } } diff --git a/src/libs/math/include/matrix.h b/src/libs/math/include/matrix.h index 41ac2303f..c598ad7f9 100644 --- a/src/libs/math/include/matrix.h +++ b/src/libs/math/include/matrix.h @@ -34,6 
+34,26 @@ class CMatrix union { alignas(16) float matrix[16]; // espkk # remove inline asm # 30/Dec/2017 float m[4][4]; + + struct + { + // X direction + CVECTOR vx; + // Weight value on X + float wx; + // Y direction + CVECTOR vy; + // Weight value on Y + float wy; + // Z direction + CVECTOR vz; + // Weight value on Z + float wz; + // Position + CVECTOR pos; + // Added weight value + float w; + }; }; //----------------------------------------------------------- diff --git a/src/libs/particles/src/system/emitters/base.cpp b/src/libs/particles/src/system/emitters/base.cpp index 1fbb76e24..e2812955a 100644 --- a/src/libs/particles/src/system/emitters/base.cpp +++ b/src/libs/particles/src/system/emitters/base.cpp @@ -48,10 +48,7 @@ void BaseEmitter::BornParticles(float DeltaTime) BlendMatrix(matWorldTransform, matWorldTransformOld, matWorldTransformNew, MatrixBlend); const auto TransformPos = Position * matWorldTransform; - //matWorldTransform.pos = TransformPos; - matWorldTransform.m[3][0] = TransformPos.x; - matWorldTransform.m[3][1] = TransformPos.y; - matWorldTransform.m[3][2] = TransformPos.z; + matWorldTransform.pos = TransformPos; MatrixBlend += MatrixBlendInc; const auto DeltaTimeDiv = DeltaTime / INTERPOLATION_STEPS; @@ -85,8 +82,7 @@ void BaseEmitter::BornParticles(float DeltaTime) { auto ParticlePos = GetNewParticlePosition(DeltaTime); GetEmissionDirection(matTransform); - //auto VelDir = matTransform.vy; - auto VelDir = Vector(matTransform.m[1][0], matTransform.m[1][1], matTransform.m[1][2]); + auto VelDir = matTransform.vy; switch (ParticleTypes[n].Type) { case BILLBOARD_PARTICLE: @@ -282,10 +278,8 @@ void BaseEmitter::BlendMatrix(Matrix &result, const Matrix &mat1, const Matrix & { const Quaternion qRot1(mat1); const Quaternion qRot2(mat2); - //const auto vPos1 = mat1.pos; - const auto vPos1 = Vector(mat1.m[3][0], mat1.m[3][1], mat1.m[3][2]); - //const auto vPos2 = mat2.pos; - const auto vPos2 = Vector(mat2.m[3][0], mat2.m[3][1], mat2.m[3][2]); + const 
auto vPos1 = mat1.pos; + const auto vPos2 = mat2.pos; Quaternion qBlend; qBlend.SLerp(qRot1, qRot2, BlendK); @@ -294,10 +288,7 @@ void BaseEmitter::BlendMatrix(Matrix &result, const Matrix &mat1, const Matrix & vBlend.Lerp(vPos1, vPos2, BlendK); qBlend.GetMatrix(result); - //result.pos = vBlend; - result.m[3][0] = vBlend.x; - result.m[3][1] = vBlend.y; - result.m[3][2] = vBlend.z; + result.pos = vBlend; } const char *BaseEmitter::GetName() diff --git a/src/libs/sea/src/env_map.cpp b/src/libs/sea/src/env_map.cpp index d4de69c20..8b27d6567 100644 --- a/src/libs/sea/src/env_map.cpp +++ b/src/libs/sea/src/env_map.cpp @@ -200,9 +200,7 @@ bool SEA::EnvMap_Render2() CMatrix mTex; mTex.BuildScale(CVECTOR(0.5f, -0.5f, 0.5f)); - mTex.m[3][0] = 0.5f; //pos.x - mTex.m[3][1] = 0.5f; //pos.y - mTex.m[3][2] = 0.5f; //pos.z + mTex.pos = CVECTOR(0.5f, 0.5f, 0.5f); CMatrix mProj = rs->GetProjection(); mProj = mProj * mTex; diff --git a/src/libs/sea_cameras/src/ship_camera.cpp b/src/libs/sea_cameras/src/ship_camera.cpp index 4dbecd0d4..32abc3916 100644 --- a/src/libs/sea_cameras/src/ship_camera.cpp +++ b/src/libs/sea_cameras/src/ship_camera.cpp @@ -125,7 +125,7 @@ void SHIP_CAMERA::Move(float fDeltaTime) const auto *modelMtx = GetAIObj()->GetMatrix(); auto boxSize = GetAIObj()->GetBoxsize(); // Recalculate box size: (box size + immersion) * hand-fitted scale - boxSize.y += modelMtx->m[3][1]; //pos.y + boxSize.y += modelMtx->pos.y; boxSize *= CVECTOR(SCMR_BOXSCALE_X * 0.5f, SCMR_BOXSCALE_Y * 0.5f, SCMR_BOXSCALE_Z * 0.5f); // Project real height (with masts) const auto realBoxSize = GetAIObj()->GetRealBoxsize(); From 2b0633a26953296b93dfe7a1f24f33733e20cb94 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 20 Feb 2023 13:38:10 +0300 Subject: [PATCH 22/27] gcc: fix unions in matrix.h and math3d/matrix.h --- src/libs/location/src/location_camera.cpp | 16 +- src/libs/math/include/math3d/matrix.h | 138 +++++++++--------- src/libs/math/include/matrix.h | 2 +- 
.../particles/src/system/emitters/base.cpp | 10 +- src/libs/sea/src/env_map.cpp | 2 +- src/libs/sea_cameras/src/ship_camera.cpp | 2 +- 6 files changed, 85 insertions(+), 85 deletions(-) diff --git a/src/libs/location/src/location_camera.cpp b/src/libs/location/src/location_camera.cpp index ace14faca..4811d84b7 100644 --- a/src/libs/location/src/location_camera.cpp +++ b/src/libs/location/src/location_camera.cpp @@ -764,10 +764,10 @@ bool LocationCamera::LoadCameraTrack(const char *pcTrackFile, float fTrackTime) Matrix view; m_track.GetPoint(0.f, pos, ang); ang.GetMatrix(view); - view.vx = -view.vx; - view.vz = -view.vz; + view.v.vx = -view.v.vx; + view.v.vz = -view.v.vz; view.Inverse(); - view.pos = view * -pos; + view.v.pos = view * -pos; rs->SetView(*(CMatrix *)&view); rs->SetPerspective(cameraPerspective); @@ -786,8 +786,8 @@ void LocationCamera::TurnOffTrackCamera() Matrix view; m_track.GetPoint(0.99999f, pos, ang); ang.GetMatrix(view); - view.vx = -view.vx; - view.vz = -view.vz; + view.v.vx = -view.v.vx; + view.v.vz = -view.v.vz; view.Inverse(); oldPos = *(CVECTOR *)&pos; @@ -811,10 +811,10 @@ void LocationCamera::ProcessTrackCamera() Matrix view; m_track.GetPoint(fTrackTime / m_fTrackMaxTime, pos, ang); ang.GetMatrix(view); - view.vx = -view.vx; - view.vz = -view.vz; + view.v.vx = -view.v.vx; + view.v.vz = -view.v.vz; view.Inverse(); - view.pos = view * -pos; + view.v.pos = view * -pos; rs->SetView(*(CMatrix *)&view); rs->SetPerspective(cameraPerspective); } diff --git a/src/libs/math/include/math3d/matrix.h b/src/libs/math/include/math3d/matrix.h index 4b0d5c99c..ab15e6124 100644 --- a/src/libs/math/include/math3d/matrix.h +++ b/src/libs/math/include/math3d/matrix.h @@ -55,7 +55,7 @@ class Matrix Vector pos; // Added weight value float w; - }; + } v; }; // ----------------------------------------------------------- @@ -314,21 +314,21 @@ inline Matrix &Matrix::operator=(const Matrix &mtx) // Assign a number to the position inline Matrix 
&Matrix::operator=(float f) { - pos = f; + v.pos = f; return *this; } // Assign a number to the position inline Matrix &Matrix::operator=(double d) { - pos = d; + v.pos = d; return *this; } // Assign a vector to the position -inline Matrix &Matrix::operator=(const Vector &v) +inline Matrix &Matrix::operator=(const Vector &vec) { - pos = v; + v.pos = vec; return *this; } @@ -799,7 +799,7 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) if (lookTo.Normalize() == 0.0f) { // Putting a position for a non-rotated matrix - pos = -lookFrom; + v.pos = -lookFrom; return false; } // Directing the vector up in the desired direction @@ -808,13 +808,13 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) if (upVector.Normalize() == 0.0f) upVector.y = 1.0f; // looking for the third vector of the basis - auto v = upVector ^ lookTo; - if (v.Normalize() != 0.0f) + auto vec = upVector ^ lookTo; + if (vec.Normalize() != 0.0f) { // set the rotation matrix - m[0][0] = v.x; - m[1][0] = v.y; - m[2][0] = v.z; + m[0][0] = vec.x; + m[1][0] = vec.y; + m[2][0] = vec.z; m[0][1] = upVector.x; m[1][1] = upVector.y; m[2][1] = upVector.z; @@ -825,12 +825,12 @@ inline bool Matrix::BuildView(Vector lookFrom, Vector lookTo, Vector upVector) else { // Setting a position for a non-rotated matrix - pos = -lookFrom; + v.pos = -lookFrom; return false; } // set the position // pos = -MulNormalByInverse (lookFrom); - pos = -MulNormal(lookFrom); + v.pos = -MulNormal(lookFrom); return true; } @@ -840,22 +840,22 @@ inline bool Matrix::BuildOrient(Vector zAxisDirection, Vector upVector) // Normalize the direction vector z if (zAxisDirection.Normalize() < 1e-37f || upVector.Normalize() < 1e-37f) { - vx = Vector(1.0f, 0.0f, 0.0f); - vy = Vector(0.0f, 1.0f, 0.0f); - vz = Vector(0.0f, 0.0f, 1.0f); + v.vx = Vector(1.0f, 0.0f, 0.0f); + v.vy = Vector(0.0f, 1.0f, 0.0f); + v.vz = Vector(0.0f, 0.0f, 1.0f); return false; } // calculate - vx = zAxisDirection ^ 
upVector; - if (vx.Normalize() == 0.0f) + v.vx = zAxisDirection ^ upVector; + if (v.vx.Normalize() == 0.0f) { - vx = Vector(1.0f, 0.0f, 0.0f); - vy = Vector(0.0f, 1.0f, 0.0f); - vz = Vector(0.0f, 0.0f, 1.0f); + v.vx = Vector(1.0f, 0.0f, 0.0f); + v.vy = Vector(0.0f, 1.0f, 0.0f); + v.vz = Vector(0.0f, 0.0f, 1.0f); return false; } - vy = zAxisDirection ^ vx; - vz = zAxisDirection; + v.vy = zAxisDirection ^ v.vx; + v.vz = zAxisDirection; return true; } @@ -867,37 +867,37 @@ inline bool Matrix::BuildOriented(Vector position, Vector lookTo, Vector upVecto // Normalize the direction vector z if (lookTo.Normalize() == 0.0f || upVector.Normalize() == 0.0f) { - vx = Vector(1.0f, 0.0f, 0.0f); - wx = 0.0f; - vy = Vector(0.0f, 1.0f, 0.0f); - wy = 0.0f; - vz = Vector(0.0f, 0.0f, 1.0f); - wz = 0.0f; - pos = position; - w = 1.0f; + v.vx = Vector(1.0f, 0.0f, 0.0f); + v.wx = 0.0f; + v.vy = Vector(0.0f, 1.0f, 0.0f); + v.wy = 0.0f; + v.vz = Vector(0.0f, 0.0f, 1.0f); + v.wz = 0.0f; + v.pos = position; + v.w = 1.0f; return false; } // calculate - vx = lookTo ^ upVector; - wx = 0.0f; - if (vx.Normalize() == 0.0f) + v.vx = lookTo ^ upVector; + v.wx = 0.0f; + if (v.vx.Normalize() == 0.0f) { - vx = Vector(1.0f, 0.0f, 0.0f); - wx = 0.0f; - vy = Vector(0.0f, 1.0f, 0.0f); - wy = 0.0f; - vz = Vector(0.0f, 0.0f, 1.0f); - wz = 0.0f; - pos = position; - w = 1.0f; + v.vx = Vector(1.0f, 0.0f, 0.0f); + v.wx = 0.0f; + v.vy = Vector(0.0f, 1.0f, 0.0f); + v.wy = 0.0f; + v.vz = Vector(0.0f, 0.0f, 1.0f); + v.wz = 0.0f; + v.pos = position; + v.w = 1.0f; return false; } - vy = lookTo ^ vx; - wy = 0.0f; - vz = lookTo; - wz = 0.0f; - pos = position; - w = 1.0f; + v.vy = lookTo ^ v.vx; + v.wy = 0.0f; + v.vz = lookTo; + v.wz = 0.0f; + v.pos = position; + v.w = 1.0f; return true; } @@ -975,18 +975,18 @@ inline Matrix &Matrix::Rotate(const Vector &ang) // Move inline Matrix &Matrix::Move(float dX, float dY, float dZ) { - pos.x += dX; - pos.y += dY; - pos.z += dZ; + v.pos.x += dX; + v.pos.y += dY; + v.pos.z += dZ; 
return *this; } // Move inline Matrix &Matrix::Move(const Vector &pos) { - this->pos.x += pos.x; - this->pos.y += pos.y; - this->pos.z += pos.z; + this->v.pos.x += pos.x; + this->v.pos.y += pos.y; + this->v.pos.z += pos.z; return *this; } @@ -1054,7 +1054,7 @@ inline Matrix &Matrix::Scale3x3(const Vector &scale) // Calculating the inverse matrix inline Matrix &Matrix::Inverse() { - pos = Vector(-(pos | vx), -(pos | vy), -(pos | vz)); + v.pos = Vector(-(v.pos | v.vx), -(v.pos | v.vy), -(v.pos | v.vz)); Transposition3X3(); return *this; } @@ -1062,7 +1062,7 @@ inline Matrix &Matrix::Inverse() // Calculating an inverse matrix from another inline Matrix &Matrix::Inverse(const Matrix &mtx) { - pos = Vector(-(mtx.pos | mtx.vx), -(mtx.pos | mtx.vy), -(mtx.pos | mtx.vz)); + v.pos = Vector(-(mtx.v.pos | mtx.v.vx), -(mtx.v.pos | mtx.v.vy), -(mtx.v.pos | mtx.v.vz)); matrix[0] = mtx.matrix[0]; matrix[1] = mtx.matrix[4]; matrix[2] = mtx.matrix[8]; @@ -1133,7 +1133,7 @@ inline Matrix &Matrix::InverseWhithScale() matrix[i] = 0.0f; } // Position - pos = -(MulNormal(pos)); + v.pos = -(MulNormal(v.pos)); return *this; } @@ -1341,7 +1341,7 @@ inline Vector Matrix::MulNormalByInverse(const Vector &v) const // Get camera position from camera matrix inline Vector Matrix::GetCamPos() const { - return -MulNormalByInverse(pos); + return -MulNormalByInverse(v.pos); } // Identity matrix or not @@ -1387,13 +1387,13 @@ inline bool Matrix::IsIdentity() const inline bool Matrix::IsScale() const { const auto eps = 1e-4f; - if (fabsf(~vx - 1.0f) > eps) + if (fabsf(~v.vx - 1.0f) > eps) return true; - if (fabsf(~vy - 1.0f) > eps) + if (fabsf(~v.vy - 1.0f) > eps) return true; - if (fabsf(~vz - 1.0f) > eps) + if (fabsf(~v.vz - 1.0f) > eps) return true; - if (fabsf(w - 1.0f) > eps) + if (fabsf(v.w - 1.0f) > eps) return true; return false; } @@ -1444,24 +1444,24 @@ inline void Matrix::Projection(Vector4 *dstArray, Vector *srcArray, int32_t num, // Get angles from unscaled rotation matrix inline void 
Matrix::GetAngles(float &ax, float &ay, float &az) const { - if (vz.y < 1.0f) + if (v.vz.y < 1.0f) { - if (vz.y > -1.0f) + if (v.vz.y > -1.0f) { - ax = static_cast(asin(-vz.y)); - ay = static_cast(atan2(vz.x, vz.z)); - az = static_cast(atan2(vx.y, vy.y)); + ax = static_cast(asin(-v.vz.y)); + ay = static_cast(atan2(v.vz.x, v.vz.z)); + az = static_cast(atan2(v.vx.y, v.vy.y)); return; } ax = 3.141592654f * 0.5f; ay = 0.0f; - az = static_cast(atan2(vx.z, vx.x)); + az = static_cast(atan2(v.vx.z, v.vx.x)); } else { ax = -3.141592654f * 0.5f; ay = 0.0f; - az = static_cast(-atan2(vx.z, vx.x)); + az = static_cast(-atan2(v.vx.z, v.vx.x)); } } diff --git a/src/libs/math/include/matrix.h b/src/libs/math/include/matrix.h index c598ad7f9..10c6f05df 100644 --- a/src/libs/math/include/matrix.h +++ b/src/libs/math/include/matrix.h @@ -53,7 +53,7 @@ class CMatrix CVECTOR pos; // Added weight value float w; - }; + } v; }; //----------------------------------------------------------- diff --git a/src/libs/particles/src/system/emitters/base.cpp b/src/libs/particles/src/system/emitters/base.cpp index e2812955a..8cd052f95 100644 --- a/src/libs/particles/src/system/emitters/base.cpp +++ b/src/libs/particles/src/system/emitters/base.cpp @@ -48,7 +48,7 @@ void BaseEmitter::BornParticles(float DeltaTime) BlendMatrix(matWorldTransform, matWorldTransformOld, matWorldTransformNew, MatrixBlend); const auto TransformPos = Position * matWorldTransform; - matWorldTransform.pos = TransformPos; + matWorldTransform.v.pos = TransformPos; MatrixBlend += MatrixBlendInc; const auto DeltaTimeDiv = DeltaTime / INTERPOLATION_STEPS; @@ -82,7 +82,7 @@ void BaseEmitter::BornParticles(float DeltaTime) { auto ParticlePos = GetNewParticlePosition(DeltaTime); GetEmissionDirection(matTransform); - auto VelDir = matTransform.vy; + auto VelDir = matTransform.v.vy; switch (ParticleTypes[n].Type) { case BILLBOARD_PARTICLE: @@ -278,8 +278,8 @@ void BaseEmitter::BlendMatrix(Matrix &result, const Matrix &mat1, const Matrix 
& { const Quaternion qRot1(mat1); const Quaternion qRot2(mat2); - const auto vPos1 = mat1.pos; - const auto vPos2 = mat2.pos; + const auto vPos1 = mat1.v.pos; + const auto vPos2 = mat2.v.pos; Quaternion qBlend; qBlend.SLerp(qRot1, qRot2, BlendK); @@ -288,7 +288,7 @@ void BaseEmitter::BlendMatrix(Matrix &result, const Matrix &mat1, const Matrix & vBlend.Lerp(vPos1, vPos2, BlendK); qBlend.GetMatrix(result); - result.pos = vBlend; + result.v.pos = vBlend; } const char *BaseEmitter::GetName() diff --git a/src/libs/sea/src/env_map.cpp b/src/libs/sea/src/env_map.cpp index 8b27d6567..43d0fb5be 100644 --- a/src/libs/sea/src/env_map.cpp +++ b/src/libs/sea/src/env_map.cpp @@ -200,7 +200,7 @@ bool SEA::EnvMap_Render2() CMatrix mTex; mTex.BuildScale(CVECTOR(0.5f, -0.5f, 0.5f)); - mTex.pos = CVECTOR(0.5f, 0.5f, 0.5f); + mTex.v.pos = CVECTOR(0.5f, 0.5f, 0.5f); CMatrix mProj = rs->GetProjection(); mProj = mProj * mTex; diff --git a/src/libs/sea_cameras/src/ship_camera.cpp b/src/libs/sea_cameras/src/ship_camera.cpp index 32abc3916..564739708 100644 --- a/src/libs/sea_cameras/src/ship_camera.cpp +++ b/src/libs/sea_cameras/src/ship_camera.cpp @@ -125,7 +125,7 @@ void SHIP_CAMERA::Move(float fDeltaTime) const auto *modelMtx = GetAIObj()->GetMatrix(); auto boxSize = GetAIObj()->GetBoxsize(); // Recalculate box size: (box size + immersion) * hand-fitted scale - boxSize.y += modelMtx->pos.y; + boxSize.y += modelMtx->v.pos.y; boxSize *= CVECTOR(SCMR_BOXSCALE_X * 0.5f, SCMR_BOXSCALE_Y * 0.5f, SCMR_BOXSCALE_Z * 0.5f); // Project real height (with masts) const auto realBoxSize = GetAIObj()->GetRealBoxsize(); From 57703c45e41c419edcb65ca2815dddc1171d1eec Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Mon, 13 Mar 2023 23:28:34 +0300 Subject: [PATCH 23/27] ci: add sdl2 ppa for sdl2 >= 2.0.18 on Linux This fixes Linux build --- .github/workflows/ci_linux.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_linux.yml 
b/.github/workflows/ci_linux.yml index 796ac9e87..d22e490df 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -40,9 +40,10 @@ jobs: - uses: actions/checkout@v2 with: submodules: 'recursive' - - name: Install libs + - name: Install sdl2 ppa and libs run: | - sudo apt-get update && sudo apt-get install meson libstdc++-10-dev libvulkan-dev glslang-tools libsdl2-dev libgl-dev libegl-dev + sudo add-apt-repository ppa:savoury1/games + sudo apt-get update && sudo apt-get install meson libstdc++-10-dev libvulkan-dev glslang-tools libsdl2-dev libgl-dev libegl-dev clang-${{matrix.clang_version}} # fix for conan ci sudo apt-get install libx11-dev libx11-xcb-dev libfontenc-dev libice-dev libsm-dev libxau-dev libxaw7-dev libxcomposite-dev libxcursor-dev libxdamage-dev libxdmcp-dev libxext-dev libxfixes-dev libxft-dev libxi-dev libxinerama-dev libxkbfile-dev libxmu-dev libxmuu-dev libxpm-dev libxrandr-dev libxrender-dev libxres-dev libxss-dev libxt-dev libxtst-dev libxv-dev libxvmc-dev libxxf86vm-dev xtrans-dev libxcb-render0-dev libxcb-render-util0-dev libxcb-xkb-dev libxcb-icccm4-dev libxcb-image0-dev libxcb-keysyms1-dev libxcb-randr0-dev libxcb-shape0-dev libxcb-sync-dev libxcb-xfixes0-dev libxcb-xinerama0-dev xkb-data libxcb-dri3-dev uuid-dev libxcb-util-dev - name: Install Conan From dc875f37f7805f9c71132c84dbbb49675585bbc4 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Tue, 14 Mar 2023 15:26:30 +0300 Subject: [PATCH 24/27] nine: remove nine-native folder --- nine-native/CMakeLists.txt | 23 - nine-native/include/D3D9/d3d9.h | 2011 -------------------------- nine-native/include/D3D9/d3d9caps.h | 390 ----- nine-native/include/D3D9/d3d9types.h | 1824 ----------------------- nine-native/include/nine_sdl.h | 13 - nine-native/src/dri3.c | 788 ---------- nine-native/src/dri3.h | 80 - nine-native/src/nine_sdl.c | 1256 ---------------- 8 files changed, 6385 deletions(-) delete mode 100644 nine-native/CMakeLists.txt delete mode 100644 
nine-native/include/D3D9/d3d9.h delete mode 100644 nine-native/include/D3D9/d3d9caps.h delete mode 100644 nine-native/include/D3D9/d3d9types.h delete mode 100644 nine-native/include/nine_sdl.h delete mode 100644 nine-native/src/dri3.c delete mode 100644 nine-native/src/dri3.h delete mode 100644 nine-native/src/nine_sdl.c diff --git a/nine-native/CMakeLists.txt b/nine-native/CMakeLists.txt deleted file mode 100644 index 0376ea2ec..000000000 --- a/nine-native/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -project(nine-native) - -add_library(${PROJECT_NAME} STATIC - include/nine_sdl.h - src/nine_sdl.c - src/dri3.c - src/dri3.h -) - -target_include_directories(${PROJECT_NAME} PRIVATE - include - include/D3D9 -) - -target_link_libraries(${PROJECT_NAME} PRIVATE - SDL2 - X11 - xcb - xcb-present - xcb-dri3 - xcb-xfixes - X11-xcb -) diff --git a/nine-native/include/D3D9/d3d9.h b/nine-native/include/D3D9/d3d9.h deleted file mode 100644 index d7fc714d2..000000000 --- a/nine-native/include/D3D9/d3d9.h +++ /dev/null @@ -1,2011 +0,0 @@ -/* - * Copyright 2011 Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -/*** THIS FILE IS AUTOGENERATED. DO NOT MODIFY MANUALLY. ***/ - -#ifndef _D3D9_H_ -#define _D3D9_H_ - -#include "d3d9types.h" -#include "d3d9caps.h" - -typedef struct IDirect3D9 IDirect3D9, *PDIRECT3D9, *LPDIRECT3D9; -typedef struct IDirect3D9Ex IDirect3D9Ex, *PDIRECT3D9EX, *LPDIRECT3D9EX; -typedef struct IDirect3D9ExOverlayExtension IDirect3D9ExOverlayExtension, *PDIRECT3D9EXOVERLAYEXTENSION, *LPDIRECT3D9EXOVERLAYEXTENSION; -typedef struct IDirect3DAuthenticatedChannel9 IDirect3DAuthenticatedChannel9, *PDIRECT3DAUTHENTICATEDCHANNEL9, *LPDIRECT3DAUTHENTICATEDCHANNEL9; -typedef struct IDirect3DBaseTexture9 IDirect3DBaseTexture9, *PDIRECT3DBASETEXTURE9, *LPDIRECT3DBASETEXTURE9; -typedef struct IDirect3DCryptoSession9 IDirect3DCryptoSession9, *PDIRECT3DCRYPTOSESSION9, *LPDIRECT3DCRYPTOSESSION9; -typedef struct IDirect3DCubeTexture9 IDirect3DCubeTexture9, *PDIRECT3DCUBETEXTURE9, *LPDIRECT3DCUBETEXTURE9; -typedef struct IDirect3DDevice9 IDirect3DDevice9, *PDIRECT3DDEVICE9, *LPDIRECT3DDEVICE9; -typedef struct IDirect3DDevice9Ex IDirect3DDevice9Ex, *PDIRECT3DDEVICE9EX, *LPDIRECT3DDEVICE9EX; -typedef struct IDirect3DDevice9Video IDirect3DDevice9Video, *PDIRECT3DDEVICE9VIDEO, *LPDIRECT3DDEVICE9VIDEO; -typedef struct IDirect3DIndexBuffer9 IDirect3DIndexBuffer9, *PDIRECT3DINDEXBUFFER9, *LPDIRECT3DINDEXBUFFER9; -typedef struct IDirect3DPixelShader9 IDirect3DPixelShader9, *PDIRECT3DPIXELSHADER9, *LPDIRECT3DPIXELSHADER9; -typedef struct IDirect3DQuery9 IDirect3DQuery9, *PDIRECT3DQUERY9, *LPDIRECT3DQUERY9; -typedef struct IDirect3DResource9 IDirect3DResource9, *PDIRECT3DRESOURCE9, *LPDIRECT3DRESOURCE9; -typedef struct IDirect3DStateBlock9 IDirect3DStateBlock9, *PDIRECT3DSTATEBLOCK9, 
*LPDIRECT3DSTATEBLOCK9; -typedef struct IDirect3DSurface9 IDirect3DSurface9, *PDIRECT3DSURFACE9, *LPDIRECT3DSURFACE9; -typedef struct IDirect3DSwapChain9 IDirect3DSwapChain9, *PDIRECT3DSWAPCHAIN9, *LPDIRECT3DSWAPCHAIN9; -typedef struct IDirect3DSwapChain9Ex IDirect3DSwapChain9Ex, *PDIRECT3DSWAPCHAIN9EX, *LPDIRECT3DSWAPCHAIN9EX; -typedef struct IDirect3DTexture9 IDirect3DTexture9, *PDIRECT3DTEXTURE9, *LPDIRECT3DTEXTURE9; -typedef struct IDirect3DVertexBuffer9 IDirect3DVertexBuffer9, *PDIRECT3DVERTEXBUFFER9, *LPDIRECT3DVERTEXBUFFER9; -typedef struct IDirect3DVertexDeclaration9 IDirect3DVertexDeclaration9, *PDIRECT3DVERTEXDECLARATION9, *LPDIRECT3DVERTEXDECLARATION9; -typedef struct IDirect3DVertexShader9 IDirect3DVertexShader9, *PDIRECT3DVERTEXSHADER9, *LPDIRECT3DVERTEXSHADER9; -typedef struct IDirect3DVolume9 IDirect3DVolume9, *PDIRECT3DVOLUME9, *LPDIRECT3DVOLUME9; -typedef struct IDirect3DVolumeTexture9 IDirect3DVolumeTexture9, *PDIRECT3DVOLUMETEXTURE9, *LPDIRECT3DVOLUMETEXTURE9; - -#ifdef __cplusplus - -extern "C" const GUID IID_IDirect3D9; -extern "C" const GUID IID_IDirect3D9Ex; -extern "C" const GUID IID_IDirect3D9ExOverlayExtension; -extern "C" const GUID IID_IDirect3DAuthenticatedChannel9; -extern "C" const GUID IID_IDirect3DBaseTexture9; -extern "C" const GUID IID_IDirect3DCryptoSession9; -extern "C" const GUID IID_IDirect3DCubeTexture9; -extern "C" const GUID IID_IDirect3DDevice9; -extern "C" const GUID IID_IDirect3DDevice9Ex; -extern "C" const GUID IID_IDirect3DDevice9Video; -extern "C" const GUID IID_IDirect3DIndexBuffer9; -extern "C" const GUID IID_IDirect3DPixelShader9; -extern "C" const GUID IID_IDirect3DQuery9; -extern "C" const GUID IID_IDirect3DResource9; -extern "C" const GUID IID_IDirect3DStateBlock9; -extern "C" const GUID IID_IDirect3DSurface9; -extern "C" const GUID IID_IDirect3DSwapChain9; -extern "C" const GUID IID_IDirect3DSwapChain9Ex; -extern "C" const GUID IID_IDirect3DTexture9; -extern "C" const GUID IID_IDirect3DVertexBuffer9; -extern 
"C" const GUID IID_IDirect3DVertexDeclaration9; -extern "C" const GUID IID_IDirect3DVertexShader9; -extern "C" const GUID IID_IDirect3DVolume9; -extern "C" const GUID IID_IDirect3DVolumeTexture9; - -struct IDirect3D9 : public IUnknown -{ - virtual HRESULT WINAPI RegisterSoftwareDevice(void *pInitializeFunction) = 0; - virtual UINT WINAPI GetAdapterCount() = 0; - virtual HRESULT WINAPI GetAdapterIdentifier(UINT Adapter, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier) = 0; - virtual UINT WINAPI GetAdapterModeCount(UINT Adapter, D3DFORMAT Format) = 0; - virtual HRESULT WINAPI EnumAdapterModes(UINT Adapter, D3DFORMAT Format, UINT Mode, D3DDISPLAYMODE *pMode) = 0; - virtual HRESULT WINAPI GetAdapterDisplayMode(UINT Adapter, D3DDISPLAYMODE *pMode) = 0; - virtual HRESULT WINAPI CheckDeviceType(UINT Adapter, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed) = 0; - virtual HRESULT WINAPI CheckDeviceFormat(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, DWORD Usage, D3DRESOURCETYPE RType, D3DFORMAT CheckFormat) = 0; - virtual HRESULT WINAPI CheckDeviceMultiSampleType(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, BOOL Windowed, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD *pQualityLevels) = 0; - virtual HRESULT WINAPI CheckDepthStencilMatch(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat) = 0; - virtual HRESULT WINAPI CheckDeviceFormatConversion(UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, D3DFORMAT TargetFormat) = 0; - virtual HRESULT WINAPI GetDeviceCaps(UINT Adapter, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps) = 0; - virtual HMONITOR WINAPI GetAdapterMonitor(UINT Adapter) = 0; - virtual HRESULT WINAPI CreateDevice(UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DDevice9 **ppReturnedDeviceInterface) = 0; -}; - -struct IDirect3D9Ex : public 
IDirect3D9 -{ - virtual UINT WINAPI GetAdapterModeCountEx(UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter) = 0; - virtual HRESULT WINAPI EnumAdapterModesEx(UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter, UINT Mode, D3DDISPLAYMODEEX *pMode) = 0; - virtual HRESULT WINAPI GetAdapterDisplayModeEx(UINT Adapter, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation) = 0; - virtual HRESULT WINAPI CreateDeviceEx(UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode, IDirect3DDevice9Ex **ppReturnedDeviceInterface) = 0; - virtual HRESULT WINAPI GetAdapterLUID(UINT Adapter, LUID *pLUID) = 0; -}; - -struct IDirect3D9ExOverlayExtension : public IUnknown -{ - virtual HRESULT WINAPI CheckDeviceOverlayType(UINT Adapter, D3DDEVTYPE DevType, UINT OverlayWidth, UINT OverlayHeight, D3DFORMAT OverlayFormat, D3DDISPLAYMODEEX *pDisplayMode, D3DDISPLAYROTATION DisplayRotation, D3DOVERLAYCAPS *pOverlayCaps) = 0; -}; - -struct IDirect3DResource9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual HRESULT WINAPI SetPrivateData(REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags) = 0; - virtual HRESULT WINAPI GetPrivateData(REFGUID refguid, void *pData, DWORD *pSizeOfData) = 0; - virtual HRESULT WINAPI FreePrivateData(REFGUID refguid) = 0; - virtual DWORD WINAPI SetPriority(DWORD PriorityNew) = 0; - virtual DWORD WINAPI GetPriority() = 0; - virtual void WINAPI PreLoad() = 0; - virtual D3DRESOURCETYPE WINAPI GetType() = 0; -}; - -struct IDirect3DBaseTexture9 : public IDirect3DResource9 -{ - virtual DWORD WINAPI SetLOD(DWORD LODNew) = 0; - virtual DWORD WINAPI GetLOD() = 0; - virtual DWORD WINAPI GetLevelCount() = 0; - virtual HRESULT WINAPI SetAutoGenFilterType(D3DTEXTUREFILTERTYPE FilterType) = 0; - virtual D3DTEXTUREFILTERTYPE WINAPI GetAutoGenFilterType() = 0; - virtual void WINAPI GenerateMipSubLevels() = 0; -}; 
- -struct IDirect3DCryptoSession9 : public IUnknown -{ - virtual HRESULT WINAPI GetCertificateSize(UINT *pCertificateSize) = 0; - virtual HRESULT WINAPI GetCertificate(UINT CertifacteSize, BYTE *ppCertificate) = 0; - virtual HRESULT WINAPI NegotiateKeyExchange(UINT DataSize, void *pData) = 0; - virtual HRESULT WINAPI EncryptionBlt(IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT DstSurfaceSize, void *pIV) = 0; - virtual HRESULT WINAPI DecryptionBlt(IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT SrcSurfaceSize, D3DENCRYPTED_BLOCK_INFO *pEncryptedBlockInfo, void *pContentKey, void *pIV) = 0; - virtual HRESULT WINAPI GetSurfacePitch(IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch) = 0; - virtual HRESULT WINAPI StartSessionKeyRefresh(void *pRandomNumber, UINT RandomNumberSize) = 0; - virtual HRESULT WINAPI FinishSessionKeyRefresh() = 0; - virtual HRESULT WINAPI GetEncryptionBltKey(void *pReadbackKey, UINT KeySize) = 0; -}; - -struct IDirect3DCubeTexture9 : public IDirect3DBaseTexture9 -{ - virtual HRESULT WINAPI GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) = 0; - virtual HRESULT WINAPI GetCubeMapSurface(D3DCUBEMAP_FACES FaceType, UINT Level, IDirect3DSurface9 **ppCubeMapSurface) = 0; - virtual HRESULT WINAPI LockRect(D3DCUBEMAP_FACES FaceType, UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags) = 0; - virtual HRESULT WINAPI UnlockRect(D3DCUBEMAP_FACES FaceType, UINT Level) = 0; - virtual HRESULT WINAPI AddDirtyRect(D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect) = 0; -}; - -struct IDirect3DDevice9 : public IUnknown -{ - virtual HRESULT WINAPI TestCooperativeLevel() = 0; - virtual UINT WINAPI GetAvailableTextureMem() = 0; - virtual HRESULT WINAPI EvictManagedResources() = 0; - virtual HRESULT WINAPI GetDirect3D(IDirect3D9 **ppD3D9) = 0; - virtual HRESULT WINAPI GetDeviceCaps(D3DCAPS9 *pCaps) = 0; - virtual HRESULT WINAPI GetDisplayMode(UINT iSwapChain, D3DDISPLAYMODE *pMode) = 0; - virtual HRESULT 
WINAPI GetCreationParameters(D3DDEVICE_CREATION_PARAMETERS *pParameters) = 0; - virtual HRESULT WINAPI SetCursorProperties(UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap) = 0; - virtual void WINAPI SetCursorPosition(int X, int Y, DWORD Flags) = 0; - virtual BOOL WINAPI ShowCursor(BOOL bShow) = 0; - virtual HRESULT WINAPI CreateAdditionalSwapChain(D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain) = 0; - virtual HRESULT WINAPI GetSwapChain(UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain) = 0; - virtual UINT WINAPI GetNumberOfSwapChains() = 0; - virtual HRESULT WINAPI Reset(D3DPRESENT_PARAMETERS *pPresentationParameters) = 0; - virtual HRESULT WINAPI Present(const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion) = 0; - virtual HRESULT WINAPI GetBackBuffer(UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer) = 0; - virtual HRESULT WINAPI GetRasterStatus(UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus) = 0; - virtual HRESULT WINAPI SetDialogBoxMode(BOOL bEnableDialogs) = 0; - virtual void WINAPI SetGammaRamp(UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp) = 0; - virtual void WINAPI GetGammaRamp(UINT iSwapChain, D3DGAMMARAMP *pRamp) = 0; - virtual HRESULT WINAPI CreateTexture(UINT Width, UINT Height, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI CreateVolumeTexture(UINT Width, UINT Height, UINT Depth, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI CreateCubeTexture(UINT EdgeLength, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI CreateVertexBuffer(UINT Length, DWORD Usage, DWORD FVF, D3DPOOL Pool, IDirect3DVertexBuffer9 
**ppVertexBuffer, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI CreateIndexBuffer(UINT Length, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI CreateRenderTarget(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI CreateDepthStencilSurface(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI UpdateSurface(IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint) = 0; - virtual HRESULT WINAPI UpdateTexture(IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture) = 0; - virtual HRESULT WINAPI GetRenderTargetData(IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface) = 0; - virtual HRESULT WINAPI GetFrontBufferData(UINT iSwapChain, IDirect3DSurface9 *pDestSurface) = 0; - virtual HRESULT WINAPI StretchRect(IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestSurface, const RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter) = 0; - virtual HRESULT WINAPI ColorFill(IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color) = 0; - virtual HRESULT WINAPI CreateOffscreenPlainSurface(UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle) = 0; - virtual HRESULT WINAPI SetRenderTarget(DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget) = 0; - virtual HRESULT WINAPI GetRenderTarget(DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget) = 0; - virtual HRESULT WINAPI SetDepthStencilSurface(IDirect3DSurface9 *pNewZStencil) = 0; - virtual HRESULT WINAPI GetDepthStencilSurface(IDirect3DSurface9 
**ppZStencilSurface) = 0; - virtual HRESULT WINAPI BeginScene() = 0; - virtual HRESULT WINAPI EndScene() = 0; - virtual HRESULT WINAPI Clear(DWORD Count, const D3DRECT *pRects, DWORD Flags, D3DCOLOR Color, float Z, DWORD Stencil) = 0; - virtual HRESULT WINAPI SetTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix) = 0; - virtual HRESULT WINAPI GetTransform(D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix) = 0; - virtual HRESULT WINAPI MultiplyTransform(D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix) = 0; - virtual HRESULT WINAPI SetViewport(const D3DVIEWPORT9 *pViewport) = 0; - virtual HRESULT WINAPI GetViewport(D3DVIEWPORT9 *pViewport) = 0; - virtual HRESULT WINAPI SetMaterial(const D3DMATERIAL9 *pMaterial) = 0; - virtual HRESULT WINAPI GetMaterial(D3DMATERIAL9 *pMaterial) = 0; - virtual HRESULT WINAPI SetLight(DWORD Index, const D3DLIGHT9 *pLight) = 0; - virtual HRESULT WINAPI GetLight(DWORD Index, D3DLIGHT9 *pLight) = 0; - virtual HRESULT WINAPI LightEnable(DWORD Index, BOOL Enable) = 0; - virtual HRESULT WINAPI GetLightEnable(DWORD Index, BOOL *pEnable) = 0; - virtual HRESULT WINAPI SetClipPlane(DWORD Index, const float *pPlane) = 0; - virtual HRESULT WINAPI GetClipPlane(DWORD Index, float *pPlane) = 0; - virtual HRESULT WINAPI SetRenderState(D3DRENDERSTATETYPE State, DWORD Value) = 0; - virtual HRESULT WINAPI GetRenderState(D3DRENDERSTATETYPE State, DWORD *pValue) = 0; - virtual HRESULT WINAPI CreateStateBlock(D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB) = 0; - virtual HRESULT WINAPI BeginStateBlock() = 0; - virtual HRESULT WINAPI EndStateBlock(IDirect3DStateBlock9 **ppSB) = 0; - virtual HRESULT WINAPI SetClipStatus(const D3DCLIPSTATUS9 *pClipStatus) = 0; - virtual HRESULT WINAPI GetClipStatus(D3DCLIPSTATUS9 *pClipStatus) = 0; - virtual HRESULT WINAPI GetTexture(DWORD Stage, IDirect3DBaseTexture9 **ppTexture) = 0; - virtual HRESULT WINAPI SetTexture(DWORD Stage, IDirect3DBaseTexture9 *pTexture) = 0; - virtual HRESULT WINAPI 
GetTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue) = 0; - virtual HRESULT WINAPI SetTextureStageState(DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value) = 0; - virtual HRESULT WINAPI GetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue) = 0; - virtual HRESULT WINAPI SetSamplerState(DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value) = 0; - virtual HRESULT WINAPI ValidateDevice(DWORD *pNumPasses) = 0; - virtual HRESULT WINAPI SetPaletteEntries(UINT PaletteNumber, const PALETTEENTRY *pEntries) = 0; - virtual HRESULT WINAPI GetPaletteEntries(UINT PaletteNumber, PALETTEENTRY *pEntries) = 0; - virtual HRESULT WINAPI SetCurrentTexturePalette(UINT PaletteNumber) = 0; - virtual HRESULT WINAPI GetCurrentTexturePalette(UINT *PaletteNumber) = 0; - virtual HRESULT WINAPI SetScissorRect(const RECT *pRect) = 0; - virtual HRESULT WINAPI GetScissorRect(RECT *pRect) = 0; - virtual HRESULT WINAPI SetSoftwareVertexProcessing(BOOL bSoftware) = 0; - virtual BOOL WINAPI GetSoftwareVertexProcessing() = 0; - virtual HRESULT WINAPI SetNPatchMode(float nSegments) = 0; - virtual float WINAPI GetNPatchMode() = 0; - virtual HRESULT WINAPI DrawPrimitive(D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount) = 0; - virtual HRESULT WINAPI DrawIndexedPrimitive(D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount) = 0; - virtual HRESULT WINAPI DrawPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride) = 0; - virtual HRESULT WINAPI DrawIndexedPrimitiveUP(D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, UINT NumVertices, UINT PrimitiveCount, const void *pIndexData, D3DFORMAT IndexDataFormat, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride) = 0; - virtual HRESULT WINAPI ProcessVertices(UINT SrcStartIndex, UINT DestIndex, UINT VertexCount, IDirect3DVertexBuffer9 
*pDestBuffer, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags) = 0; - virtual HRESULT WINAPI CreateVertexDeclaration(const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl) = 0; - virtual HRESULT WINAPI SetVertexDeclaration(IDirect3DVertexDeclaration9 *pDecl) = 0; - virtual HRESULT WINAPI GetVertexDeclaration(IDirect3DVertexDeclaration9 **ppDecl) = 0; - virtual HRESULT WINAPI SetFVF(DWORD FVF) = 0; - virtual HRESULT WINAPI GetFVF(DWORD *pFVF) = 0; - virtual HRESULT WINAPI CreateVertexShader(const DWORD *pFunction, IDirect3DVertexShader9 **ppShader) = 0; - virtual HRESULT WINAPI SetVertexShader(IDirect3DVertexShader9 *pShader) = 0; - virtual HRESULT WINAPI GetVertexShader(IDirect3DVertexShader9 **ppShader) = 0; - virtual HRESULT WINAPI SetVertexShaderConstantF(UINT StartRegister, const float *pConstantData, UINT Vector4fCount) = 0; - virtual HRESULT WINAPI GetVertexShaderConstantF(UINT StartRegister, float *pConstantData, UINT Vector4fCount) = 0; - virtual HRESULT WINAPI SetVertexShaderConstantI(UINT StartRegister, const int *pConstantData, UINT Vector4iCount) = 0; - virtual HRESULT WINAPI GetVertexShaderConstantI(UINT StartRegister, int *pConstantData, UINT Vector4iCount) = 0; - virtual HRESULT WINAPI SetVertexShaderConstantB(UINT StartRegister, const BOOL *pConstantData, UINT BoolCount) = 0; - virtual HRESULT WINAPI GetVertexShaderConstantB(UINT StartRegister, BOOL *pConstantData, UINT BoolCount) = 0; - virtual HRESULT WINAPI SetStreamSource(UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride) = 0; - virtual HRESULT WINAPI GetStreamSource(UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride) = 0; - virtual HRESULT WINAPI SetStreamSourceFreq(UINT StreamNumber, UINT Setting) = 0; - virtual HRESULT WINAPI GetStreamSourceFreq(UINT StreamNumber, UINT *pSetting) = 0; - virtual HRESULT WINAPI SetIndices(IDirect3DIndexBuffer9 *pIndexData) = 0; - virtual HRESULT 
WINAPI GetIndices(IDirect3DIndexBuffer9 **ppIndexData) = 0; - virtual HRESULT WINAPI CreatePixelShader(const DWORD *pFunction, IDirect3DPixelShader9 **ppShader) = 0; - virtual HRESULT WINAPI SetPixelShader(IDirect3DPixelShader9 *pShader) = 0; - virtual HRESULT WINAPI GetPixelShader(IDirect3DPixelShader9 **ppShader) = 0; - virtual HRESULT WINAPI SetPixelShaderConstantF(UINT StartRegister, const float *pConstantData, UINT Vector4fCount) = 0; - virtual HRESULT WINAPI GetPixelShaderConstantF(UINT StartRegister, float *pConstantData, UINT Vector4fCount) = 0; - virtual HRESULT WINAPI SetPixelShaderConstantI(UINT StartRegister, const int *pConstantData, UINT Vector4iCount) = 0; - virtual HRESULT WINAPI GetPixelShaderConstantI(UINT StartRegister, int *pConstantData, UINT Vector4iCount) = 0; - virtual HRESULT WINAPI SetPixelShaderConstantB(UINT StartRegister, const BOOL *pConstantData, UINT BoolCount) = 0; - virtual HRESULT WINAPI GetPixelShaderConstantB(UINT StartRegister, BOOL *pConstantData, UINT BoolCount) = 0; - virtual HRESULT WINAPI DrawRectPatch(UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo) = 0; - virtual HRESULT WINAPI DrawTriPatch(UINT Handle, const float *pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo) = 0; - virtual HRESULT WINAPI DeletePatch(UINT Handle) = 0; - virtual HRESULT WINAPI CreateQuery(D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery) = 0; -}; - -/*** IUnknown methods ***/ -#define IDirect3DDevice9_QueryInterface(p,a,b) (p)->QueryInterface(a,b) -#define IDirect3DDevice9_AddRef(p) (p)->AddRef() -#define IDirect3DDevice9_Release(p) (p)->Release() -/*** IDirect3DDevice9 methods ***/ -#define IDirect3DDevice9_TestCooperativeLevel(p) (p)->TestCooperativeLevel() -#define IDirect3DDevice9_GetAvailableTextureMem(p) (p)->GetAvailableTextureMem() -#define IDirect3DDevice9_EvictManagedResources(p) (p)->EvictManagedResources() -#define IDirect3DDevice9_GetDirect3D(p,a) (p)->GetDirect3D(a) -#define IDirect3DDevice9_GetDeviceCaps(p,a) 
(p)->GetDeviceCaps(a) -#define IDirect3DDevice9_GetDisplayMode(p,a,b) (p)->GetDisplayMode(a,b) -#define IDirect3DDevice9_GetCreationParameters(p,a) (p)->GetCreationParameters(a) -#define IDirect3DDevice9_SetCursorProperties(p,a,b,c) (p)->SetCursorProperties(a,b,c) -#define IDirect3DDevice9_SetCursorPosition(p,a,b,c) (p)->SetCursorPosition(a,b,c) -#define IDirect3DDevice9_ShowCursor(p,a) (p)->ShowCursor(a) -#define IDirect3DDevice9_CreateAdditionalSwapChain(p,a,b) (p)->CreateAdditionalSwapChain(a,b) -#define IDirect3DDevice9_GetSwapChain(p,a,b) (p)->GetSwapChain(a,b) -#define IDirect3DDevice9_GetNumberOfSwapChains(p) (p)->GetNumberOfSwapChains() -#define IDirect3DDevice9_Reset(p,a) (p)->Reset(a) -#define IDirect3DDevice9_Present(p,a,b,c,d) (p)->Present(a,b,c,d) -#define IDirect3DDevice9_GetBackBuffer(p,a,b,c,d) (p)->GetBackBuffer(a,b,c,d) -#define IDirect3DDevice9_GetRasterStatus(p,a,b) (p)->GetRasterStatus(a,b) -#define IDirect3DDevice9_SetDialogBoxMode(p,a) (p)->SetDialogBoxMode(a) -#define IDirect3DDevice9_SetGammaRamp(p,a,b,c) (p)->SetGammaRamp(a,b,c) -#define IDirect3DDevice9_GetGammaRamp(p,a,b) (p)->GetGammaRamp(a,b) -#define IDirect3DDevice9_CreateTexture(p,a,b,c,d,e,f,g,h) (p)->CreateTexture(a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) (p)->CreateVolumeTexture(a,b,c,d,e,f,g,h,i) -#define IDirect3DDevice9_CreateCubeTexture(p,a,b,c,d,e,f,g) (p)->CreateCubeTexture(a,b,c,d,e,f,g) -#define IDirect3DDevice9_CreateVertexBuffer(p,a,b,c,d,e,f) (p)->CreateVertexBuffer(a,b,c,d,e,f) -#define IDirect3DDevice9_CreateIndexBuffer(p,a,b,c,d,e,f) (p)->CreateIndexBuffer(a,b,c,d,e,f) -#define IDirect3DDevice9_CreateRenderTarget(p,a,b,c,d,e,f,g,h) (p)->CreateRenderTarget(a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) (p)->CreateDepthStencilSurface(a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_UpdateSurface(p,a,b,c,d) (p)->UpdateSurface(a,b,c,d) -#define IDirect3DDevice9_UpdateTexture(p,a,b) 
(p)->UpdateTexture(a,b) -#define IDirect3DDevice9_GetRenderTargetData(p,a,b) (p)->GetRenderTargetData(a,b) -#define IDirect3DDevice9_GetFrontBufferData(p,a,b) (p)->GetFrontBufferData(a,b) -#define IDirect3DDevice9_StretchRect(p,a,b,c,d,e) (p)->StretchRect(a,b,c,d,e) -#define IDirect3DDevice9_ColorFill(p,a,b,c) (p)->ColorFill(a,b,c) -#define IDirect3DDevice9_CreateOffscreenPlainSurface(p,a,b,c,d,e,f) (p)->CreateOffscreenPlainSurface(a,b,c,d,e,f) -#define IDirect3DDevice9_SetRenderTarget(p,a,b) (p)->SetRenderTarget(a,b) -#define IDirect3DDevice9_GetRenderTarget(p,a,b) (p)->GetRenderTarget(a,b) -#define IDirect3DDevice9_SetDepthStencilSurface(p,a) (p)->SetDepthStencilSurface(a) -#define IDirect3DDevice9_GetDepthStencilSurface(p,a) (p)->GetDepthStencilSurface(a) -#define IDirect3DDevice9_BeginScene(p) (p)->BeginScene() -#define IDirect3DDevice9_EndScene(p) (p)->EndScene() -#define IDirect3DDevice9_Clear(p,a,b,c,d,e,f) (p)->Clear(a,b,c,d,e,f) -#define IDirect3DDevice9_SetTransform(p,a,b) (p)->SetTransform(a,b) -#define IDirect3DDevice9_GetTransform(p,a,b) (p)->GetTransform(a,b) -#define IDirect3DDevice9_MultiplyTransform(p,a,b) (p)->MultiplyTransform(a,b) -#define IDirect3DDevice9_SetViewport(p,a) (p)->SetViewport(a) -#define IDirect3DDevice9_GetViewport(p,a) (p)->GetViewport(a) -#define IDirect3DDevice9_SetMaterial(p,a) (p)->SetMaterial(a) -#define IDirect3DDevice9_GetMaterial(p,a) (p)->GetMaterial(a) -#define IDirect3DDevice9_SetLight(p,a,b) (p)->SetLight(a,b) -#define IDirect3DDevice9_GetLight(p,a,b) (p)->GetLight(a,b) -#define IDirect3DDevice9_LightEnable(p,a,b) (p)->LightEnable(a,b) -#define IDirect3DDevice9_GetLightEnable(p,a,b) (p)->GetLightEnable(a,b) -#define IDirect3DDevice9_SetClipPlane(p,a,b) (p)->SetClipPlane(a,b) -#define IDirect3DDevice9_GetClipPlane(p,a,b) (p)->GetClipPlane(a,b) -#define IDirect3DDevice9_SetRenderState(p,a,b) (p)->SetRenderState(a,b) -#define IDirect3DDevice9_GetRenderState(p,a,b) (p)->GetRenderState(a,b) -#define 
IDirect3DDevice9_CreateStateBlock(p,a,b) (p)->CreateStateBlock(a,b) -#define IDirect3DDevice9_BeginStateBlock(p) (p)->BeginStateBlock() -#define IDirect3DDevice9_EndStateBlock(p,a) (p)->EndStateBlock(a) -#define IDirect3DDevice9_SetClipStatus(p,a) (p)->SetClipStatus(a) -#define IDirect3DDevice9_GetClipStatus(p,a) (p)->GetClipStatus(a) -#define IDirect3DDevice9_GetTexture(p,a,b) (p)->GetTexture(a,b) -#define IDirect3DDevice9_SetTexture(p,a,b) (p)->SetTexture(a,b) -#define IDirect3DDevice9_GetTextureStageState(p,a,b,c) (p)->GetTextureStageState(a,b,c) -#define IDirect3DDevice9_SetTextureStageState(p,a,b,c) (p)->SetTextureStageState(a,b,c) -#define IDirect3DDevice9_GetSamplerState(p,a,b,c) (p)->GetSamplerState(a,b,c) -#define IDirect3DDevice9_SetSamplerState(p,a,b,c) (p)->SetSamplerState(a,b,c) -#define IDirect3DDevice9_ValidateDevice(p,a) (p)->ValidateDevice(a) -#define IDirect3DDevice9_SetPaletteEntries(p,a,b) (p)->SetPaletteEntries(a,b) -#define IDirect3DDevice9_GetPaletteEntries(p,a,b) (p)->GetPaletteEntries(a,b) -#define IDirect3DDevice9_SetCurrentTexturePalette(p,a) (p)->SetCurrentTexturePalette(a) -#define IDirect3DDevice9_GetCurrentTexturePalette(p,a) (p)->GetCurrentTexturePalette(a) -#define IDirect3DDevice9_SetScissorRect(p,a) (p)->SetScissorRect(a) -#define IDirect3DDevice9_GetScissorRect(p,a) (p)->GetScissorRect(a) -#define IDirect3DDevice9_SetSoftwareVertexProcessing(p,a) (p)->SetSoftwareVertexProcessing(a) -#define IDirect3DDevice9_GetSoftwareVertexProcessing(p) (p)->GetSoftwareVertexProcessing() -#define IDirect3DDevice9_SetNPatchMode(p,a) (p)->SetNPatchMode(a) -#define IDirect3DDevice9_GetNPatchMode(p) (p)->GetNPatchMode() -#define IDirect3DDevice9_DrawPrimitive(p,a,b,c) (p)->DrawPrimitive(a,b,c) -#define IDirect3DDevice9_DrawIndexedPrimitive(p,a,b,c,d,e,f) (p)->DrawIndexedPrimitive(a,b,c,d,e,f) -#define IDirect3DDevice9_DrawPrimitiveUP(p,a,b,c,d) (p)->DrawPrimitiveUP(a,b,c,d) -#define IDirect3DDevice9_DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) 
(p)->DrawIndexedPrimitiveUP(a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_ProcessVertices(p,a,b,c,d,e,f) (p)->ProcessVertices(a,b,c,d,e,f) -#define IDirect3DDevice9_CreateVertexDeclaration(p,a,b) (p)->CreateVertexDeclaration(a,b) -#define IDirect3DDevice9_SetVertexDeclaration(p,a) (p)->SetVertexDeclaration(a) -#define IDirect3DDevice9_GetVertexDeclaration(p,a) (p)->GetVertexDeclaration(a) -#define IDirect3DDevice9_SetFVF(p,a) (p)->SetFVF(a) -#define IDirect3DDevice9_GetFVF(p,a) (p)->GetFVF(a) -#define IDirect3DDevice9_CreateVertexShader(p,a,b) (p)->CreateVertexShader(a,b) -#define IDirect3DDevice9_SetVertexShader(p,a) (p)->SetVertexShader(a) -#define IDirect3DDevice9_GetVertexShader(p,a) (p)->GetVertexShader(a) -#define IDirect3DDevice9_SetVertexShaderConstantF(p,a,b,c) (p)->SetVertexShaderConstantF(a,b,c) -#define IDirect3DDevice9_GetVertexShaderConstantF(p,a,b,c) (p)->GetVertexShaderConstantF(a,b,c) -#define IDirect3DDevice9_SetVertexShaderConstantI(p,a,b,c) (p)->SetVertexShaderConstantI(a,b,c) -#define IDirect3DDevice9_GetVertexShaderConstantI(p,a,b,c) (p)->GetVertexShaderConstantI(a,b,c) -#define IDirect3DDevice9_SetVertexShaderConstantB(p,a,b,c) (p)->SetVertexShaderConstantB(a,b,c) -#define IDirect3DDevice9_GetVertexShaderConstantB(p,a,b,c) (p)->GetVertexShaderConstantB(a,b,c) -#define IDirect3DDevice9_SetStreamSource(p,a,b,c,d) (p)->SetStreamSource(a,b,c,d) -#define IDirect3DDevice9_GetStreamSource(p,a,b,c,d) (p)->GetStreamSource(a,b,c,d) -#define IDirect3DDevice9_SetStreamSourceFreq(p,a,b) (p)->SetStreamSourceFreq(a,b) -#define IDirect3DDevice9_GetStreamSourceFreq(p,a,b) (p)->GetStreamSourceFreq(a,b) -#define IDirect3DDevice9_SetIndices(p,a) (p)->SetIndices(a) -#define IDirect3DDevice9_GetIndices(p,a) (p)->GetIndices(a) -#define IDirect3DDevice9_CreatePixelShader(p,a,b) (p)->CreatePixelShader(a,b) -#define IDirect3DDevice9_SetPixelShader(p,a) (p)->SetPixelShader(a) -#define IDirect3DDevice9_GetPixelShader(p,a) (p)->GetPixelShader(a) -#define 
IDirect3DDevice9_SetPixelShaderConstantF(p,a,b,c) (p)->SetPixelShaderConstantF(a,b,c) -#define IDirect3DDevice9_GetPixelShaderConstantF(p,a,b,c) (p)->GetPixelShaderConstantF(a,b,c) -#define IDirect3DDevice9_SetPixelShaderConstantI(p,a,b,c) (p)->SetPixelShaderConstantI(a,b,c) -#define IDirect3DDevice9_GetPixelShaderConstantI(p,a,b,c) (p)->GetPixelShaderConstantI(a,b,c) -#define IDirect3DDevice9_SetPixelShaderConstantB(p,a,b,c) (p)->SetPixelShaderConstantB(a,b,c) -#define IDirect3DDevice9_GetPixelShaderConstantB(p,a,b,c) (p)->GetPixelShaderConstantB(a,b,c) -#define IDirect3DDevice9_DrawRectPatch(p,a,b,c) (p)->DrawRectPatch(a,b,c) -#define IDirect3DDevice9_DrawTriPatch(p,a,b,c) (p)->DrawTriPatch(a,b,c) -#define IDirect3DDevice9_DeletePatch(p,a) (p)->DeletePatch(a) -#define IDirect3DDevice9_CreateQuery(p,a,b) (p)->CreateQuery(a,b) - -struct IDirect3DDevice9Ex : public IDirect3DDevice9 -{ - virtual HRESULT WINAPI SetConvolutionMonoKernel(UINT width, UINT height, float *rows, float *columns) = 0; - virtual HRESULT WINAPI ComposeRects(IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, IDirect3DVertexBuffer9 *pSrcRectDescs, UINT NumRects, IDirect3DVertexBuffer9 *pDstRectDescs, D3DCOMPOSERECTSOP Operation, int Xoffset, int Yoffset) = 0; - virtual HRESULT WINAPI PresentEx(const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags) = 0; - virtual HRESULT WINAPI GetGPUThreadPriority(INT *pPriority) = 0; - virtual HRESULT WINAPI SetGPUThreadPriority(INT Priority) = 0; - virtual HRESULT WINAPI WaitForVBlank(UINT iSwapChain) = 0; - virtual HRESULT WINAPI CheckResourceResidency(IDirect3DResource9 **pResourceArray, UINT32 NumResources) = 0; - virtual HRESULT WINAPI SetMaximumFrameLatency(UINT MaxLatency) = 0; - virtual HRESULT WINAPI GetMaximumFrameLatency(UINT *pMaxLatency) = 0; - virtual HRESULT WINAPI CheckDeviceState(HWND hDestinationWindow) = 0; - virtual HRESULT WINAPI CreateRenderTargetEx(UINT Width, UINT Height, 
D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage) = 0; - virtual HRESULT WINAPI CreateOffscreenPlainSurfaceEx(UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage) = 0; - virtual HRESULT WINAPI CreateDepthStencilSurfaceEx(UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage) = 0; - virtual HRESULT WINAPI ResetEx(D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode) = 0; - virtual HRESULT WINAPI GetDisplayModeEx(UINT iSwapChain, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation) = 0; -}; - -struct IDirect3DDevice9Video : public IUnknown -{ - virtual HRESULT WINAPI GetContentProtectionCaps(const GUID *pCryptoType, const GUID *pDecodeProfile, D3DCONTENTPROTECTIONCAPS *pCaps) = 0; - virtual HRESULT WINAPI CreateAuthenticatedChannel(D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, HANDLE *pChannelHandle) = 0; - virtual HRESULT WINAPI CreateCryptoSession(const GUID *pCryptoType, const GUID *pDecodeProfile, IDirect3DCryptoSession9 **ppCryptoSession, HANDLE *pCryptoHandle) = 0; -}; - -struct IDirect3DIndexBuffer9 : public IDirect3DResource9 -{ - virtual HRESULT WINAPI Lock(UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags) = 0; - virtual HRESULT WINAPI Unlock() = 0; - virtual HRESULT WINAPI GetDesc(D3DINDEXBUFFER_DESC *pDesc) = 0; -}; - -struct IDirect3DPixelShader9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual HRESULT WINAPI GetFunction(void *pData, UINT *pSizeOfData) = 0; -}; - -struct IDirect3DQuery9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual D3DQUERYTYPE 
WINAPI GetType() = 0; - virtual DWORD WINAPI GetDataSize() = 0; - virtual HRESULT WINAPI Issue(DWORD dwIssueFlags) = 0; - virtual HRESULT WINAPI GetData(void *pData, DWORD dwSize, DWORD dwGetDataFlags) = 0; -}; - -struct IDirect3DStateBlock9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual HRESULT WINAPI Capture() = 0; - virtual HRESULT WINAPI Apply() = 0; -}; - -struct IDirect3DSurface9 : public IDirect3DResource9 -{ - virtual HRESULT WINAPI GetContainer(REFIID riid, void **ppContainer) = 0; - virtual HRESULT WINAPI GetDesc(D3DSURFACE_DESC *pDesc) = 0; - virtual HRESULT WINAPI LockRect(D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags) = 0; - virtual HRESULT WINAPI UnlockRect() = 0; - virtual HRESULT WINAPI GetDC(HDC *phdc) = 0; - virtual HRESULT WINAPI ReleaseDC(HDC hdc) = 0; -}; - -/*** IUnknown methods ***/ -#define IDirect3DSurface9_QueryInterface(p,a,b) (p)->QueryInterface(a,b) -#define IDirect3DSurface9_AddRef(p) (p)->AddRef() -#define IDirect3DSurface9_Release(p) (p)->Release() -/*** IDirect3DSurface9 methods: IDirect3DResource9 ***/ -#define IDirect3DSurface9_GetDevice(p,a) (p)->GetDevice(a) -#define IDirect3DSurface9_SetPrivateData(p,a,b,c,d) (p)->SetPrivateData(a,b,c,d) -#define IDirect3DSurface9_GetPrivateData(p,a,b,c) (p)->GetPrivateData(a,b,c) -#define IDirect3DSurface9_FreePrivateData(p,a) (p)->FreePrivateData(a) -#define IDirect3DSurface9_SetPriority(p,a) (p)->SetPriority(a) -#define IDirect3DSurface9_GetPriority(p) (p)->GetPriority() -#define IDirect3DSurface9_PreLoad(p) (p)->PreLoad() -#define IDirect3DSurface9_GetType(p) (p)->GetType() -/*** IDirect3DSurface9 methods ***/ -#define IDirect3DSurface9_GetContainer(p,a,b) (p)->GetContainer(a,b) -#define IDirect3DSurface9_GetDesc(p,a) (p)->GetDesc(a) -#define IDirect3DSurface9_LockRect(p,a,b,c) (p)->LockRect(a,b,c) -#define IDirect3DSurface9_UnlockRect(p) (p)->UnlockRect() -#define IDirect3DSurface9_GetDC(p,a) (p)->GetDC(a) -#define 
IDirect3DSurface9_ReleaseDC(p,a) (p)->ReleaseDC(a) - -struct IDirect3DSwapChain9 : public IUnknown -{ - virtual HRESULT WINAPI Present(const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags) = 0; - virtual HRESULT WINAPI GetFrontBufferData(IDirect3DSurface9 *pDestSurface) = 0; - virtual HRESULT WINAPI GetBackBuffer(UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer) = 0; - virtual HRESULT WINAPI GetRasterStatus(D3DRASTER_STATUS *pRasterStatus) = 0; - virtual HRESULT WINAPI GetDisplayMode(D3DDISPLAYMODE *pMode) = 0; - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual HRESULT WINAPI GetPresentParameters(D3DPRESENT_PARAMETERS *pPresentationParameters) = 0; -}; - -struct IDirect3DSwapChain9Ex : public IDirect3DSwapChain9 -{ - virtual HRESULT WINAPI GetLastPresentCount(UINT *pLastPresentCount) = 0; - virtual HRESULT WINAPI GetPresentStats(D3DPRESENTSTATS *pPresentationStatistics) = 0; - virtual HRESULT WINAPI GetDisplayModeEx(D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation) = 0; -}; - -struct IDirect3DTexture9 : public IDirect3DBaseTexture9 -{ - virtual HRESULT WINAPI GetLevelDesc(UINT Level, D3DSURFACE_DESC *pDesc) = 0; - virtual HRESULT WINAPI GetSurfaceLevel(UINT Level, IDirect3DSurface9 **ppSurfaceLevel) = 0; - virtual HRESULT WINAPI LockRect(UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags) = 0; - virtual HRESULT WINAPI UnlockRect(UINT Level) = 0; - virtual HRESULT WINAPI AddDirtyRect(const RECT *pDirtyRect) = 0; -}; - -struct IDirect3DVertexBuffer9 : public IDirect3DResource9 -{ - virtual HRESULT WINAPI Lock(UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags) = 0; - virtual HRESULT WINAPI Unlock() = 0; - virtual HRESULT WINAPI GetDesc(D3DVERTEXBUFFER_DESC *pDesc) = 0; -}; - -struct IDirect3DVertexDeclaration9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual 
HRESULT WINAPI GetDeclaration(D3DVERTEXELEMENT9 *pElement, UINT *pNumElements) = 0; -}; - -struct IDirect3DVertexShader9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual HRESULT WINAPI GetFunction(void *pData, UINT *pSizeOfData) = 0; -}; - -struct IDirect3DVolume9 : public IUnknown -{ - virtual HRESULT WINAPI GetDevice(IDirect3DDevice9 **ppDevice) = 0; - virtual HRESULT WINAPI SetPrivateData(REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags) = 0; - virtual HRESULT WINAPI GetPrivateData(REFGUID refguid, void *pData, DWORD *pSizeOfData) = 0; - virtual HRESULT WINAPI FreePrivateData(REFGUID refguid) = 0; - virtual HRESULT WINAPI GetContainer(REFIID riid, void **ppContainer) = 0; - virtual HRESULT WINAPI GetDesc(D3DVOLUME_DESC *pDesc) = 0; - virtual HRESULT WINAPI LockBox(D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags) = 0; - virtual HRESULT WINAPI UnlockBox() = 0; -}; - -struct IDirect3DVolumeTexture9 : public IDirect3DBaseTexture9 -{ - virtual HRESULT WINAPI GetLevelDesc(UINT Level, D3DVOLUME_DESC *pDesc) = 0; - virtual HRESULT WINAPI GetVolumeLevel(UINT Level, IDirect3DVolume9 **ppVolumeLevel) = 0; - virtual HRESULT WINAPI LockBox(UINT Level, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags) = 0; - virtual HRESULT WINAPI UnlockBox(UINT Level) = 0; - virtual HRESULT WINAPI AddDirtyBox(const D3DBOX *pDirtyBox) = 0; -}; - - -#else /* __cplusplus */ - -extern const GUID IID_IDirect3D9; -extern const GUID IID_IDirect3D9Ex; -extern const GUID IID_IDirect3D9ExOverlayExtension; -extern const GUID IID_IDirect3DAuthenticatedChannel9; -extern const GUID IID_IDirect3DBaseTexture9; -extern const GUID IID_IDirect3DCryptoSession9; -extern const GUID IID_IDirect3DCubeTexture9; -extern const GUID IID_IDirect3DDevice9; -extern const GUID IID_IDirect3DDevice9Ex; -extern const GUID IID_IDirect3DDevice9Video; -extern const GUID IID_IDirect3DIndexBuffer9; -extern const GUID 
IID_IDirect3DPixelShader9; -extern const GUID IID_IDirect3DQuery9; -extern const GUID IID_IDirect3DResource9; -extern const GUID IID_IDirect3DStateBlock9; -extern const GUID IID_IDirect3DSurface9; -extern const GUID IID_IDirect3DSwapChain9; -extern const GUID IID_IDirect3DSwapChain9Ex; -extern const GUID IID_IDirect3DTexture9; -extern const GUID IID_IDirect3DVertexBuffer9; -extern const GUID IID_IDirect3DVertexDeclaration9; -extern const GUID IID_IDirect3DVertexShader9; -extern const GUID IID_IDirect3DVolume9; -extern const GUID IID_IDirect3DVolumeTexture9; - -typedef struct IDirect3D9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3D9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3D9 *This); - ULONG (WINAPI *Release)(IDirect3D9 *This); - /* IDirect3D9 */ - HRESULT (WINAPI *RegisterSoftwareDevice)(IDirect3D9 *This, void *pInitializeFunction); - UINT (WINAPI *GetAdapterCount)(IDirect3D9 *This); - HRESULT (WINAPI *GetAdapterIdentifier)(IDirect3D9 *This, UINT Adapter, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier); - UINT (WINAPI *GetAdapterModeCount)(IDirect3D9 *This, UINT Adapter, D3DFORMAT Format); - HRESULT (WINAPI *EnumAdapterModes)(IDirect3D9 *This, UINT Adapter, D3DFORMAT Format, UINT Mode, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *GetAdapterDisplayMode)(IDirect3D9 *This, UINT Adapter, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *CheckDeviceType)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed); - HRESULT (WINAPI *CheckDeviceFormat)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, DWORD Usage, D3DRESOURCETYPE RType, D3DFORMAT CheckFormat); - HRESULT (WINAPI *CheckDeviceMultiSampleType)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, BOOL Windowed, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD *pQualityLevels); - HRESULT (WINAPI *CheckDepthStencilMatch)(IDirect3D9 *This, UINT Adapter, 
D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat); - HRESULT (WINAPI *CheckDeviceFormatConversion)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, D3DFORMAT TargetFormat); - HRESULT (WINAPI *GetDeviceCaps)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps); - HMONITOR (WINAPI *GetAdapterMonitor)(IDirect3D9 *This, UINT Adapter); - HRESULT (WINAPI *CreateDevice)(IDirect3D9 *This, UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DDevice9 **ppReturnedDeviceInterface); -} IDirect3D9Vtbl; -struct IDirect3D9 -{ - IDirect3D9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3D9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3D9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3D9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3D9 macros */ -#define IDirect3D9_RegisterSoftwareDevice(p,a) (p)->lpVtbl->RegisterSoftwareDevice(p,a) -#define IDirect3D9_GetAdapterCount(p) (p)->lpVtbl->GetAdapterCount(p) -#define IDirect3D9_GetAdapterIdentifier(p,a,b,c) (p)->lpVtbl->GetAdapterIdentifier(p,a,b,c) -#define IDirect3D9_GetAdapterModeCount(p,a,b) (p)->lpVtbl->GetAdapterModeCount(p,a,b) -#define IDirect3D9_EnumAdapterModes(p,a,b,c,d) (p)->lpVtbl->EnumAdapterModes(p,a,b,c,d) -#define IDirect3D9_GetAdapterDisplayMode(p,a,b) (p)->lpVtbl->GetAdapterDisplayMode(p,a,b) -#define IDirect3D9_CheckDeviceType(p,a,b,c,d,e) (p)->lpVtbl->CheckDeviceType(p,a,b,c,d,e) -#define IDirect3D9_CheckDeviceFormat(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceFormat(p,a,b,c,d,e,f) -#define IDirect3D9_CheckDeviceMultiSampleType(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceMultiSampleType(p,a,b,c,d,e,f) -#define IDirect3D9_CheckDepthStencilMatch(p,a,b,c,d,e) (p)->lpVtbl->CheckDepthStencilMatch(p,a,b,c,d,e) -#define IDirect3D9_CheckDeviceFormatConversion(p,a,b,c,d) 
(p)->lpVtbl->CheckDeviceFormatConversion(p,a,b,c,d) -#define IDirect3D9_GetDeviceCaps(p,a,b,c) (p)->lpVtbl->GetDeviceCaps(p,a,b,c) -#define IDirect3D9_GetAdapterMonitor(p,a) (p)->lpVtbl->GetAdapterMonitor(p,a) -#define IDirect3D9_CreateDevice(p,a,b,c,d,e,f) (p)->lpVtbl->CreateDevice(p,a,b,c,d,e,f) - -typedef struct IDirect3D9ExVtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3D9Ex *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3D9Ex *This); - ULONG (WINAPI *Release)(IDirect3D9Ex *This); - /* IDirect3D9 */ - HRESULT (WINAPI *RegisterSoftwareDevice)(IDirect3D9Ex *This, void *pInitializeFunction); - UINT (WINAPI *GetAdapterCount)(IDirect3D9Ex *This); - HRESULT (WINAPI *GetAdapterIdentifier)(IDirect3D9Ex *This, UINT Adapter, DWORD Flags, D3DADAPTER_IDENTIFIER9 *pIdentifier); - UINT (WINAPI *GetAdapterModeCount)(IDirect3D9Ex *This, UINT Adapter, D3DFORMAT Format); - HRESULT (WINAPI *EnumAdapterModes)(IDirect3D9Ex *This, UINT Adapter, D3DFORMAT Format, UINT Mode, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *GetAdapterDisplayMode)(IDirect3D9Ex *This, UINT Adapter, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *CheckDeviceType)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DevType, D3DFORMAT AdapterFormat, D3DFORMAT BackBufferFormat, BOOL bWindowed); - HRESULT (WINAPI *CheckDeviceFormat)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, DWORD Usage, D3DRESOURCETYPE RType, D3DFORMAT CheckFormat); - HRESULT (WINAPI *CheckDeviceMultiSampleType)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SurfaceFormat, BOOL Windowed, D3DMULTISAMPLE_TYPE MultiSampleType, DWORD *pQualityLevels); - HRESULT (WINAPI *CheckDepthStencilMatch)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT AdapterFormat, D3DFORMAT RenderTargetFormat, D3DFORMAT DepthStencilFormat); - HRESULT (WINAPI *CheckDeviceFormatConversion)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DFORMAT SourceFormat, 
D3DFORMAT TargetFormat); - HRESULT (WINAPI *GetDeviceCaps)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, D3DCAPS9 *pCaps); - HMONITOR (WINAPI *GetAdapterMonitor)(IDirect3D9Ex *This, UINT Adapter); - HRESULT (WINAPI *CreateDevice)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DDevice9 **ppReturnedDeviceInterface); - /* IDirect3D9Ex */ - UINT (WINAPI *GetAdapterModeCountEx)(IDirect3D9Ex *This, UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter); - HRESULT (WINAPI *EnumAdapterModesEx)(IDirect3D9Ex *This, UINT Adapter, const D3DDISPLAYMODEFILTER *pFilter, UINT Mode, D3DDISPLAYMODEEX *pMode); - HRESULT (WINAPI *GetAdapterDisplayModeEx)(IDirect3D9Ex *This, UINT Adapter, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation); - HRESULT (WINAPI *CreateDeviceEx)(IDirect3D9Ex *This, UINT Adapter, D3DDEVTYPE DeviceType, HWND hFocusWindow, DWORD BehaviorFlags, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode, IDirect3DDevice9Ex **ppReturnedDeviceInterface); - HRESULT (WINAPI *GetAdapterLUID)(IDirect3D9Ex *This, UINT Adapter, LUID *pLUID); -} IDirect3D9ExVtbl; -struct IDirect3D9Ex -{ - IDirect3D9ExVtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3D9Ex_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3D9Ex_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3D9Ex_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3D9 macros */ -#define IDirect3D9Ex_RegisterSoftwareDevice(p,a) (p)->lpVtbl->RegisterSoftwareDevice(p,a) -#define IDirect3D9Ex_GetAdapterCount(p) (p)->lpVtbl->GetAdapterCount(p) -#define IDirect3D9Ex_GetAdapterIdentifier(p,a,b,c) (p)->lpVtbl->GetAdapterIdentifier(p,a,b,c) -#define IDirect3D9Ex_GetAdapterModeCount(p,a,b) (p)->lpVtbl->GetAdapterModeCount(p,a,b) -#define IDirect3D9Ex_EnumAdapterModes(p,a,b,c,d) (p)->lpVtbl->EnumAdapterModes(p,a,b,c,d) -#define 
IDirect3D9Ex_GetAdapterDisplayMode(p,a,b) (p)->lpVtbl->GetAdapterDisplayMode(p,a,b) -#define IDirect3D9Ex_CheckDeviceType(p,a,b,c,d,e) (p)->lpVtbl->CheckDeviceType(p,a,b,c,d,e) -#define IDirect3D9Ex_CheckDeviceFormat(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceFormat(p,a,b,c,d,e,f) -#define IDirect3D9Ex_CheckDeviceMultiSampleType(p,a,b,c,d,e,f) (p)->lpVtbl->CheckDeviceMultiSampleType(p,a,b,c,d,e,f) -#define IDirect3D9Ex_CheckDepthStencilMatch(p,a,b,c,d,e) (p)->lpVtbl->CheckDepthStencilMatch(p,a,b,c,d,e) -#define IDirect3D9Ex_CheckDeviceFormatConversion(p,a,b,c,d) (p)->lpVtbl->CheckDeviceFormatConversion(p,a,b,c,d) -#define IDirect3D9Ex_GetDeviceCaps(p,a,b,c) (p)->lpVtbl->GetDeviceCaps(p,a,b,c) -#define IDirect3D9Ex_GetAdapterMonitor(p,a) (p)->lpVtbl->GetAdapterMonitor(p,a) -#define IDirect3D9Ex_CreateDevice(p,a,b,c,d,e,f) (p)->lpVtbl->CreateDevice(p,a,b,c,d,e,f) -/* IDirect3D9Ex macros */ -#define IDirect3D9Ex_GetAdapterModeCountEx(p,a,b) (p)->lpVtbl->GetAdapterModeCountEx(p,a,b) -#define IDirect3D9Ex_EnumAdapterModesEx(p,a,b,c,d) (p)->lpVtbl->EnumAdapterModesEx(p,a,b,c,d) -#define IDirect3D9Ex_GetAdapterDisplayModeEx(p,a,b,c) (p)->lpVtbl->GetAdapterDisplayModeEx(p,a,b,c) -#define IDirect3D9Ex_CreateDeviceEx(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateDeviceEx(p,a,b,c,d,e,f,g) -#define IDirect3D9Ex_GetAdapterLUID(p,a,b) (p)->lpVtbl->GetAdapterLUID(p,a,b) - -typedef struct IDirect3D9ExOverlayExtensionVtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3D9ExOverlayExtension *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3D9ExOverlayExtension *This); - ULONG (WINAPI *Release)(IDirect3D9ExOverlayExtension *This); - /* IDirect3D9ExOverlayExtension */ - HRESULT (WINAPI *CheckDeviceOverlayType)(IDirect3D9ExOverlayExtension *This, UINT Adapter, D3DDEVTYPE DevType, UINT OverlayWidth, UINT OverlayHeight, D3DFORMAT OverlayFormat, D3DDISPLAYMODEEX *pDisplayMode, D3DDISPLAYROTATION DisplayRotation, D3DOVERLAYCAPS *pOverlayCaps); -} 
IDirect3D9ExOverlayExtensionVtbl; -struct IDirect3D9ExOverlayExtension -{ - IDirect3D9ExOverlayExtensionVtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3D9ExOverlayExtension_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3D9ExOverlayExtension_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3D9ExOverlayExtension_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3D9ExOverlayExtension macros */ -#define IDirect3D9ExOverlayExtension_CheckDeviceOverlayType(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CheckDeviceOverlayType(p,a,b,c,d,e,f,g,h) - -typedef struct IDirect3DAuthenticatedChannel9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DAuthenticatedChannel9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DAuthenticatedChannel9 *This); - ULONG (WINAPI *Release)(IDirect3DAuthenticatedChannel9 *This); - /* IDirect3DAuthenticatedChannel9 */ - HRESULT (WINAPI *GetCertificateSize)(IDirect3DAuthenticatedChannel9 *This, UINT *pCertificateSize); - HRESULT (WINAPI *GetCertificate)(IDirect3DAuthenticatedChannel9 *This, UINT CertifacteSize, BYTE *ppCertificate); - HRESULT (WINAPI *NegotiateKeyExchange)(IDirect3DAuthenticatedChannel9 *This, UINT DataSize, void *pData); - HRESULT (WINAPI *Query)(IDirect3DAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, UINT OutputSize, void *pOutput); - HRESULT (WINAPI *Configure)(IDirect3DAuthenticatedChannel9 *This, UINT InputSize, const void *pInput, D3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT *pOutput); -} IDirect3DAuthenticatedChannel9Vtbl; -struct IDirect3DAuthenticatedChannel9 -{ - IDirect3DAuthenticatedChannel9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DAuthenticatedChannel9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DAuthenticatedChannel9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DAuthenticatedChannel9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DAuthenticatedChannel9 macros */ -#define 
IDirect3DAuthenticatedChannel9_GetCertificateSize(p,a) (p)->lpVtbl->GetCertificateSize(p,a) -#define IDirect3DAuthenticatedChannel9_GetCertificate(p,a,b) (p)->lpVtbl->GetCertificate(p,a,b) -#define IDirect3DAuthenticatedChannel9_NegotiateKeyExchange(p,a,b) (p)->lpVtbl->NegotiateKeyExchange(p,a,b) -#define IDirect3DAuthenticatedChannel9_Query(p,a,b,c,d) (p)->lpVtbl->Query(p,a,b,c,d) -#define IDirect3DAuthenticatedChannel9_Configure(p,a,b,c) (p)->lpVtbl->Configure(p,a,b,c) - -typedef struct IDirect3DBaseTexture9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DBaseTexture9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DBaseTexture9 *This); - ULONG (WINAPI *Release)(IDirect3DBaseTexture9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DBaseTexture9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DBaseTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DBaseTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DBaseTexture9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DBaseTexture9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DBaseTexture9 *This); - void (WINAPI *PreLoad)(IDirect3DBaseTexture9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DBaseTexture9 *This); - /* IDirect3DBaseTexture9 */ - DWORD (WINAPI *SetLOD)(IDirect3DBaseTexture9 *This, DWORD LODNew); - DWORD (WINAPI *GetLOD)(IDirect3DBaseTexture9 *This); - DWORD (WINAPI *GetLevelCount)(IDirect3DBaseTexture9 *This); - HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DBaseTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); - D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DBaseTexture9 *This); - void (WINAPI *GenerateMipSubLevels)(IDirect3DBaseTexture9 *This); -} IDirect3DBaseTexture9Vtbl; -struct IDirect3DBaseTexture9 -{ - 
IDirect3DBaseTexture9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DBaseTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DBaseTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DBaseTexture9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DBaseTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DBaseTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DBaseTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DBaseTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DBaseTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DBaseTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DBaseTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DBaseTexture9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DBaseTexture9 macros */ -#define IDirect3DBaseTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) -#define IDirect3DBaseTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) -#define IDirect3DBaseTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) -#define IDirect3DBaseTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) -#define IDirect3DBaseTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) -#define IDirect3DBaseTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) - -typedef struct IDirect3DCryptoSession9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DCryptoSession9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DCryptoSession9 *This); - ULONG (WINAPI *Release)(IDirect3DCryptoSession9 *This); - /* IDirect3DCryptoSession9 */ - HRESULT (WINAPI *GetCertificateSize)(IDirect3DCryptoSession9 *This, UINT *pCertificateSize); - HRESULT (WINAPI *GetCertificate)(IDirect3DCryptoSession9 *This, UINT CertifacteSize, BYTE *ppCertificate); - HRESULT 
(WINAPI *NegotiateKeyExchange)(IDirect3DCryptoSession9 *This, UINT DataSize, void *pData); - HRESULT (WINAPI *EncryptionBlt)(IDirect3DCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT DstSurfaceSize, void *pIV); - HRESULT (WINAPI *DecryptionBlt)(IDirect3DCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, IDirect3DSurface9 *pDstSurface, UINT SrcSurfaceSize, D3DENCRYPTED_BLOCK_INFO *pEncryptedBlockInfo, void *pContentKey, void *pIV); - HRESULT (WINAPI *GetSurfacePitch)(IDirect3DCryptoSession9 *This, IDirect3DSurface9 *pSrcSurface, UINT *pSurfacePitch); - HRESULT (WINAPI *StartSessionKeyRefresh)(IDirect3DCryptoSession9 *This, void *pRandomNumber, UINT RandomNumberSize); - HRESULT (WINAPI *FinishSessionKeyRefresh)(IDirect3DCryptoSession9 *This); - HRESULT (WINAPI *GetEncryptionBltKey)(IDirect3DCryptoSession9 *This, void *pReadbackKey, UINT KeySize); -} IDirect3DCryptoSession9Vtbl; -struct IDirect3DCryptoSession9 -{ - IDirect3DCryptoSession9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DCryptoSession9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DCryptoSession9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DCryptoSession9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DCryptoSession9 macros */ -#define IDirect3DCryptoSession9_GetCertificateSize(p,a) (p)->lpVtbl->GetCertificateSize(p,a) -#define IDirect3DCryptoSession9_GetCertificate(p,a,b) (p)->lpVtbl->GetCertificate(p,a,b) -#define IDirect3DCryptoSession9_NegotiateKeyExchange(p,a,b) (p)->lpVtbl->NegotiateKeyExchange(p,a,b) -#define IDirect3DCryptoSession9_EncryptionBlt(p,a,b,c,d) (p)->lpVtbl->EncryptionBlt(p,a,b,c,d) -#define IDirect3DCryptoSession9_DecryptionBlt(p,a,b,c,d,e,f) (p)->lpVtbl->DecryptionBlt(p,a,b,c,d,e,f) -#define IDirect3DCryptoSession9_GetSurfacePitch(p,a,b) (p)->lpVtbl->GetSurfacePitch(p,a,b) -#define IDirect3DCryptoSession9_StartSessionKeyRefresh(p,a,b) (p)->lpVtbl->StartSessionKeyRefresh(p,a,b) -#define 
IDirect3DCryptoSession9_FinishSessionKeyRefresh(p) (p)->lpVtbl->FinishSessionKeyRefresh(p) -#define IDirect3DCryptoSession9_GetEncryptionBltKey(p,a,b) (p)->lpVtbl->GetEncryptionBltKey(p,a,b) - -typedef struct IDirect3DCubeTexture9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DCubeTexture9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DCubeTexture9 *This); - ULONG (WINAPI *Release)(IDirect3DCubeTexture9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DCubeTexture9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DCubeTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DCubeTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DCubeTexture9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DCubeTexture9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DCubeTexture9 *This); - void (WINAPI *PreLoad)(IDirect3DCubeTexture9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DCubeTexture9 *This); - /* IDirect3DBaseTexture9 */ - DWORD (WINAPI *SetLOD)(IDirect3DCubeTexture9 *This, DWORD LODNew); - DWORD (WINAPI *GetLOD)(IDirect3DCubeTexture9 *This); - DWORD (WINAPI *GetLevelCount)(IDirect3DCubeTexture9 *This); - HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DCubeTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); - D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DCubeTexture9 *This); - void (WINAPI *GenerateMipSubLevels)(IDirect3DCubeTexture9 *This); - /* IDirect3DCubeTexture9 */ - HRESULT (WINAPI *GetLevelDesc)(IDirect3DCubeTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc); - HRESULT (WINAPI *GetCubeMapSurface)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level, IDirect3DSurface9 **ppCubeMapSurface); - HRESULT (WINAPI *LockRect)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, 
UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags); - HRESULT (WINAPI *UnlockRect)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, UINT Level); - HRESULT (WINAPI *AddDirtyRect)(IDirect3DCubeTexture9 *This, D3DCUBEMAP_FACES FaceType, const RECT *pDirtyRect); -} IDirect3DCubeTexture9Vtbl; -struct IDirect3DCubeTexture9 -{ - IDirect3DCubeTexture9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DCubeTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DCubeTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DCubeTexture9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DCubeTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DCubeTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DCubeTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DCubeTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DCubeTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DCubeTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DCubeTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DCubeTexture9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DBaseTexture9 macros */ -#define IDirect3DCubeTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) -#define IDirect3DCubeTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) -#define IDirect3DCubeTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) -#define IDirect3DCubeTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) -#define IDirect3DCubeTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) -#define IDirect3DCubeTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) -/* IDirect3DCubeTexture9 macros */ -#define IDirect3DCubeTexture9_GetLevelDesc(p,a,b) (p)->lpVtbl->GetLevelDesc(p,a,b) -#define 
IDirect3DCubeTexture9_GetCubeMapSurface(p,a,b,c) (p)->lpVtbl->GetCubeMapSurface(p,a,b,c) -#define IDirect3DCubeTexture9_LockRect(p,a,b,c,d,e) (p)->lpVtbl->LockRect(p,a,b,c,d,e) -#define IDirect3DCubeTexture9_UnlockRect(p,a,b) (p)->lpVtbl->UnlockRect(p,a,b) -#define IDirect3DCubeTexture9_AddDirtyRect(p,a,b) (p)->lpVtbl->AddDirtyRect(p,a,b) - -typedef struct IDirect3DDevice9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DDevice9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DDevice9 *This); - ULONG (WINAPI *Release)(IDirect3DDevice9 *This); - /* IDirect3DDevice9 */ - HRESULT (WINAPI *TestCooperativeLevel)(IDirect3DDevice9 *This); - UINT (WINAPI *GetAvailableTextureMem)(IDirect3DDevice9 *This); - HRESULT (WINAPI *EvictManagedResources)(IDirect3DDevice9 *This); - HRESULT (WINAPI *GetDirect3D)(IDirect3DDevice9 *This, IDirect3D9 **ppD3D9); - HRESULT (WINAPI *GetDeviceCaps)(IDirect3DDevice9 *This, D3DCAPS9 *pCaps); - HRESULT (WINAPI *GetDisplayMode)(IDirect3DDevice9 *This, UINT iSwapChain, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *GetCreationParameters)(IDirect3DDevice9 *This, D3DDEVICE_CREATION_PARAMETERS *pParameters); - HRESULT (WINAPI *SetCursorProperties)(IDirect3DDevice9 *This, UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap); - void (WINAPI *SetCursorPosition)(IDirect3DDevice9 *This, int X, int Y, DWORD Flags); - BOOL (WINAPI *ShowCursor)(IDirect3DDevice9 *This, BOOL bShow); - HRESULT (WINAPI *CreateAdditionalSwapChain)(IDirect3DDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain); - HRESULT (WINAPI *GetSwapChain)(IDirect3DDevice9 *This, UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain); - UINT (WINAPI *GetNumberOfSwapChains)(IDirect3DDevice9 *This); - HRESULT (WINAPI *Reset)(IDirect3DDevice9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters); - HRESULT (WINAPI *Present)(IDirect3DDevice9 *This, const RECT *pSourceRect, const RECT *pDestRect, HWND 
hDestWindowOverride, const RGNDATA *pDirtyRegion); - HRESULT (WINAPI *GetBackBuffer)(IDirect3DDevice9 *This, UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); - HRESULT (WINAPI *GetRasterStatus)(IDirect3DDevice9 *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus); - HRESULT (WINAPI *SetDialogBoxMode)(IDirect3DDevice9 *This, BOOL bEnableDialogs); - void (WINAPI *SetGammaRamp)(IDirect3DDevice9 *This, UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp); - void (WINAPI *GetGammaRamp)(IDirect3DDevice9 *This, UINT iSwapChain, D3DGAMMARAMP *pRamp); - HRESULT (WINAPI *CreateTexture)(IDirect3DDevice9 *This, UINT Width, UINT Height, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateVolumeTexture)(IDirect3DDevice9 *This, UINT Width, UINT Height, UINT Depth, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateCubeTexture)(IDirect3DDevice9 *This, UINT EdgeLength, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateVertexBuffer)(IDirect3DDevice9 *This, UINT Length, DWORD Usage, DWORD FVF, D3DPOOL Pool, IDirect3DVertexBuffer9 **ppVertexBuffer, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateIndexBuffer)(IDirect3DDevice9 *This, UINT Length, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateRenderTarget)(IDirect3DDevice9 *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateDepthStencilSurface)(IDirect3DDevice9 *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL 
Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); - HRESULT (WINAPI *UpdateSurface)(IDirect3DDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint); - HRESULT (WINAPI *UpdateTexture)(IDirect3DDevice9 *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture); - HRESULT (WINAPI *GetRenderTargetData)(IDirect3DDevice9 *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface); - HRESULT (WINAPI *GetFrontBufferData)(IDirect3DDevice9 *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface); - HRESULT (WINAPI *StretchRect)(IDirect3DDevice9 *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestSurface, const RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter); - HRESULT (WINAPI *ColorFill)(IDirect3DDevice9 *This, IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color); - HRESULT (WINAPI *CreateOffscreenPlainSurface)(IDirect3DDevice9 *This, UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); - HRESULT (WINAPI *SetRenderTarget)(IDirect3DDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget); - HRESULT (WINAPI *GetRenderTarget)(IDirect3DDevice9 *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget); - HRESULT (WINAPI *SetDepthStencilSurface)(IDirect3DDevice9 *This, IDirect3DSurface9 *pNewZStencil); - HRESULT (WINAPI *GetDepthStencilSurface)(IDirect3DDevice9 *This, IDirect3DSurface9 **ppZStencilSurface); - HRESULT (WINAPI *BeginScene)(IDirect3DDevice9 *This); - HRESULT (WINAPI *EndScene)(IDirect3DDevice9 *This); - HRESULT (WINAPI *Clear)(IDirect3DDevice9 *This, DWORD Count, const D3DRECT *pRects, DWORD Flags, D3DCOLOR Color, float Z, DWORD Stencil); - HRESULT (WINAPI *SetTransform)(IDirect3DDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); - HRESULT (WINAPI 
*GetTransform)(IDirect3DDevice9 *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix); - HRESULT (WINAPI *MultiplyTransform)(IDirect3DDevice9 *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); - HRESULT (WINAPI *SetViewport)(IDirect3DDevice9 *This, const D3DVIEWPORT9 *pViewport); - HRESULT (WINAPI *GetViewport)(IDirect3DDevice9 *This, D3DVIEWPORT9 *pViewport); - HRESULT (WINAPI *SetMaterial)(IDirect3DDevice9 *This, const D3DMATERIAL9 *pMaterial); - HRESULT (WINAPI *GetMaterial)(IDirect3DDevice9 *This, D3DMATERIAL9 *pMaterial); - HRESULT (WINAPI *SetLight)(IDirect3DDevice9 *This, DWORD Index, const D3DLIGHT9 *pLight); - HRESULT (WINAPI *GetLight)(IDirect3DDevice9 *This, DWORD Index, D3DLIGHT9 *pLight); - HRESULT (WINAPI *LightEnable)(IDirect3DDevice9 *This, DWORD Index, BOOL Enable); - HRESULT (WINAPI *GetLightEnable)(IDirect3DDevice9 *This, DWORD Index, BOOL *pEnable); - HRESULT (WINAPI *SetClipPlane)(IDirect3DDevice9 *This, DWORD Index, const float *pPlane); - HRESULT (WINAPI *GetClipPlane)(IDirect3DDevice9 *This, DWORD Index, float *pPlane); - HRESULT (WINAPI *SetRenderState)(IDirect3DDevice9 *This, D3DRENDERSTATETYPE State, DWORD Value); - HRESULT (WINAPI *GetRenderState)(IDirect3DDevice9 *This, D3DRENDERSTATETYPE State, DWORD *pValue); - HRESULT (WINAPI *CreateStateBlock)(IDirect3DDevice9 *This, D3DSTATEBLOCKTYPE Type, IDirect3DStateBlock9 **ppSB); - HRESULT (WINAPI *BeginStateBlock)(IDirect3DDevice9 *This); - HRESULT (WINAPI *EndStateBlock)(IDirect3DDevice9 *This, IDirect3DStateBlock9 **ppSB); - HRESULT (WINAPI *SetClipStatus)(IDirect3DDevice9 *This, const D3DCLIPSTATUS9 *pClipStatus); - HRESULT (WINAPI *GetClipStatus)(IDirect3DDevice9 *This, D3DCLIPSTATUS9 *pClipStatus); - HRESULT (WINAPI *GetTexture)(IDirect3DDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture); - HRESULT (WINAPI *SetTexture)(IDirect3DDevice9 *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture); - HRESULT (WINAPI *GetTextureStageState)(IDirect3DDevice9 *This, DWORD 
Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue); - HRESULT (WINAPI *SetTextureStageState)(IDirect3DDevice9 *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); - HRESULT (WINAPI *GetSamplerState)(IDirect3DDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue); - HRESULT (WINAPI *SetSamplerState)(IDirect3DDevice9 *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value); - HRESULT (WINAPI *ValidateDevice)(IDirect3DDevice9 *This, DWORD *pNumPasses); - HRESULT (WINAPI *SetPaletteEntries)(IDirect3DDevice9 *This, UINT PaletteNumber, const PALETTEENTRY *pEntries); - HRESULT (WINAPI *GetPaletteEntries)(IDirect3DDevice9 *This, UINT PaletteNumber, PALETTEENTRY *pEntries); - HRESULT (WINAPI *SetCurrentTexturePalette)(IDirect3DDevice9 *This, UINT PaletteNumber); - HRESULT (WINAPI *GetCurrentTexturePalette)(IDirect3DDevice9 *This, UINT *PaletteNumber); - HRESULT (WINAPI *SetScissorRect)(IDirect3DDevice9 *This, const RECT *pRect); - HRESULT (WINAPI *GetScissorRect)(IDirect3DDevice9 *This, RECT *pRect); - HRESULT (WINAPI *SetSoftwareVertexProcessing)(IDirect3DDevice9 *This, BOOL bSoftware); - BOOL (WINAPI *GetSoftwareVertexProcessing)(IDirect3DDevice9 *This); - HRESULT (WINAPI *SetNPatchMode)(IDirect3DDevice9 *This, float nSegments); - float (WINAPI *GetNPatchMode)(IDirect3DDevice9 *This); - HRESULT (WINAPI *DrawPrimitive)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount); - HRESULT (WINAPI *DrawIndexedPrimitive)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount); - HRESULT (WINAPI *DrawPrimitiveUP)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); - HRESULT (WINAPI *DrawIndexedPrimitiveUP)(IDirect3DDevice9 *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, UINT NumVertices, UINT PrimitiveCount, const 
void *pIndexData, D3DFORMAT IndexDataFormat, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); - HRESULT (WINAPI *ProcessVertices)(IDirect3DDevice9 *This, UINT SrcStartIndex, UINT DestIndex, UINT VertexCount, IDirect3DVertexBuffer9 *pDestBuffer, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags); - HRESULT (WINAPI *CreateVertexDeclaration)(IDirect3DDevice9 *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl); - HRESULT (WINAPI *SetVertexDeclaration)(IDirect3DDevice9 *This, IDirect3DVertexDeclaration9 *pDecl); - HRESULT (WINAPI *GetVertexDeclaration)(IDirect3DDevice9 *This, IDirect3DVertexDeclaration9 **ppDecl); - HRESULT (WINAPI *SetFVF)(IDirect3DDevice9 *This, DWORD FVF); - HRESULT (WINAPI *GetFVF)(IDirect3DDevice9 *This, DWORD *pFVF); - HRESULT (WINAPI *CreateVertexShader)(IDirect3DDevice9 *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader); - HRESULT (WINAPI *SetVertexShader)(IDirect3DDevice9 *This, IDirect3DVertexShader9 *pShader); - HRESULT (WINAPI *GetVertexShader)(IDirect3DDevice9 *This, IDirect3DVertexShader9 **ppShader); - HRESULT (WINAPI *SetVertexShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI *GetVertexShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI *SetVertexShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *GetVertexShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *SetVertexShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *GetVertexShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *SetStreamSource)(IDirect3DDevice9 *This, UINT StreamNumber, 
IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride); - HRESULT (WINAPI *GetStreamSource)(IDirect3DDevice9 *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride); - HRESULT (WINAPI *SetStreamSourceFreq)(IDirect3DDevice9 *This, UINT StreamNumber, UINT Setting); - HRESULT (WINAPI *GetStreamSourceFreq)(IDirect3DDevice9 *This, UINT StreamNumber, UINT *pSetting); - HRESULT (WINAPI *SetIndices)(IDirect3DDevice9 *This, IDirect3DIndexBuffer9 *pIndexData); - HRESULT (WINAPI *GetIndices)(IDirect3DDevice9 *This, IDirect3DIndexBuffer9 **ppIndexData); - HRESULT (WINAPI *CreatePixelShader)(IDirect3DDevice9 *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader); - HRESULT (WINAPI *SetPixelShader)(IDirect3DDevice9 *This, IDirect3DPixelShader9 *pShader); - HRESULT (WINAPI *GetPixelShader)(IDirect3DDevice9 *This, IDirect3DPixelShader9 **ppShader); - HRESULT (WINAPI *SetPixelShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI *GetPixelShaderConstantF)(IDirect3DDevice9 *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI *SetPixelShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *GetPixelShaderConstantI)(IDirect3DDevice9 *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *SetPixelShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *GetPixelShaderConstantB)(IDirect3DDevice9 *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *DrawRectPatch)(IDirect3DDevice9 *This, UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo); - HRESULT (WINAPI *DrawTriPatch)(IDirect3DDevice9 *This, UINT Handle, const float *pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo); - HRESULT (WINAPI 
*DeletePatch)(IDirect3DDevice9 *This, UINT Handle); - HRESULT (WINAPI *CreateQuery)(IDirect3DDevice9 *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery); -} IDirect3DDevice9Vtbl; -struct IDirect3DDevice9 -{ - IDirect3DDevice9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DDevice9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DDevice9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DDevice9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DDevice9 macros */ -#define IDirect3DDevice9_TestCooperativeLevel(p) (p)->lpVtbl->TestCooperativeLevel(p) -#define IDirect3DDevice9_GetAvailableTextureMem(p) (p)->lpVtbl->GetAvailableTextureMem(p) -#define IDirect3DDevice9_EvictManagedResources(p) (p)->lpVtbl->EvictManagedResources(p) -#define IDirect3DDevice9_GetDirect3D(p,a) (p)->lpVtbl->GetDirect3D(p,a) -#define IDirect3DDevice9_GetDeviceCaps(p,a) (p)->lpVtbl->GetDeviceCaps(p,a) -#define IDirect3DDevice9_GetDisplayMode(p,a,b) (p)->lpVtbl->GetDisplayMode(p,a,b) -#define IDirect3DDevice9_GetCreationParameters(p,a) (p)->lpVtbl->GetCreationParameters(p,a) -#define IDirect3DDevice9_SetCursorProperties(p,a,b,c) (p)->lpVtbl->SetCursorProperties(p,a,b,c) -#define IDirect3DDevice9_SetCursorPosition(p,a,b,c) (p)->lpVtbl->SetCursorPosition(p,a,b,c) -#define IDirect3DDevice9_ShowCursor(p,a) (p)->lpVtbl->ShowCursor(p,a) -#define IDirect3DDevice9_CreateAdditionalSwapChain(p,a,b) (p)->lpVtbl->CreateAdditionalSwapChain(p,a,b) -#define IDirect3DDevice9_GetSwapChain(p,a,b) (p)->lpVtbl->GetSwapChain(p,a,b) -#define IDirect3DDevice9_GetNumberOfSwapChains(p) (p)->lpVtbl->GetNumberOfSwapChains(p) -#define IDirect3DDevice9_Reset(p,a) (p)->lpVtbl->Reset(p,a) -#define IDirect3DDevice9_Present(p,a,b,c,d) (p)->lpVtbl->Present(p,a,b,c,d) -#define IDirect3DDevice9_GetBackBuffer(p,a,b,c,d) (p)->lpVtbl->GetBackBuffer(p,a,b,c,d) -#define IDirect3DDevice9_GetRasterStatus(p,a,b) (p)->lpVtbl->GetRasterStatus(p,a,b) -#define IDirect3DDevice9_SetDialogBoxMode(p,a) 
(p)->lpVtbl->SetDialogBoxMode(p,a) -#define IDirect3DDevice9_SetGammaRamp(p,a,b,c) (p)->lpVtbl->SetGammaRamp(p,a,b,c) -#define IDirect3DDevice9_GetGammaRamp(p,a,b) (p)->lpVtbl->GetGammaRamp(p,a,b) -#define IDirect3DDevice9_CreateTexture(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateTexture(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) -#define IDirect3DDevice9_CreateCubeTexture(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateCubeTexture(p,a,b,c,d,e,f,g) -#define IDirect3DDevice9_CreateVertexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateVertexBuffer(p,a,b,c,d,e,f) -#define IDirect3DDevice9_CreateIndexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateIndexBuffer(p,a,b,c,d,e,f) -#define IDirect3DDevice9_CreateRenderTarget(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateRenderTarget(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_UpdateSurface(p,a,b,c,d) (p)->lpVtbl->UpdateSurface(p,a,b,c,d) -#define IDirect3DDevice9_UpdateTexture(p,a,b) (p)->lpVtbl->UpdateTexture(p,a,b) -#define IDirect3DDevice9_GetRenderTargetData(p,a,b) (p)->lpVtbl->GetRenderTargetData(p,a,b) -#define IDirect3DDevice9_GetFrontBufferData(p,a,b) (p)->lpVtbl->GetFrontBufferData(p,a,b) -#define IDirect3DDevice9_StretchRect(p,a,b,c,d,e) (p)->lpVtbl->StretchRect(p,a,b,c,d,e) -#define IDirect3DDevice9_ColorFill(p,a,b,c) (p)->lpVtbl->ColorFill(p,a,b,c) -#define IDirect3DDevice9_CreateOffscreenPlainSurface(p,a,b,c,d,e,f) (p)->lpVtbl->CreateOffscreenPlainSurface(p,a,b,c,d,e,f) -#define IDirect3DDevice9_SetRenderTarget(p,a,b) (p)->lpVtbl->SetRenderTarget(p,a,b) -#define IDirect3DDevice9_GetRenderTarget(p,a,b) (p)->lpVtbl->GetRenderTarget(p,a,b) -#define IDirect3DDevice9_SetDepthStencilSurface(p,a) (p)->lpVtbl->SetDepthStencilSurface(p,a) -#define IDirect3DDevice9_GetDepthStencilSurface(p,a) (p)->lpVtbl->GetDepthStencilSurface(p,a) 
-#define IDirect3DDevice9_BeginScene(p) (p)->lpVtbl->BeginScene(p) -#define IDirect3DDevice9_EndScene(p) (p)->lpVtbl->EndScene(p) -#define IDirect3DDevice9_Clear(p,a,b,c,d,e,f) (p)->lpVtbl->Clear(p,a,b,c,d,e,f) -#define IDirect3DDevice9_SetTransform(p,a,b) (p)->lpVtbl->SetTransform(p,a,b) -#define IDirect3DDevice9_GetTransform(p,a,b) (p)->lpVtbl->GetTransform(p,a,b) -#define IDirect3DDevice9_MultiplyTransform(p,a,b) (p)->lpVtbl->MultiplyTransform(p,a,b) -#define IDirect3DDevice9_SetViewport(p,a) (p)->lpVtbl->SetViewport(p,a) -#define IDirect3DDevice9_GetViewport(p,a) (p)->lpVtbl->GetViewport(p,a) -#define IDirect3DDevice9_SetMaterial(p,a) (p)->lpVtbl->SetMaterial(p,a) -#define IDirect3DDevice9_GetMaterial(p,a) (p)->lpVtbl->GetMaterial(p,a) -#define IDirect3DDevice9_SetLight(p,a,b) (p)->lpVtbl->SetLight(p,a,b) -#define IDirect3DDevice9_GetLight(p,a,b) (p)->lpVtbl->GetLight(p,a,b) -#define IDirect3DDevice9_LightEnable(p,a,b) (p)->lpVtbl->LightEnable(p,a,b) -#define IDirect3DDevice9_GetLightEnable(p,a,b) (p)->lpVtbl->GetLightEnable(p,a,b) -#define IDirect3DDevice9_SetClipPlane(p,a,b) (p)->lpVtbl->SetClipPlane(p,a,b) -#define IDirect3DDevice9_GetClipPlane(p,a,b) (p)->lpVtbl->GetClipPlane(p,a,b) -#define IDirect3DDevice9_SetRenderState(p,a,b) (p)->lpVtbl->SetRenderState(p,a,b) -#define IDirect3DDevice9_GetRenderState(p,a,b) (p)->lpVtbl->GetRenderState(p,a,b) -#define IDirect3DDevice9_CreateStateBlock(p,a,b) (p)->lpVtbl->CreateStateBlock(p,a,b) -#define IDirect3DDevice9_BeginStateBlock(p) (p)->lpVtbl->BeginStateBlock(p) -#define IDirect3DDevice9_EndStateBlock(p,a) (p)->lpVtbl->EndStateBlock(p,a) -#define IDirect3DDevice9_SetClipStatus(p,a) (p)->lpVtbl->SetClipStatus(p,a) -#define IDirect3DDevice9_GetClipStatus(p,a) (p)->lpVtbl->GetClipStatus(p,a) -#define IDirect3DDevice9_GetTexture(p,a,b) (p)->lpVtbl->GetTexture(p,a,b) -#define IDirect3DDevice9_SetTexture(p,a,b) (p)->lpVtbl->SetTexture(p,a,b) -#define IDirect3DDevice9_GetTextureStageState(p,a,b,c) 
(p)->lpVtbl->GetTextureStageState(p,a,b,c) -#define IDirect3DDevice9_SetTextureStageState(p,a,b,c) (p)->lpVtbl->SetTextureStageState(p,a,b,c) -#define IDirect3DDevice9_GetSamplerState(p,a,b,c) (p)->lpVtbl->GetSamplerState(p,a,b,c) -#define IDirect3DDevice9_SetSamplerState(p,a,b,c) (p)->lpVtbl->SetSamplerState(p,a,b,c) -#define IDirect3DDevice9_ValidateDevice(p,a) (p)->lpVtbl->ValidateDevice(p,a) -#define IDirect3DDevice9_SetPaletteEntries(p,a,b) (p)->lpVtbl->SetPaletteEntries(p,a,b) -#define IDirect3DDevice9_GetPaletteEntries(p,a,b) (p)->lpVtbl->GetPaletteEntries(p,a,b) -#define IDirect3DDevice9_SetCurrentTexturePalette(p,a) (p)->lpVtbl->SetCurrentTexturePalette(p,a) -#define IDirect3DDevice9_GetCurrentTexturePalette(p,a) (p)->lpVtbl->GetCurrentTexturePalette(p,a) -#define IDirect3DDevice9_SetScissorRect(p,a) (p)->lpVtbl->SetScissorRect(p,a) -#define IDirect3DDevice9_GetScissorRect(p,a) (p)->lpVtbl->GetScissorRect(p,a) -#define IDirect3DDevice9_SetSoftwareVertexProcessing(p,a) (p)->lpVtbl->SetSoftwareVertexProcessing(p,a) -#define IDirect3DDevice9_GetSoftwareVertexProcessing(p) (p)->lpVtbl->GetSoftwareVertexProcessing(p) -#define IDirect3DDevice9_SetNPatchMode(p,a) (p)->lpVtbl->SetNPatchMode(p,a) -#define IDirect3DDevice9_GetNPatchMode(p) (p)->lpVtbl->GetNPatchMode(p) -#define IDirect3DDevice9_DrawPrimitive(p,a,b,c) (p)->lpVtbl->DrawPrimitive(p,a,b,c) -#define IDirect3DDevice9_DrawIndexedPrimitive(p,a,b,c,d,e,f) (p)->lpVtbl->DrawIndexedPrimitive(p,a,b,c,d,e,f) -#define IDirect3DDevice9_DrawPrimitiveUP(p,a,b,c,d) (p)->lpVtbl->DrawPrimitiveUP(p,a,b,c,d) -#define IDirect3DDevice9_DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9_ProcessVertices(p,a,b,c,d,e,f) (p)->lpVtbl->ProcessVertices(p,a,b,c,d,e,f) -#define IDirect3DDevice9_CreateVertexDeclaration(p,a,b) (p)->lpVtbl->CreateVertexDeclaration(p,a,b) -#define IDirect3DDevice9_SetVertexDeclaration(p,a) (p)->lpVtbl->SetVertexDeclaration(p,a) 
-#define IDirect3DDevice9_GetVertexDeclaration(p,a) (p)->lpVtbl->GetVertexDeclaration(p,a) -#define IDirect3DDevice9_SetFVF(p,a) (p)->lpVtbl->SetFVF(p,a) -#define IDirect3DDevice9_GetFVF(p,a) (p)->lpVtbl->GetFVF(p,a) -#define IDirect3DDevice9_CreateVertexShader(p,a,b) (p)->lpVtbl->CreateVertexShader(p,a,b) -#define IDirect3DDevice9_SetVertexShader(p,a) (p)->lpVtbl->SetVertexShader(p,a) -#define IDirect3DDevice9_GetVertexShader(p,a) (p)->lpVtbl->GetVertexShader(p,a) -#define IDirect3DDevice9_SetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantF(p,a,b,c) -#define IDirect3DDevice9_GetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantF(p,a,b,c) -#define IDirect3DDevice9_SetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantI(p,a,b,c) -#define IDirect3DDevice9_GetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantI(p,a,b,c) -#define IDirect3DDevice9_SetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantB(p,a,b,c) -#define IDirect3DDevice9_GetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantB(p,a,b,c) -#define IDirect3DDevice9_SetStreamSource(p,a,b,c,d) (p)->lpVtbl->SetStreamSource(p,a,b,c,d) -#define IDirect3DDevice9_GetStreamSource(p,a,b,c,d) (p)->lpVtbl->GetStreamSource(p,a,b,c,d) -#define IDirect3DDevice9_SetStreamSourceFreq(p,a,b) (p)->lpVtbl->SetStreamSourceFreq(p,a,b) -#define IDirect3DDevice9_GetStreamSourceFreq(p,a,b) (p)->lpVtbl->GetStreamSourceFreq(p,a,b) -#define IDirect3DDevice9_SetIndices(p,a) (p)->lpVtbl->SetIndices(p,a) -#define IDirect3DDevice9_GetIndices(p,a) (p)->lpVtbl->GetIndices(p,a) -#define IDirect3DDevice9_CreatePixelShader(p,a,b) (p)->lpVtbl->CreatePixelShader(p,a,b) -#define IDirect3DDevice9_SetPixelShader(p,a) (p)->lpVtbl->SetPixelShader(p,a) -#define IDirect3DDevice9_GetPixelShader(p,a) (p)->lpVtbl->GetPixelShader(p,a) -#define IDirect3DDevice9_SetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantF(p,a,b,c) -#define 
IDirect3DDevice9_GetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantF(p,a,b,c) -#define IDirect3DDevice9_SetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantI(p,a,b,c) -#define IDirect3DDevice9_GetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantI(p,a,b,c) -#define IDirect3DDevice9_SetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantB(p,a,b,c) -#define IDirect3DDevice9_GetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantB(p,a,b,c) -#define IDirect3DDevice9_DrawRectPatch(p,a,b,c) (p)->lpVtbl->DrawRectPatch(p,a,b,c) -#define IDirect3DDevice9_DrawTriPatch(p,a,b,c) (p)->lpVtbl->DrawTriPatch(p,a,b,c) -#define IDirect3DDevice9_DeletePatch(p,a) (p)->lpVtbl->DeletePatch(p,a) -#define IDirect3DDevice9_CreateQuery(p,a,b) (p)->lpVtbl->CreateQuery(p,a,b) - -typedef struct IDirect3DDevice9ExVtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DDevice9Ex *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DDevice9Ex *This); - ULONG (WINAPI *Release)(IDirect3DDevice9Ex *This); - /* IDirect3DDevice9 */ - HRESULT (WINAPI *TestCooperativeLevel)(IDirect3DDevice9Ex *This); - UINT (WINAPI *GetAvailableTextureMem)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *EvictManagedResources)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *GetDirect3D)(IDirect3DDevice9Ex *This, IDirect3D9 **ppD3D9); - HRESULT (WINAPI *GetDeviceCaps)(IDirect3DDevice9Ex *This, D3DCAPS9 *pCaps); - HRESULT (WINAPI *GetDisplayMode)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *GetCreationParameters)(IDirect3DDevice9Ex *This, D3DDEVICE_CREATION_PARAMETERS *pParameters); - HRESULT (WINAPI *SetCursorProperties)(IDirect3DDevice9Ex *This, UINT XHotSpot, UINT YHotSpot, IDirect3DSurface9 *pCursorBitmap); - void (WINAPI *SetCursorPosition)(IDirect3DDevice9Ex *This, int X, int Y, DWORD Flags); - BOOL (WINAPI *ShowCursor)(IDirect3DDevice9Ex *This, BOOL bShow); - HRESULT (WINAPI 
*CreateAdditionalSwapChain)(IDirect3DDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, IDirect3DSwapChain9 **pSwapChain); - HRESULT (WINAPI *GetSwapChain)(IDirect3DDevice9Ex *This, UINT iSwapChain, IDirect3DSwapChain9 **pSwapChain); - UINT (WINAPI *GetNumberOfSwapChains)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *Reset)(IDirect3DDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters); - HRESULT (WINAPI *Present)(IDirect3DDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion); - HRESULT (WINAPI *GetBackBuffer)(IDirect3DDevice9Ex *This, UINT iSwapChain, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); - HRESULT (WINAPI *GetRasterStatus)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DRASTER_STATUS *pRasterStatus); - HRESULT (WINAPI *SetDialogBoxMode)(IDirect3DDevice9Ex *This, BOOL bEnableDialogs); - void (WINAPI *SetGammaRamp)(IDirect3DDevice9Ex *This, UINT iSwapChain, DWORD Flags, const D3DGAMMARAMP *pRamp); - void (WINAPI *GetGammaRamp)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DGAMMARAMP *pRamp); - HRESULT (WINAPI *CreateTexture)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DTexture9 **ppTexture, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateVolumeTexture)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, UINT Depth, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DVolumeTexture9 **ppVolumeTexture, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateCubeTexture)(IDirect3DDevice9Ex *This, UINT EdgeLength, UINT Levels, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DCubeTexture9 **ppCubeTexture, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateVertexBuffer)(IDirect3DDevice9Ex *This, UINT Length, DWORD Usage, DWORD FVF, D3DPOOL Pool, IDirect3DVertexBuffer9 **ppVertexBuffer, HANDLE *pSharedHandle); - HRESULT (WINAPI 
*CreateIndexBuffer)(IDirect3DDevice9Ex *This, UINT Length, DWORD Usage, D3DFORMAT Format, D3DPOOL Pool, IDirect3DIndexBuffer9 **ppIndexBuffer, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateRenderTarget)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); - HRESULT (WINAPI *CreateDepthStencilSurface)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); - HRESULT (WINAPI *UpdateSurface)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestinationSurface, const POINT *pDestPoint); - HRESULT (WINAPI *UpdateTexture)(IDirect3DDevice9Ex *This, IDirect3DBaseTexture9 *pSourceTexture, IDirect3DBaseTexture9 *pDestinationTexture); - HRESULT (WINAPI *GetRenderTargetData)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pRenderTarget, IDirect3DSurface9 *pDestSurface); - HRESULT (WINAPI *GetFrontBufferData)(IDirect3DDevice9Ex *This, UINT iSwapChain, IDirect3DSurface9 *pDestSurface); - HRESULT (WINAPI *StretchRect)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSourceSurface, const RECT *pSourceRect, IDirect3DSurface9 *pDestSurface, const RECT *pDestRect, D3DTEXTUREFILTERTYPE Filter); - HRESULT (WINAPI *ColorFill)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSurface, const RECT *pRect, D3DCOLOR color); - HRESULT (WINAPI *CreateOffscreenPlainSurface)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle); - HRESULT (WINAPI *SetRenderTarget)(IDirect3DDevice9Ex *This, DWORD RenderTargetIndex, IDirect3DSurface9 *pRenderTarget); - HRESULT (WINAPI *GetRenderTarget)(IDirect3DDevice9Ex *This, DWORD RenderTargetIndex, IDirect3DSurface9 **ppRenderTarget); - HRESULT 
(WINAPI *SetDepthStencilSurface)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pNewZStencil); - HRESULT (WINAPI *GetDepthStencilSurface)(IDirect3DDevice9Ex *This, IDirect3DSurface9 **ppZStencilSurface); - HRESULT (WINAPI *BeginScene)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *EndScene)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *Clear)(IDirect3DDevice9Ex *This, DWORD Count, const D3DRECT *pRects, DWORD Flags, D3DCOLOR Color, float Z, DWORD Stencil); - HRESULT (WINAPI *SetTransform)(IDirect3DDevice9Ex *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); - HRESULT (WINAPI *GetTransform)(IDirect3DDevice9Ex *This, D3DTRANSFORMSTATETYPE State, D3DMATRIX *pMatrix); - HRESULT (WINAPI *MultiplyTransform)(IDirect3DDevice9Ex *This, D3DTRANSFORMSTATETYPE State, const D3DMATRIX *pMatrix); - HRESULT (WINAPI *SetViewport)(IDirect3DDevice9Ex *This, const D3DVIEWPORT9 *pViewport); - HRESULT (WINAPI *GetViewport)(IDirect3DDevice9Ex *This, D3DVIEWPORT9 *pViewport); - HRESULT (WINAPI *SetMaterial)(IDirect3DDevice9Ex *This, const D3DMATERIAL9 *pMaterial); - HRESULT (WINAPI *GetMaterial)(IDirect3DDevice9Ex *This, D3DMATERIAL9 *pMaterial); - HRESULT (WINAPI *SetLight)(IDirect3DDevice9Ex *This, DWORD Index, const D3DLIGHT9 *pLight); - HRESULT (WINAPI *GetLight)(IDirect3DDevice9Ex *This, DWORD Index, D3DLIGHT9 *pLight); - HRESULT (WINAPI *LightEnable)(IDirect3DDevice9Ex *This, DWORD Index, BOOL Enable); - HRESULT (WINAPI *GetLightEnable)(IDirect3DDevice9Ex *This, DWORD Index, BOOL *pEnable); - HRESULT (WINAPI *SetClipPlane)(IDirect3DDevice9Ex *This, DWORD Index, const float *pPlane); - HRESULT (WINAPI *GetClipPlane)(IDirect3DDevice9Ex *This, DWORD Index, float *pPlane); - HRESULT (WINAPI *SetRenderState)(IDirect3DDevice9Ex *This, D3DRENDERSTATETYPE State, DWORD Value); - HRESULT (WINAPI *GetRenderState)(IDirect3DDevice9Ex *This, D3DRENDERSTATETYPE State, DWORD *pValue); - HRESULT (WINAPI *CreateStateBlock)(IDirect3DDevice9Ex *This, D3DSTATEBLOCKTYPE Type, 
IDirect3DStateBlock9 **ppSB); - HRESULT (WINAPI *BeginStateBlock)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *EndStateBlock)(IDirect3DDevice9Ex *This, IDirect3DStateBlock9 **ppSB); - HRESULT (WINAPI *SetClipStatus)(IDirect3DDevice9Ex *This, const D3DCLIPSTATUS9 *pClipStatus); - HRESULT (WINAPI *GetClipStatus)(IDirect3DDevice9Ex *This, D3DCLIPSTATUS9 *pClipStatus); - HRESULT (WINAPI *GetTexture)(IDirect3DDevice9Ex *This, DWORD Stage, IDirect3DBaseTexture9 **ppTexture); - HRESULT (WINAPI *SetTexture)(IDirect3DDevice9Ex *This, DWORD Stage, IDirect3DBaseTexture9 *pTexture); - HRESULT (WINAPI *GetTextureStageState)(IDirect3DDevice9Ex *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD *pValue); - HRESULT (WINAPI *SetTextureStageState)(IDirect3DDevice9Ex *This, DWORD Stage, D3DTEXTURESTAGESTATETYPE Type, DWORD Value); - HRESULT (WINAPI *GetSamplerState)(IDirect3DDevice9Ex *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD *pValue); - HRESULT (WINAPI *SetSamplerState)(IDirect3DDevice9Ex *This, DWORD Sampler, D3DSAMPLERSTATETYPE Type, DWORD Value); - HRESULT (WINAPI *ValidateDevice)(IDirect3DDevice9Ex *This, DWORD *pNumPasses); - HRESULT (WINAPI *SetPaletteEntries)(IDirect3DDevice9Ex *This, UINT PaletteNumber, const PALETTEENTRY *pEntries); - HRESULT (WINAPI *GetPaletteEntries)(IDirect3DDevice9Ex *This, UINT PaletteNumber, PALETTEENTRY *pEntries); - HRESULT (WINAPI *SetCurrentTexturePalette)(IDirect3DDevice9Ex *This, UINT PaletteNumber); - HRESULT (WINAPI *GetCurrentTexturePalette)(IDirect3DDevice9Ex *This, UINT *PaletteNumber); - HRESULT (WINAPI *SetScissorRect)(IDirect3DDevice9Ex *This, const RECT *pRect); - HRESULT (WINAPI *GetScissorRect)(IDirect3DDevice9Ex *This, RECT *pRect); - HRESULT (WINAPI *SetSoftwareVertexProcessing)(IDirect3DDevice9Ex *This, BOOL bSoftware); - BOOL (WINAPI *GetSoftwareVertexProcessing)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *SetNPatchMode)(IDirect3DDevice9Ex *This, float nSegments); - float (WINAPI 
*GetNPatchMode)(IDirect3DDevice9Ex *This); - HRESULT (WINAPI *DrawPrimitive)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, UINT StartVertex, UINT PrimitiveCount); - HRESULT (WINAPI *DrawIndexedPrimitive)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, INT BaseVertexIndex, UINT MinVertexIndex, UINT NumVertices, UINT startIndex, UINT primCount); - HRESULT (WINAPI *DrawPrimitiveUP)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, UINT PrimitiveCount, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); - HRESULT (WINAPI *DrawIndexedPrimitiveUP)(IDirect3DDevice9Ex *This, D3DPRIMITIVETYPE PrimitiveType, UINT MinVertexIndex, UINT NumVertices, UINT PrimitiveCount, const void *pIndexData, D3DFORMAT IndexDataFormat, const void *pVertexStreamZeroData, UINT VertexStreamZeroStride); - HRESULT (WINAPI *ProcessVertices)(IDirect3DDevice9Ex *This, UINT SrcStartIndex, UINT DestIndex, UINT VertexCount, IDirect3DVertexBuffer9 *pDestBuffer, IDirect3DVertexDeclaration9 *pVertexDecl, DWORD Flags); - HRESULT (WINAPI *CreateVertexDeclaration)(IDirect3DDevice9Ex *This, const D3DVERTEXELEMENT9 *pVertexElements, IDirect3DVertexDeclaration9 **ppDecl); - HRESULT (WINAPI *SetVertexDeclaration)(IDirect3DDevice9Ex *This, IDirect3DVertexDeclaration9 *pDecl); - HRESULT (WINAPI *GetVertexDeclaration)(IDirect3DDevice9Ex *This, IDirect3DVertexDeclaration9 **ppDecl); - HRESULT (WINAPI *SetFVF)(IDirect3DDevice9Ex *This, DWORD FVF); - HRESULT (WINAPI *GetFVF)(IDirect3DDevice9Ex *This, DWORD *pFVF); - HRESULT (WINAPI *CreateVertexShader)(IDirect3DDevice9Ex *This, const DWORD *pFunction, IDirect3DVertexShader9 **ppShader); - HRESULT (WINAPI *SetVertexShader)(IDirect3DDevice9Ex *This, IDirect3DVertexShader9 *pShader); - HRESULT (WINAPI *GetVertexShader)(IDirect3DDevice9Ex *This, IDirect3DVertexShader9 **ppShader); - HRESULT (WINAPI *SetVertexShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); - HRESULT 
(WINAPI *GetVertexShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI *SetVertexShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *GetVertexShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *SetVertexShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *GetVertexShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *SetStreamSource)(IDirect3DDevice9Ex *This, UINT StreamNumber, IDirect3DVertexBuffer9 *pStreamData, UINT OffsetInBytes, UINT Stride); - HRESULT (WINAPI *GetStreamSource)(IDirect3DDevice9Ex *This, UINT StreamNumber, IDirect3DVertexBuffer9 **ppStreamData, UINT *pOffsetInBytes, UINT *pStride); - HRESULT (WINAPI *SetStreamSourceFreq)(IDirect3DDevice9Ex *This, UINT StreamNumber, UINT Setting); - HRESULT (WINAPI *GetStreamSourceFreq)(IDirect3DDevice9Ex *This, UINT StreamNumber, UINT *pSetting); - HRESULT (WINAPI *SetIndices)(IDirect3DDevice9Ex *This, IDirect3DIndexBuffer9 *pIndexData); - HRESULT (WINAPI *GetIndices)(IDirect3DDevice9Ex *This, IDirect3DIndexBuffer9 **ppIndexData); - HRESULT (WINAPI *CreatePixelShader)(IDirect3DDevice9Ex *This, const DWORD *pFunction, IDirect3DPixelShader9 **ppShader); - HRESULT (WINAPI *SetPixelShader)(IDirect3DDevice9Ex *This, IDirect3DPixelShader9 *pShader); - HRESULT (WINAPI *GetPixelShader)(IDirect3DDevice9Ex *This, IDirect3DPixelShader9 **ppShader); - HRESULT (WINAPI *SetPixelShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, const float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI *GetPixelShaderConstantF)(IDirect3DDevice9Ex *This, UINT StartRegister, float *pConstantData, UINT Vector4fCount); - HRESULT (WINAPI 
*SetPixelShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, const int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *GetPixelShaderConstantI)(IDirect3DDevice9Ex *This, UINT StartRegister, int *pConstantData, UINT Vector4iCount); - HRESULT (WINAPI *SetPixelShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, const BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *GetPixelShaderConstantB)(IDirect3DDevice9Ex *This, UINT StartRegister, BOOL *pConstantData, UINT BoolCount); - HRESULT (WINAPI *DrawRectPatch)(IDirect3DDevice9Ex *This, UINT Handle, const float *pNumSegs, const D3DRECTPATCH_INFO *pRectPatchInfo); - HRESULT (WINAPI *DrawTriPatch)(IDirect3DDevice9Ex *This, UINT Handle, const float *pNumSegs, const D3DTRIPATCH_INFO *pTriPatchInfo); - HRESULT (WINAPI *DeletePatch)(IDirect3DDevice9Ex *This, UINT Handle); - HRESULT (WINAPI *CreateQuery)(IDirect3DDevice9Ex *This, D3DQUERYTYPE Type, IDirect3DQuery9 **ppQuery); - /* IDirect3DDevice9Ex */ - HRESULT (WINAPI *SetConvolutionMonoKernel)(IDirect3DDevice9Ex *This, UINT width, UINT height, float *rows, float *columns); - HRESULT (WINAPI *ComposeRects)(IDirect3DDevice9Ex *This, IDirect3DSurface9 *pSrc, IDirect3DSurface9 *pDst, IDirect3DVertexBuffer9 *pSrcRectDescs, UINT NumRects, IDirect3DVertexBuffer9 *pDstRectDescs, D3DCOMPOSERECTSOP Operation, int Xoffset, int Yoffset); - HRESULT (WINAPI *PresentEx)(IDirect3DDevice9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags); - HRESULT (WINAPI *GetGPUThreadPriority)(IDirect3DDevice9Ex *This, INT *pPriority); - HRESULT (WINAPI *SetGPUThreadPriority)(IDirect3DDevice9Ex *This, INT Priority); - HRESULT (WINAPI *WaitForVBlank)(IDirect3DDevice9Ex *This, UINT iSwapChain); - HRESULT (WINAPI *CheckResourceResidency)(IDirect3DDevice9Ex *This, IDirect3DResource9 **pResourceArray, UINT32 NumResources); - HRESULT (WINAPI *SetMaximumFrameLatency)(IDirect3DDevice9Ex *This, UINT 
MaxLatency); - HRESULT (WINAPI *GetMaximumFrameLatency)(IDirect3DDevice9Ex *This, UINT *pMaxLatency); - HRESULT (WINAPI *CheckDeviceState)(IDirect3DDevice9Ex *This, HWND hDestinationWindow); - HRESULT (WINAPI *CreateRenderTargetEx)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Lockable, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage); - HRESULT (WINAPI *CreateOffscreenPlainSurfaceEx)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DPOOL Pool, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage); - HRESULT (WINAPI *CreateDepthStencilSurfaceEx)(IDirect3DDevice9Ex *This, UINT Width, UINT Height, D3DFORMAT Format, D3DMULTISAMPLE_TYPE MultiSample, DWORD MultisampleQuality, BOOL Discard, IDirect3DSurface9 **ppSurface, HANDLE *pSharedHandle, DWORD Usage); - HRESULT (WINAPI *ResetEx)(IDirect3DDevice9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters, D3DDISPLAYMODEEX *pFullscreenDisplayMode); - HRESULT (WINAPI *GetDisplayModeEx)(IDirect3DDevice9Ex *This, UINT iSwapChain, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation); -} IDirect3DDevice9ExVtbl; -struct IDirect3DDevice9Ex -{ - IDirect3DDevice9ExVtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DDevice9Ex_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DDevice9Ex_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DDevice9Ex_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DDevice9 macros */ -#define IDirect3DDevice9Ex_TestCooperativeLevel(p) (p)->lpVtbl->TestCooperativeLevel(p) -#define IDirect3DDevice9Ex_GetAvailableTextureMem(p) (p)->lpVtbl->GetAvailableTextureMem(p) -#define IDirect3DDevice9Ex_EvictManagedResources(p) (p)->lpVtbl->EvictManagedResources(p) -#define IDirect3DDevice9Ex_GetDirect3D(p,a) (p)->lpVtbl->GetDirect3D(p,a) -#define IDirect3DDevice9Ex_GetDeviceCaps(p,a) (p)->lpVtbl->GetDeviceCaps(p,a) -#define 
IDirect3DDevice9Ex_GetDisplayMode(p,a,b) (p)->lpVtbl->GetDisplayMode(p,a,b) -#define IDirect3DDevice9Ex_GetCreationParameters(p,a) (p)->lpVtbl->GetCreationParameters(p,a) -#define IDirect3DDevice9Ex_SetCursorProperties(p,a,b,c) (p)->lpVtbl->SetCursorProperties(p,a,b,c) -#define IDirect3DDevice9Ex_SetCursorPosition(p,a,b,c) (p)->lpVtbl->SetCursorPosition(p,a,b,c) -#define IDirect3DDevice9Ex_ShowCursor(p,a) (p)->lpVtbl->ShowCursor(p,a) -#define IDirect3DDevice9Ex_CreateAdditionalSwapChain(p,a,b) (p)->lpVtbl->CreateAdditionalSwapChain(p,a,b) -#define IDirect3DDevice9Ex_GetSwapChain(p,a,b) (p)->lpVtbl->GetSwapChain(p,a,b) -#define IDirect3DDevice9Ex_GetNumberOfSwapChains(p) (p)->lpVtbl->GetNumberOfSwapChains(p) -#define IDirect3DDevice9Ex_Reset(p,a) (p)->lpVtbl->Reset(p,a) -#define IDirect3DDevice9Ex_Present(p,a,b,c,d) (p)->lpVtbl->Present(p,a,b,c,d) -#define IDirect3DDevice9Ex_GetBackBuffer(p,a,b,c,d) (p)->lpVtbl->GetBackBuffer(p,a,b,c,d) -#define IDirect3DDevice9Ex_GetRasterStatus(p,a,b) (p)->lpVtbl->GetRasterStatus(p,a,b) -#define IDirect3DDevice9Ex_SetDialogBoxMode(p,a) (p)->lpVtbl->SetDialogBoxMode(p,a) -#define IDirect3DDevice9Ex_SetGammaRamp(p,a,b,c) (p)->lpVtbl->SetGammaRamp(p,a,b,c) -#define IDirect3DDevice9Ex_GetGammaRamp(p,a,b) (p)->lpVtbl->GetGammaRamp(p,a,b) -#define IDirect3DDevice9Ex_CreateTexture(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateTexture(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9Ex_CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateVolumeTexture(p,a,b,c,d,e,f,g,h,i) -#define IDirect3DDevice9Ex_CreateCubeTexture(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateCubeTexture(p,a,b,c,d,e,f,g) -#define IDirect3DDevice9Ex_CreateVertexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateVertexBuffer(p,a,b,c,d,e,f) -#define IDirect3DDevice9Ex_CreateIndexBuffer(p,a,b,c,d,e,f) (p)->lpVtbl->CreateIndexBuffer(p,a,b,c,d,e,f) -#define IDirect3DDevice9Ex_CreateRenderTarget(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateRenderTarget(p,a,b,c,d,e,f,g,h) -#define 
IDirect3DDevice9Ex_CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->CreateDepthStencilSurface(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9Ex_UpdateSurface(p,a,b,c,d) (p)->lpVtbl->UpdateSurface(p,a,b,c,d) -#define IDirect3DDevice9Ex_UpdateTexture(p,a,b) (p)->lpVtbl->UpdateTexture(p,a,b) -#define IDirect3DDevice9Ex_GetRenderTargetData(p,a,b) (p)->lpVtbl->GetRenderTargetData(p,a,b) -#define IDirect3DDevice9Ex_GetFrontBufferData(p,a,b) (p)->lpVtbl->GetFrontBufferData(p,a,b) -#define IDirect3DDevice9Ex_StretchRect(p,a,b,c,d,e) (p)->lpVtbl->StretchRect(p,a,b,c,d,e) -#define IDirect3DDevice9Ex_ColorFill(p,a,b,c) (p)->lpVtbl->ColorFill(p,a,b,c) -#define IDirect3DDevice9Ex_CreateOffscreenPlainSurface(p,a,b,c,d,e,f) (p)->lpVtbl->CreateOffscreenPlainSurface(p,a,b,c,d,e,f) -#define IDirect3DDevice9Ex_SetRenderTarget(p,a,b) (p)->lpVtbl->SetRenderTarget(p,a,b) -#define IDirect3DDevice9Ex_GetRenderTarget(p,a,b) (p)->lpVtbl->GetRenderTarget(p,a,b) -#define IDirect3DDevice9Ex_SetDepthStencilSurface(p,a) (p)->lpVtbl->SetDepthStencilSurface(p,a) -#define IDirect3DDevice9Ex_GetDepthStencilSurface(p,a) (p)->lpVtbl->GetDepthStencilSurface(p,a) -#define IDirect3DDevice9Ex_BeginScene(p) (p)->lpVtbl->BeginScene(p) -#define IDirect3DDevice9Ex_EndScene(p) (p)->lpVtbl->EndScene(p) -#define IDirect3DDevice9Ex_Clear(p,a,b,c,d,e,f) (p)->lpVtbl->Clear(p,a,b,c,d,e,f) -#define IDirect3DDevice9Ex_SetTransform(p,a,b) (p)->lpVtbl->SetTransform(p,a,b) -#define IDirect3DDevice9Ex_GetTransform(p,a,b) (p)->lpVtbl->GetTransform(p,a,b) -#define IDirect3DDevice9Ex_MultiplyTransform(p,a,b) (p)->lpVtbl->MultiplyTransform(p,a,b) -#define IDirect3DDevice9Ex_SetViewport(p,a) (p)->lpVtbl->SetViewport(p,a) -#define IDirect3DDevice9Ex_GetViewport(p,a) (p)->lpVtbl->GetViewport(p,a) -#define IDirect3DDevice9Ex_SetMaterial(p,a) (p)->lpVtbl->SetMaterial(p,a) -#define IDirect3DDevice9Ex_GetMaterial(p,a) (p)->lpVtbl->GetMaterial(p,a) -#define IDirect3DDevice9Ex_SetLight(p,a,b) (p)->lpVtbl->SetLight(p,a,b) 
-#define IDirect3DDevice9Ex_GetLight(p,a,b) (p)->lpVtbl->GetLight(p,a,b) -#define IDirect3DDevice9Ex_LightEnable(p,a,b) (p)->lpVtbl->LightEnable(p,a,b) -#define IDirect3DDevice9Ex_GetLightEnable(p,a,b) (p)->lpVtbl->GetLightEnable(p,a,b) -#define IDirect3DDevice9Ex_SetClipPlane(p,a,b) (p)->lpVtbl->SetClipPlane(p,a,b) -#define IDirect3DDevice9Ex_GetClipPlane(p,a,b) (p)->lpVtbl->GetClipPlane(p,a,b) -#define IDirect3DDevice9Ex_SetRenderState(p,a,b) (p)->lpVtbl->SetRenderState(p,a,b) -#define IDirect3DDevice9Ex_GetRenderState(p,a,b) (p)->lpVtbl->GetRenderState(p,a,b) -#define IDirect3DDevice9Ex_CreateStateBlock(p,a,b) (p)->lpVtbl->CreateStateBlock(p,a,b) -#define IDirect3DDevice9Ex_BeginStateBlock(p) (p)->lpVtbl->BeginStateBlock(p) -#define IDirect3DDevice9Ex_EndStateBlock(p,a) (p)->lpVtbl->EndStateBlock(p,a) -#define IDirect3DDevice9Ex_SetClipStatus(p,a) (p)->lpVtbl->SetClipStatus(p,a) -#define IDirect3DDevice9Ex_GetClipStatus(p,a) (p)->lpVtbl->GetClipStatus(p,a) -#define IDirect3DDevice9Ex_GetTexture(p,a,b) (p)->lpVtbl->GetTexture(p,a,b) -#define IDirect3DDevice9Ex_SetTexture(p,a,b) (p)->lpVtbl->SetTexture(p,a,b) -#define IDirect3DDevice9Ex_GetTextureStageState(p,a,b,c) (p)->lpVtbl->GetTextureStageState(p,a,b,c) -#define IDirect3DDevice9Ex_SetTextureStageState(p,a,b,c) (p)->lpVtbl->SetTextureStageState(p,a,b,c) -#define IDirect3DDevice9Ex_GetSamplerState(p,a,b,c) (p)->lpVtbl->GetSamplerState(p,a,b,c) -#define IDirect3DDevice9Ex_SetSamplerState(p,a,b,c) (p)->lpVtbl->SetSamplerState(p,a,b,c) -#define IDirect3DDevice9Ex_ValidateDevice(p,a) (p)->lpVtbl->ValidateDevice(p,a) -#define IDirect3DDevice9Ex_SetPaletteEntries(p,a,b) (p)->lpVtbl->SetPaletteEntries(p,a,b) -#define IDirect3DDevice9Ex_GetPaletteEntries(p,a,b) (p)->lpVtbl->GetPaletteEntries(p,a,b) -#define IDirect3DDevice9Ex_SetCurrentTexturePalette(p,a) (p)->lpVtbl->SetCurrentTexturePalette(p,a) -#define IDirect3DDevice9Ex_GetCurrentTexturePalette(p,a) (p)->lpVtbl->GetCurrentTexturePalette(p,a) -#define 
IDirect3DDevice9Ex_SetScissorRect(p,a) (p)->lpVtbl->SetScissorRect(p,a) -#define IDirect3DDevice9Ex_GetScissorRect(p,a) (p)->lpVtbl->GetScissorRect(p,a) -#define IDirect3DDevice9Ex_SetSoftwareVertexProcessing(p,a) (p)->lpVtbl->SetSoftwareVertexProcessing(p,a) -#define IDirect3DDevice9Ex_GetSoftwareVertexProcessing(p) (p)->lpVtbl->GetSoftwareVertexProcessing(p) -#define IDirect3DDevice9Ex_SetNPatchMode(p,a) (p)->lpVtbl->SetNPatchMode(p,a) -#define IDirect3DDevice9Ex_GetNPatchMode(p) (p)->lpVtbl->GetNPatchMode(p) -#define IDirect3DDevice9Ex_DrawPrimitive(p,a,b,c) (p)->lpVtbl->DrawPrimitive(p,a,b,c) -#define IDirect3DDevice9Ex_DrawIndexedPrimitive(p,a,b,c,d,e,f) (p)->lpVtbl->DrawIndexedPrimitive(p,a,b,c,d,e,f) -#define IDirect3DDevice9Ex_DrawPrimitiveUP(p,a,b,c,d) (p)->lpVtbl->DrawPrimitiveUP(p,a,b,c,d) -#define IDirect3DDevice9Ex_DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->DrawIndexedPrimitiveUP(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9Ex_ProcessVertices(p,a,b,c,d,e,f) (p)->lpVtbl->ProcessVertices(p,a,b,c,d,e,f) -#define IDirect3DDevice9Ex_CreateVertexDeclaration(p,a,b) (p)->lpVtbl->CreateVertexDeclaration(p,a,b) -#define IDirect3DDevice9Ex_SetVertexDeclaration(p,a) (p)->lpVtbl->SetVertexDeclaration(p,a) -#define IDirect3DDevice9Ex_GetVertexDeclaration(p,a) (p)->lpVtbl->GetVertexDeclaration(p,a) -#define IDirect3DDevice9Ex_SetFVF(p,a) (p)->lpVtbl->SetFVF(p,a) -#define IDirect3DDevice9Ex_GetFVF(p,a) (p)->lpVtbl->GetFVF(p,a) -#define IDirect3DDevice9Ex_CreateVertexShader(p,a,b) (p)->lpVtbl->CreateVertexShader(p,a,b) -#define IDirect3DDevice9Ex_SetVertexShader(p,a) (p)->lpVtbl->SetVertexShader(p,a) -#define IDirect3DDevice9Ex_GetVertexShader(p,a) (p)->lpVtbl->GetVertexShader(p,a) -#define IDirect3DDevice9Ex_SetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantF(p,a,b,c) -#define IDirect3DDevice9Ex_GetVertexShaderConstantF(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantF(p,a,b,c) -#define IDirect3DDevice9Ex_SetVertexShaderConstantI(p,a,b,c) 
(p)->lpVtbl->SetVertexShaderConstantI(p,a,b,c) -#define IDirect3DDevice9Ex_GetVertexShaderConstantI(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantI(p,a,b,c) -#define IDirect3DDevice9Ex_SetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->SetVertexShaderConstantB(p,a,b,c) -#define IDirect3DDevice9Ex_GetVertexShaderConstantB(p,a,b,c) (p)->lpVtbl->GetVertexShaderConstantB(p,a,b,c) -#define IDirect3DDevice9Ex_SetStreamSource(p,a,b,c,d) (p)->lpVtbl->SetStreamSource(p,a,b,c,d) -#define IDirect3DDevice9Ex_GetStreamSource(p,a,b,c,d) (p)->lpVtbl->GetStreamSource(p,a,b,c,d) -#define IDirect3DDevice9Ex_SetStreamSourceFreq(p,a,b) (p)->lpVtbl->SetStreamSourceFreq(p,a,b) -#define IDirect3DDevice9Ex_GetStreamSourceFreq(p,a,b) (p)->lpVtbl->GetStreamSourceFreq(p,a,b) -#define IDirect3DDevice9Ex_SetIndices(p,a) (p)->lpVtbl->SetIndices(p,a) -#define IDirect3DDevice9Ex_GetIndices(p,a) (p)->lpVtbl->GetIndices(p,a) -#define IDirect3DDevice9Ex_CreatePixelShader(p,a,b) (p)->lpVtbl->CreatePixelShader(p,a,b) -#define IDirect3DDevice9Ex_SetPixelShader(p,a) (p)->lpVtbl->SetPixelShader(p,a) -#define IDirect3DDevice9Ex_GetPixelShader(p,a) (p)->lpVtbl->GetPixelShader(p,a) -#define IDirect3DDevice9Ex_SetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantF(p,a,b,c) -#define IDirect3DDevice9Ex_GetPixelShaderConstantF(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantF(p,a,b,c) -#define IDirect3DDevice9Ex_SetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantI(p,a,b,c) -#define IDirect3DDevice9Ex_GetPixelShaderConstantI(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantI(p,a,b,c) -#define IDirect3DDevice9Ex_SetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->SetPixelShaderConstantB(p,a,b,c) -#define IDirect3DDevice9Ex_GetPixelShaderConstantB(p,a,b,c) (p)->lpVtbl->GetPixelShaderConstantB(p,a,b,c) -#define IDirect3DDevice9Ex_DrawRectPatch(p,a,b,c) (p)->lpVtbl->DrawRectPatch(p,a,b,c) -#define IDirect3DDevice9Ex_DrawTriPatch(p,a,b,c) (p)->lpVtbl->DrawTriPatch(p,a,b,c) -#define 
IDirect3DDevice9Ex_DeletePatch(p,a) (p)->lpVtbl->DeletePatch(p,a) -#define IDirect3DDevice9Ex_CreateQuery(p,a,b) (p)->lpVtbl->CreateQuery(p,a,b) -/* IDirect3DDevice9Ex macros */ -#define IDirect3DDevice9Ex_SetConvolutionMonoKernel(p,a,b,c,d) (p)->lpVtbl->SetConvolutionMonoKernel(p,a,b,c,d) -#define IDirect3DDevice9Ex_ComposeRects(p,a,b,c,d,e,f,g,h) (p)->lpVtbl->ComposeRects(p,a,b,c,d,e,f,g,h) -#define IDirect3DDevice9Ex_PresentEx(p,a,b,c,d,e) (p)->lpVtbl->PresentEx(p,a,b,c,d,e) -#define IDirect3DDevice9Ex_GetGPUThreadPriority(p,a) (p)->lpVtbl->GetGPUThreadPriority(p,a) -#define IDirect3DDevice9Ex_SetGPUThreadPriority(p,a) (p)->lpVtbl->SetGPUThreadPriority(p,a) -#define IDirect3DDevice9Ex_WaitForVBlank(p,a) (p)->lpVtbl->WaitForVBlank(p,a) -#define IDirect3DDevice9Ex_CheckResourceResidency(p,a,b) (p)->lpVtbl->CheckResourceResidency(p,a,b) -#define IDirect3DDevice9Ex_SetMaximumFrameLatency(p,a) (p)->lpVtbl->SetMaximumFrameLatency(p,a) -#define IDirect3DDevice9Ex_GetMaximumFrameLatency(p,a) (p)->lpVtbl->GetMaximumFrameLatency(p,a) -#define IDirect3DDevice9Ex_CheckDeviceState(p,a) (p)->lpVtbl->CheckDeviceState(p,a) -#define IDirect3DDevice9Ex_CreateRenderTargetEx(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateRenderTargetEx(p,a,b,c,d,e,f,g,h,i) -#define IDirect3DDevice9Ex_CreateOffscreenPlainSurfaceEx(p,a,b,c,d,e,f,g) (p)->lpVtbl->CreateOffscreenPlainSurfaceEx(p,a,b,c,d,e,f,g) -#define IDirect3DDevice9Ex_CreateDepthStencilSurfaceEx(p,a,b,c,d,e,f,g,h,i) (p)->lpVtbl->CreateDepthStencilSurfaceEx(p,a,b,c,d,e,f,g,h,i) -#define IDirect3DDevice9Ex_ResetEx(p,a,b) (p)->lpVtbl->ResetEx(p,a,b) -#define IDirect3DDevice9Ex_GetDisplayModeEx(p,a,b,c) (p)->lpVtbl->GetDisplayModeEx(p,a,b,c) - -typedef struct IDirect3DDevice9VideoVtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DDevice9Video *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DDevice9Video *This); - ULONG (WINAPI *Release)(IDirect3DDevice9Video *This); - /* IDirect3DDevice9Video */ - 
HRESULT (WINAPI *GetContentProtectionCaps)(IDirect3DDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, D3DCONTENTPROTECTIONCAPS *pCaps); - HRESULT (WINAPI *CreateAuthenticatedChannel)(IDirect3DDevice9Video *This, D3DAUTHENTICATEDCHANNELTYPE ChannelType, IDirect3DAuthenticatedChannel9 **ppAuthenticatedChannel, HANDLE *pChannelHandle); - HRESULT (WINAPI *CreateCryptoSession)(IDirect3DDevice9Video *This, const GUID *pCryptoType, const GUID *pDecodeProfile, IDirect3DCryptoSession9 **ppCryptoSession, HANDLE *pCryptoHandle); -} IDirect3DDevice9VideoVtbl; -struct IDirect3DDevice9Video -{ - IDirect3DDevice9VideoVtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DDevice9Video_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DDevice9Video_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DDevice9Video_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DDevice9Video macros */ -#define IDirect3DDevice9Video_GetContentProtectionCaps(p,a,b,c) (p)->lpVtbl->GetContentProtectionCaps(p,a,b,c) -#define IDirect3DDevice9Video_CreateAuthenticatedChannel(p,a,b,c) (p)->lpVtbl->CreateAuthenticatedChannel(p,a,b,c) -#define IDirect3DDevice9Video_CreateCryptoSession(p,a,b,c,d) (p)->lpVtbl->CreateCryptoSession(p,a,b,c,d) - -typedef struct IDirect3DIndexBuffer9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DIndexBuffer9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DIndexBuffer9 *This); - ULONG (WINAPI *Release)(IDirect3DIndexBuffer9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DIndexBuffer9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DIndexBuffer9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DIndexBuffer9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DIndexBuffer9 *This, REFGUID refguid); - DWORD (WINAPI 
*SetPriority)(IDirect3DIndexBuffer9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DIndexBuffer9 *This); - void (WINAPI *PreLoad)(IDirect3DIndexBuffer9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DIndexBuffer9 *This); - /* IDirect3DIndexBuffer9 */ - HRESULT (WINAPI *Lock)(IDirect3DIndexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags); - HRESULT (WINAPI *Unlock)(IDirect3DIndexBuffer9 *This); - HRESULT (WINAPI *GetDesc)(IDirect3DIndexBuffer9 *This, D3DINDEXBUFFER_DESC *pDesc); -} IDirect3DIndexBuffer9Vtbl; -struct IDirect3DIndexBuffer9 -{ - IDirect3DIndexBuffer9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DIndexBuffer9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DIndexBuffer9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DIndexBuffer9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DIndexBuffer9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DIndexBuffer9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DIndexBuffer9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DIndexBuffer9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DIndexBuffer9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DIndexBuffer9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DIndexBuffer9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DIndexBuffer9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DIndexBuffer9 macros */ -#define IDirect3DIndexBuffer9_Lock(p,a,b,c,d) (p)->lpVtbl->Lock(p,a,b,c,d) -#define IDirect3DIndexBuffer9_Unlock(p) (p)->lpVtbl->Unlock(p) -#define IDirect3DIndexBuffer9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) - -typedef struct IDirect3DPixelShader9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DPixelShader9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI 
*AddRef)(IDirect3DPixelShader9 *This); - ULONG (WINAPI *Release)(IDirect3DPixelShader9 *This); - /* IDirect3DPixelShader9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DPixelShader9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *GetFunction)(IDirect3DPixelShader9 *This, void *pData, UINT *pSizeOfData); -} IDirect3DPixelShader9Vtbl; -struct IDirect3DPixelShader9 -{ - IDirect3DPixelShader9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DPixelShader9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DPixelShader9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DPixelShader9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DPixelShader9 macros */ -#define IDirect3DPixelShader9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DPixelShader9_GetFunction(p,a,b) (p)->lpVtbl->GetFunction(p,a,b) - -typedef struct IDirect3DQuery9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DQuery9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DQuery9 *This); - ULONG (WINAPI *Release)(IDirect3DQuery9 *This); - /* IDirect3DQuery9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DQuery9 *This, IDirect3DDevice9 **ppDevice); - D3DQUERYTYPE (WINAPI *GetType)(IDirect3DQuery9 *This); - DWORD (WINAPI *GetDataSize)(IDirect3DQuery9 *This); - HRESULT (WINAPI *Issue)(IDirect3DQuery9 *This, DWORD dwIssueFlags); - HRESULT (WINAPI *GetData)(IDirect3DQuery9 *This, void *pData, DWORD dwSize, DWORD dwGetDataFlags); -} IDirect3DQuery9Vtbl; -struct IDirect3DQuery9 -{ - IDirect3DQuery9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DQuery9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DQuery9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DQuery9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DQuery9 macros */ -#define IDirect3DQuery9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DQuery9_GetType(p) (p)->lpVtbl->GetType(p) -#define IDirect3DQuery9_GetDataSize(p) 
(p)->lpVtbl->GetDataSize(p) -#define IDirect3DQuery9_Issue(p,a) (p)->lpVtbl->Issue(p,a) -#define IDirect3DQuery9_GetData(p,a,b,c) (p)->lpVtbl->GetData(p,a,b,c) - -typedef struct IDirect3DResource9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DResource9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DResource9 *This); - ULONG (WINAPI *Release)(IDirect3DResource9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DResource9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DResource9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DResource9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DResource9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DResource9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DResource9 *This); - void (WINAPI *PreLoad)(IDirect3DResource9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DResource9 *This); -} IDirect3DResource9Vtbl; -struct IDirect3DResource9 -{ - IDirect3DResource9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DResource9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DResource9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DResource9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DResource9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DResource9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DResource9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DResource9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DResource9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DResource9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DResource9_PreLoad(p) 
(p)->lpVtbl->PreLoad(p) -#define IDirect3DResource9_GetType(p) (p)->lpVtbl->GetType(p) - -typedef struct IDirect3DStateBlock9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DStateBlock9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DStateBlock9 *This); - ULONG (WINAPI *Release)(IDirect3DStateBlock9 *This); - /* IDirect3DStateBlock9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DStateBlock9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *Capture)(IDirect3DStateBlock9 *This); - HRESULT (WINAPI *Apply)(IDirect3DStateBlock9 *This); -} IDirect3DStateBlock9Vtbl; -struct IDirect3DStateBlock9 -{ - IDirect3DStateBlock9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DStateBlock9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DStateBlock9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DStateBlock9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DStateBlock9 macros */ -#define IDirect3DStateBlock9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DStateBlock9_Capture(p) (p)->lpVtbl->Capture(p) -#define IDirect3DStateBlock9_Apply(p) (p)->lpVtbl->Apply(p) - -typedef struct IDirect3DSurface9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DSurface9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DSurface9 *This); - ULONG (WINAPI *Release)(IDirect3DSurface9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DSurface9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DSurface9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DSurface9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DSurface9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DSurface9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DSurface9 *This); - void (WINAPI 
*PreLoad)(IDirect3DSurface9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DSurface9 *This); - /* IDirect3DSurface9 */ - HRESULT (WINAPI *GetContainer)(IDirect3DSurface9 *This, REFIID riid, void **ppContainer); - HRESULT (WINAPI *GetDesc)(IDirect3DSurface9 *This, D3DSURFACE_DESC *pDesc); - HRESULT (WINAPI *LockRect)(IDirect3DSurface9 *This, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags); - HRESULT (WINAPI *UnlockRect)(IDirect3DSurface9 *This); - HRESULT (WINAPI *GetDC)(IDirect3DSurface9 *This, HDC *phdc); - HRESULT (WINAPI *ReleaseDC)(IDirect3DSurface9 *This, HDC hdc); -} IDirect3DSurface9Vtbl; -struct IDirect3DSurface9 -{ - IDirect3DSurface9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DSurface9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DSurface9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DSurface9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DSurface9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DSurface9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DSurface9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DSurface9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DSurface9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DSurface9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DSurface9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DSurface9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DSurface9 macros */ -#define IDirect3DSurface9_GetContainer(p,a,b) (p)->lpVtbl->GetContainer(p,a,b) -#define IDirect3DSurface9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) -#define IDirect3DSurface9_LockRect(p,a,b,c) (p)->lpVtbl->LockRect(p,a,b,c) -#define IDirect3DSurface9_UnlockRect(p) (p)->lpVtbl->UnlockRect(p) -#define IDirect3DSurface9_GetDC(p,a) (p)->lpVtbl->GetDC(p,a) -#define IDirect3DSurface9_ReleaseDC(p,a) 
(p)->lpVtbl->ReleaseDC(p,a) - -typedef struct IDirect3DSwapChain9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DSwapChain9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DSwapChain9 *This); - ULONG (WINAPI *Release)(IDirect3DSwapChain9 *This); - /* IDirect3DSwapChain9 */ - HRESULT (WINAPI *Present)(IDirect3DSwapChain9 *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags); - HRESULT (WINAPI *GetFrontBufferData)(IDirect3DSwapChain9 *This, IDirect3DSurface9 *pDestSurface); - HRESULT (WINAPI *GetBackBuffer)(IDirect3DSwapChain9 *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); - HRESULT (WINAPI *GetRasterStatus)(IDirect3DSwapChain9 *This, D3DRASTER_STATUS *pRasterStatus); - HRESULT (WINAPI *GetDisplayMode)(IDirect3DSwapChain9 *This, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *GetDevice)(IDirect3DSwapChain9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *GetPresentParameters)(IDirect3DSwapChain9 *This, D3DPRESENT_PARAMETERS *pPresentationParameters); -} IDirect3DSwapChain9Vtbl; -struct IDirect3DSwapChain9 -{ - IDirect3DSwapChain9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DSwapChain9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DSwapChain9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DSwapChain9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DSwapChain9 macros */ -#define IDirect3DSwapChain9_Present(p,a,b,c,d,e) (p)->lpVtbl->Present(p,a,b,c,d,e) -#define IDirect3DSwapChain9_GetFrontBufferData(p,a) (p)->lpVtbl->GetFrontBufferData(p,a) -#define IDirect3DSwapChain9_GetBackBuffer(p,a,b,c) (p)->lpVtbl->GetBackBuffer(p,a,b,c) -#define IDirect3DSwapChain9_GetRasterStatus(p,a) (p)->lpVtbl->GetRasterStatus(p,a) -#define IDirect3DSwapChain9_GetDisplayMode(p,a) (p)->lpVtbl->GetDisplayMode(p,a) -#define IDirect3DSwapChain9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define 
IDirect3DSwapChain9_GetPresentParameters(p,a) (p)->lpVtbl->GetPresentParameters(p,a) - -typedef struct IDirect3DSwapChain9ExVtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DSwapChain9Ex *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DSwapChain9Ex *This); - ULONG (WINAPI *Release)(IDirect3DSwapChain9Ex *This); - /* IDirect3DSwapChain9 */ - HRESULT (WINAPI *Present)(IDirect3DSwapChain9Ex *This, const RECT *pSourceRect, const RECT *pDestRect, HWND hDestWindowOverride, const RGNDATA *pDirtyRegion, DWORD dwFlags); - HRESULT (WINAPI *GetFrontBufferData)(IDirect3DSwapChain9Ex *This, IDirect3DSurface9 *pDestSurface); - HRESULT (WINAPI *GetBackBuffer)(IDirect3DSwapChain9Ex *This, UINT iBackBuffer, D3DBACKBUFFER_TYPE Type, IDirect3DSurface9 **ppBackBuffer); - HRESULT (WINAPI *GetRasterStatus)(IDirect3DSwapChain9Ex *This, D3DRASTER_STATUS *pRasterStatus); - HRESULT (WINAPI *GetDisplayMode)(IDirect3DSwapChain9Ex *This, D3DDISPLAYMODE *pMode); - HRESULT (WINAPI *GetDevice)(IDirect3DSwapChain9Ex *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *GetPresentParameters)(IDirect3DSwapChain9Ex *This, D3DPRESENT_PARAMETERS *pPresentationParameters); - /* IDirect3DSwapChain9Ex */ - HRESULT (WINAPI *GetLastPresentCount)(IDirect3DSwapChain9Ex *This, UINT *pLastPresentCount); - HRESULT (WINAPI *GetPresentStats)(IDirect3DSwapChain9Ex *This, D3DPRESENTSTATS *pPresentationStatistics); - HRESULT (WINAPI *GetDisplayModeEx)(IDirect3DSwapChain9Ex *This, D3DDISPLAYMODEEX *pMode, D3DDISPLAYROTATION *pRotation); -} IDirect3DSwapChain9ExVtbl; -struct IDirect3DSwapChain9Ex -{ - IDirect3DSwapChain9ExVtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DSwapChain9Ex_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DSwapChain9Ex_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DSwapChain9Ex_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DSwapChain9 macros */ -#define IDirect3DSwapChain9Ex_Present(p,a,b,c,d,e) 
(p)->lpVtbl->Present(p,a,b,c,d,e) -#define IDirect3DSwapChain9Ex_GetFrontBufferData(p,a) (p)->lpVtbl->GetFrontBufferData(p,a) -#define IDirect3DSwapChain9Ex_GetBackBuffer(p,a,b,c) (p)->lpVtbl->GetBackBuffer(p,a,b,c) -#define IDirect3DSwapChain9Ex_GetRasterStatus(p,a) (p)->lpVtbl->GetRasterStatus(p,a) -#define IDirect3DSwapChain9Ex_GetDisplayMode(p,a) (p)->lpVtbl->GetDisplayMode(p,a) -#define IDirect3DSwapChain9Ex_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DSwapChain9Ex_GetPresentParameters(p,a) (p)->lpVtbl->GetPresentParameters(p,a) -/* IDirect3DSwapChain9Ex macros */ -#define IDirect3DSwapChain9Ex_GetLastPresentCount(p,a) (p)->lpVtbl->GetLastPresentCount(p,a) -#define IDirect3DSwapChain9Ex_GetPresentStats(p,a) (p)->lpVtbl->GetPresentStats(p,a) -#define IDirect3DSwapChain9Ex_GetDisplayModeEx(p,a,b) (p)->lpVtbl->GetDisplayModeEx(p,a,b) - -typedef struct IDirect3DTexture9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DTexture9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DTexture9 *This); - ULONG (WINAPI *Release)(IDirect3DTexture9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DTexture9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DTexture9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DTexture9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DTexture9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DTexture9 *This); - void (WINAPI *PreLoad)(IDirect3DTexture9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DTexture9 *This); - /* IDirect3DBaseTexture9 */ - DWORD (WINAPI *SetLOD)(IDirect3DTexture9 *This, DWORD LODNew); - DWORD (WINAPI *GetLOD)(IDirect3DTexture9 *This); - DWORD (WINAPI *GetLevelCount)(IDirect3DTexture9 *This); - 
HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); - D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DTexture9 *This); - void (WINAPI *GenerateMipSubLevels)(IDirect3DTexture9 *This); - /* IDirect3DTexture9 */ - HRESULT (WINAPI *GetLevelDesc)(IDirect3DTexture9 *This, UINT Level, D3DSURFACE_DESC *pDesc); - HRESULT (WINAPI *GetSurfaceLevel)(IDirect3DTexture9 *This, UINT Level, IDirect3DSurface9 **ppSurfaceLevel); - HRESULT (WINAPI *LockRect)(IDirect3DTexture9 *This, UINT Level, D3DLOCKED_RECT *pLockedRect, const RECT *pRect, DWORD Flags); - HRESULT (WINAPI *UnlockRect)(IDirect3DTexture9 *This, UINT Level); - HRESULT (WINAPI *AddDirtyRect)(IDirect3DTexture9 *This, const RECT *pDirtyRect); -} IDirect3DTexture9Vtbl; -struct IDirect3DTexture9 -{ - IDirect3DTexture9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DTexture9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DTexture9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DBaseTexture9 macros */ -#define IDirect3DTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) -#define IDirect3DTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) -#define IDirect3DTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) -#define IDirect3DTexture9_SetAutoGenFilterType(p,a) 
(p)->lpVtbl->SetAutoGenFilterType(p,a) -#define IDirect3DTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) -#define IDirect3DTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) -/* IDirect3DTexture9 macros */ -#define IDirect3DTexture9_GetLevelDesc(p,a,b) (p)->lpVtbl->GetLevelDesc(p,a,b) -#define IDirect3DTexture9_GetSurfaceLevel(p,a,b) (p)->lpVtbl->GetSurfaceLevel(p,a,b) -#define IDirect3DTexture9_LockRect(p,a,b,c,d) (p)->lpVtbl->LockRect(p,a,b,c,d) -#define IDirect3DTexture9_UnlockRect(p,a) (p)->lpVtbl->UnlockRect(p,a) -#define IDirect3DTexture9_AddDirtyRect(p,a) (p)->lpVtbl->AddDirtyRect(p,a) - -typedef struct IDirect3DVertexBuffer9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DVertexBuffer9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DVertexBuffer9 *This); - ULONG (WINAPI *Release)(IDirect3DVertexBuffer9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DVertexBuffer9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DVertexBuffer9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DVertexBuffer9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DVertexBuffer9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DVertexBuffer9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DVertexBuffer9 *This); - void (WINAPI *PreLoad)(IDirect3DVertexBuffer9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DVertexBuffer9 *This); - /* IDirect3DVertexBuffer9 */ - HRESULT (WINAPI *Lock)(IDirect3DVertexBuffer9 *This, UINT OffsetToLock, UINT SizeToLock, void **ppbData, DWORD Flags); - HRESULT (WINAPI *Unlock)(IDirect3DVertexBuffer9 *This); - HRESULT (WINAPI *GetDesc)(IDirect3DVertexBuffer9 *This, D3DVERTEXBUFFER_DESC *pDesc); -} IDirect3DVertexBuffer9Vtbl; -struct IDirect3DVertexBuffer9 -{ - 
IDirect3DVertexBuffer9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DVertexBuffer9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DVertexBuffer9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DVertexBuffer9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DVertexBuffer9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DVertexBuffer9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DVertexBuffer9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DVertexBuffer9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DVertexBuffer9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DVertexBuffer9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DVertexBuffer9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DVertexBuffer9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DVertexBuffer9 macros */ -#define IDirect3DVertexBuffer9_Lock(p,a,b,c,d) (p)->lpVtbl->Lock(p,a,b,c,d) -#define IDirect3DVertexBuffer9_Unlock(p) (p)->lpVtbl->Unlock(p) -#define IDirect3DVertexBuffer9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) - -typedef struct IDirect3DVertexDeclaration9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DVertexDeclaration9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DVertexDeclaration9 *This); - ULONG (WINAPI *Release)(IDirect3DVertexDeclaration9 *This); - /* IDirect3DVertexDeclaration9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DVertexDeclaration9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *GetDeclaration)(IDirect3DVertexDeclaration9 *This, D3DVERTEXELEMENT9 *pElement, UINT *pNumElements); -} IDirect3DVertexDeclaration9Vtbl; -struct IDirect3DVertexDeclaration9 -{ - IDirect3DVertexDeclaration9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DVertexDeclaration9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) 
-#define IDirect3DVertexDeclaration9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DVertexDeclaration9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DVertexDeclaration9 macros */ -#define IDirect3DVertexDeclaration9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DVertexDeclaration9_GetDeclaration(p,a,b) (p)->lpVtbl->GetDeclaration(p,a,b) - -typedef struct IDirect3DVertexShader9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DVertexShader9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DVertexShader9 *This); - ULONG (WINAPI *Release)(IDirect3DVertexShader9 *This); - /* IDirect3DVertexShader9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DVertexShader9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *GetFunction)(IDirect3DVertexShader9 *This, void *pData, UINT *pSizeOfData); -} IDirect3DVertexShader9Vtbl; -struct IDirect3DVertexShader9 -{ - IDirect3DVertexShader9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DVertexShader9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DVertexShader9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DVertexShader9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DVertexShader9 macros */ -#define IDirect3DVertexShader9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DVertexShader9_GetFunction(p,a,b) (p)->lpVtbl->GetFunction(p,a,b) - -typedef struct IDirect3DVolume9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DVolume9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DVolume9 *This); - ULONG (WINAPI *Release)(IDirect3DVolume9 *This); - /* IDirect3DVolume9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DVolume9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DVolume9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DVolume9 *This, REFGUID refguid, void *pData, DWORD *pSizeOfData); - 
HRESULT (WINAPI *FreePrivateData)(IDirect3DVolume9 *This, REFGUID refguid); - HRESULT (WINAPI *GetContainer)(IDirect3DVolume9 *This, REFIID riid, void **ppContainer); - HRESULT (WINAPI *GetDesc)(IDirect3DVolume9 *This, D3DVOLUME_DESC *pDesc); - HRESULT (WINAPI *LockBox)(IDirect3DVolume9 *This, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags); - HRESULT (WINAPI *UnlockBox)(IDirect3DVolume9 *This); -} IDirect3DVolume9Vtbl; -struct IDirect3DVolume9 -{ - IDirect3DVolume9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DVolume9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DVolume9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DVolume9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DVolume9 macros */ -#define IDirect3DVolume9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DVolume9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define IDirect3DVolume9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DVolume9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DVolume9_GetContainer(p,a,b) (p)->lpVtbl->GetContainer(p,a,b) -#define IDirect3DVolume9_GetDesc(p,a) (p)->lpVtbl->GetDesc(p,a) -#define IDirect3DVolume9_LockBox(p,a,b,c) (p)->lpVtbl->LockBox(p,a,b,c) -#define IDirect3DVolume9_UnlockBox(p) (p)->lpVtbl->UnlockBox(p) - -typedef struct IDirect3DVolumeTexture9Vtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IDirect3DVolumeTexture9 *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IDirect3DVolumeTexture9 *This); - ULONG (WINAPI *Release)(IDirect3DVolumeTexture9 *This); - /* IDirect3DResource9 */ - HRESULT (WINAPI *GetDevice)(IDirect3DVolumeTexture9 *This, IDirect3DDevice9 **ppDevice); - HRESULT (WINAPI *SetPrivateData)(IDirect3DVolumeTexture9 *This, REFGUID refguid, const void *pData, DWORD SizeOfData, DWORD Flags); - HRESULT (WINAPI *GetPrivateData)(IDirect3DVolumeTexture9 *This, REFGUID refguid, void *pData, 
DWORD *pSizeOfData); - HRESULT (WINAPI *FreePrivateData)(IDirect3DVolumeTexture9 *This, REFGUID refguid); - DWORD (WINAPI *SetPriority)(IDirect3DVolumeTexture9 *This, DWORD PriorityNew); - DWORD (WINAPI *GetPriority)(IDirect3DVolumeTexture9 *This); - void (WINAPI *PreLoad)(IDirect3DVolumeTexture9 *This); - D3DRESOURCETYPE (WINAPI *GetType)(IDirect3DVolumeTexture9 *This); - /* IDirect3DBaseTexture9 */ - DWORD (WINAPI *SetLOD)(IDirect3DVolumeTexture9 *This, DWORD LODNew); - DWORD (WINAPI *GetLOD)(IDirect3DVolumeTexture9 *This); - DWORD (WINAPI *GetLevelCount)(IDirect3DVolumeTexture9 *This); - HRESULT (WINAPI *SetAutoGenFilterType)(IDirect3DVolumeTexture9 *This, D3DTEXTUREFILTERTYPE FilterType); - D3DTEXTUREFILTERTYPE (WINAPI *GetAutoGenFilterType)(IDirect3DVolumeTexture9 *This); - void (WINAPI *GenerateMipSubLevels)(IDirect3DVolumeTexture9 *This); - /* IDirect3DVolumeTexture9 */ - HRESULT (WINAPI *GetLevelDesc)(IDirect3DVolumeTexture9 *This, UINT Level, D3DVOLUME_DESC *pDesc); - HRESULT (WINAPI *GetVolumeLevel)(IDirect3DVolumeTexture9 *This, UINT Level, IDirect3DVolume9 **ppVolumeLevel); - HRESULT (WINAPI *LockBox)(IDirect3DVolumeTexture9 *This, UINT Level, D3DLOCKED_BOX *pLockedVolume, const D3DBOX *pBox, DWORD Flags); - HRESULT (WINAPI *UnlockBox)(IDirect3DVolumeTexture9 *This, UINT Level); - HRESULT (WINAPI *AddDirtyBox)(IDirect3DVolumeTexture9 *This, const D3DBOX *pDirtyBox); -} IDirect3DVolumeTexture9Vtbl; -struct IDirect3DVolumeTexture9 -{ - IDirect3DVolumeTexture9Vtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IDirect3DVolumeTexture9_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IDirect3DVolumeTexture9_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IDirect3DVolumeTexture9_Release(p) (p)->lpVtbl->Release(p) -/* IDirect3DResource9 macros */ -#define IDirect3DVolumeTexture9_GetDevice(p,a) (p)->lpVtbl->GetDevice(p,a) -#define IDirect3DVolumeTexture9_SetPrivateData(p,a,b,c,d) (p)->lpVtbl->SetPrivateData(p,a,b,c,d) -#define 
IDirect3DVolumeTexture9_GetPrivateData(p,a,b,c) (p)->lpVtbl->GetPrivateData(p,a,b,c) -#define IDirect3DVolumeTexture9_FreePrivateData(p,a) (p)->lpVtbl->FreePrivateData(p,a) -#define IDirect3DVolumeTexture9_SetPriority(p,a) (p)->lpVtbl->SetPriority(p,a) -#define IDirect3DVolumeTexture9_GetPriority(p) (p)->lpVtbl->GetPriority(p) -#define IDirect3DVolumeTexture9_PreLoad(p) (p)->lpVtbl->PreLoad(p) -#define IDirect3DVolumeTexture9_GetType(p) (p)->lpVtbl->GetType(p) -/* IDirect3DBaseTexture9 macros */ -#define IDirect3DVolumeTexture9_SetLOD(p,a) (p)->lpVtbl->SetLOD(p,a) -#define IDirect3DVolumeTexture9_GetLOD(p) (p)->lpVtbl->GetLOD(p) -#define IDirect3DVolumeTexture9_GetLevelCount(p) (p)->lpVtbl->GetLevelCount(p) -#define IDirect3DVolumeTexture9_SetAutoGenFilterType(p,a) (p)->lpVtbl->SetAutoGenFilterType(p,a) -#define IDirect3DVolumeTexture9_GetAutoGenFilterType(p) (p)->lpVtbl->GetAutoGenFilterType(p) -#define IDirect3DVolumeTexture9_GenerateMipSubLevels(p) (p)->lpVtbl->GenerateMipSubLevels(p) -/* IDirect3DVolumeTexture9 macros */ -#define IDirect3DVolumeTexture9_GetLevelDesc(p,a,b) (p)->lpVtbl->GetLevelDesc(p,a,b) -#define IDirect3DVolumeTexture9_GetVolumeLevel(p,a,b) (p)->lpVtbl->GetVolumeLevel(p,a,b) -#define IDirect3DVolumeTexture9_LockBox(p,a,b,c,d) (p)->lpVtbl->LockBox(p,a,b,c,d) -#define IDirect3DVolumeTexture9_UnlockBox(p,a) (p)->lpVtbl->UnlockBox(p,a) -#define IDirect3DVolumeTexture9_AddDirtyBox(p,a) (p)->lpVtbl->AddDirtyBox(p,a) - -#endif /* __cplusplus */ - -#ifdef _WIN32 - -IDirect3D9 *WINAPI -Direct3DCreate9( UINT SDKVersion ); - -HRESULT WINAPI -Direct3DCreate9Ex( UINT SDKVersion, - IDirect3D9Ex **ppD3D9 ); - -void *WINAPI -Direct3DShaderValidatorCreate9( void ); - -int WINAPI -D3DPERF_BeginEvent( D3DCOLOR color, - LPCWSTR name ); - -int WINAPI -D3DPERF_EndEvent( void ); - -DWORD WINAPI -D3DPERF_GetStatus( void ); - -void WINAPI -D3DPERF_SetOptions( DWORD options ); - -BOOL WINAPI -D3DPERF_QueryRepeatFrame( void ); - -void WINAPI -D3DPERF_SetMarker( 
D3DCOLOR color, - LPCWSTR name ); - -void WINAPI -D3DPERF_SetRegion( D3DCOLOR color, - LPCWSTR name ); - -void WINAPI -DebugSetMute( void ); - -#endif - -#endif /* _D3D9_H_ */ diff --git a/nine-native/include/D3D9/d3d9caps.h b/nine-native/include/D3D9/d3d9caps.h deleted file mode 100644 index 70f9919c5..000000000 --- a/nine-native/include/D3D9/d3d9caps.h +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Copyright 2011 Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#ifndef _D3D9CAPS_H_ -#define _D3D9CAPS_H_ - -#include "d3d9types.h" - -/* Caps flags */ -#define D3DCAPS_OVERLAY 0x00000800 -#define D3DCAPS_READ_SCANLINE 0x00020000 - -#define D3DCAPS2_FULLSCREENGAMMA 0x00020000 -#define D3DCAPS2_CANCALIBRATEGAMMA 0x00100000 -#define D3DCAPS2_RESERVED 0x02000000 -#define D3DCAPS2_CANMANAGERESOURCE 0x10000000 -#define D3DCAPS2_DYNAMICTEXTURES 0x20000000 -#define D3DCAPS2_CANAUTOGENMIPMAP 0x40000000 -#define D3DCAPS2_CANSHARERESOURCE 0x80000000 - -#define D3DCAPS3_ALPHA_FULLSCREEN_FLIP_OR_DISCARD 0x00000020 -#define D3DCAPS3_LINEAR_TO_SRGB_PRESENTATION 0x00000080 -#define D3DCAPS3_COPY_TO_VIDMEM 0x00000100 -#define D3DCAPS3_COPY_TO_SYSTEMMEM 0x00000200 -#define D3DCAPS3_DXVAHD 0x00000400 -#define D3DCAPS3_RESERVED 0x8000001F - -#define D3DPRESENT_INTERVAL_DEFAULT 0x00000000 -#define D3DPRESENT_INTERVAL_ONE 0x00000001 -#define D3DPRESENT_INTERVAL_TWO 0x00000002 -#define D3DPRESENT_INTERVAL_THREE 0x00000004 -#define D3DPRESENT_INTERVAL_FOUR 0x00000008 -#define D3DPRESENT_INTERVAL_IMMEDIATE 0x80000000 - -#define D3DCURSORCAPS_COLOR 0x00000001 -#define D3DCURSORCAPS_LOWRES 0x00000002 - -#define D3DDEVCAPS_EXECUTESYSTEMMEMORY 0x00000010 -#define D3DDEVCAPS_EXECUTEVIDEOMEMORY 0x00000020 -#define D3DDEVCAPS_TLVERTEXSYSTEMMEMORY 0x00000040 -#define D3DDEVCAPS_TLVERTEXVIDEOMEMORY 0x00000080 -#define D3DDEVCAPS_TEXTURESYSTEMMEMORY 0x00000100 -#define D3DDEVCAPS_TEXTUREVIDEOMEMORY 0x00000200 -#define D3DDEVCAPS_DRAWPRIMTLVERTEX 0x00000400 -#define D3DDEVCAPS_CANRENDERAFTERFLIP 0x00000800 -#define D3DDEVCAPS_TEXTURENONLOCALVIDMEM 0x00001000 -#define D3DDEVCAPS_DRAWPRIMITIVES2 0x00002000 -#define D3DDEVCAPS_SEPARATETEXTUREMEMORIES 0x00004000 -#define D3DDEVCAPS_DRAWPRIMITIVES2EX 0x00008000 -#define D3DDEVCAPS_HWTRANSFORMANDLIGHT 0x00010000 -#define D3DDEVCAPS_CANBLTSYSTONONLOCAL 0x00020000 -#define D3DDEVCAPS_HWRASTERIZATION 0x00080000 -#define D3DDEVCAPS_PUREDEVICE 0x00100000 -#define D3DDEVCAPS_QUINTICRTPATCHES 0x00200000 -#define 
D3DDEVCAPS_RTPATCHES 0x00400000 -#define D3DDEVCAPS_RTPATCHHANDLEZERO 0x00800000 -#define D3DDEVCAPS_NPATCHES 0x01000000 - -#define D3DPMISCCAPS_MASKZ 0x00000002 -#define D3DPMISCCAPS_CULLNONE 0x00000010 -#define D3DPMISCCAPS_CULLCW 0x00000020 -#define D3DPMISCCAPS_CULLCCW 0x00000040 -#define D3DPMISCCAPS_COLORWRITEENABLE 0x00000080 -#define D3DPMISCCAPS_CLIPPLANESCALEDPOINTS 0x00000100 -#define D3DPMISCCAPS_CLIPTLVERTS 0x00000200 -#define D3DPMISCCAPS_TSSARGTEMP 0x00000400 -#define D3DPMISCCAPS_BLENDOP 0x00000800 -#define D3DPMISCCAPS_NULLREFERENCE 0x00001000 -#define D3DPMISCCAPS_INDEPENDENTWRITEMASKS 0x00004000 -#define D3DPMISCCAPS_PERSTAGECONSTANT 0x00008000 -#define D3DPMISCCAPS_FOGANDSPECULARALPHA 0x00010000 -#define D3DPMISCCAPS_SEPARATEALPHABLEND 0x00020000 -#define D3DPMISCCAPS_MRTINDEPENDENTBITDEPTHS 0x00040000 -#define D3DPMISCCAPS_MRTPOSTPIXELSHADERBLENDING 0x00080000 -#define D3DPMISCCAPS_FOGVERTEXCLAMPED 0x00100000 -#define D3DPMISCCAPS_POSTBLENDSRGBCONVERT 0x00200000 - -#define D3DPRASTERCAPS_DITHER 0x00000001 -#define D3DPRASTERCAPS_ZTEST 0x00000010 -#define D3DPRASTERCAPS_FOGVERTEX 0x00000080 -#define D3DPRASTERCAPS_FOGTABLE 0x00000100 -#define D3DPRASTERCAPS_MIPMAPLODBIAS 0x00002000 -#define D3DPRASTERCAPS_ZBUFFERLESSHSR 0x00008000 -#define D3DPRASTERCAPS_FOGRANGE 0x00010000 -#define D3DPRASTERCAPS_ANISOTROPY 0x00020000 -#define D3DPRASTERCAPS_WBUFFER 0x00040000 -#define D3DPRASTERCAPS_WFOG 0x00100000 -#define D3DPRASTERCAPS_ZFOG 0x00200000 -#define D3DPRASTERCAPS_COLORPERSPECTIVE 0x00400000 -#define D3DPRASTERCAPS_SCISSORTEST 0x01000000 -#define D3DPRASTERCAPS_SLOPESCALEDEPTHBIAS 0x02000000 -#define D3DPRASTERCAPS_DEPTHBIAS 0x04000000 -#define D3DPRASTERCAPS_MULTISAMPLE_TOGGLE 0x08000000 - -#define D3DPCMPCAPS_NEVER 0x00000001 -#define D3DPCMPCAPS_LESS 0x00000002 -#define D3DPCMPCAPS_EQUAL 0x00000004 -#define D3DPCMPCAPS_LESSEQUAL 0x00000008 -#define D3DPCMPCAPS_GREATER 0x00000010 -#define D3DPCMPCAPS_NOTEQUAL 0x00000020 -#define 
D3DPCMPCAPS_GREATEREQUAL 0x00000040 -#define D3DPCMPCAPS_ALWAYS 0x00000080 - -#define D3DPBLENDCAPS_ZERO 0x00000001 -#define D3DPBLENDCAPS_ONE 0x00000002 -#define D3DPBLENDCAPS_SRCCOLOR 0x00000004 -#define D3DPBLENDCAPS_INVSRCCOLOR 0x00000008 -#define D3DPBLENDCAPS_SRCALPHA 0x00000010 -#define D3DPBLENDCAPS_INVSRCALPHA 0x00000020 -#define D3DPBLENDCAPS_DESTALPHA 0x00000040 -#define D3DPBLENDCAPS_INVDESTALPHA 0x00000080 -#define D3DPBLENDCAPS_DESTCOLOR 0x00000100 -#define D3DPBLENDCAPS_INVDESTCOLOR 0x00000200 -#define D3DPBLENDCAPS_SRCALPHASAT 0x00000400 -#define D3DPBLENDCAPS_BOTHSRCALPHA 0x00000800 -#define D3DPBLENDCAPS_BOTHINVSRCALPHA 0x00001000 -#define D3DPBLENDCAPS_BLENDFACTOR 0x00002000 -#ifndef D3D_DISABLE_9EX -# define D3DPBLENDCAPS_SRCCOLOR2 0x00004000 -# define D3DPBLENDCAPS_INVSRCCOLOR2 0x00008000 -#endif - -#define D3DPSHADECAPS_COLORGOURAUDRGB 0x00000008 -#define D3DPSHADECAPS_SPECULARGOURAUDRGB 0x00000200 -#define D3DPSHADECAPS_ALPHAGOURAUDBLEND 0x00004000 -#define D3DPSHADECAPS_FOGGOURAUD 0x00080000 - -#define D3DPTEXTURECAPS_PERSPECTIVE 0x00000001 -#define D3DPTEXTURECAPS_POW2 0x00000002 -#define D3DPTEXTURECAPS_ALPHA 0x00000004 -#define D3DPTEXTURECAPS_SQUAREONLY 0x00000020 -#define D3DPTEXTURECAPS_TEXREPEATNOTSCALEDBYSIZE 0x00000040 -#define D3DPTEXTURECAPS_ALPHAPALETTE 0x00000080 -#define D3DPTEXTURECAPS_NONPOW2CONDITIONAL 0x00000100 -#define D3DPTEXTURECAPS_PROJECTED 0x00000400 -#define D3DPTEXTURECAPS_CUBEMAP 0x00000800 -#define D3DPTEXTURECAPS_VOLUMEMAP 0x00002000 -#define D3DPTEXTURECAPS_MIPMAP 0x00004000 -#define D3DPTEXTURECAPS_MIPVOLUMEMAP 0x00008000 -#define D3DPTEXTURECAPS_MIPCUBEMAP 0x00010000 -#define D3DPTEXTURECAPS_CUBEMAP_POW2 0x00020000 -#define D3DPTEXTURECAPS_VOLUMEMAP_POW2 0x00040000 -#define D3DPTEXTURECAPS_NOPROJECTEDBUMPENV 0x00200000 - -#define D3DPTFILTERCAPS_MINFPOINT 0x00000100 -#define D3DPTFILTERCAPS_MINFLINEAR 0x00000200 -#define D3DPTFILTERCAPS_MINFANISOTROPIC 0x00000400 -#define D3DPTFILTERCAPS_MINFPYRAMIDALQUAD 
0x00000800 -#define D3DPTFILTERCAPS_MINFGAUSSIANQUAD 0x00001000 -#define D3DPTFILTERCAPS_MIPFPOINT 0x00010000 -#define D3DPTFILTERCAPS_MIPFLINEAR 0x00020000 -#define D3DPTFILTERCAPS_MAGFPOINT 0x01000000 -#define D3DPTFILTERCAPS_MAGFLINEAR 0x02000000 -#define D3DPTFILTERCAPS_MAGFANISOTROPIC 0x04000000 -#define D3DPTFILTERCAPS_MAGFPYRAMIDALQUAD 0x08000000 -#define D3DPTFILTERCAPS_MAGFGAUSSIANQUAD 0x10000000 - -#define D3DPTADDRESSCAPS_WRAP 0x00000001 -#define D3DPTADDRESSCAPS_MIRROR 0x00000002 -#define D3DPTADDRESSCAPS_CLAMP 0x00000004 -#define D3DPTADDRESSCAPS_BORDER 0x00000008 -#define D3DPTADDRESSCAPS_INDEPENDENTUV 0x00000010 -#define D3DPTADDRESSCAPS_MIRRORONCE 0x00000020 - -#define D3DLINECAPS_TEXTURE 0x00000001 -#define D3DLINECAPS_ZTEST 0x00000002 -#define D3DLINECAPS_BLEND 0x00000004 -#define D3DLINECAPS_ALPHACMP 0x00000008 -#define D3DLINECAPS_FOG 0x00000010 -#define D3DLINECAPS_ANTIALIAS 0x00000020 - -#define D3DSTENCILCAPS_KEEP 0x00000001 -#define D3DSTENCILCAPS_ZERO 0x00000002 -#define D3DSTENCILCAPS_REPLACE 0x00000004 -#define D3DSTENCILCAPS_INCRSAT 0x00000008 -#define D3DSTENCILCAPS_DECRSAT 0x00000010 -#define D3DSTENCILCAPS_INVERT 0x00000020 -#define D3DSTENCILCAPS_INCR 0x00000040 -#define D3DSTENCILCAPS_DECR 0x00000080 -#define D3DSTENCILCAPS_TWOSIDED 0x00000100 - -#define D3DFVFCAPS_TEXCOORDCOUNTMASK 0x0000FFFF -#define D3DFVFCAPS_DONOTSTRIPELEMENTS 0x00080000 -#define D3DFVFCAPS_PSIZE 0x00100000 - -#define D3DTEXOPCAPS_DISABLE 0x00000001 -#define D3DTEXOPCAPS_SELECTARG1 0x00000002 -#define D3DTEXOPCAPS_SELECTARG2 0x00000004 -#define D3DTEXOPCAPS_MODULATE 0x00000008 -#define D3DTEXOPCAPS_MODULATE2X 0x00000010 -#define D3DTEXOPCAPS_MODULATE4X 0x00000020 -#define D3DTEXOPCAPS_ADD 0x00000040 -#define D3DTEXOPCAPS_ADDSIGNED 0x00000080 -#define D3DTEXOPCAPS_ADDSIGNED2X 0x00000100 -#define D3DTEXOPCAPS_SUBTRACT 0x00000200 -#define D3DTEXOPCAPS_ADDSMOOTH 0x00000400 -#define D3DTEXOPCAPS_BLENDDIFFUSEALPHA 0x00000800 -#define D3DTEXOPCAPS_BLENDTEXTUREALPHA 
0x00001000 -#define D3DTEXOPCAPS_BLENDFACTORALPHA 0x00002000 -#define D3DTEXOPCAPS_BLENDTEXTUREALPHAPM 0x00004000 -#define D3DTEXOPCAPS_BLENDCURRENTALPHA 0x00008000 -#define D3DTEXOPCAPS_PREMODULATE 0x00010000 -#define D3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR 0x00020000 -#define D3DTEXOPCAPS_MODULATECOLOR_ADDALPHA 0x00040000 -#define D3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR 0x00080000 -#define D3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA 0x00100000 -#define D3DTEXOPCAPS_BUMPENVMAP 0x00200000 -#define D3DTEXOPCAPS_BUMPENVMAPLUMINANCE 0x00400000 -#define D3DTEXOPCAPS_DOTPRODUCT3 0x00800000 -#define D3DTEXOPCAPS_MULTIPLYADD 0x01000000 -#define D3DTEXOPCAPS_LERP 0x02000000 - -#define D3DVTXPCAPS_TEXGEN 0x00000001 -#define D3DVTXPCAPS_MATERIALSOURCE7 0x00000002 -#define D3DVTXPCAPS_DIRECTIONALLIGHTS 0x00000008 -#define D3DVTXPCAPS_POSITIONALLIGHTS 0x00000010 -#define D3DVTXPCAPS_LOCALVIEWER 0x00000020 -#define D3DVTXPCAPS_TWEENING 0x00000040 -#define D3DVTXPCAPS_TEXGEN_SPHEREMAP 0x00000100 -#define D3DVTXPCAPS_NO_TEXGEN_NONLOCALVIEWER 0x00000200 - -#define D3DDEVCAPS2_STREAMOFFSET 0x00000001 -#define D3DDEVCAPS2_DMAPNPATCH 0x00000002 -#define D3DDEVCAPS2_ADAPTIVETESSRTPATCH 0x00000004 -#define D3DDEVCAPS2_ADAPTIVETESSNPATCH 0x00000008 -#define D3DDEVCAPS2_CAN_STRETCHRECT_FROM_TEXTURES 0x00000010 -#define D3DDEVCAPS2_PRESAMPLEDDMAPNPATCH 0x00000020 -#define D3DDEVCAPS2_VERTEXELEMENTSCANSHARESTREAMOFFSET 0x00000040 - -#define D3DDTCAPS_UBYTE4 0x00000001 -#define D3DDTCAPS_UBYTE4N 0x00000002 -#define D3DDTCAPS_SHORT2N 0x00000004 -#define D3DDTCAPS_SHORT4N 0x00000008 -#define D3DDTCAPS_USHORT2N 0x00000010 -#define D3DDTCAPS_USHORT4N 0x00000020 -#define D3DDTCAPS_UDEC3 0x00000040 -#define D3DDTCAPS_DEC3N 0x00000080 -#define D3DDTCAPS_FLOAT16_2 0x00000100 -#define D3DDTCAPS_FLOAT16_4 0x00000200 - - -#define D3DVS20_MAX_DYNAMICFLOWCONTROLDEPTH 24 -#define D3DVS20_MIN_DYNAMICFLOWCONTROLDEPTH 0 -#define D3DVS20_MAX_NUMTEMPS 32 -#define D3DVS20_MIN_NUMTEMPS 12 -#define 
D3DVS20_MAX_STATICFLOWCONTROLDEPTH 4 -#define D3DVS20_MIN_STATICFLOWCONTROLDEPTH 1 - -#define D3DVS20CAPS_PREDICATION (1 << 0) - -#define D3DPS20CAPS_ARBITRARYSWIZZLE (1 << 0) -#define D3DPS20CAPS_GRADIENTINSTRUCTIONS (1 << 1) -#define D3DPS20CAPS_PREDICATION (1 << 2) -#define D3DPS20CAPS_NODEPENDENTREADLIMIT (1 << 3) -#define D3DPS20CAPS_NOTEXINSTRUCTIONLIMIT (1 << 4) - -#define D3DPS20_MAX_DYNAMICFLOWCONTROLDEPTH 24 -#define D3DPS20_MIN_DYNAMICFLOWCONTROLDEPTH 0 -#define D3DPS20_MAX_NUMTEMPS 32 -#define D3DPS20_MIN_NUMTEMPS 12 -#define D3DPS20_MAX_STATICFLOWCONTROLDEPTH 4 -#define D3DPS20_MIN_STATICFLOWCONTROLDEPTH 0 -#define D3DPS20_MAX_NUMINSTRUCTIONSLOTS 512 -#define D3DPS20_MIN_NUMINSTRUCTIONSLOTS 96 - -#define D3DMIN30SHADERINSTRUCTIONS 512 -#define D3DMAX30SHADERINSTRUCTIONS 32768 - -/* Structs */ -typedef struct _D3DVSHADERCAPS2_0 { - DWORD Caps; - INT DynamicFlowControlDepth; - INT NumTemps; - INT StaticFlowControlDepth; -} D3DVSHADERCAPS2_0, *PD3DVSHADERCAPS2_0, *LPD3DVSHADERCAPS2_0; - -typedef struct _D3DPSHADERCAPS2_0 { - DWORD Caps; - INT DynamicFlowControlDepth; - INT NumTemps; - INT StaticFlowControlDepth; - INT NumInstructionSlots; -} D3DPSHADERCAPS2_0, *PD3DPSHADERCAPS2_0, *LPD3DPSHADERCAPS2_0; - -typedef struct _D3DCAPS9 { - D3DDEVTYPE DeviceType; - UINT AdapterOrdinal; - DWORD Caps; - DWORD Caps2; - DWORD Caps3; - DWORD PresentationIntervals; - DWORD CursorCaps; - DWORD DevCaps; - DWORD PrimitiveMiscCaps; - DWORD RasterCaps; - DWORD ZCmpCaps; - DWORD SrcBlendCaps; - DWORD DestBlendCaps; - DWORD AlphaCmpCaps; - DWORD ShadeCaps; - DWORD TextureCaps; - DWORD TextureFilterCaps; - DWORD CubeTextureFilterCaps; - DWORD VolumeTextureFilterCaps; - DWORD TextureAddressCaps; - DWORD VolumeTextureAddressCaps; - DWORD LineCaps; - DWORD MaxTextureWidth; - DWORD MaxTextureHeight; - DWORD MaxVolumeExtent; - DWORD MaxTextureRepeat; - DWORD MaxTextureAspectRatio; - DWORD MaxAnisotropy; - float MaxVertexW; - float GuardBandLeft; - float GuardBandTop; - float 
GuardBandRight; - float GuardBandBottom; - float ExtentsAdjust; - DWORD StencilCaps; - DWORD FVFCaps; - DWORD TextureOpCaps; - DWORD MaxTextureBlendStages; - DWORD MaxSimultaneousTextures; - DWORD VertexProcessingCaps; - DWORD MaxActiveLights; - DWORD MaxUserClipPlanes; - DWORD MaxVertexBlendMatrices; - DWORD MaxVertexBlendMatrixIndex; - float MaxPointSize; - DWORD MaxPrimitiveCount; - DWORD MaxVertexIndex; - DWORD MaxStreams; - DWORD MaxStreamStride; - DWORD VertexShaderVersion; - DWORD MaxVertexShaderConst; - DWORD PixelShaderVersion; - float PixelShader1xMaxValue; - DWORD DevCaps2; - float MaxNpatchTessellationLevel; - DWORD Reserved5; - UINT MasterAdapterOrdinal; - UINT AdapterOrdinalInGroup; - UINT NumberOfAdaptersInGroup; - DWORD DeclTypes; - DWORD NumSimultaneousRTs; - DWORD StretchRectFilterCaps; - D3DVSHADERCAPS2_0 VS20Caps; - D3DPSHADERCAPS2_0 PS20Caps; - DWORD VertexTextureFilterCaps; - DWORD MaxVShaderInstructionsExecuted; - DWORD MaxPShaderInstructionsExecuted; - DWORD MaxVertexShader30InstructionSlots; - DWORD MaxPixelShader30InstructionSlots; -} D3DCAPS9, *PD3DCAPS9, *LPD3DCAPS9; - -typedef struct _D3DCONTENTPROTECTIONCAPS { - DWORD Caps; - GUID KeyExchangeType; - UINT BufferAlignmentStart; - UINT BlockAlignmentSize; - ULONGLONG ProtectedMemorySize; -} D3DCONTENTPROTECTIONCAPS, *PD3DCONTENTPROTECTIONCAPS, *LPD3DCONTENTPROTECTIONCAPS; - -typedef struct _D3DOVERLAYCAPS { - UINT Caps; - UINT MaxOverlayDisplayWidth; - UINT MaxOverlayDisplayHeight; -} D3DOVERLAYCAPS, *PD3DOVERLAYCAPS, *LPD3DOVERLAYCAPS; - -#endif /* _D3D9CAPS_H_ */ diff --git a/nine-native/include/D3D9/d3d9types.h b/nine-native/include/D3D9/d3d9types.h deleted file mode 100644 index 2550c8df2..000000000 --- a/nine-native/include/D3D9/d3d9types.h +++ /dev/null @@ -1,1824 +0,0 @@ -/* - * Copyright 2011 Joakim Sindholt - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in 
the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef _D3D9TYPES_H_ -#define _D3D9TYPES_H_ - -#ifdef _WIN32 -#include -#else /* _WIN32 */ -#include - -#ifndef NULL -#define NULL 0 -#endif - -/******************************************************** - * Windows types * - ********************************************************/ -/* Function macros */ -#define FAILED(x) ((HRESULT)(x) < 0) -#define SUCCEEDED(x) ((HRESULT)(x) >= 0) - -#define MAKE_HRESULT(sev,fac,code) \ - ( \ - ((HRESULT)(sev) << 31) | \ - ((HRESULT)(fac) << 16) | \ - (HRESULT)(code) \ - ) - -/* Windows errors */ -#define E_OUTOFMEMORY MAKE_HRESULT(1, 0x007, 14) -#define E_INVALIDARG MAKE_HRESULT(1, 0x007, 0x0057) -#define E_NOTIMPL MAKE_HRESULT(1, 0x000, 0x4001) -#define E_NOINTERFACE MAKE_HRESULT(1, 0x000, 0x4002) -#define E_POINTER MAKE_HRESULT(1, 0x000, 0x4003) -#define E_FAIL MAKE_HRESULT(1, 0x000, 0x4005) - -#define S_OK ((HRESULT)0) -#define S_FALSE ((HRESULT)1) - -/* WORD types */ -typedef uint8_t BYTE; -typedef uint16_t WORD; -typedef uint32_t DWORD; - -/* Renamed types */ -typedef int 
BOOL; -#ifndef FALSE -#define FALSE 0 -#define TRUE (!FALSE) -#endif - -typedef uint32_t UINT32; -typedef uint64_t UINT64; - -typedef unsigned short USHORT; -typedef unsigned int UINT; -typedef unsigned int ULONG; -typedef unsigned long long ULONGLONG; - -typedef short SHORT; -typedef int INT; -typedef int LONG; -typedef long long LONGLONG; -typedef float FLOAT; - -/* Windows types */ -typedef void *HANDLE; -typedef int32_t HRESULT; -typedef HANDLE HWND; -typedef HANDLE HMONITOR; -typedef HANDLE HDC; - -/* Unions */ -typedef union { - struct { - DWORD LowPart; - LONG HighPart; - }; - - struct { - DWORD LowPart; - LONG HighPart; - } u; - - LONGLONG QuadPart; -} LARGE_INTEGER, *LPLARGE_INTEGER; - -/* Structs */ - -typedef struct _GUID { - DWORD Data1; - WORD Data2; - WORD Data3; - BYTE Data4[8]; -} GUID, IID, *LPGUID, *REFGUID, *REFIID; - -typedef struct _LUID { - DWORD LowPart; - LONG HighPart; -} LUID, *LPLUID, *PLUID; - -typedef struct _PALETTEENTRY { - BYTE peRed; - BYTE peGreen; - BYTE peBlue; - BYTE peFlags; -} PALETTEENTRY, *LPPALETTEENTRY; - -typedef struct _POINT { - LONG x; - LONG y; -} POINT, *LPPOINT; - -typedef struct _RECT { - LONG left; - LONG top; - LONG right; - LONG bottom; -} RECT, *LPRECT; - -typedef struct _RGNDATAHEADER { - DWORD dwSize; - DWORD iType; - DWORD nCount; - DWORD nRgnSize; - RECT rcBound; -} RGNDATAHEADER, *LPRGNDATAHEADER; - -typedef struct _RGNDATA { - RGNDATAHEADER rdh; - char Buffer[1]; -} RGNDATA, *LPRGNDATA; -#endif /* _WIN32 */ - -#ifndef MAKEFOURCC -#define MAKEFOURCC(a, b, c, d) \ - ( \ - (DWORD)(BYTE)(a) | \ - ((DWORD)(BYTE)(b) << 8) | \ - ((DWORD)(BYTE)(c) << 16) | \ - ((DWORD)(BYTE)(d) << 24) \ - ) -#endif /* MAKEFOURCC */ - - -#define D3DPRESENTFLAG_LOCKABLE_BACKBUFFER 0x00000001 -#define D3DPRESENTFLAG_DISCARD_DEPTHSTENCIL 0x00000002 -#define D3DPRESENTFLAG_DEVICECLIP 0x00000004 -#define D3DPRESENTFLAG_VIDEO 0x00000010 -#define D3DPRESENTFLAG_NOAUTOROTATE 0x00000020 -#define D3DPRESENTFLAG_UNPRUNEDMODE 0x00000040 
-#define D3DPRESENTFLAG_OVERLAY_LIMITEDRGB 0x00000080 -#define D3DPRESENTFLAG_OVERLAY_YCbCr_BT709 0x00000100 -#define D3DPRESENTFLAG_OVERLAY_YCbCr_xvYCC 0x00000200 -#define D3DPRESENTFLAG_RESTRICTED_CONTENT 0x00000400 -#define D3DPRESENTFLAG_RESTRICT_SHARED_RESOURCE_DRIVER 0x00000800 - -/* Windows calling convention */ -#ifndef WINAPI - #if defined(__x86_64__) && !defined(__ILP32__) - #define WINAPI __attribute__((ms_abi)) - #elif defined(__i386__) - #define WINAPI __attribute__((__stdcall__)) - #else /* neither amd64 nor i386 */ - #define WINAPI - #endif -#endif /* WINAPI */ - -/* Implementation caps */ -#define D3DPRESENT_BACK_BUFFERS_MAX 3 -#define D3DPRESENT_BACK_BUFFERS_MAX_EX 30 - -/* Functions */ -#define MAKE_D3DHRESULT(code) MAKE_HRESULT(1, 0x876, code) -#define MAKE_D3DSTATUS(code) MAKE_HRESULT(0, 0x876, code) - -/* SDK version */ -#define D3D_SDK_VERSION 32 - -/* Adapter */ -#define D3DADAPTER_DEFAULT 0 -#define D3DSGR_NO_CALIBRATION 0x00000000 - -/******************************************************** - * Return codes * - ********************************************************/ -#define D3D_OK S_OK -#define D3DOK_NOAUTOGEN MAKE_D3DSTATUS(2159) -#define D3DERR_OUTOFVIDEOMEMORY MAKE_D3DHRESULT(380) -#define D3DERR_WASSTILLDRAWING MAKE_D3DHRESULT(540) -#define D3DERR_WRONGTEXTUREFORMAT MAKE_D3DHRESULT(2072) -#define D3DERR_UNSUPPORTEDCOLOROPERATION MAKE_D3DHRESULT(2073) -#define D3DERR_UNSUPPORTEDCOLORARG MAKE_D3DHRESULT(2074) -#define D3DERR_UNSUPPORTEDALPHAOPERATION MAKE_D3DHRESULT(2075) -#define D3DERR_UNSUPPORTEDALPHAARG MAKE_D3DHRESULT(2076) -#define D3DERR_TOOMANYOPERATIONS MAKE_D3DHRESULT(2077) -#define D3DERR_CONFLICTINGTEXTUREFILTER MAKE_D3DHRESULT(2078) -#define D3DERR_UNSUPPORTEDFACTORVALUE MAKE_D3DHRESULT(2079) -#define D3DERR_CONFLICTINGRENDERSTATE MAKE_D3DHRESULT(2081) -#define D3DERR_UNSUPPORTEDTEXTUREFILTER MAKE_D3DHRESULT(2082) -#define D3DERR_CONFLICTINGTEXTUREPALETTE MAKE_D3DHRESULT(2086) -#define D3DERR_DRIVERINTERNALERROR 
MAKE_D3DHRESULT(2087) -#define D3DERR_NOTFOUND MAKE_D3DHRESULT(2150) -#define D3DERR_MOREDATA MAKE_D3DHRESULT(2151) -#define D3DERR_DEVICELOST MAKE_D3DHRESULT(2152) -#define D3DERR_DEVICENOTRESET MAKE_D3DHRESULT(2153) -#define D3DERR_NOTAVAILABLE MAKE_D3DHRESULT(2154) -#define D3DERR_INVALIDDEVICE MAKE_D3DHRESULT(2155) -#define D3DERR_INVALIDCALL MAKE_D3DHRESULT(2156) -#define D3DERR_DRIVERINVALIDCALL MAKE_D3DHRESULT(2157) -#define D3DERR_DEVICEREMOVED MAKE_D3DHRESULT(2160) -#define D3DERR_DEVICEHUNG MAKE_D3DHRESULT(2164) -#define S_PRESENT_OCCLUDED MAKE_D3DSTATUS(2168) - -/******************************************************** - * Bitmasks * - *******************************************************/ -/* IDirect3DDevice9::Clear */ -#define D3DCLEAR_TARGET 0x00000001 -#define D3DCLEAR_ZBUFFER 0x00000002 -#define D3DCLEAR_STENCIL 0x00000004 - -/* Usage */ -/* http://msdn.microsoft.com/en-us/library/ee416659(VS.85).aspx */ -#define D3DUSAGE_RENDERTARGET 0x00000001 -#define D3DUSAGE_DEPTHSTENCIL 0x00000002 -#define D3DUSAGE_WRITEONLY 0x00000008 -#define D3DUSAGE_SOFTWAREPROCESSING 0x00000010 -#define D3DUSAGE_DONOTCLIP 0x00000020 -#define D3DUSAGE_POINTS 0x00000040 -#define D3DUSAGE_RTPATCHES 0x00000080 -#define D3DUSAGE_NPATCHES 0x00000100 -#define D3DUSAGE_DYNAMIC 0x00000200 -#define D3DUSAGE_AUTOGENMIPMAP 0x00000400 -#ifndef D3D_DISABLE_9EX -#define D3DUSAGE_RESTRICTED_CONTENT 0x00000800 -#define D3DUSAGE_RESTRICT_SHARED_RESOURCE_DRIVER 0x00001000 -#define D3DUSAGE_RESTRICT_SHARED_RESOURCE 0x00002000 -#endif -#define D3DUSAGE_DMAP 0x00004000 -#define D3DUSAGE_QUERY_LEGACYBUMPMAP 0x00008000 -#define D3DUSAGE_QUERY_SRGBREAD 0x00010000 -#define D3DUSAGE_QUERY_FILTER 0x00020000 -#define D3DUSAGE_QUERY_SRGBWRITE 0x00040000 -#define D3DUSAGE_QUERY_POSTPIXELSHADER_BLENDING 0x00080000 -#define D3DUSAGE_QUERY_VERTEXTEXTURE 0x00100000 -#define D3DUSAGE_QUERY_WRAPANDMIP 0x00200000 -#ifndef D3D_DISABLE_9EX -#define D3DUSAGE_NONSECURE 0x00800000 -#define D3DUSAGE_TEXTAPI 
0x10000000 -#endif - -/* Buffer locking */ -/* http://msdn.microsoft.com/en-us/library/ee416503(VS.85).aspx */ -#define D3DLOCK_READONLY 0x00000010 -#define D3DLOCK_NOSYSLOCK 0x00000800 -#define D3DLOCK_NOOVERWRITE 0x00001000 -#define D3DLOCK_DISCARD 0x00002000 -#define D3DLOCK_DONOTWAIT 0x00004000 -#define D3DLOCK_NO_DIRTY_UPDATE 0x00008000 - -/* FVF */ -/* http://msdn.microsoft.com/en-us/library/ee416490(VS.85).aspx */ -/* http://msdn.microsoft.com/en-us/library/ms791638.aspx */ -#define D3DFVF_XYZ 0x00000002 -#define D3DFVF_XYZRHW 0x00000004 -#define D3DFVF_XYZB1 0x00000006 -#define D3DFVF_XYZB2 0x00000008 -#define D3DFVF_XYZB3 0x0000000A -#define D3DFVF_XYZB4 0x0000000C -#define D3DFVF_XYZB5 0x0000000E -#define D3DFVF_XYZW 0x00004002 -#define D3DFVF_POSITION_MASK 0x0000400E - -#define D3DFVF_NORMAL 0x00000010 -#define D3DFVF_PSIZE 0x00000020 -#define D3DFVF_DIFFUSE 0x00000040 -#define D3DFVF_SPECULAR 0x00000080 - -#define D3DFVF_TEX0 0x00000000 -#define D3DFVF_TEX1 0x00000100 -#define D3DFVF_TEX2 0x00000200 -#define D3DFVF_TEX3 0x00000300 -#define D3DFVF_TEX4 0x00000400 -#define D3DFVF_TEX5 0x00000500 -#define D3DFVF_TEX6 0x00000600 -#define D3DFVF_TEX7 0x00000700 -#define D3DFVF_TEX8 0x00000800 -#define D3DFVF_TEXCOUNT_MASK 0x00000F00 -#define D3DFVF_TEXCOUNT_SHIFT 8 -#define D3DFVF_TEXTUREFORMAT1 0x00000003 -#define D3DFVF_TEXTUREFORMAT2 0x00000000 -#define D3DFVF_TEXTUREFORMAT3 0x00000001 -#define D3DFVF_TEXTUREFORMAT4 0x00000002 -#define D3DFVF_TEXCOORDSIZE1(CoordIndex) (D3DFVF_TEXTUREFORMAT1 << (CoordIndex*2 + 16)) -#define D3DFVF_TEXCOORDSIZE2(CoordIndex) (D3DFVF_TEXTUREFORMAT2) -#define D3DFVF_TEXCOORDSIZE3(CoordIndex) (D3DFVF_TEXTUREFORMAT3 << (CoordIndex*2 + 16)) -#define D3DFVF_TEXCOORDSIZE4(CoordIndex) (D3DFVF_TEXTUREFORMAT4 << (CoordIndex*2 + 16)) - -#define D3DFVF_POSITION_MASK 0x0000400E -#define D3DFVF_TEXCOUNT_MASK 0x00000F00 -#define D3DFVF_TEXCOUNT_SHIFT 8 - -#define D3DFVF_LASTBETA_UBYTE4 0x00001000 -#define D3DFVF_LASTBETA_D3DCOLOR 
0x00008000 - -#define D3DFVF_RESERVED0 0x00000001 -#define D3DFVF_RESERVED2 0x00006000 - -#define D3DTA_SELECTMASK 0x0000000f -#define D3DTA_DIFFUSE 0x00000000 -#define D3DTA_CURRENT 0x00000001 -#define D3DTA_TEXTURE 0x00000002 -#define D3DTA_TFACTOR 0x00000003 -#define D3DTA_SPECULAR 0x00000004 -#define D3DTA_TEMP 0x00000005 -#define D3DTA_CONSTANT 0x00000006 -#define D3DTA_COMPLEMENT 0x00000010 -#define D3DTA_ALPHAREPLICATE 0x00000020 - -#define D3DSPD_IUNKNOWN 0x00000001 - -#define D3DPRESENT_DONOTWAIT 0x00000001 -#define D3DPRESENT_LINEAR_CONTENT 0x00000002 -#define D3DPRESENT_RATE_DEFAULT 0 - -#define D3DCREATE_FPU_PRESERVE 0x00000002 -#define D3DCREATE_MULTITHREADED 0x00000004 -#define D3DCREATE_PUREDEVICE 0x00000010 -#define D3DCREATE_SOFTWARE_VERTEXPROCESSING 0x00000020 -#define D3DCREATE_HARDWARE_VERTEXPROCESSING 0x00000040 -#define D3DCREATE_MIXED_VERTEXPROCESSING 0x00000080 -#define D3DCREATE_DISABLE_DRIVER_MANAGEMENT 0x00000100 -#define D3DCREATE_ADAPTERGROUP_DEVICE 0x00000200 - -#define D3DSTREAMSOURCE_INDEXEDDATA (1 << 30) -#define D3DSTREAMSOURCE_INSTANCEDATA (2 << 30) - -/* D3DRS_COLORWRITEENABLE */ -#define D3DCOLORWRITEENABLE_RED (1L << 0) -#define D3DCOLORWRITEENABLE_GREEN (1L << 1) -#define D3DCOLORWRITEENABLE_BLUE (1L << 2) -#define D3DCOLORWRITEENABLE_ALPHA (1L << 3) - - -/******************************************************** - * Function macros * - *******************************************************/ - -/* Colors */ -#define D3DCOLOR_ARGB(a,r,g,b) \ - ((D3DCOLOR)( \ - (((a) & 0xFF) << 24) | \ - (((r) & 0xFF) << 16) | \ - (((g) & 0xFF) << 8) | \ - ((b) & 0xFF) \ - )) - -#define D3DCOLOR_RGBA(r,g,b,a) D3DCOLOR_ARGB(a,r,g,b) -#define D3DCOLOR_XRGB(r,g,b) D3DCOLOR_ARGB(0xFF,r,g,b) -#define D3DCOLOR_AYUV(a,y,u,v) D3DCOLOR_ARGB(a,y,u,v) -#define D3DCOLOR_XYUV(y,u,v) D3DCOLOR_ARGB(0xFF,y,u,v) - -#define D3DCOLOR_COLORVALUE(r,g,b,a) \ - D3DCOLOR_RGBA( \ - (DWORD)((r) * 255.0f), \ - (DWORD)((g) * 255.0f), \ - (DWORD)((b) * 255.0f), \ - 
(DWORD)((a) * 255.0f) \ - ) - -/* Shaders */ -#define D3DDECL_END() { 0xFF, 0, D3DDECLTYPE_UNUSED, 0, 0, 0 } - -/***************************************************************************** - * Typedefs * - *****************************************************************************/ -typedef DWORD D3DCOLOR; - -/***************************************************************************** - * Enums * - *****************************************************************************/ -typedef enum D3DDISPLAYROTATION { - D3DDISPLAYROTATION_IDENTITY = 1, - D3DDISPLAYROTATION_90 = 2, - D3DDISPLAYROTATION_180 = 3, - D3DDISPLAYROTATION_270 = 4 -} D3DDISPLAYROTATION; - -typedef enum D3DSCANLINEORDERING { - D3DSCANLINEORDERING_UNKNOWN = 0, - D3DSCANLINEORDERING_PROGRESSIVE = 1, - D3DSCANLINEORDERING_INTERLACED = 2 -} D3DSCANLINEORDERING; - -typedef enum _D3DAUTHENTICATEDCHANNELTYPE { - D3DAUTHENTICATEDCHANNEL_D3D9 = 1, - D3DAUTHENTICATEDCHANNEL_DRIVER_SOFTWARE = 2, - D3DAUTHENTICATEDCHANNEL_DRIVER_HARDWARE = 3 -} D3DAUTHENTICATEDCHANNELTYPE; - -typedef enum _D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE { - PROCESSIDTYPE_UNKNOWN = 0, - PROCESSIDTYPE_DWM = 1, - PROCESSIDTYPE_HANDLE = 2 -} D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE; - -typedef enum _D3DBACKBUFFER_TYPE { - D3DBACKBUFFER_TYPE_MONO = 0, - D3DBACKBUFFER_TYPE_LEFT = 1, - D3DBACKBUFFER_TYPE_RIGHT = 2 -} D3DBACKBUFFER_TYPE; - -typedef enum _D3DBASISTYPE { - D3DBASIS_BEZIER = 0, - D3DBASIS_BSPLINE = 1, - D3DBASIS_CATMULL_ROM = 2 -} D3DBASISTYPE; - -typedef enum _D3DBLEND { - D3DBLEND_ZERO = 1, - D3DBLEND_ONE = 2, - D3DBLEND_SRCCOLOR = 3, - D3DBLEND_INVSRCCOLOR = 4, - D3DBLEND_SRCALPHA = 5, - D3DBLEND_INVSRCALPHA = 6, - D3DBLEND_DESTALPHA = 7, - D3DBLEND_INVDESTALPHA = 8, - D3DBLEND_DESTCOLOR = 9, - D3DBLEND_INVDESTCOLOR = 10, - D3DBLEND_SRCALPHASAT = 11, - D3DBLEND_BOTHSRCALPHA = 12, - D3DBLEND_BOTHINVSRCALPHA = 13, - D3DBLEND_BLENDFACTOR = 14, - D3DBLEND_INVBLENDFACTOR = 15, - D3DBLEND_SRCCOLOR2 = 16, - 
D3DBLEND_INVSRCCOLOR2 = 17 -} D3DBLEND; - -typedef enum _D3DBLENDOP { - D3DBLENDOP_ADD = 1, - D3DBLENDOP_SUBTRACT = 2, - D3DBLENDOP_REVSUBTRACT = 3, - D3DBLENDOP_MIN = 4, - D3DBLENDOP_MAX = 5 -} D3DBLENDOP; - -typedef enum _D3DBUSTYPE { - D3DBUSTYPE_OTHER = 0x00000000, - D3DBUSTYPE_PCI = 0x00000001, - D3DBUSTYPE_PCIX = 0x00000002, - D3DBUSTYPE_PCIEXPRESS = 0x00000003, - D3DBUSTYPE_AGP = 0x00000004, - D3DBUSIMPL_MODIFIER_INSIDE_OF_CHIPSET = 0x00010000, - D3DBUSIMPL_MODIFIER_TRACKS_ON_MOTHER_BOARD_TO_CHIP = 0x00020000, - D3DBUSIMPL_MODIFIER_TRACKS_ON_MOTHER_BOARD_TO_SOCKET = 0x00030000, - D3DBUSIMPL_MODIFIER_DAUGHTER_BOARD_CONNECTOR = 0x00040000, - D3DBUSIMPL_MODIFIER_DAUGHTER_BOARD_CONNECTOR_INSIDE_OF_NUAE = 0x00050000, - D3DBUSIMPL_MODIFIER_NON_STANDARD = 0x80000000 -} D3DBUSTYPE; - -typedef enum _D3DCMPFUNC { - D3DCMP_NEVER_ZERO = 0, //Needed to avoid warnings - D3DCMP_NEVER = 1, - D3DCMP_LESS = 2, - D3DCMP_EQUAL = 3, - D3DCMP_LESSEQUAL = 4, - D3DCMP_GREATER = 5, - D3DCMP_NOTEQUAL = 6, - D3DCMP_GREATEREQUAL = 7, - D3DCMP_ALWAYS = 8 -} D3DCMPFUNC; - -typedef enum _D3DCOMPOSERECTSOP{ - D3DCOMPOSERECTS_COPY = 1, - D3DCOMPOSERECTS_OR = 2, - D3DCOMPOSERECTS_AND = 3, - D3DCOMPOSERECTS_NEG = 4 -} D3DCOMPOSERECTSOP; - -typedef enum _D3DCUBEMAP_FACES { - D3DCUBEMAP_FACE_POSITIVE_X = 0, - D3DCUBEMAP_FACE_NEGATIVE_X = 1, - D3DCUBEMAP_FACE_POSITIVE_Y = 2, - D3DCUBEMAP_FACE_NEGATIVE_Y = 3, - D3DCUBEMAP_FACE_POSITIVE_Z = 4, - D3DCUBEMAP_FACE_NEGATIVE_Z = 5 -} D3DCUBEMAP_FACES; - -typedef enum _D3DCULL { - D3DCULL_NONE = 1, - D3DCULL_CW = 2, - D3DCULL_CCW = 3 -} D3DCULL; - -typedef enum _D3DDEBUGMONITORTOKENS { - D3DDMT_ENABLE = 0, - D3DDMT_DISABLE = 1 -} D3DDEBUGMONITORTOKENS; - -typedef enum _D3DDECLMETHOD { - D3DDECLMETHOD_DEFAULT = 0, - D3DDECLMETHOD_PARTIALU = 1, - D3DDECLMETHOD_PARTIALV = 2, - D3DDECLMETHOD_CROSSUV = 3, - D3DDECLMETHOD_UV = 4, - D3DDECLMETHOD_LOOKUP = 5, - D3DDECLMETHOD_LOOKUPPRESAMPLED = 6 -} D3DDECLMETHOD; - -typedef enum _D3DDECLTYPE { - 
D3DDECLTYPE_FLOAT1 = 0, - D3DDECLTYPE_FLOAT2 = 1, - D3DDECLTYPE_FLOAT3 = 2, - D3DDECLTYPE_FLOAT4 = 3, - D3DDECLTYPE_D3DCOLOR = 4, - D3DDECLTYPE_UBYTE4 = 5, - D3DDECLTYPE_SHORT2 = 6, - D3DDECLTYPE_SHORT4 = 7, - D3DDECLTYPE_UBYTE4N = 8, - D3DDECLTYPE_SHORT2N = 9, - D3DDECLTYPE_SHORT4N = 10, - D3DDECLTYPE_USHORT2N = 11, - D3DDECLTYPE_USHORT4N = 12, - D3DDECLTYPE_UDEC3 = 13, - D3DDECLTYPE_DEC3N = 14, - D3DDECLTYPE_FLOAT16_2 = 15, - D3DDECLTYPE_FLOAT16_4 = 16, - D3DDECLTYPE_UNUSED = 17 -} D3DDECLTYPE; - -typedef enum _D3DDECLUSAGE { - D3DDECLUSAGE_POSITION = 0, - D3DDECLUSAGE_BLENDWEIGHT = 1, - D3DDECLUSAGE_BLENDINDICES = 2, - D3DDECLUSAGE_NORMAL = 3, - D3DDECLUSAGE_PSIZE = 4, - D3DDECLUSAGE_TEXCOORD = 5, - D3DDECLUSAGE_TANGENT = 6, - D3DDECLUSAGE_BINORMAL = 7, - D3DDECLUSAGE_TESSFACTOR = 8, - D3DDECLUSAGE_POSITIONT = 9, - D3DDECLUSAGE_COLOR = 10, - D3DDECLUSAGE_FOG = 11, - D3DDECLUSAGE_DEPTH = 12, - D3DDECLUSAGE_SAMPLE = 13 -} D3DDECLUSAGE; - -typedef enum _D3DDEGREETYPE { - D3DDEGREE_LINEAR = 1, - D3DDEGREE_QUADRATIC = 2, - D3DDEGREE_CUBIC = 3, - D3DDEGREE_QUINTIC = 5 -} D3DDEGREETYPE; - -typedef enum _D3DDEVTYPE { - D3DDEVTYPE_HAL = 1, - D3DDEVTYPE_REF = 2, - D3DDEVTYPE_SW = 3, - D3DDEVTYPE_NULLREF = 4 -} D3DDEVTYPE; - -typedef enum _D3DFILLMODE { - D3DFILL_SOLID_ZERO = 0, - D3DFILL_POINT = 1, - D3DFILL_WIREFRAME = 2, - D3DFILL_SOLID = 3 -} D3DFILLMODE; - -typedef enum _D3DFOGMODE { - D3DFOG_NONE = 0, - D3DFOG_EXP = 1, - D3DFOG_EXP2 = 2, - D3DFOG_LINEAR = 3 -} D3DFOGMODE; - -typedef enum _D3DFORMAT { - D3DFMT_UNKNOWN = 0, - D3DFMT_R8G8B8 = 20, - D3DFMT_A8R8G8B8 = 21, - D3DFMT_X8R8G8B8 = 22, - D3DFMT_R5G6B5 = 23, - D3DFMT_X1R5G5B5 = 24, - D3DFMT_A1R5G5B5 = 25, - D3DFMT_A4R4G4B4 = 26, - D3DFMT_R3G3B2 = 27, - D3DFMT_A8 = 28, - D3DFMT_A8R3G3B2 = 29, - D3DFMT_X4R4G4B4 = 30, - D3DFMT_A2B10G10R10 = 31, - D3DFMT_A8B8G8R8 = 32, - D3DFMT_X8B8G8R8 = 33, - D3DFMT_G16R16 = 34, - D3DFMT_A2R10G10B10 = 35, - D3DFMT_A16B16G16R16 = 36, - D3DFMT_A8P8 = 40, - D3DFMT_P8 = 41, - D3DFMT_L8 
= 50, - D3DFMT_A8L8 = 51, - D3DFMT_A4L4 = 52, - D3DFMT_V8U8 = 60, - D3DFMT_L6V5U5 = 61, - D3DFMT_X8L8V8U8 = 62, - D3DFMT_Q8W8V8U8 = 63, - D3DFMT_V16U16 = 64, - D3DFMT_A2W10V10U10 = 67, - D3DFMT_UYVY = MAKEFOURCC('U', 'Y', 'V', 'Y'), - D3DFMT_R8G8_B8G8 = MAKEFOURCC('R', 'G', 'B', 'G'), - D3DFMT_YUY2 = MAKEFOURCC('Y', 'U', 'Y', '2'), - D3DFMT_G8R8_G8B8 = MAKEFOURCC('G', 'R', 'G', 'B'), - D3DFMT_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), - D3DFMT_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), - D3DFMT_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), - D3DFMT_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), - D3DFMT_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), - D3DFMT_D16_LOCKABLE = 70, - D3DFMT_D32 = 71, - D3DFMT_D15S1 = 73, - D3DFMT_D24S8 = 75, - D3DFMT_D24X8 = 77, - D3DFMT_D24X4S4 = 79, - D3DFMT_D16 = 80, - D3DFMT_D32F_LOCKABLE = 82, - D3DFMT_D24FS8 = 83, - D3DFMT_D32_LOCKABLE = 84, - D3DFMT_S8_LOCKABLE = 85, - D3DFMT_L16 = 81, - D3DFMT_VERTEXDATA = 100, - D3DFMT_INDEX16 = 101, - D3DFMT_INDEX32 = 102, - D3DFMT_Q16W16V16U16 = 110, - D3DFMT_MULTI2_ARGB8 = MAKEFOURCC('M','E','T','1'), - D3DFMT_R16F = 111, - D3DFMT_G16R16F = 112, - D3DFMT_A16B16G16R16F = 113, - D3DFMT_R32F = 114, - D3DFMT_G32R32F = 115, - D3DFMT_A32B32G32R32F = 116, - D3DFMT_CxV8U8 = 117, - D3DFMT_A1 = 118, - D3DFMT_A2B10G10R10_XR_BIAS = 119, - D3DFMT_BINARYBUFFER = 199, - D3DFMT_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), - D3DFMT_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), - D3DFMT_ATOC = MAKEFOURCC('A', 'T', 'O', 'C'), - D3DFMT_DF16 = MAKEFOURCC('D', 'F', '1', '6'), - D3DFMT_DF24 = MAKEFOURCC('D', 'F', '2', '4'), - D3DFMT_INTZ = MAKEFOURCC('I', 'N', 'T', 'Z'), - D3DFMT_NULL = MAKEFOURCC('N', 'U', 'L', 'L'), - D3DFMT_NVDB = MAKEFOURCC('N', 'V', 'D', 'B'), - D3DFMT_NV11 = MAKEFOURCC('N', 'V', '1', '1'), - D3DFMT_NV12 = MAKEFOURCC('N', 'V', '1', '2'), - D3DFMT_RESZ = MAKEFOURCC('R', 'E', 'S', 'Z'), - D3DFMT_Y210 = MAKEFOURCC('Y', '2', '1', '0'), - D3DFMT_Y216 = MAKEFOURCC('Y', '2', '1', '6'), - D3DFMT_Y410 = MAKEFOURCC('Y', '4', '1', '0') -} 
D3DFORMAT; - -typedef enum _D3DLIGHTTYPE { - D3DLIGHT_POINT = 1, - D3DLIGHT_SPOT = 2, - D3DLIGHT_DIRECTIONAL = 3 -} D3DLIGHTTYPE; - -typedef enum _D3DMATERIALCOLORSOURCE { - D3DMCS_MATERIAL = 0, - D3DMCS_COLOR1 = 1, - D3DMCS_COLOR2 = 2 -} D3DMATERIALCOLORSOURCE; - -typedef enum _D3DMULTISAMPLE_TYPE { - D3DMULTISAMPLE_NONE = 0, - D3DMULTISAMPLE_NONMASKABLE = 1, - D3DMULTISAMPLE_2_SAMPLES = 2, - D3DMULTISAMPLE_3_SAMPLES = 3, - D3DMULTISAMPLE_4_SAMPLES = 4, - D3DMULTISAMPLE_5_SAMPLES = 5, - D3DMULTISAMPLE_6_SAMPLES = 6, - D3DMULTISAMPLE_7_SAMPLES = 7, - D3DMULTISAMPLE_8_SAMPLES = 8, - D3DMULTISAMPLE_9_SAMPLES = 9, - D3DMULTISAMPLE_10_SAMPLES = 10, - D3DMULTISAMPLE_11_SAMPLES = 11, - D3DMULTISAMPLE_12_SAMPLES = 12, - D3DMULTISAMPLE_13_SAMPLES = 13, - D3DMULTISAMPLE_14_SAMPLES = 14, - D3DMULTISAMPLE_15_SAMPLES = 15, - D3DMULTISAMPLE_16_SAMPLES = 16 -} D3DMULTISAMPLE_TYPE; - -typedef enum _D3DPATCHEDGESTYLE { - D3DPATCHEDGE_DISCRETE = 0, - D3DPATCHEDGE_CONTINUOUS = 1 -} D3DPATCHEDGESTYLE; - -typedef enum _D3DPOOL { - D3DPOOL_DEFAULT = 0, - D3DPOOL_MANAGED = 1, - D3DPOOL_SYSTEMMEM = 2, - D3DPOOL_SCRATCH = 3 -} D3DPOOL; - -typedef enum _D3DPRIMITIVETYPE { - D3DPT_POINTLIST = 1, - D3DPT_LINELIST = 2, - D3DPT_LINESTRIP = 3, - D3DPT_TRIANGLELIST = 4, - D3DPT_TRIANGLESTRIP = 5, - D3DPT_TRIANGLEFAN = 6 -} D3DPRIMITIVETYPE; - -typedef enum _D3DQUERYTYPE { - D3DQUERYTYPE_VCACHE = 4, - D3DQUERYTYPE_RESOURCEMANAGER = 5, - D3DQUERYTYPE_VERTEXSTATS = 6, - D3DQUERYTYPE_EVENT = 8, - D3DQUERYTYPE_OCCLUSION = 9, - D3DQUERYTYPE_TIMESTAMP = 10, - D3DQUERYTYPE_TIMESTAMPDISJOINT = 11, - D3DQUERYTYPE_TIMESTAMPFREQ = 12, - D3DQUERYTYPE_PIPELINETIMINGS = 13, - D3DQUERYTYPE_INTERFACETIMINGS = 14, - D3DQUERYTYPE_VERTEXTIMINGS = 15, - D3DQUERYTYPE_PIXELTIMINGS = 16, - D3DQUERYTYPE_BANDWIDTHTIMINGS = 17, - D3DQUERYTYPE_CACHEUTILIZATION = 18, - D3DQUERYTYPE_MEMORYPRESSURE = 19 -} D3DQUERYTYPE; - -#define D3DISSUE_BEGIN (1 << 1) -#define D3DISSUE_END (1 << 0) -#define D3DGETDATA_FLUSH (1 << 0) - - 
-typedef enum _D3DRENDERSTATETYPE { - D3DRS_ZENABLE = 7, - D3DRS_FILLMODE = 8, - D3DRS_SHADEMODE = 9, - D3DRS_ZWRITEENABLE = 14, - D3DRS_ALPHATESTENABLE = 15, - D3DRS_LASTPIXEL = 16, - D3DRS_SRCBLEND = 19, - D3DRS_DESTBLEND = 20, - D3DRS_CULLMODE = 22, - D3DRS_ZFUNC = 23, - D3DRS_ALPHAREF = 24, - D3DRS_ALPHAFUNC = 25, - D3DRS_DITHERENABLE = 26, - D3DRS_ALPHABLENDENABLE = 27, - D3DRS_FOGENABLE = 28, - D3DRS_SPECULARENABLE = 29, - D3DRS_FOGCOLOR = 34, - D3DRS_FOGTABLEMODE = 35, - D3DRS_FOGSTART = 36, - D3DRS_FOGEND = 37, - D3DRS_FOGDENSITY = 38, - D3DRS_RANGEFOGENABLE = 48, - D3DRS_STENCILENABLE = 52, - D3DRS_STENCILFAIL = 53, - D3DRS_STENCILZFAIL = 54, - D3DRS_STENCILPASS = 55, - D3DRS_STENCILFUNC = 56, - D3DRS_STENCILREF = 57, - D3DRS_STENCILMASK = 58, - D3DRS_STENCILWRITEMASK = 59, - D3DRS_TEXTUREFACTOR = 60, - D3DRS_WRAP0 = 128, - D3DRS_WRAP1 = 129, - D3DRS_WRAP2 = 130, - D3DRS_WRAP3 = 131, - D3DRS_WRAP4 = 132, - D3DRS_WRAP5 = 133, - D3DRS_WRAP6 = 134, - D3DRS_WRAP7 = 135, - D3DRS_CLIPPING = 136, - D3DRS_LIGHTING = 137, - D3DRS_AMBIENT = 139, - D3DRS_FOGVERTEXMODE = 140, - D3DRS_COLORVERTEX = 141, - D3DRS_LOCALVIEWER = 142, - D3DRS_NORMALIZENORMALS = 143, - D3DRS_DIFFUSEMATERIALSOURCE = 145, - D3DRS_SPECULARMATERIALSOURCE = 146, - D3DRS_AMBIENTMATERIALSOURCE = 147, - D3DRS_EMISSIVEMATERIALSOURCE = 148, - D3DRS_VERTEXBLEND = 151, - D3DRS_CLIPPLANEENABLE = 152, - D3DRS_POINTSIZE = 154, - D3DRS_POINTSIZE_MIN = 155, - D3DRS_POINTSPRITEENABLE = 156, - D3DRS_POINTSCALEENABLE = 157, - D3DRS_POINTSCALE_A = 158, - D3DRS_POINTSCALE_B = 159, - D3DRS_POINTSCALE_C = 160, - D3DRS_MULTISAMPLEANTIALIAS = 161, - D3DRS_MULTISAMPLEMASK = 162, - D3DRS_PATCHEDGESTYLE = 163, - D3DRS_DEBUGMONITORTOKEN = 165, - D3DRS_POINTSIZE_MAX = 166, - D3DRS_INDEXEDVERTEXBLENDENABLE = 167, - D3DRS_COLORWRITEENABLE = 168, - D3DRS_TWEENFACTOR = 170, - D3DRS_BLENDOP = 171, - D3DRS_POSITIONDEGREE = 172, - D3DRS_NORMALDEGREE = 173, - D3DRS_SCISSORTESTENABLE = 174, - D3DRS_SLOPESCALEDEPTHBIAS = 175, - 
D3DRS_ANTIALIASEDLINEENABLE = 176, - D3DRS_MINTESSELLATIONLEVEL = 178, - D3DRS_MAXTESSELLATIONLEVEL = 179, - D3DRS_ADAPTIVETESS_X = 180, - D3DRS_ADAPTIVETESS_Y = 181, - D3DRS_ADAPTIVETESS_Z = 182, - D3DRS_ADAPTIVETESS_W = 183, - D3DRS_ENABLEADAPTIVETESSELLATION = 184, - D3DRS_TWOSIDEDSTENCILMODE = 185, - D3DRS_CCW_STENCILFAIL = 186, - D3DRS_CCW_STENCILZFAIL = 187, - D3DRS_CCW_STENCILPASS = 188, - D3DRS_CCW_STENCILFUNC = 189, - D3DRS_COLORWRITEENABLE1 = 190, - D3DRS_COLORWRITEENABLE2 = 191, - D3DRS_COLORWRITEENABLE3 = 192, - D3DRS_BLENDFACTOR = 193, - D3DRS_SRGBWRITEENABLE = 194, - D3DRS_DEPTHBIAS = 195, - D3DRS_WRAP8 = 198, - D3DRS_WRAP9 = 199, - D3DRS_WRAP10 = 200, - D3DRS_WRAP11 = 201, - D3DRS_WRAP12 = 202, - D3DRS_WRAP13 = 203, - D3DRS_WRAP14 = 204, - D3DRS_WRAP15 = 205, - D3DRS_SEPARATEALPHABLENDENABLE = 206, - D3DRS_SRCBLENDALPHA = 207, - D3DRS_DESTBLENDALPHA = 208, - D3DRS_BLENDOPALPHA = 209 -} D3DRENDERSTATETYPE; - -typedef enum _D3DRESOURCETYPE { - D3DRTYPE_SURFACE = 1, - D3DRTYPE_VOLUME = 2, - D3DRTYPE_TEXTURE = 3, - D3DRTYPE_VOLUMETEXTURE = 4, - D3DRTYPE_CUBETEXTURE = 5, - D3DRTYPE_VERTEXBUFFER = 6, - D3DRTYPE_INDEXBUFFER = 7 -} D3DRESOURCETYPE; -#define D3DRTYPECOUNT (D3DRTYPE_INDEXBUFFER+1) - -typedef enum _D3DSAMPLERSTATETYPE { - D3DSAMP_ADDRESSU = 1, - D3DSAMP_ADDRESSV = 2, - D3DSAMP_ADDRESSW = 3, - D3DSAMP_BORDERCOLOR = 4, - D3DSAMP_MAGFILTER = 5, - D3DSAMP_MINFILTER = 6, - D3DSAMP_MIPFILTER = 7, - D3DSAMP_MIPMAPLODBIAS = 8, - D3DSAMP_MAXMIPLEVEL = 9, - D3DSAMP_MAXANISOTROPY = 10, - D3DSAMP_SRGBTEXTURE = 11, - D3DSAMP_ELEMENTINDEX = 12, - D3DSAMP_DMAPOFFSET = 13 -} D3DSAMPLERSTATETYPE; - -typedef enum _D3DSAMPLER_TEXTURE_TYPE { - D3DSTT_UNKNOWN = 0<<27, - D3DSTT_1D = 1<<27, - D3DSTT_2D = 2<<27, - D3DSTT_CUBE = 3<<27, - D3DSTT_VOLUME = 4<<27 -} D3DSAMPLER_TEXTURE_TYPE; - -typedef enum _D3DSHADEMODE { - D3DSHADE_FLAT = 1, - D3DSHADE_GOURAUD = 2, - D3DSHADE_PHONG = 3 -} D3DSHADEMODE; - -typedef enum _D3DSHADER_ADDRESSMODE_TYPE { - 
D3DSHADER_ADDRMODE_ABSOLUTE = 0<<13, - D3DSHADER_ADDRMODE_RELATIVE = 1<<13 -} D3DSHADER_ADDRESSMODE_TYPE; - -typedef enum _D3DSHADER_COMPARISON { - D3DSPC_RESERVED0 = 0, - D3DSPC_GT = 1, - D3DSPC_EQ = 2, - D3DSPC_GE = 3, - D3DSPC_LT = 4, - D3DSPC_NE = 5, - D3DSPC_LE = 6, - D3DSPC_RESERVED1 = 7 -} D3DSHADER_COMPARISON; - -#define D3DDP_MAXTEXCOORD 8 - -#define D3DSI_OPCODE_MASK 0x0000FFFF -#define D3DSI_INSTLENGTH_MASK 0x0F000000 -#define D3DSI_INSTLENGTH_SHIFT 24 - -typedef enum _D3DSHADER_INSTRUCTION_OPCODE_TYPE { - D3DSIO_NOP = 0, - D3DSIO_MOV = 1, - D3DSIO_ADD = 2, - D3DSIO_SUB = 3, - D3DSIO_MAD = 4, - D3DSIO_MUL = 5, - D3DSIO_RCP = 6, - D3DSIO_RSQ = 7, - D3DSIO_DP3 = 8, - D3DSIO_DP4 = 9, - D3DSIO_MIN = 10, - D3DSIO_MAX = 11, - D3DSIO_SLT = 12, - D3DSIO_SGE = 13, - D3DSIO_EXP = 14, - D3DSIO_LOG = 15, - D3DSIO_LIT = 16, - D3DSIO_DST = 17, - D3DSIO_LRP = 18, - D3DSIO_FRC = 19, - D3DSIO_M4x4 = 20, - D3DSIO_M4x3 = 21, - D3DSIO_M3x4 = 22, - D3DSIO_M3x3 = 23, - D3DSIO_M3x2 = 24, - D3DSIO_CALL = 25, - D3DSIO_CALLNZ = 26, - D3DSIO_LOOP = 27, - D3DSIO_RET = 28, - D3DSIO_ENDLOOP = 29, - D3DSIO_LABEL = 30, - D3DSIO_DCL = 31, - D3DSIO_POW = 32, - D3DSIO_CRS = 33, - D3DSIO_SGN = 34, - D3DSIO_ABS = 35, - D3DSIO_NRM = 36, - D3DSIO_SINCOS = 37, - D3DSIO_REP = 38, - D3DSIO_ENDREP = 39, - D3DSIO_IF = 40, - D3DSIO_IFC = 41, - D3DSIO_ELSE = 42, - D3DSIO_ENDIF = 43, - D3DSIO_BREAK = 44, - D3DSIO_BREAKC = 45, - D3DSIO_MOVA = 46, - D3DSIO_DEFB = 47, - D3DSIO_DEFI = 48, - D3DSIO_TEXCOORD = 64, - D3DSIO_TEXKILL = 65, - D3DSIO_TEX = 66, - D3DSIO_TEXBEM = 67, - D3DSIO_TEXBEML = 68, - D3DSIO_TEXREG2AR = 69, - D3DSIO_TEXREG2GB = 70, - D3DSIO_TEXM3x2PAD = 71, - D3DSIO_TEXM3x2TEX = 72, - D3DSIO_TEXM3x3PAD = 73, - D3DSIO_TEXM3x3TEX = 74, - D3DSIO_RESERVED0 = 75, - D3DSIO_TEXM3x3SPEC = 76, - D3DSIO_TEXM3x3VSPEC = 77, - D3DSIO_EXPP = 78, - D3DSIO_LOGP = 79, - D3DSIO_CND = 80, - D3DSIO_DEF = 81, - D3DSIO_TEXREG2RGB = 82, - D3DSIO_TEXDP3TEX = 83, - D3DSIO_TEXM3x2DEPTH = 84, - D3DSIO_TEXDP3 = 85, - 
D3DSIO_TEXM3x3 = 86, - D3DSIO_TEXDEPTH = 87, - D3DSIO_CMP = 88, - D3DSIO_BEM = 89, - D3DSIO_DP2ADD = 90, - D3DSIO_DSX = 91, - D3DSIO_DSY = 92, - D3DSIO_TEXLDD = 93, - D3DSIO_SETP = 94, - D3DSIO_TEXLDL = 95, - D3DSIO_BREAKP = 96, - D3DSIO_PHASE = 0xFFFD, - D3DSIO_COMMENT = 0xFFFE, - D3DSIO_END = 0xFFFF -} D3DSHADER_INSTRUCTION_OPCODE_TYPE; - -#define D3DSI_COISSUE 0x40000000 - -#define D3DSP_DCL_USAGE_SHIFT 0 -#define D3DSP_DCL_USAGE_MASK 0x0000000f - -#define D3DSP_DCL_USAGEINDEX_SHIFT 16 -#define D3DSP_DCL_USAGEINDEX_MASK 0x000f0000 - -#define D3DSP_TEXTURETYPE_SHIFT 27 -#define D3DSP_TEXTURETYPE_MASK 0x78000000 - -#define D3DSP_REGNUM_MASK 0x000007FF - -#define D3DSP_WRITEMASK_0 0x00010000 -#define D3DSP_WRITEMASK_1 0x00020000 -#define D3DSP_WRITEMASK_2 0x00040000 -#define D3DSP_WRITEMASK_3 0x00080000 -#define D3DSP_WRITEMASK_ALL 0x000F0000 - -#define D3DSP_DSTMOD_SHIFT 20 -#define D3DSP_DSTMOD_MASK (0xF << D3DSP_DSTMOD_SHIFT) - -typedef enum _D3DSHADER_PARAM_DSTMOD_TYPE { - D3DSPDM_NONE = 0 << D3DSP_DSTMOD_SHIFT, - D3DSPDM_SATURATE = 1 << D3DSP_DSTMOD_SHIFT, - D3DSPDM_PARTIALPRECISION = 2 << D3DSP_DSTMOD_SHIFT, - D3DSPDM_MSAMPCENTROID = 4 << D3DSP_DSTMOD_SHIFT, - D3DSPDM_FORCE_DWORD = 0x7FFFFFFF -} D3DSHADER_PARAM_DSTMOD_TYPE; - -#define D3DSP_DSTSHIFT_SHIFT 24 -#define D3DSP_DSTSHIFT_MASK (0xF << D3DSP_DSTSHIFT_SHIFT) - -#define D3DSP_REGTYPE_SHIFT 28 -#define D3DSP_REGTYPE_SHIFT2 8 -#define D3DSP_REGTYPE_MASK (0x7 << D3DSP_REGTYPE_SHIFT) -#define D3DSP_REGTYPE_MASK2 0x00001800 - -typedef enum _D3DSHADER_MISCTYPE_OFFSETS { - D3DSMO_POSITION = 0, - D3DSMO_FACE = 1 -} D3DSHADER_MISCTYPE_OFFSETS; - -typedef enum _D3DSHADER_PARAM_REGISTER_TYPE { - D3DSPR_TEMP = 0, - D3DSPR_INPUT = 1, - D3DSPR_CONST = 2, - D3DSPR_ADDR = 3, - D3DSPR_TEXTURE = 3, - D3DSPR_RASTOUT = 4, - D3DSPR_ATTROUT = 5, - D3DSPR_TEXCRDOUT = 6, - D3DSPR_OUTPUT = 6, - D3DSPR_CONSTINT = 7, - D3DSPR_COLOROUT = 8, - D3DSPR_DEPTHOUT = 9, - D3DSPR_SAMPLER = 10, - D3DSPR_CONST2 = 11, - D3DSPR_CONST3 = 12, 
- D3DSPR_CONST4 = 13, - D3DSPR_CONSTBOOL = 14, - D3DSPR_LOOP = 15, - D3DSPR_TEMPFLOAT16 = 16, - D3DSPR_MISCTYPE = 17, - D3DSPR_LABEL = 18, - D3DSPR_PREDICATE = 19 -} D3DSHADER_PARAM_REGISTER_TYPE; - -#define D3DSP_SWIZZLE_SHIFT 16 -#define D3DSP_SWIZZLE_MASK (0xFF << D3DSP_SWIZZLE_SHIFT) - -#define D3DSP_NOSWIZZLE \ - ((0 << (D3DSP_SWIZZLE_SHIFT + 0)) | (1 << (D3DSP_SWIZZLE_SHIFT + 2)) | (2 << (D3DSP_SWIZZLE_SHIFT + 4)) | (3 << (D3DSP_SWIZZLE_SHIFT + 6))) - -#define D3DSP_SRCMOD_SHIFT 24 -#define D3DSP_SRCMOD_MASK (0xF << D3DSP_SRCMOD_SHIFT) - -typedef enum _D3DSHADER_PARAM_SRCMOD_TYPE { - D3DSPSM_NONE = 0<<24, - D3DSPSM_NEG = 1<<24, - D3DSPSM_BIAS = 2<<24, - D3DSPSM_BIASNEG = 3<<24, - D3DSPSM_SIGN = 4<<24, - D3DSPSM_SIGNNEG = 5<<24, - D3DSPSM_COMP = 6<<24, - D3DSPSM_X2 = 7<<24, - D3DSPSM_X2NEG = 8<<24, - D3DSPSM_DZ = 9<<24, - D3DSPSM_DW = 10<<24, - D3DSPSM_ABS = 11<<24, - D3DSPSM_ABSNEG = 12<<24, - D3DSPSM_NOT = 13<<24 -} D3DSHADER_PARAM_SRCMOD_TYPE; - -#define D3DPS_VERSION(major, minor) (0xFFFF0000 | ((major) << 8) | (minor)) -#define D3DVS_VERSION(major, minor) (0xFFFE0000 | ((major) << 8) | (minor)) -#define D3DSHADER_VERSION_MAJOR(version) (((version) >> 8) & 0xFF) -#define D3DSHADER_VERSION_MINOR(version) (((version) >> 0) & 0xFF) - -#define D3DSI_COMMENTSIZE_SHIFT 16 -#define D3DSI_COMMENTSIZE_MASK (0x7FFF << D3DSI_COMMENTSIZE_SHIFT) - -typedef enum _D3DSTATEBLOCKTYPE { - D3DSBT_ALL = 1, - D3DSBT_PIXELSTATE = 2, - D3DSBT_VERTEXSTATE = 3 -} D3DSTATEBLOCKTYPE; - -typedef enum _D3DSTENCILOP { - D3DSTENCILOP_KEEP = 1, - D3DSTENCILOP_ZERO = 2, - D3DSTENCILOP_REPLACE = 3, - D3DSTENCILOP_INCRSAT = 4, - D3DSTENCILOP_DECRSAT = 5, - D3DSTENCILOP_INVERT = 6, - D3DSTENCILOP_INCR = 7, - D3DSTENCILOP_DECR = 8 -} D3DSTENCILOP; - -typedef enum _D3DSWAPEFFECT { - D3DSWAPEFFECT_DISCARD = 1, - D3DSWAPEFFECT_FLIP = 2, - D3DSWAPEFFECT_COPY = 3, - D3DSWAPEFFECT_OVERLAY = 4, - D3DSWAPEFFECT_FLIPEX = 5 -} D3DSWAPEFFECT; - -typedef enum _D3DTEXTUREADDRESS { - D3DTADDRESS_WRAP = 1, 
- D3DTADDRESS_MIRROR = 2, - D3DTADDRESS_CLAMP = 3, - D3DTADDRESS_BORDER = 4, - D3DTADDRESS_MIRRORONCE = 5 -} D3DTEXTUREADDRESS; - -typedef enum _D3DTEXTUREFILTERTYPE { - D3DTEXF_NONE = 0, - D3DTEXF_POINT = 1, - D3DTEXF_LINEAR = 2, - D3DTEXF_ANISOTROPIC = 3, - D3DTEXF_PYRAMIDALQUAD = 6, - D3DTEXF_GAUSSIANQUAD = 7, - D3DTEXF_CONVOLUTIONMONO = 8, - D3DTEXF_FORCE_DWORD = 0x7fffffff -} D3DTEXTUREFILTERTYPE; - -typedef enum _D3DTEXTUREOP { - D3DTOP_DISABLE = 1, - D3DTOP_SELECTARG1 = 2, - D3DTOP_SELECTARG2 = 3, - D3DTOP_MODULATE = 4, - D3DTOP_MODULATE2X = 5, - D3DTOP_MODULATE4X = 6, - D3DTOP_ADD = 7, - D3DTOP_ADDSIGNED = 8, - D3DTOP_ADDSIGNED2X = 9, - D3DTOP_SUBTRACT = 10, - D3DTOP_ADDSMOOTH = 11, - D3DTOP_BLENDDIFFUSEALPHA = 12, - D3DTOP_BLENDTEXTUREALPHA = 13, - D3DTOP_BLENDFACTORALPHA = 14, - D3DTOP_BLENDTEXTUREALPHAPM = 15, - D3DTOP_BLENDCURRENTALPHA = 16, - D3DTOP_PREMODULATE = 17, - D3DTOP_MODULATEALPHA_ADDCOLOR = 18, - D3DTOP_MODULATECOLOR_ADDALPHA = 19, - D3DTOP_MODULATEINVALPHA_ADDCOLOR = 20, - D3DTOP_MODULATEINVCOLOR_ADDALPHA = 21, - D3DTOP_BUMPENVMAP = 22, - D3DTOP_BUMPENVMAPLUMINANCE = 23, - D3DTOP_DOTPRODUCT3 = 24, - D3DTOP_MULTIPLYADD = 25, - D3DTOP_LERP = 26 -} D3DTEXTUREOP; - -typedef enum _D3DTEXTURESTAGESTATETYPE { - D3DTSS_COLOROP = 1, - D3DTSS_COLORARG1 = 2, - D3DTSS_COLORARG2 = 3, - D3DTSS_ALPHAOP = 4, - D3DTSS_ALPHAARG1 = 5, - D3DTSS_ALPHAARG2 = 6, - D3DTSS_BUMPENVMAT00 = 7, - D3DTSS_BUMPENVMAT01 = 8, - D3DTSS_BUMPENVMAT10 = 9, - D3DTSS_BUMPENVMAT11 = 10, - D3DTSS_TEXCOORDINDEX = 11, - D3DTSS_BUMPENVLSCALE = 22, - D3DTSS_BUMPENVLOFFSET = 23, - D3DTSS_TEXTURETRANSFORMFLAGS = 24, - D3DTSS_COLORARG0 = 26, - D3DTSS_ALPHAARG0 = 27, - D3DTSS_RESULTARG = 28, - D3DTSS_CONSTANT = 32 -} D3DTEXTURESTAGESTATETYPE; - -/* MSDN has this in d3d9caps.h, but it should be here */ -#define D3DTSS_TCI_PASSTHRU 0x00000 -#define D3DTSS_TCI_CAMERASPACENORMAL 0x10000 -#define D3DTSS_TCI_CAMERASPACEPOSITION 0x20000 -#define D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 0x30000 
-#define D3DTSS_TCI_SPHEREMAP 0x40000 - -typedef enum _D3DTEXTURETRANSFORMFLAGS { - D3DTTFF_DISABLE = 0, - D3DTTFF_COUNT1 = 1, - D3DTTFF_COUNT2 = 2, - D3DTTFF_COUNT3 = 3, - D3DTTFF_COUNT4 = 4, - D3DTTFF_PROJECTED = 256 -} D3DTEXTURETRANSFORMFLAGS; - -typedef enum _D3DTRANSFORMSTATETYPE { - D3DTS_VIEW = 2, - D3DTS_PROJECTION = 3, - D3DTS_TEXTURE0 = 16, - D3DTS_TEXTURE1 = 17, - D3DTS_TEXTURE2 = 18, - D3DTS_TEXTURE3 = 19, - D3DTS_TEXTURE4 = 20, - D3DTS_TEXTURE5 = 21, - D3DTS_TEXTURE6 = 22, - D3DTS_TEXTURE7 = 23 -} D3DTRANSFORMSTATETYPE; - -#define D3DDMAPSAMPLER 256 -#define D3DVERTEXTEXTURESAMPLER0 (D3DDMAPSAMPLER+1) -#define D3DVERTEXTEXTURESAMPLER1 (D3DDMAPSAMPLER+2) -#define D3DVERTEXTEXTURESAMPLER2 (D3DDMAPSAMPLER+3) -#define D3DVERTEXTEXTURESAMPLER3 (D3DDMAPSAMPLER+4) - -#define D3DTS_WORLD D3DTS_WORLDMATRIX(0) -#define D3DTS_WORLD1 D3DTS_WORLDMATRIX(1) -#define D3DTS_WORLD2 D3DTS_WORLDMATRIX(2) -#define D3DTS_WORLD3 D3DTS_WORLDMATRIX(3) -#define D3DTS_WORLDMATRIX(index) (D3DTRANSFORMSTATETYPE)(index + 256) - -typedef enum _D3DVERTEXBLENDFLAGS { - D3DVBF_DISABLE = 0, - D3DVBF_1WEIGHTS = 1, - D3DVBF_2WEIGHTS = 2, - D3DVBF_3WEIGHTS = 3, - D3DVBF_TWEENING = 255, - D3DVBF_0WEIGHTS = 256 -} D3DVERTEXBLENDFLAGS; - -typedef enum _D3DVS_ADDRESSMODE_TYPE { - D3DVS_ADDRMODE_ABSOLUTE = 0<<13, - D3DVS_ADDRMODE_RELATIVE = 1<<13 -} D3DVS_ADDRESSMODE_TYPE; - -typedef enum _D3DVS_RASTOUT_OFFSETS { - D3DSRO_POSITION = 0, - D3DSRO_FOG = 1, - D3DSRO_POINT_SIZE = 2 -} D3DVS_RASTOUT_OFFSETS; - -typedef enum _D3DZBUFFERTYPE { - D3DZB_FALSE = 0, - D3DZB_TRUE = 1, - D3DZB_USEW = 2 -} D3DZBUFFERTYPE; - -/***************************************************************************** - * Structs * - *****************************************************************************/ -typedef struct D3DDISPLAYMODEEX { - UINT Size; - UINT Width; - UINT Height; - UINT RefreshRate; - D3DFORMAT Format; - D3DSCANLINEORDERING ScanLineOrdering; -} D3DDISPLAYMODEEX, *PD3DDISPLAYMODEEX, 
*LPD3DDISPLAYMODEEX; - -typedef struct D3DDISPLAYMODEFILTER { - UINT Size; - D3DFORMAT Format; - D3DSCANLINEORDERING ScanLineOrdering; -} D3DDISPLAYMODEFILTER, *PD3DDISPLAYMODEFILTER, *LPD3DDISPLAYMODEFILTER; - -typedef struct _D3D_OMAC { - BYTE Omac[16]; -} D3D_OMAC, *PD3D_OMAC, *LPD3D_OMAC; - -typedef struct _D3DADAPTER_IDENTIFIER9 { - char Driver[512]; - char Description[512]; - char DeviceName[32]; - DWORD DriverVersionLowPart; - DWORD DriverVersionHighPart; - DWORD VendorId; - DWORD DeviceId; - DWORD SubSysId; - DWORD Revision; - GUID DeviceIdentifier; - DWORD WHQLLevel; -} D3DADAPTER_IDENTIFIER9, *PD3DADAPTER_IDENTIFIER9, *LPD3DADAPTER_IDENTIFIER9; - -typedef struct _D3DAES_CTR_IV { - UINT64 IV; - UINT64 Count; -} D3DAES_CTR_IV, *PD3DAES_CTR_IV, *LPD3DAES_CTR_IV; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT { - D3D_OMAC omac; - GUID ConfigureType; - HANDLE hChannel; - UINT SequenceNumber; -} D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT, *PD3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION { - D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; - HANDLE DXVA2DecodeHandle; - HANDLE CryptoSessionHandle; - HANDLE DeviceHandle; -} D3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION, *PD3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURECRYPTOSESSION; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE { - D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; - UINT StartSequenceQuery; - UINT StartSequenceConfigure; -} D3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE, *PD3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE, *LPD3DAUTHENTICATEDCHANNEL_CONFIGUREINITIALIZE; - -typedef struct _D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS { - union { - struct { - UINT ProtectionEnabled : 1; - UINT OverlayOrFullscreenRequired : 1; - UINT Reserved : 30; - }; - UINT Value; - }; -} D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS, 
*PD3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS, *LPD3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION { - D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; - D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS Protections; -} D3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION, *PD3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION, *LPD3DAUTHENTICATEDCHANNEL_CONFIGUREPROTECTION; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE { - D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; - D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE ProcessIdentiferType; - HANDLE ProcessHandle; - BOOL AllowAccess; -} D3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE, *PD3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURESHAREDRESOURCE; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION { - D3DAUTHENTICATEDCHANNEL_CONFIGURE_INPUT Parameters; - GUID EncryptionGuid; -} D3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION, *PD3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION, *LPD3DAUTHENTICATEDCHANNEL_CONFIGUREUNCOMPRESSEDENCRYPTION; - -typedef struct _D3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT { - D3D_OMAC omac; - GUID ConfigureType; - HANDLE hChannel; - UINT SequenceNumber; - HRESULT ReturnCode; -} D3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_CONFIGURE_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERY_INPUT { - GUID QueryType; - HANDLE hChannel; - UINT SequenceNumber; -} D3DAUTHENTICATEDCHANNEL_QUERY_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERY_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERY_INPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT { - D3D_OMAC omac; - GUID QueryType; - HANDLE hChannel; - UINT SequenceNumber; - HRESULT ReturnCode; -} D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT; - -typedef struct 
_D3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - D3DAUTHENTICATEDCHANNELTYPE ChannelType; -} D3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYCHANNELTYPE_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; - HANDLE DXVA2DecodeHandle; -} D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_INPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - HANDLE DXVA2DecodeHandle; - HANDLE CryptoSessionHandle; - HANDLE DeviceHandle; -} D3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYCRYPTOSESSION_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - HANDLE DeviceHandle; -} D3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYDEVICEHANDLE_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - UINT NumEncryptionGuids; -} D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUIDCOUNT_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; - UINT EncryptionGuidIndex; -} D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_INPUT; - -typedef struct 
_D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - UINT EncryptionGuidIndex; - GUID EncryptionGuid; -} D3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYEVICTIONENCRYPTIONGUID_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - D3DBUSTYPE BusType; - BOOL bAccessibleInContiguousBlocks; - BOOL bAccessibleInNonContiguousBlocks; -} D3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYINFOBUSTYPE_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; - HANDLE DeviceHandle; - HANDLE CryptoSessionHandle; -} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_INPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - HANDLE DeviceHandle; - HANDLE CryptoSessionHandle; - UINT NumOutputIDs; -} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTIDCOUNT_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; - HANDLE DeviceHandle; - HANDLE CryptoSessionHandle; - UINT OutputIDIndex; -} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_INPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - HANDLE DeviceHandle; - HANDLE CryptoSessionHandle; - UINT OutputIDIndex; - UINT64 OutputID; -} D3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT, 
*PD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYOUTPUTID_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - D3DAUTHENTICATEDCHANNEL_PROTECTION_FLAGS ProtectionFlags; -} D3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYPROTECTION_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - UINT NumRestrictedSharedResourceProcesses; -} D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESSCOUNT_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_INPUT Input; - UINT ProcessIndex; -} D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_INPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - UINT ProcessIndex; - D3DAUTHENTICATEDCHANNEL_PROCESSIDENTIFIERTYPE ProcessIdentifer; - HANDLE ProcessHandle; -} D3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYRESTRICTEDSHAREDRESOURCEPROCESS_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - GUID EncryptionGuid; -} D3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT, 
*LPD3DAUTHENTICATEDCHANNEL_QUERYUNCOMPRESSEDENCRYPTIONLEVEL_OUTPUT; - -typedef struct _D3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT { - D3DAUTHENTICATEDCHANNEL_QUERY_OUTPUT Output; - UINT NumUnrestrictedProtectedSharedResources; -} D3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT, *PD3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT, *LPD3DAUTHENTICATEDCHANNEL_QUERYUNRESTRICTEDPROTECTEDSHAREDRESOURCECOUNT_OUTPUT; - -typedef struct _D3DBOX { - UINT Left; - UINT Top; - UINT Right; - UINT Bottom; - UINT Front; - UINT Back; -} D3DBOX, *PD3DBOX, *LPD3DBOX; - -typedef struct _D3DCLIPSTATUS9 { - DWORD ClipUnion; - DWORD ClipIntersection; -} D3DCLIPSTATUS9, *PD3DCLIPSTATUS9, *LPD3DCLIPSTATUS9; - -typedef struct _D3DCOLORVALUE { - float r; - float g; - float b; - float a; -} D3DCOLORVALUE, *PD3DCOLORVALUE, *LPD3DCOLORVALUE; - -typedef struct _D3DCOMPOSERECTDESC { - USHORT X, Y; - USHORT Width, Height; -} D3DCOMPOSERECTDESC, *PD3DCOMPOSERECTDESC, *LPD3DCOMPOSERECTDESC; - -typedef struct _D3DCOMPOSERECTDESTINATION { - USHORT SrcRectIndex; - USHORT Reserved; - SHORT X, Y; -} D3DCOMPOSERECTDESTINATION, *PD3DCOMPOSERECTDESTINATION, *LPD3DCOMPOSERECTDESTINATION; - -typedef struct _D3DDEVICE_CREATION_PARAMETERS { - UINT AdapterOrdinal; - D3DDEVTYPE DeviceType; - HWND hFocusWindow; - DWORD BehaviorFlags; -} D3DDEVICE_CREATION_PARAMETERS, *PD3DDEVICE_CREATION_PARAMETERS, *LPD3DDEVICE_CREATION_PARAMETERS; - -typedef struct _D3DDEVINFO_D3D9BANDWIDTHTIMINGS { - FLOAT MaxBandwidthUtilized; - FLOAT FrontEndUploadMemoryUtilizedPercent; - FLOAT VertexRateUtilizedPercent; - FLOAT TriangleSetupRateUtilizedPercent; - FLOAT FillRateUtilizedPercent; -} D3DDEVINFO_D3D9BANDWIDTHTIMINGS, *PD3DDEVINFO_D3D9BANDWIDTHTIMINGS, *LPD3DDEVINFO_D3D9BANDWIDTHTIMINGS; - -typedef struct _D3DDEVINFO_D3D9CACHEUTILIZATION { - FLOAT TextureCacheHitRate; - FLOAT PostTransformVertexCacheHitRate; -} D3DDEVINFO_D3D9CACHEUTILIZATION, 
*PD3DDEVINFO_D3D9CACHEUTILIZATION, *LPD3DDEVINFO_D3D9CACHEUTILIZATION; - -typedef struct _D3DDEVINFO_D3D9INTERFACETIMINGS { - FLOAT WaitingForGPUToUseApplicationResourceTimePercent; - FLOAT WaitingForGPUToAcceptMoreCommandsTimePercent; - FLOAT WaitingForGPUToStayWithinLatencyTimePercent; - FLOAT WaitingForGPUExclusiveResourceTimePercent; - FLOAT WaitingForGPUOtherTimePercent; -} D3DDEVINFO_D3D9INTERFACETIMINGS, *PD3DDEVINFO_D3D9INTERFACETIMINGS, *LPD3DDEVINFO_D3D9INTERFACETIMINGS; - -typedef struct _D3DDEVINFO_D3D9PIPELINETIMINGS { - FLOAT VertexProcessingTimePercent; - FLOAT PixelProcessingTimePercent; - FLOAT OtherGPUProcessingTimePercent; - FLOAT GPUIdleTimePercent; -} D3DDEVINFO_D3D9PIPELINETIMINGS, *PD3DDEVINFO_D3D9PIPELINETIMINGS, *LPD3DDEVINFO_D3D9PIPELINETIMINGS; - -typedef struct _D3DDEVINFO_D3D9STAGETIMINGS { - FLOAT MemoryProcessingPercent; - FLOAT ComputationProcessingPercent; -} D3DDEVINFO_D3D9STAGETIMINGS, *PD3DDEVINFO_D3D9STAGETIMINGS, *LPD3DDEVINFO_D3D9STAGETIMINGS; - -typedef struct _D3DDEVINFO_D3DVERTEXSTATS { - DWORD NumRenderedTriangles; - DWORD NumExtraClippingTriangles; -} D3DDEVINFO_D3DVERTEXSTATS, *LPD3DDEVINFO_D3DVERTEXSTATS; - -typedef struct _D3DRESOURCESTATS { - BOOL bThrashing; - DWORD ApproxBytesDownloaded; - DWORD NumEvicts; - DWORD NumVidCreates; - DWORD LastPri; - DWORD NumUsed; - DWORD NumUsedInVidMem; - DWORD WorkingSet; - DWORD WorkingSetBytes; - DWORD TotalManaged; - DWORD TotalBytes; -} D3DRESOURCESTATS, *PD3DRESOURCESTATS, *LPD3DRESOURCESTATS; - -typedef struct _D3DDEVINFO_RESOURCEMANAGER { - D3DRESOURCESTATS stats[(D3DRTYPE_INDEXBUFFER+1)]; -} D3DDEVINFO_RESOURCEMANAGER, *LPD3DDEVINFO_RESOURCEMANAGER; - -typedef struct _D3DDEVINFO_VCACHE { - DWORD Pattern; - DWORD OptMethod; - DWORD CacheSize; - DWORD MagicNumber; -} D3DDEVINFO_VCACHE, *LPD3DDEVINFO_VCACHE; - -typedef struct _D3DDISPLAYMODE { - UINT Width; - UINT Height; - UINT RefreshRate; - D3DFORMAT Format; -} D3DDISPLAYMODE, *PD3DDISPLAYMODE, *LPD3DDISPLAYMODE; - -typedef 
struct _D3DENCRYPTED_BLOCK_INFO { - UINT NumEncryptedBytesAtBeginning; - UINT NumBytesInSkipPattern; - UINT NumBytesInEncryptPattern; -} D3DENCRYPTED_BLOCK_INFO, *PD3DENCRYPTED_BLOCK_INFO, *LPD3DENCRYPTED_BLOCK_INFO; - -typedef struct _D3DGAMMARAMP { - WORD red [256]; - WORD green[256]; - WORD blue [256]; -} D3DGAMMARAMP, *PD3DGAMMARAMP, *LPD3DGAMMARAMP; - -typedef struct _D3DINDEXBUFFER_DESC { - D3DFORMAT Format; - D3DRESOURCETYPE Type; - DWORD Usage; - D3DPOOL Pool; - UINT Size; -} D3DINDEXBUFFER_DESC, *PD3DINDEXBUFFER_DESC, *LPD3DINDEXBUFFER_DESC; - -typedef struct _D3DVECTOR { - float x; - float y; - float z; -} D3DVECTOR, *PD3DVECTOR, *LPD3DVECTOR; - -typedef struct _D3DLIGHT9 { - D3DLIGHTTYPE Type; - D3DCOLORVALUE Diffuse; - D3DCOLORVALUE Specular; - D3DCOLORVALUE Ambient; - D3DVECTOR Position; - D3DVECTOR Direction; - float Range; - float Falloff; - float Attenuation0; - float Attenuation1; - float Attenuation2; - float Theta; - float Phi; -} D3DLIGHT9, *PD3DLIGHT9, *LPD3DLIGHT9; - -typedef struct _D3DLOCKED_BOX { - INT RowPitch; - INT SlicePitch; - void* pBits; -} D3DLOCKED_BOX, *PD3DLOCKED_BOX, *LPD3DLOCKED_BOX; - -typedef struct _D3DLOCKED_RECT { - INT Pitch; - void* pBits; -} D3DLOCKED_RECT, *PD3DLOCKED_RECT, *LPD3DLOCKED_RECT; - -typedef struct _D3DMATERIAL9 { - D3DCOLORVALUE Diffuse; - D3DCOLORVALUE Ambient; - D3DCOLORVALUE Specular; - D3DCOLORVALUE Emissive; - float Power; -} D3DMATERIAL9, *PD3DMATERIAL9, *LPD3DMATERIAL9; - -typedef struct _D3DMATRIX { - union { - struct { - float _11, _12, _13, _14; - float _21, _22, _23, _24; - float _31, _32, _33, _34; - float _41, _42, _43, _44; - }; - float m[4][4]; - }; -} D3DMATRIX, *PD3DMATRIX, *LPD3DMATRIX; - -typedef struct _D3DMEMORYPRESSURE { - UINT64 BytesEvictedFromProcess; - UINT64 SizeOfInefficientAllocation; - DWORD LevelOfEfficiency; -} D3DMEMORYPRESSURE, *PD3DMEMORYPRESSURE, *LPD3DMEMORYPRESSURE; - -typedef struct _D3DPRESENTSTATS { - UINT PresentCount; - UINT PresentRefreshCount; - UINT 
SyncRefreshCount; - LARGE_INTEGER SyncQPCTime; - LARGE_INTEGER SyncGPUTime; -} D3DPRESENTSTATS, *PD3DPRESENTSTATS, *LPD3DPRESENTSTATS; - -typedef struct _D3DPRESENT_PARAMETERS_ { - UINT BackBufferWidth; - UINT BackBufferHeight; - D3DFORMAT BackBufferFormat; - UINT BackBufferCount; - D3DMULTISAMPLE_TYPE MultiSampleType; - DWORD MultiSampleQuality; - D3DSWAPEFFECT SwapEffect; - HWND hDeviceWindow; - BOOL Windowed; - BOOL EnableAutoDepthStencil; - D3DFORMAT AutoDepthStencilFormat; - DWORD Flags; - UINT FullScreen_RefreshRateInHz; - UINT PresentationInterval; -} D3DPRESENT_PARAMETERS, *PD3DPRESENT_PARAMETERS, *LPD3DPRESENT_PARAMETERS; - -typedef struct _D3DRANGE { - UINT Offset; - UINT Size; -} D3DRANGE, *PD3DRANGE, *LPD3DRANGE; - -typedef struct _D3DRASTER_STATUS { - BOOL InVBlank; - UINT ScanLine; -} D3DRASTER_STATUS, *PD3DRASTER_STATUS, *LPD3DRASTER_STATUS; - -typedef struct _D3DRECT { - LONG x1; - LONG y1; - LONG x2; - LONG y2; -} D3DRECT, *PD3DRECT, *LPD3DRECT; - -typedef struct _D3DRECTPATCH_INFO { - UINT StartVertexOffsetWidth; - UINT StartVertexOffsetHeight; - UINT Width; - UINT Height; - UINT Stride; - D3DBASISTYPE Basis; - D3DDEGREETYPE Degree; -} D3DRECTPATCH_INFO, *PD3DRECTPATCH_INFO, *LPD3DRECTPATCH_INFO; - -typedef struct _D3DSURFACE_DESC { - D3DFORMAT Format; - D3DRESOURCETYPE Type; - DWORD Usage; - D3DPOOL Pool; - D3DMULTISAMPLE_TYPE MultiSampleType; - DWORD MultiSampleQuality; - UINT Width; - UINT Height; -} D3DSURFACE_DESC, *PD3DSURFACE_DESC, *LPD3DSURFACE_DESC; - -typedef struct _D3DTRIPATCH_INFO { - UINT StartVertexOffset; - UINT NumVertices; - D3DBASISTYPE Basis; - D3DDEGREETYPE Degree; -} D3DTRIPATCH_INFO, *PD3DTRIPATCH_INFO, *LPD3DTRIPATCH_INFO; - -typedef struct _D3DVERTEXBUFFER_DESC { - D3DFORMAT Format; - D3DRESOURCETYPE Type; - DWORD Usage; - D3DPOOL Pool; - UINT Size; - DWORD FVF; -} D3DVERTEXBUFFER_DESC, *PD3DVERTEXBUFFER_DESC, *LPD3DVERTEXBUFFER_DESC; - -typedef struct _D3DVERTEXELEMENT9 { - WORD Stream; - WORD Offset; - BYTE Type; - BYTE 
Method; - BYTE Usage; - BYTE UsageIndex; -} D3DVERTEXELEMENT9, *LPD3DVERTEXELEMENT9; - -typedef struct _D3DVIEWPORT9 { - DWORD X; - DWORD Y; - DWORD Width; - DWORD Height; - float MinZ; - float MaxZ; -} D3DVIEWPORT9, *PD3DVIEWPORT9, *LPD3DVIEWPORT9; - -typedef struct _D3DVOLUME_DESC { - D3DFORMAT Format; - D3DRESOURCETYPE Type; - DWORD Usage; - D3DPOOL Pool; - UINT Width; - UINT Height; - UINT Depth; -} D3DVOLUME_DESC, *PD3DVOLUME_DESC, *LPD3DVOLUME_DESC; - -#ifndef _WIN32 -/* If _WIN32 isn't declared it means only internal header files are used. To - * avoid a conflict, IUnknown is declared here rather than in d3d9.h */ - -typedef struct IUnknown IUnknown, *PUNKNOWN, *LPUNKNOWN; - -#ifdef __cplusplus -extern "C" const GUID IID_IUnknown; - -struct IUnknown -{ - virtual HRESULT WINAPI QueryInterface(REFIID riid, void **ppvObject) = 0; - virtual ULONG WINAPI AddRef() = 0; - virtual ULONG WINAPI Release() = 0; -}; -#else /* __cplusplus */ -extern const GUID IID_IUnknown; - -typedef struct IUnknownVtbl -{ - /* IUnknown */ - HRESULT (WINAPI *QueryInterface)(IUnknown *This, REFIID riid, void **ppvObject); - ULONG (WINAPI *AddRef)(IUnknown *This); - ULONG (WINAPI *Release)(IUnknown *This); -} IUnknownVtbl; - -struct IUnknown -{ - IUnknownVtbl *lpVtbl; -}; - -/* IUnknown macros */ -#define IUnknown_QueryInterface(p,a,b) (p)->lpVtbl->QueryInterface(p,a,b) -#define IUnknown_AddRef(p) (p)->lpVtbl->AddRef(p) -#define IUnknown_Release(p) (p)->lpVtbl->Release(p) -#endif /* __cplusplus */ -#endif /* _WIN32 */ - -#endif /* _D3D9TYPES_H_ */ diff --git a/nine-native/include/nine_sdl.h b/nine-native/include/nine_sdl.h deleted file mode 100644 index 294ad6040..000000000 --- a/nine-native/include/nine_sdl.h +++ /dev/null @@ -1,13 +0,0 @@ - -#ifdef __cplusplus -extern "C" { -#endif - -struct SDL_Window; -struct IDirect3D9Ex* Direct3DCreate9Ex_SDL(struct SDL_Window *win); -struct IDirect3D9* Direct3DCreate9_SDL(struct SDL_Window *win); - -#ifdef __cplusplus -} -#endif - diff --git 
a/nine-native/src/dri3.c b/nine-native/src/dri3.c deleted file mode 100644 index 06cfeba4f..000000000 --- a/nine-native/src/dri3.c +++ /dev/null @@ -1,788 +0,0 @@ -/* - * Copyright © 2014 Axel Davy - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include "dri3.h" - - -#ifdef _DEBUG -#define TRACE(...) fprintf(stderr, __VA_ARGS__) -#define ERR(...) fprintf(stderr, __VA_ARGS__) -#else -#define TRACE(...) -#define ERR(...) 
fprintf(stderr, __VA_ARGS__) -#endif - - -// --------------------------------- dlls/winex11.drv/dri3.c -------------------------------------------- - -BOOL -DRI3CheckExtension(Display *dpy, int major, int minor) -{ - xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); - xcb_dri3_query_version_cookie_t dri3_cookie; - xcb_dri3_query_version_reply_t *dri3_reply; - xcb_generic_error_t *error; - const xcb_query_extension_reply_t *extension; - int fd; - - xcb_prefetch_extension_data(xcb_connection, &xcb_dri3_id); - - extension = xcb_get_extension_data(xcb_connection, &xcb_dri3_id); - if (!(extension && extension->present)) { - ERR("DRI3 extension is not present\n"); - return FALSE; - } - - dri3_cookie = xcb_dri3_query_version(xcb_connection, major, minor); - - dri3_reply = xcb_dri3_query_version_reply(xcb_connection, dri3_cookie, &error); - if (!dri3_reply) { - free(error); - ERR("Issue getting requested version of DRI3: %d,%d\n", major, minor); - return FALSE; - } - - if (!DRI3Open(dpy, DefaultScreen(dpy), &fd)) { - ERR("DRI3 advertised, but not working\n"); - return FALSE; - } - close(fd); - - TRACE("DRI3 version %d,%d found. 
%d %d requested\n", major, minor, (int)dri3_reply->major_version, (int)dri3_reply->minor_version); - free(dri3_reply); - - return TRUE; -} - -BOOL -PRESENTCheckExtension(Display *dpy, int major, int minor) -{ - xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); - xcb_present_query_version_cookie_t present_cookie; - xcb_present_query_version_reply_t *present_reply; - xcb_generic_error_t *error; - const xcb_query_extension_reply_t *extension; - - xcb_prefetch_extension_data(xcb_connection, &xcb_present_id); - - extension = xcb_get_extension_data(xcb_connection, &xcb_present_id); - if (!(extension && extension->present)) { - ERR("PRESENT extension is not present\n"); - return FALSE; - } - - present_cookie = xcb_present_query_version(xcb_connection, major, minor); - - present_reply = xcb_present_query_version_reply(xcb_connection, present_cookie, &error); - if (!present_reply) { - free(error); - ERR("Issue getting requested version of PRESENT: %d,%d\n", major, minor); - return FALSE; - } - - TRACE("PRESENT version %d,%d found. 
%d %d requested\n", major, minor, (int)present_reply->major_version, (int)present_reply->minor_version); - free(present_reply); - - return TRUE; -} - -BOOL -DRI3Open(Display *dpy, int screen, int *device_fd) -{ - xcb_dri3_open_cookie_t cookie; - xcb_dri3_open_reply_t *reply; - xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); - int fd; - Window root = RootWindow(dpy, screen); - - cookie = xcb_dri3_open(xcb_connection, root, 0); - - reply = xcb_dri3_open_reply(xcb_connection, cookie, NULL); - if (!reply) - return FALSE; - - if (reply->nfd != 1) { - free(reply); - return FALSE; - } - - fd = xcb_dri3_open_reply_fds(xcb_connection, reply)[0]; - fcntl(fd, F_SETFD, FD_CLOEXEC); - - *device_fd = fd; - - return TRUE; -} - -BOOL -DRI3PixmapFromDmaBuf(Display *dpy, int screen, int fd, int width, int height, int stride, int depth, int bpp, Pixmap *pixmap) -{ - xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); - Window root = RootWindow(dpy, screen); - xcb_void_cookie_t cookie; - xcb_generic_error_t *error; - - cookie = xcb_dri3_pixmap_from_buffer_checked(xcb_connection, - (*pixmap = xcb_generate_id(xcb_connection)), - root, - 0, - width, height, stride, - depth, bpp, fd); - error = xcb_request_check(xcb_connection, cookie); /* performs a flush */ - if (error) { - ERR("Error using DRI3 to convert a DmaBufFd to pixmap\n"); - return FALSE; - } - return TRUE; -} - -BOOL -DRI3DmaBufFromPixmap(Display *dpy, Pixmap pixmap, int *fd, int *width, int *height, int *stride, int *depth, int *bpp) -{ - xcb_connection_t *xcb_connection = XGetXCBConnection(dpy); - xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie; - xcb_dri3_buffer_from_pixmap_reply_t *bp_reply; - - bp_cookie = xcb_dri3_buffer_from_pixmap(xcb_connection, pixmap); - bp_reply = xcb_dri3_buffer_from_pixmap_reply(xcb_connection, bp_cookie, NULL); - if (!bp_reply) - return FALSE; - *fd = xcb_dri3_buffer_from_pixmap_reply_fds(xcb_connection, bp_reply)[0]; - *width = bp_reply->width; - *height = bp_reply->height; - 
*stride = bp_reply->stride; - *depth = bp_reply->depth; - *bpp = bp_reply->depth; - return TRUE; -} - -struct PRESENTPriv { - xcb_connection_t *xcb_connection; - xcb_connection_t *xcb_connection_bis; /* to avoid libxcb thread bugs, use a different connection to present pixmaps */ - XID window; - uint64_t last_msc; - uint64_t last_target; - uint32_t last_serial_given; - xcb_special_event_t *special_event; - PRESENTPixmapPriv *first_present_priv; - int pixmap_present_pending; - BOOL notify_with_serial_pending; - pthread_mutex_t mutex_present; /* protect readind/writing present_priv things */ - pthread_mutex_t mutex_xcb_wait; - BOOL xcb_wait; -}; - -struct PRESENTPixmapPriv { - PRESENTpriv *present_priv; - Pixmap pixmap; - BOOL released; - unsigned int width; - unsigned int height; - unsigned int depth; - BOOL present_complete_pending; - uint32_t serial; - BOOL last_present_was_flip; - PRESENTPixmapPriv *next; -}; - -static PRESENTPixmapPriv *PRESENTFindPixmapPriv(PRESENTpriv *present_priv, uint32_t serial) -{ - PRESENTPixmapPriv *current = present_priv->first_present_priv; - - while (current) { - if (current->serial == serial) - return current; - current = current->next; - } - return NULL; -} - -static void PRESENThandle_events(PRESENTpriv *present_priv, xcb_present_generic_event_t *ge) -{ - PRESENTPixmapPriv *present_pixmap_priv = NULL; - - switch (ge->evtype) { - case XCB_PRESENT_COMPLETE_NOTIFY: { - xcb_present_complete_notify_event_t *ce = (void *) ge; - if (ce->kind == XCB_PRESENT_COMPLETE_KIND_NOTIFY_MSC) { - if (ce->serial) - present_priv->notify_with_serial_pending = FALSE; - free(ce); - return; - } - present_pixmap_priv = PRESENTFindPixmapPriv(present_priv, ce->serial); - if (!present_pixmap_priv || ce->kind != XCB_PRESENT_COMPLETE_KIND_PIXMAP) { - ERR("FATAL ERROR: PRESENT handling failed\n"); - free(ce); - return; - } - present_pixmap_priv->present_complete_pending = FALSE; - switch (ce->mode) { - case XCB_PRESENT_COMPLETE_MODE_FLIP: - 
present_pixmap_priv->last_present_was_flip = TRUE; - break; - case XCB_PRESENT_COMPLETE_MODE_COPY: - present_pixmap_priv->last_present_was_flip = FALSE; - break; - } - present_priv->pixmap_present_pending--; - present_priv->last_msc = ce->msc; - break; - } - case XCB_PRESENT_EVENT_IDLE_NOTIFY: { - xcb_present_idle_notify_event_t *ie = (void *) ge; - present_pixmap_priv = PRESENTFindPixmapPriv(present_priv, ie->serial); - if (!present_pixmap_priv || present_pixmap_priv->pixmap != ie->pixmap) { - ERR("FATAL ERROR: PRESENT handling failed\n"); - free(ie); - return; - } - present_pixmap_priv->released = TRUE; - break; - } - } - free(ge); -} - -static void PRESENTflush_events(PRESENTpriv *present_priv, BOOL assert_no_other_thread_waiting) -{ - xcb_generic_event_t *ev; - - if ((present_priv->xcb_wait && !assert_no_other_thread_waiting) || /* don't steal events to someone waiting */ - !present_priv->special_event) - return; - - while ((ev = xcb_poll_for_special_event(present_priv->xcb_connection, present_priv->special_event)) != NULL) { - PRESENThandle_events(present_priv, (void *) ev); - } -} - -static BOOL PRESENTwait_events(PRESENTpriv *present_priv, BOOL allow_other_threads) -{ - xcb_generic_event_t *ev; - - if (allow_other_threads) { - present_priv->xcb_wait = TRUE; - pthread_mutex_lock(&present_priv->mutex_xcb_wait); - pthread_mutex_unlock(&present_priv->mutex_present); - } - ev = xcb_wait_for_special_event(present_priv->xcb_connection, present_priv->special_event); - if (allow_other_threads) { - pthread_mutex_unlock(&present_priv->mutex_xcb_wait); - pthread_mutex_lock(&present_priv->mutex_present); - present_priv->xcb_wait = FALSE; - } - if (!ev) { - ERR("FATAL error: xcb had an error\n"); - return FALSE; - } - - PRESENThandle_events(present_priv, (void *) ev); - return TRUE; -} - -static struct xcb_connection_t * -create_xcb_connection(Display *dpy) -{ - int screen_num = DefaultScreen(dpy); - xcb_connection_t *ret; - xcb_xfixes_query_version_cookie_t cookie; - 
xcb_xfixes_query_version_reply_t *rep; - - ret = xcb_connect(DisplayString(dpy), &screen_num); - cookie = xcb_xfixes_query_version_unchecked(ret, XCB_XFIXES_MAJOR_VERSION, XCB_XFIXES_MINOR_VERSION); - rep = xcb_xfixes_query_version_reply(ret, cookie, NULL); - if (rep) - free(rep); - return ret; -} - -BOOL -PRESENTInit(Display *dpy, PRESENTpriv **present_priv) -{ - *present_priv = (PRESENTpriv *) calloc(1, sizeof(PRESENTpriv)); - if (!*present_priv) { - return FALSE; - } - (*present_priv)->xcb_connection = create_xcb_connection(dpy); - (*present_priv)->xcb_connection_bis = create_xcb_connection(dpy); - pthread_mutex_init(&(*present_priv)->mutex_present, NULL); - pthread_mutex_init(&(*present_priv)->mutex_xcb_wait, NULL); - return TRUE; -} - -static void PRESENTForceReleases(PRESENTpriv *present_priv) -{ - PRESENTPixmapPriv *current = NULL; - - if (!present_priv->window) - return; - - /* There should be no other thread listening for events here. - * This can happen when hDestWindowOverride changes without reset. - * This case should never happen, but can happen in theory.*/ - if (present_priv->xcb_wait) { - xcb_present_notify_msc(present_priv->xcb_connection, present_priv->window, 0, 0, 0, 0); - xcb_flush(present_priv->xcb_connection); - pthread_mutex_lock(&present_priv->mutex_xcb_wait); - pthread_mutex_unlock(&present_priv->mutex_xcb_wait); - /* the problem here is that we don't have access to the event the other thread got. - * It is either presented event, idle event or notify event. - */ - while (present_priv->pixmap_present_pending >= 2) - PRESENTwait_events(present_priv, FALSE); - PRESENTflush_events(present_priv, TRUE); - /* Remaining events to come can be a pair of present/idle, - * or an idle, or nothing. 
To be sure we are after all pixmaps - * have been presented, add an event to the queue that can only - * be after the present event, then if we receive an event more, - * we are sure all pixmaps were presented */ - present_priv->notify_with_serial_pending = TRUE; - xcb_present_notify_msc(present_priv->xcb_connection, present_priv->window, 1, present_priv->last_target + 5, 0, 0); - xcb_flush(present_priv->xcb_connection); - while (present_priv->notify_with_serial_pending) - PRESENTwait_events(present_priv, FALSE); - /* Now we are sure we are not expecting any new event */ - } else { - while (present_priv->pixmap_present_pending) /* wait all sent pixmaps are presented */ - PRESENTwait_events(present_priv, FALSE); - PRESENTflush_events(present_priv, TRUE); /* may be remaining idle event */ - /* Since idle events are send with the complete events when it is not flips, - * we are not expecting any new event here */ - } - - current = present_priv->first_present_priv; - while (current) { - if (!current->released) { - if (!current->last_present_was_flip && !present_priv->xcb_wait) { - ERR("ERROR: a pixmap seems not released by PRESENT for no reason. Code bug.\n"); - } else { - /* Present the same pixmap with a non-valid part to force the copy mode and the releases */ - xcb_xfixes_region_t valid, update; - xcb_rectangle_t rect_update; - rect_update.x = 0; - rect_update.y = 0; - rect_update.width = 8; - rect_update.height = 1; - valid = xcb_generate_id(present_priv->xcb_connection); - update = xcb_generate_id(present_priv->xcb_connection); - xcb_xfixes_create_region(present_priv->xcb_connection, valid, 1, &rect_update); - xcb_xfixes_create_region(present_priv->xcb_connection, update, 1, &rect_update); - /* here we know the pixmap has been presented. 
Thus if it is on screen, - * the following request can only make it released by the server if it is not */ - xcb_present_pixmap(present_priv->xcb_connection, present_priv->window, - current->pixmap, 0, valid, update, 0, 0, None, None, - None, XCB_PRESENT_OPTION_COPY | XCB_PRESENT_OPTION_ASYNC, 0, 0, 0, 0, NULL); - xcb_flush(present_priv->xcb_connection); - PRESENTwait_events(present_priv, FALSE); /* by assumption this can only be idle event */ - PRESENTflush_events(present_priv, TRUE); /* Shoudln't be needed */ - } - } - current = current->next; - } - /* Now all pixmaps are released (possibility if xcb_wait is true that one is not aware yet), - * and we don't expect any new Present event to come from Xserver */ -} - -static void PRESENTFreeXcbQueue(PRESENTpriv *present_priv) -{ - if (present_priv->window) { - xcb_unregister_for_special_event(present_priv->xcb_connection, present_priv->special_event); - present_priv->last_msc = 0; - present_priv->last_target = 0; - present_priv->special_event = NULL; - } -} - -static BOOL PRESENTPrivChangeWindow(PRESENTpriv *present_priv, XID window) -{ - xcb_void_cookie_t cookie; - xcb_generic_error_t *error; - xcb_present_event_t eid; - - PRESENTForceReleases(present_priv); - PRESENTFreeXcbQueue(present_priv); - present_priv->window = window; - - if (window) { - cookie = xcb_present_select_input_checked(present_priv->xcb_connection, - (eid = xcb_generate_id(present_priv->xcb_connection)), - window, - XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY| - XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY); - present_priv->special_event = xcb_register_for_special_xge(present_priv->xcb_connection, - &xcb_present_id, - eid, NULL); - error = xcb_request_check(present_priv->xcb_connection, cookie); /* performs a flush */ - if (error || !present_priv->special_event) { - ERR("FAILED to use the X PRESENT extension. 
Was the destination a window ?\n"); - if (present_priv->special_event) - xcb_unregister_for_special_event(present_priv->xcb_connection, present_priv->special_event); - present_priv->special_event = NULL; - present_priv->window = 0; - } - } - return (present_priv->window != 0); -} - -/* Destroy the content, except the link and the struct mem */ -static void -PRESENTDestroyPixmapContent(Display *dpy, PRESENTPixmapPriv *present_pixmap) -{ - XFreePixmap(dpy, present_pixmap->pixmap); -} - -void -PRESENTDestroy(Display *dpy, PRESENTpriv *present_priv) -{ - PRESENTPixmapPriv *current = NULL; - - pthread_mutex_lock(&present_priv->mutex_present); - - PRESENTForceReleases(present_priv); - - current = present_priv->first_present_priv; - while (current) { - PRESENTPixmapPriv *next = current->next; - PRESENTDestroyPixmapContent(dpy, current); - free(current); - current = next; - } - - PRESENTFreeXcbQueue(present_priv); - - xcb_disconnect(present_priv->xcb_connection); - xcb_disconnect(present_priv->xcb_connection_bis); - pthread_mutex_unlock(&present_priv->mutex_present); - pthread_mutex_destroy(&present_priv->mutex_present); - pthread_mutex_destroy(&present_priv->mutex_xcb_wait); - - free(present_priv); -} - -BOOL -PRESENTPixmapInit(PRESENTpriv *present_priv, Pixmap pixmap, PRESENTPixmapPriv **present_pixmap_priv) -{ - xcb_get_geometry_cookie_t cookie; - xcb_get_geometry_reply_t *reply; - - cookie = xcb_get_geometry(present_priv->xcb_connection, pixmap); - reply = xcb_get_geometry_reply(present_priv->xcb_connection, cookie, NULL); - - if (!reply) - return FALSE; - - *present_pixmap_priv = (PRESENTPixmapPriv *) calloc(1, sizeof(PRESENTPixmapPriv)); - if (!*present_pixmap_priv) { - free(reply); - return FALSE; - } - pthread_mutex_lock(&present_priv->mutex_present); - - (*present_pixmap_priv)->released = TRUE; - (*present_pixmap_priv)->pixmap = pixmap; - (*present_pixmap_priv)->present_priv = present_priv; - (*present_pixmap_priv)->next = present_priv->first_present_priv; - 
(*present_pixmap_priv)->width = reply->width; - (*present_pixmap_priv)->height = reply->height; - (*present_pixmap_priv)->depth = reply->depth; - free(reply); - - present_priv->last_serial_given++; - (*present_pixmap_priv)->serial = present_priv->last_serial_given; - present_priv->first_present_priv = *present_pixmap_priv; - - pthread_mutex_unlock(&present_priv->mutex_present); - return TRUE; -} - -BOOL -PRESENTTryFreePixmap(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv) -{ - PRESENTpriv *present_priv = present_pixmap_priv->present_priv; - PRESENTPixmapPriv *current; - - pthread_mutex_lock(&present_priv->mutex_present); - - if (!present_pixmap_priv->released || present_pixmap_priv->present_complete_pending) { - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - - if (present_priv->first_present_priv == present_pixmap_priv) { - present_priv->first_present_priv = present_pixmap_priv->next; - goto free_priv; - } - - current = present_priv->first_present_priv; - while (current->next != present_pixmap_priv) - current = current->next; - current->next = present_pixmap_priv->next; -free_priv: - PRESENTDestroyPixmapContent(dpy, present_pixmap_priv); - free(present_pixmap_priv); - pthread_mutex_unlock(&present_priv->mutex_present); - return TRUE; -} - -BOOL -PRESENTHelperCopyFront(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv) -{ - PRESENTpriv *present_priv = present_pixmap_priv->present_priv; - xcb_void_cookie_t cookie; - xcb_generic_error_t *error; - - uint32_t v = 0; - xcb_gcontext_t gc; - - pthread_mutex_lock(&present_priv->mutex_present); - - if (!present_priv->window) { - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - - xcb_create_gc(present_priv->xcb_connection, - (gc = xcb_generate_id(present_priv->xcb_connection)), - present_priv->window, - XCB_GC_GRAPHICS_EXPOSURES, - &v); - cookie = xcb_copy_area_checked(present_priv->xcb_connection, - present_priv->window, - present_pixmap_priv->pixmap, - gc, - 0, 0, 
0, 0, - present_pixmap_priv->width, - present_pixmap_priv->height); - error = xcb_request_check(present_priv->xcb_connection, cookie); - xcb_free_gc(present_priv->xcb_connection, gc); - pthread_mutex_unlock(&present_priv->mutex_present); - return (error != NULL); -} - -BOOL -PRESENTPixmap(Display *dpy, XID window, - PRESENTPixmapPriv *present_pixmap_priv, D3DPRESENT_PARAMETERS *pPresentationParameters, - const RECT *pSourceRect, const RECT *pDestRect, const RGNDATA *pDirtyRegion) -{ - PRESENTpriv *present_priv = present_pixmap_priv->present_priv; - xcb_void_cookie_t cookie; - xcb_generic_error_t *error; - int64_t target_msc, presentationInterval; - xcb_xfixes_region_t valid, update; - int16_t x_off, y_off; - uint32_t options = XCB_PRESENT_OPTION_NONE; - - pthread_mutex_lock(&present_priv->mutex_present); - - if (window != present_priv->window) - PRESENTPrivChangeWindow(present_priv, window); - - if (!window) { - ERR("ERROR: Try to Present a pixmap on a NULL window\n"); - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - - PRESENTflush_events(present_priv, FALSE); - if (!present_pixmap_priv->released || present_pixmap_priv->present_complete_pending) { - ERR("FATAL ERROR: Trying to Present a pixmap not released\n"); - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - target_msc = present_priv->last_msc; - switch(pPresentationParameters->PresentationInterval) { - case D3DPRESENT_INTERVAL_DEFAULT: - case D3DPRESENT_INTERVAL_ONE: - presentationInterval = 1; - break; - case D3DPRESENT_INTERVAL_TWO: - presentationInterval = 2; - break; - case D3DPRESENT_INTERVAL_THREE: - presentationInterval = 3; - break; - case D3DPRESENT_INTERVAL_FOUR: - presentationInterval = 4; - break; - case D3DPRESENT_INTERVAL_IMMEDIATE: - default: - presentationInterval = 0; - options |= XCB_PRESENT_OPTION_ASYNC; - break; - } - target_msc += presentationInterval * (present_priv->pixmap_present_pending + 1); - - /* Note: PRESENT defines some way to do 
partial copy: - * presentproto: - * 'x-off' and 'y-off' define the location in the window where - * the 0,0 location of the pixmap will be presented. valid-area - * and update-area are relative to the pixmap. - */ - if (!pSourceRect && !pDestRect && !pDirtyRegion) { - valid = 0; - update = 0; - x_off = 0; - y_off = 0; - } else { - xcb_rectangle_t rect_update; - xcb_rectangle_t *rect_updates; - int i; - - rect_update.x = 0; - rect_update.y = 0; - rect_update.width = present_pixmap_priv->width; - rect_update.height = present_pixmap_priv->height; - x_off = 0; - y_off = 0; - if (pSourceRect) { - x_off = -pSourceRect->left; - y_off = -pSourceRect->top; - rect_update.x = pSourceRect->left; - rect_update.y = pSourceRect->top; - rect_update.width = pSourceRect->right - pSourceRect->left; - rect_update.height = pSourceRect->bottom - pSourceRect->top; - } - if (pDestRect) { - x_off += pDestRect->left; - y_off += pDestRect->top; - rect_update.width = pDestRect->right - pDestRect->left; - rect_update.height = pDestRect->bottom - pDestRect->top; - /* Note: the size of pDestRect and pSourceRect are supposed to be the same size - * because the driver would have done things to assure that. 
*/ - } - valid = xcb_generate_id(present_priv->xcb_connection_bis); - update = xcb_generate_id(present_priv->xcb_connection_bis); - xcb_xfixes_create_region(present_priv->xcb_connection_bis, valid, 1, &rect_update); - if (pDirtyRegion && pDirtyRegion->rdh.nCount) { - rect_updates = (void *) calloc(pDirtyRegion->rdh.nCount, sizeof(xcb_rectangle_t)); - for (i = 0; i < pDirtyRegion->rdh.nCount; i++) - { - RECT rc; - memcpy(&rc, pDirtyRegion->Buffer + i * sizeof(RECT), sizeof(RECT)); - rect_update.x = rc.left; - rect_update.y = rc.top; - rect_update.width = rc.right - rc.left; - rect_update.height = rc.bottom - rc.top; - memcpy(rect_updates + i * sizeof(xcb_rectangle_t), &rect_update, sizeof(xcb_rectangle_t)); - } - xcb_xfixes_create_region(present_priv->xcb_connection_bis, update, pDirtyRegion->rdh.nCount, rect_updates); - free(rect_updates); - } else - xcb_xfixes_create_region(present_priv->xcb_connection_bis, update, 1, &rect_update); - } - if (pPresentationParameters->SwapEffect == D3DSWAPEFFECT_COPY) - options |= XCB_PRESENT_OPTION_COPY; - cookie = xcb_present_pixmap_checked(present_priv->xcb_connection_bis, - window, - present_pixmap_priv->pixmap, - present_pixmap_priv->serial, - valid, update, x_off, y_off, - None, None, None, options, - target_msc, 0, 0, 0, NULL); - error = xcb_request_check(present_priv->xcb_connection_bis, cookie); /* performs a flush */ - - if (update) - xcb_xfixes_destroy_region(present_priv->xcb_connection_bis, update); - if (valid) - xcb_xfixes_destroy_region(present_priv->xcb_connection_bis, valid); - - if (error) { - xcb_get_geometry_cookie_t cookie_geom; - xcb_get_geometry_reply_t *reply; - - cookie_geom = xcb_get_geometry(present_priv->xcb_connection_bis, window); - reply = xcb_get_geometry_reply(present_priv->xcb_connection_bis, cookie_geom, NULL); - - ERR("Error using PRESENT. Here some debug info\n"); - if (!reply) { - ERR("Error querying window info. 
Perhaps it doesn't exist anymore\n"); - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - ERR("Pixmap: width=%d, height=%d, depth=%d\n", - present_pixmap_priv->width, present_pixmap_priv->height, - present_pixmap_priv->depth); - ERR("Window: width=%d, height=%d, depth=%d, x=%d, y=%d\n", - (int) reply->width, (int) reply->height, - (int) reply->depth, (int) reply->x, (int) reply->y); - ERR("Present parameter: PresentationInterval=%d, BackBufferCount=%d, Pending presentations=%d\n", - pPresentationParameters->PresentationInterval, - pPresentationParameters->BackBufferCount, - present_priv->pixmap_present_pending - ); - if (present_pixmap_priv->depth != reply->depth) - ERR("Depths are different. PRESENT needs the pixmap and the window have same depth\n"); - free(reply); - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - present_priv->last_target = target_msc; - present_priv->pixmap_present_pending++; - present_pixmap_priv->present_complete_pending = TRUE; - present_pixmap_priv->released = FALSE; - pthread_mutex_unlock(&present_priv->mutex_present); - return TRUE; -} - -BOOL -PRESENTWaitPixmapReleased(PRESENTPixmapPriv *present_pixmap_priv) -{ - PRESENTpriv *present_priv = present_pixmap_priv->present_priv; - - pthread_mutex_lock(&present_priv->mutex_present); - - PRESENTflush_events(present_priv, FALSE); - - while (!present_pixmap_priv->released || present_pixmap_priv->present_complete_pending) { - /* Note: following if should not happen because we'll never - * use two PRESENTWaitPixmapReleased in parallels on same window. 
- * However it would make it work in that case */ - if (present_priv->xcb_wait) { /* we allow only one thread to dispatch events */ - pthread_mutex_lock(&present_priv->mutex_xcb_wait); - /* here the other thread got an event but hasn't treated it yet */ - pthread_mutex_unlock(&present_priv->mutex_xcb_wait); - pthread_mutex_unlock(&present_priv->mutex_present); - struct timespec duration = { 0, 10 * 1000*1000 }; - nanosleep(&duration,NULL); /* Let it treat the event */ - pthread_mutex_lock(&present_priv->mutex_present); - } else if (!PRESENTwait_events(present_priv, TRUE)) { - pthread_mutex_unlock(&present_priv->mutex_present); - return FALSE; - } - } - pthread_mutex_unlock(&present_priv->mutex_present); - return TRUE; -} diff --git a/nine-native/src/dri3.h b/nine-native/src/dri3.h deleted file mode 100644 index ee2264bf5..000000000 --- a/nine-native/src/dri3.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright © 2014 Axel Davy - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#ifndef __XNINE_DRI3_H -#define __XNINE_DRI3_H - -#include -#include -#include -#include -#include -#include - -BOOL -DRI3CheckExtension(Display *dpy, int major, int minor); - -BOOL -PRESENTCheckExtension(Display *dpy, int major, int minor); - -BOOL -DRI3Open(Display *dpy, int screen, int *device_fd); - -BOOL -DRI3PixmapFromDmaBuf(Display *dpy, int screen, int fd, int width, int height, int stride, int depth, int bpp, Pixmap *pixmap); - -BOOL -DRI3DmaBufFromPixmap(Display *dpy, Pixmap pixmap, int *fd, int *width, int *height, int *stride, int *depth, int *bpp); - -typedef struct PRESENTPriv PRESENTpriv; -typedef struct PRESENTPixmapPriv PRESENTPixmapPriv; - -BOOL -PRESENTInit(Display *dpy, PRESENTpriv **present_priv); - -/* will clean properly and free all PRESENTPixmapPriv associated to PRESENTpriv. - * PRESENTPixmapPriv should not be freed by something else. - * If never a PRESENTPixmapPriv has to be destroyed, - * please destroy the current PRESENTpriv and create a new one. 
- * This will take care than all pixmaps are released */ -void -PRESENTDestroy(Display *dpy, PRESENTpriv *present_priv); - -BOOL -PRESENTPixmapInit(PRESENTpriv *present_priv, Pixmap pixmap, PRESENTPixmapPriv **present_pixmap_priv); - -BOOL -PRESENTTryFreePixmap(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv); - -BOOL -PRESENTHelperCopyFront(Display *dpy, PRESENTPixmapPriv *present_pixmap_priv); - -BOOL -PRESENTPixmap(Display *dpy, XID window, - PRESENTPixmapPriv *present_pixmap_priv, D3DPRESENT_PARAMETERS *pPresentationParameters, - const RECT *pSourceRect, const RECT *pDestRect, const RGNDATA *pDirtyRegion); - -BOOL -PRESENTWaitPixmapReleased(PRESENTPixmapPriv *present_pixmap_priv); - -#endif /* __XNINE_DRI3_H */ diff --git a/nine-native/src/nine_sdl.c b/nine-native/src/nine_sdl.c deleted file mode 100644 index 1f6f1633a..000000000 --- a/nine-native/src/nine_sdl.c +++ /dev/null @@ -1,1256 +0,0 @@ -// ---------------------------------------- -// nine_sdl - -#include "nine_sdl.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include - -#include "dri3.h" - -// ----------- utils funcs / stubs ------------------------ -#define TRACE(...) -#define FIXME(...) fprintf(stderr, __VA_ARGS__) -#define WARN(...) fprintf(stderr, __VA_ARGS__) -#define ERR(...) 
fprintf(stderr, __VA_ARGS__) - -static inline BOOL IsEqualGUID(const GUID* a, const GUID* b) -{ - return memcmp(a,b,sizeof(GUID)) == 0; -} -static const char* debugstr_guid(const GUID* id) -{ - return "GUID"; -} - -static inline LONG WINAPI InterlockedIncrement( LONG volatile *dest ) -{ - return __sync_add_and_fetch(dest, 1);; -} -static inline LONG WINAPI InterlockedDecrement( LONG volatile *dest ) -{ - return __sync_sub_and_fetch(dest, 1);; -} - - -// --------------------------------------------------------------------------------------- - -static const D3DFORMAT ConvertFromSDL(Uint32 format) -{ - switch (format) - { - case SDL_PIXELFORMAT_ARGB4444: return D3DFMT_A4R4G4B4; - case SDL_PIXELFORMAT_RGB332: return D3DFMT_R3G3B2; - case SDL_PIXELFORMAT_ARGB1555: return D3DFMT_A1R5G5B5; - case SDL_PIXELFORMAT_RGB555: return D3DFMT_X1R5G5B5; - case SDL_PIXELFORMAT_RGB565: return D3DFMT_R5G6B5; - case SDL_PIXELFORMAT_RGB24: return D3DFMT_R8G8B8; - case SDL_PIXELFORMAT_RGB888: return D3DFMT_X8R8G8B8; - case SDL_PIXELFORMAT_ARGB8888: return D3DFMT_A8R8G8B8; - case SDL_PIXELFORMAT_ARGB2101010: return D3DFMT_A2R10G10B10; - default: - case SDL_PIXELFORMAT_UNKNOWN: return D3DFMT_UNKNOWN; - } -} - - -static const Uint32 ConvertToSDL(D3DFORMAT format) -{ - switch (format) - { - case D3DFMT_A4R4G4B4: return SDL_PIXELFORMAT_ARGB4444; - case D3DFMT_R3G3B2: return SDL_PIXELFORMAT_RGB332; - case D3DFMT_A1R5G5B5: return SDL_PIXELFORMAT_ARGB1555; - case D3DFMT_X1R5G5B5: return SDL_PIXELFORMAT_RGB555; - case D3DFMT_R5G6B5: return SDL_PIXELFORMAT_RGB565; - case D3DFMT_R8G8B8: return SDL_PIXELFORMAT_RGB24; - case D3DFMT_X8R8G8B8: return SDL_PIXELFORMAT_RGB888; - case D3DFMT_A8R8G8B8: return SDL_PIXELFORMAT_ARGB8888; - case D3DFMT_A2R10G10B10: return SDL_PIXELFORMAT_ARGB2101010; - default: - case D3DFMT_UNKNOWN: return SDL_PIXELFORMAT_UNKNOWN; - } -} - - -// ---- dlls/winex11.drv/d3dadapter.c ---------------------------------------------------------------- - -#include -#include -#include 
-#include - -const GUID IID_IDirect3D9Ex = { 0x02177241, 0x69FC, 0x400C, {0x8F, 0xF1, 0x93, 0xA4, 0x4D, 0xF6, 0x86, 0x1D}}; -const GUID IID_IDirect3D9 = { 0x81BDCBCA, 0x64D4, 0x426D, {0xAE, 0x8D, 0xAD, 0x1, 0x47, 0xF4, 0x27, 0x5C}}; -const GUID IID_ID3DPresent = { 0x77D60E80, 0xF1E6, 0x11DF, { 0x9E, 0x39, 0x95, 0x0C, 0xDF, 0xD7, 0x20, 0x85 } }; -const GUID IID_ID3DPresentGroup = { 0xB9C3016E, 0xF32A, 0x11DF, { 0x9C, 0x18, 0x92, 0xEA, 0xDE, 0xD7, 0x20, 0x85 } }; -const GUID IID_IUnknown = { 0x00000000, 0x0000, 0x0000, { 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46 } }; - - -struct DRI3Present -{ - /* COM vtable */ - void *vtable; - /* IUnknown reference count */ - LONG refs; - - D3DPRESENT_PARAMETERS params; - PRESENTpriv *present_priv; - - SDL_Window* sdl_win; - Display* x11_display; - Window x11_window; -}; - -struct D3DWindowBuffer -{ - PRESENTPixmapPriv *present_pixmap_priv; -}; - - -static ULONG WINAPI -DRI3Present_AddRef( struct DRI3Present *This ) -{ - ULONG refs = InterlockedIncrement(&This->refs); - TRACE("%p increasing refcount to %u.\n", This, refs); - return refs; -} - -static ULONG WINAPI -DRI3Present_Release( struct DRI3Present *This ) -{ - ULONG refs = InterlockedDecrement(&This->refs); - TRACE("%p decreasing refcount to %u.\n", This, refs); - if (refs == 0) { - /* dtor */ - SDL_SetWindowFullscreen(This->sdl_win, FALSE); - PRESENTDestroy(This->x11_display, This->present_priv); - free(This); - } - return refs; -} - -static HRESULT WINAPI -DRI3Present_QueryInterface( struct DRI3Present *This, - REFIID riid, - void **ppvObject ) -{ - if (!ppvObject) { return E_POINTER; } - - if (IsEqualGUID(&IID_ID3DPresent, riid) || - IsEqualGUID(&IID_IUnknown, riid)) { - *ppvObject = This; - DRI3Present_AddRef(This); - return S_OK; - } - - WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); - *ppvObject = NULL; - - return E_NOINTERFACE; -} - -static void -DRI3Present_ChangePresentParameters( struct DRI3Present *This, - D3DPRESENT_PARAMETERS 
*params, - BOOL first_time); - -static HRESULT WINAPI -DRI3Present_SetPresentParameters( struct DRI3Present *This, - D3DPRESENT_PARAMETERS *pPresentationParameters, - D3DDISPLAYMODEEX *pFullscreenDisplayMode ) -{ - if (!pPresentationParameters) { - WARN("pPresentationParameters is NULL.\n"); - return D3DERR_INVALIDCALL; - } - - if (pPresentationParameters->Windowed) { - SDL_SetWindowFullscreen(This->sdl_win, FALSE); - DRI3Present_ChangePresentParameters(This, pPresentationParameters, FALSE); - - } - else { - if (!pFullscreenDisplayMode) { - WARN("pFullscreenDisplayMode is NULL.\n"); - return D3DERR_INVALIDCALL; - } - - SDL_DisplayMode target; - SDL_DisplayMode closest; - memset(&target, 0, sizeof(target)); - memset(&closest, 0, sizeof(closest)); - - // msdn: "When switching to full-screen mode, - // Direct3D will try to find a desktop format that matches the back buffer format, - // so that back buffer and front buffer formats will be identical (to eliminate the need for color conversion)." - Uint32 preferred_format_for_backbuffer = ConvertToSDL(pPresentationParameters->BackBufferFormat); - - target.w = pFullscreenDisplayMode->Height; - target.h = pFullscreenDisplayMode->Width; - target.refresh_rate = pFullscreenDisplayMode->RefreshRate; - if (preferred_format_for_backbuffer != SDL_PIXELFORMAT_UNKNOWN) - target.format = preferred_format_for_backbuffer; - else - target.format = ConvertToSDL(pFullscreenDisplayMode->Format); - - SDL_DisplayMode* mode = NULL; - if (FALSE) { - /* - * this doesn't seem to be a good idea: - * it returns different mode even when the request mode exits and works... 
- */ - int Adapter = 0; - mode = SDL_GetClosestDisplayMode(Adapter, &target, &closest); - if (!mode) { - WARN("Could not find requested fullscreen display mode (%dx%d %dHz, format = %d).\n", pFullscreenDisplayMode->Width, pFullscreenDisplayMode->Height, pFullscreenDisplayMode->RefreshRate, pFullscreenDisplayMode->Format); - } - } - else { - mode = ⌖ - } - - int err = SDL_SetWindowDisplayMode(This->sdl_win, mode); - if (err < 0) { - WARN("SDL_SetWindowDisplayMode returned an error: %s\n", SDL_GetError()); - return D3DERR_INVALIDCALL; - } - - err = SDL_SetWindowFullscreen(This->sdl_win, SDL_WINDOW_FULLSCREEN); - if (err < 0) { - WARN("SDL_SetWindowFullscreen returned an error: %s\n", SDL_GetError()); - return D3DERR_INVALIDCALL; - } - - DRI3Present_ChangePresentParameters(This, pPresentationParameters, FALSE); - } - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_D3DWindowBufferFromDmaBuf( struct DRI3Present *This, - int dmaBufFd, - int width, - int height, - int stride, - int depth, - int bpp, - struct D3DWindowBuffer **out) -{ - Pixmap pixmap; - - if (!DRI3PixmapFromDmaBuf(This->x11_display, DefaultScreen(This->x11_display), - dmaBufFd, width, height, stride, depth, - bpp, &pixmap )) - return D3DERR_DRIVERINTERNALERROR; - - *out = calloc(1, sizeof(struct D3DWindowBuffer)); - PRESENTPixmapInit(This->present_priv, pixmap, &((*out)->present_pixmap_priv)); - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_DestroyD3DWindowBuffer( struct DRI3Present *This, - struct D3DWindowBuffer *buffer ) -{ - /* the pixmap is managed by the PRESENT backend. 
- * But if it can delete it right away, we may have - * better performance */ - PRESENTTryFreePixmap(This->x11_display, buffer->present_pixmap_priv); - free(buffer); - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_WaitBufferReleased( struct DRI3Present *This, - struct D3DWindowBuffer *buffer) -{ - (void) This; - PRESENTWaitPixmapReleased(buffer->present_pixmap_priv); - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_FrontBufferCopy( struct DRI3Present *This, - struct D3DWindowBuffer *buffer ) -{ - /* TODO: use dc_rect */ - if (PRESENTHelperCopyFront(This->x11_display, buffer->present_pixmap_priv)) - return D3D_OK; - else - return D3DERR_DRIVERINTERNALERROR; -} - -static HRESULT WINAPI -DRI3Present_PresentBuffer( struct DRI3Present *This, - struct D3DWindowBuffer *buffer, - HWND hWndOverride, - const RECT *pSourceRect, - const RECT *pDestRect, - const RGNDATA *pDirtyRegion, - DWORD Flags ) -{ -/* TODO? - - if (d3d->dc_rect.top != 0 && - d3d->dc_rect.left != 0) { - if (!pDestRect) - pDestRect = (const RECT *) &(d3d->dc_rect); - else { - dest_translate.top = pDestRect->top + d3d->dc_rect.top; - dest_translate.left = pDestRect->left + d3d->dc_rect.left; - dest_translate.bottom = pDestRect->bottom + d3d->dc_rect.bottom; - dest_translate.right = pDestRect->right + d3d->dc_rect.right; - pDestRect = (const RECT *) &dest_translate; - } - } -*/ - - if (!PRESENTPixmap(This->x11_display, This->x11_window, buffer->present_pixmap_priv, - &This->params, pSourceRect, pDestRect, pDirtyRegion)) - return D3DERR_DRIVERINTERNALERROR; - - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_GetRasterStatus( struct DRI3Present *This, - D3DRASTER_STATUS *pRasterStatus ) -{ - FIXME("(%p, %p), stub!\n", This, pRasterStatus); - return D3DERR_INVALIDCALL; -} - -static HRESULT WINAPI -DRI3Present_GetDisplayMode( struct DRI3Present *This, - D3DDISPLAYMODEEX *pMode, - D3DDISPLAYROTATION *pRotation ) -{ - int Adapter = 0; - SDL_DisplayMode mode; - int err = 
SDL_GetDesktopDisplayMode(Adapter, &mode); - if (err < 0) { - WARN("SDL_GetCurrentDisplayMode returned an error: %s\n", SDL_GetError()); - return D3DERR_INVALIDCALL; - } - - pMode->Width = mode.w; - pMode->Height = mode.h; - pMode->RefreshRate = mode.refresh_rate; - pMode->Format = ConvertFromSDL(mode.format); - pMode->ScanLineOrdering = D3DSCANLINEORDERING_PROGRESSIVE; - - *pRotation = D3DDISPLAYROTATION_IDENTITY; - - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_GetPresentStats( struct DRI3Present *This, - D3DPRESENTSTATS *pStats ) -{ - FIXME("(%p, %p), stub!\n", This, pStats); - return D3DERR_INVALIDCALL; -} - -static HRESULT WINAPI -DRI3Present_GetCursorPos( struct DRI3Present *This, - POINT *pPoint ) -{ - // FIXME faked - BOOL ok; - if (!pPoint) - return D3DERR_INVALIDCALL; - ok = TRUE; - memset(pPoint,0,sizeof(POINT)); - return ok ? S_OK : D3DERR_DRIVERINTERNALERROR; -} - -static HRESULT WINAPI -DRI3Present_SetCursorPos( struct DRI3Present *This, - POINT *pPoint ) -{ - // FIXME faked - if (!pPoint) - return D3DERR_INVALIDCALL; - return S_OK; -} - -static HRESULT WINAPI -DRI3Present_SetCursor( struct DRI3Present *This, - void *pBitmap, - POINT *pHotspot, - BOOL bShow ) -{ - // FIXME faked - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_SetGammaRamp( struct DRI3Present *This, - const D3DGAMMARAMP *pRamp, - HWND hWndOverride ) -{ - // FIXME faked - return D3D_OK; -} - -static HRESULT WINAPI -DRI3Present_GetWindowInfo( struct DRI3Present *This, - HWND hWnd, - int *width, int *height, int *depth ) -{ - int w,h; - SDL_GetWindowSize(This->sdl_win, &w, &h); - Uint32 format = SDL_GetWindowPixelFormat(This->sdl_win); - - *width = w; - *height = h; - *depth = format != SDL_PIXELFORMAT_UNKNOWN ? 
SDL_BITSPERPIXEL(format) : 24; - return D3D_OK; -} - -/*----------*/ - - -static ID3DPresentVtbl DRI3Present_vtable = { - (void *)DRI3Present_QueryInterface, - (void *)DRI3Present_AddRef, - (void *)DRI3Present_Release, - (void *)DRI3Present_SetPresentParameters, - (void *)DRI3Present_D3DWindowBufferFromDmaBuf, - (void *)DRI3Present_DestroyD3DWindowBuffer, - (void *)DRI3Present_WaitBufferReleased, - (void *)DRI3Present_FrontBufferCopy, - (void *)DRI3Present_PresentBuffer, - (void *)DRI3Present_GetRasterStatus, - (void *)DRI3Present_GetDisplayMode, - (void *)DRI3Present_GetPresentStats, - (void *)DRI3Present_GetCursorPos, - (void *)DRI3Present_SetCursorPos, - (void *)DRI3Present_SetCursor, - (void *)DRI3Present_SetGammaRamp, - (void *)DRI3Present_GetWindowInfo -}; - -static void -DRI3Present_ChangePresentParameters( struct DRI3Present *This, - D3DPRESENT_PARAMETERS *params, - BOOL first_time) -{ - (void) first_time; /* will be used to manage screen res if windowed mode change */ - - if (params->hDeviceWindow && params->hDeviceWindow != This->sdl_win) { - WARN("Changing hDeviceWindow not supported\n"); - } - - int w,h; - SDL_GetWindowSize(This->sdl_win, &w, &h); - params->BackBufferWidth = w; - params->BackBufferHeight = h; - - This->params = *params; -} - -static HRESULT -DRI3Present_new( SDL_Window* sdl_win, - D3DPRESENT_PARAMETERS *params, - struct DRI3Present **out ) -{ - struct DRI3Present *This; - - if (!sdl_win) { - ERR("No SDL_Window specified for presentation backend.\n"); - return D3DERR_INVALIDCALL; - } - - SDL_SysWMinfo info; - SDL_VERSION(&info.version); - SDL_bool Ok = SDL_GetWindowWMInfo(sdl_win, &info); - if (!Ok) { - ERR("Invalid SDL_Window specified for presentation backend.\n"); - return D3DERR_INVALIDCALL; - } - - This = calloc(1, sizeof(struct DRI3Present)); - if (!This) { - return E_OUTOFMEMORY; - } - - This->vtable = &DRI3Present_vtable; - This->refs = 1; - This->sdl_win = sdl_win; - This->x11_display = info.info.x11.display; - This->x11_window 
= info.info.x11.window; - - DRI3Present_ChangePresentParameters(This, params, TRUE); - - PRESENTInit(info.info.x11.display, &(This->present_priv)); - - *out = This; - - return D3D_OK; -} - -struct DRI3PresentGroup -{ - /* COM vtable */ - void *vtable; - /* IUnknown reference count */ - LONG refs; - - struct DRI3Present **present_backends; - unsigned npresent_backends; -}; - -static ULONG WINAPI -DRI3PresentGroup_AddRef( struct DRI3PresentGroup *This ) -{ - ULONG refs = InterlockedIncrement(&This->refs); - TRACE("%p increasing refcount to %u.\n", This, refs); - return refs; -} - -static ULONG WINAPI -DRI3PresentGroup_Release( struct DRI3PresentGroup *This ) -{ - ULONG refs = InterlockedDecrement(&This->refs); - TRACE("%p decreasing refcount to %u.\n", This, refs); - if (refs == 0) { - unsigned i; - if (This->present_backends) { - for (i = 0; i < This->npresent_backends; ++i) { - if (This->present_backends[i]) - DRI3Present_Release(This->present_backends[i]); - } - free(This->present_backends); - } - free(This); - } - return refs; -} - -static HRESULT WINAPI -DRI3PresentGroup_QueryInterface( struct DRI3PresentGroup *This, - REFIID riid, - void **ppvObject ) -{ - if (!ppvObject) { return E_POINTER; } - if (IsEqualGUID(&IID_ID3DPresentGroup, riid) || - IsEqualGUID(&IID_IUnknown, riid)) { - *ppvObject = This; - DRI3PresentGroup_AddRef(This); - return S_OK; - } - - WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); - *ppvObject = NULL; - - return E_NOINTERFACE; -} - -static UINT WINAPI -DRI3PresentGroup_GetMultiheadCount( struct DRI3PresentGroup *This ) -{ - FIXME("(%p), stub!\n", This); - return 1; -} - -static HRESULT WINAPI -DRI3PresentGroup_GetPresent( struct DRI3PresentGroup *This, - UINT Index, - ID3DPresent **ppPresent ) -{ - if (Index >= DRI3PresentGroup_GetMultiheadCount(This)) { - ERR("Index >= MultiHeadCount\n"); - return D3DERR_INVALIDCALL; - } - DRI3Present_AddRef(This->present_backends[Index]); - *ppPresent = (ID3DPresent 
*)This->present_backends[Index]; - - return D3D_OK; -} - -static HRESULT WINAPI -DRI3PresentGroup_CreateAdditionalPresent( struct DRI3PresentGroup *This, - D3DPRESENT_PARAMETERS *pPresentationParameters, - ID3DPresent **ppPresent ) -{ - FIXME("(%p, %p, %p), stub!\n", This, pPresentationParameters, ppPresent); - return D3DERR_INVALIDCALL; -} - -static void WINAPI -DRI3PresentGroup_GetVersion( struct DRI3PresentGroup *This, - int *major, - int *minor) -{ - *major = 1; - *minor = 0; -} - -static ID3DPresentGroupVtbl DRI3PresentGroup_vtable = { - (void *)DRI3PresentGroup_QueryInterface, - (void *)DRI3PresentGroup_AddRef, - (void *)DRI3PresentGroup_Release, - (void *)DRI3PresentGroup_GetMultiheadCount, - (void *)DRI3PresentGroup_GetPresent, - (void *)DRI3PresentGroup_CreateAdditionalPresent, - (void *)DRI3PresentGroup_GetVersion -}; - -static HRESULT -dri3_create_present_group( SDL_Window* win, - UINT adapter, - D3DPRESENT_PARAMETERS *params, - unsigned nparams, - ID3DPresentGroup **group ) -{ - struct DRI3PresentGroup *This = calloc(1, sizeof(struct DRI3PresentGroup)); - HRESULT hr; - unsigned i; - - if (!This) { - ERR("Out of memory.\n"); - return E_OUTOFMEMORY; - } - - This->vtable = &DRI3PresentGroup_vtable; - This->refs = 1; - This->npresent_backends = nparams; - This->present_backends = calloc(This->npresent_backends, sizeof(struct DRI3Present *)); - - if (!This->present_backends) { - DRI3PresentGroup_Release(This); - ERR("Out of memory.\n"); - return E_OUTOFMEMORY; - } - - if (nparams != 1) { adapter = 0; } - for (i = 0; i < This->npresent_backends; ++i) { - /* create an ID3DPresent for it */ - hr = DRI3Present_new(win, &params[i], - &This->present_backends[i]); - if (FAILED(hr)) { - DRI3PresentGroup_Release(This); - return hr; - } - } - - *group = (ID3DPresentGroup *)This; - TRACE("Returning %p\n", *group); - - return D3D_OK; -} - - - -// --------- dlls/d3d9/d3dadapter9.c ---------------------------------------------------------------- - -struct d3dadapter9 -{ - /*
COM vtable */ - void *vtable; - /* IUnknown reference count */ - LONG refs; - - /* simple test, one adapter */ - ID3DAdapter9 *adapter; - - /* true if it implements IDirect3D9Ex */ - BOOL ex; -}; - -/* convenience wrapper for calls into ID3D9Adapter */ -#define ADAPTER_PROC(name, ...) \ - ID3DAdapter9_##name(This->adapter, ## __VA_ARGS__) - -static HRESULT WINAPI -d3dadapter9_CheckDeviceFormat( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - D3DFORMAT AdapterFormat, - DWORD Usage, - D3DRESOURCETYPE RType, - D3DFORMAT CheckFormat ); - -static ULONG WINAPI -d3dadapter9_AddRef( struct d3dadapter9 *This ) -{ - ULONG refs = InterlockedIncrement(&This->refs); - TRACE("%p increasing refcount to %u.\n", This, refs); - return refs; -} - -static ULONG WINAPI -d3dadapter9_Release( struct d3dadapter9 *This ) -{ - ULONG refs = InterlockedDecrement(&This->refs); - TRACE("%p decreasing refcount to %u.\n", This, refs); - if (refs == 0) { - /* dtor */ - if (This->adapter){ - ID3DAdapter9_Release(This->adapter); - } - - free(This); - } - return refs; -} - -static HRESULT WINAPI -d3dadapter9_QueryInterface( struct d3dadapter9 *This, - REFIID riid, - void **ppvObject ) -{ - if (!ppvObject) { return E_POINTER; } - if ((IsEqualGUID(&IID_IDirect3D9Ex, riid) && This->ex) || - IsEqualGUID(&IID_IDirect3D9, riid) || - IsEqualGUID(&IID_IUnknown, riid)) { - *ppvObject = This; - d3dadapter9_AddRef(This); - return S_OK; - } - - WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid)); - *ppvObject = NULL; - - return E_NOINTERFACE; -} - -static HRESULT WINAPI -d3dadapter9_RegisterSoftwareDevice( struct d3dadapter9 *This, - void *pInitializeFunction ) -{ - FIXME("(%p, %p), stub!\n", This, pInitializeFunction); - return D3DERR_INVALIDCALL; -} - -static UINT WINAPI -d3dadapter9_GetAdapterCount( struct d3dadapter9 *This ) -{ - return This ? 
1 : 0; -} - -static HRESULT WINAPI -d3dadapter9_GetAdapterIdentifier( struct d3dadapter9 *This, - UINT Adapter, - DWORD Flags, - D3DADAPTER_IDENTIFIER9 *pIdentifier ) -{ - HRESULT hr; - - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - - hr = ADAPTER_PROC(GetAdapterIdentifier, Flags, pIdentifier); - return hr; -} - -static UINT WINAPI -d3dadapter9_GetAdapterModeCount( struct d3dadapter9 *This, - UINT Adapter, - D3DFORMAT Format ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { - WARN("Adapter %u does not exist.\n", Adapter); - return 0; - } - if (FAILED(d3dadapter9_CheckDeviceFormat(This, Adapter, D3DDEVTYPE_HAL, - Format, D3DUSAGE_RENDERTARGET, - D3DRTYPE_SURFACE, Format))) { - WARN("DeviceFormat not available.\n"); - return 0; - } - - int NumMatchingModes = 0; - int NumModes = SDL_GetNumDisplayModes(Adapter); - int i; - for (i=0;i= d3dadapter9_GetAdapterCount(This)) { - WARN("Adapter %u does not exist.\n", Adapter); - return D3DERR_INVALIDCALL; - } - if (!pMode) { - WARN("pMode is NULL.\n"); - return D3DERR_INVALIDCALL; - } - - hr = d3dadapter9_CheckDeviceFormat(This, Adapter, D3DDEVTYPE_HAL, - Format, D3DUSAGE_RENDERTARGET, - D3DRTYPE_SURFACE, Format); - if (FAILED(hr)) { - TRACE("DeviceFormat not available.\n"); - return hr; - } - - int IndexMatchingModes = 0; - int NumModes = SDL_GetNumDisplayModes(Adapter); - int i; - for (i=0;iWidth = mode.w; - pMode->Height = mode.h; - pMode->RefreshRate = mode.refresh_rate; - pMode->Format = Format; - - return D3D_OK; - } - IndexMatchingModes ++; - } - - WARN("invalid mode for format %d on adapter %d: %d\n", Format, Adapter, ModeIndex); - return D3DERR_INVALIDCALL; -} - -static HRESULT WINAPI -d3dadapter9_GetAdapterDisplayMode( struct d3dadapter9 *This, - UINT Adapter, - D3DDISPLAYMODE *pMode ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { - WARN("Adapter %u does not exist.\n", Adapter); - return D3DERR_INVALIDCALL; - } - - SDL_DisplayMode mode; - int err = 
SDL_GetCurrentDisplayMode(Adapter, &mode); - if (err < 0) { - WARN("SDL_GetCurrentDisplayMode returned an error: %s\n", SDL_GetError()); - return D3DERR_INVALIDCALL; - } - - pMode->Width = mode.w; - pMode->Height = mode.h; - pMode->RefreshRate = mode.refresh_rate; - pMode->Format = ConvertFromSDL(mode.format); - - return D3D_OK; -} - -static HRESULT WINAPI -d3dadapter9_CheckDeviceType( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DevType, - D3DFORMAT AdapterFormat, - D3DFORMAT BackBufferFormat, - BOOL bWindowed ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - return ADAPTER_PROC(CheckDeviceType, - DevType, AdapterFormat, BackBufferFormat, bWindowed); -} - -static HRESULT WINAPI -d3dadapter9_CheckDeviceFormat( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - D3DFORMAT AdapterFormat, - DWORD Usage, - D3DRESOURCETYPE RType, - D3DFORMAT CheckFormat ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - return ADAPTER_PROC(CheckDeviceFormat, - DeviceType, AdapterFormat, Usage, RType, CheckFormat); -} - -static HRESULT WINAPI -d3dadapter9_CheckDeviceMultiSampleType( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - D3DFORMAT SurfaceFormat, - BOOL Windowed, - D3DMULTISAMPLE_TYPE MultiSampleType, - DWORD *pQualityLevels ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - return ADAPTER_PROC(CheckDeviceMultiSampleType, DeviceType, SurfaceFormat, - Windowed, MultiSampleType, pQualityLevels); -} - -static HRESULT WINAPI -d3dadapter9_CheckDepthStencilMatch( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - D3DFORMAT AdapterFormat, - D3DFORMAT RenderTargetFormat, - D3DFORMAT DepthStencilFormat ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - return ADAPTER_PROC(CheckDepthStencilMatch, DeviceType, AdapterFormat, - RenderTargetFormat, DepthStencilFormat); -} - 
-static HRESULT WINAPI -d3dadapter9_CheckDeviceFormatConversion( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - D3DFORMAT SourceFormat, - D3DFORMAT TargetFormat ) -{ - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - return ADAPTER_PROC(CheckDeviceFormatConversion, - DeviceType, SourceFormat, TargetFormat); -} - -static HRESULT WINAPI -d3dadapter9_GetDeviceCaps( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - D3DCAPS9 *pCaps ) -{ - HRESULT hr; - - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return D3DERR_INVALIDCALL; } - - hr = ADAPTER_PROC(GetDeviceCaps, DeviceType, pCaps); - if (FAILED(hr)) { return hr; } - - pCaps->MasterAdapterOrdinal = 0; - pCaps->AdapterOrdinalInGroup = 0; - pCaps->NumberOfAdaptersInGroup = 1; - - return hr; -} - -static HMONITOR WINAPI -d3dadapter9_GetAdapterMonitor( struct d3dadapter9 *This, - UINT Adapter ) -{ - // FIXME faked - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { return (HMONITOR)0; } - return (HMONITOR)0;; -} - -static HRESULT WINAPI -d3dadapter9_CreateDeviceEx( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - HWND hFocusWindow, - DWORD BehaviorFlags, - D3DPRESENT_PARAMETERS *pPresentationParameters, - D3DDISPLAYMODEEX *pFullscreenDisplayMode, - IDirect3DDevice9Ex **ppReturnedDeviceInterface ); - -static HRESULT WINAPI -d3dadapter9_CreateDevice( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - HWND hFocusWindow, - DWORD BehaviorFlags, - D3DPRESENT_PARAMETERS *pPresentationParameters, - IDirect3DDevice9 **ppReturnedDeviceInterface ) -{ - HRESULT hr; - hr = d3dadapter9_CreateDeviceEx(This, Adapter, DeviceType, hFocusWindow, - BehaviorFlags, pPresentationParameters, - NULL, - (IDirect3DDevice9Ex **)ppReturnedDeviceInterface); - if (FAILED(hr)) - return hr; - return D3D_OK; -} - -static UINT WINAPI -d3dadapter9_GetAdapterModeCountEx( struct d3dadapter9 *This, - UINT Adapter, - const D3DDISPLAYMODEFILTER 
*pFilter ) -{ - return 1; -} - -static HRESULT WINAPI -d3dadapter9_EnumAdapterModesEx( struct d3dadapter9 *This, - UINT Adapter, - const D3DDISPLAYMODEFILTER *pFilter, - UINT Mode, - D3DDISPLAYMODEEX *pMode ) -{ - FIXME("(%p, %u, %p, %u, %p), stub!\n", This, Adapter, pFilter, Mode, pMode); - return D3DERR_INVALIDCALL; -} - -static HRESULT WINAPI -d3dadapter9_GetAdapterDisplayModeEx( struct d3dadapter9 *This, - UINT Adapter, - D3DDISPLAYMODEEX *pMode, - D3DDISPLAYROTATION *pRotation ) -{ - FIXME("(%p, %u, %p, %p), stub!\n", This, Adapter, pMode, pRotation); - return D3DERR_INVALIDCALL; -} - -static HRESULT WINAPI -d3dadapter9_CreateDeviceEx( struct d3dadapter9 *This, - UINT Adapter, - D3DDEVTYPE DeviceType, - HWND hFocusWindow, - DWORD BehaviorFlags, - D3DPRESENT_PARAMETERS *pPresentationParameters, - D3DDISPLAYMODEEX *pFullscreenDisplayMode, - IDirect3DDevice9Ex **ppReturnedDeviceInterface ) -{ - ID3DPresentGroup *present; - HRESULT hr; - SDL_Window* sdl_win = (SDL_Window*)hFocusWindow; - - if (Adapter >= d3dadapter9_GetAdapterCount(This)) { - WARN("Adapter %u does not exist.\n", Adapter); - return D3DERR_INVALIDCALL; - } - - if (!sdl_win) { - ERR("no SDL_Window specified\n"); - return D3DERR_INVALIDCALL; - } - - { - UINT nparams = 1; - UINT ordinal = 0; - hr = dri3_create_present_group(sdl_win, ordinal, pPresentationParameters, - nparams, &present); - } - - if (FAILED(hr)) { - WARN("Failed to create PresentGroup.\n"); - return hr; - } - - if (This->ex) { - hr = ADAPTER_PROC(CreateDeviceEx, Adapter, DeviceType, hFocusWindow, - BehaviorFlags, pPresentationParameters, - pFullscreenDisplayMode, - (IDirect3D9Ex *)This, present, - ppReturnedDeviceInterface); - } else { /* CreateDevice on non-ex */ - hr = ADAPTER_PROC(CreateDevice, Adapter, DeviceType, hFocusWindow, - BehaviorFlags, pPresentationParameters, - (IDirect3D9 *)This, present, - (IDirect3DDevice9 **)ppReturnedDeviceInterface); - } - if (FAILED(hr)) { - WARN("ADAPTER_PROC failed.\n"); - 
ID3DPresentGroup_Release(present); - } - - return hr; -} - -static HRESULT WINAPI -d3dadapter9_GetAdapterLUID( struct d3dadapter9 *This, - UINT Adapter, - LUID *pLUID ) -{ - FIXME("(%p, %u, %p), stub!\n", This, Adapter, pLUID); - return D3DERR_INVALIDCALL; -} - -static IDirect3D9ExVtbl d3dadapter9_vtable = { - (void *)d3dadapter9_QueryInterface, - (void *)d3dadapter9_AddRef, - (void *)d3dadapter9_Release, - (void *)d3dadapter9_RegisterSoftwareDevice, - (void *)d3dadapter9_GetAdapterCount, - (void *)d3dadapter9_GetAdapterIdentifier, - (void *)d3dadapter9_GetAdapterModeCount, - (void *)d3dadapter9_EnumAdapterModes, - (void *)d3dadapter9_GetAdapterDisplayMode, - (void *)d3dadapter9_CheckDeviceType, - (void *)d3dadapter9_CheckDeviceFormat, - (void *)d3dadapter9_CheckDeviceMultiSampleType, - (void *)d3dadapter9_CheckDepthStencilMatch, - (void *)d3dadapter9_CheckDeviceFormatConversion, - (void *)d3dadapter9_GetDeviceCaps, - (void *)d3dadapter9_GetAdapterMonitor, - (void *)d3dadapter9_CreateDevice, - (void *)d3dadapter9_GetAdapterModeCountEx, - (void *)d3dadapter9_EnumAdapterModesEx, - (void *)d3dadapter9_GetAdapterDisplayModeEx, - (void *)d3dadapter9_CreateDeviceEx, - (void *)d3dadapter9_GetAdapterLUID -}; - -HRESULT -d3dadapter9_new( BOOL ex, Display *dpy, - IDirect3D9Ex **ppOut ) -{ - static void * WINAPI (*pD3DAdapter9GetProc)(const char *) = NULL; - static BOOL StaticInitDone = FALSE; - - // load dynamic library and retrieve "D3DAdapter9GetProc" symbol. 
- if (!StaticInitDone) { - StaticInitDone = TRUE; - - if (!PRESENTCheckExtension(dpy, 1, 0)) { - ERR("Unable to query PRESENT.\n"); - return D3DERR_NOTAVAILABLE; - } - - if (!DRI3CheckExtension(dpy, 1, 0)) { - ERR("Unable to query DRI3.\n"); - return D3DERR_NOTAVAILABLE; - } - - - void * handle = NULL; - - const char *path = getenv("D3D_MODULE_PATH"); - if (path) { - /* extremely basic path parsing attempt */ - const char *dot = strrchr(path, '.'); - if (dot) { - if (!strcmp(dot, ".so")) { - handle = dlopen(path, RTLD_LOCAL | RTLD_NOW); - if (!handle) - ERR("Failed to load d3d9 lib '%s': %s\n", path, dlerror()); - } - } else { - char str[4096]; - snprintf(str, sizeof(str), "%s/d3dadapter9.so", path); - handle = dlopen(str, RTLD_LOCAL | RTLD_NOW); - ERR("Failed to load d3d9 lib '%s': %s\n", str, dlerror()); - } - } else { - const char *paths[] = { - "/usr/lib32/d3d/d3dadapter9.so", - "/usr/lib/d3d/d3dadapter9.so", - "/usr/local/lib/x86_64-linux-gnu/d3d/d3dadapter9.so", - "/usr/lib/x86_64-linux-gnu/d3d/d3dadapter9.so", - "/usr/local/lib/arm-linux-gnueabihf/d3d/d3dadapter9.so", - "/usr/lib/arm-linux-gnueabihf/d3d/d3dadapter9.so", - "/usr/local/lib/aarch64-linux-gnu/d3d/d3dadapter9.so", - "/usr/lib/aarch64-linux-gnu/d3d/d3dadapter9.so", - }; - for (unsigned i = 0; !handle && i < 8; i++) - handle = dlopen(paths[i], RTLD_LOCAL | RTLD_NOW); - if (!handle) - ERR("Failed to load d3d9 lib: %s\n", dlerror()); - } - if (!handle) { - return D3DERR_NOTAVAILABLE; - } - - pD3DAdapter9GetProc = dlsym(handle, "D3DAdapter9GetProc"); - if (!pD3DAdapter9GetProc) { - ERR("Failed to load d3d9 lib symbols\n"); - return D3DERR_NOTAVAILABLE; - } - } - - int fd; - if (!DRI3Open(dpy, DefaultScreen(dpy), &fd)) { - ERR("DRI3Open failed (fd=%d)\n", fd); - return D3DERR_NOTAVAILABLE; - } - - const struct D3DAdapter9DRM *d3d9_drm = pD3DAdapter9GetProc(D3DADAPTER9DRM_NAME); - if (!d3d9_drm || d3d9_drm->major_version != D3DADAPTER9DRM_MAJOR) - { - ERR("Your display driver doesn't support native D3D9 
adapters.\n"); - return D3DERR_NOTAVAILABLE; - } - - ID3DAdapter9* adapter = NULL; - HRESULT hr = d3d9_drm->create_adapter(fd, &adapter); - if (FAILED(hr)) { - ERR("Unable to create ID3DAdapter9 (fd=%d)\n", fd); - return hr; - } - - struct d3dadapter9 *This = calloc(1, sizeof(struct d3dadapter9)); - if (!This) { - ERR("Out of memory.\n"); - return E_OUTOFMEMORY; - } - - This->vtable = &d3dadapter9_vtable; - This->refs = 1; - This->ex = ex; - This->adapter = adapter; - - *ppOut = (IDirect3D9Ex *)This; - FIXME("\033[1;32m\nNative Direct3D 9 is active.\033[0m\n"); - return D3D_OK; -} - - -// -------------------------------------------------------------------- - -static IDirect3D9Ex* Direct3DCreate9Ex_SDL_common(BOOL ex, SDL_Window *win ) -{ - SDL_SysWMinfo info; - SDL_VERSION(&info.version); - SDL_bool Ok = SDL_GetWindowWMInfo(win, &info); - if (!Ok) { - ERR("SDL_GetWindowWMInfo failed."); - return NULL; - } - - IDirect3D9Ex *pD3D9Ex = NULL; - HRESULT hr = d3dadapter9_new( ex, info.info.x11.display, &pD3D9Ex ); - if (FAILED(hr)) { - return NULL; - } - - return pD3D9Ex; -} - -IDirect3D9Ex* SDL_Direct3DCreate9Ex(SDL_Window *win) -{ - return Direct3DCreate9Ex_SDL_common(TRUE, win); -} - - -IDirect3D9* Direct3DCreate9_SDL(SDL_Window *win) -{ - return (IDirect3D9*)Direct3DCreate9Ex_SDL_common(FALSE, win); -} - From 47e9db18fa9ccd6a9c1b59ff1ec7135891fbd23f Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Tue, 14 Mar 2023 15:20:09 +0300 Subject: [PATCH 25/27] nine: add Gallium Nine support with ExternalProject Nine Native --- CMakeLists.txt | 31 ++++++++++++++++++++++++++----- src/apps/engine/CMakeLists.txt | 4 ---- src/libs/renderer/CMakeLists.txt | 4 +--- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 11bfdec57..49107c1f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,12 +69,33 @@ elseif (STORM_MESA_NINE) # for Gallium Nine find_package(PkgConfig REQUIRED) pkg_check_modules(D3D REQUIRED IMPORTED_TARGET 
d3d) - add_subdirectory(nine-native) + include(ExternalProject) + ExternalProject_Add(nine-native + GIT_REPOSITORY https://github.com/q4a/nine-native + GIT_TAG main + GIT_SHALLOW ON + BUILD_ALWAYS OFF + BUILD_BYPRODUCTS <BINARY_DIR>/libnine-native.a + INSTALL_COMMAND "" + ) + ExternalProject_Get_property(nine-native SOURCE_DIR BINARY_DIR) set(NINE_NATIVE_INCLUDE_DIRS - "${CMAKE_CURRENT_SOURCE_DIR}/nine-native/include" - "${CMAKE_CURRENT_SOURCE_DIR}/nine-native/include/D3D9" + "${SOURCE_DIR}/include" + "${SOURCE_DIR}/include/D3D9" + "${SOURCE_DIR}/include/windows" + ) + set(NATIVE_D3D9_LIBS + "${BINARY_DIR}/libnine-native.a" + "PkgConfig::D3D" + X11 + xcb + xcb-present + xcb-dri3 + xcb-xfixes + X11-xcb ) include_directories("${NINE_NATIVE_INCLUDE_DIRS}") + add_custom_target(dependencies ALL DEPENDS nine-native) else() # for DXVK Native message("Using DXVK-native for D3D9 API") @@ -94,9 +115,9 @@ else() # for DXVK Native "${SOURCE_DIR}/include/native/directx" "${SOURCE_DIR}/include/native/windows" ) - set(DXVK_NATIVE_D3D9_LIB ${BINARY_DIR}/src/d3d9/libdxvk_d3d9.so) + set(NATIVE_D3D9_LIBS ${BINARY_DIR}/src/d3d9/libdxvk_d3d9.so) include_directories("${DXVK_NATIVE_INCLUDE_DIRS}") - ADD_CUSTOM_TARGET(dependencies ALL DEPENDS dxvk-native) + add_custom_target(dependencies ALL DEPENDS dxvk-native) endif() ### Define library ALIASes for use without CONAN_PKG:: prefix diff --git a/src/apps/engine/CMakeLists.txt b/src/apps/engine/CMakeLists.txt index db911fe9a..c70051c26 100644 --- a/src/apps/engine/CMakeLists.txt +++ b/src/apps/engine/CMakeLists.txt @@ -16,10 +16,6 @@ else() set(SYSTEM_DEPS "ffi") endif() -if (STORM_MESA_NINE) - set(SYSTEM_DEPS ${SYSTEM_DEPS} "nine-native") -endif() - STORM_SETUP( TARGET_NAME engine TYPE executable diff --git a/src/libs/renderer/CMakeLists.txt b/src/libs/renderer/CMakeLists.txt index 9f932b0d2..682f7219c 100644 --- a/src/libs/renderer/CMakeLists.txt +++ b/src/libs/renderer/CMakeLists.txt @@ -1,9 +1,7 @@ if (WIN32) set(SYSTEM_DEPS
"legacy_stdio_definitions") -elseif (STORM_MESA_NINE) -set(SYSTEM_DEPS "") else() -set(SYSTEM_DEPS "${DXVK_NATIVE_D3D9_LIB}") +set(SYSTEM_DEPS "${NATIVE_D3D9_LIBS}") endif() STORM_SETUP( From 42ecc3af93a887f950f8b3784b49a42a7bf119af Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Tue, 14 Mar 2023 15:55:46 +0300 Subject: [PATCH 26/27] nine: remove old #if* STORM_MESA_NINE --- src/libs/ball_splash/src/ball_splash_defines.h | 2 -- src/libs/core/src/token.h | 2 -- src/libs/input/src/sdl_input.cpp | 2 -- src/libs/renderer/src/s_device.cpp | 7 ------- src/libs/renderer/src/storm_d3dx9.cpp | 5 ----- src/libs/renderer/src/technique.cpp | 10 ---------- src/libs/sink_effect/src/sink_splash_defines.h | 2 -- 7 files changed, 30 deletions(-) diff --git a/src/libs/ball_splash/src/ball_splash_defines.h b/src/libs/ball_splash/src/ball_splash_defines.h index cc3ebdc08..08faf3442 100644 --- a/src/libs/ball_splash/src/ball_splash_defines.h +++ b/src/libs/ball_splash/src/ball_splash_defines.h @@ -3,9 +3,7 @@ #include "rands.h" #include -#ifndef STORM_MESA_NINE #include -#endif #include #define BALLSPLASH_TRACE_INFORMATION diff --git a/src/libs/core/src/token.h b/src/libs/core/src/token.h index 6cdef33f8..b001fe617 100644 --- a/src/libs/core/src/token.h +++ b/src/libs/core/src/token.h @@ -1,8 +1,6 @@ #pragma once -#ifndef STORM_MESA_NINE #include -#endif #include #include diff --git a/src/libs/input/src/sdl_input.cpp b/src/libs/input/src/sdl_input.cpp index 83327c1f5..0c85c6d19 100644 --- a/src/libs/input/src/sdl_input.cpp +++ b/src/libs/input/src/sdl_input.cpp @@ -3,9 +3,7 @@ #include #include #include -#ifndef STORM_MESA_NINE #include -#endif #include namespace storm diff --git a/src/libs/renderer/src/s_device.cpp b/src/libs/renderer/src/s_device.cpp index e8775f7ed..c4e3e85d7 100644 --- a/src/libs/renderer/src/s_device.cpp +++ b/src/libs/renderer/src/s_device.cpp @@ -13,9 +13,6 @@ #include #include -#ifdef STORM_MESA_NINE -#include "nine_sdl.h" -#endif #include @@ -707,11 
+704,7 @@ bool DX9RENDER::InitDevice(bool windowed, void *_hwnd, int32_t width, int32_t he hwnd = static_cast(_hwnd); core.Trace("Initializing DirectX 9"); -#ifdef STORM_MESA_NINE - d3d = Direct3DCreate9_SDL(static_cast(_hwnd)); -#else d3d = Direct3DCreate9(D3D_SDK_VERSION); -#endif if (d3d == nullptr) { // MessageBox(hwnd, "Direct3DCreate9 error", "InitDevice::Direct3DCreate9", MB_OK); diff --git a/src/libs/renderer/src/storm_d3dx9.cpp b/src/libs/renderer/src/storm_d3dx9.cpp index df57f646e..5ed9bdb54 100644 --- a/src/libs/renderer/src/storm_d3dx9.cpp +++ b/src/libs/renderer/src/storm_d3dx9.cpp @@ -8,11 +8,6 @@ #define WARN(...) fprintf(stdout, __VA_ARGS__) -#ifdef STORM_MESA_NINE // IMPROVE Nine Native, taken from dxvk-native/include/native/windows/windows_base.h -typedef int32_t HRESULT; -#define E_NOTIMPL ((HRESULT)0x80004001) -#endif - ///////////////////////// Parts from WINE source code for d3dx9 licensed under GPLv2 ///////////////////////// #define D3DX_FILTER_NONE 0x00000001 diff --git a/src/libs/renderer/src/technique.cpp b/src/libs/renderer/src/technique.cpp index 4a20b95f9..9ff0b9db6 100644 --- a/src/libs/renderer/src/technique.cpp +++ b/src/libs/renderer/src/technique.cpp @@ -10,16 +10,6 @@ #include #include -#ifdef STORM_MESA_NINE // IMPROVE Nine Native + taken from dxvk-native/include/native/directx/d3d8types.h -#define D3DWRAP_U 1 -#define D3DWRAP_V 2 -#define D3DWRAP_W 4 -#define D3DWRAPCOORD_0 1 -#define D3DWRAPCOORD_1 2 -#define D3DWRAPCOORD_2 4 -#define D3DWRAPCOORD_3 8 -#endif - #define USE_FX // Will load techniques from fx files #ifdef USE_FX diff --git a/src/libs/sink_effect/src/sink_splash_defines.h b/src/libs/sink_effect/src/sink_splash_defines.h index d71d15f0b..b0054b6b1 100644 --- a/src/libs/sink_effect/src/sink_splash_defines.h +++ b/src/libs/sink_effect/src/sink_splash_defines.h @@ -1,8 +1,6 @@ #pragma once -#ifndef STORM_MESA_NINE #include "windows.h" -#endif #include "d3d9types.h" #define SINK_SPLASH_TRACE_INFORMATION From 
11f3d1ed304ae9f7c6303edc7113137af232aeb4 Mon Sep 17 00:00:00 2001 From: Aleksey Komarov Date: Wed, 15 Mar 2023 02:27:05 +0300 Subject: [PATCH 27/27] linux: improve dxvk CONFIGURE_COMMAND - build only d3d9 --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 49107c1f3..19dff34df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ elseif (STORM_MESA_NINE) # for Gallium Nine include_directories("${NINE_NATIVE_INCLUDE_DIRS}") add_custom_target(dependencies ALL DEPENDS nine-native) else() # for DXVK Native - message("Using DXVK-native for D3D9 API") + message("Using DXVK Native for D3D9 API") include(ExternalProject) ExternalProject_Add(dxvk-native @@ -106,8 +106,9 @@ else() # for DXVK Native GIT_SHALLOW ON BUILD_ALWAYS OFF CONFIGURE_HANDLED_BY_BUILD ON - CONFIGURE_COMMAND meson ../dxvk-native + CONFIGURE_COMMAND meson setup ../dxvk-native --buildtype=release -Denable_d3d11=false -Denable_d3d10=false -Denable_dxgi=false BUILD_COMMAND ninja + BUILD_BYPRODUCTS <BINARY_DIR>/src/d3d9/libdxvk_d3d9.so INSTALL_COMMAND "" ) ExternalProject_Get_property(dxvk-native SOURCE_DIR BINARY_DIR)