From 644ff788ca3634900f2742cc661961e14ad5beaf Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 12 Nov 2019 10:20:10 +0000 Subject: [PATCH] Import primus-vk_1.3.orig.tar.gz [dgit import orig primus-vk_1.3.orig.tar.gz] --- .gitignore | 7 + LICENSE | 25 + Makefile | 45 + README.md | 100 +++ nv_vulkan_wrapper.cpp | 87 ++ nv_vulkan_wrapper.json | 7 + primus_vk.cpp | 1312 ++++++++++++++++++++++++++++ primus_vk.json | 21 + primus_vk_diag.cpp | 232 +++++ primus_vk_diag.sh | 54 ++ primus_vk_forwarding.h | 17 + primus_vk_forwarding_prototypes.h | 61 ++ pvkrun.in.sh | 2 + surface_forwarding_functions.xslt | 14 + surface_forwarding_prototypes.xslt | 22 + 15 files changed, 2006 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 Makefile create mode 100644 README.md create mode 100644 nv_vulkan_wrapper.cpp create mode 100644 nv_vulkan_wrapper.json create mode 100644 primus_vk.cpp create mode 100644 primus_vk.json create mode 100644 primus_vk_diag.cpp create mode 100755 primus_vk_diag.sh create mode 100644 primus_vk_forwarding.h create mode 100644 primus_vk_forwarding_prototypes.h create mode 100644 pvkrun.in.sh create mode 100644 surface_forwarding_functions.xslt create mode 100644 surface_forwarding_prototypes.xslt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e74dad1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +\#*\# +*~ + +/libprimus_vk.so +/libnv_vulkan_wrapper.so +/primus_vk_diag +/primus_vk_diag.o diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4071f77 --- /dev/null +++ b/LICENSE @@ -0,0 +1,25 @@ +BSD 2-Clause License + +Copyright (c) 2018, Felix Doerre +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..86ab89e --- /dev/null +++ b/Makefile @@ -0,0 +1,45 @@ +DESTDIR ?= +PREFIX = $(DESTDIR)/usr/local +INSTALL = /usr/bin/install +override INSTALL += -D +MSGFMT = /usr/bin/msgfmt +SED = /bin/sed +LN = /bin/ln +bindir = $(PREFIX)/bin +libdir = $(PREFIX)/lib +sysconfdir = $(PREFIX)/etc +datarootdir = ${PREFIX}/share +datadir = ${datarootdir} + +override CXXFLAGS += --std=gnu++11 -g3 + +all: libprimus_vk.so libnv_vulkan_wrapper.so + +libprimus_vk.so: primus_vk.cpp + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -I/usr/include/vulkan -shared -fPIC $^ -o $@ -Wl,-soname,libprimus_vk.so.1 -ldl -lpthread $(LDFLAGS) + +libnv_vulkan_wrapper.so: nv_vulkan_wrapper.cpp + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -I/usr/include/vulkan -shared -fPIC $^ -o $@ -Wl,-soname,libnv_vulkan_wrapper.so.1 -ldl $(LDFLAGS) + +primus_vk_forwarding.h: + xsltproc surface_forwarding_functions.xslt /usr/share/vulkan/registry/vk.xml | tail -n +2 > $@ + +primus_vk_forwarding_prototypes.h: + xsltproc 
surface_forwarding_prototypes.xslt /usr/share/vulkan/registry/vk.xml | tail -n +2 > $@ + +primus_vk.cpp: primus_vk_forwarding.h primus_vk_forwarding_prototypes.h + +primus_vk_diag: primus_vk_diag.o + $(CXX) -g3 -o $@ $^ -lX11 -lvulkan -ldl $(LDFLAGS) + +clean: + rm -f libnv_vulkan_wrapper.so libprimus_vk.so + +install: all + $(INSTALL) "libnv_vulkan_wrapper.so" "$(libdir)/libnv_vulkan_wrapper.so.1" + $(LN) -s "libnv_vulkan_wrapper.so.1" "$(libdir)/libnv_vulkan_wrapper.so" + $(INSTALL) "libprimus_vk.so" "$(libdir)/libprimus_vk.so.1" + $(LN) -s "libprimus_vk.so.1" "$(libdir)/libprimus_vk.so" + $(INSTALL) -m644 "primus_vk.json" -t "$(datadir)/vulkan/implicit_layer.d/" + $(INSTALL) -m644 "nv_vulkan_wrapper.json" -t "$(datadir)/vulkan/icd.d/" + $(INSTALL) -m755 "pvkrun.in.sh" "$(bindir)/pvkrun" diff --git a/README.md b/README.md new file mode 100644 index 0000000..4654587 --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# Primus-vk + +This Vulkan layer can be used to do GPU offloading. Typically you want to display an image rendered on a more powerful GPU on a display managed by an internal GPU. + +It is basically the same as Primus for OpenGL (https://github.com/amonakov/primus). However it does not wrap the Vulkan API from the application but is directly integrated into Vulkan as a layer (which seems to be the intendend way to implement such logic). + +## Usage + +First you need to install `primus_vk`. On Archlinux there are official packages ([for 64-bit games](https://www.archlinux.org/packages/community/x86_64/primus_vk/), [for 32-bit games](https://www.archlinux.org/packages/multilib/x86_64/lib32-primus_vk/)). For other distributions you will likely need to [manually install](#installation) `primus_vk`. + +To run an application with `primus_vk` prefix the command with `ENABLE_PRIMUS_LAYER=1 optirun`. So instead of running `path/to/application`, invoke `ENABLE_PRIMUS_LAYER=1 optirun path/to/application` instead. 
+ +## Idea + +Just as the OpenGL-Primus: Let the application talk to the primary display and transparently map API calls so that the application thinks it renders using the primary display, however the `VkDevice` (and `VkImage`s) comes from the rendering GPU. +When the application wants to swap frames, copy the image over to the integrated GPU and display it there. + +## Why do we need to copy the Image so often? +As far as I can tell `VkImage` (and `VkDeviceMemory`) objects may not be shared between different physical devices. So there is not really another way than using `memcpy` on the images when memory-mapped into main memory. + +Additionally, only images with `VK_IMAGE_TILING_OPTIMAL` can be rendered to and presented and only images with `VK_IMAGE_TILING_LINEAR` can be mapped to main memory to be copied. So I see no better way than copying the image 3 times from render target to display. On my machine the `memcpy` from an external device was pretty clearly the bottleneck. So it is not really the copying of the image, but the transfer from rendering GPU into main memory. + +An idea might be to use `VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT` to map one device's memory and use that directly on the other device (or import host-allocated memory on both devices). However that is not implemented yet. + +## Dependencies +This layer requires two working vulkan drivers. The only hardware that I have experience with is Intel Integrated Graphics + Nvidia. However it should theoretically work with any other graphics setup of two vulkan-compatible graphics devices. For the Nvidia graphics card, both the "nonglvnd" and the "glvnd" proprietary driver seem to work. + +To use this layer you will require something similar to bumblebee to power on/off the dedicated graphics card. + +Due to a bug/missing feature in the Vulkan Loader you will need `Vulkan/libvulkan >= 1.1.108`. 
If you have an older system you can try primus_vk version 1.1 which contains an ugly workaround for that issue and is therefore compatible with older Vulkan versions. + + +## Development Status + +This layer works for the applications I tested it with, but still has some technical difficulties (see Technical Limitations). Additionally the image copy still introduces too much overhead. +However this layer should already be usable with most applications. + +## Technical Limitations + +1. The NVIDIA driver always connects to the "default" X-Display to verify that it has the NV-GLX extensions available. Otherwise the NVIDIA-vulkan-icd driver disables itself. For testing an intermediate solution is to modify the demo application to always use ":0" and set DISPLAY to ":8" to make the NV-Driver happy. However this approach does not work for general applications that cannot be modified. So this issue has to be solved in the graphics driver. + +2. Currently under Debian unstable the nvidia-icd is registered with a non-absolute library path in `/usr/share/vulkan/icd.d/nvidia_icd.json`. Replace `libGL.so.1` with `/usr/lib/x86_64-linux-gnu/nvidia/libGL.so.1` there to always load the intended Vulkan driver. + +3. When running an application with DXVK and wine, wine loads both Vulkan and OpenGL. This creates a problem as: + 1. Wine loads Vulkan, which loads the integrated GPU's ICD, the Nvidia ICD (contained in Nvidia's libGL.so on my system), Primus-VK and potentially more. + 2. Wine loads OpenGL, which should be satisfied by OpenGL-Primus. However for whatever reason wine directly gets Nvidia's libGL which fails to provide an OpenGL context for the primary X screen. + This needs to be prevented by forcing wine to load Primus' libGL. + +Issues 1, 2 and 3 can be worked around by compiling `libnv_vulkan_wrapper.so` and registering it instead of nvidia's `libGL.so.1` in `/usr/share/vulkan/icd.d/nvidia_icd.json`. 
+ +## Installation +### Locally +Create the folder `~/.local/share/vulkan/implicit_layer.d` and copy `primus_vk.json` there with the path adjusted to the location of the shared object. + +### System-wide +Copy `primus_vk.json` to `/usr/share/vulkan/implicit_layer.d` and adjust the path. + +## Howto + +1. Use `make libprimus_vk.so libnv_vulkan_wrapper.so` to compile Primus-vk and `libnv_vulkan_wrapper.so` (check that the path to the nvidia-driver in `nv_vulkan_wrapper.cpp` is correct). +2. Ensure that the (unwrapped) nvidia driver is not registered (e.g. in `/usr/share/vulkan/icd.d/nvidia_icd.json`) and create a similar file `nv_vulkan_wrapper.json` where the path to the driver points to the compiled `libnv_vulkan_wrapper.so`. +3. (Optional) Run `optirun primus_vk_diag`. It has to display entries for both graphics cards, otherwise the driver setup is broken. +4. Install `primus_vk.json` and adjust the path. +5. Run `ENABLE_PRIMUS_LAYER=1 optirun vulkan-smoketest`. + If you want to specify the devices used for rendering and displaying manually, you can use `PRIMUS_VK_DISPLAYID` and `PRIMUS_VK_RENDERID` and give them values of the form `vendorID:deviceID` (hexadecimal), taken from `optirun env DISPLAY=:8 vulkaninfo`. + +I tested this on Debian unstable. + +### Debian Packages that I used: + +``` +bumblebee-nvidia 3.2.1-17 +nvidia-driver 390.77-1 +nvidia-nonglvnd-vulkan-icd:amd64 390.77-1 +nvidia-nonglvnd-vulkan-icd:i386 390.77-1 +primus 0~20150328-6 +mesa-vulkan-drivers:amd64 18.1.7-1 +``` + +For testing a Windows DX11-Application, I used: +``` +wine32-development:i386 3.14-1 +wine64-development 3.14-1 +``` +and dxvk-0.7 inside the wineprefix. 
+ +### Arch Linux + +Notes for running on Arch Linux: + +* nv_vulkan_wrapper.cpp: Change nvDiver path to `/usr/lib/libGLX_nvidia.so.0` +* primus_vk.cpp: add: `#include "vk_layer_utils.h"` (on Debian the contents are included in some other header and there is no "vk_layer_utils.h") + +### RPM package + +Leonid Maksymchuk built RPM packaging scripts for primus-vk which can be found in his [repository](https://github.com/leonmaxx/primus-vk-rpm). + +## Credits + +This layer is based on the sample layer available under https://github.com/baldurk/sample_layer. The guide that goes along with it is [https://renderdoc.org/vulkan-layer-guide.html](https://renderdoc.org/vulkan-layer-guide.html). diff --git a/nv_vulkan_wrapper.cpp b/nv_vulkan_wrapper.cpp new file mode 100644 index 0000000..9494cba --- /dev/null +++ b/nv_vulkan_wrapper.cpp @@ -0,0 +1,87 @@ +#include +#include + +#include +#include + +extern "C" VKAPI_ATTR VkResult VKAPI_CALL vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion); + + +#ifndef NV_DRIVER_PATH +#define NV_DRIVER_PATH "/usr/lib/x86_64-linux-gnu/nvidia/current/libGL.so.1" +#endif + +class StaticInitialize { + void *nvDriver; + void *glLibGL; +public: + VKAPI_ATTR PFN_vkVoidFunction (*instanceProcAddr) (VkInstance instance, + const char* pName); + VKAPI_ATTR PFN_vkVoidFunction (*phyProcAddr) (VkInstance instance, + const char* pName); + VKAPI_ATTR VkResult VKAPI_CALL (*negotiateVersion)(uint32_t* pSupportedVersion); +public: + StaticInitialize(){ + // Load libGL from LD_LIBRARY_PATH before loading the NV-driver (unluckily also named libGL + // This ensures that ld.so will find this libGL before the Nvidia one, when + // again asked to load libGL. 
+ glLibGL = dlopen("libGL.so.1", RTLD_GLOBAL | RTLD_NOW); + + std::string drivers(NV_DRIVER_PATH); + while(!nvDriver && drivers.size() > 0){ + auto end = drivers.find(':'); + if(end == std::string::npos) { + nvDriver = dlopen(drivers.c_str(), RTLD_LOCAL | RTLD_LAZY); + } else { + std::string this_driver = drivers.substr(0, end); + nvDriver = dlopen(this_driver.c_str(), RTLD_LOCAL | RTLD_LAZY); + drivers = drivers.substr(end+1); + } + } + if(!nvDriver) { + std::cerr << "PrimusVK: ERROR! Nvidia driver could not be loaded from '" NV_DRIVER_PATH "'.\n"; + return; + } + typedef void* (*dlsym_fn)(void *, const char*); + static dlsym_fn real_dlsym = (dlsym_fn) dlsym(dlopen("libdl.so.2", RTLD_LAZY), "dlsym"); + instanceProcAddr = (decltype(instanceProcAddr)) real_dlsym(nvDriver, "vk_icdGetInstanceProcAddr"); + phyProcAddr = (decltype(phyProcAddr)) real_dlsym(nvDriver, "vk_icdGetPhysicalDeviceProcAddr"); + negotiateVersion = (decltype(negotiateVersion)) real_dlsym(nvDriver, "vk_icdNegotiateLoaderICDInterfaceVersion"); + } + ~StaticInitialize(){ + if(nvDriver) + dlclose(nvDriver); + dlclose(glLibGL); + } + bool IsInited(){ + return nvDriver != nullptr; + } +}; + +StaticInitialize init; + +extern "C" VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( + VkInstance instance, + const char* pName){ + if (!init.IsInited()) return nullptr; + auto res = init.instanceProcAddr(instance, pName); + return res; +} + +extern "C" VKAPI_ATTR PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(VkInstance instance, + const char* pName){ + if (!init.IsInited()) return nullptr; + auto res = init.phyProcAddr(instance, pName); + return res; +} +extern "C" VKAPI_ATTR VkResult VKAPI_CALL vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion){ + if (!init.IsInited()) { + return VK_ERROR_INCOMPATIBLE_DRIVER; + } + char *prev = getenv("DISPLAY"); + std::string old{prev}; + setenv("DISPLAY", ":8", 1); + auto res = init.negotiateVersion(pSupportedVersion); + 
setenv("DISPLAY",old.c_str(), 1); + return res; +} diff --git a/nv_vulkan_wrapper.json b/nv_vulkan_wrapper.json new file mode 100644 index 0000000..8bc6561 --- /dev/null +++ b/nv_vulkan_wrapper.json @@ -0,0 +1,7 @@ +{ + "file_format_version" : "1.0.0", + "ICD": { + "library_path": "libnv_vulkan_wrapper.so", + "api_version" : "1.1.84" + } +} diff --git a/primus_vk.cpp b/primus_vk.cpp new file mode 100644 index 0000000..ba5a69e --- /dev/null +++ b/primus_vk.cpp @@ -0,0 +1,1312 @@ +#include "vulkan.h" +#include "vk_layer.h" +#include "vk_layer_dispatch_table.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef VK_LAYER_EXPORT +#if defined(WIN32) +#define VK_LAYER_EXPORT extern "C" __declspec(dllexport) +#else +#define VK_LAYER_EXPORT extern "C" +#endif + +// single global lock, for simplicity +std::mutex global_lock; +typedef std::lock_guard scoped_lock; + +// use the loader's dispatch table pointer as a key for dispatch map lookups +template +void *&GetKey(DispatchableType inst) +{ + return *(void **)inst; +} + +class CreateOtherDevice; + +// #define TRACE(x) +#define TRACE(x) std::cout << "PrimusVK: " << x << "\n"; +#define TRACE_PROFILING(x) +// #define TRACE_PROFILING(x) std::cout << "PrimusVK: " << x << "\n"; +#define TRACE_PROFILING_EVENT(x, y) +// #define TRACE_PROFILING_EVENT(idx, evt) std::cout << "PrimusVK-profiling: " << idx << " " << std::chrono::duration_cast(std::chrono::steady_clock::now() - primus_start).count() << " " << evt << "\n"; +#define TRACE_FRAME(x) +// #define TRACE_FRAME(x) std::cout << "PrimusVK: " << x << "\n"; + +#define VK_CHECK_RESULT(x) do{ const VkResult r = x; if(r != VK_SUCCESS){printf("PrimusVK: Error %d in line %d.\n", r, __LINE__);}}while(0); +// #define VK_CHECK_RESULT(x) if(x != VK_SUCCESS){printf("Error %d, in %d\n", x, __LINE__);} + +struct InstanceInfo { +public: + 
VkInstance instance; + PFN_vkLayerCreateDevice layerCreateDevice; + PFN_vkLayerDestroyDevice layerDestroyDevice; + + VkPhysicalDevice render = VK_NULL_HANDLE; + VkPhysicalDevice display = VK_NULL_HANDLE; + std::map> cod = {}; + + std::shared_ptr renderQueueMutex = std::make_shared(); + InstanceInfo() = default; + InstanceInfo(const InstanceInfo &) = delete; + InstanceInfo(InstanceInfo &&) = default; + InstanceInfo(VkInstance instance, + PFN_vkLayerCreateDevice layerCreateDevice, + PFN_vkLayerDestroyDevice layerDestroyDevice) : instance(instance), layerCreateDevice(layerCreateDevice), layerDestroyDevice(layerDestroyDevice) { + } + InstanceInfo &operator=(InstanceInfo &&) = default; +private: + void GetEnvVendorDeviceIDs(std::string env, uint32_t &vendor, uint32_t &device) { + char *envstr = getenv(env.c_str()); + if(envstr != nullptr){ + std::stringstream ss(envstr); + std::string item; + std::vector hexnums(2); + int i = 0; + while(std::getline(ss, item, ':') && (i < 2)) { + uint32_t num = 0; + std::stringstream _ss; + _ss << std::hex << item; + _ss >> num; + hexnums[i] = num; + ++i; + } + vendor = hexnums[0]; + device = hexnums[1]; + } + } + + bool IsDevice( + VkPhysicalDeviceProperties props, + uint32_t vendor, + uint32_t device, + VkPhysicalDeviceType type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) { + if((vendor == 0) && (props.deviceType == type)){ + if(type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU){ + TRACE("Got integrated gpu!"); + } else { + TRACE("Got discrete gpu!"); + } + TRACE("Device: " << props.deviceName); + TRACE(" Type: " << props.deviceType); + return true; + } + if((props.vendorID == vendor) && (props.deviceID == device)){ + TRACE("Got device from env!"); + TRACE("Device: " << props.deviceName); + TRACE(" Type: " << props.deviceType); + return true; + } + if(props.vendorID == vendor){ + TRACE("Got device from env! 
(via vendorID)"); + TRACE("Device: " << props.deviceName); + TRACE(" Type: " << props.deviceType); + return true; + } + return false; + } + +public: + VkResult searchDevices(VkLayerInstanceDispatchTable &dispatchTable){ + uint32_t displayVendorID = 0; + uint32_t displayDeviceID = 0; + uint32_t renderVendorID = 0; + uint32_t renderDeviceID = 0; + GetEnvVendorDeviceIDs("PRIMUS_VK_DISPLAYID", displayVendorID, displayDeviceID); + GetEnvVendorDeviceIDs("PRIMUS_VK_RENDERID", renderVendorID, renderDeviceID); + + std::vector physicalDevices; + { + auto enumerateDevices = dispatchTable.EnumeratePhysicalDevices; + uint32_t gpuCount = 0; + enumerateDevices(instance, &gpuCount, nullptr); + physicalDevices.resize(gpuCount); + enumerateDevices(instance, &gpuCount, physicalDevices.data()); + } + + TRACE("Searching for display GPU:"); + for(auto &dev: physicalDevices){ + VkPhysicalDeviceProperties props; + dispatchTable.GetPhysicalDeviceProperties(dev, &props); + TRACE(dev << ": "); + if(IsDevice(props, displayVendorID, displayDeviceID, VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)){ + display = dev; + break; + } + } + + TRACE("Searching for render GPU:"); + for(auto &dev: physicalDevices){ + VkPhysicalDeviceProperties props; + dispatchTable.GetPhysicalDeviceProperties(dev, &props); + TRACE(dev << "."); + if(IsDevice(props, renderVendorID, renderDeviceID)){ + render = dev; + break; + } + } + if(display == VK_NULL_HANDLE || render == VK_NULL_HANDLE){ + const auto c_icd_filenames = getenv("VK_ICD_FILENAMES"); + if(display == VK_NULL_HANDLE) { + TRACE("No device for the display GPU found. Are the intel-mesa drivers installed?"); + } + if(render == VK_NULL_HANDLE) { + TRACE("No device for the rendering GPU found. 
Is the correct driver installed?"); + } + if(c_icd_filenames != nullptr) { + TRACE("VK_ICD_FILENAMES=" << c_icd_filenames); + } else { + TRACE("VK_ICD_FILENAMES not set"); + } + return VK_ERROR_INITIALIZATION_FAILED; + } + return VK_SUCCESS; + } +}; + +std::map instance_dispatch; +VkLayerInstanceDispatchTable loader_dispatch; +// VkInstance->disp is beeing malloc'ed for every new instance +// so we can assume it to be a good key. +std::map instance_info; + +std::map device_instance_info; +std::map device_dispatch; + +/////////////////////////////////////////////////////////////////////////////////////////// +// Layer init and shutdown +VkLayerDispatchTable fetchDispatchTable(PFN_vkGetDeviceProcAddr gdpa, VkDevice *pDevice); +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL PrimusVK_GetInstanceProcAddr(VkInstance instance, const char *pName); +VkResult VKAPI_CALL PrimusVK_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + VkLayerInstanceCreateInfo *layer_link_info = nullptr; + PFN_vkLayerCreateDevice layerCreateDevice = nullptr; + PFN_vkLayerDestroyDevice layerDestroyDevice = nullptr; + VkLayerInstanceCreateInfo *layerCreateInfo = (VkLayerInstanceCreateInfo *)pCreateInfo->pNext; + + while(layerCreateInfo) { + if ( layerCreateInfo->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && layerCreateInfo->function == VK_LAYER_LINK_INFO) { + layer_link_info = layerCreateInfo; + } + if ( layerCreateInfo->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && layerCreateInfo->function == VK_LOADER_LAYER_CREATE_DEVICE_CALLBACK) { + layerCreateDevice = layerCreateInfo->u.layerDevice.pfnLayerCreateDevice; + layerDestroyDevice = layerCreateInfo->u.layerDevice.pfnLayerDestroyDevice; + } + layerCreateInfo = (VkLayerInstanceCreateInfo *)layerCreateInfo->pNext; + } + + if(layer_link_info == nullptr) { + return VK_ERROR_INITIALIZATION_FAILED; + } + if(layerCreateDevice == nullptr || 
layerDestroyDevice == nullptr) { + TRACE("Loader did not supply layer device creation callbacks. Please upgrade to vulkan >= 1.1.108"); + return VK_ERROR_INITIALIZATION_FAILED; + } + + PFN_vkGetInstanceProcAddr gpa = layer_link_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; + layer_link_info->u.pLayerInfo = layer_link_info->u.pLayerInfo->pNext; + + PFN_vkCreateInstance createFunc = (PFN_vkCreateInstance)gpa(VK_NULL_HANDLE, "vkCreateInstance"); + VK_CHECK_RESULT( createFunc(pCreateInfo, pAllocator, pInstance) ); + + // fetch our own dispatch table for the functions we need, into the next layer + VkLayerInstanceDispatchTable dispatchTable; +#define FORWARD(func) dispatchTable.func = (PFN_vk##func)gpa(*pInstance, "vk" #func); + FORWARD(GetInstanceProcAddr); + FORWARD(EnumeratePhysicalDevices); + FORWARD(DestroyInstance); + FORWARD(EnumerateDeviceExtensionProperties); + FORWARD(GetPhysicalDeviceProperties); +#undef FORWARD + + auto my_instance_info = InstanceInfo{*pInstance, layerCreateDevice, layerDestroyDevice}; + auto res = my_instance_info.searchDevices(dispatchTable); + if(res != VK_SUCCESS) return res; +#define FORWARD(func) dispatchTable.func = (PFN_vk##func)gpa(*pInstance, "vk" #func); + FORWARD(GetPhysicalDeviceMemoryProperties); + FORWARD(GetPhysicalDeviceQueueFamilyProperties); +#include "primus_vk_forwarding.h" +#undef FORWARD + + // store the table by key + { + scoped_lock l(global_lock); + + instance_dispatch[GetKey(*pInstance)] = dispatchTable; + instance_info[GetKey(*pInstance)] = std::move(my_instance_info); + } + + return VK_SUCCESS; +} + +void VKAPI_CALL PrimusVK_DestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator) +{ + scoped_lock l(global_lock); + + instance_dispatch[GetKey(instance)].DestroyInstance(instance, pAllocator); + + instance_dispatch.erase(GetKey(instance)); + instance_info.erase(GetKey(instance)); +} + +struct FramebufferImage; +struct MappedMemory{ + VkDevice device; + VkDeviceMemory mem; + char* data; + 
MappedMemory(VkDevice device, FramebufferImage &img); + ~MappedMemory(); +}; +struct FramebufferImage { + VkImage img; + VkDeviceMemory mem; + + VkDevice device; + + std::shared_ptr mapped; + FramebufferImage(FramebufferImage &) = delete; + FramebufferImage(VkDevice device, VkExtent2D size, VkImageTiling tiling, VkImageUsageFlags usage, VkFormat format, int memoryTypeIndex): device(device){ + TRACE("Creating image: " << size.width << "x" << size.height); + VkImageCreateInfo imageCreateCI {}; + imageCreateCI.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageCreateCI.imageType = VK_IMAGE_TYPE_2D; + imageCreateCI.format = format; + imageCreateCI.extent.width = size.width; + imageCreateCI.extent.height = size.height; + imageCreateCI.extent.depth = 1; + imageCreateCI.arrayLayers = 1; + imageCreateCI.mipLevels = 1; + imageCreateCI.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageCreateCI.samples = VK_SAMPLE_COUNT_1_BIT; + imageCreateCI.tiling = tiling; + imageCreateCI.usage = usage; + VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateImage(device, &imageCreateCI, nullptr, &img)); + + VkMemoryRequirements memRequirements {}; + VkMemoryAllocateInfo memAllocInfo {.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO}; + device_dispatch[GetKey(device)].GetImageMemoryRequirements(device, img, &memRequirements); + memAllocInfo.allocationSize = memRequirements.size; + memAllocInfo.memoryTypeIndex = memoryTypeIndex; + VK_CHECK_RESULT(device_dispatch[GetKey(device)].AllocateMemory(device, &memAllocInfo, nullptr, &mem)); + VK_CHECK_RESULT(device_dispatch[GetKey(device)].BindImageMemory(device, img, mem, 0)); + } + std::shared_ptr getMapped(){ + if(!mapped){ + throw std::runtime_error("not mapped"); + } + return mapped; + } + void map(){ + mapped = std::make_shared(device, *this); + } + VkSubresourceLayout getLayout(){ + VkImageSubresource subResource { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 }; + VkSubresourceLayout subResourceLayout; + 
device_dispatch[GetKey(device)].GetImageSubresourceLayout(device, img, &subResource, &subResourceLayout); + return subResourceLayout; + } + ~FramebufferImage(){ + mapped.reset(); + device_dispatch[GetKey(device)].FreeMemory(device, mem, nullptr); + device_dispatch[GetKey(device)].DestroyImage(device, img, nullptr); + } +}; +MappedMemory::MappedMemory(VkDevice device, FramebufferImage &img): device(device), mem(img.mem){ + device_dispatch[GetKey(device)].MapMemory(device, img.mem, 0, VK_WHOLE_SIZE, 0, (void**)&data); +} +MappedMemory::~MappedMemory(){ + device_dispatch[GetKey(device)].UnmapMemory(device, mem); +} +class CommandBuffer; +class Fence{ + VkDevice device; +public: + VkFence fence; + Fence(VkDevice dev): device(dev){ + // Create fence to ensure that the command buffer has finished executing + VkFenceCreateInfo fenceInfo = {.sType=VK_STRUCTURE_TYPE_FENCE_CREATE_INFO}; + fenceInfo.flags = 0; + VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateFence(device, &fenceInfo, nullptr, &fence)); + } + void await(){ + // Wait for the fence to signal that command buffer has finished executing + VK_CHECK_RESULT(device_dispatch[GetKey(device)].WaitForFences(device, 1, &fence, VK_TRUE, 10000000000L)); + } + void reset(){ + VK_CHECK_RESULT(device_dispatch[GetKey(device)].ResetFences(device, 1, &fence)); + } + Fence(Fence &&other): device(other.device), fence(other.fence){ + other.fence = VK_NULL_HANDLE; + } + ~Fence(){ + if(fence != VK_NULL_HANDLE){ + device_dispatch[GetKey(device)].DestroyFence(device, fence, nullptr); + } + } +}; +class Semaphore{ + VkDevice device; +public: + VkSemaphore sem; + Semaphore(VkDevice dev): device(dev){ + VkSemaphoreCreateInfo semInfo = {.sType=VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; + semInfo.flags = 0; + VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateSemaphore(device, &semInfo, nullptr, &sem)); + } + Semaphore(Semaphore &&other): device(other.device), sem(other.sem) { + other.sem = VK_NULL_HANDLE; + other.device = 
VK_NULL_HANDLE; + } + ~Semaphore(){ + if(sem != VK_NULL_HANDLE){ + device_dispatch[GetKey(device)].DestroySemaphore(device, sem, nullptr); + } + } +}; +struct PrimusSwapchain; +struct ImageWorker { + PrimusSwapchain &swapchain; + + std::shared_ptr render_image; + std::shared_ptr render_copy_image; + std::shared_ptr display_src_image; + Fence render_copy_fence; + Semaphore display_semaphore; + VkImage display_image = VK_NULL_HANDLE; + + std::shared_ptr render_copy_command; + std::shared_ptr display_command; + std::unique_ptr display_command_fence; + + ImageWorker(PrimusSwapchain &swapchain, VkImage display_image, const VkSwapchainCreateInfoKHR &createInfo, std::tuple image_memory_types); + ImageWorker(ImageWorker &&other) = default; + ~ImageWorker(); + void initImages( std::tuple image_memory_types, const VkSwapchainCreateInfoKHR &createInfo); + void createCommandBuffers(); + void copyImageData(std::vector sems); +}; +struct PrimusSwapchain{ + std::chrono::steady_clock::time_point lastPresent = std::chrono::steady_clock::now(); + VkDevice device; + VkQueue render_queue; + VkDevice display_device; + std::mutex displayQueueMutex; + VkQueue display_queue; + VkSwapchainKHR backend; + std::vector images; + VkExtent2D imgSize; + + std::vector> threads; + + std::shared_ptr cod; + PrimusSwapchain(PrimusSwapchain &) = delete; + PrimusSwapchain(VkDevice device, VkDevice display_device, VkSwapchainKHR backend, const VkSwapchainCreateInfoKHR *pCreateInfo, uint32_t imageCount, std::shared_ptr &cod): + device(device), display_device(display_device), backend(backend), cod(cod){ + // TODO automatically find correct queue and not choose 0 forcibly + device_dispatch[GetKey(device)].GetDeviceQueue(device, 0, 0, &render_queue); + device_dispatch[GetKey(display_device)].GetDeviceQueue(display_device, 0, 0, &display_queue); + GetKey(render_queue) = GetKey(device); // TODO, use vkSetDeviceLoaderData instead + GetKey(display_queue) = GetKey(display_device); + + uint32_t image_count; + 
device_dispatch[GetKey(display_device)].GetSwapchainImagesKHR(display_device, backend, &image_count, nullptr); + TRACE("Image aquiring: " << image_count); + std::vector display_images; + display_images.resize(image_count); + device_dispatch[GetKey(display_device)].GetSwapchainImagesKHR(display_device, backend, &image_count, display_images.data()); + + imgSize = pCreateInfo->imageExtent; + + auto image_memory_types = getImageMemories(); + for(uint32_t i = 0; i < imageCount; i++){ + images.emplace_back(*this, display_images[i], *pCreateInfo, image_memory_types); + } + + TRACE("Creating a Swapchain thread."); + size_t thread_count = 1; + char *m_env = getenv("PRIMUS_VK_MULTITHREADING"); + if(m_env == nullptr || std::string{m_env} != "1"){ + thread_count = imageCount; + } + threads.resize(thread_count); + for(auto &thread: threads){ + thread = std::unique_ptr(new std::thread([this](){this->run();})); + pthread_setname_np(thread->native_handle(), "swapchain-thread"); + } + } + + std::tuple getImageMemories(); + + void storeImage(uint32_t index, VkQueue queue, std::vector wait_on, Fence ¬ify); + + void queue(VkQueue queue, const VkPresentInfoKHR *pPresentInfo); + + std::mutex queueMutex; + std::condition_variable has_work; + bool active = true; + struct QueueItem { + VkQueue queue; + VkPresentInfoKHR pPresentInfo; + uint32_t imgIndex; + }; + std::list work; + std::list in_progress; + void present(const QueueItem &workItem); + void run(); + void stop(); +}; + +ImageWorker::ImageWorker(PrimusSwapchain &swapchain, VkImage display_image, const VkSwapchainCreateInfoKHR &createInfo, std::tuple image_memory_types): swapchain(swapchain), render_copy_fence(swapchain.device), display_semaphore(swapchain.display_device), display_image(display_image){ + initImages(image_memory_types, createInfo); + createCommandBuffers(); +} +ImageWorker::~ImageWorker(){ + if(display_command_fence){ + display_command_fence->await(); + } +} + +class CreateOtherDevice { +public: + VkPhysicalDevice 
display_dev; + VkPhysicalDevice render_dev; + VkPhysicalDeviceMemoryProperties display_mem; + VkPhysicalDeviceMemoryProperties render_mem; + VkDevice render_gpu = VK_NULL_HANDLE; + VkDevice display_gpu = VK_NULL_HANDLE; + + CreateOtherDevice(VkPhysicalDevice display_dev, VkPhysicalDevice render_dev): + display_dev(display_dev), render_dev(render_dev){ + } + void setRenderDevice(VkDevice render_gpu){ + this->render_gpu = render_gpu; + } + void finish(std::function creator){ + auto &minstance_info = instance_info[GetKey(render_dev)]; + auto &minstance_dispatch = instance_dispatch[GetKey(minstance_info.instance)]; + minstance_dispatch.GetPhysicalDeviceMemoryProperties(display_dev, &display_mem); + minstance_dispatch.GetPhysicalDeviceMemoryProperties(render_dev, &render_mem); + + createDisplayDev(creator); + } + void createDisplayDev(std::function creator){ + VkDeviceCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + + VkDeviceQueueCreateInfo queueInfo{}; + queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queueInfo.queueFamilyIndex = 0; + queueInfo.queueCount = 1; + const float defaultQueuePriority(0.0f); + queueInfo.pQueuePriorities = &defaultQueuePriority; + + createInfo.queueCreateInfoCount = 1; + createInfo.pQueueCreateInfos = &queueInfo; + createInfo.enabledExtensionCount = 1; + const char *swap[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + createInfo.ppEnabledExtensionNames = swap; + VkResult ret = creator(createInfo, display_gpu); + TRACE("Creating display device finished!: " << ret); + if(ret != VK_SUCCESS){ + throw std::runtime_error("Display device creation failed"); + } + } +}; + + +class CommandBuffer { + VkCommandPool commandPool; + VkDevice device; +public: + VkCommandBuffer cmd; + CommandBuffer(VkDevice device) : device(device) { + VkCommandPoolCreateInfo poolInfo = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO}; + poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + 
poolInfo.queueFamilyIndex = 0; + VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateCommandPool(device, &poolInfo, nullptr, &commandPool)); + VkCommandBufferAllocateInfo cmdBufAllocateInfo = {.sType=VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO}; + cmdBufAllocateInfo.commandPool = commandPool; + cmdBufAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + cmdBufAllocateInfo.commandBufferCount = 1; + + VK_CHECK_RESULT(device_dispatch[GetKey(device)].AllocateCommandBuffers(device, &cmdBufAllocateInfo, &cmd)); + + VkCommandBufferBeginInfo cmdBufInfo = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO}; + VK_CHECK_RESULT(device_dispatch[GetKey(device)].BeginCommandBuffer(cmd, &cmdBufInfo)); + } + ~CommandBuffer(){ + device_dispatch[GetKey(device)].FreeCommandBuffers(device, commandPool, 1, &cmd); + } + void insertImageMemoryBarrier( + VkImage image, + VkAccessFlags srcAccessMask, + VkAccessFlags dstAccessMask, + VkImageLayout oldImageLayout, + VkImageLayout newImageLayout, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + VkImageSubresourceRange subresourceRange) { + VkImageMemoryBarrier imageMemoryBarrier{.sType=VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; + imageMemoryBarrier.srcAccessMask = srcAccessMask; + imageMemoryBarrier.dstAccessMask = dstAccessMask; + imageMemoryBarrier.oldLayout = oldImageLayout; + imageMemoryBarrier.newLayout = newImageLayout; + imageMemoryBarrier.image = image; + imageMemoryBarrier.subresourceRange = subresourceRange; + + device_dispatch[GetKey(device)].CmdPipelineBarrier( + cmd, + srcStageMask, + dstStageMask, + 0, + 0, nullptr, + 0, nullptr, + 1, &imageMemoryBarrier); + } + void copyImage(VkImage src, VkImage dst, VkExtent2D imgSize){ + VkImageCopy imageCopyRegion{}; + imageCopyRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageCopyRegion.srcSubresource.layerCount = 1; + imageCopyRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageCopyRegion.dstSubresource.layerCount = 1; 
+ imageCopyRegion.extent.width = imgSize.width; + imageCopyRegion.extent.height = imgSize.height; + imageCopyRegion.extent.depth = 1; + + // Issue the copy command + device_dispatch[GetKey(device)].CmdCopyImage( + cmd, + src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &imageCopyRegion); + } + void end(){ + VK_CHECK_RESULT(device_dispatch[GetKey(device)].EndCommandBuffer(cmd)); + } + void submit(VkQueue queue, VkFence fence, std::vector wait = {}, std::vector signal = {}){ + VkSubmitInfo submitInfo = {.sType=VK_STRUCTURE_TYPE_SUBMIT_INFO}; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmd; + VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + submitInfo.pWaitDstStageMask = &waitStage; + submitInfo.waitSemaphoreCount = wait.size(); + submitInfo.pWaitSemaphores = wait.data(); + submitInfo.signalSemaphoreCount = signal.size(); + submitInfo.pSignalSemaphores = signal.data(); + + // Submit to the queue + VK_CHECK_RESULT(device_dispatch[GetKey(device)].QueueSubmit(queue, 1, &submitInfo, fence)); + } +}; + +void ImageWorker::initImages( std::tuple image_memory_types, const VkSwapchainCreateInfoKHR &createInfo){ + ssize_t render_local_mem, render_host_mem, display_host_mem; + std::tie( render_local_mem, render_host_mem, display_host_mem) = image_memory_types; + auto imgSize = createInfo.imageExtent; + auto format = createInfo.imageFormat; + + auto &renderImage = render_image; + auto &renderCopyImage = render_copy_image; + auto &displaySrcImage = display_src_image; + renderImage = std::make_shared(swapchain.device, imgSize, + VK_IMAGE_TILING_OPTIMAL,VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |/**/ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,format, render_local_mem); + renderCopyImage = std::make_shared(swapchain.device, imgSize, + VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_DST_BIT,format, render_host_mem); + displaySrcImage = std::make_shared(swapchain.display_device, imgSize, + 
VK_IMAGE_TILING_LINEAR,VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |/**/ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,format, display_host_mem); + + renderCopyImage->map(); + displaySrcImage->map(); + + CommandBuffer cmd{swapchain.display_device}; + cmd.insertImageMemoryBarrier( + displaySrcImage->img, + 0, + VK_ACCESS_MEMORY_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.end(); + Fence f{swapchain.display_device}; + cmd.submit(swapchain.display_queue, f.fence); + f.await(); +} + + + +VkResult VKAPI_CALL PrimusVK_CreateDevice( + VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDevice* pDevice) +{ + auto &my_instance_info = instance_info[GetKey(physicalDevice)]; + VkLayerDeviceCreateInfo *layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext; + + // step through the chain of pNext until we get to the link info + while(layerCreateInfo && (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || + layerCreateInfo->function != VK_LAYER_LINK_INFO)) + { + layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext; + } + + if(layerCreateInfo == NULL) + { + // No loader instance create info + return VK_ERROR_INITIALIZATION_FAILED; + } + + PFN_vkGetInstanceProcAddr gipa = layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkGetDeviceProcAddr gdpa = layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr; + // move chain on for next layer + layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext; + + auto display_dev = my_instance_info.display; + std::shared_ptr cod = nullptr; + { + scoped_lock l(global_lock); + cod = std::make_shared(display_dev, physicalDevice); + } + auto createDevice = my_instance_info.layerCreateDevice; + cod->finish([createDevice,&my_instance_info](VkDeviceCreateInfo &createInfo, 
VkDevice &dev){ + PFN_vkGetDeviceProcAddr gdpa = nullptr; + auto ret = createDevice(my_instance_info.instance, my_instance_info.display, &createInfo, nullptr, &dev, PrimusVK_GetInstanceProcAddr, &gdpa); + { + scoped_lock l(global_lock); + device_instance_info[GetKey(dev)] = &my_instance_info; + device_dispatch[GetKey(dev)] = fetchDispatchTable(gdpa, &dev); + } + return ret; + }); + PFN_vkCreateDevice createFunc = (PFN_vkCreateDevice)gipa(VK_NULL_HANDLE, "vkCreateDevice"); + VkResult ret = createFunc(physicalDevice, pCreateInfo, pAllocator, pDevice); + cod->setRenderDevice(*pDevice); + my_instance_info.cod[GetKey(*pDevice)] = cod; + + // store the table by key + { + scoped_lock l(global_lock); + device_instance_info[GetKey(*pDevice)] = &my_instance_info; + device_dispatch[GetKey(*pDevice)] = fetchDispatchTable(gdpa, pDevice); + } + TRACE("CreateDevice done"); + + return ret; + +} + +VkLayerDispatchTable fetchDispatchTable(PFN_vkGetDeviceProcAddr gdpa, VkDevice *pDevice){ + TRACE("fetching dispatch for " << GetKey(*pDevice)); + // fetch our own dispatch table for the functions we need, into the next layer + VkLayerDispatchTable dispatchTable; +#define FETCH(x) dispatchTable.x = (PFN_vk##x) gdpa(*pDevice, "vk" #x); + FETCH(GetDeviceProcAddr); + FETCH(DestroyDevice); + FETCH(BeginCommandBuffer); + FETCH(CmdDraw); + FETCH(CmdDrawIndexed); + FETCH(EndCommandBuffer); + + FETCH(CreateSwapchainKHR); + FETCH(DestroySwapchainKHR); + FETCH(GetSwapchainImagesKHR); + FETCH(AcquireNextImageKHR); + FETCH(GetSwapchainStatusKHR); + FETCH(QueuePresentKHR); + + FETCH(CreateImage); + FETCH(GetImageMemoryRequirements); + FETCH(AllocateMemory); + FETCH(BindImageMemory); + FETCH(GetImageSubresourceLayout); + FETCH(FreeMemory); + FETCH(DestroyImage); + FETCH(MapMemory); + FETCH(UnmapMemory); + + + FETCH(AllocateCommandBuffers); + FETCH(BeginCommandBuffer); + FETCH(CmdCopyImage); + FETCH(CmdPipelineBarrier); + FETCH(CreateCommandPool); + //FETCH(CreateDevice); + FETCH(EndCommandBuffer); + 
//FETCH(EnumeratePhysicalDevices); + FETCH(FreeCommandBuffers); + //FETCH(GetPhysicalDeviceMemoryProperties); + //FETCH(GetPhysicalDeviceQueueFamilyProperties); + FETCH(QueueSubmit); + FETCH(DeviceWaitIdle); + FETCH(QueueWaitIdle); + + FETCH(GetDeviceQueue); + + FETCH(CreateFence); + FETCH(WaitForFences); + FETCH(ResetFences); + FETCH(DestroyFence); + + FETCH(CreateSemaphore); + FETCH(DestroySemaphore); + +#undef FETCH + return dispatchTable; +} + +void VKAPI_CALL PrimusVK_DestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator) +{ + scoped_lock l(global_lock); + auto &my_instance = *device_instance_info[GetKey(device)]; + auto &display_device = my_instance.cod[GetKey(device)]->display_gpu; + my_instance.layerDestroyDevice(display_device, nullptr, device_dispatch[GetKey(display_device)].DestroyDevice); + device_dispatch[GetKey(device)].DestroyDevice(device, pAllocator); + my_instance.cod.erase(GetKey(device)); + device_dispatch.erase(GetKey(device)); + device_dispatch.erase(GetKey(display_device)); +} + +VkResult VKAPI_CALL PrimusVK_CreateSwapchainKHR(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) { + auto &my_instance = *device_instance_info[GetKey(device)]; + TRACE("Application requested " << pCreateInfo->minImageCount << " images."); + VkDevice render_gpu = device; + VkSwapchainCreateInfoKHR info2 = *pCreateInfo; + info2.minImageCount = std::max(3u, pCreateInfo->minImageCount); + info2.imageUsage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + pCreateInfo = &info2; + + VkSwapchainKHR old = pCreateInfo->oldSwapchain; + if(old != VK_NULL_HANDLE){ + PrimusSwapchain *ch = reinterpret_cast(old); + info2.oldSwapchain = ch->backend; + TRACE("Old Swapchain: " << ch->backend); + } + TRACE("Creating Swapchain for size: " << pCreateInfo->imageExtent.width << "x" << pCreateInfo->imageExtent.height); + TRACE("MinImageCount: " << pCreateInfo->minImageCount); + TRACE("fetching device for: 
" << GetKey(render_gpu)); + VkDevice display_gpu = my_instance.cod[GetKey(device)]->display_gpu; + + TRACE("FamilyIndexCount: " << pCreateInfo->queueFamilyIndexCount); + TRACE("Dev: " << GetKey(display_gpu)); + TRACE("Swapchainfunc: " << (void*) device_dispatch[GetKey(display_gpu)].CreateSwapchainKHR); + + VkSwapchainKHR backend; + VkResult rc = device_dispatch[GetKey(display_gpu)].CreateSwapchainKHR(display_gpu, pCreateInfo, pAllocator, &backend); + TRACE(">> Swapchain create done " << rc << ";" << (void*) backend); + if(rc != VK_SUCCESS){ + return rc; + } + + PrimusSwapchain *ch = new PrimusSwapchain(render_gpu, display_gpu, backend, pCreateInfo, info2.minImageCount, my_instance.cod[GetKey(device)]); + + *pSwapchain = reinterpret_cast(ch); + + + return rc; +} + +void VKAPI_CALL PrimusVK_DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator) { + if(swapchain == VK_NULL_HANDLE) { return;} + PrimusSwapchain *ch = reinterpret_cast(swapchain); + TRACE(">> Destroy swapchain: " << (void*) ch->backend); + ch->stop(); + device_dispatch[GetKey(ch->display_device)].DestroySwapchainKHR(ch->display_device, ch->backend, pAllocator); + delete ch; +} +VkResult VKAPI_CALL PrimusVK_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages) { + PrimusSwapchain *ch = reinterpret_cast(swapchain); + + *pSwapchainImageCount = ch->images.size(); + VkResult res = VK_SUCCESS; + if(pSwapchainImages != nullptr) { + res = VK_SUCCESS; + for(size_t i = 0; i < *pSwapchainImageCount; i++){ + pSwapchainImages[i] = ch->images[i].render_image->img; + } + TRACE("Count: " << *pSwapchainImageCount); + } + return res; +} + +const auto primus_start = std::chrono::steady_clock::now(); + +VkResult VKAPI_CALL PrimusVK_AcquireNextImage2KHR(VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo, uint32_t* pImageIndex) { + TRACE_PROFILING_EVENT(-1, "Acquire starting"); + PrimusSwapchain 
*ch = reinterpret_cast(pAcquireInfo->swapchain); + + VkResult res; + { + Fence myfence{ch->display_device}; + + res = device_dispatch[GetKey(ch->display_device)].AcquireNextImageKHR(ch->display_device, ch->backend, pAcquireInfo->timeout, VK_NULL_HANDLE, myfence.fence, pImageIndex); + TRACE_PROFILING_EVENT(*pImageIndex, "got image"); + + myfence.await(); + } + VkSubmitInfo qsi{}; + qsi.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + if(pAcquireInfo->semaphore != VK_NULL_HANDLE){ + qsi.signalSemaphoreCount = 1; + qsi.pSignalSemaphores = &pAcquireInfo->semaphore; + } + scoped_lock lock(*device_instance_info[GetKey(ch->render_queue)]->renderQueueMutex); + device_dispatch[GetKey(ch->render_queue)].QueueSubmit(ch->render_queue, 1, &qsi, pAcquireInfo->fence); + TRACE_PROFILING_EVENT(*pImageIndex, "Acquire done"); + + return res; +} +VkResult VKAPI_CALL PrimusVK_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex) { + auto acquireInfo = VkAcquireNextImageInfoKHR{ + .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, + .swapchain = swapchain, + .timeout = timeout, + .semaphore = semaphore, + .fence = fence + }; + return PrimusVK_AcquireNextImage2KHR(device, &acquireInfo, pImageIndex); +} +VkResult VKAPI_CALL PrimusVK_GetSwapchainStatusKHR(VkDevice device, VkSwapchainKHR swapchain){ + PrimusSwapchain *ch = reinterpret_cast(swapchain); + return device_dispatch[GetKey(ch->display_device)].GetSwapchainStatusKHR(device, ch->backend); +} + +std::tuple PrimusSwapchain::getImageMemories(){ + VkMemoryPropertyFlags host_mem = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + VkMemoryPropertyFlags local_mem = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + ssize_t render_host_mem = -1; + ssize_t render_local_mem = -1; + ssize_t display_host_mem = -1; + for(size_t j=0; j < cod->render_mem.memoryTypeCount; j++){ + if ( render_host_mem == -1 && ( 
cod->render_mem.memoryTypes[j].propertyFlags & host_mem ) == host_mem ) { + render_host_mem = j; + } + if ( render_local_mem == -1 && ( cod->render_mem.memoryTypes[j].propertyFlags & local_mem ) == local_mem ) { + render_local_mem = j; + } + } + for(size_t j=0; j < cod->display_mem.memoryTypeCount; j++){ + if ( display_host_mem == -1 && ( cod->display_mem.memoryTypes[j].propertyFlags & host_mem ) == host_mem ) { + display_host_mem = j; + } + } + TRACE("Selected render mem: " << render_host_mem << ";" << render_local_mem << " display: " << display_host_mem); + + return std::make_tuple(render_local_mem, render_host_mem, display_host_mem); +} + +void ImageWorker::createCommandBuffers(){ + { + auto cpyImage = render_copy_image; + auto srcImage = render_image->img; + render_copy_command = std::make_shared(swapchain. device); + CommandBuffer &cmd = *render_copy_command; + cmd.insertImageMemoryBarrier( + cpyImage->img, + VK_ACCESS_HOST_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.insertImageMemoryBarrier( + srcImage, + VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + + cmd.copyImage(srcImage, cpyImage->img, swapchain.imgSize); + + cmd.insertImageMemoryBarrier( + cpyImage->img, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.insertImageMemoryBarrier( + srcImage, + VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_MEMORY_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + + cmd.end(); + } + + { + display_command = std::make_shared(swapchain.display_device); + CommandBuffer &cmd = *display_command; + cmd.insertImageMemoryBarrier( + display_src_image->img, + VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.insertImageMemoryBarrier( + display_image, + VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.copyImage(display_src_image->img, display_image, swapchain.imgSize); + + cmd.insertImageMemoryBarrier( + display_src_image->img, + VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_HOST_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.insertImageMemoryBarrier( + display_image, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_MEMORY_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 }); + cmd.end(); + } +} + +void PrimusSwapchain::storeImage(uint32_t index, VkQueue queue, std::vector wait_on, Fence ¬ify){ + images[index].render_copy_command->submit(queue, notify.fence, wait_on); +} + +void ImageWorker::copyImageData(std::vector sems){ + { + auto rendered = render_copy_image->getMapped(); + auto display = display_src_image->getMapped(); + auto 
rendered_layout = render_copy_image->getLayout(); + auto display_layout = display_src_image->getLayout(); + auto rendered_start = rendered->data + rendered_layout.offset; + auto display_start = display->data + display_layout.offset; + if(rendered_layout.size/rendered_layout.rowPitch != display_layout.size/display_layout.rowPitch){ + TRACE("Layouts don't match at all"); + throw std::runtime_error("Layouts don't match at all"); + } + TRACE_PROFILING_EVENT(index, "memcpy start"); + if(rendered_layout.rowPitch == display_layout.rowPitch){ + std::memcpy(display_start, rendered_start, rendered_layout.size); + }else{ + VkDeviceSize display_offset = 0; + VkDeviceSize minRowPitch = rendered_layout.rowPitch; + if(display_layout.rowPitch < minRowPitch){ + minRowPitch = display_layout.rowPitch; + } + for(VkDeviceSize offset = 0; offset < rendered_layout.size; offset += rendered_layout.rowPitch){ + std::memcpy(display_start + display_offset, rendered_start + offset, minRowPitch); + display_offset += display_layout.rowPitch; + } + } + TRACE_PROFILING_EVENT(index, "memcpy done"); + } + { + std::unique_lock lock(swapchain.queueMutex); + if(display_command_fence){ + display_command_fence->await(); + display_command_fence->reset(); + }else{ + display_command_fence = std::unique_ptr(new Fence(swapchain.display_device)); + } + display_command->submit(swapchain.display_queue, display_command_fence->fence, {}, sems); + } +} + +void PrimusSwapchain::queue(VkQueue queue, const VkPresentInfoKHR* pPresentInfo){ + std::unique_lock lock(queueMutex); + + auto workItem = QueueItem{queue, *pPresentInfo, pPresentInfo->pImageIndices[0]}; + storeImage(workItem.imgIndex, render_queue, std::vector{pPresentInfo->pWaitSemaphores, pPresentInfo->pWaitSemaphores + pPresentInfo->waitSemaphoreCount}, images[workItem.imgIndex].render_copy_fence); + + work.push_back(std::move(workItem)); + has_work.notify_all(); +} +void PrimusSwapchain::stop(){ + { + std::unique_lock lock(queueMutex); + active = false; + 
has_work.notify_all(); + } + for(auto &thread: threads){ + thread->join(); + thread.reset(); + } +} +void PrimusSwapchain::present(const QueueItem &workItem){ + const auto index = workItem.imgIndex; + images[index].render_copy_fence.await(); + images[index].render_copy_fence.reset(); + images[index].copyImageData({images[index].display_semaphore.sem}); + + TRACE_PROFILING_EVENT(index, "copy queued"); + + VkPresentInfoKHR p2 = {.sType=VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; + p2.pSwapchains = &backend; + p2.swapchainCount = 1; + p2.pWaitSemaphores = &images[workItem.imgIndex].display_semaphore.sem; + p2.waitSemaphoreCount = 1; + p2.pImageIndices = &index; + + { + std::unique_lock lock(queueMutex); + has_work.wait(lock, [this,&workItem](){return &workItem == &in_progress.front();}); + TRACE_PROFILING_EVENT(index, "submitting"); + VkResult res = device_dispatch[GetKey(display_device)].QueuePresentKHR(display_queue, &p2); + if(res != VK_SUCCESS) { + TRACE("ERROR, Queue Present failed: " << res << "\n"); + } + in_progress.pop_front(); + has_work.notify_all(); + } +} +void PrimusSwapchain::run(){ + while(true){ + QueueItem *workItem = nullptr; + { + std::unique_lock lock(queueMutex); + has_work.wait(lock, [this](){return !active || work.size() > 0;}); + if(!active) return; + in_progress.push_back(std::move(work.front())); + workItem = &in_progress.back(); + work.pop_front(); + } + present(*workItem); + } +} + +VkResult VKAPI_CALL PrimusVK_QueueSubmit(VkQueue queue, uint32_t submitCount, + const VkSubmitInfo* pSubmits, + VkFence fence) { + scoped_lock lock(*device_instance_info[GetKey(queue)]->renderQueueMutex); + return device_dispatch[GetKey(queue)].QueueSubmit(queue, submitCount, pSubmits, fence); +} + +VkResult VKAPI_CALL PrimusVK_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) { + scoped_lock lock(*device_instance_info[GetKey(queue)]->renderQueueMutex); + const auto start = std::chrono::steady_clock::now(); + if(pPresentInfo->swapchainCount != 1){ + 
TRACE("Warning, presenting with multiple swapchains not implemented, ignoring"); + } + + PrimusSwapchain *ch = reinterpret_cast(pPresentInfo->pSwapchains[0]); + double secs = std::chrono::duration_cast>(start - ch->lastPresent).count(); + TRACE_PROFILING_EVENT(pPresentInfo->pImageIndices[0], "QueuePresent"); + TRACE_PROFILING(" === Time between VkQueuePresents: " << secs << " -> " << 1/secs << " FPS"); + ch->lastPresent = start; + + ch->queue(queue, pPresentInfo); + + return VK_SUCCESS; +} + +void VKAPI_CALL PrimusVK_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties) { + VkPhysicalDevice phy = physicalDevice; + instance_dispatch[GetKey(phy)].GetPhysicalDeviceQueueFamilyProperties(phy, pQueueFamilyPropertyCount, pQueueFamilyProperties); +} + +void VKAPI_CALL PrimusVK_QueueWaitIdle(VkQueue queue){ + scoped_lock lock(*device_instance_info[GetKey(queue)]->renderQueueMutex); + device_dispatch[GetKey(queue)].QueueWaitIdle(queue); +} + +void VKAPI_CALL PrimusVK_DeviceWaitIdle(VkDevice device){ + auto &my_instance = *device_instance_info[GetKey(device)]; + device_dispatch[GetKey(device)].DeviceWaitIdle(device); + auto display_gpu = my_instance.cod[GetKey(device)]->display_gpu; + device_dispatch[GetKey(display_gpu)].DeviceWaitIdle(display_gpu); +} + +#include "primus_vk_forwarding_prototypes.h" + +/////////////////////////////////////////////////////////////////////////////////////////// +// Enumeration function + +VkResult VKAPI_CALL PrimusVK_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, + VkLayerProperties *pProperties) +{ + if(pPropertyCount) *pPropertyCount = 1; + + if(pProperties) + { + strcpy(pProperties->layerName, "VK_LAYER_PRIMUS_PrimusVK"); + strcpy(pProperties->description, "Primus-vk - https://github.com/felixdoerre/primus_vk"); + pProperties->implementationVersion = 1; + pProperties->specVersion = VK_API_VERSION_1_0; + } + + return VK_SUCCESS; 
+} + +VkResult VKAPI_CALL PrimusVK_EnumerateDeviceLayerProperties( + VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkLayerProperties *pProperties) +{ + return PrimusVK_EnumerateInstanceLayerProperties(pPropertyCount, pProperties); +} + +VkResult VKAPI_CALL PrimusVK_EnumerateInstanceExtensionProperties( + const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties) +{ + if(pLayerName == NULL || strcmp(pLayerName, "VK_LAYER_PRIMUS_PrimusVK")) + return VK_ERROR_LAYER_NOT_PRESENT; + + // don't expose any extensions + if(pPropertyCount) *pPropertyCount = 0; + return VK_SUCCESS; +} + +VkResult VKAPI_CALL PrimusVK_EnumerateDeviceExtensionProperties( + VkPhysicalDevice physicalDevice, const char *pLayerName, + uint32_t *pPropertyCount, VkExtensionProperties *pProperties) +{ + // pass through any queries that aren't to us + if(pLayerName == NULL || strcmp(pLayerName, "VK_LAYER_PRIMUS_PrimusVK")) + { + if(physicalDevice == VK_NULL_HANDLE) + return VK_SUCCESS; + + scoped_lock l(global_lock); + return instance_dispatch[GetKey(physicalDevice)].EnumerateDeviceExtensionProperties(physicalDevice, pLayerName, pPropertyCount, pProperties); + } + + // don't expose any extensions + if(pPropertyCount) *pPropertyCount = 0; + return VK_SUCCESS; +} + +VkResult VKAPI_CALL PrimusVK_EnumeratePhysicalDevices( + VkInstance instance, + uint32_t* pPhysicalDeviceCount, + VkPhysicalDevice* pPhysicalDevices){ + const int cnt = 1; + if(pPhysicalDevices == nullptr){ + *pPhysicalDeviceCount = cnt; + return VK_SUCCESS; + } + scoped_lock l(global_lock); + InstanceInfo &info = instance_info[GetKey(instance)]; + pPhysicalDevices[0] = info.render; + *pPhysicalDeviceCount = cnt; + return VK_SUCCESS; +} +VkResult VKAPI_CALL PrimusVK_EnumeratePhysicalDeviceGroups( + VkInstance instance, + uint32_t* pPhysicalDeviceGroupCount, + VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties) { + InstanceInfo &info = instance_info[GetKey(instance)]; + 
*pPhysicalDeviceGroupCount = 1; + if(pPhysicalDeviceGroupProperties){ + pPhysicalDeviceGroupProperties[0].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GROUP_PROPERTIES; + pPhysicalDeviceGroupProperties[0].pNext = nullptr; + pPhysicalDeviceGroupProperties[0].physicalDeviceCount = 1; + pPhysicalDeviceGroupProperties[0].physicalDevices[0] = info.render; + pPhysicalDeviceGroupProperties[0].subsetAllocation = VK_FALSE; + } + return VK_SUCCESS; +} +VkResult VKAPI_CALL PrimusVK_EnumeratePhysicalDeviceGroupsKHR( + VkInstance instance, + uint32_t* pPhysicalDeviceGroupCount, + VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties) { + return PrimusVK_EnumeratePhysicalDeviceGroups(instance, pPhysicalDeviceGroupCount, pPhysicalDeviceGroupProperties); +} + + +/////////////////////////////////////////////////////////////////////////////////////////// +// GetProcAddr functions, entry points of the layer + +#define GETPROCADDR(func) if(!strcmp(pName, "vk" #func)) return (PFN_vkVoidFunction)&PrimusVK_##func; + +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL PrimusVK_GetDeviceProcAddr(VkDevice device, const char *pName) +{ + // device chain functions we intercept + GETPROCADDR(GetDeviceProcAddr); + GETPROCADDR(EnumerateDeviceLayerProperties); + GETPROCADDR(EnumerateDeviceExtensionProperties); + GETPROCADDR(CreateDevice); + GETPROCADDR(DestroyDevice); + + GETPROCADDR(CreateSwapchainKHR); + GETPROCADDR(DestroySwapchainKHR); + GETPROCADDR(GetSwapchainImagesKHR); + GETPROCADDR(AcquireNextImageKHR); + GETPROCADDR(AcquireNextImage2KHR); + GETPROCADDR(GetSwapchainStatusKHR); + GETPROCADDR(QueuePresentKHR); + + GETPROCADDR(QueueSubmit); + GETPROCADDR(DeviceWaitIdle); + GETPROCADDR(QueueWaitIdle); +#define FORWARD(func) GETPROCADDR(func) +#include "primus_vk_forwarding.h" +#undef FORWARD + { + scoped_lock l(global_lock); + return device_dispatch[GetKey(device)].GetDeviceProcAddr(device, pName); + } +} + +VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL 
PrimusVK_GetInstanceProcAddr(VkInstance instance, const char *pName) +{ + // instance chain functions we intercept + GETPROCADDR(GetInstanceProcAddr); + GETPROCADDR(EnumeratePhysicalDevices); + GETPROCADDR(EnumeratePhysicalDeviceGroups); + GETPROCADDR(EnumeratePhysicalDeviceGroupsKHR); + GETPROCADDR(EnumerateInstanceLayerProperties); + GETPROCADDR(EnumerateInstanceExtensionProperties); + GETPROCADDR(CreateInstance); + GETPROCADDR(DestroyInstance); + + // device chain functions we intercept + GETPROCADDR(GetDeviceProcAddr); + GETPROCADDR(EnumerateDeviceLayerProperties); + GETPROCADDR(EnumerateDeviceExtensionProperties); + GETPROCADDR(CreateDevice); + GETPROCADDR(DestroyDevice); + + GETPROCADDR(CreateSwapchainKHR); + GETPROCADDR(DestroySwapchainKHR); + GETPROCADDR(GetSwapchainImagesKHR); + GETPROCADDR(AcquireNextImageKHR); + GETPROCADDR(AcquireNextImage2KHR); + GETPROCADDR(GetSwapchainStatusKHR); + GETPROCADDR(QueuePresentKHR); + + GETPROCADDR(QueueSubmit); + GETPROCADDR(DeviceWaitIdle); + GETPROCADDR(QueueWaitIdle); + GETPROCADDR(GetPhysicalDeviceQueueFamilyProperties); + +#define FORWARD(func) GETPROCADDR(func) +#include "primus_vk_forwarding.h" +#undef FORWARD + { + scoped_lock l(global_lock); + return instance_dispatch[GetKey(instance)].GetInstanceProcAddr(instance, pName); + } +} diff --git a/primus_vk.json b/primus_vk.json new file mode 100644 index 0000000..7b0771c --- /dev/null +++ b/primus_vk.json @@ -0,0 +1,21 @@ +{ + "file_format_version" : "1.0.0", + "layer" : { + "name": "VK_LAYER_PRIMUS_PrimusVK", + "type": "GLOBAL", + "library_path": "libprimus_vk.so", + "api_version": "1.1.0", + "implementation_version": "1", + "description": "Primus-vk - https://github.com/felixdoerre/primus_vk", + "functions": { + "vkGetInstanceProcAddr": "PrimusVK_GetInstanceProcAddr", + "vkGetDeviceProcAddr": "PrimusVK_GetDeviceProcAddr" + }, + "enable_environment": { + "ENABLE_PRIMUS_LAYER": "1" + }, + "disable_environment": { + "DISABLE_PRIMUS_LAYER": "1" + } + } +} diff --git 
a/primus_vk_diag.cpp b/primus_vk_diag.cpp
new file mode 100644
index 0000000..dd21154
--- /dev/null
+++ b/primus_vk_diag.cpp
@@ -0,0 +1,300 @@
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <GL/gl.h>
+#include <GL/glx.h>
+
+#include <vulkan/vulkan.h>
+
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <dlfcn.h>
+#include <time.h>
+
+// primus_vk diagnostic tool: every command line argument selects one probe.
+//   vulkan - create an instance, list the physical devices, try to create a device on each
+//   gl     - open a GLX window and present two solid-colour frames
+
+#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091
+#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092
+typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int*);
+
+// Owns the VkInstance of one "vulkan" probe; the constructor runs the whole probe.
+class VulkanContext {
+  VkInstance instance;
+public:
+  VulkanContext();
+  VulkanContext(const VulkanContext&) = delete;
+  ~VulkanContext();
+};
+
+// Throws std::runtime_error unless the local variable `reply` equals VK_SUCCESS.
+// The do/while(0) wrapper turns `VK_CHECK();` into exactly one statement, so the
+// macro cannot swallow an `else` that follows it.
+#define VK_CHECK() do { \
+    if(reply != VK_SUCCESS){ \
+      throw std::runtime_error("Vulkan operation failed with code: " + std::to_string(reply)); \
+    } \
+  } while(0)
+
+// X-macro with every libGL entry point that GLLib resolves through dlsym.
+#define GL_FUNCTIONS GL_FUNCTION(glClearColor);\
+  GL_FUNCTION(glClear);\
+  GL_FUNCTION(glXChooseFBConfig);\
+  GL_FUNCTION(glXCreateContext);\
+  GL_FUNCTION(glXDestroyContext);\
+  GL_FUNCTION(glXGetProcAddress);\
+  GL_FUNCTION(glXGetVisualFromFBConfig);\
+  GL_FUNCTION(glXMakeCurrent);\
+  GL_FUNCTION(glXQueryExtensionsString);\
+  GL_FUNCTION(glXSwapBuffers);
+
+// Function pointers into libGL.so.1, resolved at run time instead of link time.
+struct GLLib {
+#define GL_FUNCTION(x) decltype(&x) ptr_##x = nullptr;
+  GL_FUNCTIONS
+#undef GL_FUNCTION
+  // Throws std::runtime_error when the library or one of the symbols is missing,
+  // rather than keeping a null pointer that crashes on its first call.
+  GLLib() {
+    void* handle = dlopen("libGL.so.1", RTLD_NOW|RTLD_GLOBAL);
+    if(handle == nullptr){
+      const char *err = dlerror();
+      throw std::runtime_error(std::string("Failed to load libGL.so.1: ") + (err ? err : "unknown error"));
+    }
+#define GL_FUNCTION(x) ptr_##x = (decltype(&x)) dlsym(handle, #x); \
+    if(ptr_##x == nullptr){ \
+      throw std::runtime_error("Symbol " #x " not found in libGL.so.1"); \
+    }
+    GL_FUNCTIONS
+#undef GL_FUNCTION
+  }
+};
+
+const auto self = std::string{"PrimusVK-diagnostic: "};
+
+// Lists the physical devices of `instance`, prints their properties and tries to
+// create (and immediately destroy) a logical device with one queue on each.
+// Throws std::runtime_error when the enumeration fails.
+static void probePhysicalDevices(VkInstance instance){
+  uint32_t gpuCount = 0;
+  auto reply = vkEnumeratePhysicalDevices(instance, &gpuCount, nullptr);
+  VK_CHECK();
+  // Enumerate devices
+  std::vector<VkPhysicalDevice> physicalDevices(gpuCount);
+  // Capture the result so that VK_CHECK() tests this call and not the previous one.
+  reply = vkEnumeratePhysicalDevices(instance, &gpuCount, physicalDevices.data());
+  if(reply != VK_INCOMPLETE){
+    // VK_INCOMPLETE is a success code: the vector was too small for all devices.
+    VK_CHECK();
+  }
+  physicalDevices.resize(gpuCount);
+  for ( auto &device : physicalDevices) {
+    VkPhysicalDeviceProperties deviceProperties;
+    vkGetPhysicalDeviceProperties(device, &deviceProperties);
+    std::cout << self << "Device: " << deviceProperties.deviceName << std::endl;
+    std::cout << self << " Type: " << deviceProperties.deviceType << std::endl;
+    std::cout << self << " API: " << (deviceProperties.apiVersion >> 22) << "." << ((deviceProperties.apiVersion >> 12) & 0x3ff) << "." << (deviceProperties.apiVersion & 0xfff) << std::endl;
+    uint32_t queues = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queues, nullptr);
+    std::vector<VkQueueFamilyProperties> data(queues);
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queues, data.data());
+    std::cout << self << " Queues: " << queues << std::endl;
+
+    VkDeviceQueueCreateInfo queue1{};
+    queue1.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queue1.queueFamilyIndex = 0;
+    queue1.queueCount = 1;
+    float prio = 1;
+    queue1.pQueuePriorities = &prio;
+    VkDeviceCreateInfo createInfo{};
+    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    createInfo.pQueueCreateInfos = &queue1;
+    createInfo.queueCreateInfoCount = 1;
+    VkDevice dev = VK_NULL_HANDLE;
+    VkResult res = vkCreateDevice(device, &createInfo, nullptr, &dev);
+    if(res == VK_SUCCESS) {
+      std::cout << "Device creation succeeded\n";
+      // Destroy only what was created; after a failure `dev` holds no valid handle.
+      vkDestroyDevice(dev, nullptr);
+    } else {
+      std::cout << "Device creation failed: " << res << "\n";
+    }
+  }
+}
+
+VulkanContext::VulkanContext(): instance(VK_NULL_HANDLE){
+  std::cout << self << "Creating Vulkan instance" << std::endl;
+  VkInstanceCreateInfo instanceCreateInfo = {};
+  instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+  instanceCreateInfo.pNext = NULL;
+  instanceCreateInfo.pApplicationInfo = nullptr;
+  auto reply = vkCreateInstance(&instanceCreateInfo, nullptr, &instance);
+  VK_CHECK();
+
+  try {
+    probePhysicalDevices(instance);
+  } catch(...) {
+    // The destructor does not run when the constructor throws, so release the instance here.
+    vkDestroyInstance(instance, nullptr);
+    throw;
+  }
+}
+VulkanContext::~VulkanContext(){
+  std::cout << self << "Destroying Vulkan: " << instance << std::endl;
+  vkDestroyInstance(instance, nullptr);
+}
+
+class XWindowContext;
+// Owns a GLX context and makes it current on the window of an XWindowContext.
+class GLContext {
+  GLXContext ctx;
+  XWindowContext &data;
+  std::shared_ptr<GLLib> gl;
+public:
+  GLContext(XWindowContext &data);
+  // Not copyable: the destructor destroys `ctx`, a copy would destroy it a second time.
+  GLContext(const GLContext&) = delete;
+  GLContext& operator=(const GLContext&) = delete;
+  void drawSample();
+  ~GLContext();
+};
+
+// Loads libGL, selects a double-buffered RGBA framebuffer config and maps a 100x100 window.
+class XWindowContext {
+public:
+  Display *display;
+  XVisualInfo *vi;
+
+  Window win;
+  GLXFBConfig fbconfig;
+  std::shared_ptr<GLLib> gl = std::make_shared<GLLib>();
+
+  XWindowContext(Display *display): display(display){
+    if (display == nullptr) {
+      throw std::runtime_error("No X display available.");
+    }
+    const char *extensions = gl->ptr_glXQueryExtensionsString(display, DefaultScreen(display));
+    // Streaming a null char* is undefined behaviour, so print an empty string instead.
+    std::cout << self << (extensions ? extensions : "") << std::endl;
+
+    static int visual_attribs[] =
+      {
+        GLX_RENDER_TYPE, GLX_RGBA_BIT,
+        GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT,
+        GLX_DOUBLEBUFFER, true,
+        GLX_RED_SIZE, 1,
+        GLX_GREEN_SIZE, 1,
+        GLX_BLUE_SIZE, 1,
+        None
+      };
+
+    std::cout << self << "Getting framebuffer config" << std::endl;
+    int fbcount = 0;
+    GLXFBConfig *fbc = gl->ptr_glXChooseFBConfig(display, DefaultScreen(display), visual_attribs, &fbcount);
+    if (!fbc || fbcount < 1) {
+      if (fbc) {
+        XFree(fbc);
+      }
+      throw std::runtime_error("Failed to retrieve a framebuffer config");
+    }
+    fbconfig = fbc[0];
+    // The returned list belongs to the caller; the chosen GLXFBConfig handle outlives it.
+    XFree(fbc);
+
+    vi = gl->ptr_glXGetVisualFromFBConfig(display, fbconfig);
+    if (!vi) {
+      throw std::runtime_error("Failed to get a visual for the framebuffer config");
+    }
+
+    XSetWindowAttributes swa;
+    swa.colormap = XCreateColormap(display, RootWindow(display, vi->screen), vi->visual, AllocNone);
+    swa.border_pixel = 0;
+    swa.event_mask = StructureNotifyMask;
+
+    std::cout << self << "Creating window" << std::endl;
+    win = XCreateWindow(display, RootWindow(display, vi->screen), 0, 0, 100, 100, 0, vi->depth, InputOutput, vi->visual, CWBorderPixel|CWColormap|CWEventMask, &swa);
+    if (!win) {
+      throw std::runtime_error("Failed to create window.");
+    }
+    XMapWindow(display, win);
+  }
+};
+
+GLContext::GLContext(XWindowContext &data): data(data), gl(data.gl){
+  // Create an oldstyle context first, to get the correct function pointer for glXCreateContextAttribsARB
+  GLXContext ctx_old = gl->ptr_glXCreateContext(data.display, data.vi, 0, GL_TRUE);
+  const auto glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc)gl->ptr_glXGetProcAddress((const GLubyte*)"glXCreateContextAttribsARB");
+  gl->ptr_glXMakeCurrent(data.display, 0, 0);
+  if (ctx_old) {
+    // glXCreateContext returns NULL on failure; only a real context may be destroyed.
+    gl->ptr_glXDestroyContext(data.display, ctx_old);
+  }
+
+  if (glXCreateContextAttribsARB == NULL) {
+    throw std::runtime_error("glXCreateContextAttribsARB entry point not found. Aborting.");
+  }
+
+  static int context_attribs[] =
+    {
+      GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
+      GLX_CONTEXT_MINOR_VERSION_ARB, 0,
+      None
+    };
+
+  std::cout << self << "Creating context" << std::endl;
+  ctx = glXCreateContextAttribsARB(data.display, data.fbconfig, NULL, true, context_attribs);
+  if (!ctx) {
+    throw std::runtime_error("Failed to create GL3 context.");
+  }
+
+  gl->ptr_glXMakeCurrent(data.display, data.win, ctx);
+}
+// Clears and presents two frames (RGBA 0,0.5,1,1 then 1,0.5,0,1), sleeping 200 ms after each.
+void GLContext::drawSample(){
+  std::cout << self << "Rendering with GL" << std::endl;
+  gl->ptr_glClearColor (0, 0.5, 1, 1);
+  gl->ptr_glClear (GL_COLOR_BUFFER_BIT);
+  gl->ptr_glXSwapBuffers (data.display, data.win);
+
+  auto toSleep = timespec{};
+  toSleep.tv_nsec=200000000;
+  nanosleep(&toSleep, nullptr);
+
+  gl->ptr_glClearColor (1, 0.5, 0, 1);
+  gl->ptr_glClear (GL_COLOR_BUFFER_BIT);
+  gl->ptr_glXSwapBuffers (data.display, data.win);
+
+  nanosleep(&toSleep, nullptr);
+}
+GLContext::~GLContext(){
+  std::cout << self << "Destroying GL context" << std::endl;
+  gl->ptr_glXMakeCurrent(data.display, 0, 0);
+  gl->ptr_glXDestroyContext(data.display, ctx);
+}
+
+int main (int argc, char ** argv) {
+  // Opened unconditionally; only the "gl" probe needs the connection to have succeeded.
+  Display *display = XOpenDisplay(0);
+  try {
+    for(int i = 1; i < argc; i++){
+      std::string arg = argv[i];
+      if(arg == "gl"){
+        std::cout << self << "Loading GL." << std::endl;
+        auto winContext = std::make_shared<XWindowContext>(display);
+        GLContext context{*winContext};
+        context.drawSample();
+      } else if(arg == "vulkan") {
+        VulkanContext context;
+      }
+    }
+  } catch(const std::exception &e) {
+    // Report the failure as a diagnostic line and a non-zero exit status instead of aborting.
+    std::cerr << self << "Error: " << e.what() << std::endl;
+    return 1;
+  }
+  return 0;
+}
diff --git a/primus_vk_diag.sh b/primus_vk_diag.sh
new file mode 100755
index 0000000..79d0e5b
--- /dev/null
+++ b/primus_vk_diag.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Runs the primus_vk_diag probes in increasingly complex setups; pass step numbers (0-5) to run a subset.
+
+export VK_LOADER_DEBUG=warn,error,info
+export LD_DEBUG=libs
+export ENABLE_PRIMUS_LAYER=0
+
+function step_0 {
+    printf "===== Round 0: Vulkaninfo =====\n"
+    # `command -v` is a shell builtin: no external process is started, so the exported LD_DEBUG adds no loader trace for the lookup.
+    if command -v vulkaninfo > /dev/null 2>&1; then
+        printf "==== Without Optirun ====\n"
+        vulkaninfo 2>&1
+        printf "==== With Optirun ====\n"
+        optirun vulkaninfo 2>&1
+    else
+        printf "ERROR: Vulkaninfo is missing. Please install for more diagnostic data\n"
+    fi
+}
+function step_1 {
+    printf "===== Round 1: Plain Vulkan =====\n"
+    ./primus_vk_diag vulkan 2>&1
+}
+function step_2 {
+    printf "===== Round 2: Vulkan with optirun =====\n"
+    optirun ./primus_vk_diag vulkan 2>&1
+}
+function step_3 {
+    printf "===== Round 3: Vulkan with optirun and Primus layer =====\n"
+    ENABLE_PRIMUS_LAYER=1 optirun ./primus_vk_diag vulkan 2>&1
+}
+function step_4 {
+    printf "===== Round 4: Mixed Vulkan and OpenGL with Primus layer =====\n"
+    ENABLE_PRIMUS_LAYER=1 optirun ./primus_vk_diag vulkan gl vulkan 2>&1
+}
+function step_5 {
+    printf "===== Round 5: Mixed Vulkan and OpenGL with Primus layer while forcing primus-libGLa =====\n"
+    ENABLE_PRIMUS_LAYER=1 optirun env PRIMUS_libGLa=/usr/lib/x86_64-linux-gnu/nvidia/current/libGLX_nvidia.so.0 ./primus_vk_diag vulkan gl vulkan 2>&1
+}
+
+if [[ $# == 0 ]]; then
+    step_0
+    step_1
+    step_2
+    step_3
+    step_4
+    step_5
+else
+    for arg in "$@"; do
+        if [[ $arg == [0-5] ]]; then
+            step_$arg
+        else
+            printf "Invalid argument\n" >&2
+        fi
+    done
+fi
diff --git a/primus_vk_forwarding.h b/primus_vk_forwarding.h
new file mode 100644
index 0000000..ba066e2
--- /dev/null
+++ b/primus_vk_forwarding.h
@@ -0,0 +1,17 @@
+
+
FORWARD(GetPhysicalDeviceSurfaceSupportKHR);
+
+    FORWARD(GetPhysicalDeviceSurfaceCapabilitiesKHR);
+
+    FORWARD(GetPhysicalDeviceSurfaceFormatsKHR);
+
+    FORWARD(GetPhysicalDeviceSurfacePresentModesKHR);
+
+    FORWARD(GetPhysicalDeviceSurfaceCapabilities2EXT);
+
+    FORWARD(GetPhysicalDevicePresentRectanglesKHR);
+
+    FORWARD(GetPhysicalDeviceSurfaceCapabilities2KHR);
+
+    FORWARD(GetPhysicalDeviceSurfaceFormats2KHR);
+
diff --git a/primus_vk_forwarding_prototypes.h b/primus_vk_forwarding_prototypes.h
new file mode 100644
index 0000000..dc3cec1
--- /dev/null
+++ b/primus_vk_forwarding_prototypes.h
@@ -0,0 +1,61 @@
+VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceSupportKHR( // NOTE(review): all wrappers in this file follow the skeleton in surface_forwarding_prototypes.xslt and appear to be generated from it
+    VkPhysicalDevice physicalDevice,
+    uint32_t queueFamilyIndex,
+    VkSurfaceKHR surface,
+    VkBool32* pSupported) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // replace the caller's handle with the device stored as `.display` (presumably the GPU that drives the display; confirm in primus_vk.cpp)
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfaceSupportKHR(phy, queueFamilyIndex, surface, pSupported); // same query with the remaining arguments untouched, through the dispatch table looked up for `phy`
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceCapabilitiesKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfaceCapabilitiesKHR(phy, surface, pSurfaceCapabilities);
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceFormatsKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t* pSurfaceFormatCount,
+    VkSurfaceFormatKHR* pSurfaceFormats) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfaceFormatsKHR(phy, surface, pSurfaceFormatCount, pSurfaceFormats);
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfacePresentModesKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t* pPresentModeCount,
+    VkPresentModeKHR* pPresentModes) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfacePresentModesKHR(phy, surface, pPresentModeCount, pPresentModes);
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceCapabilities2EXT(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    VkSurfaceCapabilities2EXT* pSurfaceCapabilities) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfaceCapabilities2EXT(phy, surface, pSurfaceCapabilities);
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDevicePresentRectanglesKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t* pRectCount,
+    VkRect2D* pRects) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDevicePresentRectanglesKHR(phy, surface, pRectCount, pRects);
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceCapabilities2KHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
+    VkSurfaceCapabilities2KHR* pSurfaceCapabilities) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfaceCapabilities2KHR(phy, pSurfaceInfo, pSurfaceCapabilities);
+}
+  VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceFormats2KHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
+    uint32_t* pSurfaceFormatCount,
+    VkSurfaceFormat2KHR* pSurfaceFormats) {
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display; // same handle substitution as in PrimusVK_GetPhysicalDeviceSurfaceSupportKHR
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceSurfaceFormats2KHR(phy, pSurfaceInfo, pSurfaceFormatCount, pSurfaceFormats);
+}
diff --git a/pvkrun.in.sh b/pvkrun.in.sh
new file mode 100644
index 0000000..4767033
--- /dev/null
+++ b/pvkrun.in.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+ENABLE_PRIMUS_LAYER=1 exec primusrun "$@" # set the layer's enable variable for this command only and replace the shell with primusrun, passing all arguments through
diff --git
a/surface_forwarding_functions.xslt b/surface_forwarding_functions.xslt new file mode 100644 index 0000000..f9a4c8b --- /dev/null +++ b/surface_forwarding_functions.xslt @@ -0,0 +1,14 @@ + + + + + + + + + FORWARD(); + + + + diff --git a/surface_forwarding_prototypes.xslt b/surface_forwarding_prototypes.xslt new file mode 100644 index 0000000..53aa0bf --- /dev/null +++ b/surface_forwarding_prototypes.xslt @@ -0,0 +1,22 @@ + + + + + + + + + VKAPI_CALL PrimusVK_( + + + , + +) { + VkPhysicalDevice phy = instance_info[GetKey()].display; + return instance_dispatch[GetKey(phy)].(phy, ); +} + + + + -- 2.30.2