Import primus-vk_1.4.orig.tar.gz
author Luca Boccassi <bluca@debian.org>
Mon, 10 Feb 2020 18:26:45 +0000 (18:26 +0000)
committer Luca Boccassi <bluca@debian.org>
Mon, 10 Feb 2020 18:26:45 +0000 (18:26 +0000)
[dgit import orig primus-vk_1.4.orig.tar.gz]

15 files changed:
.gitignore [new file with mode: 0644]
LICENSE [new file with mode: 0644]
Makefile [new file with mode: 0644]
README.md [new file with mode: 0644]
nv_vulkan_wrapper.cpp [new file with mode: 0644]
nv_vulkan_wrapper.json [new file with mode: 0644]
primus_vk.cpp [new file with mode: 0644]
primus_vk.json [new file with mode: 0644]
primus_vk_diag.cpp [new file with mode: 0644]
primus_vk_diag.sh [new file with mode: 0755]
primus_vk_forwarding.h [new file with mode: 0644]
primus_vk_forwarding_prototypes.h [new file with mode: 0644]
pvkrun.in.sh [new file with mode: 0644]
surface_forwarding_functions.xslt [new file with mode: 0644]
surface_forwarding_prototypes.xslt [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..e74dad1
--- /dev/null
@@ -0,0 +1,7 @@
+\#*\#
+*~
+
+/libprimus_vk.so
+/libnv_vulkan_wrapper.so
+/primus_vk_diag
+/primus_vk_diag.o
diff --git a/LICENSE b/LICENSE
new file mode 100644 (file)
index 0000000..4071f77
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2018, Felix Doerre
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..86ab89e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,45 @@
+# Installation layout and tools.
+# DESTDIR is an optional staging root; it is folded into PREFIX here, so every
+# directory below already contains it.
+DESTDIR      ?=
+PREFIX        = $(DESTDIR)/usr/local
+INSTALL       = /usr/bin/install
+# -D is forced even when INSTALL is overridden on the command line, so that
+# missing leading directories of the destination get created.
+override INSTALL += -D
+# NOTE(review): MSGFMT and SED are not referenced by any rule in this Makefile.
+MSGFMT        = /usr/bin/msgfmt
+SED           = /bin/sed
+LN            = /bin/ln
+bindir        = $(PREFIX)/bin
+libdir        = $(PREFIX)/lib
+sysconfdir    = $(PREFIX)/etc
+datarootdir   = ${PREFIX}/share
+datadir       = ${datarootdir}
+
+# Always appended, even when CXXFLAGS is given on the command line.
+override CXXFLAGS += --std=gnu++11 -g3
+
+# Default goal: build both shared objects. Declared phony so that a stray file
+# named "all" can never make this goal look up to date.
+.PHONY: all
+all: libprimus_vk.so libnv_vulkan_wrapper.so
+
+# The Vulkan layer: compiled and linked in one step from primus_vk.cpp, with
+# soname libprimus_vk.so.1.
+libprimus_vk.so: primus_vk.cpp
+       $(CXX) $(CPPFLAGS) $(CXXFLAGS) -I/usr/include/vulkan -shared -fPIC $^ -o $@ -Wl,-soname,libprimus_vk.so.1 -ldl -lpthread $(LDFLAGS)
+
+# The ICD wrapper: compiled and linked in one step from nv_vulkan_wrapper.cpp,
+# with soname libnv_vulkan_wrapper.so.1.
+libnv_vulkan_wrapper.so: nv_vulkan_wrapper.cpp
+       $(CXX) $(CPPFLAGS) $(CXXFLAGS) -I/usr/include/vulkan -shared -fPIC $^ -o $@ -Wl,-soname,libnv_vulkan_wrapper.so.1 -ldl $(LDFLAGS)
+
+# Generated headers: each is produced by running an XSLT stylesheet over the
+# Vulkan registry (vk.xml) and dropping the first output line.
+# These rules have no prerequisites, so they only run when the header is missing.
+primus_vk_forwarding.h:
+       xsltproc surface_forwarding_functions.xslt /usr/share/vulkan/registry/vk.xml | tail -n +2 > $@
+
+primus_vk_forwarding_prototypes.h:
+       xsltproc surface_forwarding_prototypes.xslt /usr/share/vulkan/registry/vk.xml | tail -n +2 > $@
+
+# Recipe-less dependency: makes sure both headers exist before anything that
+# depends on primus_vk.cpp is built.
+primus_vk.cpp: primus_vk_forwarding.h primus_vk_forwarding_prototypes.h
+
+# Diagnostic program; not part of "all". No rule for primus_vk_diag.o is given
+# here, so it comes from make's built-in rules.
+primus_vk_diag: primus_vk_diag.o
+       $(CXX) -g3 -o $@ $^ -lX11 -lvulkan -ldl $(LDFLAGS)
+
+# Remove every binary this Makefile can build, including the diagnostic tool
+# and its object file. The generated headers are kept on purpose: they ship
+# with the sources.
+.PHONY: clean
+clean:
+       rm -f libnv_vulkan_wrapper.so libprimus_vk.so primus_vk_diag primus_vk_diag.o
+
+# Install the libraries under their soname plus an unversioned symlink, the
+# layer/ICD manifests and the pvkrun launcher.
+# The symlinks are created with -f so that running "make install" a second time
+# replaces the existing links instead of failing.
+.PHONY: install
+install: all
+       $(INSTALL) "libnv_vulkan_wrapper.so" "$(libdir)/libnv_vulkan_wrapper.so.1"
+       $(LN) -sf "libnv_vulkan_wrapper.so.1" "$(libdir)/libnv_vulkan_wrapper.so"
+       $(INSTALL) "libprimus_vk.so"  "$(libdir)/libprimus_vk.so.1"
+       $(LN) -sf "libprimus_vk.so.1" "$(libdir)/libprimus_vk.so"
+       $(INSTALL) -m644 "primus_vk.json" -t "$(datadir)/vulkan/implicit_layer.d/"
+       $(INSTALL) -m644 "nv_vulkan_wrapper.json" -t "$(datadir)/vulkan/icd.d/"
+       $(INSTALL) -m755 "pvkrun.in.sh" "$(bindir)/pvkrun"
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..9ededa9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,84 @@
+# Primus-vk
+
+This Vulkan layer can be used to do GPU offloading. Typically you want to display an image rendered on a more powerful GPU on a display managed by an internal GPU.
+
+It is basically the same as Primus for OpenGL (https://github.com/amonakov/primus). However it does not wrap the Vulkan API from the application but is directly integrated into Vulkan as a layer (which seems to be the intended way to implement such logic).
+
+## Usage
+
+First you need to install `primus_vk`:
+ * On Archlinux there are official packages ([for 64-bit games](https://www.archlinux.org/packages/community/x86_64/primus_vk/), [for 32-bit games](https://www.archlinux.org/packages/multilib/x86_64/lib32-primus_vk/)).
+ * On Debian (from bullseye on) you should use `primus-vk-nvidia` (which recommends `primus-vk-nvidia-i386` for 32-bit games), which already is preconfigured for the Nvidia dedicated + Intel integrated graphics setup. When you have a different setup, you should install just `primus-vk` (which installs only the bare `primus_vk`-library and neither graphics drivers nor the `pvkrun`-runner), select the Vulkan drivers you need yourself and then invoke `primus_vk` manually.
+ * For other distributions you will likely need to [manually install](#installation) `primus_vk`.
+
+To run an application with `primus_vk` prefix the command with `pvkrun` (which in the easiest case is just `ENABLE_PRIMUS_LAYER=1 optirun`). So instead of running `path/to/application`, invoke `pvkrun path/to/application` instead. You should be able to use `pvkrun` for all applications, independently of them using Vulkan, OpenGL or both.
+
+By default `primus_vk` chooses a graphics card marked as `dedicated` and one not marked as `dedicated`. If that does not fit your scenario, you need to specify the devices used for rendering and displaying manually. You can use `PRIMUS_VK_DISPLAYID` and `PRIMUS_VK_RENDERID` and give them the `deviceID`s from `optirun env DISPLAY=:8 vulkaninfo`. That way you can force `primus_vk` to work in a variety of different scenarios (e.g. having two dedicated graphics cards and rendering on one, while displaying on the other).
+
+
+## Idea
+
+Just as the OpenGL-Primus: Let the application talk to the primary display and transparently map API calls so that the application thinks, it renders using the primary display, however the `VkDevice` (and `VkImage`s) comes from the rendering GPU.
+When the application wants to swap frames, copy the image over to the integrated GPU and display it there.
+
+## Why do we need to copy the Image so often?
+As far as I can tell `VkImage` (and `VkMemory`) objects may not be shared between different physical devices. So there is not really another way than using `memcpy` on the images when memory-mapped into main memory.
+
+Additionally, only images with `VK_IMAGE_TILING_OPTIMAL` can be rendered to and presented and only images with `VK_IMAGE_TILING_LINEAR` can be mapped to main memory to be copied. So I see no better way than copying the image 3 times from render target to display. On my machine the `memcpy` from an external device was pretty clearly the bottleneck. So it is not really the copying of the image, but the transfer from rendering GPU into main memory.
+
+An idea might be to use `VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT` to map one device's memory and use that directly on the other device (or import host-allocated memory on both devices). However that is not implemented yet.
+
+## Dependencies
+This layer requires two working vulkan drivers. The only hardware that I have experience with are Intel Integrated Graphics + Nvidia. However it should theoretically work with any other graphics setup of two vulkan-compatible graphics devices. For the Nvidia graphics card, both the "nonglvnd" and the "glvnd" proprietary driver seem to work, however the "nonglvnd"-driver seems to be broken around `430.64` and is removed in newer versions.
+
+To use this layer you will require something similar to bumblebee to poweron/off the dedicated graphics card.
+
+Due to a bug/missing feature in the Vulkan Loader you will need `Vulkan/libvulkan >= 1.1.108`. If you have an older system you can try primus_vk version 1.1 which contains an ugly workaround for that issue and is therefore compatible with older Vulkan versions.
+
+
+## Development Status
+
+This layer works for all the applications I tested it with, but uses a fair share of CPU resources for copying.
+
+## Technical Limitations
+
+1. The NVIDIA driver always connects to the "default" X-Display to verify that it has the NV-GLX extensions available. Otherwise the NVIDIA-vulkan-icd driver disables itself. For testing an intermediate solution is to modify the demo application to always use ":0" and set DISPLAY to ":8" to make the NV-Driver happy. However this approach does not work on general applications that cannot be modified. So this issue has to be solved in the graphics driver.
+
+2. Currently under Debian unstable the nvidia-icd is registered with a non-absolute library path in `/usr/share/vulkan/icd.d/nvidia_icd.json`. Replace `libGL.so.1` with `/usr/lib/x86_64-linux-gnu/nvidia/libGL.so.1` there to always load the intended Vulkan driver.
+
+3. When running an application with DXVK and wine, wine loads both Vulkan and OpenGL. This creates a problem as:
+       1. Wine loads Vulkan, which loads the integrated GPU's ICD, the Nvidia ICD (contained in Nvidia's libGL.so on my system), Primus-VK and potentially more.
+       2. Wine loads OpenGL, which should be satisfied by OpenGL-Primus. However for whatever reason wine directly gets Nvidia's libGL which fails to provide an OpenGL context for the primary X screen.
+       This needs to be prevented by forcing wine to load Primus' libGL.
+
+Issues 1.,2. and 3. can be worked around by compiling `libnv_vulkan_wrapper.so` and registering it instead of nvidia's `libGL.so.1` in `/usr/share/vulkan/icd.d/nvidia_icd.json`.
+
+## Installation
+### Locally
+Create the folder `~/.local/share/vulkan/implicit_layer.d` and copy `primus_vk.json` there with the path adjusted to the location of the shared object.
+
+### System-wide
+Copy `primus_vk.json` to `/usr/share/vulkan/implicit_layer.d` and adjust the path.
+
+## Howto
+1. Install the correct vulkan icds (i.e. intel/mesa, nvidia, amd, depending on your hardware).
+2. Use `make libprimus_vk.so libnv_vulkan_wrapper.so` to compile Primus-vk and `libnv_vulkan_wrapper.so` (check that the path to the nvidia-driver in `nv_vulkan_wrapper.cpp` is correct).
+3. Ensure that the (unwrapped) nvidia driver is not registered (e.g. in `/usr/share/vulkan/icd.d/nvidia_icd.json`) and create a similar file `nv_vulkan_wrapper.json` where the path to the driver points to the compiled `libnv_vulkan_wrapper.so`.
+4. (Optional) Run `optirun primus_vk_diag`. It has to display entries for both graphics cards, otherwise the driver setup is broken. You can also test with `optirun vulkaninfo` that your Vulkan drivers are at least detecting your graphics cards.
+5. Install `primus_vk.json` and adjust path.
+6. Run `ENABLE_PRIMUS_LAYER=1 optirun vulkan-smoketest`.
+
+### Arch Linux
+
+Notes for running on Arch Linux:
+
+* nv_vulkan_wrapper.cpp: Change nvDriver path to `/usr/lib/libGLX_nvidia.so.0`
+* primus_vk.cpp: add: `#include "vk_layer_utils.h"` (on Debian the contents are included in some other header and there is no "vk_layer_utils.h")
+
+### RPM package
+
+Leonid Maksymchuk built RPM packaging scripts for primus-vk which can be found in his [repository](https://github.com/leonmaxx/primus-vk-rpm).
+
+## Credits
+
+This layer is based on the sample layer available under https://github.com/baldurk/sample_layer. The guide that goes along with it is [https://renderdoc.org/vulkan-layer-guide.html](https://renderdoc.org/vulkan-layer-guide.html).
diff --git a/nv_vulkan_wrapper.cpp b/nv_vulkan_wrapper.cpp
new file mode 100644 (file)
index 0000000..b186aac
--- /dev/null
@@ -0,0 +1,88 @@
+#include <vulkan.h>
+#include <dlfcn.h>
+
+#include <string>
+#include <iostream>
+
+extern "C" VKAPI_ATTR VkResult VKAPI_CALL vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
+
+
+#ifndef NV_DRIVER_PATH
+#define NV_DRIVER_PATH "/usr/lib/x86_64-linux-gnu/nvidia/current/libGL.so.1"
+#endif
+
+class StaticInitialize {
+  void *nvDriver;
+  void *glLibGL;
+public:
+  VKAPI_ATTR PFN_vkVoidFunction (*instanceProcAddr) (VkInstance instance,
+                                               const char* pName);
+  VKAPI_ATTR PFN_vkVoidFunction (*phyProcAddr) (VkInstance instance,
+                                               const char* pName);
+  VKAPI_ATTR VkResult VKAPI_CALL (*negotiateVersion)(uint32_t* pSupportedVersion);
+public:
+  StaticInitialize(){
+    // Load libGL from LD_LIBRARY_PATH before loading the NV-driver (unluckily also named libGL
+    // This ensures that ld.so will find this libGL before the Nvidia one, when
+    // again asked to load libGL.
+    glLibGL = dlopen("libGL.so.1", RTLD_GLOBAL | RTLD_NOW);
+
+    std::string drivers(NV_DRIVER_PATH);
+    while(!nvDriver && drivers.size() > 0){
+      auto end = drivers.find(':');
+      if(end == std::string::npos) {
+       nvDriver = dlopen(drivers.c_str(), RTLD_LOCAL | RTLD_LAZY);
+       drivers = "";
+      } else {
+       std::string this_driver = drivers.substr(0, end);
+       nvDriver = dlopen(this_driver.c_str(), RTLD_LOCAL | RTLD_LAZY);
+       drivers = drivers.substr(end+1);
+      }
+    }
+    if(!nvDriver) {
+      std::cerr << "PrimusVK: ERROR! Nvidia driver could not be loaded from '" NV_DRIVER_PATH "'.\n";
+      return;
+    }
+    typedef void* (*dlsym_fn)(void *, const char*);
+    static dlsym_fn real_dlsym = (dlsym_fn) dlsym(dlopen("libdl.so.2", RTLD_LAZY), "dlsym");
+    instanceProcAddr = (decltype(instanceProcAddr)) real_dlsym(nvDriver, "vk_icdGetInstanceProcAddr");
+    phyProcAddr = (decltype(phyProcAddr)) real_dlsym(nvDriver, "vk_icdGetPhysicalDeviceProcAddr");
+    negotiateVersion = (decltype(negotiateVersion)) real_dlsym(nvDriver, "vk_icdNegotiateLoaderICDInterfaceVersion");
+  }
+  ~StaticInitialize(){
+    if(nvDriver)
+      dlclose(nvDriver);
+    dlclose(glLibGL);
+  }
+  bool IsInited(){
+    return nvDriver != nullptr;
+  }
+};
+
+StaticInitialize init;
+
+extern "C" VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
+                                               VkInstance instance,
+                                               const char* pName){
+  if (!init.IsInited()) return nullptr;
+  auto res = init.instanceProcAddr(instance, pName);
+  return res;
+}
+
+// ICD entry point: forwards to the wrapped driver's vk_icdGetPhysicalDeviceProcAddr.
+// Returns nullptr when the driver was not loaded or does not export that symbol.
+extern "C" VKAPI_ATTR PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(VkInstance instance,
+                                                   const char* pName){
+  if (!init.IsInited() || init.phyProcAddr == nullptr) return nullptr;
+  return init.phyProcAddr(instance, pName);
+}
+// ICD entry point: forwards the version negotiation to the wrapped driver,
+// with DISPLAY temporarily set to ":8" for the duration of the call.
+// Returns VK_ERROR_INCOMPATIBLE_DRIVER when the driver was not loaded or does
+// not export the negotiation symbol.
+extern "C" VKAPI_ATTR VkResult VKAPI_CALL vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion){
+  if (!init.IsInited() || init.negotiateVersion == nullptr) {
+    return VK_ERROR_INCOMPATIBLE_DRIVER;
+  }
+  // getenv() returns nullptr when DISPLAY is unset; constructing a std::string
+  // from that pointer would be undefined behaviour.
+  const char *prev = getenv("DISPLAY");
+  const bool hadDisplay = (prev != nullptr);
+  // Copy the value: setenv() below may invalidate the pointer returned by getenv().
+  const std::string old = hadDisplay ? prev : "";
+  setenv("DISPLAY", ":8", 1);
+  auto res = init.negotiateVersion(pSupportedVersion);
+  // Restore the caller's environment exactly, including "not set".
+  if (hadDisplay) {
+    setenv("DISPLAY", old.c_str(), 1);
+  } else {
+    unsetenv("DISPLAY");
+  }
+  return res;
+}
diff --git a/nv_vulkan_wrapper.json b/nv_vulkan_wrapper.json
new file mode 100644 (file)
index 0000000..8bc6561
--- /dev/null
@@ -0,0 +1,7 @@
+{
+    "file_format_version" : "1.0.0",
+    "ICD": {
+        "library_path": "libnv_vulkan_wrapper.so",
+        "api_version" : "1.1.84"
+    }
+}
diff --git a/primus_vk.cpp b/primus_vk.cpp
new file mode 100644 (file)
index 0000000..c492c3e
--- /dev/null
@@ -0,0 +1,1362 @@
+#define VK_USE_PLATFORM_XCB_KHR
+#define VK_USE_PLATFORM_XLIB_KHR
+#define VK_USE_PLATFORM_WAYLAND_KHR
+#include "vulkan.h"
+#include "vk_layer.h"
+
+#include "vk_layer_dispatch_table.h"
+
+#include <cassert>
+#include <cstring>
+
+#include <mutex>
+#include <condition_variable>
+#include <map>
+#include <vector>
+#include <list>
+#include <iostream>
+
+#include <pthread.h>
+
+#include <stdexcept>
+
+#include <dlfcn.h>
+
+#include <vector>
+#include <memory>
+#include <thread>
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <chrono>
+#include <functional>
+
+#undef VK_LAYER_EXPORT
+#if defined(WIN32)
+#define VK_LAYER_EXPORT extern "C" __declspec(dllexport)
+#else
+#define VK_LAYER_EXPORT extern "C"
+#endif
+
+// single global lock, for simplicity
+std::mutex global_lock;
+typedef std::lock_guard<std::mutex> scoped_lock;
+
+// Key used for all dispatch map lookups: the first pointer-sized word that the
+// handle points to (the loader's dispatch table pointer).
+// It is returned by reference, so callers can also overwrite that word.
+template<typename DispatchableType>
+void *&GetKey(DispatchableType inst)
+{
+  void **dispatchSlot = (void **)inst;
+  return *dispatchSlot;
+}
+
+class CreateOtherDevice;
+
+// #define TRACE(x)
+#define TRACE(x) std::cout << "PrimusVK: " << x << "\n";
+#define TRACE_PROFILING(x)
+// #define TRACE_PROFILING(x) std::cout << "PrimusVK: " << x << "\n";
+#define TRACE_PROFILING_EVENT(x, y)
+// #define TRACE_PROFILING_EVENT(idx, evt) std::cout << "PrimusVK-profiling: " << idx << " " << std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - primus_start).count() << " " << evt << "\n";
+#define TRACE_FRAME(x)
+// #define TRACE_FRAME(x) std::cout << "PrimusVK: " << x << "\n";
+
+#define VK_CHECK_RESULT(x) do{ const VkResult r = x; if(r != VK_SUCCESS){printf("PrimusVK: Error %d in line %d.\n", r, __LINE__);}}while(0);
+// #define VK_CHECK_RESULT(x) if(x != VK_SUCCESS){printf("Error %d, in %d\n", x, __LINE__);}
+
+struct InstanceInfo {
+public:
+  VkInstance instance;
+  PFN_vkLayerCreateDevice layerCreateDevice;
+  PFN_vkLayerDestroyDevice layerDestroyDevice;
+
+  VkPhysicalDevice render = VK_NULL_HANDLE;
+  VkPhysicalDevice display = VK_NULL_HANDLE;
+  std::map<void*, std::shared_ptr<CreateOtherDevice>> cod = {};
+
+  std::shared_ptr<std::mutex> renderQueueMutex = std::make_shared<std::mutex>();
+  InstanceInfo() = default;
+  InstanceInfo(const InstanceInfo &) = delete;
+  InstanceInfo(InstanceInfo &&) = default;
+  InstanceInfo(VkInstance instance,
+              PFN_vkLayerCreateDevice layerCreateDevice,
+              PFN_vkLayerDestroyDevice layerDestroyDevice) : instance(instance), layerCreateDevice(layerCreateDevice), layerDestroyDevice(layerDestroyDevice) {
+  }
+  InstanceInfo &operator=(InstanceInfo &&) = default;
+private:
+  void GetEnvVendorDeviceIDs(std::string env, uint32_t &vendor, uint32_t &device) {
+    char *envstr = getenv(env.c_str());
+    if(envstr != nullptr){
+      std::stringstream ss(envstr);
+      std::string item;
+      std::vector<uint32_t> hexnums(2);
+      int i = 0;
+      while(std::getline(ss, item, ':') && (i < 2)) {
+       uint32_t num = 0;
+       std::stringstream _ss;
+       _ss << std::hex << item;
+       _ss >> num;
+       hexnums[i] = num;
+       ++i;
+      }
+      vendor = hexnums[0];
+      device = hexnums[1];
+    }
+  }
+
+  bool IsDevice(
+               VkPhysicalDeviceProperties props, 
+               uint32_t vendor, 
+               uint32_t device, 
+               VkPhysicalDeviceType type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) {
+    if((vendor == 0) && (props.deviceType == type)){
+      if(type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU){
+       TRACE("Got integrated gpu!");
+      } else {
+       TRACE("Got discrete gpu!");
+      }
+      TRACE("Device: " << props.deviceName);
+      TRACE("  Type: " << props.deviceType);
+      return true;
+    }
+    if((props.vendorID == vendor) && (props.deviceID == device)){
+      TRACE("Got device from env!");
+      TRACE("Device: " << props.deviceName);
+      TRACE("  Type: " << props.deviceType);
+      return true;
+    }
+    if(props.vendorID == vendor){
+      TRACE("Got device from env! (via vendorID)");
+      TRACE("Device: " << props.deviceName);
+      TRACE("  Type: " << props.deviceType);
+      return true;
+    }
+    return false;
+  }
+
+public:
+  VkResult searchDevices(VkLayerInstanceDispatchTable &dispatchTable){
+    uint32_t displayVendorID = 0;
+    uint32_t displayDeviceID = 0;
+    uint32_t renderVendorID = 0;
+    uint32_t renderDeviceID = 0;
+    GetEnvVendorDeviceIDs("PRIMUS_VK_DISPLAYID", displayVendorID, displayDeviceID);
+    GetEnvVendorDeviceIDs("PRIMUS_VK_RENDERID", renderVendorID, renderDeviceID);
+
+    std::vector<VkPhysicalDevice> physicalDevices;
+    {
+      auto enumerateDevices = dispatchTable.EnumeratePhysicalDevices;
+      uint32_t gpuCount = 0;
+      enumerateDevices(instance, &gpuCount, nullptr);
+      physicalDevices.resize(gpuCount);
+      enumerateDevices(instance, &gpuCount, physicalDevices.data());
+    }
+
+    TRACE("Searching for display GPU:");
+    for(auto &dev: physicalDevices){
+      VkPhysicalDeviceProperties props;
+      dispatchTable.GetPhysicalDeviceProperties(dev, &props);
+      TRACE(dev << ": ");
+      if(IsDevice(props, displayVendorID, displayDeviceID, VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU)){
+       display = dev;
+       break;
+      }
+    }
+
+    TRACE("Searching for render GPU:");
+    for(auto &dev: physicalDevices){
+      VkPhysicalDeviceProperties props;
+      dispatchTable.GetPhysicalDeviceProperties(dev, &props);
+      TRACE(dev << ".");
+      if(IsDevice(props, renderVendorID, renderDeviceID)){
+       render = dev;
+       break;
+      }
+    }
+    if(display == VK_NULL_HANDLE || render == VK_NULL_HANDLE){
+      const auto c_icd_filenames = getenv("VK_ICD_FILENAMES");
+      if(display == VK_NULL_HANDLE) {
+       TRACE("No device for the display GPU found. Are the intel-mesa drivers installed?");
+      }
+      if(render == VK_NULL_HANDLE) {
+       TRACE("No device for the rendering GPU found. Is the correct driver installed?");
+      }
+      if(c_icd_filenames != nullptr) {
+       TRACE("VK_ICD_FILENAMES=" << c_icd_filenames);
+      } else {
+       TRACE("VK_ICD_FILENAMES not set");
+      }
+      return VK_ERROR_INITIALIZATION_FAILED;
+    }
+    return VK_SUCCESS;
+  }
+};
+
+std::map<void *, VkLayerInstanceDispatchTable> instance_dispatch;
+VkLayerInstanceDispatchTable loader_dispatch;
+// VkInstance->disp is being malloc'ed for every new instance
+// so we can assume it to be a good key.
+std::map<void *, InstanceInfo> instance_info;
+
+std::map<void *, InstanceInfo*> device_instance_info;
+std::map<void *, VkLayerDispatchTable> device_dispatch;
+
+///////////////////////////////////////////////////////////////////////////////////////////
+// Layer init and shutdown
+VkLayerDispatchTable fetchDispatchTable(PFN_vkGetDeviceProcAddr gdpa, VkDevice *pDevice);
+VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL PrimusVK_GetInstanceProcAddr(VkInstance instance, const char *pName);
+VkResult VKAPI_CALL PrimusVK_CreateInstance(
+    const VkInstanceCreateInfo*                 pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkInstance*                                 pInstance)
+{
+  VkLayerInstanceCreateInfo *layer_link_info = nullptr;
+  PFN_vkLayerCreateDevice layerCreateDevice = nullptr;
+  PFN_vkLayerDestroyDevice layerDestroyDevice = nullptr;
+  VkLayerInstanceCreateInfo *layerCreateInfo = (VkLayerInstanceCreateInfo *)pCreateInfo->pNext;
+
+  while(layerCreateInfo) {
+    if ( layerCreateInfo->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO &&  layerCreateInfo->function == VK_LAYER_LINK_INFO) {
+      layer_link_info = layerCreateInfo;
+    }
+    if ( layerCreateInfo->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO &&  layerCreateInfo->function == VK_LOADER_LAYER_CREATE_DEVICE_CALLBACK) {
+      layerCreateDevice = layerCreateInfo->u.layerDevice.pfnLayerCreateDevice;
+      layerDestroyDevice = layerCreateInfo->u.layerDevice.pfnLayerDestroyDevice;
+    }
+    layerCreateInfo = (VkLayerInstanceCreateInfo *)layerCreateInfo->pNext;
+  }
+
+  if(layer_link_info == nullptr) {
+    return VK_ERROR_INITIALIZATION_FAILED;
+  }
+  if(layerCreateDevice == nullptr || layerDestroyDevice == nullptr) {
+    TRACE("Loader did not supply layer device creation callbacks. Please upgrade to vulkan >= 1.1.108");
+    return VK_ERROR_INITIALIZATION_FAILED;
+  }
+
+  PFN_vkGetInstanceProcAddr gpa = layer_link_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+  layer_link_info->u.pLayerInfo = layer_link_info->u.pLayerInfo->pNext;
+
+  PFN_vkCreateInstance createFunc = (PFN_vkCreateInstance)gpa(VK_NULL_HANDLE, "vkCreateInstance");
+  VK_CHECK_RESULT( createFunc(pCreateInfo, pAllocator, pInstance) );
+
+  // fetch our own dispatch table for the functions we need, into the next layer
+  VkLayerInstanceDispatchTable dispatchTable;
+#define FORWARD(func) dispatchTable.func = (PFN_vk##func)gpa(*pInstance, "vk" #func);
+  FORWARD(GetInstanceProcAddr);
+  FORWARD(EnumeratePhysicalDevices);
+  FORWARD(DestroyInstance);
+  FORWARD(EnumerateDeviceExtensionProperties);
+  FORWARD(GetPhysicalDeviceProperties);
+#undef FORWARD
+
+  auto my_instance_info = InstanceInfo{*pInstance, layerCreateDevice, layerDestroyDevice};
+  auto res = my_instance_info.searchDevices(dispatchTable);
+  if(res != VK_SUCCESS) return res;
+#define FORWARD(func) dispatchTable.func = (PFN_vk##func)gpa(*pInstance, "vk" #func);
+  FORWARD(GetPhysicalDeviceMemoryProperties);
+  FORWARD(GetPhysicalDeviceQueueFamilyProperties);
+#ifdef VK_USE_PLATFORM_XCB_KHR
+  FORWARD(GetPhysicalDeviceXcbPresentationSupportKHR);
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+  FORWARD(GetPhysicalDeviceXlibPresentationSupportKHR);
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+  FORWARD(GetPhysicalDeviceWaylandPresentationSupportKHR);
+#endif
+#include "primus_vk_forwarding.h"
+#undef FORWARD
+
+  // store the table by key
+  {
+    scoped_lock l(global_lock);
+
+    instance_dispatch[GetKey(*pInstance)] = dispatchTable;
+    instance_info[GetKey(*pInstance)] = std::move(my_instance_info);
+  }
+
+  return VK_SUCCESS;
+}
+
+// Layer implementation of vkDestroyInstance: destroys the instance in the next
+// layer and drops the dispatch table and InstanceInfo recorded for it.
+void VKAPI_CALL PrimusVK_DestroyInstance(VkInstance instance, const VkAllocationCallbacks* pAllocator)
+{
+  // Destroying a null instance is a no-op; GetKey() would dereference it.
+  if(instance == VK_NULL_HANDLE) return;
+
+  scoped_lock l(global_lock);
+
+  // The key is read out of the instance object itself, so it has to be
+  // fetched before the instance is destroyed.
+  void *key = GetKey(instance);
+  instance_dispatch[key].DestroyInstance(instance, pAllocator);
+
+  instance_dispatch.erase(key);
+  instance_info.erase(key);
+}
+
+struct FramebufferImage;
+// Holds a host mapping of a FramebufferImage's device memory: the constructor
+// maps the memory and the destructor unmaps it (both are defined after
+// FramebufferImage).
+struct MappedMemory{
+  VkDevice device;
+  VkDeviceMemory mem;
+  // host address of the mapping, written by the constructor
+  char* data;
+  MappedMemory(VkDevice device, FramebufferImage &img);
+  ~MappedMemory();
+};
+struct FramebufferImage {
+  VkImage img;
+  VkDeviceMemory mem;
+
+  VkDevice device;
+
+  std::shared_ptr<MappedMemory> mapped;
+  FramebufferImage(FramebufferImage &) = delete;
+  FramebufferImage(VkDevice device, VkExtent2D size, VkImageTiling tiling, VkImageUsageFlags usage, VkFormat format, int memoryTypeIndex): device(device){
+    TRACE("Creating image: " << size.width << "x" << size.height);
+    VkImageCreateInfo imageCreateCI {};
+    imageCreateCI.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+    imageCreateCI.imageType = VK_IMAGE_TYPE_2D;
+    imageCreateCI.format = format;
+    imageCreateCI.extent.width = size.width;
+    imageCreateCI.extent.height = size.height;
+    imageCreateCI.extent.depth = 1;
+    imageCreateCI.arrayLayers = 1;
+    imageCreateCI.mipLevels = 1;
+    imageCreateCI.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    imageCreateCI.samples = VK_SAMPLE_COUNT_1_BIT;
+    imageCreateCI.tiling = tiling;
+    imageCreateCI.usage = usage;
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateImage(device, &imageCreateCI, nullptr, &img));
+
+    VkMemoryRequirements memRequirements {};
+    VkMemoryAllocateInfo memAllocInfo {.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
+    device_dispatch[GetKey(device)].GetImageMemoryRequirements(device, img, &memRequirements);
+    memAllocInfo.allocationSize = memRequirements.size;
+    memAllocInfo.memoryTypeIndex = memoryTypeIndex;
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].AllocateMemory(device, &memAllocInfo, nullptr, &mem));
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].BindImageMemory(device, img, mem, 0));
+  }
+  std::shared_ptr<MappedMemory> getMapped(){
+    if(!mapped){
+      throw std::runtime_error("not mapped");
+    }
+    return mapped;
+  }
+  void map(){
+    mapped = std::make_shared<MappedMemory>(device, *this);
+  }
+  VkSubresourceLayout getLayout(){
+    VkImageSubresource subResource { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 };
+    VkSubresourceLayout subResourceLayout;
+    device_dispatch[GetKey(device)].GetImageSubresourceLayout(device, img, &subResource, &subResourceLayout);
+    return subResourceLayout;
+  }
+  ~FramebufferImage(){
+    mapped.reset();
+    device_dispatch[GetKey(device)].FreeMemory(device, mem, nullptr);
+    device_dispatch[GetKey(device)].DestroyImage(device, img, nullptr);
+  }
+};
+// Maps the whole memory object of `img` into host address space.
+// `data` starts out as nullptr and a mapping failure is logged, so a failed
+// mapping leaves a null pointer instead of an uninitialised one.
+MappedMemory::MappedMemory(VkDevice device, FramebufferImage &img): device(device), mem(img.mem), data(nullptr){
+  VK_CHECK_RESULT(device_dispatch[GetKey(device)].MapMemory(device, img.mem, 0, VK_WHOLE_SIZE, 0, (void**)&data));
+}
+// Releases the host mapping created by the constructor.
+MappedMemory::~MappedMemory(){
+  auto &dispatch = device_dispatch[GetKey(device)];
+  dispatch.UnmapMemory(device, mem);
+}
+class CommandBuffer;
+class Fence{
+  VkDevice device;
+public:
+  VkFence fence;
+  Fence(VkDevice dev): device(dev){
+    // Create fence to ensure that the command buffer has finished executing
+    VkFenceCreateInfo fenceInfo = {.sType=VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
+    fenceInfo.flags = 0;
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateFence(device, &fenceInfo, nullptr, &fence));
+  }
+  void await(){
+    // Wait for the fence to signal that command buffer has finished executing
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].WaitForFences(device, 1, &fence, VK_TRUE, 10000000000L));
+  }
+  void reset(){
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].ResetFences(device, 1, &fence));
+  }
+  Fence(Fence &&other): device(other.device), fence(other.fence){
+    other.fence = VK_NULL_HANDLE;
+  }
+  ~Fence(){
+    if(fence != VK_NULL_HANDLE){
+      device_dispatch[GetKey(device)].DestroyFence(device, fence, nullptr);
+    }
+  }
+};
+class Semaphore{
+  VkDevice device;
+public:
+  VkSemaphore sem;
+  Semaphore(VkDevice dev): device(dev){
+    VkSemaphoreCreateInfo semInfo = {.sType=VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
+    semInfo.flags = 0;
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateSemaphore(device, &semInfo, nullptr, &sem));
+  }
+  Semaphore(Semaphore &&other): device(other.device), sem(other.sem) {
+    other.sem = VK_NULL_HANDLE;
+    other.device = VK_NULL_HANDLE;
+  }
+  ~Semaphore(){
+    if(sem != VK_NULL_HANDLE){
+      device_dispatch[GetKey(device)].DestroySemaphore(device, sem, nullptr);
+    }
+  }
+};
+struct PrimusSwapchain;
+// Per-image state attached to a PrimusSwapchain: the images, synchronisation
+// objects and command buffers declared below. All member functions are only
+// declared here; their definitions are not part of this excerpt.
+// NOTE(review): going by the member names, this presumably covers one swapchain
+// image's path from the render device to the display device - confirm against
+// the definitions.
+struct ImageWorker {
+  PrimusSwapchain &swapchain;
+
+  std::shared_ptr<FramebufferImage> render_image;
+  std::shared_ptr<FramebufferImage> render_copy_image;
+  std::shared_ptr<FramebufferImage> display_src_image;
+  Fence render_copy_fence;
+  Semaphore display_semaphore;
+  VkImage display_image = VK_NULL_HANDLE;
+
+  std::shared_ptr<CommandBuffer> render_copy_command;
+  std::shared_ptr<CommandBuffer> display_command;
+  std::unique_ptr<Fence> display_command_fence;
+
+  ImageWorker(PrimusSwapchain &swapchain, VkImage display_image, const VkSwapchainCreateInfoKHR &createInfo, std::tuple<ssize_t, ssize_t, ssize_t> image_memory_types);
+  ImageWorker(ImageWorker &&other) = default;
+  ~ImageWorker();
+  void initImages( std::tuple<ssize_t, ssize_t, ssize_t> image_memory_types, const VkSwapchainCreateInfoKHR &createInfo);
+  void createCommandBuffers();
+  void copyImageData(std::vector<VkSemaphore> sems);
+};
+struct PrimusSwapchain{
+  std::chrono::steady_clock::time_point lastPresent = std::chrono::steady_clock::now();
+  VkDevice device;
+  VkQueue render_queue;
+  VkDevice display_device;
+  std::mutex displayQueueMutex;
+  VkQueue display_queue;
+  VkSwapchainKHR backend;
+  std::vector<ImageWorker> images;
+  VkExtent2D imgSize;
+
+  std::vector<std::unique_ptr<std::thread>> threads;
+
+  std::shared_ptr<CreateOtherDevice> cod;
+  PrimusSwapchain(PrimusSwapchain &) = delete;
+  PrimusSwapchain(VkDevice device, VkDevice display_device, VkSwapchainKHR backend, const VkSwapchainCreateInfoKHR *pCreateInfo, uint32_t imageCount, std::shared_ptr<CreateOtherDevice> &cod):
+    device(device), display_device(display_device), backend(backend), cod(cod){
+    // TODO automatically find correct queue and not choose 0 forcibly
+    device_dispatch[GetKey(device)].GetDeviceQueue(device, 0, 0, &render_queue);
+    device_dispatch[GetKey(display_device)].GetDeviceQueue(display_device, 0, 0, &display_queue);
+    GetKey(render_queue) = GetKey(device); // TODO, use vkSetDeviceLoaderData instead
+    GetKey(display_queue) = GetKey(display_device);
+
+    uint32_t image_count;
+    device_dispatch[GetKey(display_device)].GetSwapchainImagesKHR(display_device, backend, &image_count, nullptr);
+    TRACE("Image aquiring: " << image_count);
+    std::vector<VkImage> display_images;
+    display_images.resize(image_count);
+    device_dispatch[GetKey(display_device)].GetSwapchainImagesKHR(display_device, backend, &image_count, display_images.data());
+
+    imgSize = pCreateInfo->imageExtent;
+
+    auto image_memory_types = getImageMemories();
+    for(uint32_t i = 0; i < imageCount; i++){
+      images.emplace_back(*this, display_images[i], *pCreateInfo, image_memory_types);
+    }
+
+    TRACE("Creating a Swapchain thread.");
+    size_t thread_count = 1;
+    char *m_env = getenv("PRIMUS_VK_MULTITHREADING");
+    if(m_env == nullptr || std::string{m_env} != "1"){
+      thread_count = imageCount;
+    }
+    threads.resize(thread_count);
+    for(auto &thread: threads){
+      thread = std::unique_ptr<std::thread>(new std::thread([this](){this->run();}));
+      pthread_setname_np(thread->native_handle(), "swapchain-thread");
+    }
+  }
+
+  std::tuple<ssize_t, ssize_t, ssize_t> getImageMemories();
+
+  void storeImage(uint32_t index, VkQueue queue, std::vector<VkSemaphore> wait_on, Fence &notify);
+
+  void queue(VkQueue queue, const VkPresentInfoKHR *pPresentInfo);
+
+  std::mutex queueMutex;
+  std::condition_variable has_work;
+  bool active = true;
+  struct QueueItem {
+    VkQueue queue;
+    VkPresentInfoKHR pPresentInfo;
+    uint32_t imgIndex;
+  };
+  std::list<QueueItem> work;
+  std::list<QueueItem> in_progress;
+  void present(const QueueItem &workItem);
+  void run();
+  void stop();
+};
+
+// Creates the per-image fence (render device) and semaphore (display device),
+// then allocates the images and records the copy command buffers.
+ImageWorker::ImageWorker(PrimusSwapchain &swapchain, VkImage display_image, const VkSwapchainCreateInfoKHR &createInfo, std::tuple<ssize_t, ssize_t, ssize_t> image_memory_types)
+  : swapchain(swapchain),
+    render_copy_fence(swapchain.device),
+    display_semaphore(swapchain.display_device),
+    display_image(display_image)
+{
+  initImages(image_memory_types, createInfo);
+  createCommandBuffers();
+}
+// Blocks until the last submitted display copy (if there was one) has finished,
+// so the members it uses are not released while still in flight.
+ImageWorker::~ImageWorker(){
+  if(display_command_fence != nullptr){
+    display_command_fence->await();
+  }
+}
+
+class CreateOtherDevice {
+public:
+  VkPhysicalDevice display_dev;
+  VkPhysicalDevice render_dev;
+  VkPhysicalDeviceMemoryProperties display_mem;
+  VkPhysicalDeviceMemoryProperties render_mem;
+  VkDevice render_gpu = VK_NULL_HANDLE;
+  VkDevice display_gpu = VK_NULL_HANDLE;
+
+  CreateOtherDevice(VkPhysicalDevice display_dev, VkPhysicalDevice render_dev):
+    display_dev(display_dev), render_dev(render_dev){
+  }
+  void setRenderDevice(VkDevice render_gpu){
+    this->render_gpu = render_gpu;
+  }
+  void finish(std::function<VkResult(VkDeviceCreateInfo &createInfo, VkDevice &dev)> creator){
+    auto &minstance_info = instance_info[GetKey(render_dev)];
+    auto &minstance_dispatch = instance_dispatch[GetKey(minstance_info.instance)];
+    minstance_dispatch.GetPhysicalDeviceMemoryProperties(display_dev, &display_mem);
+    minstance_dispatch.GetPhysicalDeviceMemoryProperties(render_dev, &render_mem);
+
+    createDisplayDev(creator);
+  }
+  void createDisplayDev(std::function<VkResult(VkDeviceCreateInfo &createInfo, VkDevice &dev)> creator){
+    VkDeviceCreateInfo createInfo = {};
+    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+
+    VkDeviceQueueCreateInfo queueInfo{};
+    queueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queueInfo.queueFamilyIndex = 0;
+    queueInfo.queueCount = 1;
+    const float defaultQueuePriority(0.0f);
+    queueInfo.pQueuePriorities = &defaultQueuePriority;
+
+    createInfo.queueCreateInfoCount = 1;
+    createInfo.pQueueCreateInfos = &queueInfo;
+    createInfo.enabledExtensionCount = 1;
+    const char *swap[] = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
+    createInfo.ppEnabledExtensionNames = swap;
+    VkResult ret = creator(createInfo, display_gpu);
+    TRACE("Creating display device finished!: " << ret);
+    if(ret != VK_SUCCESS){
+      throw std::runtime_error("Display device creation failed");
+    }
+  }
+};
+
+
+class CommandBuffer {
+  VkCommandPool commandPool;
+  VkDevice device;
+public:
+  VkCommandBuffer cmd;
+  CommandBuffer(VkDevice device) : device(device) {
+    VkCommandPoolCreateInfo poolInfo = {.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO};
+    poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+    poolInfo.queueFamilyIndex = 0;
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].CreateCommandPool(device, &poolInfo, nullptr, &commandPool));
+    VkCommandBufferAllocateInfo cmdBufAllocateInfo = {.sType=VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
+    cmdBufAllocateInfo.commandPool = commandPool;
+    cmdBufAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
+    cmdBufAllocateInfo.commandBufferCount = 1;
+
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].AllocateCommandBuffers(device, &cmdBufAllocateInfo, &cmd));
+
+    VkCommandBufferBeginInfo cmdBufInfo = {.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].BeginCommandBuffer(cmd, &cmdBufInfo));
+  }
+  ~CommandBuffer(){
+    device_dispatch[GetKey(device)].FreeCommandBuffers(device, commandPool, 1, &cmd);
+  }
+  void insertImageMemoryBarrier(
+                             VkImage image,
+                             VkAccessFlags srcAccessMask,
+                             VkAccessFlags dstAccessMask,
+                             VkImageLayout oldImageLayout,
+                             VkImageLayout newImageLayout,
+                             VkPipelineStageFlags srcStageMask,
+                             VkPipelineStageFlags dstStageMask,
+                             VkImageSubresourceRange subresourceRange) {
+    VkImageMemoryBarrier imageMemoryBarrier{.sType=VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
+    imageMemoryBarrier.srcAccessMask = srcAccessMask;
+    imageMemoryBarrier.dstAccessMask = dstAccessMask;
+    imageMemoryBarrier.oldLayout = oldImageLayout;
+    imageMemoryBarrier.newLayout = newImageLayout;
+    imageMemoryBarrier.image = image;
+    imageMemoryBarrier.subresourceRange = subresourceRange;
+
+    device_dispatch[GetKey(device)].CmdPipelineBarrier(
+                        cmd,
+                        srcStageMask,
+                        dstStageMask,
+                        0,
+                        0, nullptr,
+                        0, nullptr,
+                        1, &imageMemoryBarrier);
+  }
+  void copyImage(VkImage src, VkImage dst, VkExtent2D imgSize){
+    VkImageCopy imageCopyRegion{};
+    imageCopyRegion.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    imageCopyRegion.srcSubresource.layerCount = 1;
+    imageCopyRegion.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    imageCopyRegion.dstSubresource.layerCount = 1;
+    imageCopyRegion.extent.width = imgSize.width;
+    imageCopyRegion.extent.height = imgSize.height;
+    imageCopyRegion.extent.depth = 1;
+
+    // Issue the copy command
+    device_dispatch[GetKey(device)].CmdCopyImage(
+                  cmd,
+                  src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                  dst, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                  1,
+                  &imageCopyRegion);
+  }
+  void end(){
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].EndCommandBuffer(cmd));
+  }
+  void submit(VkQueue queue, VkFence fence, std::vector<VkSemaphore> wait = {}, std::vector<VkSemaphore> signal = {}){
+    VkSubmitInfo submitInfo = {.sType=VK_STRUCTURE_TYPE_SUBMIT_INFO};
+    submitInfo.commandBufferCount = 1;
+    submitInfo.pCommandBuffers = &cmd;
+    VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+    submitInfo.pWaitDstStageMask = &waitStage;
+    submitInfo.waitSemaphoreCount = wait.size();
+    submitInfo.pWaitSemaphores = wait.data();
+    submitInfo.signalSemaphoreCount = signal.size();
+    submitInfo.pSignalSemaphores = signal.data();
+
+    // Submit to the queue
+    VK_CHECK_RESULT(device_dispatch[GetKey(device)].QueueSubmit(queue, 1, &submitInfo, fence));
+  }
+};
+
+void ImageWorker::initImages( std::tuple<ssize_t, ssize_t, ssize_t> image_memory_types, const VkSwapchainCreateInfoKHR &createInfo){
+  ssize_t render_local_mem, render_host_mem, display_host_mem;
+  std::tie( render_local_mem, render_host_mem, display_host_mem) = image_memory_types;
+  auto imgSize = createInfo.imageExtent;
+  auto format = createInfo.imageFormat;
+    
+  auto &renderImage = render_image;
+  auto &renderCopyImage = render_copy_image;
+  auto &displaySrcImage = display_src_image;
+  renderImage = std::make_shared<FramebufferImage>(swapchain.device, imgSize,
+                                                    VK_IMAGE_TILING_OPTIMAL,VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |/**/ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,format, render_local_mem);
+  renderCopyImage = std::make_shared<FramebufferImage>(swapchain.device, imgSize,
+                                                        VK_IMAGE_TILING_LINEAR, VK_IMAGE_USAGE_TRANSFER_DST_BIT,format, render_host_mem);
+  displaySrcImage = std::make_shared<FramebufferImage>(swapchain.display_device, imgSize,
+                                                        VK_IMAGE_TILING_LINEAR,VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |/**/ VK_IMAGE_USAGE_TRANSFER_SRC_BIT,format, display_host_mem);
+
+  renderCopyImage->map();
+  displaySrcImage->map();
+
+  CommandBuffer cmd{swapchain.display_device};
+  cmd.insertImageMemoryBarrier(
+                              displaySrcImage->img,
+                              0,
+                              VK_ACCESS_MEMORY_WRITE_BIT,
+                              VK_IMAGE_LAYOUT_UNDEFINED,
+                              VK_IMAGE_LAYOUT_GENERAL,
+                              VK_PIPELINE_STAGE_TRANSFER_BIT,
+                              VK_PIPELINE_STAGE_TRANSFER_BIT,
+                              VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+  cmd.end();
+  Fence f{swapchain.display_device};
+  cmd.submit(swapchain.display_queue, f.fence);
+  f.await();
+}
+
+
+
+VkResult VKAPI_CALL PrimusVK_CreateDevice(
+    VkPhysicalDevice                            physicalDevice,
+    const VkDeviceCreateInfo*                   pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkDevice*                                   pDevice)
+{
+  auto &my_instance_info = instance_info[GetKey(physicalDevice)];
+  VkLayerDeviceCreateInfo *layerCreateInfo = (VkLayerDeviceCreateInfo *)pCreateInfo->pNext;
+
+  // step through the chain of pNext until we get to the link info
+  while(layerCreateInfo && (layerCreateInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO ||
+                            layerCreateInfo->function != VK_LAYER_LINK_INFO))
+  {
+    layerCreateInfo = (VkLayerDeviceCreateInfo *)layerCreateInfo->pNext;
+  }
+
+  if(layerCreateInfo == NULL)
+  {
+    // No loader instance create info
+    return VK_ERROR_INITIALIZATION_FAILED;
+  }
+
+  PFN_vkGetInstanceProcAddr gipa = layerCreateInfo->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+  PFN_vkGetDeviceProcAddr gdpa = layerCreateInfo->u.pLayerInfo->pfnNextGetDeviceProcAddr;
+  // move chain on for next layer
+  layerCreateInfo->u.pLayerInfo = layerCreateInfo->u.pLayerInfo->pNext;
+
+  auto display_dev = my_instance_info.display;
+  std::shared_ptr<CreateOtherDevice> cod = nullptr;
+  {
+    scoped_lock l(global_lock);
+    cod = std::make_shared<CreateOtherDevice>(display_dev, physicalDevice);
+  }
+  auto createDevice = my_instance_info.layerCreateDevice;
+  cod->finish([createDevice,&my_instance_info](VkDeviceCreateInfo &createInfo, VkDevice &dev){
+    PFN_vkGetDeviceProcAddr gdpa = nullptr;
+    auto ret = createDevice(my_instance_info.instance, my_instance_info.display, &createInfo, nullptr, &dev, PrimusVK_GetInstanceProcAddr, &gdpa);
+    {
+      scoped_lock l(global_lock);
+      device_instance_info[GetKey(dev)] = &my_instance_info;
+      device_dispatch[GetKey(dev)] = fetchDispatchTable(gdpa, &dev);
+    }
+    return ret;
+  });
+  PFN_vkCreateDevice createFunc = (PFN_vkCreateDevice)gipa(VK_NULL_HANDLE, "vkCreateDevice");
+  VkResult ret = createFunc(physicalDevice, pCreateInfo, pAllocator, pDevice);
+  cod->setRenderDevice(*pDevice);
+  my_instance_info.cod[GetKey(*pDevice)] = cod;
+
+  // store the table by key
+  {
+    scoped_lock l(global_lock);
+    device_instance_info[GetKey(*pDevice)] = &my_instance_info;
+    device_dispatch[GetKey(*pDevice)] = fetchDispatchTable(gdpa, pDevice);
+  }
+  TRACE("CreateDevice done");
+
+  return ret;
+
+}
+
+VkLayerDispatchTable fetchDispatchTable(PFN_vkGetDeviceProcAddr gdpa, VkDevice *pDevice){
+  TRACE("fetching dispatch for " << GetKey(*pDevice));
+  // fetch our own dispatch table for the functions we need, into the next layer
+  VkLayerDispatchTable dispatchTable;
+#define FETCH(x) dispatchTable.x = (PFN_vk##x) gdpa(*pDevice, "vk" #x);
+  FETCH(GetDeviceProcAddr);
+  FETCH(DestroyDevice);
+  FETCH(BeginCommandBuffer);
+  FETCH(CmdDraw);
+  FETCH(CmdDrawIndexed);
+  FETCH(EndCommandBuffer);
+
+  FETCH(CreateSwapchainKHR);
+  FETCH(DestroySwapchainKHR);
+  FETCH(GetSwapchainImagesKHR);
+  FETCH(AcquireNextImageKHR);
+  FETCH(GetSwapchainStatusKHR);
+  FETCH(QueuePresentKHR);
+
+  FETCH(CreateImage);
+  FETCH(GetImageMemoryRequirements);
+  FETCH(AllocateMemory);
+  FETCH(BindImageMemory);
+  FETCH(GetImageSubresourceLayout);
+  FETCH(FreeMemory);
+  FETCH(DestroyImage);
+  FETCH(MapMemory);
+  FETCH(UnmapMemory);
+
+
+  FETCH(AllocateCommandBuffers);
+  FETCH(BeginCommandBuffer);
+  FETCH(CmdCopyImage);
+  FETCH(CmdPipelineBarrier);
+  FETCH(CreateCommandPool);
+  //FETCH(CreateDevice);
+  FETCH(EndCommandBuffer);
+  //FETCH(EnumeratePhysicalDevices);
+  FETCH(FreeCommandBuffers);
+  //FETCH(GetPhysicalDeviceMemoryProperties);
+  //FETCH(GetPhysicalDeviceQueueFamilyProperties);
+  FETCH(QueueSubmit);
+  FETCH(DeviceWaitIdle);
+  FETCH(QueueWaitIdle);
+
+  FETCH(GetDeviceQueue);
+
+  FETCH(CreateFence);
+  FETCH(WaitForFences);
+  FETCH(ResetFences);
+  FETCH(DestroyFence);
+
+  FETCH(CreateSemaphore);
+  FETCH(DestroySemaphore);
+
+#undef FETCH
+  return dispatchTable;
+}
+
+// Layer implementation of vkDestroyDevice: destroys the associated display
+// device and the render device, then drops their bookkeeping entries.
+void VKAPI_CALL PrimusVK_DestroyDevice(VkDevice device, const VkAllocationCallbacks* pAllocator)
+{
+  scoped_lock l(global_lock);
+  auto &my_instance = *device_instance_info[GetKey(device)];
+  // Capture keys and handles by value up front: GetKey() reads through the
+  // device handle, which is invalid once the device has been destroyed, and the
+  // CreateOtherDevice object holding display_gpu may be freed by cod.erase().
+  auto device_key = GetKey(device);
+  VkDevice display_device = my_instance.cod[device_key]->display_gpu;
+  auto display_key = GetKey(display_device);
+  auto destroyDisplayDevice = device_dispatch[display_key].DestroyDevice;
+  auto destroyRenderDevice = device_dispatch[device_key].DestroyDevice;
+
+  my_instance.layerDestroyDevice(display_device, nullptr, destroyDisplayDevice);
+  destroyRenderDevice(device, pAllocator);
+  my_instance.cod.erase(device_key);
+  device_dispatch.erase(device_key);
+  device_dispatch.erase(display_key);
+}
+
+VkResult VKAPI_CALL PrimusVK_CreateSwapchainKHR(VkDevice device, const VkSwapchainCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSwapchainKHR* pSwapchain) {
+  auto &my_instance = *device_instance_info[GetKey(device)];
+  TRACE("Application requested " << pCreateInfo->minImageCount << " images.");
+  VkDevice render_gpu = device;
+  VkSwapchainCreateInfoKHR info2 = *pCreateInfo;
+  info2.minImageCount = std::max(3u, pCreateInfo->minImageCount);
+  info2.imageUsage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+  pCreateInfo = &info2;
+  
+  VkSwapchainKHR old = pCreateInfo->oldSwapchain;
+  if(old != VK_NULL_HANDLE){
+    PrimusSwapchain *ch = reinterpret_cast<PrimusSwapchain*>(old);
+    info2.oldSwapchain = ch->backend;
+    TRACE("Old Swapchain: " << ch->backend);
+  }
+  TRACE("Creating Swapchain for size: " << pCreateInfo->imageExtent.width << "x" << pCreateInfo->imageExtent.height);
+  TRACE("MinImageCount: " << pCreateInfo->minImageCount);
+  TRACE("fetching device for: " << GetKey(render_gpu));
+  VkDevice display_gpu = my_instance.cod[GetKey(device)]->display_gpu;
+
+  TRACE("FamilyIndexCount: " <<  pCreateInfo->queueFamilyIndexCount);
+  TRACE("Dev: " << GetKey(display_gpu));
+  TRACE("Swapchainfunc: " << (void*) device_dispatch[GetKey(display_gpu)].CreateSwapchainKHR);
+
+  VkSwapchainKHR backend;
+  VkResult rc = device_dispatch[GetKey(display_gpu)].CreateSwapchainKHR(display_gpu, pCreateInfo, pAllocator, &backend);
+  TRACE(">> Swapchain create done " << rc << ";" << (void*) backend);
+  if(rc != VK_SUCCESS){
+    return rc;
+  }
+
+  PrimusSwapchain *ch = new PrimusSwapchain(render_gpu, display_gpu, backend, pCreateInfo, info2.minImageCount, my_instance.cod[GetKey(device)]);
+
+  *pSwapchain = reinterpret_cast<VkSwapchainKHR>(ch);
+
+
+  return rc;
+}
+
+void VKAPI_CALL PrimusVK_DestroySwapchainKHR(VkDevice device, VkSwapchainKHR swapchain, const VkAllocationCallbacks* pAllocator) {
+    if(swapchain == VK_NULL_HANDLE) { return;}
+  PrimusSwapchain *ch = reinterpret_cast<PrimusSwapchain*>(swapchain);
+  TRACE(">> Destroy swapchain: " << (void*) ch->backend);
+  ch->stop();
+  device_dispatch[GetKey(ch->display_device)].DestroySwapchainKHR(ch->display_device, ch->backend, pAllocator);
+  delete ch;
+}
+VkResult VKAPI_CALL PrimusVK_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain, uint32_t* pSwapchainImageCount, VkImage* pSwapchainImages) {
+  PrimusSwapchain *ch = reinterpret_cast<PrimusSwapchain*>(swapchain);
+
+  *pSwapchainImageCount = ch->images.size();
+  VkResult res = VK_SUCCESS;
+  if(pSwapchainImages != nullptr) {
+    res = VK_SUCCESS;
+    for(size_t i = 0; i < *pSwapchainImageCount; i++){
+      pSwapchainImages[i] = ch->images[i].render_image->img;
+    }
+    TRACE("Count: " << *pSwapchainImageCount);
+  }
+  return res;
+}
+
+// Steady-clock timestamp taken when this translation unit is initialized.
+// NOTE(review): presumably the time origin for the profiling trace macros - confirm against their definitions.
+const auto primus_start = std::chrono::steady_clock::now();
+
+VkResult VKAPI_CALL PrimusVK_AcquireNextImage2KHR(VkDevice device, const VkAcquireNextImageInfoKHR* pAcquireInfo, uint32_t* pImageIndex) {
+  TRACE_PROFILING_EVENT(-1, "Acquire starting");
+  PrimusSwapchain *ch = reinterpret_cast<PrimusSwapchain*>(pAcquireInfo->swapchain);
+
+  VkResult res;
+  {
+    Fence myfence{ch->display_device};
+
+    res = device_dispatch[GetKey(ch->display_device)].AcquireNextImageKHR(ch->display_device, ch->backend, pAcquireInfo->timeout, VK_NULL_HANDLE, myfence.fence, pImageIndex);
+    TRACE_PROFILING_EVENT(*pImageIndex, "got image");
+
+    myfence.await();
+  }
+  VkSubmitInfo qsi{};
+  qsi.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+  if(pAcquireInfo->semaphore != VK_NULL_HANDLE){
+    qsi.signalSemaphoreCount = 1;
+    qsi.pSignalSemaphores = &pAcquireInfo->semaphore;
+  }
+  scoped_lock lock(*device_instance_info[GetKey(ch->render_queue)]->renderQueueMutex);
+  device_dispatch[GetKey(ch->render_queue)].QueueSubmit(ch->render_queue, 1, &qsi, pAcquireInfo->fence);
+  TRACE_PROFILING_EVENT(*pImageIndex, "Acquire done");
+
+  return res;
+}
+VkResult VKAPI_CALL PrimusVK_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout, VkSemaphore semaphore, VkFence fence, uint32_t* pImageIndex) {
+  auto acquireInfo = VkAcquireNextImageInfoKHR{
+    .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
+    .swapchain = swapchain,
+    .timeout = timeout,
+    .semaphore = semaphore,
+    .fence = fence
+  };
+  return PrimusVK_AcquireNextImage2KHR(device, &acquireInfo, pImageIndex);
+}
+// Layer implementation of vkGetSwapchainStatusKHR: reports the status of the
+// backend swapchain.
+VkResult VKAPI_CALL PrimusVK_GetSwapchainStatusKHR(VkDevice device, VkSwapchainKHR swapchain){
+  PrimusSwapchain *ch = reinterpret_cast<PrimusSwapchain*>(swapchain);
+  // The backend swapchain belongs to the display device, so that device (not
+  // the application's render device) must be passed to its driver.
+  return device_dispatch[GetKey(ch->display_device)].GetSwapchainStatusKHR(ch->display_device, ch->backend);
+}
+
+std::tuple<ssize_t, ssize_t, ssize_t> PrimusSwapchain::getImageMemories(){
+  VkMemoryPropertyFlags host_mem = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+  VkMemoryPropertyFlags local_mem = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+  ssize_t render_host_mem = -1;
+  ssize_t render_local_mem = -1;
+  ssize_t display_host_mem = -1;
+  for(size_t j=0; j < cod->render_mem.memoryTypeCount; j++){
+    if ( render_host_mem == -1 && ( cod->render_mem.memoryTypes[j].propertyFlags & host_mem ) == host_mem ) {
+      render_host_mem = j;
+    }
+    if ( render_local_mem == -1 && ( cod->render_mem.memoryTypes[j].propertyFlags & local_mem ) == local_mem ) {
+      render_local_mem = j;
+    }
+  }
+  for(size_t j=0; j < cod->display_mem.memoryTypeCount; j++){
+    if ( display_host_mem == -1 && ( cod->display_mem.memoryTypes[j].propertyFlags & host_mem ) == host_mem ) {
+      display_host_mem = j;
+    }
+  }
+  TRACE("Selected render mem: " << render_host_mem << ";" << render_local_mem << " display: " << display_host_mem);
+
+  return std::make_tuple(render_local_mem, render_host_mem, display_host_mem);
+}
+
+void ImageWorker::createCommandBuffers(){
+  {
+    auto cpyImage = render_copy_image;
+    auto srcImage = render_image->img;
+    render_copy_command = std::make_shared<CommandBuffer>(swapchain. device);
+    CommandBuffer &cmd = *render_copy_command;
+    cmd.insertImageMemoryBarrier(
+       cpyImage->img,
+       VK_ACCESS_HOST_READ_BIT,                VK_ACCESS_TRANSFER_WRITE_BIT,
+       VK_IMAGE_LAYOUT_UNDEFINED,              VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+       VK_PIPELINE_STAGE_HOST_BIT,             VK_PIPELINE_STAGE_TRANSFER_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+    cmd.insertImageMemoryBarrier(
+       srcImage,
+       VK_ACCESS_MEMORY_READ_BIT,              VK_ACCESS_TRANSFER_READ_BIT,
+       VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+       VK_PIPELINE_STAGE_TRANSFER_BIT,         VK_PIPELINE_STAGE_TRANSFER_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+
+    cmd.copyImage(srcImage, cpyImage->img, swapchain.imgSize);
+
+    cmd.insertImageMemoryBarrier(
+       cpyImage->img,
+       VK_ACCESS_TRANSFER_WRITE_BIT,           VK_ACCESS_HOST_READ_BIT,
+       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,   VK_IMAGE_LAYOUT_GENERAL,
+       VK_PIPELINE_STAGE_TRANSFER_BIT,         VK_PIPELINE_STAGE_HOST_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+    cmd.insertImageMemoryBarrier(
+       srcImage,
+       VK_ACCESS_TRANSFER_READ_BIT,            VK_ACCESS_MEMORY_READ_BIT,
+       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,   VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+       VK_PIPELINE_STAGE_TRANSFER_BIT,         VK_PIPELINE_STAGE_TRANSFER_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+
+    cmd.end();
+  }
+
+  {
+    display_command = std::make_shared<CommandBuffer>(swapchain.display_device);
+    CommandBuffer &cmd = *display_command;
+    cmd.insertImageMemoryBarrier(
+       display_src_image->img,
+       VK_ACCESS_HOST_WRITE_BIT,       VK_ACCESS_TRANSFER_READ_BIT,
+       VK_IMAGE_LAYOUT_GENERAL,        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+       VK_PIPELINE_STAGE_HOST_BIT,     VK_PIPELINE_STAGE_TRANSFER_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+    cmd.insertImageMemoryBarrier(
+       display_image,
+       VK_ACCESS_MEMORY_READ_BIT,      VK_ACCESS_TRANSFER_WRITE_BIT,
+       VK_IMAGE_LAYOUT_UNDEFINED,      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+       VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+    cmd.copyImage(display_src_image->img, display_image, swapchain.imgSize);
+
+    cmd.insertImageMemoryBarrier(
+       display_src_image->img,
+       VK_ACCESS_TRANSFER_READ_BIT,    VK_ACCESS_HOST_WRITE_BIT,
+       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,   VK_IMAGE_LAYOUT_GENERAL,
+       VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+    cmd.insertImageMemoryBarrier(
+       display_image,
+       VK_ACCESS_TRANSFER_WRITE_BIT,   VK_ACCESS_MEMORY_READ_BIT,
+       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,   VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+       VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+       VkImageSubresourceRange{ VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 });
+    cmd.end();
+  }
+}
+
+// Submits the pre-recorded render-side copy of image `index` to `queue`,
+// waiting on `wait_on` and signalling the fence `notify` on completion.
+void PrimusSwapchain::storeImage(uint32_t index, VkQueue queue, std::vector<VkSemaphore> wait_on, Fence &notify){
+  ImageWorker &worker = images[index];
+  worker.render_copy_command->submit(queue, notify.fence, wait_on);
+}
+
+void ImageWorker::copyImageData(std::vector<VkSemaphore> sems){
+  {
+    auto rendered = render_copy_image->getMapped();
+    auto display = display_src_image->getMapped();
+    auto rendered_layout = render_copy_image->getLayout();
+    auto display_layout = display_src_image->getLayout();
+    auto rendered_start = rendered->data + rendered_layout.offset;
+    auto display_start = display->data + display_layout.offset;
+    if(rendered_layout.size/rendered_layout.rowPitch != display_layout.size/display_layout.rowPitch){
+      TRACE("Layouts don't match at all");
+      throw std::runtime_error("Layouts don't match at all");
+    }
+    TRACE_PROFILING_EVENT(index, "memcpy start");
+    if(rendered_layout.rowPitch == display_layout.rowPitch){
+      std::memcpy(display_start, rendered_start, rendered_layout.size);
+    }else{
+      VkDeviceSize display_offset = 0;
+      VkDeviceSize minRowPitch = rendered_layout.rowPitch;
+      if(display_layout.rowPitch < minRowPitch){
+       minRowPitch = display_layout.rowPitch;
+      }
+      for(VkDeviceSize offset = 0; offset < rendered_layout.size; offset += rendered_layout.rowPitch){
+       std::memcpy(display_start + display_offset, rendered_start + offset, minRowPitch);
+       display_offset += display_layout.rowPitch;
+      }
+    }
+    TRACE_PROFILING_EVENT(index, "memcpy done");
+  }
+  {
+    std::unique_lock<std::mutex> lock(swapchain.queueMutex);
+    if(display_command_fence){
+      display_command_fence->await();
+      display_command_fence->reset();
+    }else{
+      display_command_fence = std::unique_ptr<Fence>(new Fence(swapchain.display_device));
+    }
+    display_command->submit(swapchain.display_queue, display_command_fence->fence, {}, sems);
+  }
+}
+
+// Called from the present path: submits the render-side copy of the first
+// image in pPresentInfo (waiting on the present's wait semaphores) and appends
+// a work item for the worker threads.
+void PrimusSwapchain::queue(VkQueue queue, const VkPresentInfoKHR* pPresentInfo){
+  std::unique_lock<std::mutex> lock(queueMutex);
+
+  const uint32_t imageIndex = pPresentInfo->pImageIndices[0];
+  std::vector<VkSemaphore> waitSemaphores{pPresentInfo->pWaitSemaphores, pPresentInfo->pWaitSemaphores + pPresentInfo->waitSemaphoreCount};
+  storeImage(imageIndex, render_queue, waitSemaphores, images[imageIndex].render_copy_fence);
+
+  work.push_back(QueueItem{queue, *pPresentInfo, imageIndex});
+  has_work.notify_all();
+}
+// Asks all worker threads to terminate and waits for them to exit.
+void PrimusSwapchain::stop(){
+  {
+    // Flip the flag under the mutex so that no waiting thread misses the wake-up.
+    std::lock_guard<std::mutex> guard(queueMutex);
+    active = false;
+    has_work.notify_all();
+  }
+  for(auto &worker: threads){
+    worker->join();
+    worker.reset();
+  }
+}
+// Worker-thread half of a present: waits for the render-side copy of the image,
+// transfers the pixels to the display device and presents the backend image.
+// `workItem` must be an element of in_progress; it is removed from that list
+// before this function returns.
+void PrimusSwapchain::present(const QueueItem &workItem){
+    const auto index = workItem.imgIndex;
+    // Wait until render_copy_command (submitted in queue()) has finished, then
+    // re-arm the fence for the next frame using this image.
+    images[index].render_copy_fence.await();
+    images[index].render_copy_fence.reset();
+    // CPU copy + display-side submission; display_semaphore is signalled when
+    // the swapchain image has been filled.
+    images[index].copyImageData({images[index].display_semaphore.sem});
+
+    TRACE_PROFILING_EVENT(index, "copy queued");
+
+    // Present the backend image once display_semaphore is signalled.
+    VkPresentInfoKHR p2 = {.sType=VK_STRUCTURE_TYPE_PRESENT_INFO_KHR};
+    p2.pSwapchains = &backend;
+    p2.swapchainCount = 1;
+    p2.pWaitSemaphores = &images[workItem.imgIndex].display_semaphore.sem;
+    p2.waitSemaphoreCount = 1;
+    p2.pImageIndices = &index;
+
+    {
+      std::unique_lock<std::mutex> lock(queueMutex);
+      // Keep presentation order: block until this item is the oldest one in progress.
+      has_work.wait(lock, [this,&workItem](){return &workItem == &in_progress.front();});
+      TRACE_PROFILING_EVENT(index, "submitting");
+      VkResult res = device_dispatch[GetKey(display_device)].QueuePresentKHR(display_queue, &p2);
+      if(res != VK_SUCCESS) {
+       TRACE("ERROR, Queue Present failed: " << res << "\n");
+      }
+      // Removing the item invalidates `workItem`; it must not be used afterwards.
+      in_progress.pop_front();
+      // Wake threads that are waiting for their item to reach the front.
+      has_work.notify_all();
+    }
+}
+// Body of every worker thread: repeatedly moves the oldest queued item to
+// in_progress and presents it, until stop() clears `active`.
+void PrimusSwapchain::run(){
+  for(;;){
+    QueueItem *current = nullptr;
+    {
+      std::unique_lock<std::mutex> lock(queueMutex);
+      has_work.wait(lock, [this](){return !active || work.size() > 0;});
+      if(!active){
+        return;
+      }
+      // std::list elements keep their address, so the pointer stays valid
+      // after the lock has been released.
+      in_progress.push_back(std::move(work.front()));
+      current = &in_progress.back();
+      work.pop_front();
+    }
+    present(*current);
+  }
+}
+
+VkResult VKAPI_CALL PrimusVK_QueueSubmit(VkQueue queue, uint32_t submitCount,
+                                                        const VkSubmitInfo* pSubmits,
+                                                        VkFence fence) {
+  scoped_lock lock(*device_instance_info[GetKey(queue)]->renderQueueMutex);
+  return device_dispatch[GetKey(queue)].QueueSubmit(queue, submitCount, pSubmits, fence);
+}
+
+VkResult VKAPI_CALL PrimusVK_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* pPresentInfo) {
+  scoped_lock lock(*device_instance_info[GetKey(queue)]->renderQueueMutex);
+  const auto start = std::chrono::steady_clock::now();
+  if(pPresentInfo->swapchainCount != 1){
+    TRACE("Warning, presenting with multiple swapchains not implemented, ignoring");
+  }
+
+  PrimusSwapchain *ch = reinterpret_cast<PrimusSwapchain*>(pPresentInfo->pSwapchains[0]);
+  double secs = std::chrono::duration_cast<std::chrono::duration<double>>(start - ch->lastPresent).count();
+  TRACE_PROFILING_EVENT(pPresentInfo->pImageIndices[0], "QueuePresent");
+  TRACE_PROFILING(" === Time between VkQueuePresents: " << secs << " -> " << 1/secs << " FPS");
+  ch->lastPresent = start;
+
+  ch->queue(queue, pPresentInfo);
+
+  return VK_SUCCESS;
+}
+
+// Layer implementation of vkGetPhysicalDeviceQueueFamilyProperties: forwards
+// the query unchanged for the same physical device.
+void VKAPI_CALL PrimusVK_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t* pQueueFamilyPropertyCount, VkQueueFamilyProperties* pQueueFamilyProperties) {
+  auto &dispatch = instance_dispatch[GetKey(physicalDevice)];
+  dispatch.GetPhysicalDeviceQueueFamilyProperties(physicalDevice, pQueueFamilyPropertyCount, pQueueFamilyProperties);
+}
+#ifdef VK_USE_PLATFORM_XCB_KHR
+VkBool32 VKAPI_CALL PrimusVK_GetPhysicalDeviceXcbPresentationSupportKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t                                    queueFamilyIndex,
+    xcb_connection_t*                           connection,
+    xcb_visualid_t                              visual_id){
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display;
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceXcbPresentationSupportKHR(phy, queueFamilyIndex, connection, visual_id);
+}
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+VkBool32 VKAPI_CALL PrimusVK_GetPhysicalDeviceXlibPresentationSupportKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t                                    queueFamilyIndex,
+    Display*                                    dpy,
+    VisualID                                    visualID){
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display;
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceXlibPresentationSupportKHR(phy, queueFamilyIndex, dpy, visualID);
+}
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+VkBool32 VKAPI_CALL PrimusVK_GetPhysicalDeviceWaylandPresentationSupportKHR(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t                                    queueFamilyIndex,
+    struct wl_display*                          display){
+  VkPhysicalDevice phy = instance_info[GetKey(physicalDevice)].display;
+  return instance_dispatch[GetKey(phy)].GetPhysicalDeviceWaylandPresentationSupportKHR(phy, queueFamilyIndex, display);
+}
+#endif
+
+// Intercepts vkQueueWaitIdle so the wait is serialised with the other users
+// of the render queue (QueueSubmit / QueuePresentKHR take the same mutex).
+//
+// vkQueueWaitIdle returns a VkResult; this function is handed out under that
+// name by the GetProcAddr entry points, so it has to return the downstream
+// result instead of being declared void (callers would otherwise read an
+// indeterminate return value and could miss e.g. VK_ERROR_DEVICE_LOST).
+VkResult VKAPI_CALL PrimusVK_QueueWaitIdle(VkQueue queue){
+  scoped_lock lock(*device_instance_info[GetKey(queue)]->renderQueueMutex);
+  return device_dispatch[GetKey(queue)].QueueWaitIdle(queue);
+}
+
+// Intercepts vkDeviceWaitIdle: waits for the device the application passed
+// in and then for the associated display_gpu device as well.
+//
+// vkDeviceWaitIdle returns a VkResult, so the results of both waits are
+// propagated instead of being discarded behind a void signature. Both waits
+// are always issued; a failure of the first wait takes precedence in the
+// returned code.
+VkResult VKAPI_CALL PrimusVK_DeviceWaitIdle(VkDevice device){
+  auto &my_instance = *device_instance_info[GetKey(device)];
+  const VkResult renderResult = device_dispatch[GetKey(device)].DeviceWaitIdle(device);
+  auto display_gpu = my_instance.cod[GetKey(device)]->display_gpu;
+  const VkResult displayResult = device_dispatch[GetKey(display_gpu)].DeviceWaitIdle(display_gpu);
+  return renderResult != VK_SUCCESS ? renderResult : displayResult;
+}
+
+#include "primus_vk_forwarding_prototypes.h"
+
+///////////////////////////////////////////////////////////////////////////////////////////
+// Enumeration function
+
+VkResult VKAPI_CALL PrimusVK_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
+                                                                       VkLayerProperties *pProperties)
+{
+  if(pPropertyCount) *pPropertyCount = 1;
+
+  if(pProperties)
+  {
+    strcpy(pProperties->layerName, "VK_LAYER_PRIMUS_PrimusVK");
+    strcpy(pProperties->description, "Primus-vk - https://github.com/felixdoerre/primus_vk");
+    pProperties->implementationVersion = 1;
+    pProperties->specVersion = VK_API_VERSION_1_0;
+  }
+
+  return VK_SUCCESS;
+}
+
+// Device-level layer enumeration: the physical device is ignored and the
+// answer is exactly what the instance-level enumeration reports.
+VkResult VKAPI_CALL PrimusVK_EnumerateDeviceLayerProperties(
+    VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount, VkLayerProperties *pProperties)
+{
+  const VkResult result = PrimusVK_EnumerateInstanceLayerProperties(pPropertyCount, pProperties);
+  return result;
+}
+
+// Instance extension enumeration for this layer only. Any query that does not
+// name "VK_LAYER_PRIMUS_PrimusVK" (including a NULL layer name) is rejected
+// with VK_ERROR_LAYER_NOT_PRESENT; for this layer the extension count is 0.
+VkResult VKAPI_CALL PrimusVK_EnumerateInstanceExtensionProperties(
+    const char *pLayerName, uint32_t *pPropertyCount, VkExtensionProperties *pProperties)
+{
+  const bool forThisLayer =
+      pLayerName != NULL && strcmp(pLayerName, "VK_LAYER_PRIMUS_PrimusVK") == 0;
+  if(!forThisLayer)
+  {
+    return VK_ERROR_LAYER_NOT_PRESENT;
+  }
+
+  // The layer itself exposes no extensions.
+  if(pPropertyCount != NULL)
+  {
+    *pPropertyCount = 0;
+  }
+  return VK_SUCCESS;
+}
+
+VkResult VKAPI_CALL PrimusVK_EnumerateDeviceExtensionProperties(
+                                     VkPhysicalDevice physicalDevice, const char *pLayerName,
+                                     uint32_t *pPropertyCount, VkExtensionProperties *pProperties)
+{
+  // pass through any queries that aren't to us
+  if(pLayerName == NULL || strcmp(pLayerName, "VK_LAYER_PRIMUS_PrimusVK"))
+  {
+    if(physicalDevice == VK_NULL_HANDLE)
+      return VK_SUCCESS;
+
+    scoped_lock l(global_lock);
+    return instance_dispatch[GetKey(physicalDevice)].EnumerateDeviceExtensionProperties(physicalDevice, pLayerName, pPropertyCount, pProperties);
+  }
+
+  // don't expose any extensions
+  if(pPropertyCount) *pPropertyCount = 0;
+  return VK_SUCCESS;
+}
+
+VkResult VKAPI_CALL PrimusVK_EnumeratePhysicalDevices(
+    VkInstance                                  instance,
+    uint32_t*                                   pPhysicalDeviceCount,
+    VkPhysicalDevice*                           pPhysicalDevices){
+  const int cnt = 1;
+  if(pPhysicalDevices == nullptr){
+    *pPhysicalDeviceCount = cnt;
+    return VK_SUCCESS;
+  }
+  scoped_lock l(global_lock);
+  InstanceInfo &info = instance_info[GetKey(instance)];
+  pPhysicalDevices[0] = info.render;
+  *pPhysicalDeviceCount = cnt;
+  return VK_SUCCESS;
+}
+VkResult VKAPI_CALL PrimusVK_EnumeratePhysicalDeviceGroups(
+    VkInstance                                  instance,
+    uint32_t*                                   pPhysicalDeviceGroupCount,
+    VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties) {
+  InstanceInfo &info = instance_info[GetKey(instance)];
+  *pPhysicalDeviceGroupCount = 1;
+  if(pPhysicalDeviceGroupProperties){
+    pPhysicalDeviceGroupProperties[0].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GROUP_PROPERTIES;
+    pPhysicalDeviceGroupProperties[0].pNext = nullptr;
+    pPhysicalDeviceGroupProperties[0].physicalDeviceCount = 1;
+    pPhysicalDeviceGroupProperties[0].physicalDevices[0] = info.render;
+    pPhysicalDeviceGroupProperties[0].subsetAllocation = VK_FALSE;
+  }
+  return VK_SUCCESS;
+}
+VkResult VKAPI_CALL PrimusVK_EnumeratePhysicalDeviceGroupsKHR(
+    VkInstance                                  instance,
+    uint32_t*                                   pPhysicalDeviceGroupCount,
+    VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties) {
+  return PrimusVK_EnumeratePhysicalDeviceGroups(instance, pPhysicalDeviceGroupCount, pPhysicalDeviceGroupProperties);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////////////
+// GetProcAddr functions, entry points of the layer
+
+// GETPROCADDR(func): if the requested name `pName` equals "vk<func>", return
+// the address of this layer's PrimusVK_<func> from the enclosing function.
+// Expands to an `if ... return`, so it is only usable inside a function that
+// has a `pName` variable in scope and returns PFN_vkVoidFunction.
+#define GETPROCADDR(func) if(!strcmp(pName, "vk" #func)) return (PFN_vkVoidFunction)&PrimusVK_##func;
+
+// Device-level lookup entry point of the layer (named as
+// "vkGetDeviceProcAddr" in primus_vk.json).
+//
+// Each GETPROCADDR line returns the layer's own implementation as soon as
+// pName matches; names that match none of them are resolved by the
+// GetDeviceProcAddr entry of the device dispatch table for `device`.
+VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL PrimusVK_GetDeviceProcAddr(VkDevice device, const char *pName)
+{
+  // device chain functions we intercept
+  GETPROCADDR(GetDeviceProcAddr);
+  GETPROCADDR(EnumerateDeviceLayerProperties);
+  GETPROCADDR(EnumerateDeviceExtensionProperties);
+  GETPROCADDR(CreateDevice);
+  GETPROCADDR(DestroyDevice);
+
+  // swapchain handling implemented by the layer
+  GETPROCADDR(CreateSwapchainKHR);
+  GETPROCADDR(DestroySwapchainKHR);
+  GETPROCADDR(GetSwapchainImagesKHR);
+  GETPROCADDR(AcquireNextImageKHR);
+  GETPROCADDR(AcquireNextImage2KHR);
+  GETPROCADDR(GetSwapchainStatusKHR);
+  GETPROCADDR(QueuePresentKHR);
+
+  // queue/device operations wrapped by the layer
+  GETPROCADDR(QueueSubmit);
+  GETPROCADDR(DeviceWaitIdle);
+  GETPROCADDR(QueueWaitIdle);
+  // primus_vk_forwarding.h is a list of FORWARD(name); lines; with FORWARD
+  // mapped to GETPROCADDR each of them becomes one more intercepted name.
+#define FORWARD(func) GETPROCADDR(func)
+#include "primus_vk_forwarding.h"
+#undef FORWARD
+  {
+    // Not intercepted: ask the next GetDeviceProcAddr, under the global lock
+    // that guards the dispatch-table lookup.
+    scoped_lock l(global_lock);
+    return device_dispatch[GetKey(device)].GetDeviceProcAddr(device, pName);
+  }
+}
+
+// Instance-level lookup entry point of the layer (named as
+// "vkGetInstanceProcAddr" in primus_vk.json).
+//
+// Each GETPROCADDR line returns the layer's own implementation as soon as
+// pName matches; this covers the instance-level functions and repeats the
+// device-level list of PrimusVK_GetDeviceProcAddr. Names that match none of
+// them are resolved by the GetInstanceProcAddr entry of the instance
+// dispatch table for `instance`.
+VK_LAYER_EXPORT PFN_vkVoidFunction VKAPI_CALL PrimusVK_GetInstanceProcAddr(VkInstance instance, const char *pName)
+{
+  // instance chain functions we intercept
+  GETPROCADDR(GetInstanceProcAddr);
+  GETPROCADDR(EnumeratePhysicalDevices);
+  GETPROCADDR(EnumeratePhysicalDeviceGroups);
+  GETPROCADDR(EnumeratePhysicalDeviceGroupsKHR);
+  GETPROCADDR(EnumerateInstanceLayerProperties);
+  GETPROCADDR(EnumerateInstanceExtensionProperties);
+  GETPROCADDR(CreateInstance);
+  GETPROCADDR(DestroyInstance);
+
+  // device chain functions we intercept
+  GETPROCADDR(GetDeviceProcAddr);
+  GETPROCADDR(EnumerateDeviceLayerProperties);
+  GETPROCADDR(EnumerateDeviceExtensionProperties);
+  GETPROCADDR(CreateDevice);
+  GETPROCADDR(DestroyDevice);
+
+  // swapchain handling implemented by the layer
+  GETPROCADDR(CreateSwapchainKHR);
+  GETPROCADDR(DestroySwapchainKHR);
+  GETPROCADDR(GetSwapchainImagesKHR);
+  GETPROCADDR(AcquireNextImageKHR);
+  GETPROCADDR(AcquireNextImage2KHR);
+  GETPROCADDR(GetSwapchainStatusKHR);
+  GETPROCADDR(QueuePresentKHR);
+
+  // queue/device operations wrapped by the layer
+  GETPROCADDR(QueueSubmit);
+  GETPROCADDR(DeviceWaitIdle);
+  GETPROCADDR(QueueWaitIdle);
+  GETPROCADDR(GetPhysicalDeviceQueueFamilyProperties);
+  // window-system presentation queries, only for the platforms compiled in
+#ifdef VK_USE_PLATFORM_XCB_KHR
+  GETPROCADDR(GetPhysicalDeviceXcbPresentationSupportKHR);
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+  GETPROCADDR(GetPhysicalDeviceXlibPresentationSupportKHR);
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+  GETPROCADDR(GetPhysicalDeviceWaylandPresentationSupportKHR);
+#endif
+  // primus_vk_forwarding.h is a list of FORWARD(name); lines; with FORWARD
+  // mapped to GETPROCADDR each of them becomes one more intercepted name.
+#define FORWARD(func) GETPROCADDR(func)
+#include "primus_vk_forwarding.h"
+#undef FORWARD
+  {
+    // Not intercepted: ask the next GetInstanceProcAddr, under the global
+    // lock that guards the dispatch-table lookup.
+    scoped_lock l(global_lock);
+    return instance_dispatch[GetKey(instance)].GetInstanceProcAddr(instance, pName);
+  }
+}
diff --git a/primus_vk.json b/primus_vk.json
new file mode 100644 (file)
index 0000000..7b0771c
--- /dev/null
@@ -0,0 +1,21 @@
+{
+  "file_format_version" : "1.0.0",
+  "layer" : {
+    "name": "VK_LAYER_PRIMUS_PrimusVK",
+    "type": "GLOBAL",
+    "library_path": "libprimus_vk.so",
+    "api_version": "1.1.0",
+    "implementation_version": "1",
+    "description": "Primus-vk - https://github.com/felixdoerre/primus_vk",
+    "functions": {
+      "vkGetInstanceProcAddr": "PrimusVK_GetInstanceProcAddr",
+      "vkGetDeviceProcAddr": "PrimusVK_GetDeviceProcAddr"
+    },
+    "enable_environment": {
+      "ENABLE_PRIMUS_LAYER": "1"
+    },
+    "disable_environment": {
+      "DISABLE_PRIMUS_LAYER": "1"
+    }
+  }
+}
diff --git a/primus_vk_diag.cpp b/primus_vk_diag.cpp
new file mode 100644 (file)
index 0000000..dd21154
--- /dev/null
@@ -0,0 +1,232 @@
+#include <GL/glx.h>
+#include <GL/gl.h>
+#include <unistd.h>
+#include <iostream>
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include <vulkan/vulkan.h>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include <dlfcn.h>
+
+#define GLX_CONTEXT_MAJOR_VERSION_ARB       0x2091
+#define GLX_CONTEXT_MINOR_VERSION_ARB       0x2092
+typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int*);
+
+// Owns one VkInstance for the duration of its lifetime: the constructor
+// creates it (and probes every physical device), the destructor destroys it.
+// Copy construction is disabled.
+class VulkanContext {
+public:
+  VulkanContext();
+  ~VulkanContext();
+  VulkanContext(const VulkanContext&) = delete;
+
+private:
+  VkInstance instance;
+};
+#define VK_CHECK()     if(reply != VK_SUCCESS){ \
+      throw std::runtime_error("Vulkan operation failed with code: " + std::to_string(reply)); \
+    }
+
+
+// X-macro listing every GL/GLX entry point the tool uses. The user defines
+// GL_FUNCTION(x) before expanding GL_FUNCTIONS and undefines it afterwards;
+// GLLib does this once to declare one pointer per function and once to
+// resolve them with dlsym.
+#define GL_FUNCTIONS GL_FUNCTION(glClearColor);\
+  GL_FUNCTION(glClear);\
+  GL_FUNCTION(glXChooseFBConfig);\
+  GL_FUNCTION(glXCreateContext);\
+  GL_FUNCTION(glXDestroyContext);\
+  GL_FUNCTION(glXGetProcAddress);\
+  GL_FUNCTION(glXGetVisualFromFBConfig);\
+  GL_FUNCTION(glXMakeCurrent);\
+  GL_FUNCTION(glXQueryExtensionsString);\
+  GL_FUNCTION(glXSwapBuffers);\
+
+
+// Table of GL/GLX function pointers resolved at run time from libGL.so.1,
+// so the binary does not link against libGL directly.
+//
+// For every name in GL_FUNCTIONS a member `ptr_<name>` with the exact type of
+// the corresponding function is declared. The constructor loads the library
+// and resolves all of them; it throws std::runtime_error if the library
+// cannot be loaded or a symbol is missing, instead of leaving null pointers
+// behind that would crash on first use. The library handle is never closed.
+struct GLLib {
+#define GL_FUNCTION(x) decltype(&x) ptr_##x;
+  GL_FUNCTIONS
+#undef GL_FUNCTION
+  GLLib() {
+    void* handle = dlopen("libGL.so.1", RTLD_NOW|RTLD_GLOBAL);
+    if(handle == nullptr) {
+      const char *reason = dlerror();
+      throw std::runtime_error(std::string("Failed to load libGL.so.1: ") + (reason ? reason : "unknown error"));
+    }
+    // Two statements per entry: resolve the symbol, then verify it. The
+    // trailing ';' comes from the GL_FUNCTIONS list itself.
+#define GL_FUNCTION(x) ptr_##x = (decltype(&x)) dlsym(handle, #x); \
+    if(ptr_##x == nullptr) throw std::runtime_error("Missing symbol in libGL.so.1: " #x)
+    GL_FUNCTIONS
+#undef GL_FUNCTION
+  }
+};
+
+// Prefix put in front of the tool's own log lines on stdout.
+const auto self = std::string{"PrimusVK-diagnostic: "};
+
+VulkanContext::VulkanContext(){
+  std::cout << self << "Creating Vulkan instance" << std::endl;
+  VkInstanceCreateInfo instanceCreateInfo = {};
+  instanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
+  instanceCreateInfo.pNext = NULL;
+  instanceCreateInfo.pApplicationInfo = nullptr;
+  auto reply = vkCreateInstance(&instanceCreateInfo, nullptr, &instance);
+  VK_CHECK();
+
+  uint32_t gpuCount;
+  reply = vkEnumeratePhysicalDevices(instance, &gpuCount, nullptr);
+  VK_CHECK();
+  // Enumerate devices
+  std::vector<VkPhysicalDevice> physicalDevices(gpuCount);
+  vkEnumeratePhysicalDevices(instance, &gpuCount, physicalDevices.data());
+  VK_CHECK();
+  for ( auto &device : physicalDevices) {
+    VkPhysicalDeviceProperties deviceProperties;
+    vkGetPhysicalDeviceProperties(device, &deviceProperties);
+    std::cout << self << "Device: " << deviceProperties.deviceName << std::endl;
+    std::cout << self << " Type: " << deviceProperties.deviceType << std::endl;
+    std::cout << self << " API: " << (deviceProperties.apiVersion >> 22) << "." << ((deviceProperties.apiVersion >> 12) & 0x3ff) << "." << (deviceProperties.apiVersion & 0xfff) << std::endl;
+    uint32_t queues;
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queues, nullptr);
+    std::vector<VkQueueFamilyProperties> data(queues);
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &queues, data.data());
+    std::cout << self << "   Queues: " << queues << std::endl;
+
+    VkDeviceQueueCreateInfo queue1{};
+    queue1.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+    queue1.queueFamilyIndex = 0;
+    queue1.queueCount = 1;
+    float prio = 1;
+    queue1.pQueuePriorities = &prio;
+    VkDeviceCreateInfo createInfo{};
+    createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
+    createInfo.pQueueCreateInfos = &queue1;
+    createInfo.queueCreateInfoCount = 1;
+    VkDevice dev;
+    VkResult res = vkCreateDevice(device, &createInfo, nullptr, &dev);
+    if(res == VK_SUCCESS) {
+      std::cout << "Device creation succeeded\n";
+    } else {
+      std::cout << "Device creation failed: " << res << "\n";
+    }
+    vkDestroyDevice(dev, nullptr);
+  }
+}
+// Logs the instance handle and destroys the instance created by the constructor.
+VulkanContext::~VulkanContext(){
+  std::cout << self << "Destroying Vulkan: ";
+  std::cout << instance << std::endl;
+  vkDestroyInstance(instance, nullptr);
+}
+
+class XWindowContext;
+
+// A GLX rendering context bound to the window of an XWindowContext. The
+// window context is held by reference and must outlive this object; the
+// GLLib function table is shared with it.
+class GLContext {
+public:
+  GLContext(XWindowContext &data);
+  ~GLContext();
+  void drawSample();
+
+private:
+  GLXContext ctx;
+  XWindowContext &data;
+  std::shared_ptr<GLLib> gl;
+};
+
+class XWindowContext {
+public:
+  Display *display;
+  XVisualInfo *vi;
+
+  Window win;
+  GLXFBConfig fbconfig;
+  std::shared_ptr<GLLib> gl = std::make_shared<GLLib>();
+
+  XWindowContext(Display *display): display(display){
+    const char *extensions = gl->ptr_glXQueryExtensionsString(display, DefaultScreen(display));
+    std::cout << self << extensions << std::endl;
+
+    static int visual_attribs[] =
+    {
+        GLX_RENDER_TYPE, GLX_RGBA_BIT,
+        GLX_DRAWABLE_TYPE, GLX_WINDOW_BIT,
+        GLX_DOUBLEBUFFER, true,
+        GLX_RED_SIZE, 1,
+        GLX_GREEN_SIZE, 1,
+        GLX_BLUE_SIZE, 1,
+        None
+     };
+
+    std::cout << self << "Getting framebuffer config" << std::endl;
+    int fbcount;
+    GLXFBConfig *fbc = gl->ptr_glXChooseFBConfig(display, DefaultScreen(display), visual_attribs, &fbcount);
+    if (!fbc) {
+      throw std::runtime_error("Failed to retrieve a framebuffer config");
+    }
+    fbconfig = fbc[0];
+
+    vi = gl->ptr_glXGetVisualFromFBConfig(display, fbconfig);
+
+    XSetWindowAttributes swa;
+    swa.colormap = XCreateColormap(display, RootWindow(display, vi->screen), vi->visual, AllocNone);
+    swa.border_pixel = 0;
+    swa.event_mask = StructureNotifyMask;
+
+    std::cout << self << "Creating window" << std::endl;
+    win = XCreateWindow(display, RootWindow(display, vi->screen), 0, 0, 100, 100, 0, vi->depth, InputOutput, vi->visual, CWBorderPixel|CWColormap|CWEventMask, &swa);
+    if (!win) {
+      throw std::runtime_error("Failed to create window.");
+    }
+    XMapWindow(display, win);
+  }
+};
+
+GLContext::GLContext(XWindowContext &data): data(data), gl(data.gl){
+  // Create an oldstyle context first, to get the correct function pointer for glXCreateContextAttribsARB
+  GLXContext ctx_old = gl->ptr_glXCreateContext(data.display, data.vi, 0, GL_TRUE);
+  const auto glXCreateContextAttribsARB =  (glXCreateContextAttribsARBProc)gl->ptr_glXGetProcAddress((const GLubyte*)"glXCreateContextAttribsARB");
+  gl->ptr_glXMakeCurrent(data.display, 0, 0);
+  gl->ptr_glXDestroyContext(data.display, ctx_old);
+
+  if (glXCreateContextAttribsARB == NULL) {
+    throw std::runtime_error("glXCreateContextAttribsARB entry point not found. Aborting.");
+  }
+
+  static int context_attribs[] =
+    {
+      GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
+      GLX_CONTEXT_MINOR_VERSION_ARB, 0,
+      None
+    };
+
+  std::cout << self << "Creating context" << std::endl;
+  ctx = glXCreateContextAttribsARB(data.display, data.fbconfig, NULL, true, context_attribs);
+  if (!ctx) {
+      throw std::runtime_error("Failed to create GL3 context.");
+  }
+
+  gl->ptr_glXMakeCurrent(data.display, data.win, ctx);
+}
+// Shows two solid colours in the window, one after the other: each frame is
+// cleared, swapped to the screen and then held for 200 ms.
+void GLContext::drawSample(){
+  std::cout << self << "Rendering with GL" << std::endl;
+
+  timespec frameDelay = {};
+  frameDelay.tv_nsec = 200000000;
+
+  // Clear to an opaque colour, present it and wait for one frame delay.
+  const auto showColor = [&](GLfloat red, GLfloat green, GLfloat blue) {
+    gl->ptr_glClearColor(red, green, blue, 1);
+    gl->ptr_glClear(GL_COLOR_BUFFER_BIT);
+    gl->ptr_glXSwapBuffers(data.display, data.win);
+    nanosleep(&frameDelay, nullptr);
+  };
+
+  showColor(0, 0.5, 1);
+  showColor(1, 0.5, 0);
+}
+// Releases the current context binding and destroys the GLX context.
+GLContext::~GLContext(){
+  std::cout << self << "Destroying GL context" << std::endl;
+  Display *const dpy = data.display;
+  gl->ptr_glXMakeCurrent(dpy, 0, 0);
+  gl->ptr_glXDestroyContext(dpy, ctx);
+}
+
+int main (int argc, char ** argv) {
+  Display *display = XOpenDisplay(0);
+  for(int i = 1; i < argc; i++){
+    std::string arg = argv[i];
+    if(arg == "gl"){
+      std::cout << self << "Loading GL." << std::endl;
+      auto winContext = std::make_shared<XWindowContext>(display);
+      GLContext context = GLContext{*winContext};
+      context.drawSample();
+    } else if(arg == "vulkan") {
+      VulkanContext context;
+    }
+  }
+  return 0;
+}
diff --git a/primus_vk_diag.sh b/primus_vk_diag.sh
new file mode 100755 (executable)
index 0000000..79d0e5b
--- /dev/null
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+export VK_LOADER_DEBUG=warn,error,info
+export LD_DEBUG=libs
+export ENABLE_PRIMUS_LAYER=0
+
+function step_0 {
+    printf "===== Round 0: Vulkaninfo =====\n"
+    if which vulkaninfo >> /dev/null; then
+       printf "==== Without Optirun ====\n"
+       vulkaninfo 2>&1
+       printf "==== With Optirun ====\n"
+       optirun vulkaninfo 2>&1
+    else
+       printf "ERROR: Vulkaninfo is missing. Please install for more diagnostic data\n"
+    fi
+}
+function step_1 {
+    printf "===== Round 1: Plain Vulkan =====\n"
+    ./primus_vk_diag vulkan 2>&1
+}
+function step_2 {
+    printf "===== Round 2: Vulkan with optirun =====\n"
+    optirun ./primus_vk_diag vulkan 2>&1
+}
+function step_3 {
+    printf "===== Round 3: Vulkan with optirun and Primus layer =====\n"
+    ENABLE_PRIMUS_LAYER=1 optirun ./primus_vk_diag vulkan 2>&1
+}
+function step_4 {
+    printf "===== Round 4: Mixed Vulkan and OpenGL with Primus layer =====\n"
+    ENABLE_PRIMUS_LAYER=1 optirun ./primus_vk_diag vulkan gl vulkan 2>&1
+}
+function step_5 {
+    printf "===== Round 5: Mixed Vulkan and OpenGL with Primus layer while forcing primus-libGLa =====\n"
+    ENABLE_PRIMUS_LAYER=1 optirun env PRIMUS_libGLa=/usr/lib/x86_64-linux-gnu/nvidia/current/libGLX_nvidia.so.0 ./primus_vk_diag vulkan gl vulkan 2>&1
+}
+
+if [[ $# == 0 ]]; then
+    step_0
+    step_1
+    step_2
+    step_3
+    step_4
+    step_5
+else
+    for arg in "$@"; do
+       if [[ $arg == [0-5] ]]; then
+           step_$arg
+       else
+           printf "Invalid argument\n" >&2
+       fi
+    done
+fi
diff --git a/primus_vk_forwarding.h b/primus_vk_forwarding.h
new file mode 100644 (file)
index 0000000..ba066e2
--- /dev/null
@@ -0,0 +1,17 @@
+// List of forwarded functions, one FORWARD(name); per entry. The including
+// file defines FORWARD before the #include and undefines it afterwards.
+// NOTE(review): this list looks like the output of
+// surface_forwarding_functions.xslt - confirm before editing it by hand.
+
+      FORWARD(GetPhysicalDeviceSurfaceSupportKHR);
+    
+      FORWARD(GetPhysicalDeviceSurfaceCapabilitiesKHR);
+    
+      FORWARD(GetPhysicalDeviceSurfaceFormatsKHR);
+    
+      FORWARD(GetPhysicalDeviceSurfacePresentModesKHR);
+    
+      FORWARD(GetPhysicalDeviceSurfaceCapabilities2EXT);
+    
+      FORWARD(GetPhysicalDevicePresentRectanglesKHR);
+    
+      FORWARD(GetPhysicalDeviceSurfaceCapabilities2KHR);
+    
+      FORWARD(GetPhysicalDeviceSurfaceFormats2KHR);
+    
diff --git a/primus_vk_forwarding_prototypes.h b/primus_vk_forwarding_prototypes.h
new file mode 100644 (file)
index 0000000..dc3cec1
--- /dev/null
@@ -0,0 +1,61 @@
+// Surface query answered by the physical device stored as `display` for this
+// instance, not by the device the caller passed in.
+VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceSupportKHR(
+    VkPhysicalDevice physicalDevice,
+    uint32_t queueFamilyIndex,
+    VkSurfaceKHR surface,
+    VkBool32* pSupported) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfaceSupportKHR(displayDevice, queueFamilyIndex, surface, pSupported);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceCapabilitiesKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfaceCapabilitiesKHR(displayDevice, surface, pSurfaceCapabilities);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceFormatsKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t* pSurfaceFormatCount,
+    VkSurfaceFormatKHR* pSurfaceFormats) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfaceFormatsKHR(displayDevice, surface, pSurfaceFormatCount, pSurfaceFormats);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfacePresentModesKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t* pPresentModeCount,
+    VkPresentModeKHR* pPresentModes) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfacePresentModesKHR(displayDevice, surface, pPresentModeCount, pPresentModes);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceCapabilities2EXT(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    VkSurfaceCapabilities2EXT* pSurfaceCapabilities) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfaceCapabilities2EXT(displayDevice, surface, pSurfaceCapabilities);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDevicePresentRectanglesKHR(
+    VkPhysicalDevice physicalDevice,
+    VkSurfaceKHR surface,
+    uint32_t* pRectCount,
+    VkRect2D* pRects) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDevicePresentRectanglesKHR(displayDevice, surface, pRectCount, pRects);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceCapabilities2KHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
+    VkSurfaceCapabilities2KHR* pSurfaceCapabilities) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfaceCapabilities2KHR(displayDevice, pSurfaceInfo, pSurfaceCapabilities);
+}
+    // Surface query answered by the physical device stored as `display` for
+    // this instance, not by the device the caller passed in.
+    VkResult VKAPI_CALL PrimusVK_GetPhysicalDeviceSurfaceFormats2KHR(
+    VkPhysicalDevice physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
+    uint32_t* pSurfaceFormatCount,
+    VkSurfaceFormat2KHR* pSurfaceFormats) {
+  VkPhysicalDevice displayDevice = instance_info[GetKey(physicalDevice)].display;
+  auto &&dispatch = instance_dispatch[GetKey(displayDevice)];
+  return dispatch.GetPhysicalDeviceSurfaceFormats2KHR(displayDevice, pSurfaceInfo, pSurfaceFormatCount, pSurfaceFormats);
+}
diff --git a/pvkrun.in.sh b/pvkrun.in.sh
new file mode 100644 (file)
index 0000000..4767033
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+ENABLE_PRIMUS_LAYER=1 exec primusrun "$@"
diff --git a/surface_forwarding_functions.xslt b/surface_forwarding_functions.xslt
new file mode 100644 (file)
index 0000000..f9a4c8b
--- /dev/null
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!--
+  Emits one "FORWARD(<Name>);" line for every commands/command under /registry
+  that has a VkPhysicalDevice parameter together with either a VkSurfaceKHR
+  parameter whose text contains no '*' or a VkPhysicalDeviceSurfaceInfo2KHR
+  parameter. <Name> is proto/name without its first two characters.
+  NOTE(review): the input is presumably the Vulkan registry (vk.xml) - confirm
+  against the build rules.
+-->
+<xsl:template match="/registry">
+  <xsl:for-each select="commands/command">
+    <xsl:variable name="surface" select="param[type = 'VkSurfaceKHR'][not(contains(text(), '*'))]"/>
+    <xsl:variable name="surfaceInfo" select="param[type = 'VkPhysicalDeviceSurfaceInfo2KHR']"/>
+    <xsl:variable name="dev" select="param[type = 'VkPhysicalDevice']"/>
+    <xsl:if test="($surface/text() != '' or $surfaceInfo/text() != '') and $dev/text() != ''">
+      FORWARD(<xsl:value-of select="substring(proto/name,3)"/>);
+    </xsl:if>
+  </xsl:for-each>      
+</xsl:template>
+</xsl:stylesheet> 
diff --git a/surface_forwarding_prototypes.xslt b/surface_forwarding_prototypes.xslt
new file mode 100644 (file)
index 0000000..53aa0bf
--- /dev/null
@@ -0,0 +1,22 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!--
+  For every commands/command under /registry that has a VkPhysicalDevice
+  parameter together with either a VkSurfaceKHR parameter whose text contains
+  no '*' or a VkPhysicalDeviceSurfaceInfo2KHR parameter, emits the definition
+  of a PrimusVK_<Name> function (<Name> is proto/name without its first two
+  characters). The generated function keeps the command's parameter list,
+  looks up instance_info[GetKey(<device parameter>)].display and calls the
+  same command on that device through instance_dispatch, passing all other
+  parameters through unchanged.
+  NOTE(review): the input is presumably the Vulkan registry (vk.xml) - confirm
+  against the build rules.
+-->
+<xsl:template match="/registry">
+  <xsl:for-each select="commands/command">
+    <xsl:variable name="surface" select="param[type = 'VkSurfaceKHR'][not(contains(text(), '*'))]"/>
+    <xsl:variable name="surfaceInfo" select="param[type = 'VkPhysicalDeviceSurfaceInfo2KHR']"/>
+    <xsl:variable name="dev" select="param[type = 'VkPhysicalDevice']"/>
+    <xsl:if test="($surface/text() != '' or $surfaceInfo/text() != '') and $dev/text() != ''">
+<xsl:value-of select="proto/type"/> VKAPI_CALL PrimusVK_<xsl:value-of select="substring(proto/name,3)"/>(
+<xsl:for-each select="param">
+  <xsl:text>    </xsl:text><xsl:value-of select="."/>
+  <xsl:if test="./following-sibling::param/text() != ''">,<xsl:text>
+</xsl:text></xsl:if>
+</xsl:for-each>) {
+  VkPhysicalDevice phy = instance_info[GetKey(<xsl:value-of select="$dev/name"/>)].display;
+  return instance_dispatch[GetKey(phy)].<xsl:value-of select="substring(proto/name,3)"/>(phy<xsl:for-each select="param[type != 'VkPhysicalDevice']">, <xsl:value-of select="name"/></xsl:for-each>);
+}          
+    </xsl:if>
+  </xsl:for-each>
+</xsl:template>
+</xsl:stylesheet>