|
|
@ -6,13 +6,14 @@ |
|
|
|
#include <errno.h> |
|
|
|
#include <errno.h> |
|
|
|
|
|
|
|
|
|
|
|
#include <vulkan/vulkan.h> |
|
|
|
#include <vulkan/vulkan.h> |
|
|
|
|
|
|
|
#include <vulkan/vulkan_core.h> |
|
|
|
|
|
|
|
|
|
|
|
// check for half precision floating point support, for x86 this is equivalent to
|
|
|
|
// check for half precision floating point support, for x86 this is equivalent to
|
|
|
|
// checking for SSE2
|
|
|
|
// checking for SSE2
|
|
|
|
#define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1) |
|
|
|
#define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1) |
|
|
|
// print debug messages
|
|
|
|
// print debug messages
|
|
|
|
#define DEBUG 1 |
|
|
|
#define DEBUG 1 |
|
|
|
#define VERBOSE 0 |
|
|
|
#define VERBOSE 0 |
|
|
|
|
|
|
|
|
|
|
|
// define half precision floating point
|
|
|
|
// define half precision floating point
|
|
|
|
#if SUPPORTS_NATIVE_FP16 |
|
|
|
#if SUPPORTS_NATIVE_FP16 |
|
|
@ -97,13 +98,14 @@ VkInstance vk_init(void) |
|
|
|
VkInstance vk_instance = VK_NULL_HANDLE; |
|
|
|
VkInstance vk_instance = VK_NULL_HANDLE; |
|
|
|
|
|
|
|
|
|
|
|
VkApplicationInfo vk_appinfo = { |
|
|
|
VkApplicationInfo vk_appinfo = { |
|
|
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, |
|
|
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, |
|
|
|
.pNext = NULL, |
|
|
|
.pNext = NULL, |
|
|
|
.pApplicationName = __FILE__, |
|
|
|
.pApplicationName = __FILE__, |
|
|
|
.applicationVersion = VK_MAKE_VERSION(0, 1, 0), |
|
|
|
.applicationVersion = VK_MAKE_VERSION(0, 1, 0), |
|
|
|
.pEngineName = "no engine", |
|
|
|
.pEngineName = "no engine", |
|
|
|
.engineVersion = VK_MAKE_VERSION(0, 0, 0), |
|
|
|
.engineVersion = VK_MAKE_VERSION(0, 0, 0), |
|
|
|
.apiVersion = VK_API_VERSION_1_3, |
|
|
|
.apiVersion = |
|
|
|
|
|
|
|
VK_API_VERSION_1_2, // api version 1.2 is more widely available
|
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
vk_enumerate_instance_extensions(); |
|
|
|
vk_enumerate_instance_extensions(); |
|
|
@ -118,12 +120,12 @@ VkInstance vk_init(void) |
|
|
|
(uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *)); |
|
|
|
(uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *)); |
|
|
|
|
|
|
|
|
|
|
|
VkInstanceCreateInfo vk_instanceinfo = { |
|
|
|
VkInstanceCreateInfo vk_instanceinfo = { |
|
|
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, |
|
|
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, |
|
|
|
.pApplicationInfo = &vk_appinfo, |
|
|
|
.pApplicationInfo = &vk_appinfo, |
|
|
|
.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR, |
|
|
|
.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR, |
|
|
|
.enabledExtensionCount = vk_instance_extensions_no, |
|
|
|
.enabledExtensionCount = vk_instance_extensions_no, |
|
|
|
.ppEnabledExtensionNames = vk_instance_extensions, |
|
|
|
.ppEnabledExtensionNames = vk_instance_extensions, |
|
|
|
.enabledLayerCount = 0, |
|
|
|
.enabledLayerCount = 0, |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
int e = 0; |
|
|
|
int e = 0; |
|
|
@ -153,9 +155,10 @@ void vk_destroy(VkInstance vk_instance) |
|
|
|
|
|
|
|
|
|
|
|
VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) |
|
|
|
VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
// get the physical devices list
|
|
|
|
VkPhysicalDevice vk_phydev = VK_NULL_HANDLE; |
|
|
|
VkPhysicalDevice vk_phydev = VK_NULL_HANDLE; |
|
|
|
|
|
|
|
|
|
|
|
uint32_t vk_phydevs_no = 0; |
|
|
|
uint32_t vk_phydevs_no = 0; |
|
|
|
VkPhysicalDevice *vk_phydevs; |
|
|
|
VkPhysicalDevice *vk_phydevs; |
|
|
|
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL); |
|
|
|
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL); |
|
|
|
|
|
|
|
|
|
|
@ -171,24 +174,38 @@ VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) |
|
|
|
|
|
|
|
|
|
|
|
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs); |
|
|
|
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// print out information about each device
|
|
|
|
printf("Available Physical Devices: \n"); |
|
|
|
printf("Available Physical Devices: \n"); |
|
|
|
for (uint32_t i = 0; i < vk_phydevs_no; i++) { |
|
|
|
for (uint32_t i = 0; i < vk_phydevs_no; i++) { |
|
|
|
VkPhysicalDevice device = vk_phydevs[i]; |
|
|
|
VkPhysicalDevice dev = vk_phydevs[i]; |
|
|
|
VkPhysicalDeviceProperties device_properties; |
|
|
|
VkPhysicalDeviceProperties dev_properties; |
|
|
|
VkPhysicalDeviceFeatures device_features; |
|
|
|
VkPhysicalDeviceFeatures dev_features; |
|
|
|
|
|
|
|
VkPhysicalDeviceMemoryProperties dev_memory; |
|
|
|
|
|
|
|
|
|
|
|
vkGetPhysicalDeviceProperties(device, &device_properties); |
|
|
|
vkGetPhysicalDeviceProperties(dev, &dev_properties); |
|
|
|
vkGetPhysicalDeviceFeatures(device, &device_features); |
|
|
|
vkGetPhysicalDeviceFeatures(dev, &dev_features); |
|
|
|
|
|
|
|
vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory); |
|
|
|
|
|
|
|
|
|
|
|
printf( |
|
|
|
printf( |
|
|
|
"\tDevice %d: %s, Discrete: %s\n", |
|
|
|
"\tDevice %d: %s, Discrete: %s\n", |
|
|
|
i, |
|
|
|
i, |
|
|
|
device_properties.deviceName, |
|
|
|
dev_properties.deviceName, |
|
|
|
device_properties.deviceType == |
|
|
|
dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU |
|
|
|
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU |
|
|
|
|
|
|
|
? "true" |
|
|
|
? "true" |
|
|
|
: "false" |
|
|
|
: "false" |
|
|
|
); |
|
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) { |
|
|
|
|
|
|
|
uint64_t mem_size = dev_memory.memoryHeaps[x].size; |
|
|
|
|
|
|
|
uint32_t mem_flags = dev_memory.memoryHeaps[x].flags; |
|
|
|
|
|
|
|
char mem_local = mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; |
|
|
|
|
|
|
|
printf( |
|
|
|
|
|
|
|
"\t\tHeap %.2d: local: %d, size: %.3f MiB\n", |
|
|
|
|
|
|
|
x, |
|
|
|
|
|
|
|
mem_local, |
|
|
|
|
|
|
|
(float)mem_size / (1024.0 * 1024.0) |
|
|
|
|
|
|
|
); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// TODO: find the most suitable physical device, but for now every vulkan
|
|
|
|
// TODO: find the most suitable physical device, but for now every vulkan
|
|
|
@ -210,9 +227,9 @@ void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) |
|
|
|
// returns a negative index on error
|
|
|
|
// returns a negative index on error
|
|
|
|
int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) |
|
|
|
int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint32_t vk_qfamilies_no = 0; |
|
|
|
uint32_t vk_qfamilies_no = 0; |
|
|
|
VkQueueFamilyProperties *vk_qfamilies; |
|
|
|
VkQueueFamilyProperties *vk_qfamilies; |
|
|
|
int supports = -1; |
|
|
|
int supports = -1; |
|
|
|
|
|
|
|
|
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); |
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); |
|
|
|
|
|
|
|
|
|
|
@ -239,16 +256,16 @@ int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) |
|
|
|
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) |
|
|
|
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) |
|
|
|
{ |
|
|
|
{ |
|
|
|
VkResult res; |
|
|
|
VkResult res; |
|
|
|
VkDevice vk_logdev = VK_NULL_HANDLE; |
|
|
|
VkDevice vk_logdev = VK_NULL_HANDLE; |
|
|
|
float vk_queue_priority = 1.0f; |
|
|
|
float vk_queue_priority = 1.0f; |
|
|
|
|
|
|
|
|
|
|
|
// specify which command queues to use for the physical device
|
|
|
|
// specify which command queues to use for the physical device
|
|
|
|
VkDeviceQueueCreateInfo vk_queueinfo = { |
|
|
|
VkDeviceQueueCreateInfo vk_queueinfo = { |
|
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, |
|
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, |
|
|
|
.pNext = NULL, |
|
|
|
.pNext = NULL, |
|
|
|
.flags = 0, |
|
|
|
.flags = 0, |
|
|
|
.queueFamilyIndex = qfamily_idx, |
|
|
|
.queueFamilyIndex = qfamily_idx, |
|
|
|
.queueCount = 1, |
|
|
|
.queueCount = 1, |
|
|
|
.pQueuePriorities = &vk_queue_priority, |
|
|
|
.pQueuePriorities = &vk_queue_priority, |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
@ -261,14 +278,14 @@ VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) |
|
|
|
// FIXME: here validation layers are ignored but it is still better to define
|
|
|
|
// FIXME: here validation layers are ignored but it is still better to define
|
|
|
|
// them for compatibility
|
|
|
|
// them for compatibility
|
|
|
|
VkDeviceCreateInfo vk_createinfo = { |
|
|
|
VkDeviceCreateInfo vk_createinfo = { |
|
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, |
|
|
|
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, |
|
|
|
.pQueueCreateInfos = &vk_queueinfo, |
|
|
|
.pQueueCreateInfos = &vk_queueinfo, |
|
|
|
.queueCreateInfoCount = 1, |
|
|
|
.queueCreateInfoCount = 1, |
|
|
|
.pEnabledFeatures = &vk_phydev_features, |
|
|
|
.pEnabledFeatures = &vk_phydev_features, |
|
|
|
.ppEnabledExtensionNames = NULL, |
|
|
|
.ppEnabledExtensionNames = NULL, |
|
|
|
.enabledExtensionCount = 0, |
|
|
|
.enabledExtensionCount = 0, |
|
|
|
.ppEnabledLayerNames = NULL, |
|
|
|
.ppEnabledLayerNames = NULL, |
|
|
|
.enabledLayerCount = 0, |
|
|
|
.enabledLayerCount = 0, |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev); |
|
|
|
res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev); |
|
|
@ -313,7 +330,7 @@ int main(void) |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
} |
|
|
|
} |
|
|
|
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); |
|
|
|
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); |
|
|
|
int qfamily_idx = vk_device_compute_queue_index(vk_phydev); |
|
|
|
int qfamily_idx = vk_device_compute_queue_index(vk_phydev); |
|
|
|
if (qfamily_idx < 0) { |
|
|
|
if (qfamily_idx < 0) { |
|
|
|
err("The device does not support compute queues\n"); |
|
|
|
err("The device does not support compute queues\n"); |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
exit(EXIT_FAILURE); |
|
|
|