|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
#include <vulkan/vulkan.h>
|
|
|
|
#include <vulkan/vulkan_core.h>
|
|
|
|
|
|
|
|
// check for half precision floating point support, for x86 this is equivalent to
// checking for SSE2
// The macro always expands to a literal 0 or 1 so that it can be used both in
// #if directives and in ordinary C expressions, even on targets where the
// compiler does not predefine __x86_64__ or __SSE2__ (in an expression an
// undefined macro name would otherwise be an undeclared identifier)
#if defined(__x86_64__) && defined(__SSE2__)
#define SUPPORTS_NATIVE_FP16 1
#else
#define SUPPORTS_NATIVE_FP16 0
#endif
|
|
|
|
// build time switches, both can be overridden from the compiler command line
// (for example -DDEBUG=0 -DVERBOSE=1), the values below are only the defaults
// DEBUG > 0: request the validation layer when creating the vulkan instance
#ifndef DEBUG
#define DEBUG 1
#endif
// VERBOSE > 0: print additional informational messages on stdout
#ifndef VERBOSE
#define VERBOSE 0
#endif
|
|
|
|
|
|
|
|
// half precision floating point type, only declared when the target is known to
// support it natively
#if SUPPORTS_NATIVE_FP16
// _Float16 is not an ISO C11 type, __extension__ keeps -pedantic quiet about it
__extension__ typedef _Float16 half;
#endif
|
|
|
|
|
|
|
|
// useful macros
// every argument is parenthesised so that compound arguments such as
// GIB(a + b) or TEST_BIT(flags, A | B) expand with the intended precedence

// 1 if f and b have at least one set bit in common, 0 otherwise
#define TEST_BIT(f, b) (!!((f) & (b)))
// x GiB / MiB / KiB expressed in bytes, computed as a 64 bit unsigned value
#define GIB(x) ((uint64_t)(x) * 1024u * 1024u * 1024u)
#define MIB(x) ((uint64_t)(x) * 1024u * 1024u)
#define KIB(x) ((uint64_t)(x) * 1024u)
|
|
|
|
|
|
|
|
// names of the instance layers requested by vk_activate_validation_layer
const char *vk_validation_layer[] = {
    "VK_LAYER_KHRONOS_validation",
};
// number of entries in vk_validation_layer, derived from the array itself
const uint32_t vk_validation_layer_no =
    (uint32_t)(sizeof(vk_validation_layer) / sizeof(vk_validation_layer[0]));
|
|
|
|
|
|
|
|
// FIXME: including vulkan/vk_enum_string_helper.h does not compile
|
|
|
|
extern const char *vk_Result_to_str(VkResult input);
|
|
|
|
|
|
|
|
// printf-style helper that writes the formatted message to stderr
// returns the value of vfprintf: the number of characters written, or a negative
// value if an output error occurred
int err(const char *fmt, ...)
{
    va_list args;
    int written;

    va_start(args, fmt);
    written = vfprintf(stderr, fmt, args);
    va_end(args);

    return written;
}
|
|
|
|
|
|
|
|
// print out all the instance extensions
// NOTE: these are different from device and shader extensions
// The listing is only compiled in when VERBOSE > 0; otherwise nothing is queried
// or printed and 0 is returned.
// returns the number of extensions listed, or -1 on error (the query failed or
// the temporary array could not be allocated)
int vk_enumerate_instance_extensions(void)
{
    uint32_t ex_no = 0;
#if VERBOSE > 0
    // the first call only asks how many extensions there are
    VkResult res = vkEnumerateInstanceExtensionProperties(NULL, &ex_no, NULL);
    if (res != VK_SUCCESS) {
        err("ERROR: in %s: %s\n", __func__, vk_Result_to_str(res));
        return -1;
    }

    // skip the allocation when there is nothing to fetch: malloc(0) is allowed
    // to return NULL, which would be misreported as an allocation failure
    VkExtensionProperties *ex_arr = NULL;
    if (ex_no > 0) {
        ex_arr = malloc(sizeof(*ex_arr) * ex_no);
        if (ex_arr == NULL) {
            err("ERROR: in %s: %s\n", __func__, strerror(errno));
            return -1;
        }

        // VK_INCOMPLETE means the list grew between the two calls, ex_no still
        // holds the number of entries actually written so they can be printed
        res = vkEnumerateInstanceExtensionProperties(NULL, &ex_no, ex_arr);
        if (res != VK_SUCCESS && res != VK_INCOMPLETE) {
            err("ERROR: in %s: %s\n", __func__, vk_Result_to_str(res));
            free(ex_arr);
            return -1;
        }
    }

    printf("Available Properties: \n");
    for (uint32_t i = 0; i < ex_no; i++) {
        printf("\t%s\n", ex_arr[i].extensionName);
    }
    free(ex_arr);
#endif
    return (int)ex_no;
}
|
|
|
|
|
|
|
|
// on debug check for support of validation layers and activate one, a validation
|
|
|
|
// layer is useful to do more error checking at runtime like ckecking for invalid
|
|
|
|
// arguments, validation layers are available only if vulkan-sdk is installed
|
|
|
|
// (vulkan-devel on arch)
|
|
|
|
int vk_activate_validation_layer(VkInstanceCreateInfo *cinfo)
|
|
|
|
{
|
|
|
|
uint32_t prop_no = 0;
|
|
|
|
#if DEBUG > 0
|
|
|
|
vkEnumerateInstanceLayerProperties(&prop_no, NULL);
|
|
|
|
|
|
|
|
VkLayerProperties *prop_arr = malloc(sizeof(VkLayerProperties) * prop_no);
|
|
|
|
if (prop_arr == NULL) {
|
|
|
|
err("ERROR: in %s: %s\n", __func__, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
vkEnumerateInstanceLayerProperties(&prop_no, prop_arr);
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < prop_no; i++) {
|
|
|
|
if (strcmp(prop_arr[i].layerName, vk_validation_layer[0]) == 0) {
|
|
|
|
cinfo->enabledLayerCount = vk_validation_layer_no;
|
|
|
|
cinfo->ppEnabledLayerNames = vk_validation_layer;
|
|
|
|
free(prop_arr);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(prop_arr);
|
|
|
|
return 1;
|
|
|
|
#endif
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
VkInstance vk_init(void)
|
|
|
|
{
|
|
|
|
// create a vulkan instance and fill it with the application data
|
|
|
|
VkResult res;
|
|
|
|
VkInstance vk_instance = VK_NULL_HANDLE;
|
|
|
|
|
|
|
|
VkApplicationInfo vk_appinfo = {
|
|
|
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
|
|
|
.pNext = NULL,
|
|
|
|
.pApplicationName = __FILE__,
|
|
|
|
.applicationVersion = VK_MAKE_VERSION(0, 1, 0),
|
|
|
|
.pEngineName = "no engine",
|
|
|
|
.engineVersion = VK_MAKE_VERSION(0, 0, 0),
|
|
|
|
.apiVersion =
|
|
|
|
VK_API_VERSION_1_2, // api version 1.2 is more widely available
|
|
|
|
};
|
|
|
|
|
|
|
|
vk_enumerate_instance_extensions();
|
|
|
|
|
|
|
|
// TODO: check for extension availability
|
|
|
|
// TODO: does the lifetime of VkInstanceCreateInfo has to be the same as the
|
|
|
|
// lifetime of VkInstance?
|
|
|
|
const char *vk_instance_extensions[] = {
|
|
|
|
VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME,
|
|
|
|
};
|
|
|
|
const uint32_t vk_instance_extensions_no =
|
|
|
|
(uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *));
|
|
|
|
|
|
|
|
VkInstanceCreateInfo vk_instanceinfo = {
|
|
|
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
|
|
|
.pApplicationInfo = &vk_appinfo,
|
|
|
|
.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
|
|
|
|
.enabledExtensionCount = vk_instance_extensions_no,
|
|
|
|
.ppEnabledExtensionNames = vk_instance_extensions,
|
|
|
|
.enabledLayerCount = 0,
|
|
|
|
};
|
|
|
|
|
|
|
|
int e = 0;
|
|
|
|
if ((e = vk_activate_validation_layer(&vk_instanceinfo))) {
|
|
|
|
err("Could not activate validation layers%s\n",
|
|
|
|
e > 0 ? ": No validation layers found" : "");
|
|
|
|
}
|
|
|
|
|
|
|
|
res = vkCreateInstance(&vk_instanceinfo, NULL, &vk_instance);
|
|
|
|
if (res != VK_SUCCESS) {
|
|
|
|
err("ERROR: Could not create vulkan instance %s",
|
|
|
|
vk_Result_to_str(res));
|
|
|
|
return VK_NULL_HANDLE;
|
|
|
|
} else {
|
|
|
|
#if VERBOSE > 0
|
|
|
|
printf("Created vulkan instance\n");
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
return vk_instance;
|
|
|
|
}
|
|
|
|
|
|
|
|
// destroy a vulkan instance, no custom allocation callbacks are used
void vk_destroy(VkInstance vk_instance)
{
    const VkAllocationCallbacks *allocator = NULL;

    // ...
    vkDestroyInstance(vk_instance, allocator);
}
|
|
|
|
|
|
|
|
// print a one line summary of the device dev (listed as number idx) followed by
// one line for each of its memory heaps
static void vk_physical_device_print(unsigned idx, VkPhysicalDevice dev)
{
    VkPhysicalDeviceProperties dev_properties;
    VkPhysicalDeviceMemoryProperties dev_memory;

    vkGetPhysicalDeviceProperties(dev, &dev_properties);
    vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory);

    printf(
        "\tDevice %u: %s, Discrete: %s\n",
        idx,
        dev_properties.deviceName,
        dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
            ? "true"
            : "false"
    );

    for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) {
        uint64_t mem_size = dev_memory.memoryHeaps[x].size;
        uint32_t mem_flags = dev_memory.memoryHeaps[x].flags;
        int is_local = (mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0;
        // convert through double: a float has too few mantissa bits to hold a
        // multi GiB byte count exactly
        printf(
            "\t\tHeap %.2u: local: %d, size: %.3f MiB\n",
            x,
            is_local,
            (double)mem_size / (1024.0 * 1024.0)
        );
    }
}

// list the physical devices visible through vk_instance on stdout and pick one
// returns the first enumerated device, or VK_NULL_HANDLE if there is none or if
// an error occurred (enumeration failure or out of memory)
VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance)
{
    VkPhysicalDevice vk_phydev = VK_NULL_HANDLE;

    // get the physical devices list, the first call only returns the count
    uint32_t vk_phydevs_no = 0;
    VkResult res =
        vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL);
    if (res != VK_SUCCESS) {
        err("ERROR: in %s: %s\n", __func__, vk_Result_to_str(res));
        return VK_NULL_HANDLE;
    }
    if (vk_phydevs_no == 0) {
        return VK_NULL_HANDLE;
    }

    VkPhysicalDevice *vk_phydevs =
        malloc(sizeof(*vk_phydevs) * vk_phydevs_no);
    if (vk_phydevs == NULL) {
        err("ERROR: in %s: %s\n", __func__, strerror(errno));
        return VK_NULL_HANDLE;
    }

    // VK_INCOMPLETE still leaves vk_phydevs_no valid handles in the array
    res = vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs);
    if (res != VK_SUCCESS && res != VK_INCOMPLETE) {
        err("ERROR: in %s: %s\n", __func__, vk_Result_to_str(res));
        free(vk_phydevs);
        return VK_NULL_HANDLE;
    }

    // print out information about each device
    printf("Available Physical Devices: \n");
    for (uint32_t i = 0; i < vk_phydevs_no; i++) {
        vk_physical_device_print(i, vk_phydevs[i]);
    }

    // TODO: find the most suitable physical device, but for now every vulkan
    // device has to be compatible with compute shaders
    if (vk_phydevs_no > 0) {
        vk_phydev = vk_phydevs[0];
    }

    free(vk_phydevs);
    return vk_phydev;
}
|
|
|
|
|
|
|
|
// returns the index of a usable memory type in the device that is also backed by
|
|
|
|
// a heap with a size of at least min_size bytes
|
|
|
|
int vk_device_get_usable_memory_type_index(
|
|
|
|
VkPhysicalDevice vk_phydev, uint64_t min_size
|
|
|
|
)
|
|
|
|
{
|
|
|
|
int memtype_idx = -1;
|
|
|
|
VkPhysicalDeviceMemoryProperties dev_memory;
|
|
|
|
vkGetPhysicalDeviceMemoryProperties(vk_phydev, &dev_memory);
|
|
|
|
|
|
|
|
VkMemoryPropertyFlags flags = 0;
|
|
|
|
uint32_t idx = 0;
|
|
|
|
VkMemoryHeap mem;
|
|
|
|
for (unsigned i = 0; i < dev_memory.memoryTypeCount; i++) {
|
|
|
|
flags = dev_memory.memoryTypes[i].propertyFlags;
|
|
|
|
idx = dev_memory.memoryTypes[i].heapIndex;
|
|
|
|
mem = dev_memory.memoryHeaps[idx];
|
|
|
|
|
|
|
|
// TODO: do we need more flags to be set?
|
|
|
|
if (TEST_BIT(flags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
|
|
|
|
TEST_BIT(flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) &&
|
|
|
|
mem.size >= min_size) {
|
|
|
|
// as the name suggests we only care about the memory type
|
|
|
|
// and not the heap itself
|
|
|
|
memtype_idx = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return memtype_idx;
|
|
|
|
}
|
|
|
|
|
|
|
|
// do an allocation on the device of size bytes, according to krhonos it is a good
|
|
|
|
// idea to do one or few allocations and subdivide them on the host
|
|
|
|
// https://github.com/KhronosGroup/Vulkan-Guide/blob/main/chapters/memory_allocation.adoc
|
|
|
|
// this memory has to be freed using vkFreeMemory(device, mem, NULL);
|
|
|
|
VkDeviceMemory
|
|
|
|
vk_allocate_memory(VkDevice vk_logdev, uint32_t memtype_index, uint64_t size)
|
|
|
|
{
|
|
|
|
VkMemoryAllocateInfo alloc_info = {
|
|
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
|
|
|
.pNext = NULL,
|
|
|
|
.allocationSize = size,
|
|
|
|
.memoryTypeIndex = memtype_index,
|
|
|
|
};
|
|
|
|
VkDeviceMemory mem = VK_NULL_HANDLE;
|
|
|
|
|
|
|
|
VkResult res = vkAllocateMemory(vk_logdev, &alloc_info, NULL, &mem);
|
|
|
|
if (res != VK_SUCCESS) {
|
|
|
|
err("Error allocating memory on device: %s\n",
|
|
|
|
vk_Result_to_str(res));
|
|
|
|
return VK_NULL_HANDLE;
|
|
|
|
}
|
|
|
|
|
|
|
|
return mem;
|
|
|
|
}
|
|
|
|
|
|
|
|
// counterpart of vk_physical_device_get, currently a placeholder: nothing is
// released here, whether the handle is null or not
void vk_physical_device_destroy(VkPhysicalDevice vk_phydev)
{
    if (vk_phydev == VK_NULL_HANDLE) {
        return;
    }

    // ...
}
|
|
|
|
|
|
|
|
// return the index of the first queue family that supports compute on the device,
|
|
|
|
// returns a negative index on error
|
|
|
|
// A better approach would be to find a queue that only handled compute workloads
|
|
|
|
// (but you need to ignore the transfer bit and for our purposes the sparse binding
|
|
|
|
// bit too)
|
|
|
|
int vk_device_get_compute_queue_index(VkPhysicalDevice vk_phydev)
|
|
|
|
{
|
|
|
|
uint32_t vk_qfamilies_no = 0;
|
|
|
|
VkQueueFamilyProperties *vk_qfamilies;
|
|
|
|
int qfamily_idx = -1;
|
|
|
|
|
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL);
|
|
|
|
|
|
|
|
vk_qfamilies = malloc(sizeof(VkQueueFamilyProperties) * vk_qfamilies_no);
|
|
|
|
if (vk_qfamilies == NULL) {
|
|
|
|
err("ERROR: in %s: %s\n", __func__, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(
|
|
|
|
vk_phydev, &vk_qfamilies_no, vk_qfamilies
|
|
|
|
);
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < vk_qfamilies_no; i++) {
|
|
|
|
if (TEST_BIT(vk_qfamilies[i].queueFlags, VK_QUEUE_COMPUTE_BIT)) {
|
|
|
|
qfamily_idx = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(vk_qfamilies);
|
|
|
|
return qfamily_idx;
|
|
|
|
}
|
|
|
|
|
|
|
|
// create a logical device on vk_phydev with a single queue taken from the queue
// family qfamily_idx; no device features, extensions or layers are enabled
// returns the new device, or VK_NULL_HANDLE if the arguments are invalid (null
// physical device, negative family index) or if the device cannot be created
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx)
{
    // a negative index would silently turn into a huge unsigned family index
    if (vk_phydev == VK_NULL_HANDLE || qfamily_idx < 0) {
        err("ERROR: in %s: invalid physical device or queue family index\n",
            __func__);
        return VK_NULL_HANDLE;
    }

    VkDevice vk_logdev = VK_NULL_HANDLE;
    float vk_queue_priority = 1.0f;

    // specify which command queues to use for the physical device
    VkDeviceQueueCreateInfo vk_queueinfo = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
        .pNext = NULL,
        .flags = 0,
        .queueFamilyIndex = (uint32_t)qfamily_idx,
        .queueCount = 1,
        .pQueuePriorities = &vk_queue_priority,
    };

    // specify which device features to use
    // TODO: this
    VkPhysicalDeviceFeatures vk_phydev_features = {0};

    // actually create the logical device
    // TODO: figure out what device extensions are
    // FIXME: here validation layers are ignored but it is still better to define
    // them for compatibility
    VkDeviceCreateInfo vk_createinfo = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .pQueueCreateInfos = &vk_queueinfo,
        .queueCreateInfoCount = 1,
        .pEnabledFeatures = &vk_phydev_features,
        .ppEnabledExtensionNames = NULL,
        .enabledExtensionCount = 0,
        .ppEnabledLayerNames = NULL,
        .enabledLayerCount = 0,
    };

    VkResult res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev);
    if (res != VK_SUCCESS) {
        err("ERROR: Could not create vulkan logical device %s\n",
            vk_Result_to_str(res));
        return VK_NULL_HANDLE;
    }

#if VERBOSE > 0
    printf("Created vulkan logical device\n");
#endif
    return vk_logdev;
}
|
|
|
|
|
|
|
|
// destroy a logical device, no custom allocation callbacks are used
void vk_logical_device_destroy(VkDevice vk_logdev)
{
    const VkAllocationCallbacks *allocator = NULL;

    vkDestroyDevice(vk_logdev, allocator);
}
|
|
|
|
|
|
|
|
// get the queue handle from it's index
|
|
|
|
VkQueue vk_queue_get(VkDevice vk_logdev, int qfamily_idx)
|
|
|
|
{
|
|
|
|
VkQueue vk_queue = VK_NULL_HANDLE;
|
|
|
|
vkGetDeviceQueue(vk_logdev, qfamily_idx, 0, &vk_queue);
|
|
|
|
return vk_queue;
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(void)
|
|
|
|
{
|
|
|
|
#if VERBOSE > 0
|
|
|
|
if (SUPPORTS_NATIVE_FP16) {
|
|
|
|
printf("Processor supports half precision floating point\n");
|
|
|
|
} else {
|
|
|
|
printf("Processor doesn't support half precision floating point\n");
|
|
|
|
return EXIT_FAILURE;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
VkInstance vk_instance = vk_init();
|
|
|
|
if (vk_instance == VK_NULL_HANDLE) {
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance);
|
|
|
|
int qfamily_idx = vk_device_get_compute_queue_index(vk_phydev);
|
|
|
|
if (qfamily_idx < 0) {
|
|
|
|
err("The device does not support compute queues\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
VkDevice vk_logdev = vk_logical_device_create(vk_phydev, qfamily_idx);
|
|
|
|
|
|
|
|
int devmem_idx = vk_device_get_usable_memory_type_index(vk_phydev, GIB(1));
|
|
|
|
if (devmem_idx < 0) {
|
|
|
|
err("Could not find a suitable device memory heap\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
VkDeviceMemory mem = vk_allocate_memory(vk_logdev, devmem_idx, MIB(256));
|
|
|
|
if (mem == VK_NULL_HANDLE) {
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
} else {
|
|
|
|
printf("Successfully allocated memory on device\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: create buffers with vkCreateBuffer and VkCreateBufferInfo
|
|
|
|
// TODO: bind the buffer to the allocated memory with vkBindBufferMemory
|
|
|
|
// TODO: actually use that memory
|
|
|
|
|
|
|
|
vkFreeMemory(vk_logdev, mem, NULL);
|
|
|
|
|
|
|
|
vk_logical_device_destroy(vk_logdev);
|
|
|
|
vk_physical_device_destroy(vk_phydev);
|
|
|
|
vk_destroy(vk_instance);
|
|
|
|
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|