#include #include #include #include #include #include #include #include // check for half precision floating point support, for x86 this is equivalent to // checking for SSE2 #define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1) // print debug messages #define DEBUG 1 #define VERBOSE 0 // define half precision floating point #if SUPPORTS_NATIVE_FP16 // extension is needed due to -pedantic __extension__ typedef _Float16 half; #endif // useful macros #define TEST_BIT(f, b) (!!(f & b)) #define GIB(x) ((uint64_t)x * 1024u * 1024u * 1024u) #define MIB(x) ((uint64_t)x * 1024u * 1024u) #define KIB(x) ((uint64_t)x * 1024u) const char *vk_validation_layer[] = {"VK_LAYER_KHRONOS_validation"}; const uint32_t vk_validation_layer_no = 1; // FIXME: including vulkan/vk_enum_string_helper.h does not compile extern const char *vk_Result_to_str(VkResult input); // like printf but on stderr int err(const char *fmt, ...) { va_list ap; va_start(ap, fmt); int ret = vfprintf(stderr, fmt, ap); va_end(ap); return ret; } // print out all the instance extensions // NOTE: these are different from device and shader extensions int vk_enumerate_instance_extensions(void) { uint32_t ex_no = 0; #if VERBOSE > 0 vkEnumerateInstanceExtensionProperties(NULL, &ex_no, NULL); VkExtensionProperties *ex_arr = malloc(sizeof(VkExtensionProperties) * ex_no); if (ex_arr == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return -1; } vkEnumerateInstanceExtensionProperties(NULL, &ex_no, ex_arr); printf("Available Properties: \n"); for (uint32_t i = 0; i < ex_no; i++) { printf("\t%s\n", ex_arr[i].extensionName); } free(ex_arr); #endif return ex_no; } // on debug check for support of validation layers and activate one, a validation // layer is useful to do more error checking at runtime like ckecking for invalid // arguments, validation layers are available only if vulkan-sdk is installed // (vulkan-devel on arch) int vk_activate_validation_layer(VkInstanceCreateInfo *cinfo) { uint32_t prop_no = 0; #if DEBUG > 0 vkEnumerateInstanceLayerProperties(&prop_no, NULL); VkLayerProperties *prop_arr = malloc(sizeof(VkLayerProperties) * prop_no); if (prop_arr == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return -1; } vkEnumerateInstanceLayerProperties(&prop_no, prop_arr); for (uint32_t i = 0; i < prop_no; i++) { if (strcmp(prop_arr[i].layerName, vk_validation_layer[0]) == 0) { cinfo->enabledLayerCount = vk_validation_layer_no; cinfo->ppEnabledLayerNames = vk_validation_layer; free(prop_arr); return 0; } } free(prop_arr); return 1; #endif return 0; } VkInstance vk_init(void) { // create a vulkan instance and fill it with the application data VkResult res; VkInstance vk_instance = VK_NULL_HANDLE; VkApplicationInfo vk_appinfo = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = NULL, .pApplicationName = __FILE__, .applicationVersion = VK_MAKE_VERSION(0, 1, 0), .pEngineName = "no engine", .engineVersion = VK_MAKE_VERSION(0, 0, 0), .apiVersion = VK_API_VERSION_1_2, // api version 1.2 is more widely available }; vk_enumerate_instance_extensions(); // TODO: check for extension availability // TODO: does the lifetime of VkInstanceCreateInfo has to be the same as the // lifetime of VkInstance? const char *vk_instance_extensions[] = { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, }; const uint32_t vk_instance_extensions_no = (uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *)); VkInstanceCreateInfo vk_instanceinfo = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &vk_appinfo, .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR, .enabledExtensionCount = vk_instance_extensions_no, .ppEnabledExtensionNames = vk_instance_extensions, .enabledLayerCount = 0, }; int e = 0; if ((e = vk_activate_validation_layer(&vk_instanceinfo))) { err("Could not activate validation layers%s\n", e > 0 ? ": No validation layers found" : ""); } res = vkCreateInstance(&vk_instanceinfo, NULL, &vk_instance); if (res != VK_SUCCESS) { err("ERROR: Could not create vulkan instance %s", vk_Result_to_str(res)); return VK_NULL_HANDLE; } else { #if VERBOSE > 0 printf("Created vulkan instance\n"); #endif } return vk_instance; } void vk_destroy(VkInstance vk_instance) { // ... vkDestroyInstance(vk_instance, NULL); } VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) { // get the physical devices list VkPhysicalDevice vk_phydev = VK_NULL_HANDLE; uint32_t vk_phydevs_no = 0; VkPhysicalDevice *vk_phydevs; vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL); if (vk_phydevs_no == 0) { return vk_phydev; } vk_phydevs = malloc(sizeof(VkPhysicalDevice) * vk_phydevs_no); if (vk_phydevs == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return NULL; } vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs); // print out information about each device printf("Available Physical Devices: \n"); for (uint32_t i = 0; i < vk_phydevs_no; i++) { VkPhysicalDevice dev = vk_phydevs[i]; VkPhysicalDeviceProperties dev_properties; VkPhysicalDeviceFeatures dev_features; VkPhysicalDeviceMemoryProperties dev_memory; vkGetPhysicalDeviceProperties(dev, &dev_properties); vkGetPhysicalDeviceFeatures(dev, &dev_features); vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory); printf( "\tDevice %d: %s, Discrete: %s\n", i, dev_properties.deviceName, dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU ? "true" : "false" ); for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) { uint64_t mem_size = dev_memory.memoryHeaps[x].size; uint32_t mem_flags = dev_memory.memoryHeaps[x].flags; char is_local = TEST_BIT(mem_flags, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT); printf( "\t\tHeap %.2d: local: %d, size: %.3f MiB\n", x, is_local, (float)mem_size / (1024.0 * 1024.0) ); } } // TODO: find the most suitable physical device, but for now every vulkan // device has to be compatible with compute shaders vk_phydev = vk_phydevs[0]; free(vk_phydevs); return vk_phydev; } // returns the index of a usable memory type in the device that is also backed by // a heap with a size of at least min_size bytes int vk_device_get_usable_memory_type_index( VkPhysicalDevice vk_phydev, uint64_t min_size ) { int memtype_idx = -1; VkPhysicalDeviceMemoryProperties dev_memory; vkGetPhysicalDeviceMemoryProperties(vk_phydev, &dev_memory); VkMemoryPropertyFlags flags = 0; uint32_t idx = 0; VkMemoryHeap mem; for (unsigned i = 0; i < dev_memory.memoryTypeCount; i++) { flags = dev_memory.memoryTypes[i].propertyFlags; idx = dev_memory.memoryTypes[i].heapIndex; mem = dev_memory.memoryHeaps[idx]; // TODO: do we need more flags to be set? if (TEST_BIT(flags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && TEST_BIT(flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && mem.size >= min_size) { // as the name suggests we only care about the memory type // and not the heap itself memtype_idx = i; break; } } return memtype_idx; } // do an allocation on the device of size bytes, according to krhonos it is a good // idea to do one or few allocations and subdivide them on the host // https://github.com/KhronosGroup/Vulkan-Guide/blob/main/chapters/memory_allocation.adoc // this memory has to be freed using vkFreeMemory(device, mem, NULL); VkDeviceMemory vk_allocate_memory(VkDevice vk_logdev, uint32_t memtype_index, uint64_t size) { VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = NULL, .allocationSize = size, .memoryTypeIndex = memtype_index, }; VkDeviceMemory mem = VK_NULL_HANDLE; VkResult res = vkAllocateMemory(vk_logdev, &alloc_info, NULL, &mem); if (res != VK_SUCCESS) { err("Error allocating memory on device: %s\n", vk_Result_to_str(res)); return VK_NULL_HANDLE; } return mem; } void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) { if (vk_phydev != VK_NULL_HANDLE) { // ... } } // return the index of the first queue family that supports compute on the device, // returns a negative index on error // A better approach would be to find a queue that only handled compute workloads // (but you need to ignore the transfer bit and for our purposes the sparse binding // bit too) int vk_device_get_compute_queue_index(VkPhysicalDevice vk_phydev) { uint32_t vk_qfamilies_no = 0; VkQueueFamilyProperties *vk_qfamilies; int qfamily_idx = -1; vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); vk_qfamilies = malloc(sizeof(VkQueueFamilyProperties) * vk_qfamilies_no); if (vk_qfamilies == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return -1; } vkGetPhysicalDeviceQueueFamilyProperties( vk_phydev, &vk_qfamilies_no, vk_qfamilies ); for (uint32_t i = 0; i < vk_qfamilies_no; i++) { if (TEST_BIT(vk_qfamilies[i].queueFlags, VK_QUEUE_COMPUTE_BIT)) { qfamily_idx = i; } } free(vk_qfamilies); return qfamily_idx; } VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) { VkResult res; VkDevice vk_logdev = VK_NULL_HANDLE; float vk_queue_priority = 1.0f; // specify which command queues to use for the physical device VkDeviceQueueCreateInfo vk_queueinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .pNext = NULL, .flags = 0, .queueFamilyIndex = qfamily_idx, .queueCount = 1, .pQueuePriorities = &vk_queue_priority, }; // specify which device features to use // TODO: this VkPhysicalDeviceFeatures vk_phydev_features = {0}; // actually create the logical device // TODO: figure out what device extensions are // FIXME: here validation layers are ignored but it is still better to define // them for compatibility VkDeviceCreateInfo vk_createinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pQueueCreateInfos = &vk_queueinfo, .queueCreateInfoCount = 1, .pEnabledFeatures = &vk_phydev_features, .ppEnabledExtensionNames = NULL, .enabledExtensionCount = 0, .ppEnabledLayerNames = NULL, .enabledLayerCount = 0, }; res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev); if (res != VK_SUCCESS) { err("ERROR: Could not create vulkan logical device %s", vk_Result_to_str(res)); return VK_NULL_HANDLE; } else { #if VERBOSE > 0 printf("Created vulkan logical device\n"); #endif } return vk_logdev; } void vk_logical_device_destroy(VkDevice vk_logdev) { vkDestroyDevice(vk_logdev, NULL); } // get the queue handle from it's index VkQueue vk_queue_get(VkDevice vk_logdev, int qfamily_idx) { VkQueue vk_queue = VK_NULL_HANDLE; vkGetDeviceQueue(vk_logdev, qfamily_idx, 0, &vk_queue); return vk_queue; } int main(void) { #if VERBOSE > 0 if (SUPPORTS_NATIVE_FP16) { printf("Processor supports half precision floating point\n"); } else { printf("Processor doesn't support half precision floating point\n"); return EXIT_FAILURE; } #endif VkInstance vk_instance = vk_init(); if (vk_instance == VK_NULL_HANDLE) { exit(EXIT_FAILURE); } VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); int qfamily_idx = vk_device_get_compute_queue_index(vk_phydev); if (qfamily_idx < 0) { err("The device does not support compute queues\n"); exit(EXIT_FAILURE); } VkDevice vk_logdev = vk_logical_device_create(vk_phydev, qfamily_idx); int devmem_idx = vk_device_get_usable_memory_type_index(vk_phydev, GIB(1)); if (devmem_idx < 0) { err("Could not find a suitable device memory heap\n"); exit(EXIT_FAILURE); } VkDeviceMemory mem = vk_allocate_memory(vk_logdev, devmem_idx, MIB(256)); if (mem == VK_NULL_HANDLE) { exit(EXIT_FAILURE); } else { printf("Successfully allocated memory on device\n"); } // TODO: create buffers with vkCreateBuffer and VkCreateBufferInfo // TODO: bind the buffer to the allocated memory with vkBindBufferMemory // TODO: actually use that memory vkFreeMemory(vk_logdev, mem, NULL); vk_logical_device_destroy(vk_logdev); vk_physical_device_destroy(vk_phydev); vk_destroy(vk_instance); return EXIT_SUCCESS; }