#include #include #include #include #include #include #include #include // check for half precision floating point support, for x86 this is equivalent to // checking for SSE2 #define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1) // print debug messages #define DEBUG 1 #define VERBOSE 0 // define half precision floating point #if SUPPORTS_NATIVE_FP16 // extension is needed due to -pedantic __extension__ typedef _Float16 half; #endif const char *vk_validation_layer[] = {"VK_LAYER_KHRONOS_validation"}; const uint32_t vk_validation_layer_no = 1; // FIXME: including vulkan/vk_enum_string_helper.h does not compile extern const char *vk_Result_to_str(VkResult input); // like printf but on stderr int err(const char *fmt, ...) { va_list ap; va_start(ap, fmt); int ret = vfprintf(stderr, fmt, ap); va_end(ap); return ret; } // print out all the instance extensions // NOTE: these are different from device and shader extensions int vk_enumerate_instance_extensions(void) { uint32_t ex_no = 0; #if VERBOSE > 0 vkEnumerateInstanceExtensionProperties(NULL, &ex_no, NULL); VkExtensionProperties *ex_arr = malloc(sizeof(VkExtensionProperties) * ex_no); if (ex_arr == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return -1; } vkEnumerateInstanceExtensionProperties(NULL, &ex_no, ex_arr); printf("Available Properties: \n"); for (uint32_t i = 0; i < ex_no; i++) { printf("\t%s\n", ex_arr[i].extensionName); } free(ex_arr); #endif return ex_no; } // on debug check for support of validation layers and activate one, a validation // layer is useful to do more error checking at runtime like ckecking for invalid // arguments, validation layers are available only if vulkan-sdk is installed // (vulkan-devel on arch) int vk_activate_validation_layer(VkInstanceCreateInfo *cinfo) { uint32_t prop_no = 0; #if DEBUG > 0 vkEnumerateInstanceLayerProperties(&prop_no, NULL); VkLayerProperties *prop_arr = malloc(sizeof(VkLayerProperties) * prop_no); if (prop_arr == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return -1; } vkEnumerateInstanceLayerProperties(&prop_no, prop_arr); for (uint32_t i = 0; i < prop_no; i++) { if (strcmp(prop_arr[i].layerName, vk_validation_layer[0]) == 0) { cinfo->enabledLayerCount = vk_validation_layer_no; cinfo->ppEnabledLayerNames = vk_validation_layer; free(prop_arr); return 0; } } free(prop_arr); return 1; #endif return 0; } VkInstance vk_init(void) { // create a vulkan instance and fill it with the application data VkResult res; VkInstance vk_instance = VK_NULL_HANDLE; VkApplicationInfo vk_appinfo = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pNext = NULL, .pApplicationName = __FILE__, .applicationVersion = VK_MAKE_VERSION(0, 1, 0), .pEngineName = "no engine", .engineVersion = VK_MAKE_VERSION(0, 0, 0), .apiVersion = VK_API_VERSION_1_2, // api version 1.2 is more widely available }; vk_enumerate_instance_extensions(); // TODO: check for extension availability // TODO: does the lifetime of VkInstanceCreateInfo has to be the same as the // lifetime of VkInstance? const char *vk_instance_extensions[] = { VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME, }; const uint32_t vk_instance_extensions_no = (uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *)); VkInstanceCreateInfo vk_instanceinfo = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &vk_appinfo, .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR, .enabledExtensionCount = vk_instance_extensions_no, .ppEnabledExtensionNames = vk_instance_extensions, .enabledLayerCount = 0, }; int e = 0; if ((e = vk_activate_validation_layer(&vk_instanceinfo))) { err("Could not activate validation layers%s\n", e > 0 ? ": No validation layers found" : ""); } res = vkCreateInstance(&vk_instanceinfo, NULL, &vk_instance); if (res != VK_SUCCESS) { err("ERROR: Could not create vulkan instance %s", vk_Result_to_str(res)); return VK_NULL_HANDLE; } else { #if VERBOSE > 0 printf("Created vulkan instance\n"); #endif } return vk_instance; } void vk_destroy(VkInstance vk_instance) { // ... vkDestroyInstance(vk_instance, NULL); } VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) { // get the physical devices list VkPhysicalDevice vk_phydev = VK_NULL_HANDLE; uint32_t vk_phydevs_no = 0; VkPhysicalDevice *vk_phydevs; vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL); if (vk_phydevs_no == 0) { return vk_phydev; } vk_phydevs = malloc(sizeof(VkPhysicalDevice) * vk_phydevs_no); if (vk_phydevs == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return NULL; } vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs); // print out information about each device printf("Available Physical Devices: \n"); for (uint32_t i = 0; i < vk_phydevs_no; i++) { VkPhysicalDevice dev = vk_phydevs[i]; VkPhysicalDeviceProperties dev_properties; VkPhysicalDeviceFeatures dev_features; VkPhysicalDeviceMemoryProperties dev_memory; vkGetPhysicalDeviceProperties(dev, &dev_properties); vkGetPhysicalDeviceFeatures(dev, &dev_features); vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory); printf( "\tDevice %d: %s, Discrete: %s\n", i, dev_properties.deviceName, dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU ? "true" : "false" ); for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) { uint64_t mem_size = dev_memory.memoryHeaps[x].size; uint32_t mem_flags = dev_memory.memoryHeaps[x].flags; char mem_local = mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; printf( "\t\tHeap %.2d: local: %d, size: %.3f MiB\n", x, mem_local, (float)mem_size / (1024.0 * 1024.0) ); } } // TODO: find the most suitable physical device, but for now every vulkan // device has to be compatible with compute shaders vk_phydev = vk_phydevs[0]; free(vk_phydevs); return vk_phydev; } void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) { if (vk_phydev != VK_NULL_HANDLE) { // ... } } // return the index of the first queue family that supports compute on the device, // returns a negative index on error int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) { uint32_t vk_qfamilies_no = 0; VkQueueFamilyProperties *vk_qfamilies; int supports = -1; vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); vk_qfamilies = malloc(sizeof(VkQueueFamilyProperties) * vk_qfamilies_no); if (vk_qfamilies == NULL) { err("ERROR: in %s: %s\n", __func__, strerror(errno)); return -1; } vkGetPhysicalDeviceQueueFamilyProperties( vk_phydev, &vk_qfamilies_no, vk_qfamilies ); for (uint32_t i = 0; i < vk_qfamilies_no; i++) { if (vk_qfamilies[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { supports = i; } } free(vk_qfamilies); return supports; } VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) { VkResult res; VkDevice vk_logdev = VK_NULL_HANDLE; float vk_queue_priority = 1.0f; // specify which command queues to use for the physical device VkDeviceQueueCreateInfo vk_queueinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .pNext = NULL, .flags = 0, .queueFamilyIndex = qfamily_idx, .queueCount = 1, .pQueuePriorities = &vk_queue_priority, }; // specify which device features to use // TODO: this VkPhysicalDeviceFeatures vk_phydev_features = {0}; // actually create the logical device // TODO: figure out what device extensions are // FIXME: here validation layers are ignored but it is still better to define // them for compatibility VkDeviceCreateInfo vk_createinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pQueueCreateInfos = &vk_queueinfo, .queueCreateInfoCount = 1, .pEnabledFeatures = &vk_phydev_features, .ppEnabledExtensionNames = NULL, .enabledExtensionCount = 0, .ppEnabledLayerNames = NULL, .enabledLayerCount = 0, }; res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev); if (res != VK_SUCCESS) { err("ERROR: Could not create vulkan logical device %s", vk_Result_to_str(res)); return VK_NULL_HANDLE; } else { #if VERBOSE > 0 printf("Created vulkan logical device\n"); #endif } return vk_logdev; } void vk_logical_device_destroy(VkDevice vk_logdev) { vkDestroyDevice(vk_logdev, NULL); } VkQueue vk_queue_get(VkDevice vk_logdev, int qfamily_idx) { VkQueue vk_queue = VK_NULL_HANDLE; vkGetDeviceQueue(vk_logdev, qfamily_idx, 0, &vk_queue); return vk_queue; } int main(void) { #if VERBOSE > 0 if (SUPPORTS_NATIVE_FP16) { printf("Processor supports half precision floating point\n"); } else { printf("Processor doesn't support half precision floating point\n"); return EXIT_FAILURE; } #endif VkInstance vk_instance = vk_init(); if (vk_instance == VK_NULL_HANDLE) { exit(EXIT_FAILURE); } VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); int qfamily_idx = vk_device_compute_queue_index(vk_phydev); if (qfamily_idx < 0) { err("The device does not support compute queues\n"); exit(EXIT_FAILURE); } VkDevice vk_logdev = vk_logical_device_create(vk_phydev, qfamily_idx); vk_logical_device_destroy(vk_logdev); vk_physical_device_destroy(vk_phydev); vk_destroy(vk_instance); return EXIT_SUCCESS; }