From ef9161fabee659de6e87a0d8d8a2aef2c772fd67 Mon Sep 17 00:00:00 2001 From: Alessandro Mauri Date: Mon, 25 Dec 2023 17:52:33 +0100 Subject: [PATCH] stuff --- .clang-format | 2 +- test3/README | 27 ++++++++++++++++- test3/main.c | 83 +++++++++++++++++++++++++++++++-------------------- 3 files changed, 77 insertions(+), 35 deletions(-) diff --git a/.clang-format b/.clang-format index ae6c3fd..2d50375 100644 --- a/.clang-format +++ b/.clang-format @@ -1,7 +1,7 @@ # linux kernel style formatting BasedOnStyle: LLVM IndentWidth: 8 -UseTab: Always +UseTab: AlignWithSpaces BreakBeforeBraces: Linux AllowShortIfStatementsOnASingleLine: false diff --git a/test3/README b/test3/README index b479d1d..e2fec98 100644 --- a/test3/README +++ b/test3/README @@ -1 +1,26 @@ -Trying to implement test2 with just vulkan and in C \ No newline at end of file +### Trying to implement test2 with just vulkan and in C + +Just trying to get large matrix multiplication going in C since C++ really fucks with +my grey matter + +### Useful links: + +[Vulkan Tutorial](https://vulkan-tutorial.com): A full Vulkan tutorial implemented in C++ +but it is easy to port to C, the only brain scratcher are lifetimes. 
Still this is more +targeted towards graphics rather than Compute, as such it is not easy to differentiate +which parts are needed and which aren't + +[Simple Vulkan Compute Example](https://bakedbits.dev/posts/vulkan-compute-example/): +a bit brief, as such easier to skim through + +[A Simple Vulkan Compute Example](https://www.neilhenning.dev/posts/a-simple-vulkan-compute-example/): +This time in C, more complete than the homonymous article and still easy to follow + +[VkGuide](https://vkguide.dev/docs/gpudriven/compute_shaders/): Good resource but not +really a tutorial + +[Vulkan Samples](https://github.com/SaschaWillems/Vulkan-Samples): A collection of sample +programs, still haven't read through them but might be good for implementation details + +[VkFFT](https://github.com/DTolm/VkFFT): Fast Fourier Transform on vulkan shader, still +haven't looked at it but it might be great to learn more advanced shader techniques diff --git a/test3/main.c b/test3/main.c index 91b585c..7f80e11 100644 --- a/test3/main.c +++ b/test3/main.c @@ -6,13 +6,14 @@ #include #include +#include // check for half precision floating point support, for x86 this is equivalent to // checking for SSE2 #define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1) // print debug messages -#define DEBUG 1 -#define VERBOSE 0 +#define DEBUG 1 +#define VERBOSE 0 // define half precision floating point #if SUPPORTS_NATIVE_FP16 @@ -97,13 +98,14 @@ VkInstance vk_init(void) VkInstance vk_instance = VK_NULL_HANDLE; VkApplicationInfo vk_appinfo = { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pNext = NULL, - .pApplicationName = __FILE__, + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = NULL, + .pApplicationName = __FILE__, .applicationVersion = VK_MAKE_VERSION(0, 1, 0), - .pEngineName = "no engine", - .engineVersion = VK_MAKE_VERSION(0, 0, 0), - .apiVersion = VK_API_VERSION_1_3, + .pEngineName = "no engine", + .engineVersion = VK_MAKE_VERSION(0, 0, 0), + .apiVersion = + 
VK_API_VERSION_1_2, // api version 1.2 is more widely available }; vk_enumerate_instance_extensions(); @@ -118,12 +120,12 @@ VkInstance vk_init(void) (uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *)); VkInstanceCreateInfo vk_instanceinfo = { - .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &vk_appinfo, - .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR, + .flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR, .enabledExtensionCount = vk_instance_extensions_no, .ppEnabledExtensionNames = vk_instance_extensions, - .enabledLayerCount = 0, + .enabledLayerCount = 0, }; int e = 0; @@ -153,9 +155,10 @@ void vk_destroy(VkInstance vk_instance) VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) { + // get the physical devices list VkPhysicalDevice vk_phydev = VK_NULL_HANDLE; - uint32_t vk_phydevs_no = 0; + uint32_t vk_phydevs_no = 0; VkPhysicalDevice *vk_phydevs; vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL); @@ -171,24 +174,38 @@ VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs); + // print out information about each device printf("Available Physical Devices: \n"); for (uint32_t i = 0; i < vk_phydevs_no; i++) { - VkPhysicalDevice device = vk_phydevs[i]; - VkPhysicalDeviceProperties device_properties; - VkPhysicalDeviceFeatures device_features; + VkPhysicalDevice dev = vk_phydevs[i]; + VkPhysicalDeviceProperties dev_properties; + VkPhysicalDeviceFeatures dev_features; + VkPhysicalDeviceMemoryProperties dev_memory; - vkGetPhysicalDeviceProperties(device, &device_properties); - vkGetPhysicalDeviceFeatures(device, &device_features); + vkGetPhysicalDeviceProperties(dev, &dev_properties); + vkGetPhysicalDeviceFeatures(dev, &dev_features); + vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory); printf( "\tDevice %d: %s, Discrete: %s\n", i, - device_properties.deviceName, - 
device_properties.deviceType == - VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU + dev_properties.deviceName, + dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU ? "true" : "false" ); + + for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) { + uint64_t mem_size = dev_memory.memoryHeaps[x].size; + uint32_t mem_flags = dev_memory.memoryHeaps[x].flags; + char mem_local = mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; + printf( + "\t\tHeap %.2d: local: %d, size: %.3f MiB\n", + x, + mem_local, + (float)mem_size / (1024.0 * 1024.0) + ); + } } // TODO: find the most suitable physical device, but for now every vulkan @@ -210,9 +227,9 @@ void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) // returns a negative index on error int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) { - uint32_t vk_qfamilies_no = 0; + uint32_t vk_qfamilies_no = 0; VkQueueFamilyProperties *vk_qfamilies; - int supports = -1; + int supports = -1; vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); @@ -239,16 +256,16 @@ int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) { VkResult res; - VkDevice vk_logdev = VK_NULL_HANDLE; - float vk_queue_priority = 1.0f; + VkDevice vk_logdev = VK_NULL_HANDLE; + float vk_queue_priority = 1.0f; // specify which command queues to use for the physical device VkDeviceQueueCreateInfo vk_queueinfo = { - .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .pNext = NULL, - .flags = 0, + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .pNext = NULL, + .flags = 0, .queueFamilyIndex = qfamily_idx, - .queueCount = 1, + .queueCount = 1, .pQueuePriorities = &vk_queue_priority, }; @@ -261,14 +278,14 @@ VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) // FIXME: here validation layers are ignored but it is still better to define // them for compatibility VkDeviceCreateInfo vk_createinfo = { - .sType = 
VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .pQueueCreateInfos = &vk_queueinfo, + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .pQueueCreateInfos = &vk_queueinfo, .queueCreateInfoCount = 1, - .pEnabledFeatures = &vk_phydev_features, + .pEnabledFeatures = &vk_phydev_features, .ppEnabledExtensionNames = NULL, .enabledExtensionCount = 0, .ppEnabledLayerNames = NULL, - .enabledLayerCount = 0, + .enabledLayerCount = 0, }; res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev); @@ -313,7 +330,7 @@ int main(void) exit(EXIT_FAILURE); } VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); - int qfamily_idx = vk_device_compute_queue_index(vk_phydev); + int qfamily_idx = vk_device_compute_queue_index(vk_phydev); if (qfamily_idx < 0) { err("The device does not support compute queues\n"); exit(EXIT_FAILURE);