From ef9161fabee659de6e87a0d8d8a2aef2c772fd67 Mon Sep 17 00:00:00 2001
From: Alessandro Mauri <alemauri001@gmail.com>
Date: Mon, 25 Dec 2023 17:52:33 +0100
Subject: [PATCH] stuff

---
 .clang-format |  2 +-
 test3/README  | 27 ++++++++++++++++-
 test3/main.c  | 83 +++++++++++++++++++++++++++++++--------------------
 3 files changed, 77 insertions(+), 35 deletions(-)

diff --git a/.clang-format b/.clang-format
index ae6c3fd..2d50375 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,7 +1,7 @@
 # linux kernel style formatting
 BasedOnStyle: LLVM
 IndentWidth: 8
-UseTab: Always
+UseTab: AlignWithSpaces
 
 BreakBeforeBraces: Linux
 AllowShortIfStatementsOnASingleLine: false
diff --git a/test3/README b/test3/README
index b479d1d..e2fec98 100644
--- a/test3/README
+++ b/test3/README
@@ -1 +1,26 @@
-Trying to implement test2 with just vulkan and in C
\ No newline at end of file
+### Trying to implement test2 with just vulkan and in C
+
+Just trying to get large matrix multiplication going in C since C++ really fucks with
+my grey matter
+
+### Useful links:
+
+[Vulkan Tutorial](https://vulkan-tutorial.com): A full Vulkan tutorial implemented in C++
+but easy to port to C; the only head-scratcher is object lifetimes. Still, it is
+targeted more at graphics than at compute, so it is not easy to tell which parts are
+needed and which aren't
+
+[Simple Vulkan Compute Example](https://bakedbits.dev/posts/vulkan-compute-example/):
+a bit brief, and as such easier to skim through
+
+[A Simple Vulkan Compute Example](https://www.neilhenning.dev/posts/a-simple-vulkan-compute-example/):
+This time in C, more complete than the similarly named article above and still easy to follow
+
+[VkGuide](https://vkguide.dev/docs/gpudriven/compute_shaders/): Good resource but not
+really a tutorial
+
+[Vulkan Samples](https://github.com/SaschaWillems/Vulkan-Samples): A collection of sample
+programs; I still haven't read through them, but they might be good for implementation details
+
+[VkFFT](https://github.com/DTolm/VkFFT): Fast Fourier Transform implemented in Vulkan shaders; I still
+haven't looked at it, but it might be great for learning more advanced shader techniques
diff --git a/test3/main.c b/test3/main.c
index 91b585c..7f80e11 100644
--- a/test3/main.c
+++ b/test3/main.c
@@ -6,13 +6,14 @@
 #include <errno.h>
 
 #include <vulkan/vulkan.h>
+#include <vulkan/vulkan_core.h>
 
 // check for half precision floating point support, for x86 this is equivalent to
 // checking for SSE2
 #define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1)
 // print debug messages
-#define DEBUG		     1
-#define VERBOSE		     0
+#define DEBUG                1
+#define VERBOSE              0
 
 // define half precision floating point
 #if SUPPORTS_NATIVE_FP16
@@ -97,13 +98,14 @@ VkInstance vk_init(void)
 	VkInstance vk_instance = VK_NULL_HANDLE;
 
 	VkApplicationInfo vk_appinfo = {
-	    .sType		= VK_STRUCTURE_TYPE_APPLICATION_INFO,
-	    .pNext		= NULL,
-	    .pApplicationName	= __FILE__,
+	    .sType              = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+	    .pNext              = NULL,
+	    .pApplicationName   = __FILE__,
 	    .applicationVersion = VK_MAKE_VERSION(0, 1, 0),
-	    .pEngineName	= "no engine",
-	    .engineVersion	= VK_MAKE_VERSION(0, 0, 0),
-	    .apiVersion		= VK_API_VERSION_1_3,
+	    .pEngineName        = "no engine",
+	    .engineVersion      = VK_MAKE_VERSION(0, 0, 0),
+	    .apiVersion =
+		VK_API_VERSION_1_2, // api version 1.2 is more widely available
 	};
 
 	vk_enumerate_instance_extensions();
@@ -118,12 +120,12 @@ VkInstance vk_init(void)
 	    (uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *));
 
 	VkInstanceCreateInfo vk_instanceinfo = {
-	    .sType	      = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+	    .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
 	    .pApplicationInfo = &vk_appinfo,
-	    .flags	      = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
+	    .flags            = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
 	    .enabledExtensionCount   = vk_instance_extensions_no,
 	    .ppEnabledExtensionNames = vk_instance_extensions,
-	    .enabledLayerCount	     = 0,
+	    .enabledLayerCount       = 0,
 	};
 
 	int e = 0;
@@ -153,9 +155,10 @@ void vk_destroy(VkInstance vk_instance)
 
 VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance)
 {
+	// get the physical devices list
 	VkPhysicalDevice vk_phydev = VK_NULL_HANDLE;
 
-	uint32_t	  vk_phydevs_no = 0;
+	uint32_t          vk_phydevs_no = 0;
 	VkPhysicalDevice *vk_phydevs;
 	vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL);
 
@@ -171,24 +174,38 @@ VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance)
 
 	vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs);
 
+	// print out information about each device
 	printf("Available Physical Devices: \n");
 	for (uint32_t i = 0; i < vk_phydevs_no; i++) {
-		VkPhysicalDevice	   device = vk_phydevs[i];
-		VkPhysicalDeviceProperties device_properties;
-		VkPhysicalDeviceFeatures   device_features;
+		VkPhysicalDevice                 dev = vk_phydevs[i];
+		VkPhysicalDeviceProperties       dev_properties;
+		VkPhysicalDeviceFeatures         dev_features;
+		VkPhysicalDeviceMemoryProperties dev_memory;
 
-		vkGetPhysicalDeviceProperties(device, &device_properties);
-		vkGetPhysicalDeviceFeatures(device, &device_features);
+		vkGetPhysicalDeviceProperties(dev, &dev_properties);
+		vkGetPhysicalDeviceFeatures(dev, &dev_features);
+		vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory);
 
 		printf(
 		    "\tDevice %d: %s, Discrete: %s\n",
 		    i,
-		    device_properties.deviceName,
-		    device_properties.deviceType ==
-			    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
+		    dev_properties.deviceName,
+		    dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
 			? "true"
 			: "false"
 		);
+
+		for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) {
+			uint64_t mem_size  = dev_memory.memoryHeaps[x].size;
+			uint32_t mem_flags = dev_memory.memoryHeaps[x].flags;
+			char mem_local = mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
+			printf(
+			    "\t\tHeap %.2d: local: %d, size: %.3f MiB\n",
+			    x,
+			    mem_local,
+			    (float)mem_size / (1024.0 * 1024.0)
+			);
+		}
 	}
 
 	// TODO: find the most suitable physical device, but for now every vulkan
@@ -210,9 +227,9 @@ void vk_physical_device_destroy(VkPhysicalDevice vk_phydev)
 // returns a negative index on error
 int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev)
 {
-	uint32_t		 vk_qfamilies_no = 0;
+	uint32_t                 vk_qfamilies_no = 0;
 	VkQueueFamilyProperties *vk_qfamilies;
-	int			 supports = -1;
+	int                      supports = -1;
 
 	vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL);
 
@@ -239,16 +256,16 @@ int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev)
 VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx)
 {
 	VkResult res;
-	VkDevice vk_logdev	   = VK_NULL_HANDLE;
-	float	 vk_queue_priority = 1.0f;
+	VkDevice vk_logdev         = VK_NULL_HANDLE;
+	float    vk_queue_priority = 1.0f;
 
 	// specify which command queues to use for the physical device
 	VkDeviceQueueCreateInfo vk_queueinfo = {
-	    .sType	      = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-	    .pNext	      = NULL,
-	    .flags	      = 0,
+	    .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+	    .pNext            = NULL,
+	    .flags            = 0,
 	    .queueFamilyIndex = qfamily_idx,
-	    .queueCount	      = 1,
+	    .queueCount       = 1,
 	    .pQueuePriorities = &vk_queue_priority,
 	};
 
@@ -261,14 +278,14 @@ VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx)
 	// FIXME: here validation layers are ignored but it is still better to define
 	//        them for compatibility
 	VkDeviceCreateInfo vk_createinfo = {
-	    .sType		     = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
-	    .pQueueCreateInfos	     = &vk_queueinfo,
+	    .sType                   = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+	    .pQueueCreateInfos       = &vk_queueinfo,
 	    .queueCreateInfoCount    = 1,
-	    .pEnabledFeatures	     = &vk_phydev_features,
+	    .pEnabledFeatures        = &vk_phydev_features,
 	    .ppEnabledExtensionNames = NULL,
 	    .enabledExtensionCount   = 0,
 	    .ppEnabledLayerNames     = NULL,
-	    .enabledLayerCount	     = 0,
+	    .enabledLayerCount       = 0,
 	};
 
 	res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev);
@@ -313,7 +330,7 @@ int main(void)
 		exit(EXIT_FAILURE);
 	}
 	VkPhysicalDevice vk_phydev   = vk_physical_device_get(vk_instance);
-	int		 qfamily_idx = vk_device_compute_queue_index(vk_phydev);
+	int              qfamily_idx = vk_device_compute_queue_index(vk_phydev);
 	if (qfamily_idx < 0) {
 		err("The device does not support compute queues\n");
 		exit(EXIT_FAILURE);