stuff
This commit is contained in:
parent
ee79b4c195
commit
ef9161fabe
@ -1,7 +1,7 @@
|
|||||||
# linux kernel style formatting
|
# linux kernel style formatting
|
||||||
BasedOnStyle: LLVM
|
BasedOnStyle: LLVM
|
||||||
IndentWidth: 8
|
IndentWidth: 8
|
||||||
UseTab: Always
|
UseTab: AlignWithSpaces
|
||||||
|
|
||||||
BreakBeforeBraces: Linux
|
BreakBeforeBraces: Linux
|
||||||
AllowShortIfStatementsOnASingleLine: false
|
AllowShortIfStatementsOnASingleLine: false
|
||||||
|
27
test3/README
27
test3/README
@ -1 +1,26 @@
|
|||||||
Trying to implement test2 with just vulkan and in C
|
### Trying to implement test2 with just vulkan and in C
|
||||||
|
|
||||||
|
Just trying to get large matrix multiplication going in C since C++ really fucks with
|
||||||
|
my grey matter
|
||||||
|
|
||||||
|
### Useful links:
|
||||||
|
|
||||||
|
[Vulkan Tutorial](https://vulkan-tutorial.com): A full Vulkan tutorial implemented in C++
|
||||||
|
but it is easy to port to C; the only head-scratcher is lifetimes. Still, this is more
|
||||||
|
targeted towards graphics rather than compute; as such, it is not easy to differentiate
|
||||||
|
which parts are needed and which aren't
|
||||||
|
|
||||||
|
[Simple Vulkan Compute Example](https://bakedbits.dev/posts/vulkan-compute-example/):
|
||||||
|
a bit brief, and as such easier to skim through
|
||||||
|
|
||||||
|
[A Simple Vulkan Compute Example](https://www.neilhenning.dev/posts/a-simple-vulkan-compute-example/):
|
||||||
|
This time in C, more complete than the homonymous article and still easy to follow
|
||||||
|
|
||||||
|
[VkGuide](https://vkguide.dev/docs/gpudriven/compute_shaders/): Good resource but not
|
||||||
|
really a tutorial
|
||||||
|
|
||||||
|
[Vulkan Samples](https://github.com/SaschaWillems/Vulkan-Samples): A collection of sample
|
||||||
|
programs, still haven't read through them but might be good for implementation details
|
||||||
|
|
||||||
|
[VkFFT](https://github.com/DTolm/VkFFT): Fast Fourier Transform on vulkan shader, still
|
||||||
|
haven't looked at it but it might be great to learn more advanced shader techniques
|
||||||
|
83
test3/main.c
83
test3/main.c
@ -6,13 +6,14 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include <vulkan/vulkan.h>
|
#include <vulkan/vulkan.h>
|
||||||
|
#include <vulkan/vulkan_core.h>
|
||||||
|
|
||||||
// check for half precision floating point support, for x86 this is equivalent to
|
// check for half precision floating point support, for x86 this is equivalent to
|
||||||
// checking for SSE2
|
// checking for SSE2
|
||||||
#define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1)
|
#define SUPPORTS_NATIVE_FP16 (__x86_64__ == 1 && __SSE2__ == 1)
|
||||||
// print debug messages
|
// print debug messages
|
||||||
#define DEBUG 1
|
#define DEBUG 1
|
||||||
#define VERBOSE 0
|
#define VERBOSE 0
|
||||||
|
|
||||||
// define half precision floating point
|
// define half precision floating point
|
||||||
#if SUPPORTS_NATIVE_FP16
|
#if SUPPORTS_NATIVE_FP16
|
||||||
@ -97,13 +98,14 @@ VkInstance vk_init(void)
|
|||||||
VkInstance vk_instance = VK_NULL_HANDLE;
|
VkInstance vk_instance = VK_NULL_HANDLE;
|
||||||
|
|
||||||
VkApplicationInfo vk_appinfo = {
|
VkApplicationInfo vk_appinfo = {
|
||||||
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
|
||||||
.pNext = NULL,
|
.pNext = NULL,
|
||||||
.pApplicationName = __FILE__,
|
.pApplicationName = __FILE__,
|
||||||
.applicationVersion = VK_MAKE_VERSION(0, 1, 0),
|
.applicationVersion = VK_MAKE_VERSION(0, 1, 0),
|
||||||
.pEngineName = "no engine",
|
.pEngineName = "no engine",
|
||||||
.engineVersion = VK_MAKE_VERSION(0, 0, 0),
|
.engineVersion = VK_MAKE_VERSION(0, 0, 0),
|
||||||
.apiVersion = VK_API_VERSION_1_3,
|
.apiVersion =
|
||||||
|
VK_API_VERSION_1_2, // api version 1.2 is more widely available
|
||||||
};
|
};
|
||||||
|
|
||||||
vk_enumerate_instance_extensions();
|
vk_enumerate_instance_extensions();
|
||||||
@ -118,12 +120,12 @@ VkInstance vk_init(void)
|
|||||||
(uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *));
|
(uint32_t)(sizeof(vk_instance_extensions) / sizeof(char *));
|
||||||
|
|
||||||
VkInstanceCreateInfo vk_instanceinfo = {
|
VkInstanceCreateInfo vk_instanceinfo = {
|
||||||
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
|
||||||
.pApplicationInfo = &vk_appinfo,
|
.pApplicationInfo = &vk_appinfo,
|
||||||
.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
|
.flags = VK_INSTANCE_CREATE_ENUMERATE_PORTABILITY_BIT_KHR,
|
||||||
.enabledExtensionCount = vk_instance_extensions_no,
|
.enabledExtensionCount = vk_instance_extensions_no,
|
||||||
.ppEnabledExtensionNames = vk_instance_extensions,
|
.ppEnabledExtensionNames = vk_instance_extensions,
|
||||||
.enabledLayerCount = 0,
|
.enabledLayerCount = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
int e = 0;
|
int e = 0;
|
||||||
@ -153,9 +155,10 @@ void vk_destroy(VkInstance vk_instance)
|
|||||||
|
|
||||||
VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance)
|
VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance)
|
||||||
{
|
{
|
||||||
|
// get the physical devices list
|
||||||
VkPhysicalDevice vk_phydev = VK_NULL_HANDLE;
|
VkPhysicalDevice vk_phydev = VK_NULL_HANDLE;
|
||||||
|
|
||||||
uint32_t vk_phydevs_no = 0;
|
uint32_t vk_phydevs_no = 0;
|
||||||
VkPhysicalDevice *vk_phydevs;
|
VkPhysicalDevice *vk_phydevs;
|
||||||
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL);
|
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, NULL);
|
||||||
|
|
||||||
@ -171,24 +174,38 @@ VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance)
|
|||||||
|
|
||||||
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs);
|
vkEnumeratePhysicalDevices(vk_instance, &vk_phydevs_no, vk_phydevs);
|
||||||
|
|
||||||
|
// print out information about each device
|
||||||
printf("Available Physical Devices: \n");
|
printf("Available Physical Devices: \n");
|
||||||
for (uint32_t i = 0; i < vk_phydevs_no; i++) {
|
for (uint32_t i = 0; i < vk_phydevs_no; i++) {
|
||||||
VkPhysicalDevice device = vk_phydevs[i];
|
VkPhysicalDevice dev = vk_phydevs[i];
|
||||||
VkPhysicalDeviceProperties device_properties;
|
VkPhysicalDeviceProperties dev_properties;
|
||||||
VkPhysicalDeviceFeatures device_features;
|
VkPhysicalDeviceFeatures dev_features;
|
||||||
|
VkPhysicalDeviceMemoryProperties dev_memory;
|
||||||
|
|
||||||
vkGetPhysicalDeviceProperties(device, &device_properties);
|
vkGetPhysicalDeviceProperties(dev, &dev_properties);
|
||||||
vkGetPhysicalDeviceFeatures(device, &device_features);
|
vkGetPhysicalDeviceFeatures(dev, &dev_features);
|
||||||
|
vkGetPhysicalDeviceMemoryProperties(dev, &dev_memory);
|
||||||
|
|
||||||
printf(
|
printf(
|
||||||
"\tDevice %d: %s, Discrete: %s\n",
|
"\tDevice %d: %s, Discrete: %s\n",
|
||||||
i,
|
i,
|
||||||
device_properties.deviceName,
|
dev_properties.deviceName,
|
||||||
device_properties.deviceType ==
|
dev_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
|
||||||
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
|
|
||||||
? "true"
|
? "true"
|
||||||
: "false"
|
: "false"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) {
|
||||||
|
uint64_t mem_size = dev_memory.memoryHeaps[x].size;
|
||||||
|
uint32_t mem_flags = dev_memory.memoryHeaps[x].flags;
|
||||||
|
char mem_local = mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
|
||||||
|
printf(
|
||||||
|
"\t\tHeap %.2d: local: %d, size: %.3f MiB\n",
|
||||||
|
x,
|
||||||
|
mem_local,
|
||||||
|
(float)mem_size / (1024.0 * 1024.0)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: find the most suitable physical device, but for now every vulkan
|
// TODO: find the most suitable physical device, but for now every vulkan
|
||||||
@ -210,9 +227,9 @@ void vk_physical_device_destroy(VkPhysicalDevice vk_phydev)
|
|||||||
// returns a negative index on error
|
// returns a negative index on error
|
||||||
int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev)
|
int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev)
|
||||||
{
|
{
|
||||||
uint32_t vk_qfamilies_no = 0;
|
uint32_t vk_qfamilies_no = 0;
|
||||||
VkQueueFamilyProperties *vk_qfamilies;
|
VkQueueFamilyProperties *vk_qfamilies;
|
||||||
int supports = -1;
|
int supports = -1;
|
||||||
|
|
||||||
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL);
|
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL);
|
||||||
|
|
||||||
@ -239,16 +256,16 @@ int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev)
|
|||||||
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx)
|
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx)
|
||||||
{
|
{
|
||||||
VkResult res;
|
VkResult res;
|
||||||
VkDevice vk_logdev = VK_NULL_HANDLE;
|
VkDevice vk_logdev = VK_NULL_HANDLE;
|
||||||
float vk_queue_priority = 1.0f;
|
float vk_queue_priority = 1.0f;
|
||||||
|
|
||||||
// specify which command queues to use for the physical device
|
// specify which command queues to use for the physical device
|
||||||
VkDeviceQueueCreateInfo vk_queueinfo = {
|
VkDeviceQueueCreateInfo vk_queueinfo = {
|
||||||
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
|
||||||
.pNext = NULL,
|
.pNext = NULL,
|
||||||
.flags = 0,
|
.flags = 0,
|
||||||
.queueFamilyIndex = qfamily_idx,
|
.queueFamilyIndex = qfamily_idx,
|
||||||
.queueCount = 1,
|
.queueCount = 1,
|
||||||
.pQueuePriorities = &vk_queue_priority,
|
.pQueuePriorities = &vk_queue_priority,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -261,14 +278,14 @@ VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx)
|
|||||||
// FIXME: here validation layers are ignored but it is still better to define
|
// FIXME: here validation layers are ignored but it is still better to define
|
||||||
// them for compatibility
|
// them for compatibility
|
||||||
VkDeviceCreateInfo vk_createinfo = {
|
VkDeviceCreateInfo vk_createinfo = {
|
||||||
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
|
||||||
.pQueueCreateInfos = &vk_queueinfo,
|
.pQueueCreateInfos = &vk_queueinfo,
|
||||||
.queueCreateInfoCount = 1,
|
.queueCreateInfoCount = 1,
|
||||||
.pEnabledFeatures = &vk_phydev_features,
|
.pEnabledFeatures = &vk_phydev_features,
|
||||||
.ppEnabledExtensionNames = NULL,
|
.ppEnabledExtensionNames = NULL,
|
||||||
.enabledExtensionCount = 0,
|
.enabledExtensionCount = 0,
|
||||||
.ppEnabledLayerNames = NULL,
|
.ppEnabledLayerNames = NULL,
|
||||||
.enabledLayerCount = 0,
|
.enabledLayerCount = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev);
|
res = vkCreateDevice(vk_phydev, &vk_createinfo, NULL, &vk_logdev);
|
||||||
@ -313,7 +330,7 @@ int main(void)
|
|||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance);
|
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance);
|
||||||
int qfamily_idx = vk_device_compute_queue_index(vk_phydev);
|
int qfamily_idx = vk_device_compute_queue_index(vk_phydev);
|
||||||
if (qfamily_idx < 0) {
|
if (qfamily_idx < 0) {
|
||||||
err("The device does not support compute queues\n");
|
err("The device does not support compute queues\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
|
Loading…
Reference in New Issue
Block a user