|
|
@ -21,6 +21,12 @@ |
|
|
|
__extension__ typedef _Float16 half; |
|
|
|
__extension__ typedef _Float16 half; |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// useful macros
|
|
|
|
|
|
|
|
// evaluates to 1 when f and b share at least one set bit, 0 otherwise;
// both arguments are parenthesized so compound expressions (a | b) bind correctly
#define TEST_BIT(f, b) (!!((f) & (b)))
|
|
|
|
|
|
|
// converts x gibibytes to bytes as a uint64_t; the argument is parenthesized
// so the cast applies to the whole expression (e.g. GIB(1 + 1))
#define GIB(x) ((uint64_t)(x) * 1024u * 1024u * 1024u)
|
|
|
|
|
|
|
// converts x mebibytes to bytes as a uint64_t; the argument is parenthesized
// so the cast applies to the whole expression (e.g. MIB(128 + 128))
#define MIB(x) ((uint64_t)(x) * 1024u * 1024u)
|
|
|
|
|
|
|
// converts x kibibytes to bytes as a uint64_t; the argument is parenthesized
// so the cast applies to the whole expression (e.g. KIB(1 + 1))
#define KIB(x) ((uint64_t)(x) * 1024u)
|
|
|
|
|
|
|
|
|
|
|
const char *vk_validation_layer[] = {"VK_LAYER_KHRONOS_validation"}; |
|
|
|
const char *vk_validation_layer[] = {"VK_LAYER_KHRONOS_validation"}; |
|
|
|
const uint32_t vk_validation_layer_no = 1; |
|
|
|
const uint32_t vk_validation_layer_no = 1; |
|
|
|
|
|
|
|
|
|
|
@ -198,11 +204,12 @@ VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) |
|
|
|
for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) { |
|
|
|
for (unsigned x = 0; x < dev_memory.memoryHeapCount; x++) { |
|
|
|
uint64_t mem_size = dev_memory.memoryHeaps[x].size; |
|
|
|
uint64_t mem_size = dev_memory.memoryHeaps[x].size; |
|
|
|
uint32_t mem_flags = dev_memory.memoryHeaps[x].flags; |
|
|
|
uint32_t mem_flags = dev_memory.memoryHeaps[x].flags; |
|
|
|
char mem_local = mem_flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; |
|
|
|
char is_local = |
|
|
|
|
|
|
|
TEST_BIT(mem_flags, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT); |
|
|
|
printf( |
|
|
|
printf( |
|
|
|
"\t\tHeap %.2d: local: %d, size: %.3f MiB\n", |
|
|
|
"\t\tHeap %.2d: local: %d, size: %.3f MiB\n", |
|
|
|
x, |
|
|
|
x, |
|
|
|
mem_local, |
|
|
|
is_local, |
|
|
|
(float)mem_size / (1024.0 * 1024.0) |
|
|
|
(float)mem_size / (1024.0 * 1024.0) |
|
|
|
); |
|
|
|
); |
|
|
|
} |
|
|
|
} |
|
|
@ -216,6 +223,63 @@ VkPhysicalDevice vk_physical_device_get(VkInstance vk_instance) |
|
|
|
return vk_phydev; |
|
|
|
return vk_phydev; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// returns the index of a usable memory type in the device that is also backed by
|
|
|
|
|
|
|
|
// a heap with a size of at least min_size bytes
|
|
|
|
|
|
|
|
int vk_device_get_usable_memory_type_index( |
|
|
|
|
|
|
|
VkPhysicalDevice vk_phydev, uint64_t min_size |
|
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int memtype_idx = -1; |
|
|
|
|
|
|
|
VkPhysicalDeviceMemoryProperties dev_memory; |
|
|
|
|
|
|
|
vkGetPhysicalDeviceMemoryProperties(vk_phydev, &dev_memory); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VkMemoryPropertyFlags flags = 0; |
|
|
|
|
|
|
|
uint32_t idx = 0; |
|
|
|
|
|
|
|
VkMemoryHeap mem; |
|
|
|
|
|
|
|
for (unsigned i = 0; i < dev_memory.memoryTypeCount; i++) { |
|
|
|
|
|
|
|
flags = dev_memory.memoryTypes[i].propertyFlags; |
|
|
|
|
|
|
|
idx = dev_memory.memoryTypes[i].heapIndex; |
|
|
|
|
|
|
|
mem = dev_memory.memoryHeaps[idx]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: do we need more flags to be set?
|
|
|
|
|
|
|
|
if (TEST_BIT(flags, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && |
|
|
|
|
|
|
|
TEST_BIT(flags, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) && |
|
|
|
|
|
|
|
mem.size >= min_size) { |
|
|
|
|
|
|
|
// as the name suggests we only care about the memory type
|
|
|
|
|
|
|
|
// and not the heap itself
|
|
|
|
|
|
|
|
memtype_idx = i; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return memtype_idx; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// do an allocation on the device of size bytes, according to krhonos it is a good
|
|
|
|
|
|
|
|
// idea to do one or few allocations and subdivide them on the host
|
|
|
|
|
|
|
|
// https://github.com/KhronosGroup/Vulkan-Guide/blob/main/chapters/memory_allocation.adoc
|
|
|
|
|
|
|
|
// this memory has to be freed using vkFreeMemory(device, mem, NULL);
|
|
|
|
|
|
|
|
VkDeviceMemory |
|
|
|
|
|
|
|
vk_allocate_memory(VkDevice vk_logdev, uint32_t memtype_index, uint64_t size) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
VkMemoryAllocateInfo alloc_info = { |
|
|
|
|
|
|
|
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
|
|
|
|
|
|
|
.pNext = NULL, |
|
|
|
|
|
|
|
.allocationSize = size, |
|
|
|
|
|
|
|
.memoryTypeIndex = memtype_index, |
|
|
|
|
|
|
|
}; |
|
|
|
|
|
|
|
VkDeviceMemory mem = VK_NULL_HANDLE; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VkResult res = vkAllocateMemory(vk_logdev, &alloc_info, NULL, &mem); |
|
|
|
|
|
|
|
if (res != VK_SUCCESS) { |
|
|
|
|
|
|
|
err("Error allocating memory on device: %s\n", |
|
|
|
|
|
|
|
vk_Result_to_str(res)); |
|
|
|
|
|
|
|
return VK_NULL_HANDLE; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return mem; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) |
|
|
|
void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) |
|
|
|
{ |
|
|
|
{ |
|
|
|
if (vk_phydev != VK_NULL_HANDLE) { |
|
|
|
if (vk_phydev != VK_NULL_HANDLE) { |
|
|
@ -225,11 +289,14 @@ void vk_physical_device_destroy(VkPhysicalDevice vk_phydev) |
|
|
|
|
|
|
|
|
|
|
|
// return the index of the first queue family that supports compute on the device,
|
|
|
|
// return the index of the first queue family that supports compute on the device,
|
|
|
|
// returns a negative index on error
|
|
|
|
// returns a negative index on error
|
|
|
|
int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) |
|
|
|
// A better approach would be to find a queue that only handled compute workloads
|
|
|
|
|
|
|
|
// (but you need to ignore the transfer bit and for our purposes the sparse binding
|
|
|
|
|
|
|
|
// bit too)
|
|
|
|
|
|
|
|
int vk_device_get_compute_queue_index(VkPhysicalDevice vk_phydev) |
|
|
|
{ |
|
|
|
{ |
|
|
|
uint32_t vk_qfamilies_no = 0; |
|
|
|
uint32_t vk_qfamilies_no = 0; |
|
|
|
VkQueueFamilyProperties *vk_qfamilies; |
|
|
|
VkQueueFamilyProperties *vk_qfamilies; |
|
|
|
int supports = -1; |
|
|
|
int qfamily_idx = -1; |
|
|
|
|
|
|
|
|
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); |
|
|
|
vkGetPhysicalDeviceQueueFamilyProperties(vk_phydev, &vk_qfamilies_no, NULL); |
|
|
|
|
|
|
|
|
|
|
@ -244,13 +311,13 @@ int vk_device_compute_queue_index(VkPhysicalDevice vk_phydev) |
|
|
|
); |
|
|
|
); |
|
|
|
|
|
|
|
|
|
|
|
for (uint32_t i = 0; i < vk_qfamilies_no; i++) { |
|
|
|
for (uint32_t i = 0; i < vk_qfamilies_no; i++) { |
|
|
|
if (vk_qfamilies[i].queueFlags & VK_QUEUE_COMPUTE_BIT) { |
|
|
|
if (TEST_BIT(vk_qfamilies[i].queueFlags, VK_QUEUE_COMPUTE_BIT)) { |
|
|
|
supports = i; |
|
|
|
qfamily_idx = i; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
free(vk_qfamilies); |
|
|
|
free(vk_qfamilies); |
|
|
|
return supports; |
|
|
|
return qfamily_idx; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) |
|
|
|
VkDevice vk_logical_device_create(VkPhysicalDevice vk_phydev, int qfamily_idx) |
|
|
@ -307,6 +374,7 @@ void vk_logical_device_destroy(VkDevice vk_logdev) |
|
|
|
vkDestroyDevice(vk_logdev, NULL); |
|
|
|
vkDestroyDevice(vk_logdev, NULL); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// get the queue handle from its index
|
|
|
|
VkQueue vk_queue_get(VkDevice vk_logdev, int qfamily_idx) |
|
|
|
VkQueue vk_queue_get(VkDevice vk_logdev, int qfamily_idx) |
|
|
|
{ |
|
|
|
{ |
|
|
|
VkQueue vk_queue = VK_NULL_HANDLE; |
|
|
|
VkQueue vk_queue = VK_NULL_HANDLE; |
|
|
@ -330,13 +398,28 @@ int main(void) |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
} |
|
|
|
} |
|
|
|
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); |
|
|
|
VkPhysicalDevice vk_phydev = vk_physical_device_get(vk_instance); |
|
|
|
int qfamily_idx = vk_device_compute_queue_index(vk_phydev); |
|
|
|
int qfamily_idx = vk_device_get_compute_queue_index(vk_phydev); |
|
|
|
if (qfamily_idx < 0) { |
|
|
|
if (qfamily_idx < 0) { |
|
|
|
err("The device does not support compute queues\n"); |
|
|
|
err("The device does not support compute queues\n"); |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
exit(EXIT_FAILURE); |
|
|
|
} |
|
|
|
} |
|
|
|
VkDevice vk_logdev = vk_logical_device_create(vk_phydev, qfamily_idx); |
|
|
|
VkDevice vk_logdev = vk_logical_device_create(vk_phydev, qfamily_idx); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int devmem_idx = vk_device_get_usable_memory_type_index(vk_phydev, GIB(1)); |
|
|
|
|
|
|
|
if (devmem_idx < 0) { |
|
|
|
|
|
|
|
err("Could not find a suitable device memory heap\n"); |
|
|
|
|
|
|
|
exit(EXIT_FAILURE); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VkDeviceMemory mem = vk_allocate_memory(vk_logdev, devmem_idx, MIB(256)); |
|
|
|
|
|
|
|
if (mem == VK_NULL_HANDLE) { |
|
|
|
|
|
|
|
exit(EXIT_FAILURE); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
printf("Successfully allocated memory on device\n"); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vkFreeMemory(vk_logdev, mem, NULL); |
|
|
|
|
|
|
|
|
|
|
|
vk_logical_device_destroy(vk_logdev); |
|
|
|
vk_logical_device_destroy(vk_logdev); |
|
|
|
vk_physical_device_destroy(vk_phydev); |
|
|
|
vk_physical_device_destroy(vk_phydev); |
|
|
|
vk_destroy(vk_instance); |
|
|
|
vk_destroy(vk_instance); |
|
|
|