test using the kompute library and possibly vulkan
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
kompute_tests/test2/main.cpp

133 lines
3.7 KiB

#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <kompute/Kompute.hpp>
#include <vulkan/vulkan_handles.hpp>
#define MSIZE 128
/**
 * Compile GLSL compute-shader source to SPIR-V by shelling out to
 * glslangValidator.
 *
 * The source is written to "tmp_kp_shader.comp" in the current working
 * directory, glslangValidator is asked to produce "tmp_kp_shader.comp.spv",
 * and that binary is read back. Neither temporary file is removed.
 *
 * @param source GLSL compute shader text.
 * @return The compiled module as a sequence of 32-bit words.
 * @throws std::runtime_error if the temporary source cannot be written, if
 *         the glslangValidator command returns non-zero, if the output file
 *         cannot be opened, or if the output is empty or not a whole number
 *         of 32-bit words.
 */
static std::vector<uint32_t> compile_shader(const std::string &source)
{
    std::ofstream fileOut("tmp_kp_shader.comp");
    fileOut << source;
    fileOut.close();
    // A failed open, write or close all leave the stream in a failed state.
    if (!fileOut) {
        throw std::runtime_error("Error writing tmp_kp_shader.comp");
    }

    if (std::system("glslangValidator -V tmp_kp_shader.comp -o "
                    "tmp_kp_shader.comp.spv") != 0) {
        throw std::runtime_error("Error running glslangValidator command");
    }

    std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
    if (!fileStream) {
        throw std::runtime_error("Error opening tmp_kp_shader.comp.spv");
    }
    const std::vector<char> buffer(
        (std::istreambuf_iterator<char>(fileStream)),
        std::istreambuf_iterator<char>());

    // The bytes are handed back as uint32_t words, so a size that is not a
    // multiple of the word size cannot be represented.
    if (buffer.empty() || buffer.size() % sizeof(uint32_t) != 0) {
        throw std::runtime_error(
            "tmp_kp_shader.comp.spv is empty or not a whole number of "
            "32-bit words");
    }

    // Copy instead of casting char* to uint32_t*: reading the char buffer
    // through a uint32_t pointer violates the aliasing rules.
    std::vector<uint32_t> words(buffer.size() / sizeof(uint32_t));
    std::memcpy(words.data(), buffer.data(), buffer.size());
    return words;
}
/**
 * Read the whole file at `path` into a string.
 *
 * @param path Path of the file to read.
 * @return The file contents, or the literal "// bad code" when the file
 *         cannot be opened.
 */
static std::string shader_to_string(const char *path)
{
    std::ifstream input(path);
    if (!input.is_open()) {
        return "// bad code";
    }
    // Pull every character from the stream buffer up to end-of-file.
    return std::string(
        std::istreambuf_iterator<char>(input),
        std::istreambuf_iterator<char>());
}
/**
 * Replace every occurrence of `needle` in `str` with the decimal text of
 * `val`, as produced by std::to_string.
 *
 * The search resumes after each inserted replacement, so the call terminates
 * even when the replacement text itself contains `needle`.
 *
 * @tparam T   A type accepted by std::to_string.
 * @param needle Null-terminated text to search for. A null or empty needle
 *               matches nothing and `str` is returned unchanged.
 * @param val    Value whose std::to_string form is substituted.
 * @param str    Text to search (taken by value; the modified copy is
 *               returned).
 * @return `str` with all occurrences of `needle` replaced.
 */
template <typename T>
std::string replacewith(const char *needle, T val, std::string str)
{
    // An empty pattern is found at every position and would never finish.
    if (needle == nullptr || *needle == '\0') {
        return str;
    }
    const std::string pattern(needle);
    const std::string replacement = std::to_string(val);
    for (std::string::size_type pos = str.find(pattern);
         pos != std::string::npos;
         pos = str.find(pattern, pos + replacement.size())) {
        str.replace(pos, pattern.size(), replacement);
    }
    return str;
}
// Compute C = A*B on the GPU through Kompute and print C.
//
// A is the MSIZE x MSIZE identity matrix and B is filled with
// x * 0.74 - y * 0.22. The actual arithmetic is done by "shader.comp", which
// is read from the current working directory; this file only assumes that
// the shader implements the matrix product (it is not visible from here).
//
// Returns 0 on completion. Exceptions thrown by compile_shader are not
// caught here.
int main()
{
    // create the kompute manager
    kp::Manager mgr;

    // Matrices are stored row-major in flat std::vector buffers rather than
    // as stack arrays, so a large MSIZE (e.g. 1024) does not overflow the
    // stack. Element (y, x) lives at index y * MSIZE + x.
    const uint32_t elem_count = MSIZE * MSIZE;
    std::vector<float> matrixA(elem_count, 0.0f);
    std::vector<float> matrixB(elem_count, 0.0f);
    std::vector<float> matrixC(elem_count, 0.0f);

    // fill an identity matrix
    for (int y = 0; y < MSIZE; y++) {
        matrixA[y * MSIZE + y] = 1.0f;
    }
    // fill a matrix with data
    for (int y = 0; y < MSIZE; y++) {
        for (int x = 0; x < MSIZE; x++) {
            matrixB[y * MSIZE + x] = static_cast<float>(x * 0.74 - y * 0.22);
        }
    }

    // create the tensors, tensors are just arrays, in the shader we will have
    // to describe how it translates to matrices
    kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat;
    // auto: the concrete tensor handle type is whatever mgr.tensor returns
    auto tensorA = mgr.tensor(matrixA.data(), elem_count, sizeof(float), dtype);
    auto tensorB = mgr.tensor(matrixB.data(), elem_count, sizeof(float), dtype);
    auto tensorC = mgr.tensor(matrixC.data(), elem_count, sizeof(float), dtype);
    const std::vector<std::shared_ptr<kp::Tensor>> params = {
        tensorA, tensorB, tensorC};

    // workgroup, dispatch a 2D array of workgroups (2D matrices)
    // TODO: determine the size of the workgroups by doing some calls to vk
    // NOTE(review): 32x32 workgroups of 32x32 threads is 1024x1024
    // invocations, more than MSIZE x MSIZE for MSIZE = 128 -- confirm that
    // shader.comp bounds-checks, or dispatch MSIZE / local-size groups.
    const int wgrp_x = 32, wgrp_y = 32;
    // this should call vkCmdDispatch(x, y, z)
    kp::Workgroup workgroup({wgrp_x, wgrp_y, 1});

    // get the shader code into a string
    const char *shader_path = "shader.comp";
    std::string shader_str = shader_to_string(shader_path);
    // substitute the value for the number of threads (xyz) per workgroup since
    // it has to be a compile-time constant
    shader_str = replacewith<int>("__lcsize_x__", 32, shader_str);
    shader_str = replacewith<int>("__lcsize_y__", 32, shader_str);
    shader_str = replacewith<int>("__lcsize_z__", 1, shader_str);
    // dump the substituted shader source
    std::printf("%s\n", shader_str.c_str());

    // Compile the substituted text, not a fresh read of the file: the raw
    // file is expected to still contain the __lcsize_*__ placeholders.
    const std::vector<uint32_t> shader = compile_shader(shader_str);
    std::shared_ptr<kp::Algorithm> algo =
        mgr.algorithm(params, shader, workgroup, {MSIZE});

    // upload the tensors, run the shader, download the tensors
    mgr.sequence()
        ->record<kp::OpTensorSyncDevice>(params)
        ->record<kp::OpAlgoDispatch>(algo)
        ->record<kp::OpTensorSyncLocal>(params)
        ->eval();

    // print the resulting matrix
    std::printf("Output: { ");
    for (const float &elem : tensorC->vector<float>()) {
        std::printf("%.2f, ", elem);
    }
    std::printf("}\n");
    return 0;
}