You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
159 lines
3.9 KiB
159 lines
3.9 KiB
12 months ago
|
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <kompute/Kompute.hpp>
#include <vulkan/vulkan_handles.hpp>
|
||
|
|
||
|
|
||
|
/// Compiles GLSL compute-shader source to SPIR-V by shelling out to
/// `glslangValidator`.
///
/// The source is written to `tmp_kp_shader.comp` in the current working
/// directory, compiled into `tmp_kp_shader.comp.spv`, and the resulting binary
/// is read back. Both temporary files are left on disk.
///
/// @param source GLSL compute shader text.
/// @return The compiled module as a sequence of 32-bit words.
/// @throws std::runtime_error if a temporary file cannot be written or read,
///         if the `glslangValidator` command returns a non-zero status, or if
///         the produced binary is not a whole number of 32-bit words.
static std::vector<uint32_t> compile_shader(const std::string& source)
{
    {
        std::ofstream fileOut("tmp_kp_shader.comp");
        if (!fileOut)
            throw std::runtime_error("Error opening tmp_kp_shader.comp for writing");
        fileOut << source;
        fileOut.close();
        // close() flushes; a failure here means the shader text is incomplete.
        if (!fileOut)
            throw std::runtime_error("Error writing tmp_kp_shader.comp");
    }

    if (std::system("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv"))
        throw std::runtime_error("Error running glslangValidator command");

    std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
    if (!fileStream)
        throw std::runtime_error("Error opening tmp_kp_shader.comp.spv for reading");

    const std::vector<char> buffer((std::istreambuf_iterator<char>(fileStream)),
                                   std::istreambuf_iterator<char>());

    if (buffer.size() % sizeof(uint32_t) != 0)
        throw std::runtime_error("tmp_kp_shader.comp.spv is not a whole number of 32-bit words");

    // Copy bytes into properly aligned word storage instead of reinterpreting
    // the char buffer through a uint32_t pointer.
    std::vector<uint32_t> words(buffer.size() / sizeof(uint32_t));
    if (!words.empty())
        std::memcpy(words.data(), buffer.data(), buffer.size());
    return words;
}
|
||
|
|
||
|
|
||
|
|
||
|
/// Loads the whole file at `path` into a string.
///
/// @param path Path of the file to read.
/// @return The file's contents, or the literal text "// bad code" when the
///         file cannot be opened.
static std::string shader_to_string(const char *path)
{
    std::ifstream input(path);
    if (!input.is_open())
        return std::string("// bad code");

    // Stream the entire underlying buffer into memory in one go.
    std::ostringstream contents;
    contents << input.rdbuf();
    return contents.str();
}
|
||
|
|
||
|
|
||
|
/// A sed-like substitute command as a function.
///
/// Accepted expression forms:
///   1. `/pattern/replace/` - replace every match of `pattern` with `replace`
///   2. `/pattern//`        - delete every match of `pattern`
///
/// The first '/' that is not escaped by a backslash separates the pattern from
/// the replacement. A '/' counts as escaped when it is preceded by an odd
/// number of backslashes. The pattern is handed to `std::regex` verbatim
/// (escapes included) and the replacement is handed to `std::regex_replace`
/// verbatim as its format string.
///
/// @param expr_string Expression in one of the forms above.
/// @param str         Text to operate on.
/// @return The substituted text, or `str` unchanged when the expression does
///         not start and end with '/', has no unescaped separator, or has an
///         empty pattern.
/// @throws std::regex_error if the pattern is not a valid regular expression.
static std::string regex_replace(const char *expr_string, std::string str)
{
    const std::string raw(expr_string);
    if (raw.size() < 3 || raw.front() != '/' || raw.back() != '/') {
        return str;
    }

    // Drop the leading and trailing delimiter.
    const std::string expr = raw.substr(1, raw.size() - 2);

    std::string pattern, replace;
    // Always resume the search one past the previous hit; searching again from
    // the hit itself would find the same slash forever.
    for (std::string::size_type pos = expr.find('/');
         pos != std::string::npos;
         pos = expr.find('/', pos + 1)) {
        // Count the run of backslashes immediately before this slash.
        std::string::size_type backslashes = 0;
        while (backslashes < pos && expr[pos - 1 - backslashes] == '\\') {
            ++backslashes;
        }
        if (backslashes % 2 == 1) {
            continue; // escaped '/', part of the pattern
        }
        pattern = expr.substr(0, pos);
        replace = expr.substr(pos + 1);
        break;
    }

    if (pattern.empty()) {
        return str;
    }

    const std::regex reg(pattern);
    return std::regex_replace(str, reg, replace);
}
|
||
|
|
||
|
|
||
|
/// Replaces every match of the regular expression `expr` in `str` with the
/// textual form of `val`.
///
/// NOTE(review): the previous body was an unfinished stub that could not
/// compile; "replace every match with the streamed value" is the presumed
/// intent - confirm before relying on it.
///
/// @tparam T   Any type that can be streamed with `operator<<`.
/// @param expr Regular expression (default `std::regex` grammar).
/// @param val  Value to insert; formatted with a default `std::ostringstream`.
/// @param str  Text to operate on.
/// @return A copy of `str` with all matches substituted.
/// @throws std::regex_error if `expr` is not a valid regular expression.
template <typename T> std::string regex_subst(const char *expr, T val, std::string str)
{
    std::ostringstream formatted;
    formatted << val;

    // '$' introduces a substitution in the replacement format string, so it is
    // doubled to make the value appear literally in the output.
    std::string replacement;
    for (const char ch : formatted.str()) {
        if (ch == '$') {
            replacement += '$';
        }
        replacement += ch;
    }

    const std::regex reg(expr);
    return std::regex_replace(str, reg, replacement);
}
|
||
|
|
||
|
|
||
|
int main()
|
||
|
{
|
||
|
// create the kompute manager
|
||
|
kp::Manager mgr;
|
||
|
|
||
|
// C = A*B
|
||
|
float matrixA[1024][1024] = {0};
|
||
|
float matrixB[1024][1024] = {0};
|
||
|
float matrixC[1024][1024] = {0};
|
||
|
// fill an identity matrix
|
||
|
for (int y = 0; y < 1024; y++) {
|
||
|
matrixA[y][y] = 1.0;
|
||
|
}
|
||
|
// fill a matrix with data
|
||
|
for (int y = 0; y < 1024; y++) {
|
||
|
for (int x = 0; x < 1024; x++) {
|
||
|
matrixB[y][x] = x*0.74 - y*0.22;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// create the tensors, tensors are just arrays, in the shader we will have
|
||
|
// to describe how it translates to matrices
|
||
|
kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat;
|
||
|
|
||
|
// auto because fuck C++
|
||
|
auto tensorA = mgr.tensor(matrixA, 1024*1024, sizeof(float), dtype);
|
||
|
auto tensorB = mgr.tensor(matrixB, 1024*1024, sizeof(float), dtype);
|
||
|
auto tensorC = mgr.tensor(matrixC, 1024*1024, sizeof(float), dtype);
|
||
|
|
||
|
const std::vector<std::shared_ptr<kp::Tensor>> params = {
|
||
|
tensorA, tensorB, tensorC
|
||
|
};
|
||
|
|
||
|
// workgroup, dispatch a 2D array of workgroups (2D matrices)
|
||
|
// TODO: determine the size of the workgroups by doing some calls to vk
|
||
|
const int wgrp_x = 32, wgrp_y = 32;
|
||
|
// this should call vkCmdDispatch(x, y, z)
|
||
|
kp::Workgroup workgroup({wgrp_x, wgrp_y, 1});
|
||
|
|
||
|
// substitute the values in the shader
|
||
|
const char *shader_path = "shader.comp";
|
||
|
std::string shader_str = shader_to_string(shader_path);
|
||
|
shader_str = regex_replace("/{lcsize_x}/", shader_str);
|
||
|
const std::vector<uint32_t> shader = compile_shader(shader_to_string("shader.comp"));
|
||
|
|
||
|
|
||
|
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
||
|
params,
|
||
|
shader,
|
||
|
workgroup,
|
||
|
{1024.0}
|
||
|
);
|
||
|
|
||
|
mgr.sequence()
|
||
|
->record<kp::OpTensorSyncDevice>(params)
|
||
|
->record<kp::OpAlgoDispatch>(algo)
|
||
|
->record<kp::OpTensorSyncLocal>(params)
|
||
|
->eval();
|
||
|
|
||
|
|
||
|
// print the resulting matrix
|
||
|
std::cout << "Output: { ";
|
||
|
for (const float& elem : tensorC->vector<float>()) {
|
||
|
printf("%.2f, ", elem);
|
||
|
}
|
||
|
std::cout << "}" << std::endl;
|
||
|
|
||
|
return 0;
|
||
|
}
|