Alessandro Mauri 12 months ago
parent f94c6dc470
commit 366c7d2c03
  1. 40
      test2/.clang-format
  2. 4
      test2/.gitignore
  3. 135
      test2/main.cpp
  4. 24
      test2/shader.comp
  5. BIN
      test2/test2

@ -0,0 +1,40 @@
# linux kernel style formatting
BasedOnStyle: LLVM
IndentWidth: 8
UseTab: Always
BreakBeforeBraces: Linux
AllowShortIfStatementsOnASingleLine: false
IndentCaseLabels: false
ColumnLimit: 85
InsertBraces: true
SortIncludes: Never
BinPackParameters: false
BinPackArguments: false
Cpp11BracedListStyle: true
SpaceBeforeCpp11BracedList: true
SeparateDefinitionBlocks: Always
AlignAfterOpenBracket: BlockIndent
# The AlignConsecutive* options are maps: their settings must be nested under
# the option name, otherwise they are read as (duplicate) top-level keys.
AlignConsecutiveDeclarations:
  Enabled: true
  AcrossEmptyLines: false
  AcrossComments: false
  AlignCompound: true
  PadOperators: true
AlignConsecutiveMacros:
  Enabled: true
  AcrossEmptyLines: false
  AcrossComments: true
AlignConsecutiveBitFields:
  Enabled: true
  AcrossEmptyLines: false
  AcrossComments: true
AlignConsecutiveAssignments:
  Enabled: true
  AcrossEmptyLines: false
  AcrossComments: true

4
test2/.gitignore vendored

@ -1,3 +1,3 @@
tmp_*
*.spv *.spv
*.comp test2
test1

@ -9,22 +9,29 @@
#include <kompute/Kompute.hpp> #include <kompute/Kompute.hpp>
#include <vulkan/vulkan_handles.hpp> #include <vulkan/vulkan_handles.hpp>
#include <unistd.h>
static std::vector<uint32_t> compile_shader(const std::string& source) #define MSIZE 128
// Compile GLSL compute shader source to SPIR-V and return it as 32-bit words.
//
// The source is written to "tmp_kp_shader.comp" in the current directory,
// glslangValidator is invoked through system() to produce
// "tmp_kp_shader.comp.spv", and that file is read back. Neither temporary file
// is removed afterwards.
//
// Throws std::runtime_error when the source cannot be written, when the
// glslangValidator command returns non-zero, or when the output file cannot be
// read or is not a whole number of 32-bit words.
static std::vector<uint32_t> compile_shader(const std::string &source)
{
	std::ofstream fileOut("tmp_kp_shader.comp");
	fileOut << source;
	fileOut.close();
	if (fileOut.fail()) {
		throw std::runtime_error("Error writing tmp_kp_shader.comp");
	}

	if (system("glslangValidator -V tmp_kp_shader.comp -o "
		   "tmp_kp_shader.comp.spv")) {
		throw std::runtime_error("Error running glslangValidator command");
	}

	// open positioned at the end so tellg() yields the file size
	std::ifstream fileStream(
		"tmp_kp_shader.comp.spv", std::ios::binary | std::ios::ate
	);
	if (!fileStream) {
		throw std::runtime_error("Error opening tmp_kp_shader.comp.spv");
	}

	const std::streamoff byte_count = fileStream.tellg();
	if (byte_count < 0 ||
	    static_cast<size_t>(byte_count) % sizeof(uint32_t) != 0) {
		throw std::runtime_error(
			"tmp_kp_shader.comp.spv is not a whole number of 32-bit words"
		);
	}

	// read straight into word storage instead of casting a char buffer to
	// uint32_t pointers
	std::vector<uint32_t> spirv(
		static_cast<size_t>(byte_count) / sizeof(uint32_t)
	);
	if (!spirv.empty()) {
		fileStream.seekg(0, std::ios::beg);
		fileStream.read(reinterpret_cast<char *>(spirv.data()), byte_count);
		if (!fileStream) {
			throw std::runtime_error("Error reading tmp_kp_shader.comp.spv");
		}
	}

	return spirv;
}
static std::string shader_to_string(const char *path) static std::string shader_to_string(const char *path)
{ {
std::ifstream comp_file; std::ifstream comp_file;
@ -39,71 +46,36 @@ static std::string shader_to_string(const char *path)
return outstr.str(); return outstr.str();
} }
// Return a copy of `str` in which every occurrence of `needle` is replaced by
// the textual form of `val`, as produced by std::to_string.
//
// The search resumes right after each inserted replacement, so the text that
// was just inserted is never scanned again: the function terminates even when
// the replacement contains `needle`, and the string is walked only once.
// A null or empty `needle` leaves `str` unchanged.
template <typename T>
std::string replacewith(const char *needle, T val, std::string str)
{
	if (needle == nullptr || *needle == '\0') {
		return str;
	}

	const std::string replace = std::to_string(val);
	const size_t      len     = strlen(needle);

	for (size_t pos = 0; (pos = str.find(needle, pos)) != std::string::npos;) {
		str.replace(pos, len, replace);
		// continue after the inserted text
		pos += replace.size();
	}

	return str;
}
// compute C = A*B on the GPU
int main() int main()
{ {
// create the kompute manager // create the kompute manager
kp::Manager mgr; kp::Manager mgr;
// matrices are on the stack, this breaks for large MSIZE (1024)
// C = A*B float matrixA[MSIZE][MSIZE] = {0};
float matrixA[1024][1024] = {0}; float matrixB[MSIZE][MSIZE] = {0};
float matrixB[1024][1024] = {0}; float matrixC[MSIZE][MSIZE] = {0};
float matrixC[1024][1024] = {0};
// fill an identity matrix // fill an identity matrix
for (int y = 0; y < 1024; y++) { for (int y = 0; y < MSIZE; y++) {
matrixA[y][y] = 1.0; matrixA[y][y] = 1.0;
} }
// fill a matrix with data // fill a matrix with data
for (int y = 0; y < 1024; y++) { for (int y = 0; y < MSIZE; y++) {
for (int x = 0; x < 1024; x++) { for (int x = 0; x < MSIZE; x++) {
matrixB[y][x] = x*0.74 - y*0.22; matrixB[y][x] = x * 0.74 - y * 0.22;
} }
} }
@ -112,13 +84,12 @@ int main()
kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat; kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat;
// auto because fuck C++ // auto because fuck C++
auto tensorA = mgr.tensor(matrixA, 1024*1024, sizeof(float), dtype); auto tensorA = mgr.tensor(matrixA, MSIZE * MSIZE, sizeof(float), dtype);
auto tensorB = mgr.tensor(matrixB, 1024*1024, sizeof(float), dtype); auto tensorB = mgr.tensor(matrixB, MSIZE * MSIZE, sizeof(float), dtype);
auto tensorC = mgr.tensor(matrixC, 1024*1024, sizeof(float), dtype); auto tensorC = mgr.tensor(matrixC, MSIZE * MSIZE, sizeof(float), dtype);
const std::vector<std::shared_ptr<kp::Tensor>> params = { const std::vector<std::shared_ptr<kp::Tensor>> params = {
tensorA, tensorB, tensorC tensorA, tensorB, tensorC};
};
// workgroup, dispatch a 2D array of workgroups (2D matrices) // workgroup, dispatch a 2D array of workgroups (2D matrices)
// TODO: determine the size of the workgroups by doing some calls to vk // TODO: determine the size of the workgroups by doing some calls to vk
@ -126,30 +97,34 @@ int main()
// this should call vkCmdDispatch(x, y, z) // this should call vkCmdDispatch(x, y, z)
kp::Workgroup workgroup({wgrp_x, wgrp_y, 1}); kp::Workgroup workgroup({wgrp_x, wgrp_y, 1});
// substitute the values in the shader // get the shader code into a string
const char *shader_path = "shader.comp"; const char *shader_path = "shader.comp";
std::string shader_str = shader_to_string(shader_path); std::string shader_str = shader_to_string(shader_path);
shader_str = regex_replace("/{lcsize_x}/", shader_str);
const std::vector<uint32_t> shader = compile_shader(shader_to_string("shader.comp"));
// substitute the value for the number of threads (xyz) per workgroup since
// it has to be a compile-time constant
shader_str = replacewith<int>("__lcsize_x__", 32, shader_str);
shader_str = replacewith<int>("__lcsize_y__", 32, shader_str);
shader_str = replacewith<int>("__lcsize_z__", 1, shader_str);
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm( printf("%s\n", shader_str.c_str());
params, return 0;
shader,
workgroup,
{1024.0}
);
mgr.sequence() const std::vector<uint32_t> shader =
->record<kp::OpTensorSyncDevice>(params) compile_shader(shader_to_string("shader.comp"));
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params)
->eval();
std::shared_ptr<kp::Algorithm> algo =
mgr.algorithm(params, shader, workgroup, {MSIZE});
mgr.sequence()
->record<kp::OpTensorSyncDevice>(params)
->record<kp::OpAlgoDispatch>(algo)
->record<kp::OpTensorSyncLocal>(params)
->eval();
// print the resulting matrix // print the resulting matrix
std::cout << "Output: { "; std::cout << "Output: { ";
for (const float& elem : tensorC->vector<float>()) { for (const float &elem : tensorC->vector<float>()) {
printf("%.2f, ", elem); printf("%.2f, ", elem);
} }
std::cout << "}" << std::endl; std::cout << "}" << std::endl;

@ -0,0 +1,24 @@
#version 450
// clang-format off
// The number of threads spawned per-workgroup, these are substituted by the
// program pre-compilation
layout(
local_size_x = __lcsize_x__,
local_size_y = __lcsize_y__,
local_size_z = __lcsize_z__
) in;
// The buffers are provided via the tensors
layout(binding = 0) buffer tensorA { float matA[]; };
layout(binding = 1) buffer tensorB { float matB[]; };
layout(binding = 2) buffer tensorC { float matC[]; };
// specialization constants
layout(constant_id = 0) const float tensor_size_f = 0;
// Each invocation handles one element, selected by the x component of its
// global invocation id, and stores the product of the matching matA and matB
// elements into matC.
// NOTE(review): this is an element-wise product, not a matrix product, and the
// y component of the invocation id is not used -- confirm this is intended.
void main()
{
uint index = gl_GlobalInvocationID.x;
// use the buffer arrays declared in the tensorA/tensorB/tensorC blocks;
// the previous names (a, b, o) are not declared in this shader
matC[index] = matA[index] * matB[index];
}

Binary file not shown.
Loading…
Cancel
Save