diff --git a/test2/.clang-format b/test2/.clang-format new file mode 100644 index 0000000..1b9315e --- /dev/null +++ b/test2/.clang-format @@ -0,0 +1,40 @@ +# linux kernel style formatting +BasedOnStyle: LLVM +IndentWidth: 8 +UseTab: Always + +BreakBeforeBraces: Linux +AllowShortIfStatementsOnASingleLine: false +IndentCaseLabels: false +ColumnLimit: 85 + +InsertBraces: true +SortIncludes: Never +BinPackParameters: false +BinPackArguments: false +Cpp11BracedListStyle: true +SpaceBeforeCpp11BracedList: true +SeparateDefinitionBlocks: Always +AlignAfterOpenBracket: BlockIndent + +AlignConsecutiveDeclarations: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: true + PadOperators: true + +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + +AlignConsecutiveBitFields: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true + +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: true diff --git a/test2/.gitignore b/test2/.gitignore index e37dbbe..9f35b14 100644 --- a/test2/.gitignore +++ b/test2/.gitignore @@ -1,3 +1,3 @@ +tmp_* *.spv -*.comp -test1 +test2 diff --git a/test2/main.cpp b/test2/main.cpp index 274a3db..838e8f4 100644 --- a/test2/main.cpp +++ b/test2/main.cpp @@ -9,22 +9,29 @@ #include #include +#include -static std::vector compile_shader(const std::string& source) +#define MSIZE 128 + +static std::vector compile_shader(const std::string &source) { std::ofstream fileOut("tmp_kp_shader.comp"); fileOut << source; fileOut.close(); - if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str())) + if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o " + "tmp_kp_shader.comp.spv") + .c_str())) { throw std::runtime_error("Error running glslangValidator command"); - std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary); + } + std::ifstream fileStream("tmp_kp_shader.comp.spv", 
// Replace every occurrence of `needle` in `str` with the textual form of
// `val` and return the resulting string.
//
// needle: NUL-terminated placeholder to look for (e.g. "__lcsize_x__").
//         A null or empty needle matches nothing; `str` is returned as-is.
// val:    value to insert, converted with std::to_string, so T must be a
//         type std::to_string accepts.
// str:    text to perform the substitution on (taken by value; the modified
//         copy is returned).
//
// The substitution is a single left-to-right pass: text that was just
// inserted is never scanned again. Restarting the search from the beginning
// after each replacement would never terminate when the replacement itself
// contains the needle (needle "1", val 11) or when the needle is empty.
template <typename T>
std::string replacewith(const char *needle, T val, std::string str)
{
	if (needle == nullptr) {
		return str;
	}

	const std::string            replacement = std::to_string(val);
	const std::string::size_type needle_len  = strlen(needle);

	// an empty needle is found at every position, there is nothing to do
	if (needle_len == 0) {
		return str;
	}

	std::string::size_type pos = str.find(needle, 0);
	while (pos != std::string::npos) {
		str.replace(pos, needle_len, replacement);
		// resume the search right after the text that was just inserted
		pos = str.find(needle, pos + replacement.size());
	}
	return str;
}
matrixB[1024][1024] = {0}; - float matrixC[1024][1024] = {0}; + // matrices are on the stack, this breaks for large MSIZE (1024) + float matrixA[MSIZE][MSIZE] = {0}; + float matrixB[MSIZE][MSIZE] = {0}; + float matrixC[MSIZE][MSIZE] = {0}; // fill an identity matrix - for (int y = 0; y < 1024; y++) { + for (int y = 0; y < MSIZE; y++) { matrixA[y][y] = 1.0; } // fill a matrix with data - for (int y = 0; y < 1024; y++) { - for (int x = 0; x < 1024; x++) { - matrixB[y][x] = x*0.74 - y*0.22; + for (int y = 0; y < MSIZE; y++) { + for (int x = 0; x < MSIZE; x++) { + matrixB[y][x] = x * 0.74 - y * 0.22; } } @@ -112,13 +84,12 @@ int main() kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat; // auto because fuck C++ - auto tensorA = mgr.tensor(matrixA, 1024*1024, sizeof(float), dtype); - auto tensorB = mgr.tensor(matrixB, 1024*1024, sizeof(float), dtype); - auto tensorC = mgr.tensor(matrixC, 1024*1024, sizeof(float), dtype); + auto tensorA = mgr.tensor(matrixA, MSIZE * MSIZE, sizeof(float), dtype); + auto tensorB = mgr.tensor(matrixB, MSIZE * MSIZE, sizeof(float), dtype); + auto tensorC = mgr.tensor(matrixC, MSIZE * MSIZE, sizeof(float), dtype); const std::vector> params = { - tensorA, tensorB, tensorC - }; + tensorA, tensorB, tensorC}; // workgroup, dispatch a 2D array of workgroups (2D matrices) // TODO: determine the size of the workgroups by doing some calls to vk @@ -126,30 +97,34 @@ int main() // this should call vkCmdDispatch(x, y, z) kp::Workgroup workgroup({wgrp_x, wgrp_y, 1}); - // substitute the values in the shader + // get the shader code into a string const char *shader_path = "shader.comp"; - std::string shader_str = shader_to_string(shader_path); - shader_str = regex_replace("/{lcsize_x}/", shader_str); - const std::vector shader = compile_shader(shader_to_string("shader.comp")); + std::string shader_str = shader_to_string(shader_path); + // substitute the value for the number of threads (xyz) per workgroup since + // it has to be a 
// Shader entry point, executed once per invocation.
//
// Each invocation handles the single element selected by the x component of
// its global invocation id and stores the product of the matching elements
// of matA and matB into matC. The storage buffers declared in this shader
// are named matA, matB and matC; the identifiers o, a and b used previously
// are not declared anywhere in the shader.
//
// NOTE(review): this is an element-wise product indexed by x only, not a
// matrix product, and there is no bounds check against the tensor size --
// confirm this is the intended computation for C = A*B.
void main()
{
	uint index = gl_GlobalInvocationID.x;
	matC[index] = matA[index] * matB[index];
}