boo
This commit is contained in:
parent
f94c6dc470
commit
366c7d2c03
40
test2/.clang-format
Normal file
40
test2/.clang-format
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# Linux kernel style formatting
|
||||||
|
BasedOnStyle: LLVM
|
||||||
|
IndentWidth: 8
|
||||||
|
UseTab: Always
|
||||||
|
|
||||||
|
BreakBeforeBraces: Linux
|
||||||
|
AllowShortIfStatementsOnASingleLine: false
|
||||||
|
IndentCaseLabels: false
|
||||||
|
ColumnLimit: 85
|
||||||
|
|
||||||
|
InsertBraces: true
|
||||||
|
SortIncludes: Never
|
||||||
|
BinPackParameters: false
|
||||||
|
BinPackArguments: false
|
||||||
|
Cpp11BracedListStyle: true
|
||||||
|
SpaceBeforeCpp11BracedList: true
|
||||||
|
SeparateDefinitionBlocks: Always
|
||||||
|
AlignAfterOpenBracket: BlockIndent
|
||||||
|
|
||||||
|
AlignConsecutiveDeclarations:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: false
|
||||||
|
AcrossComments: false
|
||||||
|
AlignCompound: true
|
||||||
|
PadOperators: true
|
||||||
|
|
||||||
|
AlignConsecutiveMacros:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: false
|
||||||
|
AcrossComments: true
|
||||||
|
|
||||||
|
AlignConsecutiveBitFields:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: false
|
||||||
|
AcrossComments: true
|
||||||
|
|
||||||
|
AlignConsecutiveAssignments:
|
||||||
|
Enabled: true
|
||||||
|
AcrossEmptyLines: false
|
||||||
|
AcrossComments: true
|
4
test2/.gitignore
vendored
4
test2/.gitignore
vendored
@ -1,3 +1,3 @@
|
|||||||
|
tmp_*
|
||||||
*.spv
|
*.spv
|
||||||
*.comp
|
test2
|
||||||
test1
|
|
||||||
|
115
test2/main.cpp
115
test2/main.cpp
@ -9,22 +9,29 @@
|
|||||||
#include <kompute/Kompute.hpp>
|
#include <kompute/Kompute.hpp>
|
||||||
#include <vulkan/vulkan_handles.hpp>
|
#include <vulkan/vulkan_handles.hpp>
|
||||||
|
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#define MSIZE 128
|
||||||
|
|
||||||
static std::vector<uint32_t> compile_shader(const std::string &source)
|
static std::vector<uint32_t> compile_shader(const std::string &source)
|
||||||
{
|
{
|
||||||
std::ofstream fileOut("tmp_kp_shader.comp");
|
std::ofstream fileOut("tmp_kp_shader.comp");
|
||||||
fileOut << source;
|
fileOut << source;
|
||||||
fileOut.close();
|
fileOut.close();
|
||||||
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o tmp_kp_shader.comp.spv").c_str()))
|
if (system(std::string("glslangValidator -V tmp_kp_shader.comp -o "
|
||||||
|
"tmp_kp_shader.comp.spv")
|
||||||
|
.c_str())) {
|
||||||
throw std::runtime_error("Error running glslangValidator command");
|
throw std::runtime_error("Error running glslangValidator command");
|
||||||
|
}
|
||||||
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
|
std::ifstream fileStream("tmp_kp_shader.comp.spv", std::ios::binary);
|
||||||
std::vector<char> buffer;
|
std::vector<char> buffer;
|
||||||
buffer.insert(buffer.begin(), std::istreambuf_iterator<char>(fileStream), {});
|
buffer.insert(
|
||||||
return {(uint32_t*)buffer.data(), (uint32_t*)(buffer.data() + buffer.size())};
|
buffer.begin(), std::istreambuf_iterator<char>(fileStream), {}
|
||||||
|
);
|
||||||
|
return {
|
||||||
|
(uint32_t *)buffer.data(), (uint32_t *)(buffer.data() + buffer.size())};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static std::string shader_to_string(const char *path)
|
static std::string shader_to_string(const char *path)
|
||||||
{
|
{
|
||||||
std::ifstream comp_file;
|
std::ifstream comp_file;
|
||||||
@ -39,70 +46,35 @@ static std::string shader_to_string(const char *path)
|
|||||||
return outstr.str();
|
return outstr.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
// the sed replace command as a function
|
std::string replacewith(const char *needle, T val, std::string str)
|
||||||
// 1. /pattern/replace/
|
|
||||||
// 2. /pattern//
|
|
||||||
static std::string regex_replace(const char *expr_string, std::string str)
|
|
||||||
{
|
{
|
||||||
std::string expr(expr_string);
|
|
||||||
std::string pattern, replace;
|
|
||||||
|
|
||||||
if (expr.size() < 3 || expr[0] != '/' || expr.back() != '/') {
|
std::string replace = std::to_string(val);
|
||||||
return str;
|
size_t len = strlen(needle);
|
||||||
} else {
|
|
||||||
// shift 1
|
for (size_t pos = 0; (pos = str.find(needle)) != std::string::npos;) {
|
||||||
expr = expr.substr(1, expr.size()-2);
|
str.replace(pos, len, replace);
|
||||||
}
|
}
|
||||||
for (size_t pos = 0, nxpos = 0; ; pos = nxpos) {
|
|
||||||
nxpos = expr.find("/", pos);
|
|
||||||
if (nxpos == std::string::npos) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// skip escaped '/'
|
|
||||||
if (nxpos > 0 && expr[nxpos-1] == '\\') {
|
|
||||||
if (nxpos > 1 && expr[nxpos-2] != '\\') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pattern = expr.substr(0, nxpos);
|
|
||||||
replace = expr.substr(nxpos+1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (pattern.empty()) {
|
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::regex reg(pattern);
|
// compute C = A*B on the GPU
|
||||||
std::string newstr = std::regex_replace(str, reg, replace);
|
|
||||||
return newstr;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T> std::string regex_subst(const char *expr, T val, std::string str)
|
|
||||||
{
|
|
||||||
std::regex reg(expr);
|
|
||||||
std::regex_match();
|
|
||||||
return newstr;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
// create the kompute manager
|
// create the kompute manager
|
||||||
kp::Manager mgr;
|
kp::Manager mgr;
|
||||||
|
// the matrices are on the stack; this breaks for large MSIZE (e.g. 1024)
|
||||||
// C = A*B
|
float matrixA[MSIZE][MSIZE] = {0};
|
||||||
float matrixA[1024][1024] = {0};
|
float matrixB[MSIZE][MSIZE] = {0};
|
||||||
float matrixB[1024][1024] = {0};
|
float matrixC[MSIZE][MSIZE] = {0};
|
||||||
float matrixC[1024][1024] = {0};
|
|
||||||
// fill an identity matrix
|
// fill an identity matrix
|
||||||
for (int y = 0; y < 1024; y++) {
|
for (int y = 0; y < MSIZE; y++) {
|
||||||
matrixA[y][y] = 1.0;
|
matrixA[y][y] = 1.0;
|
||||||
}
|
}
|
||||||
// fill a matrix with data
|
// fill a matrix with data
|
||||||
for (int y = 0; y < 1024; y++) {
|
for (int y = 0; y < MSIZE; y++) {
|
||||||
for (int x = 0; x < 1024; x++) {
|
for (int x = 0; x < MSIZE; x++) {
|
||||||
matrixB[y][x] = x * 0.74 - y * 0.22;
|
matrixB[y][x] = x * 0.74 - y * 0.22;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -112,13 +84,12 @@ int main()
|
|||||||
kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat;
|
kp::Tensor::TensorDataTypes dtype = kp::Tensor::TensorDataTypes::eFloat;
|
||||||
|
|
||||||
// auto: avoids spelling out the tensor handle type returned by mgr.tensor()
|
// auto: avoids spelling out the tensor handle type returned by mgr.tensor()
|
||||||
auto tensorA = mgr.tensor(matrixA, 1024*1024, sizeof(float), dtype);
|
auto tensorA = mgr.tensor(matrixA, MSIZE * MSIZE, sizeof(float), dtype);
|
||||||
auto tensorB = mgr.tensor(matrixB, 1024*1024, sizeof(float), dtype);
|
auto tensorB = mgr.tensor(matrixB, MSIZE * MSIZE, sizeof(float), dtype);
|
||||||
auto tensorC = mgr.tensor(matrixC, 1024*1024, sizeof(float), dtype);
|
auto tensorC = mgr.tensor(matrixC, MSIZE * MSIZE, sizeof(float), dtype);
|
||||||
|
|
||||||
const std::vector<std::shared_ptr<kp::Tensor>> params = {
|
const std::vector<std::shared_ptr<kp::Tensor>> params = {
|
||||||
tensorA, tensorB, tensorC
|
tensorA, tensorB, tensorC};
|
||||||
};
|
|
||||||
|
|
||||||
// workgroup, dispatch a 2D array of workgroups (2D matrices)
|
// workgroup, dispatch a 2D array of workgroups (2D matrices)
|
||||||
// TODO: determine the size of the workgroups by doing some calls to vk
|
// TODO: determine the size of the workgroups by doing some calls to vk
|
||||||
@ -126,19 +97,24 @@ int main()
|
|||||||
// this should call vkCmdDispatch(x, y, z)
|
// this should call vkCmdDispatch(x, y, z)
|
||||||
kp::Workgroup workgroup({wgrp_x, wgrp_y, 1});
|
kp::Workgroup workgroup({wgrp_x, wgrp_y, 1});
|
||||||
|
|
||||||
// substitute the values in the shader
|
// get the shader code into a string
|
||||||
const char *shader_path = "shader.comp";
|
const char *shader_path = "shader.comp";
|
||||||
std::string shader_str = shader_to_string(shader_path);
|
std::string shader_str = shader_to_string(shader_path);
|
||||||
shader_str = regex_replace("/{lcsize_x}/", shader_str);
|
|
||||||
const std::vector<uint32_t> shader = compile_shader(shader_to_string("shader.comp"));
|
|
||||||
|
|
||||||
|
// substitute the value for the number of threads (xyz) per workgroup since
|
||||||
|
// it has to be a compile-time constant
|
||||||
|
shader_str = replacewith<int>("__lcsize_x__", 32, shader_str);
|
||||||
|
shader_str = replacewith<int>("__lcsize_y__", 32, shader_str);
|
||||||
|
shader_str = replacewith<int>("__lcsize_z__", 1, shader_str);
|
||||||
|
|
||||||
std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(
|
printf("%s\n", shader_str.c_str());
|
||||||
params,
|
return 0;
|
||||||
shader,
|
|
||||||
workgroup,
|
const std::vector<uint32_t> shader =
|
||||||
{1024.0}
|
compile_shader(shader_to_string("shader.comp"));
|
||||||
);
|
|
||||||
|
std::shared_ptr<kp::Algorithm> algo =
|
||||||
|
mgr.algorithm(params, shader, workgroup, {MSIZE});
|
||||||
|
|
||||||
mgr.sequence()
|
mgr.sequence()
|
||||||
->record<kp::OpTensorSyncDevice>(params)
|
->record<kp::OpTensorSyncDevice>(params)
|
||||||
@ -146,7 +122,6 @@ int main()
|
|||||||
->record<kp::OpTensorSyncLocal>(params)
|
->record<kp::OpTensorSyncLocal>(params)
|
||||||
->eval();
|
->eval();
|
||||||
|
|
||||||
|
|
||||||
// print the resulting matrix
|
// print the resulting matrix
|
||||||
std::cout << "Output: { ";
|
std::cout << "Output: { ";
|
||||||
for (const float &elem : tensorC->vector<float>()) {
|
for (const float &elem : tensorC->vector<float>()) {
|
||||||
|
24
test2/shader.comp
Normal file
24
test2/shader.comp
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#version 450
|
||||||
|
// clang-format off
|
||||||
|
|
||||||
|
// The number of threads spawned per workgroup; these placeholders are
|
||||||
|
// substituted by the host program before the shader is compiled
|
||||||
|
layout(
|
||||||
|
local_size_x = __lcsize_x__,
|
||||||
|
local_size_y = __lcsize_y__,
|
||||||
|
local_size_z = __lcsize_z__
|
||||||
|
) in;
|
||||||
|
|
||||||
|
// The buffers are provided via the tensors
|
||||||
|
layout(binding = 0) buffer tensorA { float matA[]; };
|
||||||
|
layout(binding = 1) buffer tensorB { float matB[]; };
|
||||||
|
layout(binding = 2) buffer tensorC { float matC[]; };
|
||||||
|
|
||||||
|
// specialization constants
|
||||||
|
layout(constant_id = 0) const float tensor_size_f = 0;
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
uint index = gl_GlobalInvocationID.x;
|
||||||
|
o[index] = a[index] * b[index];
|
||||||
|
}
|
BIN
test2/test2
BIN
test2/test2
Binary file not shown.
Loading…
Reference in New Issue
Block a user