#version 450
// clang-format off

// Workgroup size: the number of invocations (threads) spawned per workgroup
// along x, y and z. The __lcsize_*__ tokens below are placeholders rather
// than valid GLSL; the program substitutes concrete values for them before
// the shader is compiled.
layout(
    local_size_x = __lcsize_x__,
    local_size_y = __lcsize_y__,
    local_size_z = __lcsize_z__
) in;

// Storage buffers supplied by the tensors, attached at bindings 0, 1 and 2.
// Each block holds a single unsized float array, so the number of elements
// is determined by the buffer bound at run time rather than by the shader.
layout(binding = 0) buffer tensorA { float matA[]; };
layout(binding = 1) buffer tensorB { float matB[]; };
layout(binding = 2) buffer tensorC { float matC[]; };

// Specialization constant (constant_id 0). It keeps the default value of 0
// unless a value is supplied when the pipeline is created.
// NOTE(review): presumably the tensor element count passed as a float; it is
// not referenced anywhere else in the visible code - confirm it is needed.
layout(constant_id = 0) const float tensor_size_f = 0;

// Entry point: element-wise multiplication of two buffers.
//
// Each invocation handles exactly one element, selected by the x component
// of its global invocation ID (the y and z components are ignored). It reads
// matA[index] and matB[index] and stores their product in matC[index].
//
// The previous body referred to `a`, `b` and `o`, none of which are declared
// in this shader; the arrays actually exposed by the buffer blocks are matA,
// matB and matC.
//
// NOTE(review): no bounds check is performed, so this assumes the dispatch
// never launches more invocations along x than the buffers have elements -
// confirm against the host-side dispatch size.
void main()
{
	uint index = gl_GlobalInvocationID.x;
	matC[index] = matA[index] * matB[index];
}