// Compute-shader entry point: one workgroup handles one output row.
//
// Workgroup `i` reads the value `r` from inB[i + pcs.inBOff] and uses it to
// locate the source data (r * pcs.nb01 + pcs.inAOff). Each loop iteration
// dequantizes one mat4 (16 values) via dequantize_block() and stores those
// 16 values into out_.
//
// The destination offset is derived from the chunk index `ind` instead of a
// per-invocation running counter. A running counter starts at 0 in every
// invocation, so it only matches the real element position when the
// workgroup has exactly one invocation; with more invocations they would all
// write to the same leading slots. Deriving the offset from `ind` gives the
// same result for a local size of 1 and stays correct for larger sizes.
// NOTE(review): the layout(local_size_x = ...) declaration is outside this
// view -- confirm the intended workgroup size.
void main() {
    const uint i = gl_WorkGroupID.x;
    const int r = inB[i + pcs.inBOff];

    // Loop-invariant base offsets for the source data and the destination row.
    const uint inBase = r * pcs.nb01 + pcs.inAOff;
    const uint outBase = i * pcs.nb1/BYTES_FOR_TYPE + pcs.outOff;

    // Strided over the invocations of the workgroup; each `ind` is one
    // 16-value chunk.
    for (uint ind = gl_LocalInvocationID.x; ind < pcs.ne00/16; ind += gl_WorkGroupSize.x) {
        // NL consecutive chunks share one block of SIZE_OF_BLOCK; ind%NL
        // selects the chunk inside that block.
        const uint inIndex = inBase + ind/NL * SIZE_OF_BLOCK;
        const mat4 result = dequantize_block(inIndex, ind%NL);

        // First destination element of this chunk.
        const uint chunkBase = outBase + ind * 16u;
        for (uint j = 0; j < 4; ++j) {
            for (uint k = 0; k < 4; ++k) {
                // Flatten result[j][k] in j-major order: 4 values per j.
                out_[chunkBase + j * 4u + k] = result[j][k];
            }
        }
    }
}