18 const void* inputData,
20 unsigned int dataTypeSize)
22 const unsigned int blockSize = descriptor.
m_BlockSize;
25 const unsigned int batches = inputShape[0];
28 const unsigned int inDepth = inputShape[dataLayoutIndexed.
GetChannelsIndex()];
29 const unsigned int inHeight = inputShape[dataLayoutIndexed.
GetHeightIndex()];
30 const unsigned int inWidth = inputShape[dataLayoutIndexed.
GetWidthIndex()];
32 const unsigned int outDepth = inDepth / (blockSize * blockSize);
53 permDestShape =
TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
54 permVector = { 2, 4, 0, 1, 3 };
58 permDestShape =
TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
59 permVector = { 0, 2, 1, 3, 4 };
62 const unsigned int numElementsPerBatch = inputShape.
GetNumElements() / batches;
64 for (
unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
66 const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
70 static_cast<const void*
>(
reinterpret_cast<const uint8_t*
>(inputData) + batchDataOffset),
71 static_cast<void*
>(
reinterpret_cast<uint8_t*
>(outputData) + batchDataOffset),