20 const void* inputData,
22 unsigned int dataTypeSize)
24 const unsigned int blockSize = descriptor.
m_BlockSize;
28 const unsigned int batches = inputShape[0];
31 const unsigned int inDepth = inputShape[dataLayoutIndexed.
GetChannelsIndex()];
32 const unsigned int inHeight = inputShape[dataLayoutIndexed.
GetHeightIndex()];
33 const unsigned int inWidth = inputShape[dataLayoutIndexed.
GetWidthIndex()];
35 const unsigned int outDepth = inDepth / (blockSize * blockSize);
56 permDestShape =
TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
57 permVector = { 2, 4, 0, 1, 3 };
61 permDestShape =
TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
62 permVector = { 0, 2, 1, 3, 4 };
65 const unsigned int numElementsPerBatch = inputShape.
GetNumElements() / batches;
67 for (
unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
69 const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
73 static_cast<const void*
>(
reinterpret_cast<const uint8_t*
>(inputData) + batchDataOffset),
74 static_cast<void*
>(
reinterpret_cast<uint8_t*
>(outputData) + batchDataOffset),