77 unsigned int paddingTop,
78 unsigned int paddingLeft,
81 unsigned int xDilation,
82 unsigned int yDilation,
85 if (biasEnabled && !pBiasDecoder)
92 const unsigned int heightIndex = dataLayoutIndexed.
GetHeightIndex();
93 const unsigned int widthIndex = dataLayoutIndexed.
GetWidthIndex();
98 const unsigned int inputChannels = rInputShape[channelsIndex];
99 const unsigned int outputChannels = rOutputShape[channelsIndex];
100 const unsigned int depthMultiplier = depthwise ? outputChannels/inputChannels : 1;
102 const unsigned int batchSize = rOutputShape[0];
103 const unsigned int outputHeight = rOutputShape[heightIndex];
104 const unsigned int outputWidth = rOutputShape[widthIndex];
105 const unsigned int inputHeight = rInputShape[heightIndex];
106 const unsigned int inputWidth = rInputShape[widthIndex];
108 const unsigned int filterHeight = depthwise ? rFilterShape[1] : rFilterShape[heightIndex];
109 const unsigned int filterWidth = depthwise ? rFilterShape[2] : rFilterShape[widthIndex];
111 const std::vector<float> inputVec = rInputDecoder.
DecodeTensor(rInputShape);
112 const std::vector<float> filterVec = rFilterDecoder.
DecodeTensor(rFilterShape, depthwise);
115 const std::vector<float> biasVec = biasEnabled ? pBiasDecoder->
DecodeTensor(biasShape) : std::vector<float>();
117 for (
unsigned int batchIdx = 0; batchIdx < batchSize; batchIdx++)
119 for (
unsigned int cOutput = 0; cOutput < outputChannels; cOutput++)
121 for (
unsigned int yOutput = 0; yOutput < outputHeight; yOutput++)
123 for (
unsigned int xOutput = 0; xOutput < outputWidth; xOutput++)
130 for (
unsigned int cInput = 0; cInput < (depthwise ? 1 : inputChannels); cInput++)
132 for (
unsigned int yFilter = 0; yFilter < filterHeight; yFilter++)
134 for (
unsigned int xFilter = 0; xFilter < filterWidth; xFilter++)
137 unsigned int filterIndex = 0;
142 cInput = cOutput / depthMultiplier;
144 filterIndex = xFilter * outputChannels + cOutput +
145 yFilter * filterWidth * outputChannels;
153 filterIndex = cOutput * filterHeight * filterWidth * inputChannels +
154 yFilter * filterWidth * inputChannels +
155 xFilter * inputChannels +
160 filterIndex = cOutput * filterWidth * filterHeight * inputChannels +
161 cInput * filterWidth * filterHeight +
162 yFilter * filterWidth +
167 unsigned int yInput = yOutput * yStride + yFilter * yDilation;
168 unsigned int xInput = xOutput * xStride + xFilter * xDilation;
173 if (yInput < paddingTop || yInput >= inputHeight + paddingTop ||
174 xInput < paddingLeft || xInput >= inputWidth + paddingLeft)
180 unsigned int inputIndex = 0;
186 inputIndex = batchIdx * inputHeight * inputWidth * inputChannels +
187 (yInput - paddingTop) * inputWidth * inputChannels +
188 (xInput - paddingLeft) * inputChannels +
193 inputIndex = batchIdx * inputWidth * inputHeight * inputChannels +
194 inputWidth * inputHeight * cInput +
195 inputWidth * (yInput - paddingTop) +
196 xInput - paddingLeft;
198 inputValue = inputVec[inputIndex];
201 sum += filterVec[filterIndex] * inputValue;
208 sum += biasVec[cOutput];
214 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
215 yOutput * outputWidth * outputChannels +
216 xOutput * outputChannels +
221 outIdx = batchIdx * outputHeight * outputWidth * outputChannels +
222 cOutput * outputHeight * outputWidth +
223 yOutput * outputWidth +
227 rOutputEncoder[outIdx];
228 rOutputEncoder.
Set(sum);
/// Direct nested-loop convolution over float-decoded tensors.
///
/// As far as the visible body shows, the input and filter are decoded up front into
/// std::vector<float> via DecodeTensor(). The loops then run
/// batch -> output channel -> output row -> output column, and for every output element
/// accumulate filter * input over input channel / filter row / filter column.
/// The result is written by indexing rOutputEncoder with the flat output index and then
/// calling Set(sum).
///
/// @param rInputShape     Shape used to decode the input and to read input height, width and channels.
/// @param rInputDecoder   Source of input values (DecodeTensor(rInputShape)).
/// @param rOutputShape    Supplies batch size (element 0) and output height, width and channels.
/// @param rOutputEncoder  Destination; positioned with operator[] and written with Set().
/// @param rFilterShape    Filter shape; in depthwise mode height/width are read from elements 1 and 2,
///                        otherwise from the layout's height/width indices.
/// @param rFilterDecoder  Source of filter values (DecodeTensor(rFilterShape, depthwise)).
/// @param biasEnabled     When true the bias tensor is decoded and biasVec[cOutput] is added to the sum
///                        (the guarding condition is not visible in this listing - presumably biasEnabled).
/// @param pBiasDecoder    Bias source; the body tests `biasEnabled && !pBiasDecoder`
///                        (the consequence is not visible here - presumably an error is raised; confirm).
/// @param dataLayout      Presumably selects the height/width/channel indices and which of the two flat
///                        index formulas (channels-last vs. channels-first style) is used - TODO confirm.
/// @param paddingTop      Input rows are addressed in padded coordinates; filter taps with
///                        yInput < paddingTop or yInput >= inputHeight + paddingTop fall in the padding.
/// @param paddingLeft     Same as paddingTop, for columns.
///                        NOTE(review): handling of padded taps is elided from this listing -
///                        presumably they contribute zero; verify.
/// @param xStride         Horizontal step: xInput = xOutput * xStride + xFilter * xDilation.
/// @param yStride         Vertical step:   yInput = yOutput * yStride + yFilter * yDilation.
/// @param xDilation       Horizontal spacing between filter taps (see xStride formula).
/// @param yDilation       Vertical spacing between filter taps (see yStride formula).
/// @param depthwise       When true the input-channel loop runs once, the input channel is derived as
///                        cOutput / depthMultiplier, and depthMultiplier = outputChannels / inputChannels.
///                        NOTE(review): that division is unguarded, so an input shape with zero channels
///                        would divide by zero in depthwise mode.
void Convolve(const TensorShape &rInputShape, Decoder< float > &rInputDecoder, const TensorShape &rOutputShape, Encoder< float > &rOutputEncoder, const TensorShape &rFilterShape, Decoder< float > &rFilterDecoder, bool biasEnabled, Decoder< float > *pBiasDecoder, DataLayout dataLayout, unsigned int paddingTop, unsigned int paddingLeft, unsigned int xStride, unsigned int yStride, unsigned int xDilation, unsigned int yDilation, bool depthwise)