24.02.1
|
Go to the documentation of this file.
48 struct BatchNormalizationSelectorData
53 using BatchNormalizationSelectorPtr = std::add_pointer<bool(
const BatchNormalizationSelectorData &data)>
::type;
54 using BatchNormalizationKernelPtr = std::add_pointer<void(ITensor *,
61 ActivationLayerInfo &,
64 struct BatchNormalizationKernel
71 static const BatchNormalizationKernel available_kernels[] = {
72 #if defined(ARM_COMPUTE_ENABLE_SVE)
73 {
"sve_fp16_batch_normalization",
74 [](
const BatchNormalizationSelectorData &data) {
return data.dt ==
DataType::F16 && data.ci.has_sve(); },
76 {
"sve_fp32_batch_normalization",
77 [](
const BatchNormalizationSelectorData &data) {
return data.dt ==
DataType::F32 && data.ci.has_sve(); },
80 #if defined(ARM_COMPUTE_ENABLE_NEON)
81 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
82 {
"neon_fp16_batch_normalization",
83 [](
const BatchNormalizationSelectorData &data) {
return data.dt ==
DataType::F16; },
86 {
"neon_fp32_batch_normalization",
87 [](
const BatchNormalizationSelectorData &data) {
return data.dt ==
DataType::F32; },
92 const BatchNormalizationKernel *get_implementation(
const BatchNormalizationSelectorData &data)
94 for (
const auto &uk : available_kernels)
96 if (uk.is_selected(data))
105 const ITensorInfo *output,
106 const ITensorInfo *mean,
107 const ITensorInfo *var,
108 const ITensorInfo *beta,
109 const ITensorInfo *gamma,
115 const auto *uk = get_implementation(BatchNormalizationSelectorData{
input->data_type(),
CPUInfo::get()});
122 act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
124 ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
128 if (
nullptr != output)
142 if (gamma !=
nullptr)
154 void NEBatchNormalizationLayerKernel::configure_non_fused()
170 void NEBatchNormalizationLayerKernel::configure_fused()
173 static std::map<ActivationLayerInfo::ActivationFunction, BatchNormFunctionPtr> bn_fused_map_f32_nchw = {
174 {ActivationLayerInfo::ActivationFunction::RELU,
176 {ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
178 {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
182 static std::map<ActivationLayerInfo::ActivationFunction, BatchNormFunctionPtr> bn_fused_map_f16_nchw = {
183 {ActivationLayerInfo::ActivationFunction::RELU,
185 {ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
187 {ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
193 _func = bn_fused_map_f16_nchw[_act_info.
activation()];
196 _func = bn_fused_map_f32_nchw[_act_info.
activation()];
229 mean->
info(), var->
info(), (beta !=
nullptr) ? beta->
info() :
nullptr,
241 const bool run_in_place = (output ==
nullptr) || (output ==
input);
257 configure_non_fused();
263 INEKernel::configure(win);
265 if (output !=
nullptr)
296 (*_func)(
window, _input, _output, _mean, _var, _beta, _gamma, _epsilon, _act_info);
302 uk->ukernel(_input, _output, _mean, _var, _beta, _gamma, _epsilon, _act_info,
window);
Class to describe a number of elements in each dimension.
@ NCHW
Num samples, channels, height, width.
NEBatchNormalizationLayerKernel()
Default constructor.
virtual DataLayout data_layout() const =0
Get the data layout of the tensor.
decltype(strategy::transforms) typedef type
Window calculate_max_window(const ValidRegion &valid_region, const Steps &steps, bool skip_border, BorderSize border_size)
BatchNormalizationKernelPtr ukernel
arm_compute::ActivationFunction ActivationFunction
void fp32_batch_normalization_nchw_non_fused_lubrelu(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info)
#define ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(k)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(...)
void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta=nullptr, const ITensor *gamma=nullptr, float epsilon=0.001f, ActivationLayerInfo act_info=ActivationLayerInfo())
Set the input and output tensors.
#define ARM_COMPUTE_ERROR(msg)
Print the given message then throw an std::runtime_error.
static CPUInfo & get()
Access the CPUInfo singleton.
Interface for CPU tensor.
#define REGISTER_FP16_NEON(func_name)
ActivationFunction activation() const
Get the type of activation function.
Includes all wrapper headers at once.
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(...)
#define ARM_COMPUTE_RETURN_ON_ERROR(status)
Checks if a status contains an error and returns it.
Activation Layer Information class.
#define REGISTER_FP32_NEON(func_name)
void fp32_neon_batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo &act_info, const Window &window)
void fp32_batch_normalization_nchw_non_fused(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
#define ARM_COMPUTE_ERROR_ON_NULLPTR(...)
#define REGISTER_FP32_SVE(func_name)
virtual ITensorInfo * info() const =0
Interface to be implemented by the child class to return the tensor's metadata.
void fp16_sve_batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo &act_info, const Window &window)
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
#define ARM_COMPUTE_ERROR_THROW_ON(status)
bool enabled() const
Check if initialised.
void fp16_batch_normalization_nchw_non_fused(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
void fp16_neon_batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo &act_info, const Window &window)
void fp16_batch_normalization_nchw_non_fused_brelu(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
#define ARM_COMPUTE_RETURN_ERROR_ON(cond)
If the condition is true, an error is returned.
const BatchNormalizationSelectorPtr is_selected
bool auto_init_if_empty(ITensorInfo &info, const TensorShape &shape, int num_channels, DataType data_type, QuantizationInfo quantization_info=QuantizationInfo())
Auto initialize the tensor info (shape, number of channels and data type) if the current assignment is empty.
#define ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(f, s)
virtual DataType data_type() const =0
Data type used for each element of the tensor.
void fp16_batch_normalization_nchw_non_fused_relu(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
#define ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(...)
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
void run(const Window &window, const ThreadInfo &info) override
Execute the kernel on the passed window.
const Window & window() const
The maximum window the kernel can be executed on.
Information about executing thread and CPU.
size_t get_data_layout_dimension_index(const DataLayout &data_layout, const DataLayoutDimension &data_layout_dimension)
Get the index of the given dimension.
void fp32_batch_normalization_nchw_non_fused_relu(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
Describe a multidimensional execution window.
Copyright (c) 2017-2024 Arm Limited.
#define REGISTER_FP16_SVE(func_name)
@ F16
16-bit floating-point number
void fp32_sve_batch_normalization(ITensor *src, ITensor *dst, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo &act_info, const Window &window)
Store the tensor's metadata.
@ F32
32-bit floating-point number
ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false)
void fp16_batch_normalization_nchw_non_fused_lubrelu(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)
DataType
Available data types.
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *var, const ITensorInfo *beta=nullptr, const ITensorInfo *gamma=nullptr, float epsilon=0.001f, ActivationLayerInfo act_info=ActivationLayerInfo())
Static function to check if given info will lead to a valid configuration of NEBatchNormalizationLayerKernel.
void fp32_batch_normalization_nchw_non_fused_brelu(const Window &window, ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon, ActivationLayerInfo act_info)