// Per-dimension value tables: _lws_x, _lws_y, _lws_z and _wbsm.
// CLTuningParametersListNormal::operator[] indexes them with coords[0..3].
// Brace-initialisation with { 0 } gives each vector exactly one element, 0
// (the initializer_list constructor is selected, not the size constructor).
40 std::vector<unsigned int> _lws_x{ 0 };
41 std::vector<unsigned int> _lws_y{ 0 };
42 std::vector<unsigned int> _lws_z{ 0 };
43 std::vector<int> _wbsm{ 0 };
// Defaulted default constructor.
48 CLTuningParametersList() =
default;
// Defaulted copy constructor.
50 CLTuningParametersList(
const CLTuningParametersList &) =
default;
// Defaulted move constructor, declared noexcept(true).
52 CLTuningParametersList(CLTuningParametersList &&) noexcept(
true) =
default;
// Defaulted copy assignment.
54 CLTuningParametersList &operator=(
const CLTuningParametersList &) =
default;
// Defaulted move assignment, declared noexcept(true).
56 CLTuningParametersList &operator=(CLTuningParametersList &&) noexcept(
true) =
default;
// Virtual destructor; the classes below derive publicly from this type.
58 virtual ~CLTuningParametersList() =
default;
// Number of entries in the list; the out-of-line definition returns
// search_space_shape.total_size(). It is marked override, so the virtual it
// replaces is declared in a base class that this listing does not show.
61 virtual size_t size()
override;
// Tuning-parameter list whose operator[] derives the first three values
// directly from the coordinate (coordinate + 1) instead of from a lookup table.
// NOTE(review): the class braces, access specifiers and the operator[]
// declaration are not present in this listing.
65 class CLTuningParametersListExhaustive :
public CLTuningParametersList
// No default construction: a gws / tuning_info pair is required.
69 CLTuningParametersListExhaustive() =
delete;
// Builds the search space from the global work size and the tuning settings.
71 CLTuningParametersListExhaustive(
const cl::NDRange &gws,
CLTuningInfo tuning_info);
// Defaulted copy constructor.
73 CLTuningParametersListExhaustive(
const CLTuningParametersListExhaustive &) =
default;
// Defaulted move constructor, declared noexcept(true).
75 CLTuningParametersListExhaustive(CLTuningParametersListExhaustive &&) noexcept(
true) =
default;
// Defaulted copy assignment.
77 CLTuningParametersListExhaustive &operator=(
const CLTuningParametersListExhaustive &) =
default;
// Defaulted move assignment, declared noexcept(true).
79 CLTuningParametersListExhaustive &operator=(CLTuningParametersListExhaustive &&) noexcept(
true) =
default;
// Defaulted destructor (virtual through the base-class declaration).
81 ~CLTuningParametersListExhaustive() =
default;
// Tuning-parameter list whose operator[] looks values up in the _lws_x,
// _lws_y, _lws_z and _wbsm tables filled in by the constructor.
// NOTE(review): the class braces, access specifiers and the operator[]
// declaration are not present in this listing.
88 class CLTuningParametersListNormal :
public CLTuningParametersList
// Builds the value tables from the global work size and the tuning settings.
92 CLTuningParametersListNormal(
const cl::NDRange &gws,
CLTuningInfo tuning_info);
// Defaulted copy constructor.
94 CLTuningParametersListNormal(
const CLTuningParametersListNormal &) =
default;
// Defaulted move constructor, declared noexcept(true).
96 CLTuningParametersListNormal(CLTuningParametersListNormal &&) noexcept(
true) =
default;
// Defaulted copy assignment.
98 CLTuningParametersListNormal &operator=(
const CLTuningParametersListNormal &) =
default;
// Defaulted move assignment, declared noexcept(true).
100 CLTuningParametersListNormal &operator=(CLTuningParametersListNormal &&) noexcept(
true) =
default;
// Defaulted destructor (virtual through the base-class declaration).
102 ~CLTuningParametersListNormal() =
default;
// Defaulted default constructor, declared after the destructor.
// NOTE(review): presumably sits under a non-public access specifier so that
// CLTuningParametersListRapid can use it - the specifier is not visible here.
108 CLTuningParametersListNormal() =
default;
// Helper that fills `lws` for one dimension.
//   lws         - output vector.
//   gws         - global work size of that dimension.
//   lws_max     - largest value considered (the loop in the definition runs i = 2..lws_max).
//   mod_let_one - when true, a value is accepted if gws % i <= 1 instead of
//                 requiring gws % i == 0.
119 void initialize_lws_values(std::vector<unsigned int> &lws,
unsigned int gws,
unsigned int lws_max,
bool mod_let_one);
// Variant of CLTuningParametersListNormal whose constructor caps the
// per-dimension maxima at 8 / 4 / 4 and whose helper steps candidates by i *= 4.
// NOTE(review): the class braces and access specifiers are not present in
// this listing.
123 class CLTuningParametersListRapid :
public CLTuningParametersListNormal
// No default construction: a gws / tuning_info pair is required.
127 CLTuningParametersListRapid() =
delete;
// Builds the value tables from the global work size and the tuning settings.
129 CLTuningParametersListRapid(
const cl::NDRange &gws,
CLTuningInfo tuning_info);
// Defaulted copy constructor.
131 CLTuningParametersListRapid(
const CLTuningParametersListRapid &) =
default;
// Defaulted move constructor, declared noexcept(true).
133 CLTuningParametersListRapid(CLTuningParametersListRapid &&) noexcept(
true) =
default;
// Defaulted copy assignment.
135 CLTuningParametersListRapid &operator=(
const CLTuningParametersListRapid &) =
default;
// Defaulted move assignment, declared noexcept(true).
137 CLTuningParametersListRapid &operator=(CLTuningParametersListRapid &&) noexcept(
true) =
default;
// Defaulted virtual destructor.
139 virtual ~CLTuningParametersListRapid() =
default;
// Two-argument helper: fills `lws` using only the upper bound `lws_max`.
// Because it shares its name with the four-argument base-class helper, it
// hides that overload inside this class.
148 void initialize_lws_values(std::vector<unsigned int> &lws,
unsigned int lws_max);
// Returns the number of entries in the list, which is the total size of
// search_space_shape.
// NOTE(review): the braces around the body are missing from this listing.
151 size_t CLTuningParametersList::size()
153 return search_space_shape.total_size();
// Maps a linear index to a CLTuningParams value.
// The first three arguments are coords[0..2] shifted up by one, so coordinate 0
// yields 1; the fourth is coords[3] converted to int.
// NOTE(review): `coords` is defined on a line that is absent from this
// listing; presumably it is index2coords(search_space_shape, index) - confirm.
// NOTE(review): each `1U` literal is split across two lines here ("1" then
// "U, ..."); that is an artefact of the listing.
156 CLTuningParams CLTuningParametersListExhaustive::operator[](
size_t index)
160 return CLTuningParams(coords[0] + 1
U, coords[1] + 1
U, coords[2] + 1
U, static_cast<int>(coords[3]));
// Constructor: sets up search_space_shape for the list.
// Only the handling of dimension 3 is visible: it is first set to 1, then
// _wbsm is replaced by the seven values -3..3 and dimension 3 becomes that count.
// NOTE(review): the lines that size dimensions 0-2 and the guard around the
// last two assignments are missing from this listing; the guard is presumably
// a check of tuning_info.tune_wbsm - confirm against the real source.
163 CLTuningParametersListExhaustive::CLTuningParametersListExhaustive(
const cl::NDRange &gws,
CLTuningInfo tuning_info)
169 search_space_shape[3] = 1;
172 _wbsm = { -3, -2, -1, 0, 1, 2, 3 };
173 search_space_shape[3] = _wbsm.size();
// Maps a linear index to a CLTuningParams value by table lookup: coords[0..2]
// select entries of _lws_x / _lws_y / _lws_z and coords[3] selects an entry
// of _wbsm.
// NOTE(review): `coords` is defined on a line that is absent from this
// listing; presumably it is index2coords(search_space_shape, index) - confirm.
177 CLTuningParams CLTuningParametersListNormal::operator[](
size_t index)
181 return CLTuningParams(_lws_x[coords[0]], _lws_y[coords[1]], _lws_z[coords[2]], _wbsm[coords[3]]);
// Constructor: fills the three LWS tables and sizes search_space_shape to match.
// NOTE(review): lws_x_max / lws_y_max / lws_z_max are defined on lines that
// are absent from this listing.
184 CLTuningParametersListNormal::CLTuningParametersListNormal(
const cl::NDRange &gws,
CLTuningInfo tuning_info)
// For x and y the relaxed divisibility test (remainder <= 1) is enabled only
// when gws[2] > 16; z always uses the strict test.
194 initialize_lws_values(_lws_x, gws[0], lws_x_max, gws[2] > 16);
195 initialize_lws_values(_lws_y, gws[1], lws_y_max, gws[2] > 16);
196 initialize_lws_values(_lws_z, gws[2], lws_z_max,
false);
// Dimensions 0-2 of the search space equal the number of entries per table.
198 search_space_shape[0] = _lws_x.size();
199 search_space_shape[1] = _lws_y.size();
200 search_space_shape[2] = _lws_z.size();
// Dimension 3 is first set to 1, then _wbsm becomes the five values -2..2 and
// dimension 3 becomes that count.
// NOTE(review): the guard around the last two assignments is missing from
// this listing; presumably a check of tuning_info.tune_wbsm - confirm.
201 search_space_shape[3] = 1;
204 _wbsm = { -2, -1, 0, 1, 2 };
205 search_space_shape[3] = _wbsm.size();
// Scans i = 2..lws_max and selects the values that pass the test on line 221.
// NOTE(review): the statements before the loop and the body of the `if` are
// missing from this listing; presumably each selected i is appended to `lws`
// - confirm against the real source.
209 void CLTuningParametersListNormal::initialize_lws_values(std::vector<unsigned int> &lws,
unsigned int gws,
unsigned int lws_max,
bool mod_let_one)
213 for(
unsigned int i = 2; i <= lws_max; ++i)
// i has a single bit set; i starts at 2, so the i == 0 case cannot occur.
216 const bool is_power_of_two = (i & (i - 1)) == 0;
// Strict mode: i must divide gws exactly. Relaxed mode (mod_let_one): a
// remainder of 1 is accepted as well.
219 const bool mod_cond = mod_let_one ? (gws % i) <= 1 : (gws % i) == 0;
// A power of two is selected even when it fails the divisibility test.
221 if(mod_cond || is_power_of_two)
// Constructor: fills the three LWS tables with capped upper bounds and sizes
// search_space_shape to match.
228 CLTuningParametersListRapid::CLTuningParametersListRapid(
const cl::NDRange &gws,
CLTuningInfo tuning_info)
// Upper bound per dimension: the global size, capped at 8 (x), 4 (y), 4 (z).
230 auto lws_x_max = std::min(static_cast<unsigned int>(gws[0]), 8u);
231 auto lws_y_max = std::min(static_cast<unsigned int>(gws[1]), 4u);
232 auto lws_z_max = std::min(static_cast<unsigned int>(gws[2]), 4u);
// Two-argument helper of this class (steps i *= 4), not the base-class one.
238 initialize_lws_values(_lws_x, lws_x_max);
239 initialize_lws_values(_lws_y, lws_y_max);
240 initialize_lws_values(_lws_z, lws_z_max);
// Dimensions 0-2 of the search space equal the number of entries per table.
242 search_space_shape[0] = _lws_x.size();
243 search_space_shape[1] = _lws_y.size();
244 search_space_shape[2] = _lws_z.size();
// Dimension 3 is first set to 1, then _wbsm becomes the three values -1..1
// and dimension 3 becomes that count.
// NOTE(review): the guard around the last two assignments is missing from
// this listing; presumably a check of tuning_info.tune_wbsm - confirm.
245 search_space_shape[3] = 1;
248 _wbsm = { -1, 0, 1 };
249 search_space_shape[3] = _wbsm.size();
// Visits i = 2, 8, 32, ... (multiplying by 4 each step) while i <= lws_max.
// With the caps applied by the constructor (8 / 4 / 4) that is at most 2 and 8.
// NOTE(review): the statements before the loop and the loop body are missing
// from this listing; presumably each visited i is appended to `lws` - confirm.
253 void CLTuningParametersListRapid::initialize_lws_values(std::vector<unsigned int> &lws,
unsigned int lws_max)
257 for(
unsigned int i = 2; i <= lws_max; i *= 4)
// Each return builds one concrete list type from the same (gws, tuning_info)
// pair and hands it back as a std::unique_ptr.
// NOTE(review): the enclosing function signature and the branch that selects
// among these returns are missing from this listing; presumably this is the
// body of get_tuning_parameters_list switching on tuning_info.tuner_mode -
// confirm against the real source.
268 return std::make_unique<CLTuningParametersListExhaustive>(gws, tuning_info);
270 return std::make_unique<CLTuningParametersListNormal>(gws, tuning_info);
272 return std::make_unique<CLTuningParametersListRapid>(gws, tuning_info);
Coordinates index2coords(const TensorShape &shape, int index)
Convert a linear index into n-dimensional coordinates.
bool tune_wbsm
Flag to tune the batches of work groups distributed to compute units.
#define ARM_COMPUTE_ERROR_ON(cond)
If the condition is true then an error message is printed and an exception thrown.
OpenCL tuner parameters.
constexpr unsigned int max_lws_supported_z
constexpr unsigned int max_lws_supported_y
Copyright (c) 2017-2021 Arm Limited.
Interface for Tuning Parameters lists.
Searches a minimal subset of LWS configurations while tuning.
#define ARM_COMPUTE_UNUSED(...)
To avoid unused variables warnings.
constexpr unsigned int max_lws_supported_x
CLTunerMode tuner_mode
Parameter to select the level (granularity) of the tuning.
std::unique_ptr< ICLTuningParametersList > get_tuning_parameters_list(CLTuningInfo tuning_info, const cl::NDRange &gws)
Construct an ICLTuningParametersList object for the given tuner mode and gws configuration.
Searches all possible LWS configurations while tuning.