ArmNN
 25.11
Loading...
Searching...
No Matches
Numpy.hpp
Go to the documentation of this file.
1//
2// Copyright © 2024-2025 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5// Copyright © 2021 Leon Merten Lohse
6// SPDX-License-Identifier: MIT
7//
8
9#ifndef NUMPY_HPP
10#define NUMPY_HPP
11
12#include <fmt/format.h>
13#include "Types.hpp"
14#include "Tensor.hpp"
15
16#include <fstream>
17#include <iostream>
18
19namespace armnnNumpy
20{
21
22 /// @struct HeaderInfo: contains information from the numpy file to be parsed.
23 /// @var m_MajorVersion: Single byte containing the major version of numpy implementation
24 /// @var m_MinorVersion: Single byte containing the minor version of numpy implementation
25 /// @var m_MagicStringLength: unsigned 8bit int containing the length of the magic string.
26 /// @var m_MagicString: Char array containing the magic string at the beginning of every numpy file.
27 /// @var m_HeaderLen: 2 or 4byte unsigned int containing the length of the header, depending on version.
29 {
32 const uint8_t m_MagicStringLength = 6;
33 const char m_MagicString[7] = "\x93NUMPY";
35 uint32_t m_HeaderLen;
36 };
37
38 /// @struct Header: contains information from the numpy file to be parsed.
39 /// @var m_HeaderString: String containing header.
40 /// @var m_DescrString: String containing description.
41 /// @var m_FortranOrder: Boolean declaring if array is in Fortran Order.
42 /// @var m_Shape: Shape of the data.
43 struct Header
44 {
45 std::string m_HeaderString;
46 std::string m_DescrString;
48 std::vector<uint32_t> m_Shape;
49 };
50
51 inline void CreateHeaderInfo(std::ifstream &ifStream, HeaderInfo &headerInfo)
52 {
53 // A Numpy header consists of:
54 // a magic string "x93NUMPY"
55 // 1 byte for the major version
56 // 1 byte for the minor version
57 // 2 or 4 bytes for the header length
58 // More info: https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
59 char buffer[headerInfo.m_MagicStringLength + 2lu];
60 ifStream.read(buffer, headerInfo.m_MagicStringLength + 2);
61
62 if (!ifStream)
63 {
64 throw armnn::Exception(
65 fmt::format("Failed to create numpy header info at {}",
66 CHECK_LOCATION().AsString()));
67 }
68 // Verify that the numpy is in the valid format by checking for the magic string
69 int compare_result = ::memcmp(buffer, headerInfo.m_MagicString, headerInfo.m_MagicStringLength);
70 if (compare_result != 0) {
71 throw armnn::Exception(fmt::format("Numpy does not contain magic string. Can not parse invalid numpy {}",
72 CHECK_LOCATION().AsString()));
73 }
74
75 headerInfo.m_MajorVersion = buffer[headerInfo.m_MagicStringLength];
76 headerInfo.m_MinorVersion = buffer[headerInfo.m_MagicStringLength + 1];
77 if(headerInfo.m_MajorVersion == 1 && headerInfo.m_MinorVersion == 0)
78 {
79 ifStream.read(headerInfo.m_HeaderLenBytes, 2);
80 // Header len is written in little endian, so we do a quick test
81 // to check the machines endianness
82 int i = 1;
83 if (*(reinterpret_cast<char *>(&i)) == 1)
84 {
85 headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0]) |
86 (static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 8));
87 }
88 else
89 {
90 headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1]) |
91 (static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 8));
92 }
93 }
94 else if (headerInfo.m_MajorVersion == 2 && headerInfo.m_MinorVersion == 0)
95 {
96 ifStream.read(headerInfo.m_HeaderLenBytes, 4);
97 // Header len is written in little endian, so we do a quick test
98 // to check the machines endianness
99 int i = 1;
100 if (*(reinterpret_cast<char *>(&i)) == 1)
101 {
102 headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 0) |
103 static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 8) |
104 static_cast<unsigned>(headerInfo.m_HeaderLenBytes[2] << 16) |
105 static_cast<unsigned>(headerInfo.m_HeaderLenBytes[3] << 24);
106 }
107 else
108 {
109 headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[3] << 0) |
110 static_cast<unsigned>(headerInfo.m_HeaderLenBytes[2] << 8) |
111 static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 16) |
112 static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 24);
113 }
114 }
115 else
116 {
117 throw armnn::ParseException(fmt::format("Unable to parser Numpy version {}.{} {}",
118 headerInfo.m_MajorVersion,
119 headerInfo.m_MinorVersion,
120 CHECK_LOCATION().AsString()));
121 }
122 }
123
124 /// Primarily used to isolate values from header dictionary
125 inline std::string getSubstring(std::string fullString,
126 std::string substringStart,
127 std::string substringEnd,
128 bool removeStartChar = 0,
129 bool includeEndChar = 0)
130 {
131 size_t startPos = fullString.find(substringStart);
132 size_t endPos = fullString.find(substringEnd, startPos);
133 if (startPos == std::string::npos || endPos == std::string::npos)
134 {
135 throw armnn::ParseException(fmt::format("Unable to find {} in numpy file.",
136 CHECK_LOCATION().AsString()));
137 }
138
139 // std::string.substr takes the starting position and the length of the substring.
140 // To calculate the length we subtract the start position from the end position.
141 // We also add a boolean on whether or not we want to include the character used to find endPos
142 startPos+= removeStartChar;
143 endPos += includeEndChar;
144 return fullString.substr(startPos, endPos - startPos);
145 }
146
147 inline void parseShape(Header& header, std::string& shapeString)
148 {
149 std::istringstream shapeStringStream(shapeString);
150 std::string token;
151 while(getline(shapeStringStream, token, ','))
152 {
153 header.m_Shape.push_back(static_cast<uint32_t >(std::stoi(token)));
154 }
155 }
156
157 inline void CreateHeader(std::ifstream& ifStream, HeaderInfo& headerInfo, Header& header)
158 {
159 char stringBuffer[headerInfo.m_HeaderLen];
160 ifStream.read(stringBuffer, static_cast<std::streamsize>(headerInfo.m_HeaderLen));
161
162 header.m_HeaderString = std::string(stringBuffer, headerInfo.m_HeaderLen);
163 // Remove new line character at the end of the string
164 if(header.m_HeaderString.back() == '\n')
165 {
166 header.m_HeaderString.pop_back();
167 }
168
169 // Remove whitespace from the string.
170 // std::remove shuffles the string by place all whitespace at the end and
171 // returning the start location of the shuffled whitespace.
172 // std::string.erase then deletes the whitespace by deleting characters
173 // between the iterator returned from std::remove and the end of the std::string
174 std::string::iterator whitespaceSubstringStart = std::remove(header.m_HeaderString.begin(),
175 header.m_HeaderString.end(), ' ');
176 header.m_HeaderString.erase(whitespaceSubstringStart, header.m_HeaderString.end());
177
178 // The order of the dictionary should be alphabetical,
179 // however this is not guarenteed so we have to search for the string.
180 // Because of this we do some weird parsing using std::string.find and some magic patterns
181 //
182 // For the description value, we include the end character from the first substring
183 // to help us find the value in the second substring. This should return with a "," at the end.
184 // Since we previously left the "," at the end of the substring,
185 // we can use it to find the end of the description value and then remove it after.
186 std::string descrString = getSubstring(header.m_HeaderString, "'descr", ",", 0, 1);
187 header.m_DescrString = getSubstring(descrString, ":", ",", 1);
188
189 // Fortran order is a python boolean literal, we simply look for a comma to delimit this pair.
190 // Since this is a boolean, both true and false end in an "e" without appearing in between.
191 // It is not great, but it is the easiest way to find the end.
192 // We have to ensure we include this e in the substring.
193 // Since this is a boolean we can check if the string contains
194 // either true or false and set the variable as required
195 std::string fortranOrderString = getSubstring(header.m_HeaderString, "'fortran_order", ",");
196 fortranOrderString = getSubstring(fortranOrderString, ":", "e", 1, 1);
197 header.m_FortranOrder = fortranOrderString.find("True") != std::string::npos ? true : false;
198
199 // The shape is a python tuple so we search for the closing bracket of the tuple.
200 // We include the end character to help us isolate the value substring.
201 // We can extract the inside of the tuple by searching for opening and closing brackets.
202 // We can then remove the brackets isolating the inside of the tuple.
203 // We then need to parse the string into a vector of unsigned integers
204 std::string shapeString = getSubstring(header.m_HeaderString, "'shape", ")", 0, 1);
205 shapeString = getSubstring(shapeString, "(", ")", 1, 0);
206 parseShape(header, shapeString);
207 }
208
/// Reads numElements values of type T from the stream into the caller supplied buffer.
///
/// @tparam T          Element type; the bytes are copied as-is with no conversion.
/// @param ifStream    Binary input stream positioned at the first element.
/// @param tensor      Destination buffer with room for at least numElements elements.
/// @param numElements Number of elements to read.
/// The stream state is not inspected here; callers that care about short reads must check it.
template<typename T>
inline void ReadData(std::ifstream& ifStream, T* tensor, const unsigned int& numElements)
{
    const auto elementSize  = static_cast<std::streamsize>(sizeof(T));
    const auto elementCount = static_cast<std::streamsize>(numElements);
    char* const destination = reinterpret_cast<char *>(tensor);
    ifStream.read(destination, elementSize * elementCount);
}
216
217
218 inline armnn::DataType getArmNNDataType(std::string& descr)
219 {
220 if(descr.find("f4") != std::string::npos || descr.find("f8") != std::string::npos)
221 {
223 }
224 else if (descr.find("f2") != std::string::npos)
225 {
227 }
228 else if (descr.find("i8") != std::string::npos)
229 {
231 }
232 else if (descr.find("i4") != std::string::npos)
233 {
235 }
236 else if (descr.find("i2") != std::string::npos)
237 {
239 }
240 else if (descr.find("i1") != std::string::npos)
241 {
243 }
244 else if (descr.find("u1") != std::string::npos)
245 {
247 }
248 else
249 {
250 throw armnn::Exception(fmt::format("Numpy data type:{} not supported. {}",
251 descr, CHECK_LOCATION().AsString()));
252 }
253 }
254
255 inline std::string getNumpyDescr(armnn::DataType dType)
256 {
257 switch(dType)
258 {
260 return "f" + std::to_string(sizeof(float)); // size of float can be 4 or 8
262 return "f2";
264 return "i8";
266 return "i4";
268 return "i2";
271 return "i1";
273 return "u1";
274 default:
275 throw armnn::Exception(fmt::format("ArmNN to Numpy data type:{} not supported. {}",
276 dType, CHECK_LOCATION().AsString()));
277 }
278 }
279
280 template <typename T>
281 inline bool compareCTypes(std::string& descr)
282 {
283 if(descr.find("f4") != std::string::npos || descr.find("f8") != std::string::npos)
284 {
285 return std::is_same<T, float>::value;
286 }
287 else if (descr.find("i8") != std::string::npos)
288 {
289 return std::is_same<T, int64_t>::value;
290 }
291 else if (descr.find("i4") != std::string::npos)
292 {
293 return std::is_same<T, int32_t>::value;
294 }
295 else if (descr.find("i2") != std::string::npos)
296 {
297 return std::is_same<T, int16_t>::value;
298 }
299 else if (descr.find("i1") != std::string::npos)
300 {
301 return std::is_same<T, int8_t>::value;
302 }
303 else if (descr.find("u1") != std::string::npos)
304 {
305 return std::is_same<T, uint8_t>::value;
306 }
307 else
308 {
309 throw armnn::Exception(fmt::format("Numpy data type:{} not supported. {}",
310 descr, CHECK_LOCATION().AsString()));
311 }
312 }
313
314 inline unsigned int getNumElements(Header& header)
315 {
316 unsigned int numEls = 1;
317 for (auto dim: header.m_Shape)
318 {
319 numEls *= dim;
320 }
321
322 return numEls;
323 }
324
325 // Material in WriteToNumpyFile() has been reused from https://github.com/llohse/libnpy/blob/master/include/npy.hpp
326 // Please see write_header() in the above file for more details.
327 template<typename T>
328 inline void WriteToNumpyFile(const std::string& outputTensorFileName,
329 const T* const array,
330 const unsigned int numElements,
331 armnn::DataType dataType,
332 const armnn::TensorShape& shape)
333 {
334 std::ofstream out(outputTensorFileName, std::ofstream::binary);
335
336 // write header
337 {
338 // Setup string of tensor shape in format (x0, x1, x2, ..)
339 std::string shapeStr = "(";
340 for (uint32_t i = 0; i < shape.GetNumDimensions()-1; i++)
341 {
342 shapeStr = shapeStr + std::to_string(shape[i]) + ", ";
343 }
344 shapeStr = shapeStr + std::to_string(shape[shape.GetNumDimensions()-1]) + ")";
345
346 int i = 1;
347 std::string endianChar = (*(reinterpret_cast<char *>(&i))) ? "<" : ">";
348 std::string dataTypeStr = getNumpyDescr(dataType);
349 std::string fortranOrder = "False";
350 std::string headerStr = "{'descr': '" + endianChar + dataTypeStr +
351 "', 'fortran_order': " + fortranOrder +
352 ", 'shape': " + shapeStr + ", }";
353
354 armnnNumpy::HeaderInfo headerInfo;
355
356 // Header is composed of:
357 // - 6 byte magic string
358 // - 2 byte major and minor version
359 // - 2 byte (v1.0) / 4 byte (v2.0) little-endian unsigned int
360 // - headerStr.length() bytes
361 // - 1 byte for newline termination (\n)
362 size_t length = headerInfo.m_MagicStringLength + 2 + 2 + headerStr.length() + 1;
363 uint8_t major_version = 1;
364
365 // for numpy major version 2, add extra 2 bytes for little-endian int (total 4 bytes)
366 if (length >= 255 * 255)
367 {
368 length += 2;
369 major_version = 2;
370 }
371
372 // Pad with spaces so header length is modulo 16 bytes.
373 size_t padding_length = 16 - length % 16;
374 std::string padding(padding_length, ' ');
375
376 // write magic string
377 out.write(headerInfo.m_MagicString, headerInfo.m_MagicStringLength);
378 out.put(major_version);
379 out.put(0); // minor version
380
381 // write header length
382 if (major_version == 1)
383 {
384 auto header_len = static_cast<uint16_t>(headerStr.length() + padding.length() + 1);
385
386 std::array<uint8_t, 2> header_len_16{static_cast<uint8_t>((header_len >> 0) & 0xff),
387 static_cast<uint8_t>((header_len >> 8) & 0xff)};
388 out.write(reinterpret_cast<char *>(header_len_16.data()), 2);
389 }
390 else
391 {
392 auto header_len = static_cast<uint32_t>(headerStr.length() + padding.length() + 1);
393
394 std::array<uint8_t, 4> header_len_32{
395 static_cast<uint8_t>((header_len >> 0) & 0xff), static_cast<uint8_t>((header_len >> 8) & 0xff),
396 static_cast<uint8_t>((header_len >> 16) & 0xff), static_cast<uint8_t>((header_len >> 24) & 0xff)};
397 out.write(reinterpret_cast<char *>(header_len_32.data()), 4);
398 }
399
400 out << headerStr << padding << '\n';
401 }
402
403 // write tensor data to file
404 out.write(reinterpret_cast<const char *>(array), sizeof(T) * numElements);
405 }
406}
407
408#endif // NUMPY_HPP
#define CHECK_LOCATION()
Base class for all ArmNN exceptions so that users can filter to just those.
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition Tensor.cpp:174
DataType
Definition Types.hpp:49
std::string getSubstring(std::string fullString, std::string substringStart, std::string substringEnd, bool removeStartChar=0, bool includeEndChar=0)
Primarily used to isolate values from header dictionary.
Definition Numpy.hpp:125
void CreateHeaderInfo(std::ifstream &ifStream, HeaderInfo &headerInfo)
Definition Numpy.hpp:51
void parseShape(Header &header, std::string &shapeString)
Definition Numpy.hpp:147
void CreateHeader(std::ifstream &ifStream, HeaderInfo &headerInfo, Header &header)
Definition Numpy.hpp:157
unsigned int getNumElements(Header &header)
Definition Numpy.hpp:314
void ReadData(std::ifstream &ifStream, T *tensor, const unsigned int &numElements)
Definition Numpy.hpp:210
armnn::DataType getArmNNDataType(std::string &descr)
Definition Numpy.hpp:218
bool compareCTypes(std::string &descr)
Definition Numpy.hpp:281
std::string getNumpyDescr(armnn::DataType dType)
Definition Numpy.hpp:255
void WriteToNumpyFile(const std::string &outputTensorFileName, const T *const array, const unsigned int numElements, armnn::DataType dataType, const armnn::TensorShape &shape)
Definition Numpy.hpp:328
from the numpy file to be parsed.
Definition Numpy.hpp:44
std::string m_DescrString
Definition Numpy.hpp:46
std::vector< uint32_t > m_Shape
Definition Numpy.hpp:48
std::string m_HeaderString
Definition Numpy.hpp:45
from the numpy file to be parsed.
Definition Numpy.hpp:29
const char m_MagicString[7]
Definition Numpy.hpp:33
const uint8_t m_MagicStringLength
Definition Numpy.hpp:32
char m_HeaderLenBytes[4]
Definition Numpy.hpp:34