ArmNN
 24.08
Numpy.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 // Copyright © 2021 Leon Merten Lohse
6 // SPDX-License-Identifier: MIT
7 //
8 
9 #ifndef NUMPY_HPP
10 #define NUMPY_HPP
11 
#include <fmt/format.h>
#include "Types.hpp"
#include "Tensor.hpp"

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>
#include <vector>
18 
19 namespace armnnNumpy
20 {
21 
/// @struct HeaderInfo: fixed-size preamble found at the start of every numpy (.npy) file.
/// @var m_MajorVersion: Single byte containing the major version of the numpy file format.
/// @var m_MinorVersion: Single byte containing the minor version of the numpy file format.
/// @var m_MagicStringLength: unsigned 8bit int containing the length of the magic string (terminator excluded).
/// @var m_MagicString: Char array containing the magic string at the beginning of every numpy file.
/// @var m_HeaderLenBytes: Raw header-length bytes exactly as read from the file (2 used for v1.0, 4 for v2.0).
/// @var m_HeaderLen: Decoded length, in bytes, of the header dictionary that follows the preamble.
struct HeaderInfo
{
    // Filled in by CreateHeaderInfo(); zero-initialised so a fresh object never holds indeterminate values.
    char m_MajorVersion = 0;
    char m_MinorVersion = 0;
    const uint8_t m_MagicStringLength = 6;
    const char m_MagicString[7] = "\x93NUMPY";
    char m_HeaderLenBytes[4] = {0, 0, 0, 0};
    uint32_t m_HeaderLen = 0;
};
37 
/// @struct Header: contains the values parsed from the header dictionary of a numpy file.
/// @var m_HeaderString: String containing the header dictionary text.
/// @var m_DescrString: String containing the value of the 'descr' entry (the data type description).
/// @var m_FortranOrder: Boolean declaring if array is in Fortran Order.
/// @var m_Shape: Shape of the data, one entry per dimension.
struct Header
{
    std::string m_HeaderString;
    std::string m_DescrString;
    // Defaults to false so the flag is well defined before CreateHeader() has run.
    bool m_FortranOrder = false;
    std::vector<uint32_t> m_Shape;
};
50 
51  inline void CreateHeaderInfo(std::ifstream &ifStream, HeaderInfo &headerInfo)
52  {
53  // A Numpy header consists of:
54  // a magic string "x93NUMPY"
55  // 1 byte for the major version
56  // 1 byte for the minor version
57  // 2 or 4 bytes for the header length
58  // More info: https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
59  char buffer[headerInfo.m_MagicStringLength + 2lu];
60  ifStream.read(buffer, headerInfo.m_MagicStringLength + 2);
61 
62  if (!ifStream)
63  {
64  throw armnn::Exception(
65  fmt::format("Failed to create numpy header info at {}",
66  CHECK_LOCATION().AsString()));
67  }
68  // Verify that the numpy is in the valid format by checking for the magic string
69  int compare_result = ::memcmp(buffer, headerInfo.m_MagicString, headerInfo.m_MagicStringLength);
70  if (compare_result != 0) {
71  throw armnn::Exception(fmt::format("Numpy does not contain magic string. Can not parse invalid numpy {}",
72  CHECK_LOCATION().AsString()));
73  }
74 
75  headerInfo.m_MajorVersion = buffer[headerInfo.m_MagicStringLength];
76  headerInfo.m_MinorVersion = buffer[headerInfo.m_MagicStringLength + 1];
77  if(headerInfo.m_MajorVersion == 1 && headerInfo.m_MinorVersion == 0)
78  {
79  ifStream.read(headerInfo.m_HeaderLenBytes, 2);
80  // Header len is written in little endian, so we do a quick test
81  // to check the machines endianness
82  int i = 1;
83  if (*(reinterpret_cast<char *>(&i)) == 1)
84  {
85  headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0]) |
86  (static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 8));
87  }
88  else
89  {
90  headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1]) |
91  (static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 8));
92  }
93  }
94  else if (headerInfo.m_MajorVersion == 2 && headerInfo.m_MinorVersion == 0)
95  {
96  ifStream.read(headerInfo.m_HeaderLenBytes, 4);
97  // Header len is written in little endian, so we do a quick test
98  // to check the machines endianness
99  int i = 1;
100  if (*(reinterpret_cast<char *>(&i)) == 1)
101  {
102  headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 0) |
103  static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 8) |
104  static_cast<unsigned>(headerInfo.m_HeaderLenBytes[2] << 16) |
105  static_cast<unsigned>(headerInfo.m_HeaderLenBytes[3] << 24);
106  }
107  else
108  {
109  headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[3] << 0) |
110  static_cast<unsigned>(headerInfo.m_HeaderLenBytes[2] << 8) |
111  static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 16) |
112  static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 24);
113  }
114  }
115  else
116  {
117  throw armnn::ParseException(fmt::format("Unable to parser Numpy version {}.{} {}",
118  headerInfo.m_MajorVersion,
119  headerInfo.m_MinorVersion,
120  CHECK_LOCATION().AsString()));
121  }
122  }
123 
124  /// Primarily used to isolate values from header dictionary
125  inline std::string getSubstring(std::string fullString,
126  std::string substringStart,
127  std::string substringEnd,
128  bool removeStartChar = 0,
129  bool includeEndChar = 0)
130  {
131  size_t startPos = fullString.find(substringStart);
132  size_t endPos = fullString.find(substringEnd, startPos);
133  if (startPos == std::string::npos || endPos == std::string::npos)
134  {
135  throw armnn::ParseException(fmt::format("Unable to find {} in numpy file.",
136  CHECK_LOCATION().AsString()));
137  }
138 
139  // std::string.substr takes the starting position and the length of the substring.
140  // To calculate the length we subtract the start position from the end position.
141  // We also add a boolean on whether or not we want to include the character used to find endPos
142  startPos+= removeStartChar;
143  endPos += includeEndChar;
144  return fullString.substr(startPos, endPos - startPos);
145  }
146 
147  inline void parseShape(Header& header, std::string& shapeString)
148  {
149  std::istringstream shapeStringStream(shapeString);
150  std::string token;
151  while(getline(shapeStringStream, token, ','))
152  {
153  header.m_Shape.push_back(static_cast<uint32_t >(std::stoi(token)));
154  }
155  }
156 
157  inline void CreateHeader(std::ifstream& ifStream, HeaderInfo& headerInfo, Header& header)
158  {
159  char stringBuffer[headerInfo.m_HeaderLen];
160  ifStream.read(stringBuffer, headerInfo.m_HeaderLen);
161 
162  header.m_HeaderString = std::string(stringBuffer, headerInfo.m_HeaderLen);
163  // Remove new line character at the end of the string
164  if(header.m_HeaderString.back() == '\n')
165  {
166  header.m_HeaderString.pop_back();
167  }
168 
169  // Remove whitespace from the string.
170  // std::remove shuffles the string by place all whitespace at the end and
171  // returning the start location of the shuffled whitespace.
172  // std::string.erase then deletes the whitespace by deleting characters
173  // between the iterator returned from std::remove and the end of the std::string
174  std::string::iterator whitespaceSubstringStart = std::remove(header.m_HeaderString.begin(),
175  header.m_HeaderString.end(), ' ');
176  header.m_HeaderString.erase(whitespaceSubstringStart, header.m_HeaderString.end());
177 
178  // The order of the dictionary should be alphabetical,
179  // however this is not guarenteed so we have to search for the string.
180  // Because of this we do some weird parsing using std::string.find and some magic patterns
181  //
182  // For the description value, we include the end character from the first substring
183  // to help us find the value in the second substring. This should return with a "," at the end.
184  // Since we previously left the "," at the end of the substring,
185  // we can use it to find the end of the description value and then remove it after.
186  std::string descrString = getSubstring(header.m_HeaderString, "'descr", ",", 0, 1);
187  header.m_DescrString = getSubstring(descrString, ":", ",", 1);
188 
189  // Fortran order is a python boolean literal, we simply look for a comma to delimit this pair.
190  // Since this is a boolean, both true and false end in an "e" without appearing in between.
191  // It is not great, but it is the easiest way to find the end.
192  // We have to ensure we include this e in the substring.
193  // Since this is a boolean we can check if the string contains
194  // either true or false and set the variable as required
195  std::string fortranOrderString = getSubstring(header.m_HeaderString, "'fortran_order", ",");
196  fortranOrderString = getSubstring(fortranOrderString, ":", "e", 1, 1);
197  header.m_FortranOrder = fortranOrderString.find("True") != std::string::npos ? true : false;
198 
199  // The shape is a python tuple so we search for the closing bracket of the tuple.
200  // We include the end character to help us isolate the value substring.
201  // We can extract the inside of the tuple by searching for opening and closing brackets.
202  // We can then remove the brackets isolating the inside of the tuple.
203  // We then need to parse the string into a vector of unsigned integers
204  std::string shapeString = getSubstring(header.m_HeaderString, "'shape", ")", 0, 1);
205  shapeString = getSubstring(shapeString, "(", ")", 1, 0);
206  parseShape(header, shapeString);
207  }
208 
/// Reads numElements values of type T from the stream, as raw bytes, into the given buffer.
///
/// @param ifStream    Stream to read from, starting at its current position.
/// @param tensor      Destination buffer; sizeof(T) * numElements bytes are written to it.
/// @param numElements Number of elements of type T to read.
template<typename T>
inline void ReadData(std::ifstream& ifStream, T* tensor, const unsigned int& numElements)
{
    char* const destination = reinterpret_cast<char *>(tensor);
    const auto byteCount = sizeof(T) * numElements;
    ifStream.read(destination, byteCount);
}
214 
215 
216  inline armnn::DataType getArmNNDataType(std::string& descr)
217  {
218  if(descr.find("f4") != std::string::npos || descr.find("f8") != std::string::npos)
219  {
221  }
222  else if (descr.find("f2") != std::string::npos)
223  {
225  }
226  else if (descr.find("i8") != std::string::npos)
227  {
229  }
230  else if (descr.find("i4") != std::string::npos)
231  {
233  }
234  else if (descr.find("i2") != std::string::npos)
235  {
237  }
238  else if (descr.find("i1") != std::string::npos)
239  {
241  }
242  else if (descr.find("u1") != std::string::npos)
243  {
245  }
246  else
247  {
248  throw armnn::Exception(fmt::format("Numpy data type:{} not supported. {}",
249  descr, CHECK_LOCATION().AsString()));
250  }
251  }
252 
253  inline std::string getNumpyDescr(armnn::DataType dType)
254  {
255  switch(dType)
256  {
258  return "f" + std::to_string(sizeof(float)); // size of float can be 4 or 8
260  return "f2";
262  return "i8";
264  return "i4";
266  return "i2";
269  return "i1";
271  return "u1";
272  default:
273  throw armnn::Exception(fmt::format("ArmNN to Numpy data type:{} not supported. {}",
274  dType, CHECK_LOCATION().AsString()));
275  }
276  }
277 
278  template <typename T>
279  inline bool compareCTypes(std::string& descr)
280  {
281  if(descr.find("f4") != std::string::npos || descr.find("f8") != std::string::npos)
282  {
283  return std::is_same<T, float>::value;
284  }
285  else if (descr.find("i8") != std::string::npos)
286  {
287  return std::is_same<T, int64_t>::value;
288  }
289  else if (descr.find("i4") != std::string::npos)
290  {
291  return std::is_same<T, int32_t>::value;
292  }
293  else if (descr.find("i2") != std::string::npos)
294  {
295  return std::is_same<T, int16_t>::value;
296  }
297  else if (descr.find("i1") != std::string::npos)
298  {
299  return std::is_same<T, int8_t>::value;
300  }
301  else if (descr.find("u1") != std::string::npos)
302  {
303  return std::is_same<T, uint8_t>::value;
304  }
305  else
306  {
307  throw armnn::Exception(fmt::format("Numpy data type:{} not supported. {}",
308  descr, CHECK_LOCATION().AsString()));
309  }
310  }
311 
312  inline unsigned int getNumElements(Header& header)
313  {
314  unsigned int numEls = 1;
315  for (auto dim: header.m_Shape)
316  {
317  numEls *= dim;
318  }
319 
320  return numEls;
321  }
322 
323  // Material in WriteToNumpyFile() has been reused from https://github.com/llohse/libnpy/blob/master/include/npy.hpp
324  // Please see write_header() in the above file for more details.
325  template<typename T>
326  inline void WriteToNumpyFile(const std::string& outputTensorFileName,
327  const T* const array,
328  const unsigned int numElements,
329  armnn::DataType dataType,
330  const armnn::TensorShape& shape)
331  {
332  std::ofstream out(outputTensorFileName, std::ofstream::binary);
333 
334  // write header
335  {
336  // Setup string of tensor shape in format (x0, x1, x2, ..)
337  std::string shapeStr = "(";
338  for (uint32_t i = 0; i < shape.GetNumDimensions()-1; i++)
339  {
340  shapeStr = shapeStr + std::to_string(shape[i]) + ", ";
341  }
342  shapeStr = shapeStr + std::to_string(shape[shape.GetNumDimensions()-1]) + ")";
343 
344  int i = 1;
345  std::string endianChar = (*(reinterpret_cast<char *>(&i))) ? "<" : ">";
346  std::string dataTypeStr = getNumpyDescr(dataType);
347  std::string fortranOrder = "False";
348  std::string headerStr = "{'descr': '" + endianChar + dataTypeStr +
349  "', 'fortran_order': " + fortranOrder +
350  ", 'shape': " + shapeStr + ", }";
351 
352  armnnNumpy::HeaderInfo headerInfo;
353 
354  // Header is composed of:
355  // - 6 byte magic string
356  // - 2 byte major and minor version
357  // - 2 byte (v1.0) / 4 byte (v2.0) little-endian unsigned int
358  // - headerStr.length() bytes
359  // - 1 byte for newline termination (\n)
360  size_t length = headerInfo.m_MagicStringLength + 2 + 2 + headerStr.length() + 1;
361  uint8_t major_version = 1;
362 
363  // for numpy major version 2, add extra 2 bytes for little-endian int (total 4 bytes)
364  if (length >= 255 * 255)
365  {
366  length += 2;
367  major_version = 2;
368  }
369 
370  // Pad with spaces so header length is modulo 16 bytes.
371  size_t padding_length = 16 - length % 16;
372  std::string padding(padding_length, ' ');
373 
374  // write magic string
375  out.write(headerInfo.m_MagicString, headerInfo.m_MagicStringLength);
376  out.put(major_version);
377  out.put(0); // minor version
378 
379  // write header length
380  if (major_version == 1)
381  {
382  auto header_len = static_cast<uint16_t>(headerStr.length() + padding.length() + 1);
383 
384  std::array<uint8_t, 2> header_len_16{static_cast<uint8_t>((header_len >> 0) & 0xff),
385  static_cast<uint8_t>((header_len >> 8) & 0xff)};
386  out.write(reinterpret_cast<char *>(header_len_16.data()), 2);
387  }
388  else
389  {
390  auto header_len = static_cast<uint32_t>(headerStr.length() + padding.length() + 1);
391 
392  std::array<uint8_t, 4> header_len_32{
393  static_cast<uint8_t>((header_len >> 0) & 0xff), static_cast<uint8_t>((header_len >> 8) & 0xff),
394  static_cast<uint8_t>((header_len >> 16) & 0xff), static_cast<uint8_t>((header_len >> 24) & 0xff)};
395  out.write(reinterpret_cast<char *>(header_len_32.data()), 4);
396  }
397 
398  out << headerStr << padding << '\n';
399  }
400 
401  // write tensor data to file
402  out.write(reinterpret_cast<const char *>(array), sizeof(T) * numElements);
403  }
404 }
405 
406 #endif // NUMPY_HPP
armnnNumpy::parseShape
void parseShape(Header &header, std::string &shapeString)
Definition: Numpy.hpp:147
armnnNumpy::HeaderInfo::m_MagicString
const char m_MagicString[7]
Definition: Numpy.hpp:33
armnnNumpy
Definition: Numpy.hpp:19
CHECK_LOCATION
#define CHECK_LOCATION()
Definition: Exceptions.hpp:203
armnn::DataType::Float32
@ Float32
armnn::DataType::QAsymmU8
@ QAsymmU8
armnn::DataType::QSymmS8
@ QSymmS8
armnnNumpy::Header::m_FortranOrder
bool m_FortranOrder
Definition: Numpy.hpp:47
armnn::DataType::QSymmS16
@ QSymmS16
armnnNumpy::HeaderInfo::m_MagicStringLength
const uint8_t m_MagicStringLength
Definition: Numpy.hpp:32
armnn::TensorShape
Definition: Tensor.hpp:20
armnn::DataType::Float16
@ Float16
armnnNumpy::HeaderInfo::m_HeaderLenBytes
char m_HeaderLenBytes[4]
Definition: Numpy.hpp:34
armnn::TensorShape::GetNumDimensions
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
Definition: Tensor.cpp:174
armnnNumpy::WriteToNumpyFile
void WriteToNumpyFile(const std::string &outputTensorFileName, const T *const array, const unsigned int numElements, armnn::DataType dataType, const armnn::TensorShape &shape)
Definition: Numpy.hpp:326
armnnNumpy::compareCTypes
bool compareCTypes(std::string &descr)
Definition: Numpy.hpp:279
armnn::DataType
DataType
Definition: Types.hpp:48
armnnNumpy::HeaderInfo::m_MinorVersion
char m_MinorVersion
Definition: Numpy.hpp:31
armnnNumpy::getNumElements
unsigned int getNumElements(Header &header)
Definition: Numpy.hpp:312
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnnNumpy::Header
Definition: Numpy.hpp:43
armnnNumpy::getSubstring
std::string getSubstring(std::string fullString, std::string substringStart, std::string substringEnd, bool removeStartChar=0, bool includeEndChar=0)
Primarily used to isolate values from header dictionary.
Definition: Numpy.hpp:125
armnn::DataType::Signed32
@ Signed32
armnnNumpy::Header::m_HeaderString
std::string m_HeaderString
Definition: Numpy.hpp:45
armnnNumpy::ReadData
void ReadData(std::ifstream &ifStream, T *tensor, const unsigned int &numElements)
Definition: Numpy.hpp:210
armnn::DataType::QAsymmS8
@ QAsymmS8
armnnNumpy::HeaderInfo::m_MajorVersion
char m_MajorVersion
Definition: Numpy.hpp:30
armnnNumpy::Header::m_DescrString
std::string m_DescrString
Definition: Numpy.hpp:46
armnnNumpy::HeaderInfo
Definition: Numpy.hpp:28
Tensor.hpp
armnnNumpy::CreateHeader
void CreateHeader(std::ifstream &ifStream, HeaderInfo &headerInfo, Header &header)
Definition: Numpy.hpp:157
armnnNumpy::getArmNNDataType
armnn::DataType getArmNNDataType(std::string &descr)
Definition: Numpy.hpp:216
armnn::ParseException
Definition: Exceptions.hpp:92
armnnNumpy::Header::m_Shape
std::vector< uint32_t > m_Shape
Definition: Numpy.hpp:48
Types.hpp
armnnNumpy::HeaderInfo::m_HeaderLen
uint32_t m_HeaderLen
Definition: Numpy.hpp:35
armnn::DataType::Signed64
@ Signed64
armnnNumpy::CreateHeaderInfo
void CreateHeaderInfo(std::ifstream &ifStream, HeaderInfo &headerInfo)
Definition: Numpy.hpp:51
armnnNumpy::getNumpyDescr
std::string getNumpyDescr(armnn::DataType dType)
Definition: Numpy.hpp:253