test_format.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940
  1. from __future__ import division, absolute_import, print_function
  2. # doctest
  3. r''' Test the .npy file format.
  4. Set up:
  5. >>> import sys
  6. >>> from io import BytesIO
  7. >>> from numpy.lib import format
  8. >>>
  9. >>> scalars = [
  10. ... np.uint8,
  11. ... np.int8,
  12. ... np.uint16,
  13. ... np.int16,
  14. ... np.uint32,
  15. ... np.int32,
  16. ... np.uint64,
  17. ... np.int64,
  18. ... np.float32,
  19. ... np.float64,
  20. ... np.complex64,
  21. ... np.complex128,
  22. ... object,
  23. ... ]
  24. >>>
  25. >>> basic_arrays = []
  26. >>>
  27. >>> for scalar in scalars:
  28. ... for endian in '<>':
  29. ... dtype = np.dtype(scalar).newbyteorder(endian)
  30. ... basic = np.arange(15).astype(dtype)
  31. ... basic_arrays.extend([
  32. ... np.array([], dtype=dtype),
  33. ... np.array(10, dtype=dtype),
  34. ... basic,
  35. ... basic.reshape((3,5)),
  36. ... basic.reshape((3,5)).T,
  37. ... basic.reshape((3,5))[::-1,::2],
  38. ... ])
  39. ...
  40. >>>
  41. >>> Pdescr = [
  42. ... ('x', 'i4', (2,)),
  43. ... ('y', 'f8', (2, 2)),
  44. ... ('z', 'u1')]
  45. >>>
  46. >>>
  47. >>> PbufferT = [
  48. ... ([3,2], [[6.,4.],[6.,4.]], 8),
  49. ... ([4,3], [[7.,5.],[7.,5.]], 9),
  50. ... ]
  51. >>>
  52. >>>
  53. >>> Ndescr = [
  54. ... ('x', 'i4', (2,)),
  55. ... ('Info', [
  56. ... ('value', 'c16'),
  57. ... ('y2', 'f8'),
  58. ... ('Info2', [
  59. ... ('name', 'S2'),
  60. ... ('value', 'c16', (2,)),
  61. ... ('y3', 'f8', (2,)),
  62. ... ('z3', 'u4', (2,))]),
  63. ... ('name', 'S2'),
  64. ... ('z2', 'b1')]),
  65. ... ('color', 'S2'),
  66. ... ('info', [
  67. ... ('Name', 'U8'),
  68. ... ('Value', 'c16')]),
  69. ... ('y', 'f8', (2, 2)),
  70. ... ('z', 'u1')]
  71. >>>
  72. >>>
  73. >>> NbufferT = [
  74. ... ([3,2], (6j, 6., ('nn', [6j,4j], [6.,4.], [1,2]), 'NN', True), 'cc', ('NN', 6j), [[6.,4.],[6.,4.]], 8),
  75. ... ([4,3], (7j, 7., ('oo', [7j,5j], [7.,5.], [2,1]), 'OO', False), 'dd', ('OO', 7j), [[7.,5.],[7.,5.]], 9),
  76. ... ]
  77. >>>
  78. >>>
  79. >>> record_arrays = [
  80. ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')),
  81. ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')),
  82. ... np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')),
  83. ... np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')),
  84. ... ]
  85. Test the magic string writing.
  86. >>> format.magic(1, 0)
  87. '\x93NUMPY\x01\x00'
  88. >>> format.magic(0, 0)
  89. '\x93NUMPY\x00\x00'
  90. >>> format.magic(255, 255)
  91. '\x93NUMPY\xff\xff'
  92. >>> format.magic(2, 5)
  93. '\x93NUMPY\x02\x05'
  94. Test the magic string reading.
  95. >>> format.read_magic(BytesIO(format.magic(1, 0)))
  96. (1, 0)
  97. >>> format.read_magic(BytesIO(format.magic(0, 0)))
  98. (0, 0)
  99. >>> format.read_magic(BytesIO(format.magic(255, 255)))
  100. (255, 255)
  101. >>> format.read_magic(BytesIO(format.magic(2, 5)))
  102. (2, 5)
  103. Test the header writing.
  104. >>> for arr in basic_arrays + record_arrays:
  105. ... f = BytesIO()
  106. ... format.write_array_header_1_0(f, arr) # XXX: arr is not a dict, items gets called on it
  107. ... print(repr(f.getvalue()))
  108. ...
  109. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n"
  110. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n"
  111. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n"
  112. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n"
  113. "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n"
  114. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n"
  115. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (0,)} \n"
  116. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': ()} \n"
  117. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (15,)} \n"
  118. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 5)} \n"
  119. "F\x00{'descr': '|u1', 'fortran_order': True, 'shape': (5, 3)} \n"
  120. "F\x00{'descr': '|u1', 'fortran_order': False, 'shape': (3, 3)} \n"
  121. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n"
  122. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n"
  123. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n"
  124. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n"
  125. "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n"
  126. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n"
  127. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (0,)} \n"
  128. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': ()} \n"
  129. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (15,)} \n"
  130. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 5)} \n"
  131. "F\x00{'descr': '|i1', 'fortran_order': True, 'shape': (5, 3)} \n"
  132. "F\x00{'descr': '|i1', 'fortran_order': False, 'shape': (3, 3)} \n"
  133. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (0,)} \n"
  134. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': ()} \n"
  135. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (15,)} \n"
  136. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 5)} \n"
  137. "F\x00{'descr': '<u2', 'fortran_order': True, 'shape': (5, 3)} \n"
  138. "F\x00{'descr': '<u2', 'fortran_order': False, 'shape': (3, 3)} \n"
  139. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (0,)} \n"
  140. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': ()} \n"
  141. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (15,)} \n"
  142. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 5)} \n"
  143. "F\x00{'descr': '>u2', 'fortran_order': True, 'shape': (5, 3)} \n"
  144. "F\x00{'descr': '>u2', 'fortran_order': False, 'shape': (3, 3)} \n"
  145. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (0,)} \n"
  146. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': ()} \n"
  147. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (15,)} \n"
  148. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 5)} \n"
  149. "F\x00{'descr': '<i2', 'fortran_order': True, 'shape': (5, 3)} \n"
  150. "F\x00{'descr': '<i2', 'fortran_order': False, 'shape': (3, 3)} \n"
  151. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (0,)} \n"
  152. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': ()} \n"
  153. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (15,)} \n"
  154. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 5)} \n"
  155. "F\x00{'descr': '>i2', 'fortran_order': True, 'shape': (5, 3)} \n"
  156. "F\x00{'descr': '>i2', 'fortran_order': False, 'shape': (3, 3)} \n"
  157. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (0,)} \n"
  158. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': ()} \n"
  159. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (15,)} \n"
  160. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 5)} \n"
  161. "F\x00{'descr': '<u4', 'fortran_order': True, 'shape': (5, 3)} \n"
  162. "F\x00{'descr': '<u4', 'fortran_order': False, 'shape': (3, 3)} \n"
  163. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (0,)} \n"
  164. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': ()} \n"
  165. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (15,)} \n"
  166. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 5)} \n"
  167. "F\x00{'descr': '>u4', 'fortran_order': True, 'shape': (5, 3)} \n"
  168. "F\x00{'descr': '>u4', 'fortran_order': False, 'shape': (3, 3)} \n"
  169. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (0,)} \n"
  170. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': ()} \n"
  171. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (15,)} \n"
  172. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 5)} \n"
  173. "F\x00{'descr': '<i4', 'fortran_order': True, 'shape': (5, 3)} \n"
  174. "F\x00{'descr': '<i4', 'fortran_order': False, 'shape': (3, 3)} \n"
  175. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (0,)} \n"
  176. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': ()} \n"
  177. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (15,)} \n"
  178. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 5)} \n"
  179. "F\x00{'descr': '>i4', 'fortran_order': True, 'shape': (5, 3)} \n"
  180. "F\x00{'descr': '>i4', 'fortran_order': False, 'shape': (3, 3)} \n"
  181. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (0,)} \n"
  182. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': ()} \n"
  183. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (15,)} \n"
  184. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 5)} \n"
  185. "F\x00{'descr': '<u8', 'fortran_order': True, 'shape': (5, 3)} \n"
  186. "F\x00{'descr': '<u8', 'fortran_order': False, 'shape': (3, 3)} \n"
  187. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (0,)} \n"
  188. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': ()} \n"
  189. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (15,)} \n"
  190. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 5)} \n"
  191. "F\x00{'descr': '>u8', 'fortran_order': True, 'shape': (5, 3)} \n"
  192. "F\x00{'descr': '>u8', 'fortran_order': False, 'shape': (3, 3)} \n"
  193. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (0,)} \n"
  194. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': ()} \n"
  195. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (15,)} \n"
  196. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 5)} \n"
  197. "F\x00{'descr': '<i8', 'fortran_order': True, 'shape': (5, 3)} \n"
  198. "F\x00{'descr': '<i8', 'fortran_order': False, 'shape': (3, 3)} \n"
  199. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (0,)} \n"
  200. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': ()} \n"
  201. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (15,)} \n"
  202. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 5)} \n"
  203. "F\x00{'descr': '>i8', 'fortran_order': True, 'shape': (5, 3)} \n"
  204. "F\x00{'descr': '>i8', 'fortran_order': False, 'shape': (3, 3)} \n"
  205. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (0,)} \n"
  206. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': ()} \n"
  207. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (15,)} \n"
  208. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 5)} \n"
  209. "F\x00{'descr': '<f4', 'fortran_order': True, 'shape': (5, 3)} \n"
  210. "F\x00{'descr': '<f4', 'fortran_order': False, 'shape': (3, 3)} \n"
  211. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (0,)} \n"
  212. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': ()} \n"
  213. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (15,)} \n"
  214. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 5)} \n"
  215. "F\x00{'descr': '>f4', 'fortran_order': True, 'shape': (5, 3)} \n"
  216. "F\x00{'descr': '>f4', 'fortran_order': False, 'shape': (3, 3)} \n"
  217. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (0,)} \n"
  218. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': ()} \n"
  219. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (15,)} \n"
  220. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 5)} \n"
  221. "F\x00{'descr': '<f8', 'fortran_order': True, 'shape': (5, 3)} \n"
  222. "F\x00{'descr': '<f8', 'fortran_order': False, 'shape': (3, 3)} \n"
  223. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (0,)} \n"
  224. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': ()} \n"
  225. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (15,)} \n"
  226. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 5)} \n"
  227. "F\x00{'descr': '>f8', 'fortran_order': True, 'shape': (5, 3)} \n"
  228. "F\x00{'descr': '>f8', 'fortran_order': False, 'shape': (3, 3)} \n"
  229. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (0,)} \n"
  230. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': ()} \n"
  231. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (15,)} \n"
  232. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 5)} \n"
  233. "F\x00{'descr': '<c8', 'fortran_order': True, 'shape': (5, 3)} \n"
  234. "F\x00{'descr': '<c8', 'fortran_order': False, 'shape': (3, 3)} \n"
  235. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (0,)} \n"
  236. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': ()} \n"
  237. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (15,)} \n"
  238. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 5)} \n"
  239. "F\x00{'descr': '>c8', 'fortran_order': True, 'shape': (5, 3)} \n"
  240. "F\x00{'descr': '>c8', 'fortran_order': False, 'shape': (3, 3)} \n"
  241. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (0,)} \n"
  242. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': ()} \n"
  243. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (15,)} \n"
  244. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 5)} \n"
  245. "F\x00{'descr': '<c16', 'fortran_order': True, 'shape': (5, 3)} \n"
  246. "F\x00{'descr': '<c16', 'fortran_order': False, 'shape': (3, 3)} \n"
  247. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (0,)} \n"
  248. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': ()} \n"
  249. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (15,)} \n"
  250. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 5)} \n"
  251. "F\x00{'descr': '>c16', 'fortran_order': True, 'shape': (5, 3)} \n"
  252. "F\x00{'descr': '>c16', 'fortran_order': False, 'shape': (3, 3)} \n"
  253. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n"
  254. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n"
  255. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n"
  256. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n"
  257. "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n"
  258. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n"
  259. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (0,)} \n"
  260. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': ()} \n"
  261. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (15,)} \n"
  262. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 5)} \n"
  263. "F\x00{'descr': 'O', 'fortran_order': True, 'shape': (5, 3)} \n"
  264. "F\x00{'descr': 'O', 'fortran_order': False, 'shape': (3, 3)} \n"
  265. "v\x00{'descr': [('x', '<i4', (2,)), ('y', '<f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  266. "\x16\x02{'descr': [('x', '<i4', (2,)),\n ('Info',\n [('value', '<c16'),\n ('y2', '<f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '<c16', (2,)),\n ('y3', '<f8', (2,)),\n ('z3', '<u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '<U8'), ('Value', '<c16')]),\n ('y', '<f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  267. "v\x00{'descr': [('x', '>i4', (2,)), ('y', '>f8', (2, 2)), ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  268. "\x16\x02{'descr': [('x', '>i4', (2,)),\n ('Info',\n [('value', '>c16'),\n ('y2', '>f8'),\n ('Info2',\n [('name', '|S2'),\n ('value', '>c16', (2,)),\n ('y3', '>f8', (2,)),\n ('z3', '>u4', (2,))]),\n ('name', '|S2'),\n ('z2', '|b1')]),\n ('color', '|S2'),\n ('info', [('Name', '>U8'), ('Value', '>c16')]),\n ('y', '>f8', (2, 2)),\n ('z', '|u1')],\n 'fortran_order': False,\n 'shape': (2,)} \n"
  269. '''
  270. import sys
  271. import os
  272. import shutil
  273. import tempfile
  274. import warnings
  275. import pytest
  276. from io import BytesIO
  277. import numpy as np
  278. from numpy.testing import (
  279. assert_, assert_array_equal, assert_raises, assert_raises_regex,
  280. )
  281. from numpy.lib import format
# Path of the scratch directory used by tests in this module that write
# files.  It is assigned by setup_module() and reset to None by
# teardown_module() once the directory has been removed.
tempdir = None
  283. # Module-level setup.
  284. def setup_module():
  285. global tempdir
  286. tempdir = tempfile.mkdtemp()
  287. def teardown_module():
  288. global tempdir
  289. if tempdir is not None and os.path.isdir(tempdir):
  290. shutil.rmtree(tempdir)
  291. tempdir = None
  292. # Generate some basic arrays to test with.
  293. scalars = [
  294. np.uint8,
  295. np.int8,
  296. np.uint16,
  297. np.int16,
  298. np.uint32,
  299. np.int32,
  300. np.uint64,
  301. np.int64,
  302. np.float32,
  303. np.float64,
  304. np.complex64,
  305. np.complex128,
  306. object,
  307. ]
  308. basic_arrays = []
  309. for scalar in scalars:
  310. for endian in '<>':
  311. dtype = np.dtype(scalar).newbyteorder(endian)
  312. basic = np.arange(1500).astype(dtype)
  313. basic_arrays.extend([
  314. # Empty
  315. np.array([], dtype=dtype),
  316. # Rank-0
  317. np.array(10, dtype=dtype),
  318. # 1-D
  319. basic,
  320. # 2-D C-contiguous
  321. basic.reshape((30, 50)),
  322. # 2-D F-contiguous
  323. basic.reshape((30, 50)).T,
  324. # 2-D non-contiguous
  325. basic.reshape((30, 50))[::-1, ::2],
  326. ])
  327. # More complicated record arrays.
  328. # This is the structure of the table used for plain objects:
  329. #
  330. # +-+-+-+
  331. # |x|y|z|
  332. # +-+-+-+
  333. # Structure of a plain array description:
  334. Pdescr = [
  335. ('x', 'i4', (2,)),
  336. ('y', 'f8', (2, 2)),
  337. ('z', 'u1')]
  338. # A plain list of tuples with values for testing:
  339. PbufferT = [
  340. # x y z
  341. ([3, 2], [[6., 4.], [6., 4.]], 8),
  342. ([4, 3], [[7., 5.], [7., 5.]], 9),
  343. ]
  344. # This is the structure of the table used for nested objects (DON'T PANIC!):
  345. #
  346. # +-+---------------------------------+-----+----------+-+-+
  347. # |x|Info |color|info |y|z|
  348. # | +-----+--+----------------+----+--+ +----+-----+ | |
  349. # | |value|y2|Info2 |name|z2| |Name|Value| | |
  350. # | | | +----+-----+--+--+ | | | | | | |
  351. # | | | |name|value|y3|z3| | | | | | | |
  352. # +-+-----+--+----+-----+--+--+----+--+-----+----+-----+-+-+
  353. #
  354. # The corresponding nested array description:
  355. Ndescr = [
  356. ('x', 'i4', (2,)),
  357. ('Info', [
  358. ('value', 'c16'),
  359. ('y2', 'f8'),
  360. ('Info2', [
  361. ('name', 'S2'),
  362. ('value', 'c16', (2,)),
  363. ('y3', 'f8', (2,)),
  364. ('z3', 'u4', (2,))]),
  365. ('name', 'S2'),
  366. ('z2', 'b1')]),
  367. ('color', 'S2'),
  368. ('info', [
  369. ('Name', 'U8'),
  370. ('Value', 'c16')]),
  371. ('y', 'f8', (2, 2)),
  372. ('z', 'u1')]
  373. NbufferT = [
  374. # x Info color info y z
  375. # value y2 Info2 name z2 Name Value
  376. # name value y3 z3
  377. ([3, 2], (6j, 6., ('nn', [6j, 4j], [6., 4.], [1, 2]), 'NN', True),
  378. 'cc', ('NN', 6j), [[6., 4.], [6., 4.]], 8),
  379. ([4, 3], (7j, 7., ('oo', [7j, 5j], [7., 5.], [2, 1]), 'OO', False),
  380. 'dd', ('OO', 7j), [[7., 5.], [7., 5.]], 9),
  381. ]
  382. record_arrays = [
  383. np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('<')),
  384. np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('<')),
  385. np.array(PbufferT, dtype=np.dtype(Pdescr).newbyteorder('>')),
  386. np.array(NbufferT, dtype=np.dtype(Ndescr).newbyteorder('>')),
  387. np.zeros(1, dtype=[('c', ('<f8', (5,)), (2,))])
  388. ]
  389. #BytesIO that reads a random number of bytes at a time
  390. class BytesIOSRandomSize(BytesIO):
  391. def read(self, size=None):
  392. import random
  393. size = random.randint(1, size)
  394. return super(BytesIOSRandomSize, self).read(size)
  395. def roundtrip(arr):
  396. f = BytesIO()
  397. format.write_array(f, arr)
  398. f2 = BytesIO(f.getvalue())
  399. arr2 = format.read_array(f2, allow_pickle=True)
  400. return arr2
  401. def roundtrip_randsize(arr):
  402. f = BytesIO()
  403. format.write_array(f, arr)
  404. f2 = BytesIOSRandomSize(f.getvalue())
  405. arr2 = format.read_array(f2)
  406. return arr2
  407. def roundtrip_truncated(arr):
  408. f = BytesIO()
  409. format.write_array(f, arr)
  410. #BytesIO is one byte short
  411. f2 = BytesIO(f.getvalue()[0:-1])
  412. arr2 = format.read_array(f2)
  413. return arr2
  414. def assert_equal_(o1, o2):
  415. assert_(o1 == o2)
  416. def test_roundtrip():
  417. for arr in basic_arrays + record_arrays:
  418. arr2 = roundtrip(arr)
  419. assert_array_equal(arr, arr2)
  420. def test_roundtrip_randsize():
  421. for arr in basic_arrays + record_arrays:
  422. if arr.dtype != object:
  423. arr2 = roundtrip_randsize(arr)
  424. assert_array_equal(arr, arr2)
  425. def test_roundtrip_truncated():
  426. for arr in basic_arrays:
  427. if arr.dtype != object:
  428. assert_raises(ValueError, roundtrip_truncated, arr)
  429. def test_long_str():
  430. # check items larger than internal buffer size, gh-4027
  431. long_str_arr = np.ones(1, dtype=np.dtype((str, format.BUFFER_SIZE + 1)))
  432. long_str_arr2 = roundtrip(long_str_arr)
  433. assert_array_equal(long_str_arr, long_str_arr2)
  434. @pytest.mark.slow
  435. def test_memmap_roundtrip():
  436. # Fixme: used to crash on windows
  437. if not (sys.platform == 'win32' or sys.platform == 'cygwin'):
  438. for arr in basic_arrays + record_arrays:
  439. if arr.dtype.hasobject:
  440. # Skip these since they can't be mmap'ed.
  441. continue
  442. # Write it out normally and through mmap.
  443. nfn = os.path.join(tempdir, 'normal.npy')
  444. mfn = os.path.join(tempdir, 'memmap.npy')
  445. fp = open(nfn, 'wb')
  446. try:
  447. format.write_array(fp, arr)
  448. finally:
  449. fp.close()
  450. fortran_order = (
  451. arr.flags.f_contiguous and not arr.flags.c_contiguous)
  452. ma = format.open_memmap(mfn, mode='w+', dtype=arr.dtype,
  453. shape=arr.shape, fortran_order=fortran_order)
  454. ma[...] = arr
  455. del ma
  456. # Check that both of these files' contents are the same.
  457. fp = open(nfn, 'rb')
  458. normal_bytes = fp.read()
  459. fp.close()
  460. fp = open(mfn, 'rb')
  461. memmap_bytes = fp.read()
  462. fp.close()
  463. assert_equal_(normal_bytes, memmap_bytes)
  464. # Check that reading the file using memmap works.
  465. ma = format.open_memmap(nfn, mode='r')
  466. del ma
  467. def test_compressed_roundtrip():
  468. arr = np.random.rand(200, 200)
  469. npz_file = os.path.join(tempdir, 'compressed.npz')
  470. np.savez_compressed(npz_file, arr=arr)
  471. arr1 = np.load(npz_file)['arr']
  472. assert_array_equal(arr, arr1)
  473. # aligned
  474. dt1 = np.dtype('i1, i4, i1', align=True)
  475. # non-aligned, explicit offsets
  476. dt2 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
  477. 'offsets': [1, 6]})
  478. # nested struct-in-struct
  479. dt3 = np.dtype({'names': ['c', 'd'], 'formats': ['i4', dt2]})
  480. # field with '' name
  481. dt4 = np.dtype({'names': ['a', '', 'b'], 'formats': ['i4']*3})
  482. # titles
  483. dt5 = np.dtype({'names': ['a', 'b'], 'formats': ['i4', 'i4'],
  484. 'offsets': [1, 6], 'titles': ['aa', 'bb']})
  485. @pytest.mark.parametrize("dt", [dt1, dt2, dt3, dt4, dt5])
  486. def test_load_padded_dtype(dt):
  487. arr = np.zeros(3, dt)
  488. for i in range(3):
  489. arr[i] = i + 5
  490. npz_file = os.path.join(tempdir, 'aligned.npz')
  491. np.savez(npz_file, arr=arr)
  492. arr1 = np.load(npz_file)['arr']
  493. assert_array_equal(arr, arr1)
  494. def test_python2_python3_interoperability():
  495. if sys.version_info[0] >= 3:
  496. fname = 'win64python2.npy'
  497. else:
  498. fname = 'python3.npy'
  499. path = os.path.join(os.path.dirname(__file__), 'data', fname)
  500. data = np.load(path)
  501. assert_array_equal(data, np.ones(2))
def test_pickle_python2_python3():
    """Load the py2-/py3-objarr .npy and .npz data files and check contents.

    Each file is loaded with ``encoding='bytes'`` and ``encoding='latin1'``
    and compared against ``expected``.  On Python 3 the py2 files are
    additionally loaded with settings that are asserted to raise.
    """
    # Test that loading object arrays saved on Python 2 works both on
    # Python 2 and Python 3 and vice versa
    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    # ``xrange`` is one of the expected objects; bind it to the builtin
    # available on the running interpreter.
    if sys.version_info[0] >= 3:
        xrange = range
    else:
        import __builtin__
        xrange = __builtin__.xrange

    expected = np.array([None, xrange, u'\u512a\u826f',
                         b'\xe4\xb8\x8d\xe8\x89\xaf'],
                        dtype=object)

    for fname in ['py2-objarr.npy', 'py2-objarr.npz',
                  'py3-objarr.npy', 'py3-objarr.npz']:
        path = os.path.join(data_dir, fname)

        for encoding in ['bytes', 'latin1']:
            data_f = np.load(path, allow_pickle=True, encoding=encoding)
            if fname.endswith('.npz'):
                # .npz: pull the array out of the archive, then close it.
                data = data_f['x']
                data_f.close()
            else:
                data = data_f

            if sys.version_info[0] >= 3:
                if encoding == 'latin1' and fname.startswith('py2'):
                    # The last element comes back as str here; re-encoding
                    # it with the same encoding must give the expected bytes.
                    assert_(isinstance(data[3], str))
                    assert_array_equal(data[:-1], expected[:-1])
                    # mojibake occurs
                    assert_array_equal(data[-1].encode(encoding), expected[-1])
                else:
                    assert_(isinstance(data[3], bytes))
                    assert_array_equal(data, expected)
            else:
                assert_array_equal(data, expected)

        if sys.version_info[0] >= 3:
            if fname.startswith('py2'):
                # With no encoding given a UnicodeError is expected; with
                # fix_imports=False and latin1 an ImportError is expected.
                # For .npz files the error is raised on item access rather
                # than by np.load itself.
                if fname.endswith('.npz'):
                    data = np.load(path, allow_pickle=True)
                    assert_raises(UnicodeError, data.__getitem__, 'x')
                    data.close()
                    data = np.load(path, allow_pickle=True, fix_imports=False,
                                   encoding='latin1')
                    assert_raises(ImportError, data.__getitem__, 'x')
                    data.close()
                else:
                    assert_raises(UnicodeError, np.load, path,
                                  allow_pickle=True)
                    assert_raises(ImportError, np.load, path,
                                  allow_pickle=True, fix_imports=False,
                                  encoding='latin1')
  551. def test_pickle_disallow():
  552. data_dir = os.path.join(os.path.dirname(__file__), 'data')
  553. path = os.path.join(data_dir, 'py2-objarr.npy')
  554. assert_raises(ValueError, np.load, path,
  555. allow_pickle=False, encoding='latin1')
  556. path = os.path.join(data_dir, 'py2-objarr.npz')
  557. f = np.load(path, allow_pickle=False, encoding='latin1')
  558. assert_raises(ValueError, f.__getitem__, 'x')
  559. path = os.path.join(tempdir, 'pickle-disabled.npy')
  560. assert_raises(ValueError, np.save, path, np.array([None], dtype=object),
  561. allow_pickle=False)
# Parameters: structured dtypes combined with (nested) subarray shapes.
# NOTE(review): in the first two entries the trailing ``(3,)`` is the second
# positional argument of ``np.dtype(...)``, which looks like it lands in the
# ``align`` parameter rather than acting as a subarray shape -- confirm the
# intent against the np.dtype signature.
@pytest.mark.parametrize('dt', [
    np.dtype(np.dtype([('a', np.int8),
                       ('b', np.int16),
                       ('c', np.int32),
                       ], align=True),
             (3,)),
    np.dtype([('x', np.dtype({'names':['a','b'],
                              'formats':['i1','i1'],
                              'offsets':[0,4],
                              'itemsize':8,
                              },
                             (3,)),
               (4,),
               )]),
    np.dtype([('x',
               ('<f8', (5,)),
               (2,),
               )]),
    np.dtype([('x', np.dtype((
        np.dtype((
            np.dtype({'names':['a','b'],
                      'formats':['i1','i1'],
                      'offsets':[0,4],
                      'itemsize':8}),
            (3,)
        )),
        (4,)
    )))
    ]),
    np.dtype([
        ('a', np.dtype((
            np.dtype((
                np.dtype((
                    np.dtype([
                        ('a', int),
                        ('b', np.dtype({'names':['a','b'],
                                        'formats':['i1','i1'],
                                        'offsets':[0,4],
                                        'itemsize':8})),
                    ]),
                    (3,),
                )),
                (4,),
            )),
            (5,),
        )))
    ]),
])
def test_descr_to_dtype(dt):
    """``descr_to_dtype(dt.descr)`` must equal ``dt``, and a zero-filled
    array of that dtype must round-trip unchanged."""
    dt1 = format.descr_to_dtype(dt.descr)
    assert_equal_(dt1, dt)
    arr1 = np.zeros(3, dt)
    arr2 = roundtrip(arr1)
    assert_array_equal(arr1, arr2)
  616. def test_version_2_0():
  617. f = BytesIO()
  618. # requires more than 2 byte for header
  619. dt = [(("%d" % i) * 100, float) for i in range(500)]
  620. d = np.ones(1000, dtype=dt)
  621. format.write_array(f, d, version=(2, 0))
  622. with warnings.catch_warnings(record=True) as w:
  623. warnings.filterwarnings('always', '', UserWarning)
  624. format.write_array(f, d)
  625. assert_(w[0].category is UserWarning)
  626. # check alignment of data portion
  627. f.seek(0)
  628. header = f.readline()
  629. assert_(len(header) % format.ARRAY_ALIGN == 0)
  630. f.seek(0)
  631. n = format.read_array(f)
  632. assert_array_equal(d, n)
  633. # 1.0 requested but data cannot be saved this way
  634. assert_raises(ValueError, format.write_array, f, d, (1, 0))
  635. @pytest.mark.slow
  636. def test_version_2_0_memmap():
  637. # requires more than 2 byte for header
  638. dt = [(("%d" % i) * 100, float) for i in range(500)]
  639. d = np.ones(1000, dtype=dt)
  640. tf = tempfile.mktemp('', 'mmap', dir=tempdir)
  641. # 1.0 requested but data cannot be saved this way
  642. assert_raises(ValueError, format.open_memmap, tf, mode='w+', dtype=d.dtype,
  643. shape=d.shape, version=(1, 0))
  644. ma = format.open_memmap(tf, mode='w+', dtype=d.dtype,
  645. shape=d.shape, version=(2, 0))
  646. ma[...] = d
  647. del ma
  648. with warnings.catch_warnings(record=True) as w:
  649. warnings.filterwarnings('always', '', UserWarning)
  650. ma = format.open_memmap(tf, mode='w+', dtype=d.dtype,
  651. shape=d.shape, version=None)
  652. assert_(w[0].category is UserWarning)
  653. ma[...] = d
  654. del ma
  655. ma = format.open_memmap(tf, mode='r')
  656. assert_array_equal(ma, d)
  657. def test_write_version():
  658. f = BytesIO()
  659. arr = np.arange(1)
  660. # These should pass.
  661. format.write_array(f, arr, version=(1, 0))
  662. format.write_array(f, arr)
  663. format.write_array(f, arr, version=None)
  664. format.write_array(f, arr)
  665. format.write_array(f, arr, version=(2, 0))
  666. format.write_array(f, arr)
  667. # These should all fail.
  668. bad_versions = [
  669. (1, 1),
  670. (0, 0),
  671. (0, 1),
  672. (2, 2),
  673. (255, 255),
  674. ]
  675. for version in bad_versions:
  676. with assert_raises_regex(ValueError,
  677. 'we only support format version.*'):
  678. format.write_array(f, arr, version=version)
  679. bad_version_magic = [
  680. b'\x93NUMPY\x01\x01',
  681. b'\x93NUMPY\x00\x00',
  682. b'\x93NUMPY\x00\x01',
  683. b'\x93NUMPY\x02\x00',
  684. b'\x93NUMPY\x02\x02',
  685. b'\x93NUMPY\xff\xff',
  686. ]
  687. malformed_magic = [
  688. b'\x92NUMPY\x01\x00',
  689. b'\x00NUMPY\x01\x00',
  690. b'\x93numpy\x01\x00',
  691. b'\x93MATLB\x01\x00',
  692. b'\x93NUMPY\x01',
  693. b'\x93NUMPY',
  694. b'',
  695. ]
  696. def test_read_magic():
  697. s1 = BytesIO()
  698. s2 = BytesIO()
  699. arr = np.ones((3, 6), dtype=float)
  700. format.write_array(s1, arr, version=(1, 0))
  701. format.write_array(s2, arr, version=(2, 0))
  702. s1.seek(0)
  703. s2.seek(0)
  704. version1 = format.read_magic(s1)
  705. version2 = format.read_magic(s2)
  706. assert_(version1 == (1, 0))
  707. assert_(version2 == (2, 0))
  708. assert_(s1.tell() == format.MAGIC_LEN)
  709. assert_(s2.tell() == format.MAGIC_LEN)
  710. def test_read_magic_bad_magic():
  711. for magic in malformed_magic:
  712. f = BytesIO(magic)
  713. assert_raises(ValueError, format.read_array, f)
  714. def test_read_version_1_0_bad_magic():
  715. for magic in bad_version_magic + malformed_magic:
  716. f = BytesIO(magic)
  717. assert_raises(ValueError, format.read_array, f)
  718. def test_bad_magic_args():
  719. assert_raises(ValueError, format.magic, -1, 1)
  720. assert_raises(ValueError, format.magic, 256, 1)
  721. assert_raises(ValueError, format.magic, 1, -1)
  722. assert_raises(ValueError, format.magic, 1, 256)
  723. def test_large_header():
  724. s = BytesIO()
  725. d = {'a': 1, 'b': 2}
  726. format.write_array_header_1_0(s, d)
  727. s = BytesIO()
  728. d = {'a': 1, 'b': 2, 'c': 'x'*256*256}
  729. assert_raises(ValueError, format.write_array_header_1_0, s, d)
  730. def test_read_array_header_1_0():
  731. s = BytesIO()
  732. arr = np.ones((3, 6), dtype=float)
  733. format.write_array(s, arr, version=(1, 0))
  734. s.seek(format.MAGIC_LEN)
  735. shape, fortran, dtype = format.read_array_header_1_0(s)
  736. assert_(s.tell() % format.ARRAY_ALIGN == 0)
  737. assert_((shape, fortran, dtype) == ((3, 6), False, float))
  738. def test_read_array_header_2_0():
  739. s = BytesIO()
  740. arr = np.ones((3, 6), dtype=float)
  741. format.write_array(s, arr, version=(2, 0))
  742. s.seek(format.MAGIC_LEN)
  743. shape, fortran, dtype = format.read_array_header_2_0(s)
  744. assert_(s.tell() % format.ARRAY_ALIGN == 0)
  745. assert_((shape, fortran, dtype) == ((3, 6), False, float))
  746. def test_bad_header():
  747. # header of length less than 2 should fail
  748. s = BytesIO()
  749. assert_raises(ValueError, format.read_array_header_1_0, s)
  750. s = BytesIO(b'1')
  751. assert_raises(ValueError, format.read_array_header_1_0, s)
  752. # header shorter than indicated size should fail
  753. s = BytesIO(b'\x01\x00')
  754. assert_raises(ValueError, format.read_array_header_1_0, s)
  755. # headers without the exact keys required should fail
  756. d = {"shape": (1, 2),
  757. "descr": "x"}
  758. s = BytesIO()
  759. format.write_array_header_1_0(s, d)
  760. assert_raises(ValueError, format.read_array_header_1_0, s)
  761. d = {"shape": (1, 2),
  762. "fortran_order": False,
  763. "descr": "x",
  764. "extrakey": -1}
  765. s = BytesIO()
  766. format.write_array_header_1_0(s, d)
  767. assert_raises(ValueError, format.read_array_header_1_0, s)
  768. def test_large_file_support():
  769. if (sys.platform == 'win32' or sys.platform == 'cygwin'):
  770. pytest.skip("Unknown if Windows has sparse filesystems")
  771. # try creating a large sparse file
  772. tf_name = os.path.join(tempdir, 'sparse_file')
  773. try:
  774. # seek past end would work too, but linux truncate somewhat
  775. # increases the chances that we have a sparse filesystem and can
  776. # avoid actually writing 5GB
  777. import subprocess as sp
  778. sp.check_call(["truncate", "-s", "5368709120", tf_name])
  779. except Exception:
  780. pytest.skip("Could not create 5GB large file")
  781. # write a small array to the end
  782. with open(tf_name, "wb") as f:
  783. f.seek(5368709120)
  784. d = np.arange(5)
  785. np.save(f, d)
  786. # read it back
  787. with open(tf_name, "rb") as f:
  788. f.seek(5368709120)
  789. r = np.load(f)
  790. assert_array_equal(r, d)
  791. @pytest.mark.skipif(np.dtype(np.intp).itemsize < 8,
  792. reason="test requires 64-bit system")
  793. @pytest.mark.slow
  794. def test_large_archive():
  795. # Regression test for product of saving arrays with dimensions of array
  796. # having a product that doesn't fit in int32. See gh-7598 for details.
  797. try:
  798. a = np.empty((2**30, 2), dtype=np.uint8)
  799. except MemoryError:
  800. pytest.skip("Could not create large file")
  801. fname = os.path.join(tempdir, "large_archive")
  802. with open(fname, "wb") as f:
  803. np.savez(f, arr=a)
  804. with open(fname, "rb") as f:
  805. new_a = np.load(f)["arr"]
  806. assert_(a.shape == new_a.shape)
  807. def test_empty_npz():
  808. # Test for gh-9989
  809. fname = os.path.join(tempdir, "nothing.npz")
  810. np.savez(fname)
  811. np.load(fname)