mio5.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849
  1. ''' Classes for read / write of matlab (TM) 5 files
  2. The matfile specification last found here:
  3. https://www.mathworks.com/access/helpdesk/help/pdf_doc/matlab/matfile_format.pdf
  4. (as of December 5 2008)
  5. '''
  6. from __future__ import division, print_function, absolute_import
  7. '''
  8. =================================
  9. Note on functions and mat files
  10. =================================
  11. The document above does not give any hints as to the storage of matlab
  12. function handles, or anonymous function handles. I had therefore to
  13. guess the format of matlab arrays of ``mxFUNCTION_CLASS`` and
  14. ``mxOPAQUE_CLASS`` by looking at example mat files.
  15. ``mxFUNCTION_CLASS`` stores all types of matlab functions. It seems to
  16. contain a struct matrix with a set pattern of fields. For anonymous
  17. functions, a sub-field of one of these fields seems to contain the
  18. well-named ``mxOPAQUE_CLASS``. This seems to contain:
  19. * array flags as for any matlab matrix
  20. * 3 int8 strings
  21. * a matrix
  22. It seems that, whenever the mat file contains a ``mxOPAQUE_CLASS``
  23. instance, there is also an un-named matrix (name == '') at the end of
  24. the mat file. I'll call this the ``__function_workspace__`` matrix.
  25. When I saved two anonymous functions in a mat file, or appended another
  26. anonymous function to the mat file, there was still only one
  27. ``__function_workspace__`` un-named matrix at the end, but larger than
  28. that for a mat file with a single anonymous function, suggesting that
  29. the workspaces for the two functions had been merged.
  30. The ``__function_workspace__`` matrix appears to be of double class
  31. (``mxCLASS_DOUBLE``), but stored as uint8, the memory for which is in
  32. the format of a mini .mat file, without the first 124 bytes of the file
  33. header (the description and the subsystem_offset), but with the version
  34. U2 bytes, and the S2 endian test bytes. There follow 4 zero bytes,
  35. presumably for 8 byte padding, and then a series of ``miMATRIX``
  36. entries, as in a standard mat file. The ``miMATRIX`` entries appear to
  37. be series of un-named (name == '') matrices, and may also contain arrays
  38. of this same mini-mat format.
  39. I guess that:
  40. * saving an anonymous function back to a mat file will need the
  41. associated ``__function_workspace__`` matrix saved as well for the
  42. anonymous function to work correctly.
  43. * appending to a mat file that has a ``__function_workspace__`` would
  44. involve first pulling off this workspace, appending, checking whether
  45. there were any more anonymous functions appended, and then somehow
  46. merging the relevant workspaces, and saving at the end of the mat
  47. file.
  48. The mat files I was playing with are in ``tests/data``:
  49. * sqr.mat
  50. * parabola.mat
  51. * some_functions.mat
  52. See ``tests/test_mio.py:test_mio_funcs.py`` for a debugging
  53. script I was working with.
  54. '''
  55. # Small fragments of current code adapted from matfile.py by Heiko
  56. # Henkelmann
  57. import os
  58. import time
  59. import sys
  60. import zlib
  61. from io import BytesIO
  62. import warnings
  63. import numpy as np
  64. from numpy.compat import asbytes, asstr
  65. import scipy.sparse
  66. from scipy._lib.six import string_types
  67. from .byteordercodes import native_code, swapped_code
  68. from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
  69. arr_to_chars, arr_dtype_number, MatWriteError,
  70. MatReadError, MatReadWarning)
  71. # Reader object for matlab 5 format variables
  72. from .mio5_utils import VarReader5
  73. # Constants and helper objects
  74. from .mio5_params import (MatlabObject, MatlabFunction, MDTYPES, NP_TO_MTYPES,
  75. NP_TO_MXTYPES, miCOMPRESSED, miMATRIX, miINT8,
  76. miUTF8, miUINT32, mxCELL_CLASS, mxSTRUCT_CLASS,
  77. mxOBJECT_CLASS, mxCHAR_CLASS, mxSPARSE_CLASS,
  78. mxDOUBLE_CLASS, mclass_info)
  79. from .streams import ZlibInputStream
  80. class MatFile5Reader(MatFileReader):
  81. ''' Reader for Mat 5 mat files
  82. Adds the following attribute to base class
  83. uint16_codec - char codec to use for uint16 char arrays
  84. (defaults to system default codec)
  85. Uses variable reader that has the following stardard interface (see
  86. abstract class in ``miobase``::
  87. __init__(self, file_reader)
  88. read_header(self)
  89. array_from_header(self)
  90. and added interface::
  91. set_stream(self, stream)
  92. read_full_tag(self)
  93. '''
  94. @docfiller
  95. def __init__(self,
  96. mat_stream,
  97. byte_order=None,
  98. mat_dtype=False,
  99. squeeze_me=False,
  100. chars_as_strings=True,
  101. matlab_compatible=False,
  102. struct_as_record=True,
  103. verify_compressed_data_integrity=True,
  104. uint16_codec=None
  105. ):
  106. '''Initializer for matlab 5 file format reader
  107. %(matstream_arg)s
  108. %(load_args)s
  109. %(struct_arg)s
  110. uint16_codec : {None, string}
  111. Set codec to use for uint16 char arrays (e.g. 'utf-8').
  112. Use system default codec if None
  113. '''
  114. super(MatFile5Reader, self).__init__(
  115. mat_stream,
  116. byte_order,
  117. mat_dtype,
  118. squeeze_me,
  119. chars_as_strings,
  120. matlab_compatible,
  121. struct_as_record,
  122. verify_compressed_data_integrity
  123. )
  124. # Set uint16 codec
  125. if not uint16_codec:
  126. uint16_codec = sys.getdefaultencoding()
  127. self.uint16_codec = uint16_codec
  128. # placeholders for readers - see initialize_read method
  129. self._file_reader = None
  130. self._matrix_reader = None
  131. def guess_byte_order(self):
  132. ''' Guess byte order.
  133. Sets stream pointer to 0 '''
  134. self.mat_stream.seek(126)
  135. mi = self.mat_stream.read(2)
  136. self.mat_stream.seek(0)
  137. return mi == b'IM' and '<' or '>'
  138. def read_file_header(self):
  139. ''' Read in mat 5 file header '''
  140. hdict = {}
  141. hdr_dtype = MDTYPES[self.byte_order]['dtypes']['file_header']
  142. hdr = read_dtype(self.mat_stream, hdr_dtype)
  143. hdict['__header__'] = hdr['description'].item().strip(b' \t\n\000')
  144. v_major = hdr['version'] >> 8
  145. v_minor = hdr['version'] & 0xFF
  146. hdict['__version__'] = '%d.%d' % (v_major, v_minor)
  147. return hdict
  148. def initialize_read(self):
  149. ''' Run when beginning read of variables
  150. Sets up readers from parameters in `self`
  151. '''
  152. # reader for top level stream. We need this extra top-level
  153. # reader because we use the matrix_reader object to contain
  154. # compressed matrices (so they have their own stream)
  155. self._file_reader = VarReader5(self)
  156. # reader for matrix streams
  157. self._matrix_reader = VarReader5(self)
  158. def read_var_header(self):
  159. ''' Read header, return header, next position
  160. Header has to define at least .name and .is_global
  161. Parameters
  162. ----------
  163. None
  164. Returns
  165. -------
  166. header : object
  167. object that can be passed to self.read_var_array, and that
  168. has attributes .name and .is_global
  169. next_position : int
  170. position in stream of next variable
  171. '''
  172. mdtype, byte_count = self._file_reader.read_full_tag()
  173. if not byte_count > 0:
  174. raise ValueError("Did not read any bytes")
  175. next_pos = self.mat_stream.tell() + byte_count
  176. if mdtype == miCOMPRESSED:
  177. # Make new stream from compressed data
  178. stream = ZlibInputStream(self.mat_stream, byte_count)
  179. self._matrix_reader.set_stream(stream)
  180. check_stream_limit = self.verify_compressed_data_integrity
  181. mdtype, byte_count = self._matrix_reader.read_full_tag()
  182. else:
  183. check_stream_limit = False
  184. self._matrix_reader.set_stream(self.mat_stream)
  185. if not mdtype == miMATRIX:
  186. raise TypeError('Expecting miMATRIX type here, got %d' % mdtype)
  187. header = self._matrix_reader.read_header(check_stream_limit)
  188. return header, next_pos
  189. def read_var_array(self, header, process=True):
  190. ''' Read array, given `header`
  191. Parameters
  192. ----------
  193. header : header object
  194. object with fields defining variable header
  195. process : {True, False} bool, optional
  196. If True, apply recursive post-processing during loading of
  197. array.
  198. Returns
  199. -------
  200. arr : array
  201. array with post-processing applied or not according to
  202. `process`.
  203. '''
  204. return self._matrix_reader.array_from_header(header, process)
  205. def get_variables(self, variable_names=None):
  206. ''' get variables from stream as dictionary
  207. variable_names - optional list of variable names to get
  208. If variable_names is None, then get all variables in file
  209. '''
  210. if isinstance(variable_names, string_types):
  211. variable_names = [variable_names]
  212. elif variable_names is not None:
  213. variable_names = list(variable_names)
  214. self.mat_stream.seek(0)
  215. # Here we pass all the parameters in self to the reading objects
  216. self.initialize_read()
  217. mdict = self.read_file_header()
  218. mdict['__globals__'] = []
  219. while not self.end_of_stream():
  220. hdr, next_position = self.read_var_header()
  221. name = asstr(hdr.name)
  222. if name in mdict:
  223. warnings.warn('Duplicate variable name "%s" in stream'
  224. ' - replacing previous with new\n'
  225. 'Consider mio5.varmats_from_mat to split '
  226. 'file into single variable files' % name,
  227. MatReadWarning, stacklevel=2)
  228. if name == '':
  229. # can only be a matlab 7 function workspace
  230. name = '__function_workspace__'
  231. # We want to keep this raw because mat_dtype processing
  232. # will break the format (uint8 as mxDOUBLE_CLASS)
  233. process = False
  234. else:
  235. process = True
  236. if variable_names is not None and name not in variable_names:
  237. self.mat_stream.seek(next_position)
  238. continue
  239. try:
  240. res = self.read_var_array(hdr, process)
  241. except MatReadError as err:
  242. warnings.warn(
  243. 'Unreadable variable "%s", because "%s"' %
  244. (name, err),
  245. Warning, stacklevel=2)
  246. res = "Read error: %s" % err
  247. self.mat_stream.seek(next_position)
  248. mdict[name] = res
  249. if hdr.is_global:
  250. mdict['__globals__'].append(name)
  251. if variable_names is not None:
  252. variable_names.remove(name)
  253. if len(variable_names) == 0:
  254. break
  255. return mdict
  256. def list_variables(self):
  257. ''' list variables from stream '''
  258. self.mat_stream.seek(0)
  259. # Here we pass all the parameters in self to the reading objects
  260. self.initialize_read()
  261. self.read_file_header()
  262. vars = []
  263. while not self.end_of_stream():
  264. hdr, next_position = self.read_var_header()
  265. name = asstr(hdr.name)
  266. if name == '':
  267. # can only be a matlab 7 function workspace
  268. name = '__function_workspace__'
  269. shape = self._matrix_reader.shape_from_header(hdr)
  270. if hdr.is_logical:
  271. info = 'logical'
  272. else:
  273. info = mclass_info.get(hdr.mclass, 'unknown')
  274. vars.append((name, shape, info))
  275. self.mat_stream.seek(next_position)
  276. return vars
  277. def varmats_from_mat(file_obj):
  278. """ Pull variables out of mat 5 file as a sequence of mat file objects
  279. This can be useful with a difficult mat file, containing unreadable
  280. variables. This routine pulls the variables out in raw form and puts them,
  281. unread, back into a file stream for saving or reading. Another use is the
  282. pathological case where there is more than one variable of the same name in
  283. the file; this routine returns the duplicates, whereas the standard reader
  284. will overwrite duplicates in the returned dictionary.
  285. The file pointer in `file_obj` will be undefined. File pointers for the
  286. returned file-like objects are set at 0.
  287. Parameters
  288. ----------
  289. file_obj : file-like
  290. file object containing mat file
  291. Returns
  292. -------
  293. named_mats : list
  294. list contains tuples of (name, BytesIO) where BytesIO is a file-like
  295. object containing mat file contents as for a single variable. The
  296. BytesIO contains a string with the original header and a single var. If
  297. ``var_file_obj`` is an individual BytesIO instance, then save as a mat
  298. file with something like ``open('test.mat',
  299. 'wb').write(var_file_obj.read())``
  300. Examples
  301. --------
  302. >>> import scipy.io
  303. BytesIO is from the ``io`` module in python 3, and is ``cStringIO`` for
  304. python < 3.
  305. >>> mat_fileobj = BytesIO()
  306. >>> scipy.io.savemat(mat_fileobj, {'b': np.arange(10), 'a': 'a string'})
  307. >>> varmats = varmats_from_mat(mat_fileobj)
  308. >>> sorted([name for name, str_obj in varmats])
  309. ['a', 'b']
  310. """
  311. rdr = MatFile5Reader(file_obj)
  312. file_obj.seek(0)
  313. # Raw read of top-level file header
  314. hdr_len = MDTYPES[native_code]['dtypes']['file_header'].itemsize
  315. raw_hdr = file_obj.read(hdr_len)
  316. # Initialize variable reading
  317. file_obj.seek(0)
  318. rdr.initialize_read()
  319. mdict = rdr.read_file_header()
  320. next_position = file_obj.tell()
  321. named_mats = []
  322. while not rdr.end_of_stream():
  323. start_position = next_position
  324. hdr, next_position = rdr.read_var_header()
  325. name = asstr(hdr.name)
  326. # Read raw variable string
  327. file_obj.seek(start_position)
  328. byte_count = next_position - start_position
  329. var_str = file_obj.read(byte_count)
  330. # write to stringio object
  331. out_obj = BytesIO()
  332. out_obj.write(raw_hdr)
  333. out_obj.write(var_str)
  334. out_obj.seek(0)
  335. named_mats.append((name, out_obj))
  336. return named_mats
  337. class EmptyStructMarker(object):
  338. """ Class to indicate presence of empty matlab struct on output """
  339. def to_writeable(source):
  340. ''' Convert input object ``source`` to something we can write
  341. Parameters
  342. ----------
  343. source : object
  344. Returns
  345. -------
  346. arr : None or ndarray or EmptyStructMarker
  347. If `source` cannot be converted to something we can write to a matfile,
  348. return None. If `source` is equivalent to an empty dictionary, return
  349. ``EmptyStructMarker``. Otherwise return `source` converted to an
  350. ndarray with contents for writing to matfile.
  351. '''
  352. if isinstance(source, np.ndarray):
  353. return source
  354. if source is None:
  355. return None
  356. # Objects that implement mappings
  357. is_mapping = (hasattr(source, 'keys') and hasattr(source, 'values') and
  358. hasattr(source, 'items'))
  359. # Objects that don't implement mappings, but do have dicts
  360. if isinstance(source, np.generic):
  361. # Numpy scalars are never mappings (pypy issue workaround)
  362. pass
  363. elif not is_mapping and hasattr(source, '__dict__'):
  364. source = dict((key, value) for key, value in source.__dict__.items()
  365. if not key.startswith('_'))
  366. is_mapping = True
  367. if is_mapping:
  368. dtype = []
  369. values = []
  370. for field, value in source.items():
  371. if (isinstance(field, string_types) and
  372. field[0] not in '_0123456789'):
  373. dtype.append((str(field), object))
  374. values.append(value)
  375. if dtype:
  376. return np.array([tuple(values)], dtype)
  377. else:
  378. return EmptyStructMarker
  379. # Next try and convert to an array
  380. narr = np.asanyarray(source)
  381. if narr.dtype.type in (object, np.object_) and \
  382. narr.shape == () and narr == source:
  383. # No interesting conversion possible
  384. return None
  385. return narr
  386. # Native byte ordered dtypes for convenience for writers
  387. NDT_FILE_HDR = MDTYPES[native_code]['dtypes']['file_header']
  388. NDT_TAG_FULL = MDTYPES[native_code]['dtypes']['tag_full']
  389. NDT_TAG_SMALL = MDTYPES[native_code]['dtypes']['tag_smalldata']
  390. NDT_ARRAY_FLAGS = MDTYPES[native_code]['dtypes']['array_flags']
  391. class VarWriter5(object):
  392. ''' Generic matlab matrix writing class '''
  393. mat_tag = np.zeros((), NDT_TAG_FULL)
  394. mat_tag['mdtype'] = miMATRIX
  395. def __init__(self, file_writer):
  396. self.file_stream = file_writer.file_stream
  397. self.unicode_strings = file_writer.unicode_strings
  398. self.long_field_names = file_writer.long_field_names
  399. self.oned_as = file_writer.oned_as
  400. # These are used for top level writes, and unset after
  401. self._var_name = None
  402. self._var_is_global = False
  403. def write_bytes(self, arr):
  404. self.file_stream.write(arr.tostring(order='F'))
  405. def write_string(self, s):
  406. self.file_stream.write(s)
  407. def write_element(self, arr, mdtype=None):
  408. ''' write tag and data '''
  409. if mdtype is None:
  410. mdtype = NP_TO_MTYPES[arr.dtype.str[1:]]
  411. # Array needs to be in native byte order
  412. if arr.dtype.byteorder == swapped_code:
  413. arr = arr.byteswap().newbyteorder()
  414. byte_count = arr.size*arr.itemsize
  415. if byte_count <= 4:
  416. self.write_smalldata_element(arr, mdtype, byte_count)
  417. else:
  418. self.write_regular_element(arr, mdtype, byte_count)
  419. def write_smalldata_element(self, arr, mdtype, byte_count):
  420. # write tag with embedded data
  421. tag = np.zeros((), NDT_TAG_SMALL)
  422. tag['byte_count_mdtype'] = (byte_count << 16) + mdtype
  423. # if arr.tostring is < 4, the element will be zero-padded as needed.
  424. tag['data'] = arr.tostring(order='F')
  425. self.write_bytes(tag)
  426. def write_regular_element(self, arr, mdtype, byte_count):
  427. # write tag, data
  428. tag = np.zeros((), NDT_TAG_FULL)
  429. tag['mdtype'] = mdtype
  430. tag['byte_count'] = byte_count
  431. self.write_bytes(tag)
  432. self.write_bytes(arr)
  433. # pad to next 64-bit boundary
  434. bc_mod_8 = byte_count % 8
  435. if bc_mod_8:
  436. self.file_stream.write(b'\x00' * (8-bc_mod_8))
  437. def write_header(self,
  438. shape,
  439. mclass,
  440. is_complex=False,
  441. is_logical=False,
  442. nzmax=0):
  443. ''' Write header for given data options
  444. shape : sequence
  445. array shape
  446. mclass - mat5 matrix class
  447. is_complex - True if matrix is complex
  448. is_logical - True if matrix is logical
  449. nzmax - max non zero elements for sparse arrays
  450. We get the name and the global flag from the object, and reset
  451. them to defaults after we've used them
  452. '''
  453. # get name and is_global from one-shot object store
  454. name = self._var_name
  455. is_global = self._var_is_global
  456. # initialize the top-level matrix tag, store position
  457. self._mat_tag_pos = self.file_stream.tell()
  458. self.write_bytes(self.mat_tag)
  459. # write array flags (complex, global, logical, class, nzmax)
  460. af = np.zeros((), NDT_ARRAY_FLAGS)
  461. af['data_type'] = miUINT32
  462. af['byte_count'] = 8
  463. flags = is_complex << 3 | is_global << 2 | is_logical << 1
  464. af['flags_class'] = mclass | flags << 8
  465. af['nzmax'] = nzmax
  466. self.write_bytes(af)
  467. # shape
  468. self.write_element(np.array(shape, dtype='i4'))
  469. # write name
  470. name = np.asarray(name)
  471. if name == '': # empty string zero-terminated
  472. self.write_smalldata_element(name, miINT8, 0)
  473. else:
  474. self.write_element(name, miINT8)
  475. # reset the one-shot store to defaults
  476. self._var_name = ''
  477. self._var_is_global = False
  478. def update_matrix_tag(self, start_pos):
  479. curr_pos = self.file_stream.tell()
  480. self.file_stream.seek(start_pos)
  481. byte_count = curr_pos - start_pos - 8
  482. if byte_count >= 2**32:
  483. raise MatWriteError("Matrix too large to save with Matlab "
  484. "5 format")
  485. self.mat_tag['byte_count'] = byte_count
  486. self.write_bytes(self.mat_tag)
  487. self.file_stream.seek(curr_pos)
  488. def write_top(self, arr, name, is_global):
  489. """ Write variable at top level of mat file
  490. Parameters
  491. ----------
  492. arr : array_like
  493. array-like object to create writer for
  494. name : str, optional
  495. name as it will appear in matlab workspace
  496. default is empty string
  497. is_global : {False, True}, optional
  498. whether variable will be global on load into matlab
  499. """
  500. # these are set before the top-level header write, and unset at
  501. # the end of the same write, because they do not apply for lower levels
  502. self._var_is_global = is_global
  503. self._var_name = name
  504. # write the header and data
  505. self.write(arr)
  506. def write(self, arr):
  507. ''' Write `arr` to stream at top and sub levels
  508. Parameters
  509. ----------
  510. arr : array_like
  511. array-like object to create writer for
  512. '''
  513. # store position, so we can update the matrix tag
  514. mat_tag_pos = self.file_stream.tell()
  515. # First check if these are sparse
  516. if scipy.sparse.issparse(arr):
  517. self.write_sparse(arr)
  518. self.update_matrix_tag(mat_tag_pos)
  519. return
  520. # Try to convert things that aren't arrays
  521. narr = to_writeable(arr)
  522. if narr is None:
  523. raise TypeError('Could not convert %s (type %s) to array'
  524. % (arr, type(arr)))
  525. if isinstance(narr, MatlabObject):
  526. self.write_object(narr)
  527. elif isinstance(narr, MatlabFunction):
  528. raise MatWriteError('Cannot write matlab functions')
  529. elif narr is EmptyStructMarker: # empty struct array
  530. self.write_empty_struct()
  531. elif narr.dtype.fields: # struct array
  532. self.write_struct(narr)
  533. elif narr.dtype.hasobject: # cell array
  534. self.write_cells(narr)
  535. elif narr.dtype.kind in ('U', 'S'):
  536. if self.unicode_strings:
  537. codec = 'UTF8'
  538. else:
  539. codec = 'ascii'
  540. self.write_char(narr, codec)
  541. else:
  542. self.write_numeric(narr)
  543. self.update_matrix_tag(mat_tag_pos)
  544. def write_numeric(self, arr):
  545. imagf = arr.dtype.kind == 'c'
  546. logif = arr.dtype.kind == 'b'
  547. try:
  548. mclass = NP_TO_MXTYPES[arr.dtype.str[1:]]
  549. except KeyError:
  550. # No matching matlab type, probably complex256 / float128 / float96
  551. # Cast data to complex128 / float64.
  552. if imagf:
  553. arr = arr.astype('c128')
  554. elif logif:
  555. arr = arr.astype('i1') # Should only contain 0/1
  556. else:
  557. arr = arr.astype('f8')
  558. mclass = mxDOUBLE_CLASS
  559. self.write_header(matdims(arr, self.oned_as),
  560. mclass,
  561. is_complex=imagf,
  562. is_logical=logif)
  563. if imagf:
  564. self.write_element(arr.real)
  565. self.write_element(arr.imag)
  566. else:
  567. self.write_element(arr)
  568. def write_char(self, arr, codec='ascii'):
  569. ''' Write string array `arr` with given `codec`
  570. '''
  571. if arr.size == 0 or np.all(arr == ''):
  572. # This an empty string array or a string array containing
  573. # only empty strings. Matlab cannot distinguish between a
  574. # string array that is empty, and a string array containing
  575. # only empty strings, because it stores strings as arrays of
  576. # char. There is no way of having an array of char that is
  577. # not empty, but contains an empty string. We have to
  578. # special-case the array-with-empty-strings because even
  579. # empty strings have zero padding, which would otherwise
  580. # appear in matlab as a string with a space.
  581. shape = (0,) * np.max([arr.ndim, 2])
  582. self.write_header(shape, mxCHAR_CLASS)
  583. self.write_smalldata_element(arr, miUTF8, 0)
  584. return
  585. # non-empty string.
  586. #
  587. # Convert to char array
  588. arr = arr_to_chars(arr)
  589. # We have to write the shape directly, because we are going
  590. # recode the characters, and the resulting stream of chars
  591. # may have a different length
  592. shape = arr.shape
  593. self.write_header(shape, mxCHAR_CLASS)
  594. if arr.dtype.kind == 'U' and arr.size:
  595. # Make one long string from all the characters. We need to
  596. # transpose here, because we're flattening the array, before
  597. # we write the bytes. The bytes have to be written in
  598. # Fortran order.
  599. n_chars = np.product(shape)
  600. st_arr = np.ndarray(shape=(),
  601. dtype=arr_dtype_number(arr, n_chars),
  602. buffer=arr.T.copy()) # Fortran order
  603. # Recode with codec to give byte string
  604. st = st_arr.item().encode(codec)
  605. # Reconstruct as one-dimensional byte array
  606. arr = np.ndarray(shape=(len(st),),
  607. dtype='S1',
  608. buffer=st)
  609. self.write_element(arr, mdtype=miUTF8)
  610. def write_sparse(self, arr):
  611. ''' Sparse matrices are 2D
  612. '''
  613. A = arr.tocsc() # convert to sparse CSC format
  614. A.sort_indices() # MATLAB expects sorted row indices
  615. is_complex = (A.dtype.kind == 'c')
  616. is_logical = (A.dtype.kind == 'b')
  617. nz = A.nnz
  618. self.write_header(matdims(arr, self.oned_as),
  619. mxSPARSE_CLASS,
  620. is_complex=is_complex,
  621. is_logical=is_logical,
  622. # matlab won't load file with 0 nzmax
  623. nzmax=1 if nz == 0 else nz)
  624. self.write_element(A.indices.astype('i4'))
  625. self.write_element(A.indptr.astype('i4'))
  626. self.write_element(A.data.real)
  627. if is_complex:
  628. self.write_element(A.data.imag)
  629. def write_cells(self, arr):
  630. self.write_header(matdims(arr, self.oned_as),
  631. mxCELL_CLASS)
  632. # loop over data, column major
  633. A = np.atleast_2d(arr).flatten('F')
  634. for el in A:
  635. self.write(el)
  636. def write_empty_struct(self):
  637. self.write_header((1, 1), mxSTRUCT_CLASS)
  638. # max field name length set to 1 in an example matlab struct
  639. self.write_element(np.array(1, dtype=np.int32))
  640. # Field names element is empty
  641. self.write_element(np.array([], dtype=np.int8))
  642. def write_struct(self, arr):
  643. self.write_header(matdims(arr, self.oned_as),
  644. mxSTRUCT_CLASS)
  645. self._write_items(arr)
  646. def _write_items(self, arr):
  647. # write fieldnames
  648. fieldnames = [f[0] for f in arr.dtype.descr]
  649. length = max([len(fieldname) for fieldname in fieldnames])+1
  650. max_length = (self.long_field_names and 64) or 32
  651. if length > max_length:
  652. raise ValueError("Field names are restricted to %d characters" %
  653. (max_length-1))
  654. self.write_element(np.array([length], dtype='i4'))
  655. self.write_element(
  656. np.array(fieldnames, dtype='S%d' % (length)),
  657. mdtype=miINT8)
  658. A = np.atleast_2d(arr).flatten('F')
  659. for el in A:
  660. for f in fieldnames:
  661. self.write(el[f])
  662. def write_object(self, arr):
  663. '''Same as writing structs, except different mx class, and extra
  664. classname element after header
  665. '''
  666. self.write_header(matdims(arr, self.oned_as),
  667. mxOBJECT_CLASS)
  668. self.write_element(np.array(arr.classname, dtype='S'),
  669. mdtype=miINT8)
  670. self._write_items(arr)
  671. class MatFile5Writer(object):
  672. ''' Class for writing mat5 files '''
  673. @docfiller
  674. def __init__(self, file_stream,
  675. do_compression=False,
  676. unicode_strings=False,
  677. global_vars=None,
  678. long_field_names=False,
  679. oned_as='row'):
  680. ''' Initialize writer for matlab 5 format files
  681. Parameters
  682. ----------
  683. %(do_compression)s
  684. %(unicode_strings)s
  685. global_vars : None or sequence of strings, optional
  686. Names of variables to be marked as global for matlab
  687. %(long_fields)s
  688. %(oned_as)s
  689. '''
  690. self.file_stream = file_stream
  691. self.do_compression = do_compression
  692. self.unicode_strings = unicode_strings
  693. if global_vars:
  694. self.global_vars = global_vars
  695. else:
  696. self.global_vars = []
  697. self.long_field_names = long_field_names
  698. self.oned_as = oned_as
  699. self._matrix_writer = None
  700. def write_file_header(self):
  701. # write header
  702. hdr = np.zeros((), NDT_FILE_HDR)
  703. hdr['description'] = 'MATLAB 5.0 MAT-file Platform: %s, Created on: %s' \
  704. % (os.name,time.asctime())
  705. hdr['version'] = 0x0100
  706. hdr['endian_test'] = np.ndarray(shape=(),
  707. dtype='S2',
  708. buffer=np.uint16(0x4d49))
  709. self.file_stream.write(hdr.tostring())
  710. def put_variables(self, mdict, write_header=None):
  711. ''' Write variables in `mdict` to stream
  712. Parameters
  713. ----------
  714. mdict : mapping
  715. mapping with method ``items`` returns name, contents pairs where
  716. ``name`` which will appear in the matlab workspace in file load, and
  717. ``contents`` is something writeable to a matlab file, such as a numpy
  718. array.
  719. write_header : {None, True, False}, optional
  720. If True, then write the matlab file header before writing the
  721. variables. If None (the default) then write the file header
  722. if we are at position 0 in the stream. By setting False
  723. here, and setting the stream position to the end of the file,
  724. you can append variables to a matlab file
  725. '''
  726. # write header if requested, or None and start of file
  727. if write_header is None:
  728. write_header = self.file_stream.tell() == 0
  729. if write_header:
  730. self.write_file_header()
  731. self._matrix_writer = VarWriter5(self)
  732. for name, var in mdict.items():
  733. if name[0] == '_':
  734. continue
  735. is_global = name in self.global_vars
  736. if self.do_compression:
  737. stream = BytesIO()
  738. self._matrix_writer.file_stream = stream
  739. self._matrix_writer.write_top(var, asbytes(name), is_global)
  740. out_str = zlib.compress(stream.getvalue())
  741. tag = np.empty((), NDT_TAG_FULL)
  742. tag['mdtype'] = miCOMPRESSED
  743. tag['byte_count'] = len(out_str)
  744. self.file_stream.write(tag.tostring())
  745. self.file_stream.write(out_str)
  746. else: # not compressing
  747. self._matrix_writer.write_top(var, asbytes(name), is_global)