''' Classes for read / write of matlab (TM) 4 files
'''
  3. from __future__ import division, print_function, absolute_import
  4. import sys
  5. import warnings
  6. import numpy as np
  7. from numpy.compat import asbytes, asstr
  8. import scipy.sparse
  9. from scipy._lib.six import string_types
  10. from .miobase import (MatFileReader, docfiller, matdims, read_dtype,
  11. convert_dtypes, arr_to_chars, arr_dtype_number)
  12. from .mio_utils import squeeze_element, chars_to_strings
  13. from functools import reduce
  14. SYS_LITTLE_ENDIAN = sys.byteorder == 'little'
  15. miDOUBLE = 0
  16. miSINGLE = 1
  17. miINT32 = 2
  18. miINT16 = 3
  19. miUINT16 = 4
  20. miUINT8 = 5
  21. mdtypes_template = {
  22. miDOUBLE: 'f8',
  23. miSINGLE: 'f4',
  24. miINT32: 'i4',
  25. miINT16: 'i2',
  26. miUINT16: 'u2',
  27. miUINT8: 'u1',
  28. 'header': [('mopt', 'i4'),
  29. ('mrows', 'i4'),
  30. ('ncols', 'i4'),
  31. ('imagf', 'i4'),
  32. ('namlen', 'i4')],
  33. 'U1': 'U1',
  34. }
  35. np_to_mtypes = {
  36. 'f8': miDOUBLE,
  37. 'c32': miDOUBLE,
  38. 'c24': miDOUBLE,
  39. 'c16': miDOUBLE,
  40. 'f4': miSINGLE,
  41. 'c8': miSINGLE,
  42. 'i4': miINT32,
  43. 'i2': miINT16,
  44. 'u2': miUINT16,
  45. 'u1': miUINT8,
  46. 'S1': miUINT8,
  47. }
  48. # matrix classes
  49. mxFULL_CLASS = 0
  50. mxCHAR_CLASS = 1
  51. mxSPARSE_CLASS = 2
  52. order_codes = {
  53. 0: '<',
  54. 1: '>',
  55. 2: 'VAX D-float', # !
  56. 3: 'VAX G-float',
  57. 4: 'Cray', # !!
  58. }
  59. mclass_info = {
  60. mxFULL_CLASS: 'double',
  61. mxCHAR_CLASS: 'char',
  62. mxSPARSE_CLASS: 'sparse',
  63. }
  64. class VarHeader4(object):
  65. # Mat4 variables never logical or global
  66. is_logical = False
  67. is_global = False
  68. def __init__(self,
  69. name,
  70. dtype,
  71. mclass,
  72. dims,
  73. is_complex):
  74. self.name = name
  75. self.dtype = dtype
  76. self.mclass = mclass
  77. self.dims = dims
  78. self.is_complex = is_complex
  79. class VarReader4(object):
  80. ''' Class to read matlab 4 variables '''
  81. def __init__(self, file_reader):
  82. self.file_reader = file_reader
  83. self.mat_stream = file_reader.mat_stream
  84. self.dtypes = file_reader.dtypes
  85. self.chars_as_strings = file_reader.chars_as_strings
  86. self.squeeze_me = file_reader.squeeze_me
  87. def read_header(self):
  88. ''' Read and return header for variable '''
  89. data = read_dtype(self.mat_stream, self.dtypes['header'])
  90. name = self.mat_stream.read(int(data['namlen'])).strip(b'\x00')
  91. if data['mopt'] < 0 or data['mopt'] > 5000:
  92. raise ValueError('Mat 4 mopt wrong format, byteswapping problem?')
  93. M, rest = divmod(data['mopt'], 1000) # order code
  94. if M not in (0, 1):
  95. warnings.warn("We do not support byte ordering '%s'; returned "
  96. "data may be corrupt" % order_codes[M],
  97. UserWarning)
  98. O, rest = divmod(rest, 100) # unused, should be 0
  99. if O != 0:
  100. raise ValueError('O in MOPT integer should be 0, wrong format?')
  101. P, rest = divmod(rest, 10) # data type code e.g miDOUBLE (see above)
  102. T = rest # matrix type code e.g. mxFULL_CLASS (see above)
  103. dims = (data['mrows'], data['ncols'])
  104. is_complex = data['imagf'] == 1
  105. dtype = self.dtypes[P]
  106. return VarHeader4(
  107. name,
  108. dtype,
  109. T,
  110. dims,
  111. is_complex)
  112. def array_from_header(self, hdr, process=True):
  113. mclass = hdr.mclass
  114. if mclass == mxFULL_CLASS:
  115. arr = self.read_full_array(hdr)
  116. elif mclass == mxCHAR_CLASS:
  117. arr = self.read_char_array(hdr)
  118. if process and self.chars_as_strings:
  119. arr = chars_to_strings(arr)
  120. elif mclass == mxSPARSE_CLASS:
  121. # no current processing (below) makes sense for sparse
  122. return self.read_sparse_array(hdr)
  123. else:
  124. raise TypeError('No reader for class code %s' % mclass)
  125. if process and self.squeeze_me:
  126. return squeeze_element(arr)
  127. return arr
  128. def read_sub_array(self, hdr, copy=True):
  129. ''' Mat4 read using header `hdr` dtype and dims
  130. Parameters
  131. ----------
  132. hdr : object
  133. object with attributes ``dtype``, ``dims``. dtype is assumed to be
  134. the correct endianness
  135. copy : bool, optional
  136. copies array before return if True (default True)
  137. (buffer is usually read only)
  138. Returns
  139. -------
  140. arr : ndarray
  141. of dtype givem by `hdr` ``dtype`` and shape givem by `hdr` ``dims``
  142. '''
  143. dt = hdr.dtype
  144. dims = hdr.dims
  145. num_bytes = dt.itemsize
  146. for d in dims:
  147. num_bytes *= d
  148. buffer = self.mat_stream.read(int(num_bytes))
  149. if len(buffer) != num_bytes:
  150. raise ValueError("Not enough bytes to read matrix '%s'; is this "
  151. "a badly-formed file? Consider listing matrices "
  152. "with `whosmat` and loading named matrices with "
  153. "`variable_names` kwarg to `loadmat`" % hdr.name)
  154. arr = np.ndarray(shape=dims,
  155. dtype=dt,
  156. buffer=buffer,
  157. order='F')
  158. if copy:
  159. arr = arr.copy()
  160. return arr
  161. def read_full_array(self, hdr):
  162. ''' Full (rather than sparse) matrix getter
  163. Read matrix (array) can be real or complex
  164. Parameters
  165. ----------
  166. hdr : ``VarHeader4`` instance
  167. Returns
  168. -------
  169. arr : ndarray
  170. complex array if ``hdr.is_complex`` is True, otherwise a real
  171. numeric array
  172. '''
  173. if hdr.is_complex:
  174. # avoid array copy to save memory
  175. res = self.read_sub_array(hdr, copy=False)
  176. res_j = self.read_sub_array(hdr, copy=False)
  177. return res + (res_j * 1j)
  178. return self.read_sub_array(hdr)
  179. def read_char_array(self, hdr):
  180. ''' latin-1 text matrix (char matrix) reader
  181. Parameters
  182. ----------
  183. hdr : ``VarHeader4`` instance
  184. Returns
  185. -------
  186. arr : ndarray
  187. with dtype 'U1', shape given by `hdr` ``dims``
  188. '''
  189. arr = self.read_sub_array(hdr).astype(np.uint8)
  190. S = arr.tostring().decode('latin-1')
  191. return np.ndarray(shape=hdr.dims,
  192. dtype=np.dtype('U1'),
  193. buffer=np.array(S)).copy()
  194. def read_sparse_array(self, hdr):
  195. ''' Read and return sparse matrix type
  196. Parameters
  197. ----------
  198. hdr : ``VarHeader4`` instance
  199. Returns
  200. -------
  201. arr : ``scipy.sparse.coo_matrix``
  202. with dtype ``float`` and shape read from the sparse matrix data
  203. Notes
  204. -----
  205. MATLAB 4 real sparse arrays are saved in a N+1 by 3 array format, where
  206. N is the number of non-zero values. Column 1 values [0:N] are the
  207. (1-based) row indices of the each non-zero value, column 2 [0:N] are the
  208. column indices, column 3 [0:N] are the (real) values. The last values
  209. [-1,0:2] of the rows, column indices are shape[0] and shape[1]
  210. respectively of the output matrix. The last value for the values column
  211. is a padding 0. mrows and ncols values from the header give the shape of
  212. the stored matrix, here [N+1, 3]. Complex data is saved as a 4 column
  213. matrix, where the fourth column contains the imaginary component; the
  214. last value is again 0. Complex sparse data do *not* have the header
  215. ``imagf`` field set to True; the fact that the data are complex is only
  216. detectable because there are 4 storage columns
  217. '''
  218. res = self.read_sub_array(hdr)
  219. tmp = res[:-1,:]
  220. # All numbers are float64 in Matlab, but Scipy sparse expects int shape
  221. dims = (int(res[-1,0]), int(res[-1,1]))
  222. I = np.ascontiguousarray(tmp[:,0],dtype='intc') # fixes byte order also
  223. J = np.ascontiguousarray(tmp[:,1],dtype='intc')
  224. I -= 1 # for 1-based indexing
  225. J -= 1
  226. if res.shape[1] == 3:
  227. V = np.ascontiguousarray(tmp[:,2],dtype='float')
  228. else:
  229. V = np.ascontiguousarray(tmp[:,2],dtype='complex')
  230. V.imag = tmp[:,3]
  231. return scipy.sparse.coo_matrix((V,(I,J)), dims)
  232. def shape_from_header(self, hdr):
  233. '''Read the shape of the array described by the header.
  234. The file position after this call is unspecified.
  235. '''
  236. mclass = hdr.mclass
  237. if mclass == mxFULL_CLASS:
  238. shape = tuple(map(int, hdr.dims))
  239. elif mclass == mxCHAR_CLASS:
  240. shape = tuple(map(int, hdr.dims))
  241. if self.chars_as_strings:
  242. shape = shape[:-1]
  243. elif mclass == mxSPARSE_CLASS:
  244. dt = hdr.dtype
  245. dims = hdr.dims
  246. if not (len(dims) == 2 and dims[0] >= 1 and dims[1] >= 1):
  247. return ()
  248. # Read only the row and column counts
  249. self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
  250. rows = np.ndarray(shape=(1,), dtype=dt,
  251. buffer=self.mat_stream.read(dt.itemsize))
  252. self.mat_stream.seek(dt.itemsize * (dims[0] - 1), 1)
  253. cols = np.ndarray(shape=(1,), dtype=dt,
  254. buffer=self.mat_stream.read(dt.itemsize))
  255. shape = (int(rows), int(cols))
  256. else:
  257. raise TypeError('No reader for class code %s' % mclass)
  258. if self.squeeze_me:
  259. shape = tuple([x for x in shape if x != 1])
  260. return shape
  261. class MatFile4Reader(MatFileReader):
  262. ''' Reader for Mat4 files '''
  263. @docfiller
  264. def __init__(self, mat_stream, *args, **kwargs):
  265. ''' Initialize matlab 4 file reader
  266. %(matstream_arg)s
  267. %(load_args)s
  268. '''
  269. super(MatFile4Reader, self).__init__(mat_stream, *args, **kwargs)
  270. self._matrix_reader = None
  271. def guess_byte_order(self):
  272. self.mat_stream.seek(0)
  273. mopt = read_dtype(self.mat_stream, np.dtype('i4'))
  274. self.mat_stream.seek(0)
  275. if mopt == 0:
  276. return '<'
  277. if mopt < 0 or mopt > 5000:
  278. # Number must have been byteswapped
  279. return SYS_LITTLE_ENDIAN and '>' or '<'
  280. # Not byteswapped
  281. return SYS_LITTLE_ENDIAN and '<' or '>'
  282. def initialize_read(self):
  283. ''' Run when beginning read of variables
  284. Sets up readers from parameters in `self`
  285. '''
  286. self.dtypes = convert_dtypes(mdtypes_template, self.byte_order)
  287. self._matrix_reader = VarReader4(self)
  288. def read_var_header(self):
  289. ''' Read and return header, next position
  290. Parameters
  291. ----------
  292. None
  293. Returns
  294. -------
  295. header : object
  296. object that can be passed to self.read_var_array, and that
  297. has attributes ``name`` and ``is_global``
  298. next_position : int
  299. position in stream of next variable
  300. '''
  301. hdr = self._matrix_reader.read_header()
  302. n = reduce(lambda x, y: x*y, hdr.dims, 1) # fast product
  303. remaining_bytes = hdr.dtype.itemsize * n
  304. if hdr.is_complex and not hdr.mclass == mxSPARSE_CLASS:
  305. remaining_bytes *= 2
  306. next_position = self.mat_stream.tell() + remaining_bytes
  307. return hdr, next_position
  308. def read_var_array(self, header, process=True):
  309. ''' Read array, given `header`
  310. Parameters
  311. ----------
  312. header : header object
  313. object with fields defining variable header
  314. process : {True, False}, optional
  315. If True, apply recursive post-processing during loading of array.
  316. Returns
  317. -------
  318. arr : array
  319. array with post-processing applied or not according to
  320. `process`.
  321. '''
  322. return self._matrix_reader.array_from_header(header, process)
  323. def get_variables(self, variable_names=None):
  324. ''' get variables from stream as dictionary
  325. Parameters
  326. ----------
  327. variable_names : None or str or sequence of str, optional
  328. variable name, or sequence of variable names to get from Mat file /
  329. file stream. If None, then get all variables in file
  330. '''
  331. if isinstance(variable_names, string_types):
  332. variable_names = [variable_names]
  333. elif variable_names is not None:
  334. variable_names = list(variable_names)
  335. self.mat_stream.seek(0)
  336. # set up variable reader
  337. self.initialize_read()
  338. mdict = {}
  339. while not self.end_of_stream():
  340. hdr, next_position = self.read_var_header()
  341. name = asstr(hdr.name)
  342. if variable_names is not None and name not in variable_names:
  343. self.mat_stream.seek(next_position)
  344. continue
  345. mdict[name] = self.read_var_array(hdr)
  346. self.mat_stream.seek(next_position)
  347. if variable_names is not None:
  348. variable_names.remove(name)
  349. if len(variable_names) == 0:
  350. break
  351. return mdict
  352. def list_variables(self):
  353. ''' list variables from stream '''
  354. self.mat_stream.seek(0)
  355. # set up variable reader
  356. self.initialize_read()
  357. vars = []
  358. while not self.end_of_stream():
  359. hdr, next_position = self.read_var_header()
  360. name = asstr(hdr.name)
  361. shape = self._matrix_reader.shape_from_header(hdr)
  362. info = mclass_info.get(hdr.mclass, 'unknown')
  363. vars.append((name, shape, info))
  364. self.mat_stream.seek(next_position)
  365. return vars
  366. def arr_to_2d(arr, oned_as='row'):
  367. ''' Make ``arr`` exactly two dimensional
  368. If `arr` has more than 2 dimensions, raise a ValueError
  369. Parameters
  370. ----------
  371. arr : array
  372. oned_as : {'row', 'column'}, optional
  373. Whether to reshape 1D vectors as row vectors or column vectors.
  374. See documentation for ``matdims`` for more detail
  375. Returns
  376. -------
  377. arr2d : array
  378. 2D version of the array
  379. '''
  380. dims = matdims(arr, oned_as)
  381. if len(dims) > 2:
  382. raise ValueError('Matlab 4 files cannot save arrays with more than '
  383. '2 dimensions')
  384. return arr.reshape(dims)
  385. class VarWriter4(object):
  386. def __init__(self, file_writer):
  387. self.file_stream = file_writer.file_stream
  388. self.oned_as = file_writer.oned_as
  389. def write_bytes(self, arr):
  390. self.file_stream.write(arr.tostring(order='F'))
  391. def write_string(self, s):
  392. self.file_stream.write(s)
  393. def write_header(self, name, shape, P=miDOUBLE, T=mxFULL_CLASS, imagf=0):
  394. ''' Write header for given data options
  395. Parameters
  396. ----------
  397. name : str
  398. name of variable
  399. shape : sequence
  400. Shape of array as it will be read in matlab
  401. P : int, optional
  402. code for mat4 data type, one of ``miDOUBLE, miSINGLE, miINT32,
  403. miINT16, miUINT16, miUINT8``
  404. T : int, optional
  405. code for mat4 matrix class, one of ``mxFULL_CLASS, mxCHAR_CLASS,
  406. mxSPARSE_CLASS``
  407. imagf : int, optional
  408. flag indicating complex
  409. '''
  410. header = np.empty((), mdtypes_template['header'])
  411. M = not SYS_LITTLE_ENDIAN
  412. O = 0
  413. header['mopt'] = (M * 1000 +
  414. O * 100 +
  415. P * 10 +
  416. T)
  417. header['mrows'] = shape[0]
  418. header['ncols'] = shape[1]
  419. header['imagf'] = imagf
  420. header['namlen'] = len(name) + 1
  421. self.write_bytes(header)
  422. self.write_string(asbytes(name + '\0'))
  423. def write(self, arr, name):
  424. ''' Write matrix `arr`, with name `name`
  425. Parameters
  426. ----------
  427. arr : array_like
  428. array to write
  429. name : str
  430. name in matlab workspace
  431. '''
  432. # we need to catch sparse first, because np.asarray returns an
  433. # an object array for scipy.sparse
  434. if scipy.sparse.issparse(arr):
  435. self.write_sparse(arr, name)
  436. return
  437. arr = np.asarray(arr)
  438. dt = arr.dtype
  439. if not dt.isnative:
  440. arr = arr.astype(dt.newbyteorder('='))
  441. dtt = dt.type
  442. if dtt is np.object_:
  443. raise TypeError('Cannot save object arrays in Mat4')
  444. elif dtt is np.void:
  445. raise TypeError('Cannot save void type arrays')
  446. elif dtt in (np.unicode_, np.string_):
  447. self.write_char(arr, name)
  448. return
  449. self.write_numeric(arr, name)
  450. def write_numeric(self, arr, name):
  451. arr = arr_to_2d(arr, self.oned_as)
  452. imagf = arr.dtype.kind == 'c'
  453. try:
  454. P = np_to_mtypes[arr.dtype.str[1:]]
  455. except KeyError:
  456. if imagf:
  457. arr = arr.astype('c128')
  458. else:
  459. arr = arr.astype('f8')
  460. P = miDOUBLE
  461. self.write_header(name,
  462. arr.shape,
  463. P=P,
  464. T=mxFULL_CLASS,
  465. imagf=imagf)
  466. if imagf:
  467. self.write_bytes(arr.real)
  468. self.write_bytes(arr.imag)
  469. else:
  470. self.write_bytes(arr)
  471. def write_char(self, arr, name):
  472. arr = arr_to_chars(arr)
  473. arr = arr_to_2d(arr, self.oned_as)
  474. dims = arr.shape
  475. self.write_header(
  476. name,
  477. dims,
  478. P=miUINT8,
  479. T=mxCHAR_CLASS)
  480. if arr.dtype.kind == 'U':
  481. # Recode unicode to latin1
  482. n_chars = np.product(dims)
  483. st_arr = np.ndarray(shape=(),
  484. dtype=arr_dtype_number(arr, n_chars),
  485. buffer=arr)
  486. st = st_arr.item().encode('latin-1')
  487. arr = np.ndarray(shape=dims, dtype='S1', buffer=st)
  488. self.write_bytes(arr)
  489. def write_sparse(self, arr, name):
  490. ''' Sparse matrices are 2D
  491. See docstring for VarReader4.read_sparse_array
  492. '''
  493. A = arr.tocoo() # convert to sparse COO format (ijv)
  494. imagf = A.dtype.kind == 'c'
  495. ijv = np.zeros((A.nnz + 1, 3+imagf), dtype='f8')
  496. ijv[:-1,0] = A.row
  497. ijv[:-1,1] = A.col
  498. ijv[:-1,0:2] += 1 # 1 based indexing
  499. if imagf:
  500. ijv[:-1,2] = A.data.real
  501. ijv[:-1,3] = A.data.imag
  502. else:
  503. ijv[:-1,2] = A.data
  504. ijv[-1,0:2] = A.shape
  505. self.write_header(
  506. name,
  507. ijv.shape,
  508. P=miDOUBLE,
  509. T=mxSPARSE_CLASS)
  510. self.write_bytes(ijv)
  511. class MatFile4Writer(object):
  512. ''' Class for writing matlab 4 format files '''
  513. def __init__(self, file_stream, oned_as=None):
  514. self.file_stream = file_stream
  515. if oned_as is None:
  516. oned_as = 'row'
  517. self.oned_as = oned_as
  518. self._matrix_writer = None
  519. def put_variables(self, mdict, write_header=None):
  520. ''' Write variables in `mdict` to stream
  521. Parameters
  522. ----------
  523. mdict : mapping
  524. mapping with method ``items`` return name, contents pairs
  525. where ``name`` which will appeak in the matlab workspace in
  526. file load, and ``contents`` is something writeable to a
  527. matlab file, such as a numpy array.
  528. write_header : {None, True, False}
  529. If True, then write the matlab file header before writing the
  530. variables. If None (the default) then write the file header
  531. if we are at position 0 in the stream. By setting False
  532. here, and setting the stream position to the end of the file,
  533. you can append variables to a matlab file
  534. '''
  535. # there is no header for a matlab 4 mat file, so we ignore the
  536. # ``write_header`` input argument. It's there for compatibility
  537. # with the matlab 5 version of this method
  538. self._matrix_writer = VarWriter4(self)
  539. for name, var in mdict.items():
  540. self._matrix_writer.write(var, name)