records.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879
  1. """
  2. Record Arrays
  3. =============
  4. Record arrays expose the fields of structured arrays as properties.
  5. Most commonly, ndarrays contain elements of a single type, e.g. floats,
  6. integers, bools etc. However, it is possible for elements to be combinations
  7. of these using structured types, such as::
  8. >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)])
  9. >>> a
  10. array([(1, 2.0), (1, 2.0)],
  11. dtype=[('x', '<i4'), ('y', '<f8')])
  12. Here, each element consists of two fields: x (an int), and y (a float).
  13. This is known as a structured array. The different fields are analogous
  14. to columns in a spread-sheet. The different fields can be accessed as
  15. one would a dictionary::
  16. >>> a['x']
  17. array([1, 1])
  18. >>> a['y']
  19. array([ 2., 2.])
  20. Record arrays allow us to access fields as properties::
  21. >>> ar = np.rec.array(a)
  22. >>> ar.x
  23. array([1, 1])
  24. >>> ar.y
  25. array([ 2., 2.])
  26. """
  27. from __future__ import division, absolute_import, print_function
  28. import sys
  29. import os
  30. import warnings
  31. from . import numeric as sb
  32. from . import numerictypes as nt
  33. from numpy.compat import isfileobj, bytes, long, unicode, os_fspath
  34. from numpy.core.overrides import set_module
  35. from .arrayprint import get_printoptions
  36. # All of the functions allow formats to be a dtype
  37. __all__ = ['record', 'recarray', 'format_parser']
  38. ndarray = sb.ndarray
  39. _byteorderconv = {'b':'>',
  40. 'l':'<',
  41. 'n':'=',
  42. 'B':'>',
  43. 'L':'<',
  44. 'N':'=',
  45. 'S':'s',
  46. 's':'s',
  47. '>':'>',
  48. '<':'<',
  49. '=':'=',
  50. '|':'|',
  51. 'I':'|',
  52. 'i':'|'}
  53. # formats regular expression
  54. # allows multidimension spec with a tuple syntax in front
  55. # of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
  56. # are equally allowed
  57. numfmt = nt.typeDict
  58. def find_duplicate(list):
  59. """Find duplication in a list, return a list of duplicated elements"""
  60. dup = []
  61. for i in range(len(list)):
  62. if (list[i] in list[i + 1:]):
  63. if (list[i] not in dup):
  64. dup.append(list[i])
  65. return dup
  66. @set_module('numpy')
  67. class format_parser(object):
  68. """
  69. Class to convert formats, names, titles description to a dtype.
  70. After constructing the format_parser object, the dtype attribute is
  71. the converted data-type:
  72. ``dtype = format_parser(formats, names, titles).dtype``
  73. Attributes
  74. ----------
  75. dtype : dtype
  76. The converted data-type.
  77. Parameters
  78. ----------
  79. formats : str or list of str
  80. The format description, either specified as a string with
  81. comma-separated format descriptions in the form ``'f8, i4, a5'``, or
  82. a list of format description strings in the form
  83. ``['f8', 'i4', 'a5']``.
  84. names : str or list/tuple of str
  85. The field names, either specified as a comma-separated string in the
  86. form ``'col1, col2, col3'``, or as a list or tuple of strings in the
  87. form ``['col1', 'col2', 'col3']``.
  88. An empty list can be used, in that case default field names
  89. ('f0', 'f1', ...) are used.
  90. titles : sequence
  91. Sequence of title strings. An empty list can be used to leave titles
  92. out.
  93. aligned : bool, optional
  94. If True, align the fields by padding as the C-compiler would.
  95. Default is False.
  96. byteorder : str, optional
  97. If specified, all the fields will be changed to the
  98. provided byte-order. Otherwise, the default byte-order is
  99. used. For all available string specifiers, see `dtype.newbyteorder`.
  100. See Also
  101. --------
  102. dtype, typename, sctype2char
  103. Examples
  104. --------
  105. >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
  106. ... ['T1', 'T2', 'T3']).dtype
  107. dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'),
  108. (('T3', 'col3'), '|S5')])
  109. `names` and/or `titles` can be empty lists. If `titles` is an empty list,
  110. titles will simply not appear. If `names` is empty, default field names
  111. will be used.
  112. >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
  113. ... []).dtype
  114. dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '|S5')])
  115. >>> np.format_parser(['f8', 'i4', 'a5'], [], []).dtype
  116. dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', '|S5')])
  117. """
  118. def __init__(self, formats, names, titles, aligned=False, byteorder=None):
  119. self._parseFormats(formats, aligned)
  120. self._setfieldnames(names, titles)
  121. self._createdescr(byteorder)
  122. self.dtype = self._descr
  123. def _parseFormats(self, formats, aligned=0):
  124. """ Parse the field formats """
  125. if formats is None:
  126. raise ValueError("Need formats argument")
  127. if isinstance(formats, list):
  128. if len(formats) < 2:
  129. formats.append('')
  130. formats = ','.join(formats)
  131. dtype = sb.dtype(formats, aligned)
  132. fields = dtype.fields
  133. if fields is None:
  134. dtype = sb.dtype([('f1', dtype)], aligned)
  135. fields = dtype.fields
  136. keys = dtype.names
  137. self._f_formats = [fields[key][0] for key in keys]
  138. self._offsets = [fields[key][1] for key in keys]
  139. self._nfields = len(keys)
  140. def _setfieldnames(self, names, titles):
  141. """convert input field names into a list and assign to the _names
  142. attribute """
  143. if (names):
  144. if (type(names) in [list, tuple]):
  145. pass
  146. elif isinstance(names, (str, unicode)):
  147. names = names.split(',')
  148. else:
  149. raise NameError("illegal input names %s" % repr(names))
  150. self._names = [n.strip() for n in names[:self._nfields]]
  151. else:
  152. self._names = []
  153. # if the names are not specified, they will be assigned as
  154. # "f0, f1, f2,..."
  155. # if not enough names are specified, they will be assigned as "f[n],
  156. # f[n+1],..." etc. where n is the number of specified names..."
  157. self._names += ['f%d' % i for i in range(len(self._names),
  158. self._nfields)]
  159. # check for redundant names
  160. _dup = find_duplicate(self._names)
  161. if _dup:
  162. raise ValueError("Duplicate field names: %s" % _dup)
  163. if (titles):
  164. self._titles = [n.strip() for n in titles[:self._nfields]]
  165. else:
  166. self._titles = []
  167. titles = []
  168. if (self._nfields > len(titles)):
  169. self._titles += [None] * (self._nfields - len(titles))
  170. def _createdescr(self, byteorder):
  171. descr = sb.dtype({'names':self._names,
  172. 'formats':self._f_formats,
  173. 'offsets':self._offsets,
  174. 'titles':self._titles})
  175. if (byteorder is not None):
  176. byteorder = _byteorderconv[byteorder[0]]
  177. descr = descr.newbyteorder(byteorder)
  178. self._descr = descr
  179. class record(nt.void):
  180. """A data-type scalar that allows field access as attribute lookup.
  181. """
  182. # manually set name and module so that this class's type shows up
  183. # as numpy.record when printed
  184. __name__ = 'record'
  185. __module__ = 'numpy'
  186. def __repr__(self):
  187. if get_printoptions()['legacy'] == '1.13':
  188. return self.__str__()
  189. return super(record, self).__repr__()
  190. def __str__(self):
  191. if get_printoptions()['legacy'] == '1.13':
  192. return str(self.item())
  193. return super(record, self).__str__()
  194. def __getattribute__(self, attr):
  195. if attr in ['setfield', 'getfield', 'dtype']:
  196. return nt.void.__getattribute__(self, attr)
  197. try:
  198. return nt.void.__getattribute__(self, attr)
  199. except AttributeError:
  200. pass
  201. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  202. res = fielddict.get(attr, None)
  203. if res:
  204. obj = self.getfield(*res[:2])
  205. # if it has fields return a record,
  206. # otherwise return the object
  207. try:
  208. dt = obj.dtype
  209. except AttributeError:
  210. #happens if field is Object type
  211. return obj
  212. if dt.names is not None:
  213. return obj.view((self.__class__, obj.dtype))
  214. return obj
  215. else:
  216. raise AttributeError("'record' object has no "
  217. "attribute '%s'" % attr)
  218. def __setattr__(self, attr, val):
  219. if attr in ['setfield', 'getfield', 'dtype']:
  220. raise AttributeError("Cannot set '%s' attribute" % attr)
  221. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  222. res = fielddict.get(attr, None)
  223. if res:
  224. return self.setfield(val, *res[:2])
  225. else:
  226. if getattr(self, attr, None):
  227. return nt.void.__setattr__(self, attr, val)
  228. else:
  229. raise AttributeError("'record' object has no "
  230. "attribute '%s'" % attr)
  231. def __getitem__(self, indx):
  232. obj = nt.void.__getitem__(self, indx)
  233. # copy behavior of record.__getattribute__,
  234. if isinstance(obj, nt.void) and obj.dtype.names is not None:
  235. return obj.view((self.__class__, obj.dtype))
  236. else:
  237. # return a single element
  238. return obj
  239. def pprint(self):
  240. """Pretty-print all fields."""
  241. # pretty-print all fields
  242. names = self.dtype.names
  243. maxlen = max(len(name) for name in names)
  244. fmt = '%% %ds: %%s' % maxlen
  245. rows = [fmt % (name, getattr(self, name)) for name in names]
  246. return "\n".join(rows)
  247. # The recarray is almost identical to a standard array (which supports
  248. # named fields already) The biggest difference is that it can use
  249. # attribute-lookup to find the fields and it is constructed using
  250. # a record.
  251. # If byteorder is given it forces a particular byteorder on all
  252. # the fields (and any subfields)
  253. class recarray(ndarray):
  254. """Construct an ndarray that allows field access using attributes.
  255. Arrays may have a data-types containing fields, analogous
  256. to columns in a spread sheet. An example is ``[(x, int), (y, float)]``,
  257. where each entry in the array is a pair of ``(int, float)``. Normally,
  258. these attributes are accessed using dictionary lookups such as ``arr['x']``
  259. and ``arr['y']``. Record arrays allow the fields to be accessed as members
  260. of the array, using ``arr.x`` and ``arr.y``.
  261. Parameters
  262. ----------
  263. shape : tuple
  264. Shape of output array.
  265. dtype : data-type, optional
  266. The desired data-type. By default, the data-type is determined
  267. from `formats`, `names`, `titles`, `aligned` and `byteorder`.
  268. formats : list of data-types, optional
  269. A list containing the data-types for the different columns, e.g.
  270. ``['i4', 'f8', 'i4']``. `formats` does *not* support the new
  271. convention of using types directly, i.e. ``(int, float, int)``.
  272. Note that `formats` must be a list, not a tuple.
  273. Given that `formats` is somewhat limited, we recommend specifying
  274. `dtype` instead.
  275. names : tuple of str, optional
  276. The name of each column, e.g. ``('x', 'y', 'z')``.
  277. buf : buffer, optional
  278. By default, a new array is created of the given shape and data-type.
  279. If `buf` is specified and is an object exposing the buffer interface,
  280. the array will use the memory from the existing buffer. In this case,
  281. the `offset` and `strides` keywords are available.
  282. Other Parameters
  283. ----------------
  284. titles : tuple of str, optional
  285. Aliases for column names. For example, if `names` were
  286. ``('x', 'y', 'z')`` and `titles` is
  287. ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
  288. ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
  289. byteorder : {'<', '>', '='}, optional
  290. Byte-order for all fields.
  291. aligned : bool, optional
  292. Align the fields in memory as the C-compiler would.
  293. strides : tuple of ints, optional
  294. Buffer (`buf`) is interpreted according to these strides (strides
  295. define how many bytes each array element, row, column, etc.
  296. occupy in memory).
  297. offset : int, optional
  298. Start reading buffer (`buf`) from this offset onwards.
  299. order : {'C', 'F'}, optional
  300. Row-major (C-style) or column-major (Fortran-style) order.
  301. Returns
  302. -------
  303. rec : recarray
  304. Empty array of the given shape and type.
  305. See Also
  306. --------
  307. rec.fromrecords : Construct a record array from data.
  308. record : fundamental data-type for `recarray`.
  309. format_parser : determine a data-type from formats, names, titles.
  310. Notes
  311. -----
  312. This constructor can be compared to ``empty``: it creates a new record
  313. array but does not fill it with data. To create a record array from data,
  314. use one of the following methods:
  315. 1. Create a standard ndarray and convert it to a record array,
  316. using ``arr.view(np.recarray)``
  317. 2. Use the `buf` keyword.
  318. 3. Use `np.rec.fromrecords`.
  319. Examples
  320. --------
  321. Create an array with two fields, ``x`` and ``y``:
  322. >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)])
  323. >>> x
  324. array([(1.0, 2), (3.0, 4)],
  325. dtype=[('x', '<f8'), ('y', '<i4')])
  326. >>> x['x']
  327. array([ 1., 3.])
  328. View the array as a record array:
  329. >>> x = x.view(np.recarray)
  330. >>> x.x
  331. array([ 1., 3.])
  332. >>> x.y
  333. array([2, 4])
  334. Create a new, empty record array:
  335. >>> np.recarray((2,),
  336. ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
  337. rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
  338. (3471280, 1.2134086255804012e-316, 0)],
  339. dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])
  340. """
  341. # manually set name and module so that this class's type shows
  342. # up as "numpy.recarray" when printed
  343. __name__ = 'recarray'
  344. __module__ = 'numpy'
  345. def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
  346. formats=None, names=None, titles=None,
  347. byteorder=None, aligned=False, order='C'):
  348. if dtype is not None:
  349. descr = sb.dtype(dtype)
  350. else:
  351. descr = format_parser(formats, names, titles, aligned, byteorder)._descr
  352. if buf is None:
  353. self = ndarray.__new__(subtype, shape, (record, descr), order=order)
  354. else:
  355. self = ndarray.__new__(subtype, shape, (record, descr),
  356. buffer=buf, offset=offset,
  357. strides=strides, order=order)
  358. return self
  359. def __array_finalize__(self, obj):
  360. if self.dtype.type is not record and self.dtype.names is not None:
  361. # if self.dtype is not np.record, invoke __setattr__ which will
  362. # convert it to a record if it is a void dtype.
  363. self.dtype = self.dtype
  364. def __getattribute__(self, attr):
  365. # See if ndarray has this attr, and return it if so. (note that this
  366. # means a field with the same name as an ndarray attr cannot be
  367. # accessed by attribute).
  368. try:
  369. return object.__getattribute__(self, attr)
  370. except AttributeError: # attr must be a fieldname
  371. pass
  372. # look for a field with this name
  373. fielddict = ndarray.__getattribute__(self, 'dtype').fields
  374. try:
  375. res = fielddict[attr][:2]
  376. except (TypeError, KeyError):
  377. raise AttributeError("recarray has no attribute %s" % attr)
  378. obj = self.getfield(*res)
  379. # At this point obj will always be a recarray, since (see
  380. # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
  381. # non-structured, convert it to an ndarray. Then if obj is structured
  382. # with void type convert it to the same dtype.type (eg to preserve
  383. # numpy.record type if present), since nested structured fields do not
  384. # inherit type. Don't do this for non-void structures though.
  385. if obj.dtype.names is not None:
  386. if issubclass(obj.dtype.type, nt.void):
  387. return obj.view(dtype=(self.dtype.type, obj.dtype))
  388. return obj
  389. else:
  390. return obj.view(ndarray)
  391. # Save the dictionary.
  392. # If the attr is a field name and not in the saved dictionary
  393. # Undo any "setting" of the attribute and do a setfield
  394. # Thus, you can't create attributes on-the-fly that are field names.
  395. def __setattr__(self, attr, val):
  396. # Automatically convert (void) structured types to records
  397. # (but not non-void structures, subarrays, or non-structured voids)
  398. if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
  399. val = sb.dtype((record, val))
  400. newattr = attr not in self.__dict__
  401. try:
  402. ret = object.__setattr__(self, attr, val)
  403. except Exception:
  404. fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
  405. if attr not in fielddict:
  406. exctype, value = sys.exc_info()[:2]
  407. raise exctype(value)
  408. else:
  409. fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
  410. if attr not in fielddict:
  411. return ret
  412. if newattr:
  413. # We just added this one or this setattr worked on an
  414. # internal attribute.
  415. try:
  416. object.__delattr__(self, attr)
  417. except Exception:
  418. return ret
  419. try:
  420. res = fielddict[attr][:2]
  421. except (TypeError, KeyError):
  422. raise AttributeError("record array has no attribute %s" % attr)
  423. return self.setfield(val, *res)
  424. def __getitem__(self, indx):
  425. obj = super(recarray, self).__getitem__(indx)
  426. # copy behavior of getattr, except that here
  427. # we might also be returning a single element
  428. if isinstance(obj, ndarray):
  429. if obj.dtype.names is not None:
  430. obj = obj.view(type(self))
  431. if issubclass(obj.dtype.type, nt.void):
  432. return obj.view(dtype=(self.dtype.type, obj.dtype))
  433. return obj
  434. else:
  435. return obj.view(type=ndarray)
  436. else:
  437. # return a single element
  438. return obj
  439. def __repr__(self):
  440. repr_dtype = self.dtype
  441. if (self.dtype.type is record
  442. or (not issubclass(self.dtype.type, nt.void))):
  443. # If this is a full record array (has numpy.record dtype),
  444. # or if it has a scalar (non-void) dtype with no records,
  445. # represent it using the rec.array function. Since rec.array
  446. # converts dtype to a numpy.record for us, convert back
  447. # to non-record before printing
  448. if repr_dtype.type is record:
  449. repr_dtype = sb.dtype((nt.void, repr_dtype))
  450. prefix = "rec.array("
  451. fmt = 'rec.array(%s,%sdtype=%s)'
  452. else:
  453. # otherwise represent it using np.array plus a view
  454. # This should only happen if the user is playing
  455. # strange games with dtypes.
  456. prefix = "array("
  457. fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'
  458. # get data/shape string. logic taken from numeric.array_repr
  459. if self.size > 0 or self.shape == (0,):
  460. lst = sb.array2string(
  461. self, separator=', ', prefix=prefix, suffix=',')
  462. else:
  463. # show zero-length shape unless it is (0,)
  464. lst = "[], shape=%s" % (repr(self.shape),)
  465. lf = '\n'+' '*len(prefix)
  466. if get_printoptions()['legacy'] == '1.13':
  467. lf = ' ' + lf # trailing space
  468. return fmt % (lst, lf, repr_dtype)
  469. def field(self, attr, val=None):
  470. if isinstance(attr, int):
  471. names = ndarray.__getattribute__(self, 'dtype').names
  472. attr = names[attr]
  473. fielddict = ndarray.__getattribute__(self, 'dtype').fields
  474. res = fielddict[attr][:2]
  475. if val is None:
  476. obj = self.getfield(*res)
  477. if obj.dtype.names is not None:
  478. return obj
  479. return obj.view(ndarray)
  480. else:
  481. return self.setfield(val, *res)
  482. def fromarrays(arrayList, dtype=None, shape=None, formats=None,
  483. names=None, titles=None, aligned=False, byteorder=None):
  484. """ create a record array from a (flat) list of arrays
  485. >>> x1=np.array([1,2,3,4])
  486. >>> x2=np.array(['a','dd','xyz','12'])
  487. >>> x3=np.array([1.1,2,3,4])
  488. >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
  489. >>> print(r[1])
  490. (2, 'dd', 2.0)
  491. >>> x1[1]=34
  492. >>> r.a
  493. array([1, 2, 3, 4])
  494. """
  495. arrayList = [sb.asarray(x) for x in arrayList]
  496. if shape is None or shape == 0:
  497. shape = arrayList[0].shape
  498. if isinstance(shape, int):
  499. shape = (shape,)
  500. if formats is None and dtype is None:
  501. # go through each object in the list to see if it is an ndarray
  502. # and determine the formats.
  503. formats = []
  504. for obj in arrayList:
  505. if not isinstance(obj, ndarray):
  506. raise ValueError("item in the array list must be an ndarray.")
  507. formats.append(obj.dtype.str)
  508. formats = ','.join(formats)
  509. if dtype is not None:
  510. descr = sb.dtype(dtype)
  511. _names = descr.names
  512. else:
  513. parsed = format_parser(formats, names, titles, aligned, byteorder)
  514. _names = parsed._names
  515. descr = parsed._descr
  516. # Determine shape from data-type.
  517. if len(descr) != len(arrayList):
  518. raise ValueError("mismatch between the number of fields "
  519. "and the number of arrays")
  520. d0 = descr[0].shape
  521. nn = len(d0)
  522. if nn > 0:
  523. shape = shape[:-nn]
  524. for k, obj in enumerate(arrayList):
  525. nn = descr[k].ndim
  526. testshape = obj.shape[:obj.ndim - nn]
  527. if testshape != shape:
  528. raise ValueError("array-shape mismatch in array %d" % k)
  529. _array = recarray(shape, descr)
  530. # populate the record array (makes a copy)
  531. for i in range(len(arrayList)):
  532. _array[_names[i]] = arrayList[i]
  533. return _array
  534. def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
  535. titles=None, aligned=False, byteorder=None):
  536. """ create a recarray from a list of records in text form
  537. The data in the same field can be heterogeneous, they will be promoted
  538. to the highest data type. This method is intended for creating
  539. smaller record arrays. If used to create large array without formats
  540. defined
  541. r=fromrecords([(2,3.,'abc')]*100000)
  542. it can be slow.
  543. If formats is None, then this will auto-detect formats. Use list of
  544. tuples rather than list of lists for faster processing.
  545. >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
  546. ... names='col1,col2,col3')
  547. >>> print(r[0])
  548. (456, 'dbe', 1.2)
  549. >>> r.col1
  550. array([456, 2])
  551. >>> r.col2
  552. array(['dbe', 'de'],
  553. dtype='|S3')
  554. >>> import pickle
  555. >>> print(pickle.loads(pickle.dumps(r)))
  556. [(456, 'dbe', 1.2) (2, 'de', 1.3)]
  557. """
  558. if formats is None and dtype is None: # slower
  559. obj = sb.array(recList, dtype=object)
  560. arrlist = [sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])]
  561. return fromarrays(arrlist, formats=formats, shape=shape, names=names,
  562. titles=titles, aligned=aligned, byteorder=byteorder)
  563. if dtype is not None:
  564. descr = sb.dtype((record, dtype))
  565. else:
  566. descr = format_parser(formats, names, titles, aligned, byteorder)._descr
  567. try:
  568. retval = sb.array(recList, dtype=descr)
  569. except (TypeError, ValueError):
  570. if (shape is None or shape == 0):
  571. shape = len(recList)
  572. if isinstance(shape, (int, long)):
  573. shape = (shape,)
  574. if len(shape) > 1:
  575. raise ValueError("Can only deal with 1-d array.")
  576. _array = recarray(shape, descr)
  577. for k in range(_array.size):
  578. _array[k] = tuple(recList[k])
  579. # list of lists instead of list of tuples ?
  580. # 2018-02-07, 1.14.1
  581. warnings.warn(
  582. "fromrecords expected a list of tuples, may have received a list "
  583. "of lists instead. In the future that will raise an error",
  584. FutureWarning, stacklevel=2)
  585. return _array
  586. else:
  587. if shape is not None and retval.shape != shape:
  588. retval.shape = shape
  589. res = retval.view(recarray)
  590. return res
  591. def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
  592. names=None, titles=None, aligned=False, byteorder=None):
  593. """ create a (read-only) record array from binary data contained in
  594. a string"""
  595. if dtype is None and formats is None:
  596. raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
  597. if dtype is not None:
  598. descr = sb.dtype(dtype)
  599. else:
  600. descr = format_parser(formats, names, titles, aligned, byteorder)._descr
  601. itemsize = descr.itemsize
  602. if (shape is None or shape == 0 or shape == -1):
  603. shape = (len(datastring) - offset) // itemsize
  604. _array = recarray(shape, descr, buf=datastring, offset=offset)
  605. return _array
  606. def get_remaining_size(fd):
  607. try:
  608. fn = fd.fileno()
  609. except AttributeError:
  610. return os.path.getsize(fd.name) - fd.tell()
  611. st = os.fstat(fn)
  612. size = st.st_size - fd.tell()
  613. return size
  614. def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
  615. names=None, titles=None, aligned=False, byteorder=None):
  616. """Create an array from binary file data
  617. If file is a string or a path-like object then that file is opened,
  618. else it is assumed to be a file object. The file object must
  619. support random access (i.e. it must have tell and seek methods).
  620. >>> from tempfile import TemporaryFile
  621. >>> a = np.empty(10,dtype='f8,i4,a5')
  622. >>> a[5] = (0.5,10,'abcde')
  623. >>>
  624. >>> fd=TemporaryFile()
  625. >>> a = a.newbyteorder('<')
  626. >>> a.tofile(fd)
  627. >>>
  628. >>> fd.seek(0)
  629. >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
  630. ... byteorder='<')
  631. >>> print(r[5])
  632. (0.5, 10, 'abcde')
  633. >>> r.shape
  634. (10,)
  635. """
  636. if dtype is None and formats is None:
  637. raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
  638. if (shape is None or shape == 0):
  639. shape = (-1,)
  640. elif isinstance(shape, (int, long)):
  641. shape = (shape,)
  642. if isfileobj(fd):
  643. # file already opened
  644. name = 0
  645. else:
  646. # open file
  647. fd = open(os_fspath(fd), 'rb')
  648. name = 1
  649. if (offset > 0):
  650. fd.seek(offset, 1)
  651. size = get_remaining_size(fd)
  652. if dtype is not None:
  653. descr = sb.dtype(dtype)
  654. else:
  655. descr = format_parser(formats, names, titles, aligned, byteorder)._descr
  656. itemsize = descr.itemsize
  657. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  658. shapesize = shapeprod * itemsize
  659. if shapesize < 0:
  660. shape = list(shape)
  661. shape[shape.index(-1)] = size // -shapesize
  662. shape = tuple(shape)
  663. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  664. nbytes = shapeprod * itemsize
  665. if nbytes > size:
  666. raise ValueError(
  667. "Not enough bytes left in file for specified shape and type")
  668. # create the array
  669. _array = recarray(shape, descr)
  670. nbytesread = fd.readinto(_array.data)
  671. if nbytesread != nbytes:
  672. raise IOError("Didn't read as many bytes as expected")
  673. if name:
  674. fd.close()
  675. return _array
  676. def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
  677. names=None, titles=None, aligned=False, byteorder=None, copy=True):
  678. """Construct a record array from a wide-variety of objects.
  679. """
  680. if ((isinstance(obj, (type(None), str)) or isfileobj(obj)) and
  681. (formats is None) and (dtype is None)):
  682. raise ValueError("Must define formats (or dtype) if object is "
  683. "None, string, or an open file")
  684. kwds = {}
  685. if dtype is not None:
  686. dtype = sb.dtype(dtype)
  687. elif formats is not None:
  688. dtype = format_parser(formats, names, titles,
  689. aligned, byteorder)._descr
  690. else:
  691. kwds = {'formats': formats,
  692. 'names': names,
  693. 'titles': titles,
  694. 'aligned': aligned,
  695. 'byteorder': byteorder
  696. }
  697. if obj is None:
  698. if shape is None:
  699. raise ValueError("Must define a shape if obj is None")
  700. return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)
  701. elif isinstance(obj, bytes):
  702. return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)
  703. elif isinstance(obj, (list, tuple)):
  704. if isinstance(obj[0], (tuple, list)):
  705. return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
  706. else:
  707. return fromarrays(obj, dtype=dtype, shape=shape, **kwds)
  708. elif isinstance(obj, recarray):
  709. if dtype is not None and (obj.dtype != dtype):
  710. new = obj.view(dtype)
  711. else:
  712. new = obj
  713. if copy:
  714. new = new.copy()
  715. return new
  716. elif isfileobj(obj):
  717. return fromfile(obj, dtype=dtype, shape=shape, offset=offset)
  718. elif isinstance(obj, ndarray):
  719. if dtype is not None and (obj.dtype != dtype):
  720. new = obj.view(dtype)
  721. else:
  722. new = obj
  723. if copy:
  724. new = new.copy()
  725. return new.view(recarray)
  726. else:
  727. interface = getattr(obj, "__array_interface__", None)
  728. if interface is None or not isinstance(interface, dict):
  729. raise ValueError("Unknown input type")
  730. obj = sb.array(obj)
  731. if dtype is not None and (obj.dtype != dtype):
  732. obj = obj.view(dtype)
  733. return obj.view(recarray)