12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879 |
- """
- Record Arrays
- =============
- Record arrays expose the fields of structured arrays as properties.
- Most commonly, ndarrays contain elements of a single type, e.g. floats,
- integers, bools etc. However, it is possible for elements to be combinations
- of these using structured types, such as::
- >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', int), ('y', float)])
- >>> a
- array([(1, 2.0), (1, 2.0)],
- dtype=[('x', '<i4'), ('y', '<f8')])
- Here, each element consists of two fields: x (an int), and y (a float).
- This is known as a structured array. The different fields are analogous
- to columns in a spread-sheet. The different fields can be accessed as
- one would a dictionary::
- >>> a['x']
- array([1, 1])
- >>> a['y']
- array([ 2., 2.])
- Record arrays allow us to access fields as properties::
- >>> ar = np.rec.array(a)
- >>> ar.x
- array([1, 1])
- >>> ar.y
- array([ 2., 2.])
- """
- from __future__ import division, absolute_import, print_function
- import sys
- import os
- import warnings
- from . import numeric as sb
- from . import numerictypes as nt
- from numpy.compat import isfileobj, bytes, long, unicode, os_fspath
- from numpy.core.overrides import set_module
- from .arrayprint import get_printoptions
# All of the functions allow formats to be a dtype
__all__ = ['record', 'recarray', 'format_parser']


# Short alias used as the base class and for isinstance checks below.
ndarray = sb.ndarray

# Translates the first character of a user-supplied byte-order specifier
# into the character handed to ``dtype.newbyteorder``.
_byteorderconv = {
    # big / little / native, lower case
    'b': '>',
    'l': '<',
    'n': '=',
    # big / little / native, upper case
    'B': '>',
    'L': '<',
    'N': '=',
    # swap
    'S': 's',
    's': 's',
    # explicit byte-order characters map to themselves
    '>': '>',
    '<': '<',
    '=': '=',
    '|': '|',
    # ignore
    'I': '|',
    'i': '|',
}

# Format strings may carry a multidimension spec written with tuple syntax
# in front of the letter code: '(2,3)f4' and ' ( 2 , 3 ) f4 ' are equally
# allowed.
# NOTE(review): the original comment calls this a "formats regular
# expression", but only the type-dictionary alias below is defined here.
numfmt = nt.typeDict
def find_duplicate(list):
    """Return the elements of `list` that occur more than once.

    Every duplicated element is reported exactly once, ordered by the
    position of its first occurrence.  Only ``==`` comparisons are used, so
    the elements do not have to be hashable.
    """
    repeated = []
    for pos, item in enumerate(list):
        if item in repeated:
            # already reported
            continue
        if item in list[pos + 1:]:
            repeated.append(item)
    return repeated
@set_module('numpy')
class format_parser(object):
    """
    Class to convert formats, names, titles description to a dtype.

    After constructing the format_parser object, the dtype attribute is
    the converted data-type:
    ``dtype = format_parser(formats, names, titles).dtype``

    Attributes
    ----------
    dtype : dtype
        The converted data-type.

    Parameters
    ----------
    formats : str or list of str
        The format description, either specified as a string with
        comma-separated format descriptions in the form ``'f8, i4, a5'``, or
        a list of format description strings  in the form
        ``['f8', 'i4', 'a5']``.  A list passed here is not modified.
    names : str or list/tuple of str
        The field names, either specified as a comma-separated string in the
        form ``'col1, col2, col3'``, or as a list or tuple of strings in the
        form ``['col1', 'col2', 'col3']``.
        An empty list can be used, in that case default field names
        ('f0', 'f1', ...) are used.
    titles : sequence
        Sequence of title strings. An empty list can be used to leave titles
        out.
    aligned : bool, optional
        If True, align the fields by padding as the C-compiler would.
        Default is False.
    byteorder : str, optional
        If specified, all the fields will be changed to the
        provided byte-order.  Otherwise, the default byte-order is
        used. For all available string specifiers, see `dtype.newbyteorder`.

    See Also
    --------
    dtype, typename, sctype2char

    Examples
    --------
    >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
    ...                  ['T1', 'T2', 'T3']).dtype
    dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'),
           (('T3', 'col3'), '|S5')])

    `names` and/or `titles` can be empty lists. If `titles` is an empty list,
    titles will simply not appear. If `names` is empty, default field names
    will be used.

    >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
    ...                  []).dtype
    dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '|S5')])
    >>> np.format_parser(['f8', 'i4', 'a5'], [], []).dtype
    dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', '|S5')])

    """

    def __init__(self, formats, names, titles, aligned=False, byteorder=None):
        # The three steps depend on each other: the formats fix the number
        # of fields, which the names/titles handling and the final dtype
        # construction rely on.
        self._parseFormats(formats, aligned)
        self._setfieldnames(names, titles)
        self._createdescr(byteorder)
        self.dtype = self._descr

    def _parseFormats(self, formats, aligned=0):
        """ Parse the field formats

        Sets ``_f_formats`` (per-field dtypes), ``_offsets`` (per-field
        byte offsets) and ``_nfields``.

        Raises
        ------
        ValueError
            If `formats` is None.
        """

        if formats is None:
            raise ValueError("Need formats argument")
        if isinstance(formats, list):
            # Lists shorter than two entries get an empty trailing entry
            # before joining (presumably so that a single format still
            # reads as a comma-separated field list).  Build a new list
            # instead of appending in place so the caller's list is left
            # untouched.
            parts = formats if len(formats) >= 2 else formats + ['']
            formats = ','.join(parts)
        dtype = sb.dtype(formats, aligned)
        fields = dtype.fields
        if fields is None:
            # unstructured dtype: wrap it as the single field 'f1'
            dtype = sb.dtype([('f1', dtype)], aligned)
            fields = dtype.fields
        keys = dtype.names
        self._f_formats = [fields[key][0] for key in keys]
        self._offsets = [fields[key][1] for key in keys]
        self._nfields = len(keys)

    def _setfieldnames(self, names, titles):
        """convert input field names into a list and assign to the _names
        attribute

        Also fills ``_titles``, padded with None up to the number of fields.

        Raises
        ------
        NameError
            If `names` is neither a list, a tuple nor a string.
        ValueError
            If the resulting field names contain duplicates.
        """

        if (names):
            if (type(names) in [list, tuple]):
                pass
            elif isinstance(names, (str, unicode)):
                names = names.split(',')
            else:
                raise NameError("illegal input names %s" % repr(names))

            # surplus names beyond the number of fields are ignored
            self._names = [n.strip() for n in names[:self._nfields]]
        else:
            self._names = []

        # if the names are not specified, they will be assigned as
        #  "f0, f1, f2,..."
        # if not enough names are specified, they will be assigned as "f[n],
        # f[n+1],..." etc. where n is the number of specified names..."
        self._names += ['f%d' % i for i in range(len(self._names),
                                                 self._nfields)]
        # check for redundant names
        _dup = find_duplicate(self._names)
        if _dup:
            raise ValueError("Duplicate field names: %s" % _dup)

        if (titles):
            self._titles = [n.strip() for n in titles[:self._nfields]]
        else:
            self._titles = []
            titles = []

        if (self._nfields > len(titles)):
            self._titles += [None] * (self._nfields - len(titles))

    def _createdescr(self, byteorder):
        """Build the final dtype from the parsed names, formats, offsets
        and titles, and store it in ``_descr``.

        If `byteorder` is not None, its first character is translated via
        ``_byteorderconv`` and applied with ``newbyteorder``.
        """
        descr = sb.dtype({'names':self._names,
                          'formats':self._f_formats,
                          'offsets':self._offsets,
                          'titles':self._titles})
        if (byteorder is not None):
            byteorder = _byteorderconv[byteorder[0]]
            descr = descr.newbyteorder(byteorder)

        self._descr = descr
class record(nt.void):
    """A data-type scalar that allows field access as attribute lookup.
    """

    # manually set name and module so that this class's type shows up
    # as numpy.record when printed
    __name__ = 'record'
    __module__ = 'numpy'

    def __repr__(self):
        """Use the str form in '1.13' legacy print mode, else the base
        class repr."""
        if get_printoptions()['legacy'] != '1.13':
            return super(record, self).__repr__()
        return self.__str__()

    def __str__(self):
        """Use ``str(self.item())`` in '1.13' legacy print mode, else the
        base class str."""
        if get_printoptions()['legacy'] != '1.13':
            return super(record, self).__str__()
        return str(self.item())

    def __getattribute__(self, attr):
        """Look `attr` up as a regular attribute first, then as a field.

        Raises AttributeError when `attr` is neither.
        """
        void_getattr = nt.void.__getattribute__

        # these three are always served directly by the base class
        if attr in ('setfield', 'getfield', 'dtype'):
            return void_getattr(self, attr)

        try:
            return void_getattr(self, attr)
        except AttributeError:
            pass

        field_info = void_getattr(self, 'dtype').fields.get(attr, None)
        if not field_info:
            raise AttributeError("'record' object has no "
                                 "attribute '%s'" % attr)

        obj = self.getfield(*field_info[:2])
        # if it has fields return a record,
        # otherwise return the object
        try:
            dt = obj.dtype
        except AttributeError:
            # happens if field is Object type
            return obj
        if dt.names is None:
            return obj
        return obj.view((self.__class__, obj.dtype))

    def __setattr__(self, attr, val):
        """Assign to the field named `attr`, or to an existing attribute.

        Raises AttributeError for 'setfield', 'getfield' and 'dtype', and
        for names that are neither a field nor an attribute with a truthy
        current value.
        """
        if attr in ('setfield', 'getfield', 'dtype'):
            raise AttributeError("Cannot set '%s' attribute" % attr)

        field_info = nt.void.__getattribute__(self, 'dtype').fields.get(
            attr, None)
        if field_info:
            return self.setfield(val, *field_info[:2])

        if getattr(self, attr, None):
            return nt.void.__setattr__(self, attr, val)

        raise AttributeError("'record' object has no "
                             "attribute '%s'" % attr)

    def __getitem__(self, indx):
        """Index like the base class, but hand back structured void
        results as instances of this class."""
        obj = nt.void.__getitem__(self, indx)

        # copy behavior of record.__getattribute__,
        is_structured_void = (isinstance(obj, nt.void)
                              and obj.dtype.names is not None)
        if not is_structured_void:
            # return a single element
            return obj
        return obj.view((self.__class__, obj.dtype))

    def pprint(self):
        """Pretty-print all fields."""
        # one "name: value" row per field, names right-aligned to the
        # longest field name
        names = self.dtype.names
        width = max(len(name) for name in names)
        fmt = '%% %ds: %%s' % width
        return "\n".join(fmt % (name, getattr(self, name)) for name in names)
- # The recarray is almost identical to a standard array (which supports
- # named fields already) The biggest difference is that it can use
- # attribute-lookup to find the fields and it is constructed using
- # a record.
- # If byteorder is given it forces a particular byteorder on all
- # the fields (and any subfields)
class recarray(ndarray):
    """Construct an ndarray that allows field access using attributes.

    Arrays may have a data-types containing fields, analogous
    to columns in a spread sheet.  An example is ``[(x, int), (y, float)]``,
    where each entry in the array is a pair of ``(int, float)``.  Normally,
    these attributes are accessed using dictionary lookups such as ``arr['x']``
    and ``arr['y']``.  Record arrays allow the fields to be accessed as members
    of the array, using ``arr.x`` and ``arr.y``.

    Parameters
    ----------
    shape : tuple
        Shape of output array.
    dtype : data-type, optional
        The desired data-type.  By default, the data-type is determined
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    formats : list of data-types, optional
        A list containing the data-types for the different columns, e.g.
        ``['i4', 'f8', 'i4']``.  `formats` does *not* support the new
        convention of using types directly, i.e. ``(int, float, int)``.
        Note that `formats` must be a list, not a tuple.
        Given that `formats` is somewhat limited, we recommend specifying
        `dtype` instead.
    names : tuple of str, optional
        The name of each column, e.g. ``('x', 'y', 'z')``.
    buf : buffer, optional
        By default, a new array is created of the given shape and data-type.
        If `buf` is specified and is an object exposing the buffer interface,
        the array will use the memory from the existing buffer.  In this case,
        the `offset` and `strides` keywords are available.

    Other Parameters
    ----------------
    titles : tuple of str, optional
        Aliases for column names.  For example, if `names` were
        ``('x', 'y', 'z')`` and `titles` is
        ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
        ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
    byteorder : {'<', '>', '='}, optional
        Byte-order for all fields.
    aligned : bool, optional
        Align the fields in memory as the C-compiler would.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    offset : int, optional
        Start reading buffer (`buf`) from this offset onwards.
    order : {'C', 'F'}, optional
        Row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    rec : recarray
        Empty array of the given shape and type.

    See Also
    --------
    rec.fromrecords : Construct a record array from data.
    record : fundamental data-type for `recarray`.
    format_parser : determine a data-type from formats, names, titles.

    Notes
    -----
    This constructor can be compared to ``empty``: it creates a new record
    array but does not fill it with data.  To create a record array from data,
    use one of the following methods:

    1. Create a standard ndarray and convert it to a record array,
       using ``arr.view(np.recarray)``
    2. Use the `buf` keyword.
    3. Use `np.rec.fromrecords`.

    Examples
    --------
    Create an array with two fields, ``x`` and ``y``:

    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', float), ('y', int)])
    >>> x
    array([(1.0, 2), (3.0, 4)],
          dtype=[('x', '<f8'), ('y', '<i4')])

    >>> x['x']
    array([ 1.,  3.])

    View the array as a record array:

    >>> x = x.view(np.recarray)

    >>> x.x
    array([ 1.,  3.])

    >>> x.y
    array([2, 4])

    Create a new, empty record array:

    >>> np.recarray((2,),
    ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
    rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
           (3471280, 1.2134086255804012e-316, 0)],
          dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])

    """

    # manually set name and module so that this class's type shows
    # up as "numpy.recarray" when printed
    __name__ = 'recarray'
    __module__ = 'numpy'

    def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
                formats=None, names=None, titles=None,
                byteorder=None, aligned=False, order='C'):
        """Allocate the array; see the class docstring for the parameters.

        `dtype` takes precedence; the format_parser arguments are only
        consulted when `dtype` is None.
        """

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles, aligned, byteorder)._descr

        if buf is None:
            self = ndarray.__new__(subtype, shape, (record, descr), order=order)
        else:
            # offset and strides are only forwarded together with a buffer
            self = ndarray.__new__(subtype, shape, (record, descr),
                                   buffer=buf, offset=offset,
                                   strides=strides, order=order)
        return self

    def __array_finalize__(self, obj):
        """Re-assign a structured, non-record dtype so that `__setattr__`
        gets the chance to convert it."""
        if self.dtype.type is not record and self.dtype.names is not None:
            # if self.dtype is not np.record, invoke __setattr__ which will
            # convert it to a record if it is a void dtype.
            self.dtype = self.dtype

    def __getattribute__(self, attr):
        """Return a regular attribute if one exists, else the field `attr`.

        Raises AttributeError when `attr` is neither.
        """
        # See if ndarray has this attr, and return it if so. (note that this
        # means a field with the same name as an ndarray attr cannot be
        # accessed by attribute).
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:  # attr must be a fieldname
            pass

        # look for a field with this name
        fielddict = ndarray.__getattribute__(self, 'dtype').fields
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError):
            # TypeError: the dtype has no fields (fielddict is None)
            raise AttributeError("recarray has no attribute %s" % attr)
        obj = self.getfield(*res)

        # At this point obj will always be a recarray, since (see
        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
        # non-structured, convert it to an ndarray. Then if obj is structured
        # with void type convert it to the same dtype.type (eg to preserve
        # numpy.record type if present), since nested structured fields do not
        # inherit type. Don't do this for non-void structures though.
        if obj.dtype.names is not None:
            if issubclass(obj.dtype.type, nt.void):
                return obj.view(dtype=(self.dtype.type, obj.dtype))
            return obj
        else:
            return obj.view(ndarray)

    # Save the dictionary.
    # If the attr is a field name and not in the saved dictionary
    # Undo any "setting" of the attribute and do a setfield
    # Thus, you can't create attributes on-the-fly that are field names.
    def __setattr__(self, attr, val):
        """Set an attribute, routing names that are fields to `setfield`.

        Assigning a structured void dtype to ``dtype`` converts it to a
        record dtype first.
        """

        # Automatically convert (void) structured types to records
        # (but not non-void structures, subarrays, or non-structured voids)
        if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
            val = sb.dtype((record, val))

        newattr = attr not in self.__dict__
        try:
            ret = object.__setattr__(self, attr, val)
        except Exception:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                # Not a field either: propagate the original exception
                # unchanged, traceback included.
                raise
        else:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                return ret
            if newattr:
                # We just added this one or this setattr worked on an
                # internal attribute.
                try:
                    object.__delattr__(self, attr)
                except Exception:
                    return ret
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError):
            raise AttributeError("record array has no attribute %s" % attr)
        return self.setfield(val, *res)

    def __getitem__(self, indx):
        """Index like ndarray, then adjust the class/dtype of array results
        the same way attribute access does."""
        obj = super(recarray, self).__getitem__(indx)

        # copy behavior of getattr, except that here
        # we might also be returning a single element
        if isinstance(obj, ndarray):
            if obj.dtype.names is not None:
                obj = obj.view(type(self))
                if issubclass(obj.dtype.type, nt.void):
                    return obj.view(dtype=(self.dtype.type, obj.dtype))
                return obj
            else:
                return obj.view(type=ndarray)
        else:
            # return a single element
            return obj

    def __repr__(self):
        """Return a ``rec.array(...)`` representation, or an
        ``array(...).view(numpy.recarray)`` one for void dtypes that are
        not records."""

        repr_dtype = self.dtype
        if (self.dtype.type is record
                or (not issubclass(self.dtype.type, nt.void))):
            # If this is a full record array (has numpy.record dtype),
            # or if it has a scalar (non-void) dtype with no records,
            # represent it using the rec.array function. Since rec.array
            # converts dtype to a numpy.record for us, convert back
            # to non-record before printing
            if repr_dtype.type is record:
                repr_dtype = sb.dtype((nt.void, repr_dtype))
            prefix = "rec.array("
            fmt = 'rec.array(%s,%sdtype=%s)'
        else:
            # otherwise represent it using np.array plus a view
            # This should only happen if the user is playing
            # strange games with dtypes.
            prefix = "array("
            fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'

        # get data/shape string. logic taken from numeric.array_repr
        if self.size > 0 or self.shape == (0,):
            lst = sb.array2string(
                self, separator=', ', prefix=prefix, suffix=',')
        else:
            # show zero-length shape unless it is (0,)
            lst = "[], shape=%s" % (repr(self.shape),)

        lf = '\n'+' '*len(prefix)
        if get_printoptions()['legacy'] == '1.13':
            lf = ' ' + lf  # trailing space
        return fmt % (lst, lf, repr_dtype)

    def field(self, attr, val=None):
        """Get or set one field.

        Parameters
        ----------
        attr : str or int
            Field name, or the position of the field in ``dtype.names``.
        val : optional
            If None (the default) the field is returned; otherwise `val`
            is stored into the field via ``setfield``.

        Returns
        -------
        The field (viewed as a plain ndarray unless it is itself
        structured) when `val` is None, else the result of ``setfield``.
        """
        if isinstance(attr, int):
            names = ndarray.__getattribute__(self, 'dtype').names
            attr = names[attr]

        fielddict = ndarray.__getattribute__(self, 'dtype').fields

        res = fielddict[attr][:2]

        if val is None:
            obj = self.getfield(*res)
            if obj.dtype.names is not None:
                return obj
            return obj.view(ndarray)
        else:
            return self.setfield(val, *res)
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """ create a record array from a (flat) list of arrays

    Parameters
    ----------
    arrayList : sequence of array_like
        One entry per field; every entry is converted with ``asarray``.
    dtype : data-type, optional
        Data-type of the result.  When None, it is built by `format_parser`
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    shape : int or sequence of ints, optional
        Shape of the result.  None or 0 means: use the shape of the first
        array.
    formats, names, titles, aligned, byteorder :
        Passed to `format_parser` when `dtype` is None.  If `formats` is
        also None, the formats are taken from the dtypes of the arrays.

    Returns
    -------
    recarray
        A new record array holding copies of the input data.

    Raises
    ------
    ValueError
        If the number of fields differs from the number of arrays, or if
        an array does not match the shape of the result.

    Examples
    --------
    >>> x1=np.array([1,2,3,4])
    >>> x2=np.array(['a','dd','xyz','12'])
    >>> x3=np.array([1.1,2,3,4])
    >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
    >>> print(r[1])
    (2, 'dd', 2.0)
    >>> x1[1]=34
    >>> r.a
    array([1, 2, 3, 4])
    """

    arrayList = [sb.asarray(x) for x in arrayList]

    if shape is None or shape == 0:
        shape = arrayList[0].shape

    # same integer handling as fromrecords() and fromfile()
    if isinstance(shape, (int, long)):
        shape = (shape,)

    if formats is None and dtype is None:
        # every entry is an ndarray after asarray(), so the formats can be
        # read straight from the dtypes
        formats = ','.join(obj.dtype.str for obj in arrayList)

    if dtype is not None:
        descr = sb.dtype(dtype)
        _names = descr.names
    else:
        parsed = format_parser(formats, names, titles, aligned, byteorder)
        _names = parsed._names
        descr = parsed._descr

    # Determine shape from data-type.
    if len(descr) != len(arrayList):
        raise ValueError("mismatch between the number of fields "
                         "and the number of arrays")

    d0 = descr[0].shape
    nn = len(d0)
    if nn > 0:
        # strip the subarray dimensions of the first field
        shape = shape[:-nn]

    _array = recarray(shape, descr)

    # Compare against the shape of the allocated array, which is always a
    # tuple; a `shape` given as a list would never compare equal to the
    # tuple `testshape`.
    expected = _array.shape
    for k, obj in enumerate(arrayList):
        nn = descr[k].ndim
        testshape = obj.shape[:obj.ndim - nn]
        if testshape != expected:
            raise ValueError("array-shape mismatch in array %d" % k)

    # populate the record array (makes a copy)
    for name, obj in zip(_names, arrayList):
        _array[name] = obj

    return _array
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
                titles=None, aligned=False, byteorder=None):
    """ create a recarray from a list of records in text form

    The data in the same field can be heterogeneous, they will be promoted
    to the highest data type.  This method is intended for creating
    smaller record arrays.  If used to create large array without formats
    defined

    r=fromrecords([(2,3.,'abc')]*100000)

    it can be slow.

    If formats is None, then this will auto-detect formats. Use list of
    tuples rather than list of lists for faster processing.

    When neither `formats` nor `dtype` is given, the records are split
    into one array per field and handed to `fromarrays`.  Otherwise the
    records are converted in one step; if that raises TypeError or
    ValueError, they are copied one by one (1-d only) and a FutureWarning
    is issued.

    >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
    ... names='col1,col2,col3')
    >>> print(r[0])
    (456, 'dbe', 1.2)
    >>> r.col1
    array([456,   2])
    >>> r.col2
    array(['dbe', 'de'],
          dtype='|S3')
    >>> import pickle
    >>> print(pickle.loads(pickle.dumps(r)))
    [(456, 'dbe', 1.2) (2, 'de', 1.3)]
    """

    if formats is None and dtype is None:  # slower
        as_objects = sb.array(recList, dtype=object)
        ncols = as_objects.shape[-1]
        columns = [sb.array(as_objects[..., i].tolist())
                   for i in range(ncols)]
        return fromarrays(columns, formats=None, shape=shape, names=names,
                          titles=titles, aligned=aligned, byteorder=byteorder)

    if dtype is None:
        descr = format_parser(formats, names, titles, aligned, byteorder)._descr
    else:
        descr = sb.dtype((record, dtype))

    try:
        retval = sb.array(recList, dtype=descr)
    except (TypeError, ValueError):
        # list of lists instead of list of tuples ?
        if (shape is None or shape == 0):
            shape = len(recList)
        if isinstance(shape, (int, long)):
            shape = (shape,)
        if len(shape) > 1:
            raise ValueError("Can only deal with 1-d array.")
        _array = recarray(shape, descr)
        for k in range(_array.size):
            _array[k] = tuple(recList[k])
        # 2018-02-07, 1.14.1
        warnings.warn(
            "fromrecords expected a list of tuples, may have received a list "
            "of lists instead. In the future that will raise an error",
            FutureWarning, stacklevel=2)
        return _array

    # direct conversion succeeded
    if shape is not None and retval.shape != shape:
        retval.shape = shape

    return retval.view(recarray)
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """ create a (read-only) record array from binary data contained in
    a string

    Either `dtype` or `formats` is required (TypeError otherwise).  When
    `shape` is None, 0 or -1, the number of records is derived from the
    length of `datastring` after `offset`, divided by the item size.
    """

    if dtype is None and formats is None:
        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")

    if dtype is None:
        descr = format_parser(formats, names, titles, aligned, byteorder)._descr
    else:
        descr = sb.dtype(dtype)

    if (shape is None or shape == 0 or shape == -1):
        # use as many whole records as fit behind the offset
        shape = (len(datastring) - offset) // descr.itemsize

    return recarray(shape, descr, buf=datastring, offset=offset)
def get_remaining_size(fd):
    """Return the number of bytes between the current position of `fd`
    and the end of the file.

    The total size comes from ``os.fstat`` on the file descriptor; objects
    without a ``fileno`` method fall back to the size of ``fd.name``.
    """
    try:
        descriptor = fd.fileno()
    except AttributeError:
        total = os.path.getsize(fd.name)
    else:
        total = os.fstat(descriptor).st_size
    return total - fd.tell()
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Create an array from binary file data

    If file is a string or a path-like object then that file is opened,
    else it is assumed to be a file object. The file object must
    support random access (i.e. it must have tell and seek methods).

    A file opened by this function is closed again before returning,
    also when an error occurs; a file object passed in is left open.

    Parameters
    ----------
    fd : str, path-like or file object
        The file to read from.
    dtype : data-type, optional
        Data-type of the result.  When None, it is built by `format_parser`
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    shape : int or sequence of ints, optional
        Shape of the result.  None or 0 means ``(-1,)``; a -1 entry is
        replaced by the size that fits the remaining bytes of the file.
    offset : int, optional
        Number of bytes to skip, relative to the current file position.

    Raises
    ------
    TypeError
        If neither `dtype` nor `formats` is given.
    ValueError
        If the file holds fewer bytes than the shape and type require.
    IOError
        If fewer bytes than expected could be read.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> a = np.empty(10,dtype='f8,i4,a5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.newbyteorder('<')
    >>> a.tofile(fd)
    >>>
    >>> fd.seek(0)
    >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
    ... byteorder='<')
    >>> print(r[5])
    (0.5, 10, 'abcde')
    >>> r.shape
    (10,)
    """

    if dtype is None and formats is None:
        raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")

    if (shape is None or shape == 0):
        shape = (-1,)
    elif isinstance(shape, (int, long)):
        shape = (shape,)

    # Only a file opened here is ours to close.
    opened_here = not isfileobj(fd)
    if opened_here:
        fd = open(os_fspath(fd), 'rb')

    try:
        if (offset > 0):
            # whence=1: relative to the current position
            fd.seek(offset, 1)
        size = get_remaining_size(fd)

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles, aligned, byteorder)._descr

        itemsize = descr.itemsize

        shapeprod = sb.array(shape).prod(dtype=nt.intp)
        shapesize = shapeprod * itemsize
        if shapesize < 0:
            # a negative product marks a -1 placeholder: fill it in from
            # the number of bytes left in the file
            shape = list(shape)
            shape[shape.index(-1)] = size // -shapesize
            shape = tuple(shape)
            shapeprod = sb.array(shape).prod(dtype=nt.intp)

        nbytes = shapeprod * itemsize

        if nbytes > size:
            raise ValueError(
                "Not enough bytes left in file for specified shape and type")

        # create the array
        _array = recarray(shape, descr)
        nbytesread = fd.readinto(_array.data)
        if nbytesread != nbytes:
            raise IOError("Didn't read as many bytes as expected")
    finally:
        if opened_here:
            fd.close()

    return _array
def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
          names=None, titles=None, aligned=False, byteorder=None, copy=True):
    """Construct a record array from a wide-variety of objects.

    The construction path is chosen from the type of `obj`:

    * None: a new `recarray` of the given `shape` (required).
    * bytes: `fromstring`.
    * list or tuple: `fromrecords` if the first item is itself a list or
      tuple, else `fromarrays`.
    * recarray or ndarray: a view with the requested dtype (if it differs),
      copied when `copy` is true, returned as a recarray.
    * open file: `fromfile`.
    * anything else must provide ``__array_interface__`` as a dict and is
      converted with ``array`` first.

    Raises
    ------
    ValueError
        If `obj` is None, a string or an open file and neither `formats`
        nor `dtype` is given; if `obj` is None and `shape` is None; or if
        the type of `obj` is not supported.
    """

    lacks_own_type = isinstance(obj, (type(None), str)) or isfileobj(obj)
    if lacks_own_type and formats is None and dtype is None:
        raise ValueError("Must define formats (or dtype) if object is "
                         "None, string, or an open file")

    kwds = {}
    if dtype is not None:
        dtype = sb.dtype(dtype)
    elif formats is not None:
        dtype = format_parser(formats, names, titles,
                              aligned, byteorder)._descr
    else:
        # no type information at all: let the from* helpers derive it
        kwds = dict(formats=formats, names=names, titles=titles,
                    aligned=aligned, byteorder=byteorder)

    def _with_requested_dtype(arr):
        # view `arr` with the requested dtype when one is set and differs
        if dtype is not None and (arr.dtype != dtype):
            return arr.view(dtype)
        return arr

    if obj is None:
        if shape is None:
            raise ValueError("Must define a shape if obj is None")
        return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)

    if isinstance(obj, bytes):
        return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)

    if isinstance(obj, (list, tuple)):
        if isinstance(obj[0], (tuple, list)):
            return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
        return fromarrays(obj, dtype=dtype, shape=shape, **kwds)

    if isinstance(obj, recarray):
        result = _with_requested_dtype(obj)
        if copy:
            result = result.copy()
        return result

    if isfileobj(obj):
        return fromfile(obj, dtype=dtype, shape=shape, offset=offset)

    if isinstance(obj, ndarray):
        result = _with_requested_dtype(obj)
        if copy:
            result = result.copy()
        return result.view(recarray)

    # last resort: objects exposing the array interface
    interface = getattr(obj, "__array_interface__", None)
    if interface is None or not isinstance(interface, dict):
        raise ValueError("Unknown input type")
    obj = _with_requested_dtype(sb.array(obj))
    return obj.view(recarray)
|