123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- """
- Implementation of Harwell-Boeing read/write.
- At the moment not the full Harwell-Boeing format is supported. Supported
- features are:
- - assembled, non-symmetric, real matrices
- - integer for pointer/indices
- - exponential format for float values, and int format
- """
- from __future__ import division, print_function, absolute_import
- # TODO:
- # - Add more support (symmetric/complex matrices, non-assembled matrices ?)
- # XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
- # takes a lot of memory. Being faster would require compiled code.
- # write is not efficient. Although not a terribly exciting task,
- # having reusable facilities to efficiently read/write fortran-formatted files
- # would be useful outside this module.
- import warnings
- import numpy as np
- from scipy.sparse import csc_matrix
- from scipy.io.harwell_boeing._fortran_format_parser import \
- FortranFormatParser, IntFormat, ExpFormat
- __all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
- "HBMatrixType"]
- class MalformedHeader(Exception):
- pass
- class LineOverflow(Warning):
- pass
- def _nbytes_full(fmt, nlines):
- """Return the number of bytes to read to get every full lines for the
- given parsed fortran format."""
- return (fmt.repeat * fmt.width + 1) * (nlines - 1)
- class HBInfo(object):
- @classmethod
- def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
- """Create a HBInfo instance from an existing sparse matrix.
- Parameters
- ----------
- m : sparse matrix
- the HBInfo instance will derive its parameters from m
- title : str
- Title to put in the HB header
- key : str
- Key
- mxtype : HBMatrixType
- type of the input matrix
- fmt : dict
- not implemented
- Returns
- -------
- hb_info : HBInfo instance
- """
- m = m.tocsc(copy=False)
- pointer = m.indptr
- indices = m.indices
- values = m.data
- nrows, ncols = m.shape
- nnon_zeros = m.nnz
- if fmt is None:
- # +1 because HB use one-based indexing (Fortran), and we will write
- # the indices /pointer as such
- pointer_fmt = IntFormat.from_number(np.max(pointer+1))
- indices_fmt = IntFormat.from_number(np.max(indices+1))
- if values.dtype.kind in np.typecodes["AllFloat"]:
- values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
- elif values.dtype.kind in np.typecodes["AllInteger"]:
- values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
- else:
- raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
- else:
- raise NotImplementedError("fmt argument not supported yet.")
- if mxtype is None:
- if not np.isrealobj(values):
- raise ValueError("Complex values not supported yet")
- if values.dtype.kind in np.typecodes["AllInteger"]:
- tp = "integer"
- elif values.dtype.kind in np.typecodes["AllFloat"]:
- tp = "real"
- else:
- raise NotImplementedError("type %s for values not implemented"
- % values.dtype)
- mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
- else:
- raise ValueError("mxtype argument not handled yet.")
- def _nlines(fmt, size):
- nlines = size // fmt.repeat
- if nlines * fmt.repeat != size:
- nlines += 1
- return nlines
- pointer_nlines = _nlines(pointer_fmt, pointer.size)
- indices_nlines = _nlines(indices_fmt, indices.size)
- values_nlines = _nlines(values_fmt, values.size)
- total_nlines = pointer_nlines + indices_nlines + values_nlines
- return cls(title, key,
- total_nlines, pointer_nlines, indices_nlines, values_nlines,
- mxtype, nrows, ncols, nnon_zeros,
- pointer_fmt.fortran_format, indices_fmt.fortran_format,
- values_fmt.fortran_format)
- @classmethod
- def from_file(cls, fid):
- """Create a HBInfo instance from a file object containing a matrix in the
- HB format.
- Parameters
- ----------
- fid : file-like matrix
- File or file-like object containing a matrix in the HB format.
- Returns
- -------
- hb_info : HBInfo instance
- """
- # First line
- line = fid.readline().strip("\n")
- if not len(line) > 72:
- raise ValueError("Expected at least 72 characters for first line, "
- "got: \n%s" % line)
- title = line[:72]
- key = line[72:]
- # Second line
- line = fid.readline().strip("\n")
- if not len(line.rstrip()) >= 56:
- raise ValueError("Expected at least 56 characters for second line, "
- "got: \n%s" % line)
- total_nlines = _expect_int(line[:14])
- pointer_nlines = _expect_int(line[14:28])
- indices_nlines = _expect_int(line[28:42])
- values_nlines = _expect_int(line[42:56])
- rhs_nlines = line[56:72].strip()
- if rhs_nlines == '':
- rhs_nlines = 0
- else:
- rhs_nlines = _expect_int(rhs_nlines)
- if not rhs_nlines == 0:
- raise ValueError("Only files without right hand side supported for "
- "now.")
- # Third line
- line = fid.readline().strip("\n")
- if not len(line) >= 70:
- raise ValueError("Expected at least 72 character for third line, got:\n"
- "%s" % line)
- mxtype_s = line[:3].upper()
- if not len(mxtype_s) == 3:
- raise ValueError("mxtype expected to be 3 characters long")
- mxtype = HBMatrixType.from_fortran(mxtype_s)
- if mxtype.value_type not in ["real", "integer"]:
- raise ValueError("Only real or integer matrices supported for "
- "now (detected %s)" % mxtype)
- if not mxtype.structure == "unsymmetric":
- raise ValueError("Only unsymmetric matrices supported for "
- "now (detected %s)" % mxtype)
- if not mxtype.storage == "assembled":
- raise ValueError("Only assembled matrices supported for now")
- if not line[3:14] == " " * 11:
- raise ValueError("Malformed data for third line: %s" % line)
- nrows = _expect_int(line[14:28])
- ncols = _expect_int(line[28:42])
- nnon_zeros = _expect_int(line[42:56])
- nelementals = _expect_int(line[56:70])
- if not nelementals == 0:
- raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
- % nelementals)
- # Fourth line
- line = fid.readline().strip("\n")
- ct = line.split()
- if not len(ct) == 3:
- raise ValueError("Expected 3 formats, got %s" % ct)
- return cls(title, key,
- total_nlines, pointer_nlines, indices_nlines, values_nlines,
- mxtype, nrows, ncols, nnon_zeros,
- ct[0], ct[1], ct[2],
- rhs_nlines, nelementals)
- def __init__(self, title, key,
- total_nlines, pointer_nlines, indices_nlines, values_nlines,
- mxtype, nrows, ncols, nnon_zeros,
- pointer_format_str, indices_format_str, values_format_str,
- right_hand_sides_nlines=0, nelementals=0):
- """Do not use this directly, but the class ctrs (from_* functions)."""
- self.title = title
- self.key = key
- if title is None:
- title = "No Title"
- if len(title) > 72:
- raise ValueError("title cannot be > 72 characters")
- if key is None:
- key = "|No Key"
- if len(key) > 8:
- warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)
- self.total_nlines = total_nlines
- self.pointer_nlines = pointer_nlines
- self.indices_nlines = indices_nlines
- self.values_nlines = values_nlines
- parser = FortranFormatParser()
- pointer_format = parser.parse(pointer_format_str)
- if not isinstance(pointer_format, IntFormat):
- raise ValueError("Expected int format for pointer format, got %s"
- % pointer_format)
- indices_format = parser.parse(indices_format_str)
- if not isinstance(indices_format, IntFormat):
- raise ValueError("Expected int format for indices format, got %s" %
- indices_format)
- values_format = parser.parse(values_format_str)
- if isinstance(values_format, ExpFormat):
- if mxtype.value_type not in ["real", "complex"]:
- raise ValueError("Inconsistency between matrix type %s and "
- "value type %s" % (mxtype, values_format))
- values_dtype = np.float64
- elif isinstance(values_format, IntFormat):
- if mxtype.value_type not in ["integer"]:
- raise ValueError("Inconsistency between matrix type %s and "
- "value type %s" % (mxtype, values_format))
- # XXX: fortran int -> dtype association ?
- values_dtype = int
- else:
- raise ValueError("Unsupported format for values %r" % (values_format,))
- self.pointer_format = pointer_format
- self.indices_format = indices_format
- self.values_format = values_format
- self.pointer_dtype = np.int32
- self.indices_dtype = np.int32
- self.values_dtype = values_dtype
- self.pointer_nlines = pointer_nlines
- self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
- self.indices_nlines = indices_nlines
- self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
- self.values_nlines = values_nlines
- self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
- self.nrows = nrows
- self.ncols = ncols
- self.nnon_zeros = nnon_zeros
- self.nelementals = nelementals
- self.mxtype = mxtype
- def dump(self):
- """Gives the header corresponding to this instance as a string."""
- header = [self.title.ljust(72) + self.key.ljust(8)]
- header.append("%14d%14d%14d%14d" %
- (self.total_nlines, self.pointer_nlines,
- self.indices_nlines, self.values_nlines))
- header.append("%14s%14d%14d%14d%14d" %
- (self.mxtype.fortran_format.ljust(14), self.nrows,
- self.ncols, self.nnon_zeros, 0))
- pffmt = self.pointer_format.fortran_format
- iffmt = self.indices_format.fortran_format
- vffmt = self.values_format.fortran_format
- header.append("%16s%16s%20s" %
- (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
- return "\n".join(header)
- def _expect_int(value, msg=None):
- try:
- return int(value)
- except ValueError:
- if msg is None:
- msg = "Expected an int, got %s"
- raise ValueError(msg % value)
- def _read_hb_data(content, header):
- # XXX: look at a way to reduce memory here (big string creation)
- ptr_string = "".join([content.read(header.pointer_nbytes_full),
- content.readline()])
- ptr = np.fromstring(ptr_string,
- dtype=int, sep=' ')
- ind_string = "".join([content.read(header.indices_nbytes_full),
- content.readline()])
- ind = np.fromstring(ind_string,
- dtype=int, sep=' ')
- val_string = "".join([content.read(header.values_nbytes_full),
- content.readline()])
- val = np.fromstring(val_string,
- dtype=header.values_dtype, sep=' ')
- try:
- return csc_matrix((val, ind-1, ptr-1),
- shape=(header.nrows, header.ncols))
- except ValueError as e:
- raise e
- def _write_data(m, fid, header):
- m = m.tocsc(copy=False)
- def write_array(f, ar, nlines, fmt):
- # ar_nlines is the number of full lines, n is the number of items per
- # line, ffmt the fortran format
- pyfmt = fmt.python_format
- pyfmt_full = pyfmt * fmt.repeat
- # for each array to write, we first write the full lines, and special
- # case for partial line
- full = ar[:(nlines - 1) * fmt.repeat]
- for row in full.reshape((nlines-1, fmt.repeat)):
- f.write(pyfmt_full % tuple(row) + "\n")
- nremain = ar.size - full.size
- if nremain > 0:
- f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
- fid.write(header.dump())
- fid.write("\n")
- # +1 is for fortran one-based indexing
- write_array(fid, m.indptr+1, header.pointer_nlines,
- header.pointer_format)
- write_array(fid, m.indices+1, header.indices_nlines,
- header.indices_format)
- write_array(fid, m.data, header.values_nlines,
- header.values_format)
- class HBMatrixType(object):
- """Class to hold the matrix type."""
- # q2f* translates qualified names to fortran character
- _q2f_type = {
- "real": "R",
- "complex": "C",
- "pattern": "P",
- "integer": "I",
- }
- _q2f_structure = {
- "symmetric": "S",
- "unsymmetric": "U",
- "hermitian": "H",
- "skewsymmetric": "Z",
- "rectangular": "R"
- }
- _q2f_storage = {
- "assembled": "A",
- "elemental": "E",
- }
- _f2q_type = dict([(j, i) for i, j in _q2f_type.items()])
- _f2q_structure = dict([(j, i) for i, j in _q2f_structure.items()])
- _f2q_storage = dict([(j, i) for i, j in _q2f_storage.items()])
- @classmethod
- def from_fortran(cls, fmt):
- if not len(fmt) == 3:
- raise ValueError("Fortran format for matrix type should be 3 "
- "characters long")
- try:
- value_type = cls._f2q_type[fmt[0]]
- structure = cls._f2q_structure[fmt[1]]
- storage = cls._f2q_storage[fmt[2]]
- return cls(value_type, structure, storage)
- except KeyError:
- raise ValueError("Unrecognized format %s" % fmt)
- def __init__(self, value_type, structure, storage="assembled"):
- self.value_type = value_type
- self.structure = structure
- self.storage = storage
- if value_type not in self._q2f_type:
- raise ValueError("Unrecognized type %s" % value_type)
- if structure not in self._q2f_structure:
- raise ValueError("Unrecognized structure %s" % structure)
- if storage not in self._q2f_storage:
- raise ValueError("Unrecognized storage %s" % storage)
- @property
- def fortran_format(self):
- return self._q2f_type[self.value_type] + \
- self._q2f_structure[self.structure] + \
- self._q2f_storage[self.storage]
- def __repr__(self):
- return "HBMatrixType(%s, %s, %s)" % \
- (self.value_type, self.structure, self.storage)
- class HBFile(object):
- def __init__(self, file, hb_info=None):
- """Create a HBFile instance.
- Parameters
- ----------
- file : file-object
- StringIO work as well
- hb_info : HBInfo, optional
- Should be given as an argument for writing, in which case the file
- should be writable.
- """
- self._fid = file
- if hb_info is None:
- self._hb_info = HBInfo.from_file(file)
- else:
- #raise IOError("file %s is not writable, and hb_info "
- # "was given." % file)
- self._hb_info = hb_info
- @property
- def title(self):
- return self._hb_info.title
- @property
- def key(self):
- return self._hb_info.key
- @property
- def type(self):
- return self._hb_info.mxtype.value_type
- @property
- def structure(self):
- return self._hb_info.mxtype.structure
- @property
- def storage(self):
- return self._hb_info.mxtype.storage
- def read_matrix(self):
- return _read_hb_data(self._fid, self._hb_info)
- def write_matrix(self, m):
- return _write_data(m, self._fid, self._hb_info)
- def hb_read(path_or_open_file):
- """Read HB-format file.
- Parameters
- ----------
- path_or_open_file : path-like or file-like
- If a file-like object, it is used as-is. Otherwise it is opened
- before reading.
- Returns
- -------
- data : scipy.sparse.csc_matrix instance
- The data read from the HB file as a sparse matrix.
- Notes
- -----
- At the moment not the full Harwell-Boeing format is supported. Supported
- features are:
- - assembled, non-symmetric, real matrices
- - integer for pointer/indices
- - exponential format for float values, and int format
- """
- def _get_matrix(fid):
- hb = HBFile(fid)
- return hb.read_matrix()
- if hasattr(path_or_open_file, 'read'):
- return _get_matrix(path_or_open_file)
- else:
- with open(path_or_open_file) as f:
- return _get_matrix(f)
- def hb_write(path_or_open_file, m, hb_info=None):
- """Write HB-format file.
- Parameters
- ----------
- path_or_open_file : path-like or file-like
- If a file-like object, it is used as-is. Otherwise it is opened
- before writing.
- m : sparse-matrix
- the sparse matrix to write
- hb_info : HBInfo
- contains the meta-data for write
- Returns
- -------
- None
- Notes
- -----
- At the moment not the full Harwell-Boeing format is supported. Supported
- features are:
- - assembled, non-symmetric, real matrices
- - integer for pointer/indices
- - exponential format for float values, and int format
- """
- m = m.tocsc(copy=False)
- if hb_info is None:
- hb_info = HBInfo.from_data(m)
- def _set_matrix(fid):
- hb = HBFile(fid, hb_info)
- return hb.write_matrix(m)
- if hasattr(path_or_open_file, 'write'):
- return _set_matrix(path_or_open_file)
- else:
- with open(path_or_open_file, 'w') as f:
- return _set_matrix(f)
|