hb.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. """
  2. Implementation of Harwell-Boeing read/write.
  3. At the moment not the full Harwell-Boeing format is supported. Supported
  4. features are:
  5. - assembled, non-symmetric, real matrices
  6. - integer for pointer/indices
  7. - exponential format for float values, and int format
  8. """
  9. from __future__ import division, print_function, absolute_import
  10. # TODO:
  11. # - Add more support (symmetric/complex matrices, non-assembled matrices ?)
  12. # XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
  13. # takes a lot of memory. Being faster would require compiled code.
  14. # write is not efficient. Although not a terribly exciting task,
  15. # having reusable facilities to efficiently read/write fortran-formatted files
  16. # would be useful outside this module.
  17. import warnings
  18. import numpy as np
  19. from scipy.sparse import csc_matrix
  20. from scipy.io.harwell_boeing._fortran_format_parser import \
  21. FortranFormatParser, IntFormat, ExpFormat
  22. __all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
  23. "HBMatrixType"]
  24. class MalformedHeader(Exception):
  25. pass
  26. class LineOverflow(Warning):
  27. pass
  28. def _nbytes_full(fmt, nlines):
  29. """Return the number of bytes to read to get every full lines for the
  30. given parsed fortran format."""
  31. return (fmt.repeat * fmt.width + 1) * (nlines - 1)
  32. class HBInfo(object):
  33. @classmethod
  34. def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
  35. """Create a HBInfo instance from an existing sparse matrix.
  36. Parameters
  37. ----------
  38. m : sparse matrix
  39. the HBInfo instance will derive its parameters from m
  40. title : str
  41. Title to put in the HB header
  42. key : str
  43. Key
  44. mxtype : HBMatrixType
  45. type of the input matrix
  46. fmt : dict
  47. not implemented
  48. Returns
  49. -------
  50. hb_info : HBInfo instance
  51. """
  52. m = m.tocsc(copy=False)
  53. pointer = m.indptr
  54. indices = m.indices
  55. values = m.data
  56. nrows, ncols = m.shape
  57. nnon_zeros = m.nnz
  58. if fmt is None:
  59. # +1 because HB use one-based indexing (Fortran), and we will write
  60. # the indices /pointer as such
  61. pointer_fmt = IntFormat.from_number(np.max(pointer+1))
  62. indices_fmt = IntFormat.from_number(np.max(indices+1))
  63. if values.dtype.kind in np.typecodes["AllFloat"]:
  64. values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
  65. elif values.dtype.kind in np.typecodes["AllInteger"]:
  66. values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
  67. else:
  68. raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
  69. else:
  70. raise NotImplementedError("fmt argument not supported yet.")
  71. if mxtype is None:
  72. if not np.isrealobj(values):
  73. raise ValueError("Complex values not supported yet")
  74. if values.dtype.kind in np.typecodes["AllInteger"]:
  75. tp = "integer"
  76. elif values.dtype.kind in np.typecodes["AllFloat"]:
  77. tp = "real"
  78. else:
  79. raise NotImplementedError("type %s for values not implemented"
  80. % values.dtype)
  81. mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
  82. else:
  83. raise ValueError("mxtype argument not handled yet.")
  84. def _nlines(fmt, size):
  85. nlines = size // fmt.repeat
  86. if nlines * fmt.repeat != size:
  87. nlines += 1
  88. return nlines
  89. pointer_nlines = _nlines(pointer_fmt, pointer.size)
  90. indices_nlines = _nlines(indices_fmt, indices.size)
  91. values_nlines = _nlines(values_fmt, values.size)
  92. total_nlines = pointer_nlines + indices_nlines + values_nlines
  93. return cls(title, key,
  94. total_nlines, pointer_nlines, indices_nlines, values_nlines,
  95. mxtype, nrows, ncols, nnon_zeros,
  96. pointer_fmt.fortran_format, indices_fmt.fortran_format,
  97. values_fmt.fortran_format)
  98. @classmethod
  99. def from_file(cls, fid):
  100. """Create a HBInfo instance from a file object containing a matrix in the
  101. HB format.
  102. Parameters
  103. ----------
  104. fid : file-like matrix
  105. File or file-like object containing a matrix in the HB format.
  106. Returns
  107. -------
  108. hb_info : HBInfo instance
  109. """
  110. # First line
  111. line = fid.readline().strip("\n")
  112. if not len(line) > 72:
  113. raise ValueError("Expected at least 72 characters for first line, "
  114. "got: \n%s" % line)
  115. title = line[:72]
  116. key = line[72:]
  117. # Second line
  118. line = fid.readline().strip("\n")
  119. if not len(line.rstrip()) >= 56:
  120. raise ValueError("Expected at least 56 characters for second line, "
  121. "got: \n%s" % line)
  122. total_nlines = _expect_int(line[:14])
  123. pointer_nlines = _expect_int(line[14:28])
  124. indices_nlines = _expect_int(line[28:42])
  125. values_nlines = _expect_int(line[42:56])
  126. rhs_nlines = line[56:72].strip()
  127. if rhs_nlines == '':
  128. rhs_nlines = 0
  129. else:
  130. rhs_nlines = _expect_int(rhs_nlines)
  131. if not rhs_nlines == 0:
  132. raise ValueError("Only files without right hand side supported for "
  133. "now.")
  134. # Third line
  135. line = fid.readline().strip("\n")
  136. if not len(line) >= 70:
  137. raise ValueError("Expected at least 72 character for third line, got:\n"
  138. "%s" % line)
  139. mxtype_s = line[:3].upper()
  140. if not len(mxtype_s) == 3:
  141. raise ValueError("mxtype expected to be 3 characters long")
  142. mxtype = HBMatrixType.from_fortran(mxtype_s)
  143. if mxtype.value_type not in ["real", "integer"]:
  144. raise ValueError("Only real or integer matrices supported for "
  145. "now (detected %s)" % mxtype)
  146. if not mxtype.structure == "unsymmetric":
  147. raise ValueError("Only unsymmetric matrices supported for "
  148. "now (detected %s)" % mxtype)
  149. if not mxtype.storage == "assembled":
  150. raise ValueError("Only assembled matrices supported for now")
  151. if not line[3:14] == " " * 11:
  152. raise ValueError("Malformed data for third line: %s" % line)
  153. nrows = _expect_int(line[14:28])
  154. ncols = _expect_int(line[28:42])
  155. nnon_zeros = _expect_int(line[42:56])
  156. nelementals = _expect_int(line[56:70])
  157. if not nelementals == 0:
  158. raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
  159. % nelementals)
  160. # Fourth line
  161. line = fid.readline().strip("\n")
  162. ct = line.split()
  163. if not len(ct) == 3:
  164. raise ValueError("Expected 3 formats, got %s" % ct)
  165. return cls(title, key,
  166. total_nlines, pointer_nlines, indices_nlines, values_nlines,
  167. mxtype, nrows, ncols, nnon_zeros,
  168. ct[0], ct[1], ct[2],
  169. rhs_nlines, nelementals)
  170. def __init__(self, title, key,
  171. total_nlines, pointer_nlines, indices_nlines, values_nlines,
  172. mxtype, nrows, ncols, nnon_zeros,
  173. pointer_format_str, indices_format_str, values_format_str,
  174. right_hand_sides_nlines=0, nelementals=0):
  175. """Do not use this directly, but the class ctrs (from_* functions)."""
  176. self.title = title
  177. self.key = key
  178. if title is None:
  179. title = "No Title"
  180. if len(title) > 72:
  181. raise ValueError("title cannot be > 72 characters")
  182. if key is None:
  183. key = "|No Key"
  184. if len(key) > 8:
  185. warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)
  186. self.total_nlines = total_nlines
  187. self.pointer_nlines = pointer_nlines
  188. self.indices_nlines = indices_nlines
  189. self.values_nlines = values_nlines
  190. parser = FortranFormatParser()
  191. pointer_format = parser.parse(pointer_format_str)
  192. if not isinstance(pointer_format, IntFormat):
  193. raise ValueError("Expected int format for pointer format, got %s"
  194. % pointer_format)
  195. indices_format = parser.parse(indices_format_str)
  196. if not isinstance(indices_format, IntFormat):
  197. raise ValueError("Expected int format for indices format, got %s" %
  198. indices_format)
  199. values_format = parser.parse(values_format_str)
  200. if isinstance(values_format, ExpFormat):
  201. if mxtype.value_type not in ["real", "complex"]:
  202. raise ValueError("Inconsistency between matrix type %s and "
  203. "value type %s" % (mxtype, values_format))
  204. values_dtype = np.float64
  205. elif isinstance(values_format, IntFormat):
  206. if mxtype.value_type not in ["integer"]:
  207. raise ValueError("Inconsistency between matrix type %s and "
  208. "value type %s" % (mxtype, values_format))
  209. # XXX: fortran int -> dtype association ?
  210. values_dtype = int
  211. else:
  212. raise ValueError("Unsupported format for values %r" % (values_format,))
  213. self.pointer_format = pointer_format
  214. self.indices_format = indices_format
  215. self.values_format = values_format
  216. self.pointer_dtype = np.int32
  217. self.indices_dtype = np.int32
  218. self.values_dtype = values_dtype
  219. self.pointer_nlines = pointer_nlines
  220. self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
  221. self.indices_nlines = indices_nlines
  222. self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
  223. self.values_nlines = values_nlines
  224. self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
  225. self.nrows = nrows
  226. self.ncols = ncols
  227. self.nnon_zeros = nnon_zeros
  228. self.nelementals = nelementals
  229. self.mxtype = mxtype
  230. def dump(self):
  231. """Gives the header corresponding to this instance as a string."""
  232. header = [self.title.ljust(72) + self.key.ljust(8)]
  233. header.append("%14d%14d%14d%14d" %
  234. (self.total_nlines, self.pointer_nlines,
  235. self.indices_nlines, self.values_nlines))
  236. header.append("%14s%14d%14d%14d%14d" %
  237. (self.mxtype.fortran_format.ljust(14), self.nrows,
  238. self.ncols, self.nnon_zeros, 0))
  239. pffmt = self.pointer_format.fortran_format
  240. iffmt = self.indices_format.fortran_format
  241. vffmt = self.values_format.fortran_format
  242. header.append("%16s%16s%20s" %
  243. (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
  244. return "\n".join(header)
  245. def _expect_int(value, msg=None):
  246. try:
  247. return int(value)
  248. except ValueError:
  249. if msg is None:
  250. msg = "Expected an int, got %s"
  251. raise ValueError(msg % value)
  252. def _read_hb_data(content, header):
  253. # XXX: look at a way to reduce memory here (big string creation)
  254. ptr_string = "".join([content.read(header.pointer_nbytes_full),
  255. content.readline()])
  256. ptr = np.fromstring(ptr_string,
  257. dtype=int, sep=' ')
  258. ind_string = "".join([content.read(header.indices_nbytes_full),
  259. content.readline()])
  260. ind = np.fromstring(ind_string,
  261. dtype=int, sep=' ')
  262. val_string = "".join([content.read(header.values_nbytes_full),
  263. content.readline()])
  264. val = np.fromstring(val_string,
  265. dtype=header.values_dtype, sep=' ')
  266. try:
  267. return csc_matrix((val, ind-1, ptr-1),
  268. shape=(header.nrows, header.ncols))
  269. except ValueError as e:
  270. raise e
  271. def _write_data(m, fid, header):
  272. m = m.tocsc(copy=False)
  273. def write_array(f, ar, nlines, fmt):
  274. # ar_nlines is the number of full lines, n is the number of items per
  275. # line, ffmt the fortran format
  276. pyfmt = fmt.python_format
  277. pyfmt_full = pyfmt * fmt.repeat
  278. # for each array to write, we first write the full lines, and special
  279. # case for partial line
  280. full = ar[:(nlines - 1) * fmt.repeat]
  281. for row in full.reshape((nlines-1, fmt.repeat)):
  282. f.write(pyfmt_full % tuple(row) + "\n")
  283. nremain = ar.size - full.size
  284. if nremain > 0:
  285. f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
  286. fid.write(header.dump())
  287. fid.write("\n")
  288. # +1 is for fortran one-based indexing
  289. write_array(fid, m.indptr+1, header.pointer_nlines,
  290. header.pointer_format)
  291. write_array(fid, m.indices+1, header.indices_nlines,
  292. header.indices_format)
  293. write_array(fid, m.data, header.values_nlines,
  294. header.values_format)
  295. class HBMatrixType(object):
  296. """Class to hold the matrix type."""
  297. # q2f* translates qualified names to fortran character
  298. _q2f_type = {
  299. "real": "R",
  300. "complex": "C",
  301. "pattern": "P",
  302. "integer": "I",
  303. }
  304. _q2f_structure = {
  305. "symmetric": "S",
  306. "unsymmetric": "U",
  307. "hermitian": "H",
  308. "skewsymmetric": "Z",
  309. "rectangular": "R"
  310. }
  311. _q2f_storage = {
  312. "assembled": "A",
  313. "elemental": "E",
  314. }
  315. _f2q_type = dict([(j, i) for i, j in _q2f_type.items()])
  316. _f2q_structure = dict([(j, i) for i, j in _q2f_structure.items()])
  317. _f2q_storage = dict([(j, i) for i, j in _q2f_storage.items()])
  318. @classmethod
  319. def from_fortran(cls, fmt):
  320. if not len(fmt) == 3:
  321. raise ValueError("Fortran format for matrix type should be 3 "
  322. "characters long")
  323. try:
  324. value_type = cls._f2q_type[fmt[0]]
  325. structure = cls._f2q_structure[fmt[1]]
  326. storage = cls._f2q_storage[fmt[2]]
  327. return cls(value_type, structure, storage)
  328. except KeyError:
  329. raise ValueError("Unrecognized format %s" % fmt)
  330. def __init__(self, value_type, structure, storage="assembled"):
  331. self.value_type = value_type
  332. self.structure = structure
  333. self.storage = storage
  334. if value_type not in self._q2f_type:
  335. raise ValueError("Unrecognized type %s" % value_type)
  336. if structure not in self._q2f_structure:
  337. raise ValueError("Unrecognized structure %s" % structure)
  338. if storage not in self._q2f_storage:
  339. raise ValueError("Unrecognized storage %s" % storage)
  340. @property
  341. def fortran_format(self):
  342. return self._q2f_type[self.value_type] + \
  343. self._q2f_structure[self.structure] + \
  344. self._q2f_storage[self.storage]
  345. def __repr__(self):
  346. return "HBMatrixType(%s, %s, %s)" % \
  347. (self.value_type, self.structure, self.storage)
  348. class HBFile(object):
  349. def __init__(self, file, hb_info=None):
  350. """Create a HBFile instance.
  351. Parameters
  352. ----------
  353. file : file-object
  354. StringIO work as well
  355. hb_info : HBInfo, optional
  356. Should be given as an argument for writing, in which case the file
  357. should be writable.
  358. """
  359. self._fid = file
  360. if hb_info is None:
  361. self._hb_info = HBInfo.from_file(file)
  362. else:
  363. #raise IOError("file %s is not writable, and hb_info "
  364. # "was given." % file)
  365. self._hb_info = hb_info
  366. @property
  367. def title(self):
  368. return self._hb_info.title
  369. @property
  370. def key(self):
  371. return self._hb_info.key
  372. @property
  373. def type(self):
  374. return self._hb_info.mxtype.value_type
  375. @property
  376. def structure(self):
  377. return self._hb_info.mxtype.structure
  378. @property
  379. def storage(self):
  380. return self._hb_info.mxtype.storage
  381. def read_matrix(self):
  382. return _read_hb_data(self._fid, self._hb_info)
  383. def write_matrix(self, m):
  384. return _write_data(m, self._fid, self._hb_info)
  385. def hb_read(path_or_open_file):
  386. """Read HB-format file.
  387. Parameters
  388. ----------
  389. path_or_open_file : path-like or file-like
  390. If a file-like object, it is used as-is. Otherwise it is opened
  391. before reading.
  392. Returns
  393. -------
  394. data : scipy.sparse.csc_matrix instance
  395. The data read from the HB file as a sparse matrix.
  396. Notes
  397. -----
  398. At the moment not the full Harwell-Boeing format is supported. Supported
  399. features are:
  400. - assembled, non-symmetric, real matrices
  401. - integer for pointer/indices
  402. - exponential format for float values, and int format
  403. """
  404. def _get_matrix(fid):
  405. hb = HBFile(fid)
  406. return hb.read_matrix()
  407. if hasattr(path_or_open_file, 'read'):
  408. return _get_matrix(path_or_open_file)
  409. else:
  410. with open(path_or_open_file) as f:
  411. return _get_matrix(f)
  412. def hb_write(path_or_open_file, m, hb_info=None):
  413. """Write HB-format file.
  414. Parameters
  415. ----------
  416. path_or_open_file : path-like or file-like
  417. If a file-like object, it is used as-is. Otherwise it is opened
  418. before writing.
  419. m : sparse-matrix
  420. the sparse matrix to write
  421. hb_info : HBInfo
  422. contains the meta-data for write
  423. Returns
  424. -------
  425. None
  426. Notes
  427. -----
  428. At the moment not the full Harwell-Boeing format is supported. Supported
  429. features are:
  430. - assembled, non-symmetric, real matrices
  431. - integer for pointer/indices
  432. - exponential format for float values, and int format
  433. """
  434. m = m.tocsc(copy=False)
  435. if hb_info is None:
  436. hb_info = HBInfo.from_data(m)
  437. def _set_matrix(fid):
  438. hb = HBFile(fid, hb_info)
  439. return hb.write_matrix(m)
  440. if hasattr(path_or_open_file, 'write'):
  441. return _set_matrix(path_or_open_file)
  442. else:
  443. with open(path_or_open_file, 'w') as f:
  444. return _set_matrix(f)