wavfile.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. """
  2. Module to read / write wav files using numpy arrays
  3. Functions
  4. ---------
  5. `read`: Return the sample rate (in samples/sec) and data from a WAV file.
  6. `write`: Write a numpy array as a WAV file.
  7. """
  8. from __future__ import division, print_function, absolute_import
  9. import sys
  10. import numpy
  11. import struct
  12. import warnings
  # Names exported by a star-import of this module.
  __all__ = ['WavFileWarning', 'read', 'write']
  class WavFileWarning(UserWarning):
      """Warning category issued by `read` when it skips an unknown chunk."""
  # Format tags as stored in the 'fmt ' chunk of a WAV file.
  WAVE_FORMAT_PCM = 0x0001
  WAVE_FORMAT_IEEE_FLOAT = 0x0003
  WAVE_FORMAT_EXTENSIBLE = 0xfffe

  # Sample encodings accepted by the reader; any other tag is rejected.
  KNOWN_WAVE_FORMATS = (
      WAVE_FORMAT_PCM,
      WAVE_FORMAT_IEEE_FLOAT,
  )
  # The caller must already have consumed the 4-byte 'fmt ' id, so the
  # stream is positioned on the chunk-size field.
  def _read_fmt_chunk(fid, is_big_endian):
      """
      Parse the format subchunk of a WAV file.

      Parameters
      ----------
      fid : file-like
          Binary stream positioned immediately after the ``'fmt '`` id.
      is_big_endian : bool
          Decode the fields as big-endian if True, little-endian otherwise.

      Returns
      -------
      size : int
          size of format subchunk in bytes (minus 8 for "fmt " and itself)
      format_tag : int
          PCM, float, or compressed format
      channels : int
          number of channels
      fs : int
          sampling frequency in samples per second
      bytes_per_second : int
          overall byte rate for the file
      block_align : int
          bytes per sample, including all channels
      bit_depth : int
          bits per sample

      Raises
      ------
      ValueError
          If the chunk is shorter than 16 bytes, if an extensible format
          announces an extension shorter than 22 bytes, or if the resulting
          format tag is not one of ``KNOWN_WAVE_FORMATS``.
      """
      endian = '>' if is_big_endian else '<'

      size = struct.unpack(endian + 'I', fid.read(4))[0]
      if size < 16:
          raise ValueError("Binary structure of wave file is not compliant")

      (format_tag, channels, fs, bytes_per_second, block_align,
       bit_depth) = struct.unpack(endian + 'HHIIHH', fid.read(16))
      consumed = 16

      if format_tag == WAVE_FORMAT_EXTENSIBLE and size >= (16 + 2):
          ext_chunk_size = struct.unpack(endian + 'H', fid.read(2))[0]
          consumed += 2
          if ext_chunk_size < 22:
              raise ValueError("Binary structure of wave file is not compliant")

          extension = fid.read(22)
          consumed += 22
          # The 16-byte GUID follows 2+4 bytes of other extension fields
          # (presumably valid-bits and channel mask -- confirm against spec).
          guid = extension[2 + 4:2 + 4 + 16]
          # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
          # MS GUID byte order: first three groups are native byte order,
          # rest is Big Endian
          if is_big_endian:
              guid_tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
          else:
              guid_tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
          # When the GUID matches the template, its first four bytes carry
          # the real format tag; otherwise the tag is left unchanged and is
          # rejected by the check below.
          if guid.endswith(guid_tail):
              format_tag = struct.unpack(endian + 'I', guid[:4])[0]

      if format_tag not in KNOWN_WAVE_FORMATS:
          raise ValueError("Unknown wave file format")

      # Consume whatever is left of the chunk so the stream ends up on the
      # next chunk id.
      leftover = size - consumed
      if leftover > 0:
          fid.read(leftover)

      return (size, format_tag, channels, fs, bytes_per_second, block_align,
              bit_depth)
  # The caller must already have consumed the 4-byte 'data' id, so the
  # stream is positioned on the chunk-size field.
  def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
                       mmap=False):
      """
      Read the sample data of a WAV file into a numpy array.

      Parameters
      ----------
      fid : file-like
          Binary stream positioned immediately after the ``'data'`` id.
      format_tag : int
          Format tag from the fmt chunk; ``WAVE_FORMAT_PCM`` selects an
          integer dtype, anything else a floating-point dtype.
      channels : int
          Number of interleaved channels.
      bit_depth : int
          Bits per sample. 8-bit data is always read as unsigned bytes.
      is_big_endian : bool
          Decode the size field and the samples as big-endian if True.
      mmap : bool, optional
          If True, return a copy-on-write ``numpy.memmap`` over `fid`
          instead of reading the samples into memory.

      Returns
      -------
      data : numpy array
          1-D for a single channel, otherwise reshaped to
          ``(-1, channels)``.

      Notes
      -----
      RIFF chunks are word-aligned: a chunk whose size is odd is followed
      by one pad byte that is not counted in the size field. That byte is
      consumed here so the stream is left on the next chunk id.
      """
      size_fmt = '>I' if is_big_endian else '<I'

      # Size of the data subchunk in bytes
      size = struct.unpack(size_fmt, fid.read(4))[0]
      # One alignment byte follows an odd-sized payload.
      pad = size % 2

      # Number of bytes per sample
      bytes_per_sample = bit_depth // 8
      if bit_depth == 8:
          dtype = 'u1'
      else:
          dtype = '>' if is_big_endian else '<'
          if format_tag == WAVE_FORMAT_PCM:
              dtype += 'i%d' % bytes_per_sample
          else:
              dtype += 'f%d' % bytes_per_sample

      if not mmap:
          data = numpy.frombuffer(fid.read(size), dtype=dtype)
          if pad:
              # read() rather than seek(): harmless at end of file
              fid.read(pad)
      else:
          start = fid.tell()
          data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
                              shape=(size // bytes_per_sample,))
          fid.seek(start + size + pad)

      if channels > 1:
          data = data.reshape(-1, channels)
      return data
  def _skip_unknown_chunk(fid, is_big_endian):
      """
      Skip over a chunk whose id has already been read from `fid`.

      Parameters
      ----------
      fid : file-like
          Seekable binary stream positioned on the chunk-size field.
      is_big_endian : bool
          Decode the size field as big-endian if True, little-endian
          otherwise.

      Notes
      -----
      RIFF chunks are word-aligned: a chunk whose size is odd is followed
      by one pad byte that is not counted in the size field, so that byte
      is skipped together with the payload.
      """
      size_fmt = '>I' if is_big_endian else '<I'

      raw_size = fid.read(4)
      # An empty read means the end of the file was reached; unpacking it
      # would raise needlessly, and there is nothing left to skip anyway.
      if raw_size:
          size = struct.unpack(size_fmt, raw_size)[0]
          fid.seek(size + size % 2, 1)
  def _read_riff_chunk(fid):
      """
      Read the 12-byte RIFF/RIFX header at the current position of `fid`.

      Returns
      -------
      file_size : int
          Size stored in the header plus 8 (the signature and size fields).
      is_big_endian : bool
          True for a ``RIFX`` signature, False for ``RIFF``.

      Raises
      ------
      ValueError
          If the signature is neither ``RIFF`` nor ``RIFX``, or the form
          type is not ``WAVE``.
      """
      signature = fid.read(4)  # File signature
      if signature not in (b'RIFF', b'RIFX'):
          # There are also .wav files with "FFIR" or "XFIR" signatures?
          raise ValueError("File format {}... not "
                           "understood.".format(repr(signature)))

      is_big_endian = signature == b'RIFX'
      size_fmt = '>I' if is_big_endian else '<I'

      # Size of entire file
      file_size = struct.unpack(size_fmt, fid.read(4))[0] + 8

      if fid.read(4) != b'WAVE':
          raise ValueError("Not a WAV file.")

      return file_size, is_big_endian
  def read(filename, mmap=False):
      """
      Open a WAV file

      Return the sample rate (in samples/sec) and data from a WAV file.

      Parameters
      ----------
      filename : string or open file handle
          Input wav file.
      mmap : bool, optional
          Whether to read data as memory-mapped.
          Only to be used on real files (Default: False). It is ignored
          when `filename` is an open file handle.

          .. versionadded:: 0.12.0

      Returns
      -------
      rate : int
          Sample rate of wav file.
      data : numpy array
          Data read from wav file.  Data-type is determined from the file;
          see Notes.

      Raises
      ------
      ValueError
          If the file is not a RIFF/RIFX WAVE file, a chunk id is truncated,
          the bit depth is unsupported, a data chunk appears before any fmt
          chunk, or the file contains no data chunk at all.

      Notes
      -----
      This function cannot read wav files with 24-bit data.

      When `filename` is an open file handle it is rewound to position 0
      before returning; a file opened here from a path is closed.

      Common data types: [1]_

      =====================  ===========  ===========  =============
           WAV format            Min          Max       NumPy dtype
      =====================  ===========  ===========  =============
      32-bit floating-point  -1.0         +1.0         float32
      32-bit PCM             -2147483648  +2147483647  int32
      16-bit PCM             -32768       +32767       int16
      8-bit PCM              0            255          uint8
      =====================  ===========  ===========  =============

      Note that 8-bit PCM is unsigned.

      References
      ----------
      .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
         Interface and Data Specifications 1.0", section "Data Format of the
         Samples", August 1991
         http://www.tactilemedia.com/info/MCI_Control_Info.html
      """
      if hasattr(filename, 'read'):
          fid = filename
          mmap = False
      else:
          fid = open(filename, 'rb')

      # Pre-bind the results so a file without a data chunk is reported as
      # a ValueError below instead of an UnboundLocalError at return time.
      fs = None
      data = None

      try:
          file_size, is_big_endian = _read_riff_chunk(fid)
          fmt_chunk_received = False
          channels = 1
          bit_depth = 8
          format_tag = WAVE_FORMAT_PCM
          while fid.tell() < file_size:
              # read the next chunk
              chunk_id = fid.read(4)

              if not chunk_id:
                  raise ValueError("Unexpected end of file.")
              elif len(chunk_id) < 4:
                  raise ValueError("Incomplete wav chunk.")

              if chunk_id == b'fmt ':
                  fmt_chunk_received = True
                  fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
                  format_tag, channels, fs = fmt_chunk[1:4]
                  bit_depth = fmt_chunk[6]
                  if bit_depth not in (8, 16, 32, 64, 96, 128):
                      raise ValueError("Unsupported bit depth: the wav file "
                                       "has {}-bit data.".format(bit_depth))
              elif chunk_id == b'data':
                  if not fmt_chunk_received:
                      raise ValueError("No fmt chunk before data")
                  data = _read_data_chunk(fid, format_tag, channels, bit_depth,
                                          is_big_endian, mmap)
              elif chunk_id in (b'fact', b'LIST', b'JUNK', b'Fake'):
                  # 'fact' and 'LIST' could be handled properly someday;
                  # 'JUNK' and 'Fake' are alignment chunks.  All of them are
                  # skipped without a warning.
                  _skip_unknown_chunk(fid, is_big_endian)
              else:
                  warnings.warn("Chunk (non-data) not understood, skipping it.",
                                WavFileWarning)
                  _skip_unknown_chunk(fid, is_big_endian)
      finally:
          if not hasattr(filename, 'read'):
              fid.close()
          else:
              fid.seek(0)

      if data is None:
          raise ValueError("No data chunk found in wav file.")

      return fs, data
  def write(filename, rate, data):
      """
      Write a numpy array as a WAV file.

      Parameters
      ----------
      filename : string or open file handle
          Output wav file.
      rate : int
          The sample rate (in samples/sec).
      data : ndarray
          A 1-D or 2-D numpy array of either integer or float data-type.

      Raises
      ------
      ValueError
          If the data-type is not signed integer, float or 8-bit unsigned,
          or if the resulting file would exceed the 32-bit size limit.

      Notes
      -----
      * Writes a simple uncompressed WAV file.
      * To write multiple-channels, use a 2-D array of shape
        (Nsamples, Nchannels).
      * The bits-per-sample and PCM/float will be determined by the data-type.
      * An open file handle is rewound to position 0 afterwards; a file
        opened here from a path is closed.

      Common data types: [1]_

      =====================  ===========  ===========  =============
           WAV format            Min          Max       NumPy dtype
      =====================  ===========  ===========  =============
      32-bit floating-point  -1.0         +1.0         float32
      32-bit PCM             -2147483648  +2147483647  int32
      16-bit PCM             -32768       +32767       int16
      8-bit PCM              0            255          uint8
      =====================  ===========  ===========  =============

      Note that 8-bit PCM is unsigned.

      References
      ----------
      .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
         Interface and Data Specifications 1.0", section "Data Format of the
         Samples", August 1991
         http://www.tactilemedia.com/info/MCI_Control_Info.html
      """
      writes_to_handle = hasattr(filename, 'write')
      fid = filename if writes_to_handle else open(filename, 'wb')

      fs = rate

      try:
          dkind = data.dtype.kind
          sample_bytes = data.dtype.itemsize
          supported = (dkind in ('i', 'f') or
                       (dkind == 'u' and sample_bytes == 1))
          if not supported:
              raise ValueError("Unsupported data type '%s'" % data.dtype)

          # After validation anything that is not float is integer PCM.
          is_pcm = dkind != 'f'

          # RIFF header; the size field is a placeholder patched at the end.
          pieces = [b'RIFF', b'\x00\x00\x00\x00', b'WAVE']

          # fmt chunk
          format_tag = WAVE_FORMAT_PCM if is_pcm else WAVE_FORMAT_IEEE_FLOAT
          channels = 1 if data.ndim == 1 else data.shape[1]
          bit_depth = sample_bytes * 8
          bytes_per_second = fs * sample_bytes * channels
          block_align = channels * sample_bytes

          fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                       bytes_per_second, block_align,
                                       bit_depth)
          if not is_pcm:
              # add cbSize field for non-PCM files
              fmt_chunk_data += b'\x00\x00'

          pieces.append(b'fmt ')
          pieces.append(struct.pack('<I', len(fmt_chunk_data)))
          pieces.append(fmt_chunk_data)

          # fact chunk (non-PCM files)
          if not is_pcm:
              pieces.append(b'fact')
              pieces.append(struct.pack('<II', 4, data.shape[0]))

          header_data = b''.join(pieces)

          # check data size (needs to be immediately before the data chunk):
          # everything after the 8-byte RIFF preamble, plus the 8-byte data
          # chunk header and the samples, must fit the 32-bit size field.
          riff_payload = (len(header_data) - 8) + (8 + data.nbytes)
          if riff_payload > 0xFFFFFFFF:
              raise ValueError("Data exceeds wave file size limit")

          fid.write(header_data)

          # data chunk
          fid.write(b'data')
          fid.write(struct.pack('<I', data.nbytes))

          # Samples are stored little-endian; swap big-endian input.
          order = data.dtype.byteorder
          if order == '>' or (order == '=' and sys.byteorder == 'big'):
              data = data.byteswap()
          _array_tofile(fid, data)

          # Determine file size and place it in correct
          # position at start of the file.
          total_size = fid.tell()
          fid.seek(4)
          fid.write(struct.pack('<I', total_size - 8))
      finally:
          if writes_to_handle:
              fid.seek(0)
          else:
              fid.close()
  def _array_tofile(fid, data):
      """Write the raw bytes of array `data` to `fid` in C (row-major) order."""
      if sys.version_info[0] >= 3:
          # ravel gives a c-contiguous buffer
          flat_bytes = data.ravel().view('b')
          fid.write(flat_bytes.data)
      else:
          fid.write(data.tostring())