From 1f22833dd4dd7ac746d62848dd30ccb32453efef Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Sun, 20 Apr 2014 12:51:11 +0200 Subject: [PATCH 01/24] Add open(), read() and write() functions This is the combination of a few commits from #18. See also #14. --- pysoundfile.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/pysoundfile.py b/pysoundfile.py index 088e960..3bbb1b0 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -663,6 +663,75 @@ def write(self, data): return written +def open(*args, **kwargs): + """Return a new SoundFile object. + + Takes the same arguments as SoundFile.__init__(). + + """ + return SoundFile(*args, **kwargs) + + +def read(file, frames=-1, start=None, stop=None, **kwargs): + """Read a sound file and return its contents as NumPy array. + + The number of frames to read can be specified with frames, the + position to start reading can be specified with start. + By default, the whole file is read from the beginning. + Alternatively, a range can be specified with start and stop. + Both start and stop accept negative indices to specify positions + relative to the end of the file. + + The returned data type can be specified with dtype. See the + documentation of SoundFile.read() for details. + + All further arguments are forwarded to SoundFile.__init__(). + + """ + if frames is not None and stop is not None: + raise RuntimeError("Only one of (frames, stop) may be used") + read_kwargs = {} + if 'dtype' in kwargs: + read_kwargs['dtype'] = kwargs.pop('dtype') + with SoundFile(file, 'r', **kwargs) as f: + start, stop, _ = slice(start, stop).indices(f.frames) + if frames is None: + frames = max(0, stop - start) + f.seek(start, SEEK_SET) + data = f.read(frames, **read_kwargs) + return data, f.sample_rate + + +def write(data, file, sample_rate, *args, **kwargs): + """Write data from a NumPy array into a sound file. + + If file exists, it will be overwritten! + + The number of channels is obtained from data, all further arguments + are forwarded to SoundFile.__init__(). See its documentation for + details. + + Example usage: + + import pysoundfile as sf + sf.write(myarray, 'myfile.wav', 44100, 'PCM_24') + + """ + data = _np.asarray(data) + if data.ndim == 1: + channels = 1 + elif data.ndim == 2: + channels = data.shape[1] + else: + raise RuntimeError("Only one- and two-dimensional arrays are allowed") + frames = data.shape[0] + with SoundFile(file, 'w', sample_rate, channels, *args, **kwargs) as f: + written = f.write(data) + if frames != written: + raise RuntimeError("Only %d of %d frames were written" % (written, + frames)) + + def default_subtype(format): """Return default subtype for given format.""" return _default_subtypes.get(str(format).upper()) From 4258760fd945df609c59c20ad9e6e0164f9ca831 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Sun, 23 Mar 2014 22:32:24 +0100 Subject: [PATCH 02/24] read()/write() overhaul Replace ffi.new() with np.empty() and np.ascontiguousarray() Remove dicts for readers/writers read(): * reserve only as much memory as needed (if 'frames' is too large) * check return value of sf_readf_*() write(): * avoid copy if data has already the correct memory layout * check return value of sf_writef_*() * don't return the number of written frames! This is for symmetry with read() and it's redundant information anyway As the written frames are now checked in the write() method, the same check was removed from the write() *function*. --- pysoundfile.py | 97 ++++++++++++++++++++++++++------------------------ 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 3bbb1b0..cf5ca9f 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -246,6 +246,13 @@ 'RF64': 'PCM_16', } +_ffi_types = { + _np.dtype('float64'): 'double', + _np.dtype('float32'): 'float', + _np.dtype('int32'): 'int', + _np.dtype('int16'): 'short' +} + _snd = _ffi.dlopen('sndfile') @@ -596,30 +603,32 @@ def read(self, frames=-1, dtype='float64'): self._check_if_closed() if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - formats = { - _np.float64: 'double[]', - _np.float32: 'float[]', - _np.int32: 'int[]', - _np.int16: 'short[]' - } - readers = { - _np.float64: _snd.sf_readf_double, - _np.float32: _snd.sf_readf_float, - _np.int32: _snd.sf_readf_int, - _np.int16: _snd.sf_readf_short - } + dtype = _np.dtype(dtype) - if dtype.type not in formats: - raise ValueError("Can only read int16, int32, float32 and float64") - if frames < 0: - curr = self.seek(0, SEEK_CUR, 'r') + try: + ffi_type = _ffi_types[dtype] + except KeyError: + raise ValueError("dtype must be one of %s" % + repr([dt.name for dt in _ffi_types])) + + curr = self.seek(0, SEEK_CUR, 'r') + if frames < 0 or curr + frames > self.frames: frames = self.frames - curr - data = _ffi.new(formats[dtype.type], frames*self.channels) - read = readers[dtype.type](self._file, data, frames) + + data = _np.empty((frames, self.channels), dtype=dtype, order='C') + + assert data.flags['C_CONTIGUOUS'] + assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + + reader = getattr(_snd, 'sf_readf_' + ffi_type) + ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) + read = reader(self._file, ptr, frames) self._handle_error() - np_data = _np.frombuffer(_ffi.buffer(data), dtype=dtype, - count=read*self.channels) - return _np.reshape(np_data, (read, self.channels)) + + if frames != read: + raise RuntimeError("Only %d of %d frames were read" % + (read, frames)) + return data def write(self, data): """Write a number of frames to the file. @@ -635,32 +644,32 @@ def write(self, data): self._check_if_closed() if self.mode == 'r': raise RuntimeError("Cannot write to file opened in read mode") - formats = { - _np.float64: 'double*', - _np.float32: 'float*', - _np.int32: 'int*', - _np.int16: 'short*' - } - writers = { - _np.float64: _snd.sf_writef_double, - _np.float32: _snd.sf_writef_float, - _np.int32: _snd.sf_writef_int, - _np.int16: _snd.sf_writef_short - } - if data.dtype.type not in writers: - raise ValueError("Data must be int16, int32, float32 or float64") - raw_data = _ffi.new('char[]', data.flatten().tostring()) - written = writers[data.dtype.type](self._file, - _ffi.cast( - formats[data.dtype.type], raw_data), - len(data)) + + # no copy is made if data has already the correct memory layout: + data = _np.ascontiguousarray(data) + + try: + ffi_type = _ffi_types[data.dtype] + except KeyError: + raise ValueError("data.dtype must be one of %s" % + repr([dt.name for dt in _ffi_types])) + + assert data.flags['C_CONTIGUOUS'] + assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + + frames = len(data) + writer = getattr(_snd, 'sf_writef_' + ffi_type) + ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) + written = writer(self._file, ptr, frames) self._handle_error() curr = self.seek(0, SEEK_CUR, 'w') self._info.frames = self.seek(0, SEEK_END, 'w') self.seek(curr, SEEK_SET, 'w') - return written + if frames != written: + raise RuntimeError("Only %d of %d frames were written" % + (written, frames)) def open(*args, **kwargs): @@ -724,12 +733,8 @@ def write(data, file, sample_rate, *args, **kwargs): channels = data.shape[1] else: raise RuntimeError("Only one- and two-dimensional arrays are allowed") - frames = data.shape[0] with SoundFile(file, 'w', sample_rate, channels, *args, **kwargs) as f: - written = f.write(data) - if frames != written: - raise RuntimeError("Only %d of %d frames were written" % (written, - frames)) + f.write(data) def default_subtype(format): From 202fe2b73e3fe014e1fd1e742785abe2748c3317 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Mon, 21 Apr 2014 14:08:36 +0200 Subject: [PATCH 03/24] Add arguments channels_first, always_2d, out and fill_value See also #16 --- pysoundfile.py | 146 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 104 insertions(+), 42 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index cf5ca9f..d3b66c3 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -586,7 +586,21 @@ def seek(self, frames, whence=SEEK_SET, which=None): raise ValueError("Invalid which: %s" % repr(which)) return _snd.sf_seek(self._file, frames, whence) - def read(self, frames=-1, dtype='float64'): + def _create_out_array(self, frames, dtype, channels_first, always_2d): + # Helper function for read() + if channels_first: + order = 'C' + if self.channels == 1 and not always_2d: + shape = frames, + else: + shape = frames, self.channels + else: + order = 'F' + shape = self.channels, frames + return _np.empty(shape, dtype, order) + + def read(self, frames=-1, dtype='float64', channels_first=True, + always_2d=True, out=None, fill_value=None): """Read a number of frames from the file. Reads the given number of frames in the given data format from @@ -594,59 +608,100 @@ def read(self, frames=-1, dtype='float64'): position by the same number of frames. Use frames=-1 to read until the end of the file. - Returns the read data as a (frames x channels) NumPy array. + By default, a two-dimensional NumPy array is returned, where the + channels are stored along the first dimension, i.e. as columns. + Use channels_first=False to store the channels along the second + dimension, i.e. as rows. A two-dimensional array is returned + even if the sound file has only one channel. Use + always_2d=False to return a one-dimensional array in this case. - If there is not enough data left in the file to read, a - smaller NumPy array will be returned. + If out is specified, the data is written into the given NumPy + array. In this case, the arguments frames, dtype and always_2d + are silently ignored! + + If there is less data left in the file than requested, the rest + of the frames are filled with fill_value. If fill_value=None, a + smaller array is returned. + Note: If out is given, fill_value cannot be None! """ self._check_if_closed() if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - dtype = _np.dtype(dtype) + if out is not None: + if fill_value is None: + raise ValueError( + "If out is given, fill_value cannot be None") + dtype = out.dtype + frames = out.shape[not channels_first] + else: + dtype = _np.dtype(dtype) + try: ffi_type = _ffi_types[dtype] except KeyError: raise ValueError("dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - curr = self.seek(0, SEEK_CUR, 'r') - if frames < 0 or curr + frames > self.frames: - frames = self.frames - curr + max_frames = self.frames - self.seek(0, SEEK_CUR, 'r') + if out is None: + if frames < 0 or fill_value is None and frames > max_frames: + frames = max_frames + out = self._create_out_array(frames, dtype, + channels_first, always_2d) + else: + if out.size / frames != self.channels: + raise ValueError("Invalid out.shape: %s" % repr(out.shape)) - data = _np.empty((frames, self.channels), dtype=dtype, order='C') + if channels_first and not out.flags.c_contiguous: + raise ValueError( + "out must be C-contiguous for channels_first=True") + if not channels_first and not out.flags.f_contiguous: + raise ValueError( + "out must be Fortran-contiguous for channels_first=False") - assert data.flags['C_CONTIGUOUS'] - assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + assert out.dtype.itemsize == _ffi.sizeof(ffi_type) reader = getattr(_snd, 'sf_readf_' + ffi_type) - ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) + ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) read = reader(self._file, ptr, frames) self._handle_error() - if frames != read: - raise RuntimeError("Only %d of %d frames were read" % - (read, frames)) - return data + idx = [Ellipsis, Ellipsis] + idx[not channels_first] = slice(read, None) + out[idx] = fill_value + + return out - def write(self, data): + def write(self, data, channels_first=True): """Write a number of frames to the file. - Writes a number of frames to the current read position in the - file. This also advances the read position by the same number + Writes a number of frames to the current write position in the + file. This also advances the write position by the same number of frames and enlarges the file if necessary. The data must be provided as a (frames x channels) NumPy - array. + array or as one-dimensional array for mono signals. + Use channels_first=False if you want to provide a (channels x + frames) array. """ self._check_if_closed() if self.mode == 'r': raise RuntimeError("Cannot write to file opened in read mode") - # no copy is made if data has already the correct memory layout: - data = _np.ascontiguousarray(data) + if channels_first: + # no copy is made if data has already the correct memory layout: + data = _np.ascontiguousarray(data) + if data.ndim not in (1, 2): + raise ValueError("data must be one- or two-dimensional") + else: + # this shouldn't make a copy either if already in Fortran order: + data = _np.asfortranarray(data) + if data.ndim != 2: + raise ValueError( + "data.ndim must be 2 for channels_first=False") try: ffi_type = _ffi_types[data.dtype] @@ -654,10 +709,17 @@ def write(self, data): raise ValueError("data.dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - assert data.flags['C_CONTIGUOUS'] + frames = data.shape[not channels_first] + channels = data.size / frames + + if channels != self.channels: + raise ValueError( + "Wrong number of channels (%d expected, %d given)" % + (self.channels, channels)) + + assert data.flags[('C_CONTIGUOUS', 'F_CONTIGUOUS')[not channels_first]] assert data.dtype.itemsize == _ffi.sizeof(ffi_type) - frames = len(data) writer = getattr(_snd, 'sf_writef_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) written = writer(self._file, ptr, frames) @@ -691,21 +753,22 @@ def read(file, frames=-1, start=None, stop=None, **kwargs): Both start and stop accept negative indices to specify positions relative to the end of the file. - The returned data type can be specified with dtype. See the - documentation of SoundFile.read() for details. - + The keyword arguments out, dtype, fill_value, channels_first and + always_2d are forwarded to SoundFile.read(). All further arguments are forwarded to SoundFile.__init__(). """ - if frames is not None and stop is not None: + from inspect import getargspec + + if frames >= 0 and stop is not None: raise RuntimeError("Only one of (frames, stop) may be used") + read_kwargs = {} - if 'dtype' in kwargs: - read_kwargs['dtype'] = kwargs.pop('dtype') + for arg in getargspec(SoundFile.read).args: + if arg in kwargs: + read_kwargs[arg] = kwargs.pop(arg) with SoundFile(file, 'r', **kwargs) as f: start, stop, _ = slice(start, stop).indices(f.frames) - if frames is None: - frames = max(0, stop - start) f.seek(start, SEEK_SET) data = f.read(frames, **read_kwargs) return data, f.sample_rate @@ -716,9 +779,10 @@ def write(data, file, sample_rate, *args, **kwargs): If file exists, it will be overwritten! - The number of channels is obtained from data, all further arguments - are forwarded to SoundFile.__init__(). See its documentation for - details. + If data is one-dimensional, a mono file is written. + For two-dimensional data, the columns are interpreted as channels by + default. Use channels_first=False to interpret the rows as channels. + All further arguments are forwarded to SoundFile.__init__(). Example usage: @@ -727,14 +791,12 @@ def write(data, file, sample_rate, *args, **kwargs): """ data = _np.asarray(data) - if data.ndim == 1: - channels = 1 - elif data.ndim == 2: - channels = data.shape[1] - else: - raise RuntimeError("Only one- and two-dimensional arrays are allowed") + if data.ndim not in (1, 2): + raise ValueError("data must be one- or two-dimensional") + channels_first = kwargs.pop('channels_first', True) + channels = data.size / data.shape[not channels_first] with SoundFile(file, 'w', sample_rate, channels, *args, **kwargs) as f: - f.write(data) + f.write(data, channels_first=channels_first) def default_subtype(format): From 5d7236cd79c114e91a1974a08ec23e894cdae3e8 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Thu, 24 Apr 2014 22:41:00 +0200 Subject: [PATCH 04/24] Change handling of out argument If less frames are left in the file than would fit into out and fill_value=None, a smaller view into out is returned that contains only the valid frames. --- pysoundfile.py | 70 ++++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index d3b66c3..25e9a1d 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -622,55 +622,66 @@ def read(self, frames=-1, dtype='float64', channels_first=True, If there is less data left in the file than requested, the rest of the frames are filled with fill_value. If fill_value=None, a smaller array is returned. - Note: If out is given, fill_value cannot be None! + If out is given, only a part of it is overwritten and a view + containing all valid frames is returned. """ self._check_if_closed() if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - if out is not None: - if fill_value is None: - raise ValueError( - "If out is given, fill_value cannot be None") - dtype = out.dtype - frames = out.shape[not channels_first] - else: - dtype = _np.dtype(dtype) - + dtype = _np.dtype(dtype) if out is None else out.dtype try: ffi_type = _ffi_types[dtype] except KeyError: raise ValueError("dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) + if out is not None: + if out.ndim not in (1, 2): + raise ValueError("out must be one- or two-dimensional") + if channels_first and not out.flags.c_contiguous: + raise ValueError( + "out must be C-contiguous for channels_first=True") + if not channels_first and not out.flags.f_contiguous: + raise ValueError( + "out must be Fortran-contiguous for channels_first=False") + frames = out.shape[not channels_first] + if frames and out.size / frames != self.channels: + raise ValueError("Invalid out.shape: %s" % repr(out.shape)) + max_frames = self.frames - self.seek(0, SEEK_CUR, 'r') + if frames < 0: + frames = max_frames + valid_frames = frames + if frames > max_frames: + valid_frames = max_frames + if out is None: - if frames < 0 or fill_value is None and frames > max_frames: - frames = max_frames out = self._create_out_array(frames, dtype, channels_first, always_2d) - else: - if out.size / frames != self.channels: - raise ValueError("Invalid out.shape: %s" % repr(out.shape)) - - if channels_first and not out.flags.c_contiguous: - raise ValueError( - "out must be C-contiguous for channels_first=True") - if not channels_first and not out.flags.f_contiguous: - raise ValueError( - "out must be Fortran-contiguous for channels_first=False") assert out.dtype.itemsize == _ffi.sizeof(ffi_type) reader = getattr(_snd, 'sf_readf_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) - read = reader(self._file, ptr, frames) + read = reader(self._file, ptr, valid_frames) self._handle_error() + assert read == valid_frames - idx = [Ellipsis, Ellipsis] - idx[not channels_first] = slice(read, None) - out[idx] = fill_value + if frames > valid_frames: + def multichannel_slice(start, stop): + """Return a slice of frames, considering channels_first""" + if channels_first: + idx = slice(start, stop) + else: + idx = Ellipsis, slice(start, stop) + return idx + + if fill_value is None: + out = out[multichannel_slice(None, valid_frames)] + else: + out[multichannel_slice(valid_frames, None)] = fill_value return out @@ -717,22 +728,19 @@ def write(self, data, channels_first=True): "Wrong number of channels (%d expected, %d given)" % (self.channels, channels)) - assert data.flags[('C_CONTIGUOUS', 'F_CONTIGUOUS')[not channels_first]] + assert data.flags['C_CONTIGUOUS' if channels_first else 'F_CONTIGUOUS'] assert data.dtype.itemsize == _ffi.sizeof(ffi_type) writer = getattr(_snd, 'sf_writef_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) written = writer(self._file, ptr, frames) self._handle_error() + assert written == frames curr = self.seek(0, SEEK_CUR, 'w') self._info.frames = self.seek(0, SEEK_END, 'w') self.seek(curr, SEEK_SET, 'w') - if frames != written: - raise RuntimeError("Only %d of %d frames were written" % - (written, frames)) - def open(*args, **kwargs): """Return a new SoundFile object. From f64da1192f1dfb48a52f9a72a470aca0be6b5bb0 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Sun, 27 Apr 2014 13:15:22 +0200 Subject: [PATCH 05/24] Add helper function _check_frames_and_channels() --- pysoundfile.py | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 25e9a1d..6a8f874 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -638,17 +638,14 @@ def read(self, frames=-1, dtype='float64', channels_first=True, repr([dt.name for dt in _ffi_types])) if out is not None: - if out.ndim not in (1, 2): - raise ValueError("out must be one- or two-dimensional") + frames = _check_frames_and_channels( + out, "out", channels_first, channels=self.channels) if channels_first and not out.flags.c_contiguous: raise ValueError( "out must be C-contiguous for channels_first=True") if not channels_first and not out.flags.f_contiguous: raise ValueError( "out must be Fortran-contiguous for channels_first=False") - frames = out.shape[not channels_first] - if frames and out.size / frames != self.channels: - raise ValueError("Invalid out.shape: %s" % repr(out.shape)) max_frames = self.frames - self.seek(0, SEEK_CUR, 'r') if frames < 0: @@ -705,8 +702,6 @@ def write(self, data, channels_first=True): if channels_first: # no copy is made if data has already the correct memory layout: data = _np.ascontiguousarray(data) - if data.ndim not in (1, 2): - raise ValueError("data must be one- or two-dimensional") else: # this shouldn't make a copy either if already in Fortran order: data = _np.asfortranarray(data) @@ -720,13 +715,8 @@ def write(self, data, channels_first=True): raise ValueError("data.dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - frames = data.shape[not channels_first] - channels = data.size / frames - - if channels != self.channels: - raise ValueError( - "Wrong number of channels (%d expected, %d given)" % - (self.channels, channels)) + frames = _check_frames_and_channels( + data, "data", channels_first, channels=self.channels) assert data.flags['C_CONTIGUOUS' if channels_first else 'F_CONTIGUOUS'] assert data.dtype.itemsize == _ffi.sizeof(ffi_type) @@ -742,6 +732,28 @@ def write(self, data, channels_first=True): self.seek(curr, SEEK_SET, 'w') +def _check_frames_and_channels(array, name, channels_first, channels=None): + # Return frames and channels for a given array. If channels is given (and + # if the number of channels matches), return only frames. + if array.ndim not in (1, 2): + raise ValueError("%s must be one- or two-dimensional" % repr(name)) + frames = array.shape[not channels_first] + if frames == 0: + raise ValueError("%s is empty" % repr(name)) + + expected_channels = channels + channels = array.size / frames + + if expected_channels is None: + return frames, channels + elif channels == expected_channels: + return frames + else: + raise ValueError( + "Wrong number of channels in %s: %d (instead of %d)" % + (repr(name), channels, expected_channels)) + + def open(*args, **kwargs): """Return a new SoundFile object. @@ -799,10 +811,8 @@ def write(data, file, sample_rate, *args, **kwargs): """ data = _np.asarray(data) - if data.ndim not in (1, 2): - raise ValueError("data must be one- or two-dimensional") channels_first = kwargs.pop('channels_first', True) - channels = data.size / data.shape[not channels_first] + _, channels = _check_frames_and_channels(data, "data", channels_first) with SoundFile(file, 'w', sample_rate, channels, *args, **kwargs) as f: f.write(data, channels_first=channels_first) From ace993615f82380449822fcb7d979825ddf68929 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Mon, 28 Apr 2014 10:47:55 +0200 Subject: [PATCH 06/24] refactored tests for improved clarity - pysoundfile is not imported into the global namespace any longer, since that would overwrite `open`. - Split the test class into subclasses --- tests/test_pysoundfile.py | 124 ++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index 93afc1a..836ce34 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -1,5 +1,5 @@ import unittest -from pysoundfile import * +import pysoundfile as sf import numpy as np import os import io @@ -11,12 +11,14 @@ def setUp(self): self.channels = 2 self.filename = 'test.wav' self.data = np.ones((self.sample_rate, self.channels))*0.5 - with SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: + with sf.SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: f.write(self.data) def tearDown(self): os.remove(self.filename) + +class TestBasicAttributesOfWaveFile(TestWaveFile): def test_file_exists(self): """The test file should exist""" self.assertTrue(os.path.isfile(self.filename)) @@ -24,46 +26,46 @@ def test_file_exists(self): def test_open_file_descriptor(self): """Opening a file handle should work""" handle = os.open(self.filename, os.O_RDONLY) - with SoundFile(handle) as f: + with sf.SoundFile(handle) as f: self.assertTrue(np.all(self.data == f[:])) def test_open_virtual_io(self): """Opening a file-like object should work""" with open(self.filename, 'rb') as bytesio: - with SoundFile(bytesio) as f: + with sf.SoundFile(bytesio) as f: self.assertTrue(np.all(self.data == f[:])) def test_read_mode(self): """Opening the file in read mode should open in read mode from beginning""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.mode, 'r') - self.assertEqual(f.seek(0, SEEK_CUR), 0) + self.assertEqual(f.seek(0, sf.SEEK_CUR), 0) def test_write_mode(self): """Opening the file in write mode should open in write mode from beginning""" - with SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: + with sf.SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: self.assertEqual(f.mode, 'w') - self.assertEqual(f.seek(0, SEEK_CUR), 0) + self.assertEqual(f.seek(0, sf.SEEK_CUR), 0) def test_rw_mode(self): """Opening the file in rw mode should open in rw mode from end""" - with SoundFile(self.filename, 'rw') as f: + with sf.SoundFile(self.filename, 'rw') as f: self.assertEqual(f.mode, 'rw') - self.assertEqual(f.seek(0, SEEK_CUR), len(f)) + self.assertEqual(f.seek(0, sf.SEEK_CUR), len(f)) def test_channels(self): """The test file should have the correct number of channels""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.channels, self.channels) def test_sample_rate(self): """The test file should have the correct number of sample rate""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.sample_rate, self.sample_rate) def test_format(self): """The test file should be a wave file""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.format, 'WAV') self.assertEqual(f.subtype, 'PCM_16') self.assertEqual(f.endian, 'FILE') @@ -72,121 +74,142 @@ def test_format(self): def test_context_manager(self): """The context manager should close the file""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: pass self.assertTrue(f.closed) def test_closing(self): """Closing a file should close it""" - f = SoundFile(self.filename) + f = sf.SoundFile(self.filename) self.assertFalse(f.closed) f.close() self.assertTrue(f.closed) def test_file_length(self): """The file should have the correct length""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(len(f), self.sample_rate) def test_file_contents(self): """The file should contain the correct data""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertTrue(np.all(self.data == f[:])) + def test_file_attributes(self): + """Changing a file attribute should save it on disk""" + with sf.SoundFile(self.filename, 'rw') as f: + f.title = 'testing' + with sf.SoundFile(self.filename) as f: + self.assertEqual(f.title, 'testing') + + def test_non_file_attributes(self): + """Changing a non-file attribute should not save to disk""" + with sf.SoundFile(self.filename, 'rw') as f: + f.foobar = 'testing' + with sf.SoundFile(self.filename) as f: + with self.assertRaises(AttributeError): + f.foobar + + +class TestSeekWaveFile(TestWaveFile): def test_seek(self): """Seeking should advance the read/write pointer""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.seek(100), 100) def test_seek_cur(self): """seeking multiple times should advance the read/write pointer""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: f.seek(100) - self.assertEqual(f.seek(100, whence=SEEK_CUR), 200) + self.assertEqual(f.seek(100, whence=sf.SEEK_CUR), 200) def test_seek_end(self): """seeking from end should advance the read/write pointer""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.seek(-100, whence=SEEK_END), self.sample_rate-100) def test_seek_read(self): """Read-seeking should advance the read pointer""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f.seek(100, which='r'), 100) def test_seek_write(self): """write-seeking should advance the write pointer""" - with SoundFile(self.filename, 'rw') as f: + with sf.SoundFile(self.filename, 'rw') as f: self.assertEqual(f.seek(100, which='w'), 100) def test_flush(self): """After flushing, data should be written to disk""" - with SoundFile(self.filename, 'rw') as f: + with sf.SoundFile(self.filename, 'rw') as f: size = os.path.getsize(self.filename) f.write(np.zeros((10,2))) f.flush() self.assertEqual(os.path.getsize(self.filename), size+40) + +class TestSeekWaveFile(TestWaveFile): def test_read(self): """read should read data and advance the read pointer""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: data = f.read(100) self.assertTrue(np.all(data == self.data[:100])) - self.assertEqual(100, f.seek(0, SEEK_CUR)) + self.assertEqual(100, f.seek(0, sf.SEEK_CUR)) def test_read_write_only(self): """reading a write-only file should not work""" - with SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: + with sf.SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: with self.assertRaises(RuntimeError) as err: f.read(100) def test_default_read_format(self): """By default, np.float64 should be read""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertEqual(f[:].dtype, np.float64) def test_read_int16(self): """reading 16 bit integers should read np.int16""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: data = f.read(100, dtype='int16') self.assertEqual(data.dtype, np.int16) def test_read_int32(self): """reading 32 bit integers should read np.int32""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: data = f.read(100, dtype='int32') self.assertEqual(data.dtype, np.int32) def test_read_float32(self): """reading 32 bit floats should read np.float32""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: data = f.read(100, dtype='float32') self.assertEqual(data.dtype, np.float32) def test_read_indexing(self): """Reading using indexing should read but not advance read pointer""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: self.assertTrue(np.all(f[:100] == self.data[:100])) - self.assertEqual(0, f.seek(0, SEEK_CUR)) + self.assertEqual(0, f.seek(0, sf.SEEK_CUR)) + +class TestWriteWaveFile(TestWaveFile): def test_write(self): """write should write data and advance the write pointer""" - with SoundFile(self.filename, 'rw') as f: + with sf.SoundFile(self.filename, 'rw') as f: data = np.zeros((100,2)) - position = f.seek(0, SEEK_CUR) + position = f.seek(0, sf.SEEK_CUR) f.write(data) self.assertTrue(np.all(f[-100:] == data)) - self.assertEqual(100, f.seek(0, SEEK_CUR)-position) + self.assertEqual(100, f.seek(0, sf.SEEK_CUR)-position) def test_write_read_only(self): """writing to a read-only file should not work""" - with SoundFile(self.filename) as f: + with sf.SoundFile(self.filename) as f: with self.assertRaises(RuntimeError) as err: f.write(np.ones((100,2))) def test_write_float_precision(self): """Written float data should be written at most 2**-15 off""" - with SoundFile(self.filename, 'rw') as f: + with sf.SoundFile(self.filename, 'rw') as f: data = np.ones((100,2)) f.write(data) written_data = f[-100:] @@ -194,34 +217,19 @@ def test_write_float_precision(self): def test_write_int_precision(self): """Written int data should be written""" - with SoundFile(self.filename, 'rw') as f: + with sf.SoundFile(self.filename, 'rw') as f: data = np.zeros((100,2)) + 2**15-1 # full scale int16 data = np.array(data, dtype='int16') f.write(data) - f.seek(-100, SEEK_CUR) + f.seek(-100, sf.SEEK_CUR) written_data = f.read(dtype='int16') self.assertTrue(np.all(data == written_data)) def test_write_indexing(self): """Writing using indexing should write but not advance write pointer""" - with SoundFile(self.filename, 'rw') as f: - position = f.seek(0, SEEK_CUR) + with sf.SoundFile(self.filename, 'rw') as f: + position = f.seek(0, sf.SEEK_CUR) data = np.zeros((100,2)) f[:100] = data - self.assertEqual(position, f.seek(0, SEEK_CUR)) + self.assertEqual(position, f.seek(0, sf.SEEK_CUR)) self.assertTrue(np.all(data == f[:100])) - - def test_file_attributes(self): - """Changing a file attribute should save it on disk""" - with SoundFile(self.filename, 'rw') as f: - f.title = 'testing' - with SoundFile(self.filename) as f: - self.assertEqual(f.title, 'testing') - - def test_non_file_attributes(self): - """Changing a non-file attribute should not save to disk""" - with SoundFile(self.filename, 'rw') as f: - f.foobar = 'testing' - with SoundFile(self.filename) as f: - with self.assertRaises(AttributeError): - f.foobar From 49a16c8c474986e1b9bb2bc2ecb12a3c4f7db552 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Mon, 28 Apr 2014 11:02:17 +0200 Subject: [PATCH 07/24] Simplified read and write - removed `channels_first`. - split read into `read` and `readinto`. - removed many helper functions that were not needed any longer. - added tests for `fill_value` and EOF behavior. --- pysoundfile.py | 158 +++++++++++++------------------------- tests/test_pysoundfile.py | 41 ++++++++++ 2 files changed, 94 insertions(+), 105 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 6a8f874..eae3df0 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -586,21 +586,8 @@ def seek(self, frames, whence=SEEK_SET, which=None): raise ValueError("Invalid which: %s" % repr(which)) return _snd.sf_seek(self._file, frames, whence) - def _create_out_array(self, frames, dtype, channels_first, always_2d): - # Helper function for read() - if channels_first: - order = 'C' - if self.channels == 1 and not always_2d: - shape = frames, - else: - shape = frames, self.channels - else: - order = 'F' - shape = self.channels, frames - return _np.empty(shape, dtype, order) - - def read(self, frames=-1, dtype='float64', channels_first=True, - always_2d=True, out=None, fill_value=None): + def read(self, frames=-1, dtype='float64', always_2d=True, + fill_value=None): """Read a number of frames from the file. Reads the given number of frames in the given data format from @@ -608,81 +595,75 @@ def read(self, frames=-1, dtype='float64', channels_first=True, position by the same number of frames. Use frames=-1 to read until the end of the file. - By default, a two-dimensional NumPy array is returned, where the - channels are stored along the first dimension, i.e. as columns. - Use channels_first=False to store the channels along the second - dimension, i.e. as rows. A two-dimensional array is returned - even if the sound file has only one channel. Use - always_2d=False to return a one-dimensional array in this case. + By default, a two-dimensional array is returned even if the + sound file has only one channel. Use always_2d=False to return + a one-dimensional array in this case. + + If there is less data left in the file than requested, a + shorter array is returned. Use fill_value to always return the + given number of frames and fill all remaining frames with + fill_value. + + """ + if frames < 0: + frames = self.frames - self.seek(0, SEEK_CUR, 'r') + out = _np.empty((frames, self.channels), dtype) + if not always_2d and out.shape[1] == 1: + out = out.flatten() - If out is specified, the data is written into the given NumPy - array. In this case, the arguments frames, dtype and always_2d - are silently ignored! + try: + out = self.readinto(out, fill_value) + except Exception as e: + raise e + + return out + + def readinto(self, out, fill_value=None): + """Read a number of frames from the file into an array. + + Reads the given number of frames in the given data format from + the current read position. This also advances the read + position by the same number of frames. - If there is less data left in the file than requested, the rest - of the frames are filled with fill_value. If fill_value=None, a - smaller array is returned. - If out is given, only a part of it is overwritten and a view - containing all valid frames is returned. + The data is written into the given NumPy array. If there is + not enough data left in the file to fill the array, the rest + of the frames are ignored and a smaller view to the array is + returned. Use fill_value to fill the rest of the array and + return the full-length array. """ self._check_if_closed() if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - dtype = _np.dtype(dtype) if out is None else out.dtype try: - ffi_type = _ffi_types[dtype] + ffi_type = _ffi_types[out.dtype] except KeyError: raise ValueError("dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - if out is not None: - frames = _check_frames_and_channels( - out, "out", channels_first, channels=self.channels) - if channels_first and not out.flags.c_contiguous: - raise ValueError( - "out must be C-contiguous for channels_first=True") - if not channels_first and not out.flags.f_contiguous: - raise ValueError( - "out must be Fortran-contiguous for channels_first=False") - - max_frames = self.frames - self.seek(0, SEEK_CUR, 'r') - if frames < 0: - frames = max_frames - valid_frames = frames - if frames > max_frames: - valid_frames = max_frames + if not out.flags.c_contiguous: + raise ValueError("out must be C-contiguous") - if out is None: - out = self._create_out_array(frames, dtype, - channels_first, always_2d) + read_frames = len(out) + if read_frames + self.seek(0, SEEK_CUR, 'r') > self.frames: + read_frames = self.frames - self.seek(0, SEEK_CUR, 'r') assert out.dtype.itemsize == _ffi.sizeof(ffi_type) reader = getattr(_snd, 'sf_readf_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) - read = reader(self._file, ptr, valid_frames) + read = reader(self._file, ptr, read_frames) self._handle_error() - assert read == valid_frames - - if frames > valid_frames: - def multichannel_slice(start, stop): - """Return a slice of frames, considering channels_first""" - if channels_first: - idx = slice(start, stop) - else: - idx = Ellipsis, slice(start, stop) - return idx - - if fill_value is None: - out = out[multichannel_slice(None, valid_frames)] - else: - out[multichannel_slice(valid_frames, None)] = fill_value + assert read == read_frames - return out + if fill_value is None: + return out[:read_frames] + else: + out[read_frames:] = fill_value + return out - def write(self, data, channels_first=True): + def write(self, data): """Write a number of frames to the file. Writes a number of frames to the current write position in the @@ -699,15 +680,7 @@ def write(self, data, channels_first=True): if self.mode == 'r': raise RuntimeError("Cannot write to file opened in read mode") - if channels_first: - # no copy is made if data has already the correct memory layout: - data = _np.ascontiguousarray(data) - else: - # this shouldn't make a copy either if already in Fortran order: - data = _np.asfortranarray(data) - if data.ndim != 2: - raise ValueError( - "data.ndim must be 2 for channels_first=False") + data = _np.ascontiguousarray(data) try: ffi_type = _ffi_types[data.dtype] @@ -715,45 +688,20 @@ def write(self, data, channels_first=True): raise ValueError("data.dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - frames = _check_frames_and_channels( - data, "data", channels_first, channels=self.channels) - - assert data.flags['C_CONTIGUOUS' if channels_first else 'F_CONTIGUOUS'] + assert data.flags['C_CONTIGUOUS'] assert data.dtype.itemsize == _ffi.sizeof(ffi_type) writer = getattr(_snd, 'sf_writef_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) - written = writer(self._file, ptr, frames) + written = writer(self._file, ptr, len(data)) self._handle_error() - assert written == frames + assert written == len(data) curr = self.seek(0, SEEK_CUR, 'w') self._info.frames = self.seek(0, SEEK_END, 'w') self.seek(curr, SEEK_SET, 'w') -def _check_frames_and_channels(array, name, channels_first, channels=None): - # Return frames and channels for a given array. If channels is given (and - # if the number of channels matches), return only frames. - if array.ndim not in (1, 2): - raise ValueError("%s must be one- or two-dimensional" % repr(name)) - frames = array.shape[not channels_first] - if frames == 0: - raise ValueError("%s is empty" % repr(name)) - - expected_channels = channels - channels = array.size / frames - - if expected_channels is None: - return frames, channels - elif channels == expected_channels: - return frames - else: - raise ValueError( - "Wrong number of channels in %s: %d (instead of %d)" % - (repr(name), channels, expected_channels)) - - def open(*args, **kwargs): """Return a new SoundFile object. diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index 836ce34..d92fc35 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -190,6 +190,47 @@ def test_read_indexing(self): self.assertTrue(np.all(f[:100] == self.data[:100])) self.assertEqual(0, f.seek(0, sf.SEEK_CUR)) + def test_read_number_of_frames(self): + """Reading N frames should return N frames""" + with sf.SoundFile(self.filename) as f: + data = f.read(100) + self.assertEqual(len(data), 100) + + def test_read_all_frames(self): + """Reading should return all remaining frames""" + with sf.SoundFile(self.filename) as f: + f.seek(-100, sf.SEEK_END) + data = f.read() + self.assertEqual(len(data), 100) + + def test_read_number_of_frames_over_end(self): + """Reading N frames at EOF should return only remaining frames""" + with sf.SoundFile(self.filename) as f: + f.seek(-50, sf.SEEK_END) + data = f.read(100) + self.assertEqual(len(data), 50) + + def test_read_number_of_frames_over_end_with_fill(self): + """Reading N frames with fill at EOF should return N frames""" + with sf.SoundFile(self.filename) as f: + f.seek(-50, sf.SEEK_END) + data = f.read(100, fill_value=0) + self.assertEqual(len(data), 100) + self.assertTrue(np.all(data[50:] == 0)) + + def test_read_mono_as_array(self): + """Reading N frames with fill at EOF should return N frames""" + # create a dummy mono wave file + self.sample_rate = 44100 + self.channels = 1 + self.filename = 'test.wav' + self.data = np.ones((self.sample_rate, self.channels))*0.5 + with sf.SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: + f.write(self.data) + + with sf.SoundFile(self.filename) as f: + data = f.read(100, always_2d=False) + self.assertEqual(data.shape, (100,)) class TestWriteWaveFile(TestWaveFile): def test_write(self): From db2cb0b0e17963484b211c8f076904b3d4c25651 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Wed, 30 Apr 2014 09:30:07 +0200 Subject: [PATCH 08/24] refactored `read` to accept `out` readinto can thus be turned into a private method --- pysoundfile.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index eae3df0..b69ce9a 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -587,7 +587,7 @@ def seek(self, frames, whence=SEEK_SET, which=None): return _snd.sf_seek(self._file, frames, whence) def read(self, frames=-1, dtype='float64', always_2d=True, - fill_value=None): + fill_value=None, out=None): """Read a number of frames from the file. Reads the given number of frames in the given data format from @@ -604,21 +604,29 @@ def read(self, frames=-1, dtype='float64', always_2d=True, given number of frames and fill all remaining frames with fill_value. + If out is given as a numpy array, the data is written into + that array. If there is not enough data left in the file to + fill the array, the rest of the frames are ignored and a + smaller view to the array is returned. Use fill_value to fill + the rest of the array and always return the full-length array. + """ - if frames < 0: - frames = self.frames - self.seek(0, SEEK_CUR, 'r') - out = _np.empty((frames, self.channels), dtype) - if not always_2d and out.shape[1] == 1: - out = out.flatten() + + if out is None: + if frames < 0: + frames = self.frames - self.seek(0, SEEK_CUR, 'r') + out = _np.empty((frames, self.channels), dtype) + if not always_2d and out.shape[1] == 1: + out = out.flatten() try: - out = self.readinto(out, fill_value) + out = self._readinto(out, fill_value) except Exception as e: raise e return out - def readinto(self, out, fill_value=None): + def _readinto(self, out, fill_value=None): """Read a number of frames from the file into an array. Reads the given number of frames in the given data format from From a700fcf8f46cc34fb0786af54c1a6e83ea72236e Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Wed, 30 Apr 2014 09:40:05 +0200 Subject: [PATCH 09/24] re-integrated read and readinto --- pysoundfile.py | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index b69ce9a..7c22412 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -612,6 +612,10 @@ def read(self, frames=-1, dtype='float64', always_2d=True, """ + self._check_if_closed() + if self.mode == 'w': + raise RuntimeError("Cannot read from file opened in write mode") + if out is None: if frames < 0: frames = self.frames - self.seek(0, SEEK_CUR, 'r') @@ -619,31 +623,6 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if not always_2d and out.shape[1] == 1: out = out.flatten() - try: - out = self._readinto(out, fill_value) - except Exception as e: - raise e - - return out - - def _readinto(self, out, fill_value=None): - """Read a number of frames from the file into an array. - - Reads the given number of frames in the given data format from - the current read position. This also advances the read - position by the same number of frames. - - The data is written into the given NumPy array. If there is - not enough data left in the file to fill the array, the rest - of the frames are ignored and a smaller view to the array is - returned. Use fill_value to fill the rest of the array and - return the full-length array. - - """ - self._check_if_closed() - if self.mode == 'w': - raise RuntimeError("Cannot read from file opened in write mode") - try: ffi_type = _ffi_types[out.dtype] except KeyError: From 161f97e2b2df72c8a4a7e0f7eb598677b34934a6 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Wed, 30 Apr 2014 09:45:48 +0200 Subject: [PATCH 10/24] Fixed documentation of test case --- tests/test_pysoundfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index d92fc35..c5a71b6 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -219,7 +219,7 @@ def test_read_number_of_frames_over_end_with_fill(self): self.assertTrue(np.all(data[50:] == 0)) def test_read_mono_as_array(self): - """Reading N frames with fill at EOF should return N frames""" + """Reading with always_2d=False should return array""" # create a dummy mono wave file self.sample_rate = 44100 self.channels = 1 From 94c2a007093c68a348b23d337d8b1d5ce1b8a205 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Wed, 30 Apr 2014 09:56:58 +0200 Subject: [PATCH 11/24] Added tests for out argument of read --- tests/test_pysoundfile.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index c5a71b6..a0e8e07 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -218,6 +218,32 @@ def test_read_number_of_frames_over_end_with_fill(self): self.assertEqual(len(data), 100) self.assertTrue(np.all(data[50:] == 0)) + def test_read_into_out(self): + """Reading into out should return data and write into out""" + with sf.SoundFile(self.filename) as f: + data = np.empty((100, f.channels), dtype='float64') + out_data = f.read(out=data) + self.assertTrue(np.all(data == out_data)) + + def test_read_into_out_over_end(self): + """Reading into out over end should return shorter data and write into out""" + with sf.SoundFile(self.filename) as f: + data = np.empty((100, f.channels), dtype='float64') + f.seek(-50, sf.SEEK_END) + out_data = f.read(out=data) + self.assertTrue(np.all(data[:50] == out_data[:50])) + self.assertEqual(out_data.shape, (50,2)) + self.assertEqual(data.shape, (100,2)) + + def test_read_into_out_over_end_with_fill(self): + """Reading into out over end with fill should return padded data and write into out""" + with sf.SoundFile(self.filename) as f: + data = np.empty((100, f.channels), dtype='float64') + f.seek(-50, sf.SEEK_END) + out_data = f.read(out=data, fill_value=0) + self.assertTrue(np.all(data == out_data)) + self.assertTrue(np.all(data[50:] == 0)) + def test_read_mono_as_array(self): """Reading with always_2d=False should return array""" # create a dummy mono wave file From 564e9bc9924b0282e144f61e77b4234b1d59b87b Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 15:26:33 +0200 Subject: [PATCH 12/24] removed duplicated self.seek(0, SEEK_CUR, 'r') --- pysoundfile.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 7c22412..478c2d1 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -616,9 +616,11 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") + current_frame = self.seek(0, SEEK_CUR, 'r') + if out is None: if frames < 0: - frames = self.frames - self.seek(0, SEEK_CUR, 'r') + frames = self.frames - current_frame out = _np.empty((frames, self.channels), dtype) if not always_2d and out.shape[1] == 1: out = out.flatten() @@ -633,8 +635,8 @@ def read(self, frames=-1, dtype='float64', always_2d=True, raise ValueError("out must be C-contiguous") read_frames = len(out) - if read_frames + self.seek(0, SEEK_CUR, 'r') > self.frames: - read_frames = self.frames - self.seek(0, SEEK_CUR, 'r') + if read_frames + current_frame > self.frames: + read_frames = self.frames - current_frame assert out.dtype.itemsize == _ffi.sizeof(ffi_type) From 1a154da658e5f16faffd396be1c97ce39de76c1b Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 15:28:00 +0200 Subject: [PATCH 13/24] no more reshaping out after creation --- pysoundfile.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 478c2d1..4918b7f 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -621,9 +621,10 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if out is None: if frames < 0: frames = self.frames - current_frame - out = _np.empty((frames, self.channels), dtype) - if not always_2d and out.shape[1] == 1: - out = out.flatten() + if always_2d or self.channels > 1: + out = _np.empty((frames, self.channels), dtype) + else: + out = _np.empty(frames, dtype) try: ffi_type = _ffi_types[out.dtype] From 4d28a103ef90ec5370a716da2d4c0ce812477d26 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 15:36:18 +0200 Subject: [PATCH 14/24] no more allocating of unused memory --- pysoundfile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pysoundfile.py b/pysoundfile.py index 4918b7f..7e944af 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -621,6 +621,8 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if out is None: if frames < 0: frames = self.frames - current_frame + if current_frame + frames > self.frames and fill_value is None: + frames = self.frames - current_frame if always_2d or self.channels > 1: out = _np.empty((frames, self.channels), dtype) else: From 28527a7ea4487f1e83ba79d267a6c9c6a7606590 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 15:37:10 +0200 Subject: [PATCH 15/24] no more returning views of all the data --- pysoundfile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pysoundfile.py b/pysoundfile.py index 7e944af..3fb798c 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -649,7 +649,9 @@ def read(self, frames=-1, dtype='float64', always_2d=True, self._handle_error() assert read == read_frames - if fill_value is None: + if read_frames == len(out): + return out + elif fill_value is None: return out[:read_frames] else: out[read_frames:] = fill_value From 5c68a049d54eef30985fb650f7c413fef5572380 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 15:44:54 +0200 Subject: [PATCH 16/24] removed unnecessary nesting --- pysoundfile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 3fb798c..d92c87e 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -618,11 +618,12 @@ def read(self, frames=-1, dtype='float64', always_2d=True, current_frame = self.seek(0, SEEK_CUR, 'r') + if frames < 0: + frames = self.frames - current_frame + if current_frame + frames > self.frames and fill_value is None: + frames = self.frames - current_frame + if out is None: - if frames < 0: - frames = self.frames - current_frame - if current_frame + frames > self.frames and fill_value is None: - frames = self.frames - current_frame if always_2d or self.channels > 1: out = _np.empty((frames, self.channels), dtype) else: From 877fc3a3c08a28cba97bf5e7f076e288b2275768 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 15:57:09 +0200 Subject: [PATCH 17/24] fixed write function --- pysoundfile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pysoundfile.py b/pysoundfile.py index d92c87e..aa2fde7 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -755,7 +755,10 @@ def write(data, file, sample_rate, *args, **kwargs): """ data = _np.asarray(data) channels_first = kwargs.pop('channels_first', True) - _, channels = _check_frames_and_channels(data, "data", channels_first) + if len(data.shape) == 1: + channels = 1 + else: + channels = data.shape[1] with SoundFile(file, 'w', sample_rate, channels, *args, **kwargs) as f: f.write(data, channels_first=channels_first) From b56606e32f25c4f92f60910b276fb7ec20546aac Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Fri, 9 May 2014 16:10:56 +0200 Subject: [PATCH 18/24] fixed write function for real and test it --- pysoundfile.py | 3 +-- tests/test_pysoundfile.py | 7 +++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index aa2fde7..12330a5 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -754,13 +754,12 @@ def write(data, file, sample_rate, *args, **kwargs): """ data = _np.asarray(data) - channels_first = kwargs.pop('channels_first', True) if len(data.shape) == 1: channels = 1 else: channels = data.shape[1] with SoundFile(file, 'w', sample_rate, channels, *args, **kwargs) as f: - f.write(data, channels_first=channels_first) + f.write(data) def default_subtype(format): diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index a0e8e07..1427187 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -300,3 +300,10 @@ def test_write_indexing(self): f[:100] = data self.assertEqual(position, f.seek(0, sf.SEEK_CUR)) self.assertTrue(np.all(data == f[:100])) + +class TestWriteFunctions(TestWaveFile): + def test_write(self): + """write should write data""" + data = np.ones((100,2)) + sf.write(data, self.filename, self.sample_rate) + self.assertTrue(np.allclose(sf.read(self.filename)[0], data, atol=2**-15)) From 1d3a21703fa8561cdc77e52d0d87ff209d33f7e5 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Sun, 11 May 2014 20:17:45 +0200 Subject: [PATCH 19/24] some refactorings to improve clarity --- pysoundfile.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 12330a5..419fcc2 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -616,12 +616,12 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - current_frame = self.seek(0, SEEK_CUR, 'r') + remaining_frames = self.frames - self.seek(0, SEEK_CUR, 'r') if frames < 0: - frames = self.frames - current_frame - if current_frame + frames > self.frames and fill_value is None: - frames = self.frames - current_frame + frames = remaining_frames + if frames > remaining_frames and fill_value is None: + frames = remaining_frames if out is None: if always_2d or self.channels > 1: @@ -638,9 +638,7 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if not out.flags.c_contiguous: raise ValueError("out must be C-contiguous") - read_frames = len(out) - if read_frames + current_frame > self.frames: - read_frames = self.frames - current_frame + read_frames = min(len(out), remaining_frames) assert out.dtype.itemsize == _ffi.sizeof(ffi_type) @@ -683,7 +681,7 @@ def write(self, data): raise ValueError("data.dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - assert data.flags['C_CONTIGUOUS'] + assert data.flags.c_contiguous assert data.dtype.itemsize == _ffi.sizeof(ffi_type) writer = getattr(_snd, 'sf_writef_' + ffi_type) @@ -754,7 +752,7 @@ def write(data, file, sample_rate, *args, **kwargs): """ data = _np.asarray(data) - if len(data.shape) == 1: + if data.ndim == 1: channels = 1 else: channels = data.shape[1] From 6f43d1ca27b852170a3e99185af253fbfe1d91a0 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Sun, 11 May 2014 20:27:22 +0200 Subject: [PATCH 20/24] fixed broken docstring --- pysoundfile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 419fcc2..3196795 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -665,8 +665,6 @@ def write(self, data): The data must be provided as a (frames x channels) NumPy array or as one-dimensional array for mono signals. - Use channels_first=False if you want to provide a (channels x - frames) array. """ self._check_if_closed() From d331c890dd37ee9e2122fc2844eb747f7158bf2f Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Sun, 11 May 2014 20:40:34 +0200 Subject: [PATCH 21/24] added test for read with empty out --- tests/test_pysoundfile.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index 1427187..4b79328 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -225,6 +225,13 @@ def test_read_into_out(self): out_data = f.read(out=data) self.assertTrue(np.all(data == out_data)) + def test_read_into_zero_len_out(self): + """Reading into aa zero len out should not read anything""" + with sf.SoundFile(self.filename) as f: + data = np.empty((0, f.channels), dtype='float64') + out_data = f.read(out=data) + self.assertTrue(np.all(data == out_data)) + def test_read_into_out_over_end(self): """Reading into out over end should return shorter data and write into out""" with sf.SoundFile(self.filename) as f: From 06599da0baf85cb6f1113adffd4ff55eb4927158 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Mon, 12 May 2014 10:53:00 +0200 Subject: [PATCH 22/24] renamed read_frames to frames_to_read --- pysoundfile.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 3196795..39f8f43 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -638,22 +638,22 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if not out.flags.c_contiguous: raise ValueError("out must be C-contiguous") - read_frames = min(len(out), remaining_frames) + frames_to_read = min(len(out), remaining_frames) assert out.dtype.itemsize == _ffi.sizeof(ffi_type) reader = getattr(_snd, 'sf_readf_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) - read = reader(self._file, ptr, read_frames) + read_frames = reader(self._file, ptr, frames_to_read) self._handle_error() - assert read == read_frames + assert read_frames == frames_to_read - if read_frames == len(out): + if frames_to_read == len(out): return out elif fill_value is None: - return out[:read_frames] + return out[:frames_to_read] else: - out[read_frames:] = fill_value + out[frames_to_read:] = fill_value return out def write(self, data): From 74716377926acc683c2be712d97942a3793b47b2 Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Mon, 12 May 2014 11:20:18 +0200 Subject: [PATCH 23/24] Added checks for out dimensionality Also, according tests --- pysoundfile.py | 7 +++++++ tests/test_pysoundfile.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/pysoundfile.py b/pysoundfile.py index 39f8f43..4f4b5ae 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -628,6 +628,13 @@ def read(self, frames=-1, dtype='float64', always_2d=True, out = _np.empty((frames, self.channels), dtype) else: out = _np.empty(frames, dtype) + elif out.ndim not in (1, 2): + raise ValueError("out must be one- or two-dimensional") + elif out.ndim == 1 and self.channels != 1: + raise ValueError("out must have 2 dimensions for non-mono signals") + elif out.ndim == 2 and out.shape[1] != self.channels: + raise ValueError("two-dimensional out must have %i columns" % + self.channels) try: ffi_type = _ffi_types[out.dtype] diff --git a/tests/test_pysoundfile.py b/tests/test_pysoundfile.py index 4b79328..fe9e0ca 100644 --- a/tests/test_pysoundfile.py +++ b/tests/test_pysoundfile.py @@ -225,6 +225,35 @@ def test_read_into_out(self): out_data = f.read(out=data) self.assertTrue(np.all(data == out_data)) + def test_read_mono_into_out(self): + """Reading mono signal into out should return data and write into out""" + # create a dummy mono wave file + self.sample_rate = 44100 + self.channels = 1 + self.filename = 'test.wav' + self.data = np.ones((self.sample_rate, self.channels))*0.5 + with sf.SoundFile(self.filename, 'w', self.sample_rate, self.channels) as f: + f.write(self.data) + + with sf.SoundFile(self.filename) as f: + data = np.empty((100, f.channels), dtype='float64') + out_data = f.read(out=data) + self.assertTrue(np.all(data == out_data)) + + def test_read_into_out_with_too_many_channels(self): + """Reading into malformed out should throw an error""" + with sf.SoundFile(self.filename) as f: + data = np.empty((100, f.channels+1), dtype='float64') + with self.assertRaises(ValueError) as err: + out_data = f.read(out=data) + + def test_read_into_out_with_too_many_dimensions(self): + """Reading into malformed out should throw an error""" + with sf.SoundFile(self.filename) as f: + data = np.empty((100, f.channels, 1), dtype='float64') + with self.assertRaises(ValueError) as err: + out_data = f.read(out=data) + def test_read_into_zero_len_out(self): """Reading into aa zero len out should not read anything""" with sf.SoundFile(self.filename) as f: From 042d4254aa6e0191e4cef703e8f977c5bfefd54c Mon Sep 17 00:00:00 2001 From: Bastian Bechtold Date: Mon, 12 May 2014 11:35:51 +0200 Subject: [PATCH 24/24] refactored error checking into its own method --- pysoundfile.py | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 4f4b5ae..1414a4d 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -628,27 +628,12 @@ def read(self, frames=-1, dtype='float64', always_2d=True, out = _np.empty((frames, self.channels), dtype) else: out = _np.empty(frames, dtype) - elif out.ndim not in (1, 2): - raise ValueError("out must be one- or two-dimensional") - elif out.ndim == 1 and self.channels != 1: - raise ValueError("out must have 2 dimensions for non-mono signals") - elif out.ndim == 2 and out.shape[1] != self.channels: - raise ValueError("two-dimensional out must have %i columns" % - self.channels) - - try: - ffi_type = _ffi_types[out.dtype] - except KeyError: - raise ValueError("dtype must be one of %s" % - repr([dt.name for dt in _ffi_types])) - if not out.flags.c_contiguous: - raise ValueError("out must be C-contiguous") + self._check_frames_and_channels(out) frames_to_read = min(len(out), remaining_frames) - assert out.dtype.itemsize == _ffi.sizeof(ffi_type) - + ffi_type = _ffi_types[out.dtype] reader = getattr(_snd, 'sf_readf_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) read_frames = reader(self._file, ptr, frames_to_read) @@ -680,15 +665,9 @@ def write(self, data): data = _np.ascontiguousarray(data) - try: - ffi_type = _ffi_types[data.dtype] - except KeyError: - raise ValueError("data.dtype must be one of %s" % - repr([dt.name for dt in _ffi_types])) - - assert data.flags.c_contiguous - assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + self._check_frames_and_channels(data) + ffi_type = _ffi_types[data.dtype] writer = getattr(_snd, 'sf_writef_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) written = writer(self._file, ptr, len(data)) @@ -699,6 +678,25 @@ def write(self, data): self._info.frames = self.seek(0, SEEK_END, 'w') self.seek(curr, SEEK_SET, 'w') + def _check_frames_and_channels(self, data): + """Error if data is not compatible with the shape of the sound file. + + """ + if data.dtype not in _ffi_types: + raise ValueError("data.dtype must be one of %s" % + repr([dt.name for dt in _ffi_types])) + + if not data.flags.c_contiguous: + raise ValueError("data must be c_contiguous") + + if data.ndim not in (1, 2): + raise ValueError("data must be one- or two-dimensional") + elif data.ndim == 1 and self.channels != 1: + raise ValueError("data must have 2 dimensions for non-mono signals") + elif data.ndim == 2 and data.shape[1] != self.channels: + raise ValueError("two-dimensional data must have %i columns" % + self.channels) + def open(*args, **kwargs): """Return a new SoundFile object.