From 9a7b4d96c7e73b24dfd19114a09e76e724f68648 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Sun, 23 Mar 2014 22:32:24 +0100 Subject: [PATCH 1/5] read()/write() overhaul Replace ffi.new() with np.empty() and np.ascontiguousarray() Remove dicts for readers/writers read(): * reserve only as much memory as needed (if 'frames' is too large) * check return value of sf_readf_*() write(): * avoid copy if data has already the correct memory layout * check return value of sf_writef_*() * don't return the number of written frames! This is for symmetry with read() and it's redundant information anyway --- pysoundfile.py | 91 +++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 41 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 088e960..1dbc9e1 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -246,6 +246,13 @@ 'RF64': 'PCM_16', } +_ffi_types = { + _np.dtype('float64'): 'double', + _np.dtype('float32'): 'float', + _np.dtype('int32'): 'int', + _np.dtype('int16'): 'short' +} + _snd = _ffi.dlopen('sndfile') @@ -596,30 +603,32 @@ def read(self, frames=-1, dtype='float64'): self._check_if_closed() if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - formats = { - _np.float64: 'double[]', - _np.float32: 'float[]', - _np.int32: 'int[]', - _np.int16: 'short[]' - } - readers = { - _np.float64: _snd.sf_readf_double, - _np.float32: _snd.sf_readf_float, - _np.int32: _snd.sf_readf_int, - _np.int16: _snd.sf_readf_short - } + dtype = _np.dtype(dtype) - if dtype.type not in formats: - raise ValueError("Can only read int16, int32, float32 and float64") - if frames < 0: - curr = self.seek(0, SEEK_CUR, 'r') + try: + ffi_type = _ffi_types[dtype] + except KeyError: + raise ValueError("dtype must be one of %s" % + repr([dt.name for dt in _ffi_types])) + + curr = self.seek(0, SEEK_CUR, 'r') + if frames < 0 or curr + frames > self.frames: frames = self.frames - curr - data = _ffi.new(formats[dtype.type], 
frames*self.channels) - read = readers[dtype.type](self._file, data, frames) + + data = _np.empty((frames, self.channels), dtype=dtype, order='C') + + assert data.flags['C_CONTIGUOUS'] + assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + + reader = getattr(_snd, 'sf_readf_' + ffi_type) + ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) + read = reader(self._file, ptr, frames) self._handle_error() - np_data = _np.frombuffer(_ffi.buffer(data), dtype=dtype, - count=read*self.channels) - return _np.reshape(np_data, (read, self.channels)) + + if frames != read: + raise RuntimeError("Only %d of %d frames were read" % + (read, frames)) + return data def write(self, data): """Write a number of frames to the file. @@ -635,32 +644,32 @@ def write(self, data): self._check_if_closed() if self.mode == 'r': raise RuntimeError("Cannot write to file opened in read mode") - formats = { - _np.float64: 'double*', - _np.float32: 'float*', - _np.int32: 'int*', - _np.int16: 'short*' - } - writers = { - _np.float64: _snd.sf_writef_double, - _np.float32: _snd.sf_writef_float, - _np.int32: _snd.sf_writef_int, - _np.int16: _snd.sf_writef_short - } - if data.dtype.type not in writers: - raise ValueError("Data must be int16, int32, float32 or float64") - raw_data = _ffi.new('char[]', data.flatten().tostring()) - written = writers[data.dtype.type](self._file, - _ffi.cast( - formats[data.dtype.type], raw_data), - len(data)) + + # no copy is made if data has already the correct memory layout: + data = _np.ascontiguousarray(data) + + try: + ffi_type = _ffi_types[data.dtype] + except KeyError: + raise ValueError("data.dtype must be one of %s" % + repr([dt.name for dt in _ffi_types])) + + assert data.flags['C_CONTIGUOUS'] + assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + + frames = len(data) + writer = getattr(_snd, 'sf_writef_' + ffi_type) + ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) + written = writer(self._file, ptr, frames) self._handle_error() curr = self.seek(0, SEEK_CUR, 
'w') self._info.frames = self.seek(0, SEEK_END, 'w') self.seek(curr, SEEK_SET, 'w') - return written + if frames != written: + raise RuntimeError("Only %d of %d frames were written" % + (written, frames)) def default_subtype(format): From 53d1ecb18b51720de31c1e4ba7e39a5bbe2a6d80 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Mon, 21 Apr 2014 14:08:36 +0200 Subject: [PATCH 2/5] Add arguments always_2d, out and fill_value See also #16 If fewer frames are left in the file than would fit into out and fill_value=None, a smaller view into out is returned that contains only the valid frames. This includes several suggestions by @bastibe from #37. --- pysoundfile.py | 78 +++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 1dbc9e1..69cf861 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -586,7 +586,8 @@ def seek(self, frames, whence=SEEK_SET, which=None): raise ValueError("Invalid which: %s" % repr(which)) return _snd.sf_seek(self._file, frames, whence) - def read(self, frames=-1, dtype='float64'): + def read(self, frames=-1, dtype='float64', always_2d=True, + fill_value=None, out=None): """Read a number of frames from the file. Reads the given number of frames in the given data format from @@ -594,51 +595,72 @@ def read(self, frames=-1, dtype='float64'): position by the same number of frames. Use frames=-1 to read until the end of the file. - Returns the read data as a (frames x channels) NumPy array. + A two-dimensional NumPy array is returned, where the channels + are stored along the first dimension, i.e. as columns. + A two-dimensional array is returned even if the sound file has + only one channel. Use always_2d=False to return a + one-dimensional array in this case. - If there is not enough data left in the file to read, a - smaller NumPy array will be returned. + If out is specified, the data is written into the given NumPy + array.
In this case, the arguments frames, dtype and always_2d + are silently ignored! + + If there is less data left in the file than requested, the rest + of the frames are filled with fill_value. If fill_value=None, a + smaller array is returned. + If out is given, only a part of it is overwritten and a view + containing all valid frames is returned. """ self._check_if_closed() if self.mode == 'w': raise RuntimeError("Cannot read from file opened in write mode") - dtype = _np.dtype(dtype) + if out is None: + remaining_frames = self.frames - self.seek(0, SEEK_CUR, 'r') + if frames < 0 or (frames > remaining_frames and + fill_value is None): + frames = remaining_frames + if always_2d or self.channels > 1: + shape = frames, self.channels + else: + shape = frames, + out = _np.empty(shape, dtype, order='C') + else: + frames = len(out) + if not out.flags.c_contiguous: + raise ValueError("out must be C-contiguous") + try: - ffi_type = _ffi_types[dtype] + ffi_type = _ffi_types[out.dtype] except KeyError: raise ValueError("dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - curr = self.seek(0, SEEK_CUR, 'r') - if frames < 0 or curr + frames > self.frames: - frames = self.frames - curr - - data = _np.empty((frames, self.channels), dtype=dtype, order='C') - - assert data.flags['C_CONTIGUOUS'] - assert data.dtype.itemsize == _ffi.sizeof(ffi_type) + assert out.dtype.itemsize == _ffi.sizeof(ffi_type) reader = getattr(_snd, 'sf_readf_' + ffi_type) - ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) - read = reader(self._file, ptr, frames) + ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) + frames = reader(self._file, ptr, frames) self._handle_error() - if frames != read: - raise RuntimeError("Only %d of %d frames were read" % - (read, frames)) - return data + if len(out) > frames: + if fill_value is None: + out = out[:frames] + else: + out[frames:] = fill_value + + return out def write(self, data): """Write a number of frames to the file. 
- Writes a number of frames to the current read position in the - file. This also advances the read position by the same number + Writes a number of frames to the current write position in the + file. This also advances the write position by the same number of frames and enlarges the file if necessary. The data must be provided as a (frames x channels) NumPy - array. + array or as one-dimensional array for mono signals. """ self._check_if_closed() @@ -654,23 +676,19 @@ def write(self, data): raise ValueError("data.dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) - assert data.flags['C_CONTIGUOUS'] + assert data.flags.c_contiguous assert data.dtype.itemsize == _ffi.sizeof(ffi_type) - frames = len(data) writer = getattr(_snd, 'sf_writef_' + ffi_type) ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) - written = writer(self._file, ptr, frames) + written = writer(self._file, ptr, len(data)) self._handle_error() + assert written == len(data) curr = self.seek(0, SEEK_CUR, 'w') self._info.frames = self.seek(0, SEEK_END, 'w') self.seek(curr, SEEK_SET, 'w') - if frames != written: - raise RuntimeError("Only %d of %d frames were written" % - (written, frames)) - def default_subtype(format): """Return default subtype for given format.""" From d73ef60238e5fd1924d2e9485527515ee900e287 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Mon, 12 May 2014 22:36:07 +0200 Subject: [PATCH 3/5] Factor out _read_or_write() --- pysoundfile.py | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 69cf861..278141a 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -586,6 +586,22 @@ def seek(self, frames, whence=SEEK_SET, which=None): raise ValueError("Invalid which: %s" % repr(which)) return _snd.sf_seek(self._file, frames, whence) + def _read_or_write(self, funcname, array, frames): + try: + ffi_type = _ffi_types[array.dtype] + except KeyError: + raise ValueError("dtype must 
be one of %s" % + repr([dt.name for dt in _ffi_types])) + + assert array.flags.c_contiguous + assert array.dtype.itemsize == _ffi.sizeof(ffi_type) + + func = getattr(_snd, funcname + ffi_type) + ptr = _ffi.cast(ffi_type + '*', array.ctypes.data) + frames = func(self._file, ptr, frames) + self._handle_error() + return frames + def read(self, frames=-1, dtype='float64', always_2d=True, fill_value=None, out=None): """Read a number of frames from the file. @@ -631,18 +647,7 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if not out.flags.c_contiguous: raise ValueError("out must be C-contiguous") - try: - ffi_type = _ffi_types[out.dtype] - except KeyError: - raise ValueError("dtype must be one of %s" % - repr([dt.name for dt in _ffi_types])) - - assert out.dtype.itemsize == _ffi.sizeof(ffi_type) - - reader = getattr(_snd, 'sf_readf_' + ffi_type) - ptr = _ffi.cast(ffi_type + '*', out.ctypes.data) - frames = reader(self._file, ptr, frames) - self._handle_error() + frames = self._read_or_write('sf_readf_', out, frames) if len(out) > frames: if fill_value is None: @@ -670,19 +675,7 @@ def write(self, data): # no copy is made if data has already the correct memory layout: data = _np.ascontiguousarray(data) - try: - ffi_type = _ffi_types[data.dtype] - except KeyError: - raise ValueError("data.dtype must be one of %s" % - repr([dt.name for dt in _ffi_types])) - - assert data.flags.c_contiguous - assert data.dtype.itemsize == _ffi.sizeof(ffi_type) - - writer = getattr(_snd, 'sf_writef_' + ffi_type) - ptr = _ffi.cast(ffi_type + '*', data.ctypes.data) - written = writer(self._file, ptr, len(data)) - self._handle_error() + written = self._read_or_write('sf_writef_', data, len(data)) assert written == len(data) curr = self.seek(0, SEEK_CUR, 'w') From add61d59f9005b796398625bc8cb66c547a51f74 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Sat, 17 May 2014 19:13:11 +0200 Subject: [PATCH 4/5] Improvements to _read_or_write() * add comment for documentation * add 
check for array shape * add an assertion --- pysoundfile.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pysoundfile.py b/pysoundfile.py index 278141a..28f5273 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -587,6 +587,12 @@ def seek(self, frames, whence=SEEK_SET, which=None): return _snd.sf_seek(self._file, frames, whence) def _read_or_write(self, funcname, array, frames): + # Do some error checking and call into libsndfile + if (array.ndim not in (1, 2) or + array.ndim == 1 and self.channels != 1 or + array.ndim == 2 and array.shape[1] != self.channels): + raise ValueError("Invalid shape: %s" % repr(array.shape)) + try: ffi_type = _ffi_types[array.dtype] except KeyError: @@ -595,6 +601,7 @@ def _read_or_write(self, funcname, array, frames): assert array.flags.c_contiguous assert array.dtype.itemsize == _ffi.sizeof(ffi_type) + assert array.size == frames * self.channels func = getattr(_snd, funcname + ffi_type) ptr = _ffi.cast(ffi_type + '*', array.ctypes.data) From ab33e660f1493d3e3289c29df149f1fc7774b9c7 Mon Sep 17 00:00:00 2001 From: Matthias Geier Date: Mon, 26 May 2014 11:47:36 +0200 Subject: [PATCH 5/5] Refactor _check_array() out of _read_or_write() --- pysoundfile.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pysoundfile.py b/pysoundfile.py index 28f5273..b2fc518 100644 --- a/pysoundfile.py +++ b/pysoundfile.py @@ -586,19 +586,20 @@ def seek(self, frames, whence=SEEK_SET, which=None): raise ValueError("Invalid which: %s" % repr(which)) return _snd.sf_seek(self._file, frames, whence) - def _read_or_write(self, funcname, array, frames): - # Do some error checking and call into libsndfile + def _check_array(self, array): + # Do some error checking if (array.ndim not in (1, 2) or array.ndim == 1 and self.channels != 1 or array.ndim == 2 and array.shape[1] != self.channels): raise ValueError("Invalid shape: %s" % repr(array.shape)) - try: - ffi_type = _ffi_types[array.dtype] - except KeyError: + if 
array.dtype not in _ffi_types: raise ValueError("dtype must be one of %s" % repr([dt.name for dt in _ffi_types])) + def _read_or_write(self, funcname, array, frames): + # Call into libsndfile + ffi_type = _ffi_types[array.dtype] assert array.flags.c_contiguous assert array.dtype.itemsize == _ffi.sizeof(ffi_type) assert array.size == frames * self.channels @@ -654,6 +655,7 @@ def read(self, frames=-1, dtype='float64', always_2d=True, if not out.flags.c_contiguous: raise ValueError("out must be C-contiguous") + self._check_array(out) frames = self._read_or_write('sf_readf_', out, frames) if len(out) > frames: @@ -682,6 +684,7 @@ def write(self, data): # no copy is made if data has already the correct memory layout: data = _np.ascontiguousarray(data) + self._check_array(data) written = self._read_or_write('sf_writef_', data, len(data)) assert written == len(data)