Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1f22833
Add open(), read() and write() functions
mgeier Apr 20, 2014
4258760
read()/write() overhaul
mgeier Mar 23, 2014
202fe2b
Add arguments channels_first, always_2d, out and fill_value
mgeier Apr 21, 2014
5d7236c
Change handling of out argument
mgeier Apr 24, 2014
f64da11
Add helper function _check_frames_and_channels()
mgeier Apr 27, 2014
ace9936
refactored tests for improved clarity
bastibe Apr 28, 2014
49a16c8
Simplified read and write
bastibe Apr 28, 2014
db2cb0b
refactored `read` to accept `out`
bastibe Apr 30, 2014
a700fcf
re-integrated read and readinto
bastibe Apr 30, 2014
161f97e
Fixed documentation of test case
bastibe Apr 30, 2014
94c2a00
Added tests for out argument of read
bastibe Apr 30, 2014
564e9bc
removed duplicated self.seek(0, SEEK_CUR, 'r')
bastibe May 9, 2014
1a154da
no more reshaping out after creation
bastibe May 9, 2014
4d28a10
no more allocating of unused memory
bastibe May 9, 2014
28527a7
no more returning views of all the data
bastibe May 9, 2014
5c68a04
removed unnecessary nesting
bastibe May 9, 2014
877fc3a
fixed write function
bastibe May 9, 2014
b56606e
fixed write function for real and test it
bastibe May 9, 2014
1d3a217
some refactorings to improve clarity
bastibe May 11, 2014
6f43d1c
fixed broken docstring
bastibe May 11, 2014
d331c89
added test for read with empty out
bastibe May 11, 2014
06599da
renamed read_frames to frames_to_read
bastibe May 12, 2014
7471637
Added checks for out dimensionality
bastibe May 12, 2014
042d425
refactored error checking into its own method
bastibe May 12, 2014
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 149 additions & 49 deletions pysoundfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,13 @@
'RF64': 'PCM_16',
}

# Maps each supported NumPy dtype to the name of the matching C sample type.
# The name is used in two places: appended to 'sf_readf_' / 'sf_writef_' to
# pick the libsndfile accessor function, and suffixed with '*' to form the
# pointer type that the array's data address is cast to.
_ffi_types = {
_np.dtype('float64'): 'double',
_np.dtype('float32'): 'float',
_np.dtype('int32'): 'int',
_np.dtype('int16'): 'short'
}

# Handle to the 'sndfile' shared library; the sf_* functions are looked up
# as attributes of this object.
_snd = _ffi.dlopen('sndfile')


Expand Down Expand Up @@ -579,88 +586,181 @@ def seek(self, frames, whence=SEEK_SET, which=None):
raise ValueError("Invalid which: %s" % repr(which))
return _snd.sf_seek(self._file, frames, whence)

def read(self, frames=-1, dtype='float64'):
def read(self, frames=-1, dtype='float64', always_2d=True,
fill_value=None, out=None):
"""Read a number of frames from the file.

Reads the given number of frames in the given data format from
the current read position. This also advances the read
position by the same number of frames.
Use frames=-1 to read until the end of the file.

Returns the read data as a (frames x channels) NumPy array.
By default, a two-dimensional array is returned even if the
sound file has only one channel. Use always_2d=False to return
a one-dimensional array in this case.

If there is less data left in the file than requested, a
shorter array is returned. Use fill_value to always return the
given number of frames and fill all remaining frames with
fill_value.

If there is not enough data left in the file to read, a
smaller NumPy array will be returned.
If out is given as a numpy array, the data is written into
that array. If there is not enough data left in the file to
fill the array, the rest of the frames are ignored and a
smaller view to the array is returned. Use fill_value to fill
the rest of the array and always return the full-length array.

"""

self._check_if_closed()
if self.mode == 'w':
raise RuntimeError("Cannot read from file opened in write mode")
formats = {
_np.float64: 'double[]',
_np.float32: 'float[]',
_np.int32: 'int[]',
_np.int16: 'short[]'
}
readers = {
_np.float64: _snd.sf_readf_double,
_np.float32: _snd.sf_readf_float,
_np.int32: _snd.sf_readf_int,
_np.int16: _snd.sf_readf_short
}
dtype = _np.dtype(dtype)
if dtype.type not in formats:
raise ValueError("Can only read int16, int32, float32 and float64")

remaining_frames = self.frames - self.seek(0, SEEK_CUR, 'r')

if frames < 0:
curr = self.seek(0, SEEK_CUR, 'r')
frames = self.frames - curr
data = _ffi.new(formats[dtype.type], frames*self.channels)
read = readers[dtype.type](self._file, data, frames)
frames = remaining_frames
if frames > remaining_frames and fill_value is None:
frames = remaining_frames

if out is None:
if always_2d or self.channels > 1:
out = _np.empty((frames, self.channels), dtype)
else:
out = _np.empty(frames, dtype)

self._check_frames_and_channels(out)

frames_to_read = min(len(out), remaining_frames)

ffi_type = _ffi_types[out.dtype]
reader = getattr(_snd, 'sf_readf_' + ffi_type)
ptr = _ffi.cast(ffi_type + '*', out.ctypes.data)
read_frames = reader(self._file, ptr, frames_to_read)
self._handle_error()
np_data = _np.frombuffer(_ffi.buffer(data), dtype=dtype,
count=read*self.channels)
return _np.reshape(np_data, (read, self.channels))
assert read_frames == frames_to_read

if frames_to_read == len(out):
return out
elif fill_value is None:
return out[:frames_to_read]
else:
out[frames_to_read:] = fill_value
return out
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite like this if statement.
I think mine was easier to understand:

        if frames > valid_frames:
            if fill_value is None:
                out = out[:valid_frames]
            else:
                out[valid_frames:] = fill_value

        return out

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree. The nice thing about my version is that it does not need nested logic. The two pieces of code are functionally equivalent, but my code makes it clear that there are three branches, whereas your code "hides" the first else.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is largely (but not entirely) a matter of taste.

Let's compare directly:

if frames == len(out):
    return out
elif fill_value is None:
    return out[:frames]
else:
    out[frames:] = fill_value
    return out

vs.

if len(out) > frames:
    if fill_value is None:
        out = out[:frames]
    else:
        out[frames:] = fill_value

return out

Both are functionally equivalent (if I'm not mistaken) except in the case that len(out) < frames, which I guess can never happen (at least it shouldn't).
I think the nested logic actually helps understanding; in your suggestion, the nested logic is somehow "folded into" the elif condition.
I think the second example follows exactly my understanding (but probably only mine) about what's going on:

  1. we have out
  2. if a certain condition is met (out is larger than frames), we change out, otherwise we don't do anything with it
    1. this one condition has a subcondition, which decides how exactly we change out (by either "cutting off" the rest or by filling it with something)
  3. we return out

I think this is a very common pattern: changing something if a certain condition is met, doing nothing if not.
This is also the reason why the else part of an if statement is optional in probably all languages that have an if statement [citation needed].

In addition to having less clear logic (IMHO), the first example also has a code smell, because common functionality (returning something) is repeated in each branch.


def write(self, data):
"""Write a number of frames to the file.

Writes a number of frames to the current read position in the
file. This also advances the read position by the same number
Writes a number of frames to the current write position in the
file. This also advances the write position by the same number
of frames and enlarges the file if necessary.

The data must be provided as a (frames x channels) NumPy
array.
array or as one-dimensional array for mono signals.

"""
self._check_if_closed()
if self.mode == 'r':
raise RuntimeError("Cannot write to file opened in read mode")
formats = {
_np.float64: 'double*',
_np.float32: 'float*',
_np.int32: 'int*',
_np.int16: 'short*'
}
writers = {
_np.float64: _snd.sf_writef_double,
_np.float32: _snd.sf_writef_float,
_np.int32: _snd.sf_writef_int,
_np.int16: _snd.sf_writef_short
}
if data.dtype.type not in writers:
raise ValueError("Data must be int16, int32, float32 or float64")
raw_data = _ffi.new('char[]', data.flatten().tostring())
written = writers[data.dtype.type](self._file,
_ffi.cast(
formats[data.dtype.type], raw_data),
len(data))

data = _np.ascontiguousarray(data)

self._check_frames_and_channels(data)

ffi_type = _ffi_types[data.dtype]
writer = getattr(_snd, 'sf_writef_' + ffi_type)
ptr = _ffi.cast(ffi_type + '*', data.ctypes.data)
written = writer(self._file, ptr, len(data))
self._handle_error()
assert written == len(data)

curr = self.seek(0, SEEK_CUR, 'w')
self._info.frames = self.seek(0, SEEK_END, 'w')
self.seek(curr, SEEK_SET, 'w')

return written
def _check_frames_and_channels(self, data):
    """Raise ValueError if data cannot be exchanged with this sound file.

    The checks are performed in this order:

    1. data.dtype must be one of the keys of _ffi_types.
    2. data must be C-contiguous.
    3. data must be one- or two-dimensional; a one-dimensional array
       is only accepted for a single-channel file, and a
       two-dimensional array must have exactly self.channels columns.

    Nothing is returned when all checks pass.

    """
    if data.dtype not in _ffi_types:
        supported = [dt.name for dt in _ffi_types]
        raise ValueError("data.dtype must be one of %s" % repr(supported))

    if not data.flags.c_contiguous:
        raise ValueError("data must be c_contiguous")

    ndim = data.ndim
    if ndim == 1:
        # A flat array carries no channel axis, so it can only be mono.
        if self.channels != 1:
            raise ValueError(
                "data must have 2 dimensions for non-mono signals")
    elif ndim == 2:
        # The second axis is the channel axis and must match the file.
        if data.shape[1] != self.channels:
            raise ValueError("two-dimensional data must have %i columns" %
                             self.channels)
    else:
        raise ValueError("data must be one- or two-dimensional")


def open(*args, **kwargs):
    """Create a SoundFile object and return it.

    All positional and keyword arguments are handed on unchanged to
    SoundFile.__init__().

    """
    sound_file = SoundFile(*args, **kwargs)
    return sound_file


def read(file, frames=-1, start=None, stop=None, **kwargs):
    """Read a sound file and return its contents as NumPy array.

    The number of frames to read can be specified with frames, the
    position to start reading can be specified with start.
    By default, the whole file is read from the beginning.
    Alternatively, a range can be specified with start and stop.
    Both start and stop accept negative indices to specify positions
    relative to the end of the file; they are clipped to the file
    length in the same way as slice indices.

    Every keyword argument whose name matches a parameter of
    SoundFile.read() is forwarded to that method.
    All further keyword arguments are forwarded to
    SoundFile.__init__().

    Returns a tuple (data, sample_rate).

    Raises RuntimeError if both a non-negative frames and stop are
    given.

    """
    # inspect.getargspec() was removed in Python 3.11; getfullargspec()
    # is its replacement on Python 3, while Python 2 only has the former.
    try:
        from inspect import getfullargspec as argspec
    except ImportError:
        from inspect import getargspec as argspec

    if frames >= 0 and stop is not None:
        raise RuntimeError("Only one of (frames, stop) may be used")

    # Split the keyword arguments: the ones SoundFile.read() knows go
    # to the read call, the rest stay in kwargs for the constructor.
    read_kwargs = {}
    for arg in argspec(SoundFile.read).args:
        if arg in kwargs:
            read_kwargs[arg] = kwargs.pop(arg)

    with SoundFile(file, 'r', **kwargs) as f:
        # slice.indices() resolves None and negative values and clips
        # both ends to the range [0, f.frames].
        start, stop, _ = slice(start, stop).indices(f.frames)
        if frames < 0:
            # Honour stop: read exactly the frames in [start, stop).
            # Without stop this equals "everything up to the end".
            frames = max(stop - start, 0)
        f.seek(start, SEEK_SET)
        data = f.read(frames, **read_kwargs)
        sample_rate = f.sample_rate
    return data, sample_rate


def write(data, file, sample_rate, *args, **kwargs):
    """Write the contents of a NumPy array to a sound file.

    An existing file of the same name is overwritten!

    data is converted with numpy.asarray().  A one-dimensional array
    produces a mono file; otherwise the size of the second axis is
    used as the number of channels, i.e. each column is one channel.
    All further positional and keyword arguments are forwarded to
    SoundFile.__init__() after the channel count.

    NOTE(review): earlier documentation mentioned a channels_first
    option; this function does not handle it itself, so it would just
    be passed through to SoundFile.__init__() -- confirm it is
    supported there.

    Example usage:

        import pysoundfile as sf
        sf.write(myarray, 'myfile.wav', 44100, 'PCM_24')

    """
    samples = _np.asarray(data)
    n_channels = 1 if samples.ndim == 1 else samples.shape[1]
    with SoundFile(file, 'w', sample_rate, n_channels,
                   *args, **kwargs) as sound_file:
        sound_file.write(samples)


def default_subtype(format):
Expand Down
Loading