From d02fb28ff652b0a42882b01c3040b48e3580146f Mon Sep 17 00:00:00 2001 From: "tdos.apone" Date: Sun, 8 Jan 2017 15:18:48 -0600 Subject: [PATCH 1/5] first pass implementation of read_raw/write_raw + adds support for reading/writing files directly from byte buffers for additional 'dtype' formats: - int24 - int8 - uint8 Notes: + raw_read/write only supports dtype argument since 'int24' has no native c type + much of the functionality of _check_buffer/_cdata_io/_check_dtype has been rewritten into these functions specifically because the current functions aren't intended to handle int24 types --- soundfile.py | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 201 insertions(+), 1 deletion(-) diff --git a/soundfile.py b/soundfile.py index 307dbae..e258819 100644 --- a/soundfile.py +++ b/soundfile.py @@ -42,6 +42,8 @@ SFC_FILE_TRUNCATE = 0x1080, SFC_SET_CLIPPING = 0x10C0, + SFC_RAW_DATA_NEEDS_ENDSWAP = 0x1110, + SFC_SET_SCALE_FLOAT_INT_READ = 0x1014, SFC_SET_SCALE_INT_FLOAT_WRITE = 0x1015, } ; @@ -258,7 +260,19 @@ 'float64': 'double', 'float32': 'float', 'int32': 'int', - 'int16': 'short' + 'int16': 'short', + 'int8': 'char', + 'uint8': 'unsigned char', +} + +_samplesize = { + 'float64': 8, + 'float32': 4, + 'int32': 4, + 'int24': 3, + 'int16': 2, + 'int8': 1, + 'uint8': 1, } try: @@ -968,6 +982,130 @@ def read(self, frames=-1, dtype='float64', always_2d=False, out[frames:] = fill_value return out + def read_raw(self, frames=-1, dtype=None): + """\ + The raw read and write functions read raw audio data from the audio file + (not to be confused with reading RAW header-less PCM files). The number + of bytes read or written must always be an integer multiple of the + number of channels multiplied by the number of bytes required to + represent one sample from one channel. + + The raw read and write functions return the number of bytes read or + written (which should be the same as the bytes parameter). + + Note : The result of using of both regular reads/writes and raw + reads/writes on compressed file formats other than SF_FORMAT_ALAW and + SF_FORMAT_ULAW is undefined. + + Parameters + ---------- + frames : int, optional + The number of frames to read. If `frames < 0`, the whole + rest of the file is read. + + Returns + ------- + buffer + A buffer containing the read data. + + See Also + -------- + read_raw_into, .read, buffer_read + + """ + #_check_dtype + try: + _samplesize[dtype] + except KeyError: + raise ValueError("dtype must be one of {0!r}".format( + sorted(_samplesize.keys()))) + + frames = self._check_frames(frames, fill_value=None) + nbytes = frames * self.channels * _samplesize[dtype] + cdata = _ffi.new('unsigned char[]', nbytes) + + # _cdata_io + self._check_if_closed() + if self.seekable(): + curr = self.tell() + read_bytes = _snd.sf_read_raw(self._file, cdata, nbytes) + _error_check(self._errorcode) + if self.seekable(): + self.seek(curr + frames, SEEK_SET) # Update read & write position + + assert read_bytes == nbytes + + endswap = _snd.sf_command(self._file, _snd.SFC_RAW_DATA_NEEDS_ENDSWAP, + _ffi.NULL, 0) + assert not endswap + + return _ffi.buffer(cdata) + + def read_raw_into(self, buffer, dtype=None): + """\ + The raw read and write functions read raw audio data from the audio file + (not to be confused with reading RAW header-less PCM files). The number + of bytes read or written must always be an integer multiple of the + number of channels multiplied by the number of bytes required to + represent one sample from one channel. + + The raw read and write functions return the number of bytes read or + written (which should be the same as the bytes parameter). + + Note : The result of using of both regular reads/writes and raw + reads/writes on compressed file formats other than SF_FORMAT_ALAW and + SF_FORMAT_ULAW is undefined. + + Parameters + ---------- + frames : int, optional + The number of frames to read. If `frames < 0`, the whole + rest of the file is read. + + Returns + ------- + buffer + A buffer containing the read data. + + See Also + -------- + buffer_read_into, .read, buffer_write + + """ + #_check_dtype + try: + _samplesize[dtype] + except KeyError: + raise ValueError("dtype must be one of {0!r}".format( + sorted(_samplesize.keys()))) + + #_check_buffer + if not isinstance(buffer, bytes): + buffer = _ffi.from_buffer(buffer) + else: + cdata = buffer + frames, remainder = divmod(len(cdata), + self.channels * _samplesize[dtype]) + if remainder: + raise ValueError("Data size must be a multiple of frame size") + + nbytes = frames * self.channels * _samplesize[dtype] + + # _cdata_io + self._check_if_closed() + if self.seekable(): + curr = self.tell() + read_bytes = _snd.sf_read_raw(self._file, cdata, nbytes) + _error_check(self._errorcode) + if self.seekable(): + self.seek(curr + frames, SEEK_SET) # Update read & write position + + endswap = _snd.sf_command(self._file, _snd.SFC_RAW_DATA_NEEDS_ENDSWAP, + _ffi.NULL, 0) + assert not endswap + + return read_bytes + def buffer_read(self, frames=-1, ctype=None, dtype=None): """Read from the file and return data as buffer object. @@ -1091,6 +1229,68 @@ def write(self, data): assert written == len(data) self._update_len(written) + def write_raw(self, data, dtype=None): + """\ + The raw read and write functions read raw audio data from the audio file + (not to be confused with reading RAW header-less PCM files). The number + of bytes read or written must always be an integer multiple of the + number of channels multiplied by the number of bytes required to + represent one sample from one channel. + + The raw read and write functions return the number of bytes read or + written (which should be the same as the bytes parameter). + + Note : The result of using of both regular reads/writes and raw + reads/writes on compressed file formats other than SF_FORMAT_ALAW and + SF_FORMAT_ULAW is undefined. + + Parameters + ---------- + frames : int, optional + The number of frames to read. If `frames < 0`, the whole + rest of the file is read. + + Returns + ------- + buffer + A buffer containing the read data. + + See Also + -------- + buffer_read_into, .read, buffer_write + + """ + #_check_dtype + try: + _samplesize[dtype] + except KeyError: + raise ValueError("dtype must be one of {0!r}".format( + sorted(_samplesize.keys()))) + + #_check_buffer + if not isinstance(data, bytes): + cdata = _ffi.from_buffer(data) + else: + cdata = data + frames, remainder = divmod(len(cdata), + self.channels * _samplesize[dtype]) + if remainder: + raise ValueError("Data size must be a multiple of frame size") + + nbytes = frames * self.channels * _samplesize[dtype] + + # implements _cdata_io + self._check_if_closed() + if self.seekable(): + curr = self.tell() + written = _snd.sf_write_raw(self._file, cdata, nbytes) + _error_check(self._errorcode) + if self.seekable(): + self.seek(curr + frames, SEEK_SET) # Update read & write position + + assert written == nbytes + self._update_len(frames) + def buffer_write(self, data, ctype=None, dtype=None): """Write audio data from a buffer/bytes object to the file. From e0c9880d08353d1fdee86fee01c9fb5fc57d9f19 Mon Sep 17 00:00:00 2001 From: "tdos.apone" Date: Sun, 8 Jan 2017 16:12:20 -0600 Subject: [PATCH 2/5] updated raw_read/write functions docs --- soundfile.py | 96 +++++++++++++++++++++++++--------------------------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/soundfile.py b/soundfile.py index e258819..aa9da97 100644 --- a/soundfile.py +++ b/soundfile.py @@ -796,6 +796,16 @@ def extra_info(self): info, _ffi.sizeof(info)) return _ffi.string(info).decode() + @property + def needs_endswap(self): + """\ + Determine if raw data read using + :meth:`.read_raw`/:meth:`.read_raw_into` needs to be end swapped on the + host CPU. + """ + return _snd.sf_command(self._file, _snd.SFC_RAW_DATA_NEEDS_ENDSWAP, + _ffi.NULL, 0) + # avoid confusion if something goes wrong before assigning self._file: _file = None @@ -984,24 +994,20 @@ def read(self, frames=-1, dtype='float64', always_2d=False, def read_raw(self, frames=-1, dtype=None): """\ - The raw read and write functions read raw audio data from the audio file - (not to be confused with reading RAW header-less PCM files). The number - of bytes read or written must always be an integer multiple of the - number of channels multiplied by the number of bytes required to - represent one sample from one channel. - - The raw read and write functions return the number of bytes read or - written (which should be the same as the bytes parameter). + Read raw audio data from the audio file (not to be confused with reading + RAW header-less PCM files). Note : The result of using of both regular reads/writes and raw - reads/writes on compressed file formats other than SF_FORMAT_ALAW and - SF_FORMAT_ULAW is undefined. + reads/writes on compressed file formats other than ALAW and ULAW is + undefined. Parameters ---------- frames : int, optional The number of frames to read. If `frames < 0`, the whole rest of the file is read. + dtype : {'float64', 'float32', 'int32', 'int24', 'int16', 'int8', 'uint8'} + Audio data sample format. Returns ------- @@ -1010,7 +1016,7 @@ def read_raw(self, frames=-1, dtype=None): See Also -------- - read_raw_into, .read, buffer_read + read_raw_into, write_raw """ #_check_dtype @@ -1043,33 +1049,35 @@ def read_raw(self, frames=-1, dtype=None): def read_raw_into(self, buffer, dtype=None): """\ - The raw read and write functions read raw audio data from the audio file - (not to be confused with reading RAW header-less PCM files). The number - of bytes read or written must always be an integer multiple of the - number of channels multiplied by the number of bytes required to - represent one sample from one channel. + Read from the file into a given buffer object. - The raw read and write functions return the number of bytes read or - written (which should be the same as the bytes parameter). + Fills the given `buffer` with frames in the given data format + starting at the current read/write position (which can be + changed with :meth:`.seek`) until the buffer is full or the end + of the file is reached. This advances the read/write position + by the number of frames that were read. Note : The result of using of both regular reads/writes and raw - reads/writes on compressed file formats other than SF_FORMAT_ALAW and - SF_FORMAT_ULAW is undefined. + reads/writes on compressed file formats other than ALAW and ULAW is + undefined. Parameters ---------- - frames : int, optional - The number of frames to read. If `frames < 0`, the whole - rest of the file is read. + buffer : writable buffer + Audio frames from the file are written to this buffer. + dtype : {'float64', 'float32', 'int32', 'int24', 'int16', 'int8', 'uint8'} + Audio data sample format. Returns ------- - buffer - A buffer containing the read data. + int + The number of frames that were read from the file. + This can be less than the size of `buffer`. + The rest of the buffer is not filled with meaningful data. See Also -------- - buffer_read_into, .read, buffer_write + read_raw, write_raw """ #_check_dtype @@ -1100,11 +1108,8 @@ def read_raw_into(self, buffer, dtype=None): if self.seekable(): self.seek(curr + frames, SEEK_SET) # Update read & write position - endswap = _snd.sf_command(self._file, _snd.SFC_RAW_DATA_NEEDS_ENDSWAP, - _ffi.NULL, 0) - assert not endswap - - return read_bytes + assert not self.endian_swapped + return frames def buffer_read(self, frames=-1, ctype=None, dtype=None): """Read from the file and return data as buffer object. @@ -1231,33 +1236,24 @@ def write(self, data): def write_raw(self, data, dtype=None): """\ - The raw read and write functions read raw audio data from the audio file - (not to be confused with reading RAW header-less PCM files). The number - of bytes read or written must always be an integer multiple of the - number of channels multiplied by the number of bytes required to - represent one sample from one channel. - - The raw read and write functions return the number of bytes read or - written (which should be the same as the bytes parameter). + Write raw audio data to the audio file (not to be confused with writing + RAW header-less PCM files). Note : The result of using of both regular reads/writes and raw - reads/writes on compressed file formats other than SF_FORMAT_ALAW and - SF_FORMAT_ULAW is undefined. + reads/writes on compressed file formats other than ALAW and ULAW is + undefined. Parameters ---------- - frames : int, optional - The number of frames to read. If `frames < 0`, the whole - rest of the file is read. - - Returns - ------- - buffer - A buffer containing the read data. + data : buffer or bytes + A buffer or bytes object containing the audio data to be + written. + dtype : {'float64', 'float32', 'int32', 'int24', 'int16', 'int8', 'uint8'} + Audio data sample format. See Also -------- - buffer_read_into, .read, buffer_write + read_raw """ #_check_dtype From 9802c238f043d67fcbd621697fa1c009a5f72493 Mon Sep 17 00:00:00 2001 From: "tdos.apone" Date: Sun, 8 Jan 2017 16:19:51 -0600 Subject: [PATCH 3/5] expose needs_endswap --- soundfile.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/soundfile.py b/soundfile.py index aa9da97..f9ffb45 100644 --- a/soundfile.py +++ b/soundfile.py @@ -1041,10 +1041,6 @@ def read_raw(self, frames=-1, dtype=None): assert read_bytes == nbytes - endswap = _snd.sf_command(self._file, _snd.SFC_RAW_DATA_NEEDS_ENDSWAP, - _ffi.NULL, 0) - assert not endswap - return _ffi.buffer(cdata) def read_raw_into(self, buffer, dtype=None): @@ -1108,7 +1104,6 @@ def read_raw_into(self, buffer, dtype=None): if self.seekable(): self.seek(curr + frames, SEEK_SET) # Update read & write position - assert not self.endian_swapped return frames def buffer_read(self, frames=-1, ctype=None, dtype=None): From 431f6f134f92505bf9d16333df5d0ce64b17c0db Mon Sep 17 00:00:00 2001 From: "tdos.apone" Date: Sun, 8 Jan 2017 16:51:25 -0600 Subject: [PATCH 4/5] removed additional type from _ffi_types --- soundfile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/soundfile.py b/soundfile.py index f9ffb45..0cda9e9 100644 --- a/soundfile.py +++ b/soundfile.py @@ -261,8 +261,6 @@ 'float32': 'float', 'int32': 'int', 'int16': 'short', - 'int8': 'char', - 'uint8': 'unsigned char', } _samplesize = { From a367046b99742fa7ae573af9fe0d03b3b2b4eb00 Mon Sep 17 00:00:00 2001 From: "tdos.apone" Date: Sun, 8 Jan 2017 16:58:07 -0600 Subject: [PATCH 5/5] fixed a variable name mistake in read_raw_into --- soundfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/soundfile.py b/soundfile.py index 0cda9e9..3abb0f0 100644 --- a/soundfile.py +++ b/soundfile.py @@ -1083,7 +1083,7 @@ def read_raw_into(self, buffer, dtype=None): #_check_buffer if not isinstance(buffer, bytes): - buffer = _ffi.from_buffer(buffer) + cdata = _ffi.from_buffer(buffer) else: cdata = buffer frames, remainder = divmod(len(cdata),