diff --git a/doc/BufMFCC.rst b/doc/BufMFCC.rst index e7baf298..ae0d0914 100644 --- a/doc/BufMFCC.rst +++ b/doc/BufMFCC.rst @@ -12,8 +12,6 @@ Other than the 0th coefficient, MFCCs are unchanged by differences in the overall energy of the spectrum (which relates to how we perceive loudness). This means that timbres with similar spectral contours, but different volumes, will still have similar MFCC values, other than MFCC 0. To remove any indication of loudness but keep the information about timbre, we can ignore MFCC 0 by setting the parameter ``startCoeff`` to 1. - For more information visit https://learn.flucoma.org/reference/mfcc/. - For an interactive explanation of this relationship, visit https://learn.flucoma.org/reference/mfcc/explain. :control source: diff --git a/doc/BufSineFeature.rst b/doc/BufSineFeature.rst new file mode 100644 index 00000000..a8df2981 --- /dev/null +++ b/doc/BufSineFeature.rst @@ -0,0 +1,96 @@ +:digest: Buffer-Based Sinusoidal Peak Tracking +:species: buffer-proc +:sc-categories: Libraries>FluidDecomposition, UGens>Buffer +:sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc +:see-also: SineFeature, BufSines +:description: Interpolated Sinusoidal Peak Tracking on the Spectrum of Audio Stored in a Buffer. +:discussion: + This process tracks peaks in the spectrum of audio stored in a buffer, then estimates an interpolated frequency and amplitude of each peak in relation to its spectral context. It is the first part of the process used by :fluid-obj:`BufSines`. + + The process will return two buffers containing time series that describe the interpolated frequencies and magnitudes changing over time in the source buffer. + +:process: This is the method that calls for the analysis to be calculated on a given source buffer. +:output: Nothing, as the various destination buffers are declared in the function call. + +:control source: + + The |buffer| to use as the source material. 
The channels of multichannel buffers will be processed sequentially. + +:control startFrame: + + The starting point for analysis in the source (in samples). + +:control numFrames: + + The duration (in samples) to analyse. + +:control startChan: + + For multichannel sources, the starting channel to analyse. + +:control numChans: + + For multichannel sources, the number of channels to analyse. + +:control frequency: + + The buffer where the interpolated frequency of the peaks will be written. + +:control magnitude: + + The buffer where the interpolated magnitude of the peaks will be written. + +:control numPeaks: + + The number of peaks to search for and report back. It is capped at (fftSize / 2) + 1. + +:control detectionThreshold: + + The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. + +:control order: + + How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). + +:control freqUnit: + + The units and scale used to report the frequency of the peaks. By default (0), it is in Hz (linear), and the alternative (1) is in MIDI (logarithmic). + +:control magUnit: + + The units and scale used to report the magnitude of the peaks. By default (0), it is in amp (linear), and the alternative (1) is in dB (logarithmic). + +:control windowSize: + + The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. For more information visit https://learn.flucoma.org/learn/fourier-transform/ + +:control hopSize: + + The window hop size. As sinusoidal estimation relies on spectral frames, we need to move the window forward. It can be any size, but low overlap will create audible artefacts. The -1 default value will default to half of windowSize (overlap of 2). + +:control fftSize: + + The inner FFT/IFFT size. 
It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. + +:control padding: + + Controls the zero-padding added to either end of the source buffer or segment. Padding ensures all values are analysed. Possible values are: + + :enum: + + :0: + No padding - The first analysis window starts at time 0, and the samples at either end will be tapered by the STFT windowing function. + + :1: + Half the window size - The first sample is centred in the analysis window ensuring that the start and end of the segment are accounted for in the analysis. + + :2: + Window size minus the hop size - Mode 2 can be useful when the overlap factor (window size / hop size) is greater than 2, to ensure that the input samples at either end of the segment are covered by the same number of analysis frames as the rest of the analysed material. + +:control maxFFTSize: + + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + +:control maxNumPeaks: + + Up to how many peaks can be reported, by allocating memory at instantiation time. This cannot be modulated. diff --git a/doc/BufStats.rst b/doc/BufStats.rst index 45f002fd..6fb80149 100644 --- a/doc/BufStats.rst +++ b/doc/BufStats.rst @@ -66,7 +66,7 @@ :control outliersCutoff: - A ratio of the inter quantile range (IQR) that defines a range from the median, outside of which data will be considered an outlier and not used to compute the statistical summary. For each frame, if a single value in any channel of that frame is considered an outlier (when compared to the rest of the values in it's channel), the whole frame (on all channels) will not be used for statistical calculations. The default of -1 bypasses this function, keeping all frames in the statistical measurements. 
+ A ratio of the inter quantile range (IQR) that defines a range from the median, outside of which data will be considered an outlier and not used to compute the statistical summary. For each frame, if a single value in any channel of that frame is considered an outlier (when compared to the rest of the values in its channel), the whole frame (on all channels) will not be used for statistical calculations. The default of -1 bypasses this function, keeping all frames in the statistical measurements. :control weights: @@ -78,4 +78,4 @@ :control select: - An array of ``symbols`` indicating which statistics to return. The options are ``mean``, ``std``, ``skewness``, ``kurtosis``, ``low``, ``mid``, and ``high``. If nothing is specified, the object will return all the statistics. The statistics will always appear in their normal order, this argument just allows for a selection of them to be returned. Reordering the options in this argument will not reorder how the statistics are returned. \ No newline at end of file + An array of ``symbols`` indicating which statistics to return. The options are ``mean``, ``std``, ``skewness``, ``kurtosis``, ``low``, ``mid``, and ``high``. If nothing is specified, the object will return all the statistics. The statistics will always appear in their normal order, this argument just allows for a selection of them to be returned. Reordering the options in this argument will not reorder how the statistics are returned. diff --git a/doc/MFCC.rst b/doc/MFCC.rst index 453cfc0e..9f6883b8 100644 --- a/doc/MFCC.rst +++ b/doc/MFCC.rst @@ -15,8 +15,6 @@ .. only_in:: sc When ``numCoeffs`` is less than ``maxNumCoeffs`` the result will be zero-padded on the right so the control stream returned by this object is always ``maxNumCoeffs`` channels. - - For more information visit https://learn.flucoma.org/reference/mfcc/. For an interactive explanation of this relationship, visit https://learn.flucoma.org/reference/mfcc/explain. 
diff --git a/doc/SineFeature.rst b/doc/SineFeature.rst new file mode 100644 index 00000000..2537bce6 --- /dev/null +++ b/doc/SineFeature.rst @@ -0,0 +1,56 @@ +:digest: Sinusoidal Peak Tracking +:species: descriptor +:sc-categories: Libraries>FluidDecomposition +:sc-related: Guides/FluidCorpusManipulation, Classes/SinOsc +:see-also: BufSineFeature, Sines +:description: Interpolated Sinusoidal Peak Tracking on the Spectrum. +:discussion: + This process tracks peaks in the spectrum, then estimates an interpolated frequency and amplitude of each peak in relation to its spectral context. It is the first part of the process used by :fluid-obj:`Sines`. + +:process: The audio rate version of the object. +:output: An array of two control streams: [0] is the interpolated frequency of the peaks extracted in Hz or MIDI, [1] is their respective magnitudes in amp or dB. The latency between the input and the output is windowSize samples. + + +:control in: + + The input to be processed. + +:control numPeaks: + + The number of peaks to search for and report back. It is capped at (fftSize / 2) + 1. + +:control detectionThreshold: + + The threshold in dB above which a magnitude peak is considered to be a sinusoidal component. + +:control order: + + How the reported peaks are to be ordered. By default (0), it is by frequencies (lowest first), and the alternative (1) is by magnitude (loudest first). + +:control freqUnit: + + The units and scale used to report the frequency of the peaks. By default (0), it is in Hz (linear), and the alternative (1) is in MIDI (logarithmic). + +:control magUnit: + + The units and scale used to report the magnitude of the peaks. By default (0), it is in amp (linear), and the alternative (1) is in dB (logarithmic). + +:control windowSize: + + The window size. As sinusoidal estimation relies on spectral frames, we need to decide what precision we give it spectrally and temporally. 
For more information visit https://learn.flucoma.org/learn/fourier-transform/ + +:control hopSize: + + The window hop size. As sinusoidal estimation relies on spectral frames, we need to move the window forward. It can be any size, but low overlap will create audible artefacts. The -1 default value will default to half of windowSize (overlap of 2). + +:control fftSize: + + The inner FFT/IFFT size. It should be at least 4 samples long, at least the size of the window, and a power of 2. Making it larger allows an oversampling of the spectral precision. The -1 default value will default to windowSize. + +:control maxFFTSize: + + How large can the FFT be, by allocating memory at instantiation time. This cannot be modulated. + +:control maxNumPeaks: + + Up to how many peaks can be reported, by allocating memory at instantiation time. This cannot be modulated. diff --git a/example-code/sc/BufMFCC.scd b/example-code/sc/BufMFCC.scd index bdd0efe0..494737e1 100644 --- a/example-code/sc/BufMFCC.scd +++ b/example-code/sc/BufMFCC.scd @@ -29,6 +29,8 @@ FluidBufMFCC.processBlocking(s,~buf,features:~mfccs,action:{"done".postln;}) ( ~ds = FluidDataSet(s).fromBuffer(~mfccs); ~ds.print; +~ds2d = FluidDataSet(s); +~ds2dN = FluidDataSet(s); ) // dimensionally reduce the 13 MFCCs into 2D space @@ -37,9 +39,9 @@ fork{ ~umap = FluidUMAP(s); ~norm = FluidNormalize(s); s.sync; - ~umap.fitTransform(~ds,~ds); - ~norm.fitTransform(~ds,~ds); - ~dict = ~ds.dump({ + ~umap.fitTransform(~ds,~ds2d); + ~norm.fitTransform(~ds2d,~ds2dN); + ~dict = ~ds2dN.dump({ arg dict; defer{ FluidPlotter(bounds:Rect(0,0,800,800),dict:dict); diff --git a/example-code/sc/BufSineFeature.scd b/example-code/sc/BufSineFeature.scd new file mode 100644 index 00000000..8968a27b --- /dev/null +++ b/example-code/sc/BufSineFeature.scd @@ -0,0 +1,59 @@ +strong::Plot the BufSineFeature curves one over the other:: +code:: +~oboe = 
Buffer.read(s,FluidFilesPath("Harker-DS-TenOboeMultiphonics-M.wav"),numFrames: 311000); + +( +~freq = Buffer(s); +~mags = Buffer(s); +FluidBufSineFeature.processBlocking(s,~oboe, frequency: ~freq, magnitude: ~mags, numPeaks: 5); +w = Window("FluidWaveform Test",Rect(0,0,1000,500)); +w.view.layout = VLayout( + FluidWaveform(~oboe,featuresBuffer: ~freq,standalone: false), + FluidWaveform(~oboe,featuresBuffer: ~mags,standalone: false)); +w.front; +) +:: + +strong::A few didactic examples:: +code:: + +//mono source of 3 sines +b = {SinOsc.ar([440,789,535],mul: [0.01,0.03,0.02]).sum}.asBuffer(1) + +//listen +b.play + +//make destination buffers +~freq = Buffer(s); ~mags = Buffer(s); + +//process +FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags, numPeaks: 4, action: {\done.postln}) + +// retrieve the first 2 frames of 4 peaks +~freq.getn(0, 8, {|x|x.postln}) +~mags.getn(0, 8, {|x|x.postln}) + +// there are only 2 peaks... this is because the distance between 2 peaks has to be clearly segregated in the FFT world. At the default 1024 and the usual SC SR of 44100, this is 43Hz per bin, so 440 and 535 are too near each other... if we reprocess with a higher frame size, we get the right values + +FluidBufSineFeature.process(s,b,frequency: ~freq, magnitude: ~mags, numPeaks: 4, windowSize: 2048, action: {\done.postln}) + +// first 2 frames of 4 peaks +~freq.getn(0, 8, {|x|x.postln}) +~mags.getn(0, 8, {|x|x.postln}) + +//here is another source for fun, stereo this time, and triangle waves +b = {LFTri.ar([300, 500],mul: [-40, -45].dbamp)}.asBuffer(1) + +b.play +b.query + +// asking for 2 peaks - first and third harmonic of each should pop out +FluidBufSineFeature.process(s, b, frequency: ~freq, magnitude: ~mags, numPeaks: 2, magUnit: 1, action: {\done.postln}) + +// retrieving - the stereo values are interleaved, 2 for left 2 for right. 
+~freq.getn(0, 8, {|x|x.postln}) +~mags.getn(0, 8, {|x|x.postln}) + +:: + + diff --git a/example-code/sc/MDS.scd b/example-code/sc/MDS.scd index b0683efd..18748e86 100644 --- a/example-code/sc/MDS.scd +++ b/example-code/sc/MDS.scd @@ -6,9 +6,11 @@ code:: ~features = Buffer(s); FluidBufMFCC.processBlocking(s,~src,features:~features,startCoeff:1); ~ds = FluidDataSet(s).fromBuffer(~features); -FluidMDS(s).fitTransform(~ds,~ds); -FluidNormalize(s).fitTransform(~ds,~ds); -~ds.dump({ +~ds2d = FluidDataSet(s); +~ds2dN = FluidDataSet(s); +FluidMDS(s).fitTransform(~ds,~ds2d); +FluidNormalize(s).fitTransform(~ds2d,~ds2dN); +~ds2dN.dump({ arg dict; {FluidPlotter(bounds:Rect(0,0,800,800),dict:dict)}.defer; }); @@ -32,10 +34,11 @@ fork({ ["Manhattan","Euclidean","Squared Euclidean","Minkowski Max","Minkowski Min","Symmetric Kullback Leibler"].do{ arg name, dist_measure; var ds_transformed = FluidDataSet(s); + var ds_trans_normed = FluidDataSet(s); "computing distance measure: % %".format(dist_measure, name).postln; FluidMDS(s,2,dist_measure).fitTransform(~ds,ds_transformed); - FluidNormalize(s).fitTransform(ds_transformed,ds_transformed); - ds_transformed.dump({ + FluidNormalize(s).fitTransform(ds_transformed,ds_trans_normed); + ds_trans_normed.dump({ arg dict; defer{ var x = (dist_measure * 400) % win.bounds.width; diff --git a/example-code/sc/MFCC.scd b/example-code/sc/MFCC.scd index d5d60b5e..932265a3 100644 --- a/example-code/sc/MFCC.scd +++ b/example-code/sc/MFCC.scd @@ -108,6 +108,9 @@ CODE:: ( ~ds = FluidDataSet(s); +~ds2d = FluidDataSet(s); +~ds2dN = FluidDataSet(s); + ~bass = Buffer.read(s,FluidFilesPath("Tremblay-AaS-AcBassGuit-Melo-M.wav"),numFrames:44100 * 9); // only load the first 9 seconds so they're about the same length ~box = Buffer.read(s,FluidFilesPath("Green-Box639.wav")); ~umap = FluidUMAP(s); // algorithm for dimensionality reduction @@ -138,9 +141,9 @@ CODE:: ~ds.print; ( -~umap.fitTransform(~ds,~ds); // dimensionally reduce the 13 MFCCs into 2D space 
-~norm.fitTransform(~ds,~ds); -~ds.dump({ +~umap.fitTransform(~ds,~ds2d); // dimensionally reduce the 13 MFCCs into 2D space +~norm.fitTransform(~ds2d,~ds2dN); +~ds2dN.dump({ arg dict; var labels = Dictionary.newFrom(["data",Dictionary.new]); dict["data"].keys.do{ diff --git a/example-code/sc/Normalize.scd b/example-code/sc/Normalize.scd index cf4d1eed..8c912733 100644 --- a/example-code/sc/Normalize.scd +++ b/example-code/sc/Normalize.scd @@ -20,7 +20,7 @@ fork({ ~ds.print; ) -// normalize it +// normalize it in place, overwriting the dataset ( ~normalizer = FluidNormalize(s).fitTransform(~ds,~ds); ~ds.print; diff --git a/example-code/sc/PCA.scd b/example-code/sc/PCA.scd index 3d94e7e0..2b5011df 100644 --- a/example-code/sc/PCA.scd +++ b/example-code/sc/PCA.scd @@ -11,6 +11,9 @@ s.boot; FluidBufMFCC.processBlocking(s,~src,startCoeff:1,features:~mfcc_feature); ~ds = FluidDataSet(s).fromBuffer(~mfcc_feature); ~ds.print; +~ds2stan = FluidDataSet(s); +~ds2st2d = FluidDataSet(s); +~ds2st_2d_n = FluidDataSet(s); ) // first standardize our DataSet, so that the MFCC dimensions are in similar ranges @@ -18,10 +21,10 @@ FluidBufMFCC.processBlocking(s,~src,startCoeff:1,features:~mfcc_feature); // reducing the number of dimensions to the default of 2 // lastly normalize it so it can be plotted in a normalized space ( -~stand = FluidStandardize(s).fitTransform(~ds,~ds); -~pca = FluidPCA(s).fitTransform(~ds,~ds); -~norm = FluidNormalize(s).fitTransform(~ds,~ds); -~ds.dump({ +~stand = FluidStandardize(s).fitTransform(~ds,~ds2stan); +~pca = FluidPCA(s).fitTransform(~ds2stan,~ds2st2d); +~norm = FluidNormalize(s).fitTransform(~ds2st2d,~ds2st_2d_n); +~ds2st_2d_n.dump({ arg dict; defer{FluidPlotter(dict:dict)}; }); @@ -72,7 +75,7 @@ FluidBufSpectralShape.processBlocking(s,~src,features:~analysis); ~ds = FluidDataSet(s).fromBuffer(~analysis); ~ds.print; -~stand = FluidStandardize(s).fitTransform(~ds,~ds); +~stand = FluidStandardize(s).fitTransform(~ds,~ds); // note: standardize in 
place ~ds_pca = FluidDataSet(s); ~pca = FluidPCA(s).fitTransform(~ds,~ds_pca); diff --git a/example-code/sc/SineFeature.scd b/example-code/sc/SineFeature.scd new file mode 100644 index 00000000..fb93f711 --- /dev/null +++ b/example-code/sc/SineFeature.scd @@ -0,0 +1,34 @@ + +CODE:: + +// a didactic example: a cluster of sinusoids, sorted by magnitudes +{var source = SinOsc.ar([440,789],mul: [0.05,0.1]).sum; FluidSineFeature.kr(source,numPeaks: 3, order: 1).poll; source.dup}.play + +// or in MIDI and dB +{var source = SinOsc.ar([69,79].midicps,mul: [-40,-35].dbamp).sum; FluidSineFeature.kr(source,numPeaks: 3, order: 1, freqUnit: 1, magUnit: 1).poll; source.dup}.play + +// a more exciting example: resynthesizing audio input +( +var buf = Buffer.read(s,FluidFilesPath("Harker-DS-TenOboeMultiphonics-M.wav")); + +x = { + arg nbPeaks = 10, t_hold = 1; + var source = PlayBuf.ar(1, buf, loop: 1); + var analysis = Latch.kr(FluidSineFeature.kr(source,numPeaks: nbPeaks, maxNumPeaks: 50),t_hold); + var resynth = SinOsc.ar(analysis[Array.iota(50)], mul: analysis[Array.iota(50) + 50]).sum; + [source, resynth]; +}.play +) + +// play with the number of peaks to track +x.set(\nbPeaks, 5) +x.set(\nbPeaks, 1) +x.set(\nbPeaks, 50) + +// trigger the holder to hear the reconstruction on the right hand side +x.set(\t_hold, 1) + +// or make it automatic +r = Routine{x.set(\t_hold, 1);0.01.wait;}.loop.play +r.stop +:: diff --git a/example-code/sc/Standardize.scd b/example-code/sc/Standardize.scd index 68c6da98..85bde546 100644 --- a/example-code/sc/Standardize.scd +++ b/example-code/sc/Standardize.scd @@ -11,12 +11,13 @@ FluidBufSpectralShape.processBlocking(s,~src,features:~features,select:~select); ~ds = FluidDataSet(s).fromBuffer(~features); "First the Raw Data, then the Standardized Data:".postln; ~ds.print; -~stand = FluidStandardize(s).fitTransform(~ds,~ds); +~norm = FluidNormalize(s).fitTransform(~ds,~ds); // normalize (in place) first to match the ranges +~ds.print; +~stand = 
FluidStandardize(s).fitTransform(~ds,~ds); // standardize then in place again ~ds.print; -~norm = FluidNormalize(s).fitTransform(~ds,~ds); // normalize just for plotting ~ds.dump({ arg dict; - defer{FluidPlotter(dict:dict)} + defer{FluidPlotter(dict:dict,xmin: -1, xmax: 1, ymin: -1, ymax: 1)} //plotter range is +/- one standard deviation }); ) diff --git a/include/FluidParameterDump.hpp b/include/FluidParameterDump.hpp index 50f92ec7..1b5170b9 100644 --- a/include/FluidParameterDump.hpp +++ b/include/FluidParameterDump.hpp @@ -11,6 +11,7 @@ under the European Union’s Horizon 2020 research and innovation programme #pragma once #include #include +#include #include #include #include @@ -274,7 +275,8 @@ std::string getArgType(SharedClientRef&) return "Input LabelSet"; } -std::string getArgType(std::string&) { return "string"; } +std::string getArgType(std::string const&) { return "string"; } +std::string getArgType(rt::string const&) { return "string"; } template std::string getArgType(FluidTensor)