update qm-dsp library
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -27,14 +27,14 @@ Chromagram::Chromagram( ChromaConfig Config ) :
|
||||
}
|
||||
|
||||
int Chromagram::initialise( ChromaConfig Config )
|
||||
{
|
||||
{
|
||||
m_FMin = Config.min; // min freq
|
||||
m_FMax = Config.max; // max freq
|
||||
m_BPO = Config.BPO; // bins per octave
|
||||
m_normalise = Config.normalise; // if frame normalisation is required
|
||||
|
||||
// No. of constant Q bins
|
||||
m_uK = ( unsigned int ) ceil( m_BPO * log(m_FMax/m_FMin)/log(2.0));
|
||||
m_uK = ( unsigned int ) ceil( m_BPO * log(m_FMax/m_FMin)/log(2.0));
|
||||
|
||||
// Create array for chroma result
|
||||
m_chromadata = new double[ m_BPO ];
|
||||
@@ -49,7 +49,7 @@ int Chromagram::initialise( ChromaConfig Config )
|
||||
ConstantQConfig.max = m_FMax;
|
||||
ConstantQConfig.BPO = m_BPO;
|
||||
ConstantQConfig.CQThresh = Config.CQThresh;
|
||||
|
||||
|
||||
// Initialise ConstantQ operator
|
||||
m_ConstantQ = new ConstantQ( ConstantQConfig );
|
||||
|
||||
@@ -57,7 +57,7 @@ int Chromagram::initialise( ChromaConfig Config )
|
||||
m_frameSize = m_ConstantQ->getfftlength();
|
||||
m_hopSize = m_ConstantQ->gethop();
|
||||
|
||||
// Initialise FFT object
|
||||
// Initialise FFT object
|
||||
m_FFT = new FFTReal(m_frameSize);
|
||||
|
||||
m_FFTRe = new double[ m_frameSize ];
|
||||
@@ -124,7 +124,7 @@ void Chromagram::unityNormalise(double *src)
|
||||
double* Chromagram::process( const double *data )
|
||||
{
|
||||
if (!m_skGenerated) {
|
||||
// Generate CQ Kernel
|
||||
// Generate CQ Kernel
|
||||
m_ConstantQ->sparsekernel();
|
||||
m_skGenerated = true;
|
||||
}
|
||||
@@ -139,8 +139,7 @@ double* Chromagram::process( const double *data )
|
||||
}
|
||||
m_window->cut(m_windowbuf);
|
||||
|
||||
// FFT of current frame
|
||||
m_FFT->process(false, m_windowbuf, m_FFTRe, m_FFTIm);
|
||||
m_FFT->forward(m_windowbuf, m_FFTRe, m_FFTIm);
|
||||
|
||||
return process(m_FFTRe, m_FFTIm);
|
||||
}
|
||||
@@ -148,7 +147,7 @@ double* Chromagram::process( const double *data )
|
||||
double* Chromagram::process( const double *real, const double *imag )
|
||||
{
|
||||
if (!m_skGenerated) {
|
||||
// Generate CQ Kernel
|
||||
// Generate CQ Kernel
|
||||
m_ConstantQ->sparsekernel();
|
||||
m_skGenerated = true;
|
||||
}
|
||||
@@ -158,16 +157,15 @@ double* Chromagram::process( const double *real, const double *imag )
|
||||
|
||||
double cmax = 0.0;
|
||||
double cval = 0;
|
||||
|
||||
// Calculate ConstantQ frame
|
||||
m_ConstantQ->process( real, imag, m_CQRe, m_CQIm );
|
||||
|
||||
|
||||
// add each octave of cq data into Chromagram
|
||||
const unsigned octaves = (int)floor(double( m_uK/m_BPO))-1;
|
||||
for (unsigned octave = 0; octave <= octaves; octave++)
|
||||
for (unsigned octave = 0; octave <= octaves; octave++)
|
||||
{
|
||||
unsigned firstBin = octave*m_BPO;
|
||||
for (unsigned i = 0; i < m_BPO; i++)
|
||||
for (unsigned i = 0; i < m_BPO; i++)
|
||||
{
|
||||
m_chromadata[i] += kabs( m_CQRe[ firstBin + i ], m_CQIm[ firstBin + i ]);
|
||||
}
|
||||
|
||||
@@ -29,20 +29,20 @@ struct ChromaConfig{
|
||||
MathUtilities::NormaliseType normalise;
|
||||
};
|
||||
|
||||
class Chromagram
|
||||
class Chromagram
|
||||
{
|
||||
|
||||
public:
|
||||
public:
|
||||
Chromagram( ChromaConfig Config );
|
||||
~Chromagram();
|
||||
|
||||
|
||||
double* process( const double *data ); // time domain
|
||||
double* process( const double *real, const double *imag ); // frequency domain
|
||||
void unityNormalise( double* src );
|
||||
|
||||
// Complex arithmetic
|
||||
double kabs( double real, double imag );
|
||||
|
||||
|
||||
// Results
|
||||
unsigned int getK() { return m_uK;}
|
||||
unsigned int getFrameSize() { return m_frameSize; }
|
||||
@@ -54,7 +54,7 @@ private:
|
||||
|
||||
Window<double> *m_window;
|
||||
double *m_windowbuf;
|
||||
|
||||
|
||||
double* m_chromadata;
|
||||
double m_FMin;
|
||||
double m_FMax;
|
||||
|
||||
@@ -96,7 +96,7 @@ void ConstantQ::sparsekernel()
|
||||
double* transfHammingWindowRe = new double [ m_FFTLength ];
|
||||
double* transfHammingWindowIm = new double [ m_FFTLength ];
|
||||
|
||||
for (unsigned u=0; u < m_FFTLength; u++)
|
||||
for (unsigned u=0; u < m_FFTLength; u++)
|
||||
{
|
||||
hammingWindowRe[u] = 0;
|
||||
hammingWindowIm[u] = 0;
|
||||
@@ -109,28 +109,28 @@ void ConstantQ::sparsekernel()
|
||||
sk->js.reserve( m_FFTLength*2 );
|
||||
sk->real.reserve( m_FFTLength*2 );
|
||||
sk->imag.reserve( m_FFTLength*2 );
|
||||
|
||||
|
||||
// for each bin value K, calculate temporal kernel, take its fft to
|
||||
//calculate the spectral kernel then threshold it to make it sparse and
|
||||
//calculate the spectral kernel then threshold it to make it sparse and
|
||||
//add it to the sparse kernels matrix
|
||||
double squareThreshold = m_CQThresh * m_CQThresh;
|
||||
|
||||
FFT m_FFT(m_FFTLength);
|
||||
|
||||
for (unsigned k = m_uK; k--; )
|
||||
|
||||
for (unsigned k = m_uK; k--; )
|
||||
{
|
||||
for (unsigned u=0; u < m_FFTLength; u++)
|
||||
for (unsigned u=0; u < m_FFTLength; u++)
|
||||
{
|
||||
hammingWindowRe[u] = 0;
|
||||
hammingWindowIm[u] = 0;
|
||||
}
|
||||
|
||||
|
||||
// Computing a hamming window
|
||||
const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO)));
|
||||
|
||||
unsigned origin = m_FFTLength/2 - hammingLength/2;
|
||||
|
||||
for (unsigned i=0; i<hammingLength; i++)
|
||||
for (unsigned i=0; i<hammingLength; i++)
|
||||
{
|
||||
const double angle = 2*PI*m_dQ*i/hammingLength;
|
||||
const double real = cos(angle);
|
||||
@@ -148,17 +148,17 @@ void ConstantQ::sparsekernel()
|
||||
hammingWindowIm[i] = hammingWindowIm[i + m_FFTLength/2];
|
||||
hammingWindowIm[i + m_FFTLength/2] = temp;
|
||||
}
|
||||
|
||||
|
||||
//do fft of hammingWindow
|
||||
m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
|
||||
|
||||
|
||||
for (unsigned j=0; j<( m_FFTLength ); j++)
|
||||
|
||||
for (unsigned j=0; j<( m_FFTLength ); j++)
|
||||
{
|
||||
// perform thresholding
|
||||
const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
|
||||
if (squaredBin <= squareThreshold) continue;
|
||||
|
||||
|
||||
// Insert non-zero position indexes, doubled because they are floats
|
||||
sk->is.push_back(j);
|
||||
sk->js.push_back(k);
|
||||
@@ -241,7 +241,7 @@ void ConstantQ::sparsekernel()
|
||||
cout << "}" << endl;
|
||||
*/
|
||||
// std::cerr << "done\n -> is: " << sk->is.size() << ", js: " << sk->js.size() << ", reals: " << sk->real.size() << ", imags: " << sk->imag.size() << std::endl;
|
||||
|
||||
|
||||
m_sparseKernel = sk;
|
||||
return;
|
||||
}
|
||||
@@ -256,7 +256,7 @@ double* ConstantQ::process( const double* fftdata )
|
||||
|
||||
SparseKernel *sk = m_sparseKernel;
|
||||
|
||||
for (unsigned row=0; row<2*m_uK; row++)
|
||||
for (unsigned row=0; row<2*m_uK; row++)
|
||||
{
|
||||
m_CQdata[ row ] = 0;
|
||||
m_CQdata[ row+1 ] = 0;
|
||||
@@ -266,7 +266,7 @@ double* ConstantQ::process( const double* fftdata )
|
||||
const double *real = &(sk->real[0]);
|
||||
const double *imag = &(sk->imag[0]);
|
||||
const unsigned int sparseCells = sk->real.size();
|
||||
|
||||
|
||||
for (unsigned i = 0; i<sparseCells; i++)
|
||||
{
|
||||
const unsigned row = cqbin[i];
|
||||
@@ -324,7 +324,7 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
|
||||
|
||||
SparseKernel *sk = m_sparseKernel;
|
||||
|
||||
for (unsigned row=0; row<m_uK; row++)
|
||||
for (unsigned row=0; row<m_uK; row++)
|
||||
{
|
||||
CQRe[ row ] = 0;
|
||||
CQIm[ row ] = 0;
|
||||
@@ -335,7 +335,7 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
|
||||
const double *real = &(sk->real[0]);
|
||||
const double *imag = &(sk->imag[0]);
|
||||
const unsigned int sparseCells = sk->real.size();
|
||||
|
||||
|
||||
for (unsigned i = 0; i<sparseCells; i++)
|
||||
{
|
||||
const unsigned row = cqbin[i];
|
||||
|
||||
@@ -29,7 +29,7 @@ struct CQConfig{
|
||||
};
|
||||
|
||||
class ConstantQ {
|
||||
|
||||
|
||||
//public functions incl. sparsekernel so can keep out of loop in main
|
||||
public:
|
||||
void process( const double* FFTRe, const double* FFTIm,
|
||||
@@ -46,7 +46,7 @@ public:
|
||||
double out = 0.54 - 0.46*cos(2*PI*n/len);
|
||||
return(out);
|
||||
}
|
||||
|
||||
|
||||
int getnumwin() { return m_numWin;}
|
||||
double getQ() { return m_dQ;}
|
||||
int getK() {return m_uK ;}
|
||||
@@ -56,7 +56,7 @@ public:
|
||||
private:
|
||||
void initialise( CQConfig Config );
|
||||
void deInitialise();
|
||||
|
||||
|
||||
double* m_CQdata;
|
||||
unsigned int m_FS;
|
||||
double m_FMin;
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
/*
|
||||
Copyright (c) 2005 Centre for Digital Music ( C4DM )
|
||||
Queen Mary University of London
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
// GetKeyMode.cpp: implementation of the CGetKeyMode class.
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -1,17 +1,12 @@
|
||||
/*
|
||||
* Author: c.landone
|
||||
* Description:
|
||||
*
|
||||
* Syntax: C++
|
||||
*
|
||||
* Copyright (c) 2005 Centre for Digital Music ( C4DM )
|
||||
* Queen Mary University of London
|
||||
*
|
||||
*
|
||||
* This program is not free software; you cannot redistribute it
|
||||
* without the explicit authorization from the centre for digital music,
|
||||
* queen mary university of london
|
||||
*
|
||||
Copyright (c) 2005 Centre for Digital Music ( C4DM )
|
||||
Queen Mary University of London
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef GETKEYMODE_H
|
||||
|
||||
@@ -27,75 +27,75 @@ MFCC::MFCC(MFCCConfig config)
|
||||
|
||||
/* Calculate at startup */
|
||||
double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
|
||||
|
||||
|
||||
lowestFrequency = 66.6666666;
|
||||
linearFilters = 13;
|
||||
linearSpacing = 66.66666666;
|
||||
logFilters = 27;
|
||||
logSpacing = 1.0711703;
|
||||
|
||||
|
||||
/* FFT and analysis window sizes */
|
||||
fftSize = config.fftsize;
|
||||
fft = new FFTReal(fftSize);
|
||||
|
||||
totalFilters = linearFilters + logFilters;
|
||||
logPower = config.logpower;
|
||||
|
||||
|
||||
samplingRate = config.FS;
|
||||
|
||||
|
||||
/* The number of cepstral components */
|
||||
nceps = config.nceps;
|
||||
|
||||
/* Set if user want C0 */
|
||||
WANT_C0 = (config.want_c0 ? 1 : 0);
|
||||
|
||||
|
||||
/* Allocate space for feature vector */
|
||||
if (WANT_C0 == 1) {
|
||||
ceps = (double*)calloc(nceps+1, sizeof(double));
|
||||
} else {
|
||||
ceps = (double*)calloc(nceps, sizeof(double));
|
||||
}
|
||||
|
||||
|
||||
/* Allocate space for local vectors */
|
||||
mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
|
||||
for (i = 0; i < nceps+1; i++) {
|
||||
mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
||||
mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
|
||||
}
|
||||
|
||||
mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
|
||||
for (i = 0; i < totalFilters; i++) {
|
||||
mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
||||
mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
|
||||
}
|
||||
|
||||
|
||||
freqs = (double*)calloc(totalFilters+2,sizeof(double));
|
||||
|
||||
|
||||
lower = (double*)calloc(totalFilters,sizeof(double));
|
||||
center = (double*)calloc(totalFilters,sizeof(double));
|
||||
upper = (double*)calloc(totalFilters,sizeof(double));
|
||||
|
||||
|
||||
triangleHeight = (double*)calloc(totalFilters,sizeof(double));
|
||||
fftFreqs = (double*)calloc(fftSize,sizeof(double));
|
||||
|
||||
|
||||
for (i = 0; i < linearFilters; i++) {
|
||||
freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
|
||||
}
|
||||
|
||||
|
||||
for (i = linearFilters; i < totalFilters+2; i++) {
|
||||
freqs[i] = freqs[linearFilters-1] *
|
||||
freqs[i] = freqs[linearFilters-1] *
|
||||
pow(logSpacing, (double)(i-linearFilters+1));
|
||||
}
|
||||
|
||||
|
||||
/* Define lower, center and upper */
|
||||
memcpy(lower, freqs,totalFilters*sizeof(double));
|
||||
memcpy(center, &freqs[1],totalFilters*sizeof(double));
|
||||
memcpy(upper, &freqs[2],totalFilters*sizeof(double));
|
||||
|
||||
|
||||
for (i=0;i<totalFilters;i++){
|
||||
triangleHeight[i] = 2./(upper[i]-lower[i]);
|
||||
}
|
||||
|
||||
|
||||
for (i=0;i<fftSize;i++){
|
||||
fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
||||
fftFreqs[i] = ((double) i / ((double) fftSize ) *
|
||||
(double) samplingRate);
|
||||
}
|
||||
|
||||
@@ -103,12 +103,12 @@ MFCC::MFCC(MFCCConfig config)
|
||||
for (i=0;i<totalFilters;i++){
|
||||
|
||||
for (j=0;j<fftSize;j++) {
|
||||
|
||||
|
||||
if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
|
||||
|
||||
mfccFilterWeights[i][j] = triangleHeight[i] *
|
||||
(fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
||||
|
||||
|
||||
mfccFilterWeights[i][j] = triangleHeight[i] *
|
||||
(fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -118,7 +118,7 @@ MFCC::MFCC(MFCCConfig config)
|
||||
if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
|
||||
|
||||
mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
|
||||
+ triangleHeight[i] * (upper[i]-fftFreqs[j])
|
||||
+ triangleHeight[i] * (upper[i]-fftFreqs[j])
|
||||
/ (upper[i]-center[i]);
|
||||
}
|
||||
else
|
||||
@@ -130,15 +130,15 @@ MFCC::MFCC(MFCCConfig config)
|
||||
}
|
||||
|
||||
/*
|
||||
* We calculate now mfccDCT matrix
|
||||
* We calculate now mfccDCT matrix
|
||||
* NB: +1 because of the DC component
|
||||
*/
|
||||
|
||||
const double pi = 3.14159265358979323846264338327950288;
|
||||
|
||||
|
||||
for (i = 0; i < nceps+1; i++) {
|
||||
for (j = 0; j < totalFilters; j++) {
|
||||
mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
||||
mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
|
||||
* cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
|
||||
}
|
||||
}
|
||||
@@ -146,7 +146,7 @@ MFCC::MFCC(MFCCConfig config)
|
||||
for (j = 0; j < totalFilters; j++){
|
||||
mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
|
||||
}
|
||||
|
||||
|
||||
/* The analysis window */
|
||||
window = new Window<double>(config.window, fftSize);
|
||||
|
||||
@@ -156,7 +156,7 @@ MFCC::MFCC(MFCCConfig config)
|
||||
|
||||
earMag = (double*)calloc(totalFilters, sizeof(double));
|
||||
fftMag = (double*)calloc(fftSize/2, sizeof(double));
|
||||
|
||||
|
||||
free(freqs);
|
||||
free(lower);
|
||||
free(center);
|
||||
@@ -168,27 +168,27 @@ MFCC::MFCC(MFCCConfig config)
|
||||
MFCC::~MFCC()
|
||||
{
|
||||
int i;
|
||||
|
||||
|
||||
/* Free the structure */
|
||||
for (i = 0; i < nceps+1; i++) {
|
||||
free(mfccDCTMatrix[i]);
|
||||
}
|
||||
free(mfccDCTMatrix);
|
||||
|
||||
|
||||
for (i = 0; i < totalFilters; i++) {
|
||||
free(mfccFilterWeights[i]);
|
||||
}
|
||||
free(mfccFilterWeights);
|
||||
|
||||
|
||||
/* Free the feature vector */
|
||||
free(ceps);
|
||||
|
||||
|
||||
/* The analysis window */
|
||||
delete window;
|
||||
|
||||
free(earMag);
|
||||
free(fftMag);
|
||||
|
||||
|
||||
/* Free the FFT */
|
||||
free(realOut);
|
||||
free(imagOut);
|
||||
@@ -198,19 +198,19 @@ MFCC::~MFCC()
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* Extract the MFCC on the input frame
|
||||
*
|
||||
*/
|
||||
*
|
||||
* Extract the MFCC on the input frame
|
||||
*
|
||||
*/
|
||||
int MFCC::process(const double *inframe, double *outceps)
|
||||
{
|
||||
double *inputData = (double *)malloc(fftSize * sizeof(double));
|
||||
for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
|
||||
|
||||
window->cut(inputData);
|
||||
|
||||
|
||||
/* Calculate the fft on the input frame */
|
||||
fft->process(0, inputData, realOut, imagOut);
|
||||
fft->forward(inputData, realOut, imagOut);
|
||||
|
||||
free(inputData);
|
||||
|
||||
@@ -244,14 +244,14 @@ int MFCC::process(const double *real, const double *imag, double *outceps)
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* Calculate now the cepstral coefficients
|
||||
*
|
||||
* Calculate now the cepstral coefficients
|
||||
* with or without the DC component
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
if (WANT_C0 == 1) {
|
||||
|
||||
|
||||
for (i = 0; i < nceps+1; i++) {
|
||||
double tmp = 0.;
|
||||
for (j = 0; j < totalFilters; j++){
|
||||
@@ -260,8 +260,8 @@ int MFCC::process(const double *real, const double *imag, double *outceps)
|
||||
outceps[i] = tmp;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
else
|
||||
{
|
||||
for (i = 1; i < nceps+1; i++) {
|
||||
double tmp = 0.;
|
||||
for (j = 0; j < totalFilters; j++){
|
||||
@@ -270,7 +270,7 @@ int MFCC::process(const double *real, const double *imag, double *outceps)
|
||||
outceps[i-1] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return nceps;
|
||||
}
|
||||
|
||||
|
||||
@@ -57,31 +57,31 @@ public:
|
||||
|
||||
private:
|
||||
/* Filter bank parameters */
|
||||
double lowestFrequency;
|
||||
int linearFilters;
|
||||
double lowestFrequency;
|
||||
int linearFilters;
|
||||
double linearSpacing;
|
||||
int logFilters;
|
||||
double logSpacing;
|
||||
|
||||
|
||||
/* FFT length */
|
||||
int fftSize;
|
||||
|
||||
|
||||
int totalFilters;
|
||||
double logPower;
|
||||
|
||||
|
||||
/* Misc. */
|
||||
int samplingRate;
|
||||
int nceps;
|
||||
|
||||
|
||||
/* MFCC vector */
|
||||
double *ceps;
|
||||
|
||||
|
||||
double **mfccDCTMatrix;
|
||||
double **mfccFilterWeights;
|
||||
|
||||
|
||||
/* The analysis window */
|
||||
Window<double> *window;
|
||||
|
||||
|
||||
/* For the FFT */
|
||||
double *realOut;
|
||||
double *imagOut;
|
||||
|
||||
@@ -40,10 +40,11 @@ DetectionFunction::~DetectionFunction()
|
||||
void DetectionFunction::initialise( DFConfig Config )
|
||||
{
|
||||
m_dataLength = Config.frameLength;
|
||||
m_halfLength = m_dataLength/2;
|
||||
m_halfLength = m_dataLength/2 + 1;
|
||||
|
||||
m_DFType = Config.DFType;
|
||||
m_stepSize = Config.stepSize;
|
||||
m_dbRise = Config.dbRise;
|
||||
|
||||
m_whiten = Config.adaptiveWhitening;
|
||||
m_whitenRelaxCoeff = Config.whiteningRelaxCoeff;
|
||||
@@ -53,7 +54,7 @@ void DetectionFunction::initialise( DFConfig Config )
|
||||
|
||||
m_magHistory = new double[ m_halfLength ];
|
||||
memset(m_magHistory,0, m_halfLength*sizeof(double));
|
||||
|
||||
|
||||
m_phaseHistory = new double[ m_halfLength ];
|
||||
memset(m_phaseHistory,0, m_halfLength*sizeof(double));
|
||||
|
||||
@@ -63,15 +64,14 @@ void DetectionFunction::initialise( DFConfig Config )
|
||||
m_magPeaks = new double[ m_halfLength ];
|
||||
memset(m_magPeaks,0, m_halfLength*sizeof(double));
|
||||
|
||||
// See note in process(const double *) below
|
||||
int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
|
||||
m_phaseVoc = new PhaseVocoder(actualLength);
|
||||
m_phaseVoc = new PhaseVocoder(m_dataLength, m_stepSize);
|
||||
|
||||
m_DFWindowedFrame = new double[ m_dataLength ];
|
||||
m_magnitude = new double[ m_halfLength ];
|
||||
m_thetaAngle = new double[ m_halfLength ];
|
||||
m_unwrapped = new double[ m_halfLength ];
|
||||
|
||||
m_window = new Window<double>(HanningWindow, m_dataLength);
|
||||
m_windowed = new double[ m_dataLength ];
|
||||
}
|
||||
|
||||
void DetectionFunction::deInitialise()
|
||||
@@ -83,47 +83,31 @@ void DetectionFunction::deInitialise()
|
||||
|
||||
delete m_phaseVoc;
|
||||
|
||||
delete [] m_DFWindowedFrame;
|
||||
delete [] m_magnitude;
|
||||
delete [] m_thetaAngle;
|
||||
delete [] m_windowed;
|
||||
delete [] m_unwrapped;
|
||||
|
||||
delete m_window;
|
||||
}
|
||||
|
||||
double DetectionFunction::process( const double *TDomain )
|
||||
double DetectionFunction::processTimeDomain(const double *samples)
|
||||
{
|
||||
m_window->cut( TDomain, m_DFWindowedFrame );
|
||||
m_window->cut(samples, m_windowed);
|
||||
|
||||
// Our own FFT implementation supports power-of-two sizes only.
|
||||
// If we have to use this implementation (as opposed to the
|
||||
// version of process() below that operates on frequency domain
|
||||
// data directly), we will have to use the next smallest power of
|
||||
// two from the block size. Results may vary accordingly!
|
||||
|
||||
unsigned int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
|
||||
|
||||
if (actualLength != m_dataLength) {
|
||||
// Pre-fill mag and phase vectors with zero, as the FFT output
|
||||
// will not fill the arrays
|
||||
for (unsigned int i = actualLength/2; i < m_dataLength/2; ++i) {
|
||||
m_magnitude[i] = 0;
|
||||
m_thetaAngle[0] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
m_phaseVoc->process(m_DFWindowedFrame, m_magnitude, m_thetaAngle);
|
||||
m_phaseVoc->processTimeDomain(m_windowed,
|
||||
m_magnitude, m_thetaAngle, m_unwrapped);
|
||||
|
||||
if (m_whiten) whiten();
|
||||
|
||||
return runDF();
|
||||
}
|
||||
|
||||
double DetectionFunction::process( const double *magnitudes, const double *phases )
|
||||
double DetectionFunction::processFrequencyDomain(const double *reals,
|
||||
const double *imags)
|
||||
{
|
||||
for (size_t i = 0; i < m_halfLength; ++i) {
|
||||
m_magnitude[i] = magnitudes[i];
|
||||
m_thetaAngle[i] = phases[i];
|
||||
}
|
||||
m_phaseVoc->processFrequencyDomain(reals, imags,
|
||||
m_magnitude, m_thetaAngle, m_unwrapped);
|
||||
|
||||
if (m_whiten) whiten();
|
||||
|
||||
@@ -152,15 +136,19 @@ double DetectionFunction::runDF()
|
||||
case DF_HFC:
|
||||
retVal = HFC( m_halfLength, m_magnitude);
|
||||
break;
|
||||
|
||||
|
||||
case DF_SPECDIFF:
|
||||
retVal = specDiff( m_halfLength, m_magnitude);
|
||||
break;
|
||||
|
||||
|
||||
case DF_PHASEDEV:
|
||||
// Using the instantaneous phases here actually provides the
|
||||
// same results (for these calculations) as if we had used
|
||||
// unwrapped phases, but without the possible accumulation of
|
||||
// phase error over time
|
||||
retVal = phaseDev( m_halfLength, m_thetaAngle);
|
||||
break;
|
||||
|
||||
|
||||
case DF_COMPLEXSD:
|
||||
retVal = complexSD( m_halfLength, m_magnitude, m_thetaAngle);
|
||||
break;
|
||||
@@ -169,7 +157,7 @@ double DetectionFunction::runDF()
|
||||
retVal = broadband( m_halfLength, m_magnitude);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return retVal;
|
||||
}
|
||||
|
||||
@@ -195,7 +183,7 @@ double DetectionFunction::specDiff(unsigned int length, double *src)
|
||||
for( i = 0; i < length; i++)
|
||||
{
|
||||
temp = fabs( (src[ i ] * src[ i ]) - (m_magHistory[ i ] * m_magHistory[ i ]) );
|
||||
|
||||
|
||||
diff= sqrt(temp);
|
||||
|
||||
// (See note in phaseDev below.)
|
||||
@@ -230,15 +218,14 @@ double DetectionFunction::phaseDev(unsigned int length, double *srcPhase)
|
||||
// does significantly damage its ability to work with quieter
|
||||
// music, so I'm removing it and counting the result always.
|
||||
// Same goes for the spectral difference measure above.
|
||||
|
||||
|
||||
tmpVal = fabs(dev);
|
||||
val += tmpVal ;
|
||||
|
||||
m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
|
||||
m_phaseHistory[ i ] = srcPhase[ i ];
|
||||
}
|
||||
|
||||
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -250,7 +237,7 @@ double DetectionFunction::complexSD(unsigned int length, double *srcMagnitude, d
|
||||
double tmpPhase = 0;
|
||||
double tmpReal = 0;
|
||||
double tmpImag = 0;
|
||||
|
||||
|
||||
double dev = 0;
|
||||
ComplexData meas = ComplexData( 0, 0 );
|
||||
ComplexData j = ComplexData( 0, 1 );
|
||||
@@ -259,14 +246,14 @@ double DetectionFunction::complexSD(unsigned int length, double *srcMagnitude, d
|
||||
{
|
||||
tmpPhase = (srcPhase[ i ]- 2*m_phaseHistory[ i ]+m_phaseHistoryOld[ i ]);
|
||||
dev= MathUtilities::princarg( tmpPhase );
|
||||
|
||||
|
||||
meas = m_magHistory[i] - ( srcMagnitude[ i ] * exp( j * dev) );
|
||||
|
||||
tmpReal = real( meas );
|
||||
tmpImag = imag( meas );
|
||||
|
||||
val += sqrt( (tmpReal * tmpReal) + (tmpImag * tmpImag) );
|
||||
|
||||
|
||||
m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
|
||||
m_phaseHistory[ i ] = srcPhase[ i ];
|
||||
m_magHistory[ i ] = srcMagnitude[ i ];
|
||||
@@ -287,7 +274,7 @@ double DetectionFunction::broadband(unsigned int length, double *src)
|
||||
m_magHistory[i] = sqrmag;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
}
|
||||
|
||||
double* DetectionFunction::getSpectrumMagnitude()
|
||||
{
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
|
||||
struct DFConfig{
|
||||
unsigned int stepSize; // DF step in samples
|
||||
unsigned int frameLength; // DF analysis window - usually 2*step
|
||||
unsigned int frameLength; // DF analysis window - usually 2*step. Must be even!
|
||||
int DFType; // type of detection function ( see defines )
|
||||
double dbRise; // only used for broadband df (and required for it)
|
||||
bool adaptiveWhitening; // perform adaptive whitening
|
||||
@@ -37,14 +37,24 @@ struct DFConfig{
|
||||
double whiteningFloor; // if < 0, a sensible default will be used
|
||||
};
|
||||
|
||||
class DetectionFunction
|
||||
class DetectionFunction
|
||||
{
|
||||
public:
|
||||
double* getSpectrumMagnitude();
|
||||
DetectionFunction( DFConfig Config );
|
||||
virtual ~DetectionFunction();
|
||||
double process( const double* TDomain );
|
||||
double process( const double* magnitudes, const double* phases );
|
||||
|
||||
/**
|
||||
* Process a single time-domain frame of audio, provided as
|
||||
* frameLength samples.
|
||||
*/
|
||||
double processTimeDomain(const double* samples);
|
||||
|
||||
/**
|
||||
* Process a single frequency-domain frame, provided as
|
||||
* frameLength/2+1 real and imaginary component values.
|
||||
*/
|
||||
double processFrequencyDomain(const double* reals, const double* imags);
|
||||
|
||||
private:
|
||||
void whiten();
|
||||
@@ -55,7 +65,7 @@ private:
|
||||
double phaseDev(unsigned int length, double *srcPhase);
|
||||
double complexSD(unsigned int length, double *srcMagnitude, double *srcPhase);
|
||||
double broadband(unsigned int length, double *srcMagnitude);
|
||||
|
||||
|
||||
private:
|
||||
void initialise( DFConfig Config );
|
||||
void deInitialise();
|
||||
@@ -74,12 +84,13 @@ private:
|
||||
double* m_phaseHistoryOld;
|
||||
double* m_magPeaks;
|
||||
|
||||
double* m_DFWindowedFrame; // Array for windowed analysis frame
|
||||
double* m_windowed; // Array for windowed analysis frame
|
||||
double* m_magnitude; // Magnitude of analysis frame ( frequency domain )
|
||||
double* m_thetaAngle;// Phase of analysis frame ( frequency domain )
|
||||
double* m_unwrapped; // Unwrapped phase of analysis frame
|
||||
|
||||
Window<double> *m_window;
|
||||
PhaseVocoder* m_phaseVoc; // Phase Vocoder
|
||||
};
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -6,11 +6,19 @@
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
|
||||
Modifications:
|
||||
|
||||
- delta threshold
|
||||
Description: add delta threshold used as offset in the smoothed
|
||||
detection function
|
||||
Author: Mathieu Barthet
|
||||
Date: June 2010
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "PeakPicking.h"
|
||||
@@ -41,15 +49,16 @@ void PeakPicking::initialise( PPickParams Config )
|
||||
Qfilta = Config.QuadThresh.a ;
|
||||
Qfiltb = Config.QuadThresh.b ;
|
||||
Qfiltc = Config.QuadThresh.c ;
|
||||
|
||||
m_DFProcessingParams.length = m_DFLength;
|
||||
m_DFProcessingParams.LPOrd = Config.LPOrd;
|
||||
m_DFProcessingParams.LPACoeffs = Config.LPACoeffs;
|
||||
m_DFProcessingParams.LPBCoeffs = Config.LPBCoeffs;
|
||||
|
||||
m_DFProcessingParams.length = m_DFLength;
|
||||
m_DFProcessingParams.LPOrd = Config.LPOrd;
|
||||
m_DFProcessingParams.LPACoeffs = Config.LPACoeffs;
|
||||
m_DFProcessingParams.LPBCoeffs = Config.LPBCoeffs;
|
||||
m_DFProcessingParams.winPre = Config.WinT.pre;
|
||||
m_DFProcessingParams.winPost = Config.WinT.post;
|
||||
m_DFProcessingParams.winPost = Config.WinT.post;
|
||||
m_DFProcessingParams.AlphaNormParam = Config.alpha;
|
||||
m_DFProcessingParams.isMedianPositive = false;
|
||||
m_DFProcessingParams.delta = Config.delta; //add the delta threshold as an adjustable parameter
|
||||
|
||||
m_DFSmoothing = new DFProcess( m_DFProcessingParams );
|
||||
|
||||
@@ -68,19 +77,19 @@ void PeakPicking::process( double* src, unsigned int len, vector<int> &onsets )
|
||||
{
|
||||
if (len < 4) return;
|
||||
|
||||
vector <double> m_maxima;
|
||||
vector <double> m_maxima;
|
||||
|
||||
// Signal conditioning
|
||||
// Signal conditioning
|
||||
m_DFSmoothing->process( src, m_workBuffer );
|
||||
|
||||
|
||||
for( unsigned int u = 0; u < len; u++)
|
||||
{
|
||||
m_maxima.push_back( m_workBuffer[ u ] );
|
||||
m_maxima.push_back( m_workBuffer[ u ] );
|
||||
}
|
||||
|
||||
|
||||
quadEval( m_maxima, onsets );
|
||||
|
||||
for(unsigned int b = 0; b < m_maxima.size(); b++)
|
||||
for( int b = 0; b < (int)m_maxima.size(); b++)
|
||||
{
|
||||
src[ b ] = m_maxima[ b ];
|
||||
}
|
||||
@@ -92,7 +101,7 @@ int PeakPicking::quadEval( vector<double> &src, vector<int> &idx )
|
||||
|
||||
vector <int> m_maxIndex;
|
||||
vector <int> m_onsetPosition;
|
||||
|
||||
|
||||
vector <double> m_maxFit;
|
||||
vector <double> m_poly;
|
||||
vector <double> m_err;
|
||||
@@ -123,7 +132,7 @@ int PeakPicking::quadEval( vector<double> &src, vector<int> &idx )
|
||||
for (int k = -2; k <= 2; ++k)
|
||||
{
|
||||
selMax = src[ m_maxIndex[j] + k ] ;
|
||||
m_maxFit.push_back(selMax);
|
||||
m_maxFit.push_back(selMax);
|
||||
}
|
||||
|
||||
TPolyFit::PolyFit2(m_err, m_maxFit, m_poly);
|
||||
@@ -135,7 +144,7 @@ int PeakPicking::quadEval( vector<double> &src, vector<int> &idx )
|
||||
{
|
||||
idx.push_back(m_maxIndex[j]);
|
||||
}
|
||||
|
||||
|
||||
m_maxFit.clear();
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,14 @@
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
|
||||
Modifications:
|
||||
|
||||
- delta threshold
|
||||
Description: add delta threshold used as offset in the smoothed
|
||||
detection function
|
||||
Author: Mathieu Barthet
|
||||
Date: June 2010
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
@@ -29,6 +37,12 @@ struct PPWinThresh
|
||||
{
|
||||
unsigned int pre;
|
||||
unsigned int post;
|
||||
|
||||
PPWinThresh(unsigned int x, unsigned int y) :
|
||||
pre(x),
|
||||
post(y)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct QFitThresh
|
||||
@@ -36,12 +50,19 @@ struct QFitThresh
|
||||
double a;
|
||||
double b;
|
||||
double c;
|
||||
|
||||
QFitThresh(double x, double y, double z) :
|
||||
a(x),
|
||||
b(y),
|
||||
c(z)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct PPickParams
|
||||
{
|
||||
unsigned int length; //Detection FunctionLength
|
||||
double tau; // time resolution of the detection function:
|
||||
double tau; // time resolution of the detection function
|
||||
unsigned int alpha; //alpha-norm parameter
|
||||
double cutoff;//low-pass Filter cutoff freq
|
||||
unsigned int LPOrd; // low-pass Filter order
|
||||
@@ -49,14 +70,29 @@ struct PPickParams
|
||||
double* LPBCoeffs; //low pass Filter num coefficients
|
||||
PPWinThresh WinT;//window size in frames for adaptive thresholding [pre post]:
|
||||
QFitThresh QuadThresh;
|
||||
float delta; //delta threshold used as an offset when computing the smoothed detection function
|
||||
|
||||
PPickParams() :
|
||||
length(0),
|
||||
tau(0),
|
||||
alpha(0),
|
||||
cutoff(0),
|
||||
LPOrd(0),
|
||||
LPACoeffs(NULL),
|
||||
LPBCoeffs(NULL),
|
||||
WinT(0,0),
|
||||
QuadThresh(0,0,0),
|
||||
delta(0)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
class PeakPicking
|
||||
class PeakPicking
|
||||
{
|
||||
public:
|
||||
PeakPicking( PPickParams Config );
|
||||
virtual ~PeakPicking();
|
||||
|
||||
|
||||
void process( double* src, unsigned int len, vector<int> &onsets );
|
||||
|
||||
|
||||
@@ -64,7 +100,7 @@ private:
|
||||
void initialise( PPickParams Config );
|
||||
void deInitialise();
|
||||
int quadEval( vector<double> &src, vector<int> &idx );
|
||||
|
||||
|
||||
DFProcConfig m_DFProcessingParams;
|
||||
|
||||
unsigned int m_DFLength ;
|
||||
@@ -74,7 +110,7 @@ private:
|
||||
|
||||
|
||||
double* m_workBuffer;
|
||||
|
||||
|
||||
DFProcess* m_DFSmoothing;
|
||||
};
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
This file 2005-2006 Christian Landone, copyright 2013 QMUL.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
@@ -15,30 +15,47 @@
|
||||
|
||||
#include "PhaseVocoder.h"
|
||||
#include "dsp/transforms/FFT.h"
|
||||
#include "maths/MathUtilities.h"
|
||||
#include <math.h>
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Construction/Destruction
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
#include <cassert>
|
||||
|
||||
PhaseVocoder::PhaseVocoder(unsigned int n) :
|
||||
m_n(n)
|
||||
#include <iostream>
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
PhaseVocoder::PhaseVocoder(int n, int hop) :
|
||||
m_n(n),
|
||||
m_hop(hop)
|
||||
{
|
||||
m_fft = new FFTReal(m_n);
|
||||
m_realOut = new double[m_n];
|
||||
m_imagOut = new double[m_n];
|
||||
m_time = new double[m_n];
|
||||
m_real = new double[m_n];
|
||||
m_imag = new double[m_n];
|
||||
m_phase = new double[m_n/2 + 1];
|
||||
m_unwrapped = new double[m_n/2 + 1];
|
||||
|
||||
for (int i = 0; i < m_n/2 + 1; ++i) {
|
||||
m_phase[i] = 0.0;
|
||||
m_unwrapped[i] = 0.0;
|
||||
}
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
PhaseVocoder::~PhaseVocoder()
|
||||
{
|
||||
delete [] m_realOut;
|
||||
delete [] m_imagOut;
|
||||
delete[] m_unwrapped;
|
||||
delete[] m_phase;
|
||||
delete[] m_real;
|
||||
delete[] m_imag;
|
||||
delete[] m_time;
|
||||
delete m_fft;
|
||||
}
|
||||
|
||||
void PhaseVocoder::FFTShift(unsigned int size, double *src)
|
||||
void PhaseVocoder::FFTShift(double *src)
|
||||
{
|
||||
const int hs = size/2;
|
||||
const int hs = m_n/2;
|
||||
for (int i = 0; i < hs; ++i) {
|
||||
double tmp = src[i];
|
||||
src[i] = src[i + hs];
|
||||
@@ -46,34 +63,73 @@ void PhaseVocoder::FFTShift(unsigned int size, double *src)
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseVocoder::process(double *src, double *mag, double *theta)
|
||||
void PhaseVocoder::processTimeDomain(const double *src,
|
||||
double *mag, double *theta,
|
||||
double *unwrapped)
|
||||
{
|
||||
FFTShift( m_n, src);
|
||||
|
||||
m_fft->process(0, src, m_realOut, m_imagOut);
|
||||
|
||||
getMagnitude( m_n/2, mag, m_realOut, m_imagOut);
|
||||
getPhase( m_n/2, theta, m_realOut, m_imagOut);
|
||||
for (int i = 0; i < m_n; ++i) {
|
||||
m_time[i] = src[i];
|
||||
}
|
||||
FFTShift(m_time);
|
||||
m_fft->forward(m_time, m_real, m_imag);
|
||||
getMagnitudes(mag);
|
||||
getPhases(theta);
|
||||
unwrapPhases(theta, unwrapped);
|
||||
}
|
||||
|
||||
void PhaseVocoder::getMagnitude(unsigned int size, double *mag, double *real, double *imag)
|
||||
void PhaseVocoder::processFrequencyDomain(const double *reals,
|
||||
const double *imags,
|
||||
double *mag, double *theta,
|
||||
double *unwrapped)
|
||||
{
|
||||
unsigned int j;
|
||||
for (int i = 0; i < m_n/2 + 1; ++i) {
|
||||
m_real[i] = reals[i];
|
||||
m_imag[i] = imags[i];
|
||||
}
|
||||
getMagnitudes(mag);
|
||||
getPhases(theta);
|
||||
unwrapPhases(theta, unwrapped);
|
||||
}
|
||||
|
||||
for( j = 0; j < size; j++)
|
||||
{
|
||||
mag[ j ] = sqrt( real[ j ] * real[ j ] + imag[ j ] * imag[ j ]);
|
||||
void PhaseVocoder::reset()
|
||||
{
|
||||
for (int i = 0; i < m_n/2 + 1; ++i) {
|
||||
// m_phase stores the "previous" phase, so set to one step
|
||||
// behind so that a signal with initial phase at zero matches
|
||||
// the expected values. This is completely unnecessary for any
|
||||
// analytical purpose, it's just tidier.
|
||||
double omega = (2 * M_PI * m_hop * i) / m_n;
|
||||
m_phase[i] = -omega;
|
||||
m_unwrapped[i] = -omega;
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseVocoder::getPhase(unsigned int size, double *theta, double *real, double *imag)
|
||||
{
|
||||
unsigned int k;
|
||||
|
||||
// Phase Angle "matlab" style
|
||||
//Watch out for quadrant mapping !!!
|
||||
for( k = 0; k < size; k++)
|
||||
{
|
||||
theta[ k ] = atan2( -imag[ k ], real[ k ]);
|
||||
void PhaseVocoder::getMagnitudes(double *mag)
|
||||
{
|
||||
for (int i = 0; i < m_n/2 + 1; i++) {
|
||||
mag[i] = sqrt(m_real[i] * m_real[i] + m_imag[i] * m_imag[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseVocoder::getPhases(double *theta)
|
||||
{
|
||||
for (int i = 0; i < m_n/2 + 1; i++) {
|
||||
theta[i] = atan2(m_imag[i], m_real[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void PhaseVocoder::unwrapPhases(double *theta, double *unwrapped)
|
||||
{
|
||||
for (int i = 0; i < m_n/2 + 1; ++i) {
|
||||
|
||||
double omega = (2 * M_PI * m_hop * i) / m_n;
|
||||
double expected = m_phase[i] + omega;
|
||||
double error = MathUtilities::princarg(theta[i] - expected);
|
||||
|
||||
unwrapped[i] = m_unwrapped[i] + omega + error;
|
||||
|
||||
m_phase[i] = theta[i];
|
||||
m_unwrapped[i] = unwrapped[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
This file 2005-2006 Christian Landone, copyright 2013 QMUL.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
@@ -18,25 +18,63 @@
|
||||
|
||||
class FFTReal;
|
||||
|
||||
class PhaseVocoder
|
||||
class PhaseVocoder
|
||||
{
|
||||
public:
|
||||
PhaseVocoder( unsigned int size );
|
||||
PhaseVocoder(int size, int hop);
|
||||
virtual ~PhaseVocoder();
|
||||
|
||||
void process( double* src, double* mag, double* theta);
|
||||
/**
|
||||
* Given one frame of time-domain samples, FFT and return the
|
||||
* magnitudes, instantaneous phases, and unwrapped phases.
|
||||
*
|
||||
* src must have size values (where size is the frame size value
|
||||
* as passed to the PhaseVocoder constructor), and should have
|
||||
* been windowed as necessary by the caller (but not fft-shifted).
|
||||
*
|
||||
* mag, phase, and unwrapped must each be non-NULL and point to
|
||||
* enough space for size/2 + 1 values. The redundant conjugate
|
||||
* half of the output is not returned.
|
||||
*/
|
||||
void processTimeDomain(const double *src,
|
||||
double *mag, double *phase, double *unwrapped);
|
||||
|
||||
/**
|
||||
* Given one frame of frequency-domain samples, return the
|
||||
* magnitudes, instantaneous phases, and unwrapped phases.
|
||||
*
|
||||
* reals and imags must each contain size/2+1 values (where size
|
||||
* is the frame size value as passed to the PhaseVocoder
|
||||
* constructor).
|
||||
*
|
||||
* mag, phase, and unwrapped must each be non-NULL and point to
|
||||
* enough space for size/2+1 values.
|
||||
*/
|
||||
void processFrequencyDomain(const double *reals, const double *imags,
|
||||
double *mag, double *phase, double *unwrapped);
|
||||
|
||||
/**
|
||||
* Reset the stored phases to their initial values (one hop behind
* zero phase, not exactly zero). Note that this may be
|
||||
* necessary occasionally (depending on the application) to avoid
|
||||
* loss of floating-point precision in the accumulated unwrapped
|
||||
* phase values as they grow.
|
||||
*/
|
||||
void reset();
|
||||
|
||||
protected:
|
||||
void getPhase(unsigned int size, double *theta, double *real, double *imag);
|
||||
// void coreFFT( unsigned int NumSamples, double *RealIn, double* ImagIn, double *RealOut, double *ImagOut);
|
||||
void getMagnitude( unsigned int size, double* mag, double* real, double* imag);
|
||||
void FFTShift( unsigned int size, double* src);
|
||||
void FFTShift(double *src);
|
||||
void getMagnitudes(double *mag);
|
||||
void getPhases(double *theta);
|
||||
void unwrapPhases(double *theta, double *unwrapped);
|
||||
|
||||
unsigned int m_n;
|
||||
int m_n;
|
||||
int m_hop;
|
||||
FFTReal *m_fft;
|
||||
double *m_imagOut;
|
||||
double *m_realOut;
|
||||
|
||||
double *m_time;
|
||||
double *m_imag;
|
||||
double *m_real;
|
||||
double *m_phase;
|
||||
double *m_unwrapped;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -199,10 +199,15 @@ void Decimator::doAntiAlias(const float *src, double *dst, unsigned int length)
|
||||
|
||||
void Decimator::process(const double *src, double *dst)
|
||||
{
|
||||
if( m_decFactor != 1 )
|
||||
{
|
||||
doAntiAlias( src, decBuffer, m_inputLength );
|
||||
if (m_decFactor == 1) {
|
||||
for( unsigned int i = 0; i < m_outputLength; i++ ) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
doAntiAlias( src, decBuffer, m_inputLength );
|
||||
|
||||
unsigned idx = 0;
|
||||
|
||||
for( unsigned int i = 0; i < m_outputLength; i++ )
|
||||
@@ -213,10 +218,15 @@ void Decimator::process(const double *src, double *dst)
|
||||
|
||||
void Decimator::process(const float *src, float *dst)
|
||||
{
|
||||
if( m_decFactor != 1 )
|
||||
{
|
||||
doAntiAlias( src, decBuffer, m_inputLength );
|
||||
if (m_decFactor == 1) {
|
||||
for( unsigned int i = 0; i < m_outputLength; i++ ) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
doAntiAlias( src, decBuffer, m_inputLength );
|
||||
|
||||
unsigned idx = 0;
|
||||
|
||||
for( unsigned int i = 0; i < m_outputLength; i++ )
|
||||
|
||||
@@ -15,12 +15,15 @@
|
||||
#ifndef DECIMATOR_H
|
||||
#define DECIMATOR_H
|
||||
|
||||
class Decimator
|
||||
/**
|
||||
* Decimator carries out a fast downsample by a power-of-two
|
||||
* factor. Only a limited number of factors are supported, from two to
|
||||
* whatever getHighestSupportedFactor() returns. This is much faster
|
||||
* than Resampler but has a worse signal-noise ratio.
|
||||
*/
|
||||
class Decimator
|
||||
{
|
||||
public:
|
||||
void process( const double* src, double* dst );
|
||||
void process( const float* src, float* dst );
|
||||
|
||||
/**
|
||||
* Construct a Decimator to operate on input blocks of length
|
||||
* inLength, with decimation factor decFactor. inLength should be
|
||||
@@ -34,11 +37,28 @@ public:
|
||||
Decimator( unsigned int inLength, unsigned int decFactor );
|
||||
virtual ~Decimator();
|
||||
|
||||
/**
|
||||
* Process inLength samples (as supplied to constructor) from src
|
||||
* and write inLength / decFactor samples to dst. Note that src
|
||||
* and dst may be the same or overlap (an intermediate buffer is
|
||||
* used).
|
||||
*/
|
||||
void process( const double* src, double* dst );
|
||||
|
||||
/**
|
||||
* Process inLength samples (as supplied to constructor) from src
|
||||
* and write inLength / decFactor samples to dst. Note that src
|
||||
* and dst may be the same or overlap (an intermediate buffer is
|
||||
* used).
|
||||
*/
|
||||
void process( const float* src, float* dst );
|
||||
|
||||
int getFactor() const { return m_decFactor; }
|
||||
static int getHighestSupportedFactor() { return 8; }
|
||||
|
||||
private:
|
||||
void resetFilter();
|
||||
|
||||
private:
|
||||
void deInitialise();
|
||||
void initialise( unsigned int inLength, unsigned int decFactor );
|
||||
void doAntiAlias( const double* src, double* dst, unsigned int length );
|
||||
@@ -55,8 +75,8 @@ private:
|
||||
|
||||
double a[ 9 ];
|
||||
double b[ 9 ];
|
||||
|
||||
|
||||
double* decBuffer;
|
||||
};
|
||||
|
||||
#endif //
|
||||
#endif //
|
||||
|
||||
160
libs/qm-dsp/dsp/rateconversion/DecimatorB.cpp
Normal file
160
libs/qm-dsp/dsp/rateconversion/DecimatorB.cpp
Normal file
@@ -0,0 +1,160 @@
|
||||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
|
||||
/*
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "DecimatorB.h"
|
||||
|
||||
#include "maths/MathUtilities.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using std::vector;
|
||||
|
||||
// Build a decimator for blocks of inLength samples reduced by decFactor.
// Every scalar member starts from a neutral value and both buffers start
// null, so the destructor's delete[] stays safe even when initialise()
// rejects the arguments and returns before allocating anything.
DecimatorB::DecimatorB(int inLength, int decFactor) :
    m_inputLength(0),
    m_outputLength(0),
    m_decFactor(1),
    m_aaBuffer(0),
    m_tmpBuffer(0)
{
    initialise(inLength, decFactor);
}
|
||||
|
||||
// Destructor: hands off to deInitialise(), which delete[]s the two
// working buffers (they are null if initialise() rejected its arguments).
DecimatorB::~DecimatorB()
|
||||
{
|
||||
deInitialise();
|
||||
}
|
||||
|
||||
void DecimatorB::initialise(int inLength, int decFactor)
|
||||
{
|
||||
m_inputLength = inLength;
|
||||
m_decFactor = decFactor;
|
||||
m_outputLength = m_inputLength / m_decFactor;
|
||||
|
||||
if (m_decFactor < 2 || !MathUtilities::isPowerOfTwo(m_decFactor)) {
|
||||
std::cerr << "ERROR: DecimatorB::initialise: Decimation factor must be a power of 2 and at least 2 (was: " << m_decFactor << ")" << std::endl;
|
||||
m_decFactor = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_inputLength % m_decFactor != 0) {
|
||||
std::cerr << "ERROR: DecimatorB::initialise: inLength must be a multiple of decimation factor (was: " << m_inputLength << ", factor is " << m_decFactor << ")" << std::endl;
|
||||
m_decFactor = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
m_aaBuffer = new double[m_inputLength];
|
||||
m_tmpBuffer = new double[m_inputLength];
|
||||
|
||||
// Order 6 Butterworth lowpass filter
|
||||
// Calculated using e.g. MATLAB butter(6, 0.5, 'low')
|
||||
|
||||
m_b[0] = 0.029588223638661;
|
||||
m_b[1] = 0.177529341831965;
|
||||
m_b[2] = 0.443823354579912;
|
||||
m_b[3] = 0.591764472773216;
|
||||
m_b[4] = 0.443823354579912;
|
||||
m_b[5] = 0.177529341831965;
|
||||
m_b[6] = 0.029588223638661;
|
||||
|
||||
m_a[0] = 1.000000000000000;
|
||||
m_a[1] = 0.000000000000000;
|
||||
m_a[2] = 0.777695961855673;
|
||||
m_a[3] = 0.000000000000000;
|
||||
m_a[4] = 0.114199425062434;
|
||||
m_a[5] = 0.000000000000000;
|
||||
m_a[6] = 0.001750925956183;
|
||||
|
||||
for (int factor = m_decFactor; factor > 1; factor /= 2) {
|
||||
m_o.push_back(vector<double>(6, 0.0));
|
||||
}
|
||||
}
|
||||
|
||||
void DecimatorB::deInitialise()
|
||||
{
|
||||
delete [] m_aaBuffer;
|
||||
delete [] m_tmpBuffer;
|
||||
}
|
||||
|
||||
void DecimatorB::doAntiAlias(const double *src, double *dst, int length,
|
||||
int filteridx)
|
||||
{
|
||||
vector<double> &o = m_o[filteridx];
|
||||
|
||||
for (int i = 0; i < length; i++) {
|
||||
|
||||
double input = src[i];
|
||||
double output = input * m_b[0] + o[0];
|
||||
|
||||
o[0] = input * m_b[1] - output * m_a[1] + o[1];
|
||||
o[1] = input * m_b[2] - output * m_a[2] + o[2];
|
||||
o[2] = input * m_b[3] - output * m_a[3] + o[3];
|
||||
o[3] = input * m_b[4] - output * m_a[4] + o[4];
|
||||
o[4] = input * m_b[5] - output * m_a[5] + o[5];
|
||||
o[5] = input * m_b[6] - output * m_a[6];
|
||||
|
||||
dst[i] = output;
|
||||
}
|
||||
}
|
||||
|
||||
void DecimatorB::doProcess()
|
||||
{
|
||||
int filteridx = 0;
|
||||
int factorDone = 1;
|
||||
int factorRemaining = m_decFactor;
|
||||
|
||||
while (factorDone < m_decFactor) {
|
||||
|
||||
doAntiAlias(m_tmpBuffer, m_aaBuffer,
|
||||
m_inputLength / factorDone,
|
||||
filteridx);
|
||||
|
||||
filteridx ++;
|
||||
factorDone *= 2;
|
||||
|
||||
for (int i = 0; i < m_inputLength / factorDone; ++i) {
|
||||
m_tmpBuffer[i] = m_aaBuffer[i * 2];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DecimatorB::process(const double *src, double *dst)
|
||||
{
|
||||
if (m_decFactor == 0) return;
|
||||
|
||||
for (int i = 0; i < m_inputLength; ++i) {
|
||||
m_tmpBuffer[i] = src[i];
|
||||
}
|
||||
|
||||
doProcess();
|
||||
|
||||
for (int i = 0; i < m_outputLength; ++i) {
|
||||
dst[i] = m_tmpBuffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
void DecimatorB::process(const float *src, float *dst)
|
||||
{
|
||||
if (m_decFactor == 0) return;
|
||||
|
||||
for (int i = 0; i < m_inputLength; ++i) {
|
||||
m_tmpBuffer[i] = src[i];
|
||||
}
|
||||
|
||||
doProcess();
|
||||
|
||||
for (int i = 0; i < m_outputLength; ++i) {
|
||||
dst[i] = m_tmpBuffer[i];
|
||||
}
|
||||
}
|
||||
64
libs/qm-dsp/dsp/rateconversion/DecimatorB.h
Normal file
64
libs/qm-dsp/dsp/rateconversion/DecimatorB.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
/*
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef DECIMATORB_H
|
||||
#define DECIMATORB_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
* DecimatorB carries out a fast downsample by a power-of-two
|
||||
* factor. It only knows how to decimate by a factor of 2, and will
|
||||
* use repeated decimation for higher factors. A Butterworth filter of
|
||||
* order 6 is used for the lowpass filter.
|
||||
*/
|
||||
class DecimatorB
{
public:
    /**
     * Construct a DecimatorB to operate on input blocks of length
     * inLength, with decimation factor decFactor.  inLength should be
     * a multiple of decFactor.  Output blocks will be of length
     * inLength / decFactor.
     *
     * decFactor must be a power of two and at least 2.  If the
     * arguments are rejected, an error is written to std::cerr,
     * getFactor() returns 0 and process() does nothing.
     */
    DecimatorB(int inLength, int decFactor);
    virtual ~DecimatorB();

    /**
     * Process inLength samples (as supplied to constructor) from src
     * and write inLength / decFactor samples to dst.  The input is
     * copied to an intermediate buffer before any output is written,
     * so src and dst may be the same or overlap.
     */
    void process( const double* src, double* dst );

    /**
     * As above, for single-precision data.  Filtering is carried out
     * in double precision internally.
     */
    void process( const float* src, float* dst );

    /**
     * Return the decimation factor in use, or 0 if the constructor
     * arguments were rejected.
     */
    int getFactor() const { return m_decFactor; }

private:
    // Not copyable: the object owns m_aaBuffer and m_tmpBuffer, and an
    // implicit member-wise copy would delete[] them twice.  Declared
    // but intentionally not defined.
    DecimatorB(const DecimatorB &);
    DecimatorB &operator=(const DecimatorB &);

    void deInitialise();
    void initialise(int inLength, int decFactor);
    void doAntiAlias(const double* src, double* dst, int length, int filteridx);
    void doProcess();

    int m_inputLength;   // samples per input block
    int m_outputLength;  // samples per output block (input / factor)
    int m_decFactor;     // decimation factor; 0 marks an invalid setup

    // Filter state: one 6-element vector per halving stage.
    std::vector<std::vector<double> > m_o;

    double m_a[7];  // lowpass feedback (denominator) coefficients
    double m_b[7];  // lowpass feed-forward (numerator) coefficients

    double *m_aaBuffer;   // output of the anti-alias filter for one stage
    double *m_tmpBuffer;  // staged input / decimated result
};
|
||||
|
||||
#endif
|
||||
|
||||
416
libs/qm-dsp/dsp/rateconversion/Resampler.cpp
Normal file
416
libs/qm-dsp/dsp/rateconversion/Resampler.cpp
Normal file
@@ -0,0 +1,416 @@
|
||||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
/*
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file by Chris Cannam.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#include "Resampler.h"
|
||||
|
||||
#include "maths/MathUtilities.h"
|
||||
#include "base/KaiserWindow.h"
|
||||
#include "base/SincWindow.h"
|
||||
#include "thread/Thread.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <cassert>
|
||||
|
||||
using std::vector;
|
||||
using std::map;
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
//#define DEBUG_RESAMPLER 1
|
||||
//#define DEBUG_RESAMPLER_VERBOSE 1
|
||||
|
||||
// Construct a resampler from sourceRate to targetRate using the default
// filter design parameters (the same two values the four-argument
// constructor receives as snr and bandwidth).
Resampler::Resampler(int sourceRate, int targetRate) :
    m_sourceRate(sourceRate),
    m_targetRate(targetRate)
{
    const double defaultSnr = 100;
    const double defaultBandwidth = 0.02;

    initialise(defaultSnr, defaultBandwidth);
}

// Construct a resampler from sourceRate to targetRate with caller-chosen
// filter design parameters; snr and bandwidth are forwarded unchanged to
// initialise().
Resampler::Resampler(int sourceRate, int targetRate,
                     double snr, double bandwidth) :
    m_sourceRate(sourceRate),
    m_targetRate(targetRate)
{
    initialise(snr, bandwidth);
}
|
||||
|
||||
// Destructor: frees the per-phase filter table that initialise()
// allocated with new[].
Resampler::~Resampler()
|
||||
{
|
||||
delete[] m_phaseData;
|
||||
}
|
||||
|
||||
// peakToPole -> length -> beta -> window
|
||||
static map<double, map<int, map<double, vector<double> > > >
|
||||
knownFilters;
|
||||
|
||||
static Mutex
|
||||
knownFilterMutex;
|
||||
|
||||
void
|
||||
Resampler::initialise(double snr, double bandwidth)
|
||||
{
|
||||
int higher = std::max(m_sourceRate, m_targetRate);
|
||||
int lower = std::min(m_sourceRate, m_targetRate);
|
||||
|
||||
m_gcd = MathUtilities::gcd(lower, higher);
|
||||
m_peakToPole = higher / m_gcd;
|
||||
|
||||
if (m_targetRate < m_sourceRate) {
|
||||
// antialiasing filter, should be slightly below nyquist
|
||||
m_peakToPole = m_peakToPole / (1.0 - bandwidth/2.0);
|
||||
}
|
||||
|
||||
KaiserWindow::Parameters params =
|
||||
KaiserWindow::parametersForBandwidth(snr, bandwidth, higher / m_gcd);
|
||||
|
||||
params.length =
|
||||
(params.length % 2 == 0 ? params.length + 1 : params.length);
|
||||
|
||||
params.length =
|
||||
(params.length > 200001 ? 200001 : params.length);
|
||||
|
||||
m_filterLength = params.length;
|
||||
|
||||
vector<double> filter;
|
||||
knownFilterMutex.lock();
|
||||
|
||||
if (knownFilters[m_peakToPole][m_filterLength].find(params.beta) ==
|
||||
knownFilters[m_peakToPole][m_filterLength].end()) {
|
||||
|
||||
KaiserWindow kw(params);
|
||||
SincWindow sw(m_filterLength, m_peakToPole * 2);
|
||||
|
||||
filter = vector<double>(m_filterLength, 0.0);
|
||||
for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0;
|
||||
sw.cut(filter.data());
|
||||
kw.cut(filter.data());
|
||||
|
||||
knownFilters[m_peakToPole][m_filterLength][params.beta] = filter;
|
||||
}
|
||||
|
||||
filter = knownFilters[m_peakToPole][m_filterLength][params.beta];
|
||||
knownFilterMutex.unlock();
|
||||
|
||||
int inputSpacing = m_targetRate / m_gcd;
|
||||
int outputSpacing = m_sourceRate / m_gcd;
|
||||
|
||||
#ifdef DEBUG_RESAMPLER
|
||||
cerr << "resample " << m_sourceRate << " -> " << m_targetRate
|
||||
<< ": inputSpacing " << inputSpacing << ", outputSpacing "
|
||||
<< outputSpacing << ": filter length " << m_filterLength
|
||||
<< endl;
|
||||
#endif
|
||||
|
||||
// Now we have a filter of (odd) length flen in which the lower
|
||||
// sample rate corresponds to every n'th point and the higher rate
|
||||
// to every m'th where n and m are higher and lower rates divided
|
||||
// by their gcd respectively. So if x coordinates are on the same
|
||||
// scale as our filter resolution, then source sample i is at i *
|
||||
// (targetRate / gcd) and target sample j is at j * (sourceRate /
|
||||
// gcd).
|
||||
|
||||
// To reconstruct a single target sample, we want a buffer (real
|
||||
// or virtual) of flen values formed of source samples spaced at
|
||||
// intervals of (targetRate / gcd), in our example case 3. This
|
||||
// is initially formed with the first sample at the filter peak.
|
||||
//
|
||||
// 0 0 0 0 a 0 0 b 0
|
||||
//
|
||||
// and of course we have our filter
|
||||
//
|
||||
// f1 f2 f3 f4 f5 f6 f7 f8 f9
|
||||
//
|
||||
// We take the sum of products of non-zero values from this buffer
|
||||
// with corresponding values in the filter
|
||||
//
|
||||
// a * f5 + b * f8
|
||||
//
|
||||
// Then we drop (sourceRate / gcd) values, in our example case 4,
|
||||
// from the start of the buffer and fill until it has flen values
|
||||
// again
|
||||
//
|
||||
// a 0 0 b 0 0 c 0 0
|
||||
//
|
||||
// repeat to reconstruct the next target sample
|
||||
//
|
||||
// a * f1 + b * f4 + c * f7
|
||||
//
|
||||
// and so on.
|
||||
//
|
||||
// Above I said the buffer could be "real or virtual" -- ours is
|
||||
// virtual. We don't actually store all the zero spacing values,
|
||||
// except for padding at the start; normally we store only the
|
||||
// values that actually came from the source stream, along with a
|
||||
// phase value that tells us how many virtual zeroes there are at
|
||||
// the start of the virtual buffer. So the two examples above are
|
||||
//
|
||||
// 0 a b [ with phase 1 ]
|
||||
// a b c [ with phase 0 ]
|
||||
//
|
||||
// Having thus broken down the buffer so that only the elements we
|
||||
// need to multiply are present, we can also unzip the filter into
|
||||
// every-nth-element subsets at each phase, allowing us to do the
|
||||
// filter multiplication as a simply vector multiply. That is, rather
|
||||
// than store
|
||||
//
|
||||
// f1 f2 f3 f4 f5 f6 f7 f8 f9
|
||||
//
|
||||
// we store separately
|
||||
//
|
||||
// f1 f4 f7
|
||||
// f2 f5 f8
|
||||
// f3 f6 f9
|
||||
//
|
||||
// Each time we complete a multiply-and-sum, we need to work out
|
||||
// how many (real) samples to drop from the start of our buffer,
|
||||
// and how many to add at the end of it for the next multiply. We
|
||||
// know we want to drop enough real samples to move along by one
|
||||
// computed output sample, which is our outputSpacing number of
|
||||
// virtual buffer samples. Depending on the relationship between
|
||||
// input and output spacings, this may mean dropping several real
|
||||
// samples, one real sample, or none at all (and simply moving to
|
||||
// a different "phase").
|
||||
|
||||
m_phaseData = new Phase[inputSpacing];
|
||||
|
||||
for (int phase = 0; phase < inputSpacing; ++phase) {
|
||||
|
||||
Phase p;
|
||||
|
||||
p.nextPhase = phase - outputSpacing;
|
||||
while (p.nextPhase < 0) p.nextPhase += inputSpacing;
|
||||
p.nextPhase %= inputSpacing;
|
||||
|
||||
p.drop = int(ceil(std::max(0.0, double(outputSpacing - phase))
|
||||
/ inputSpacing));
|
||||
|
||||
int filtZipLength = int(ceil(double(m_filterLength - phase)
|
||||
/ inputSpacing));
|
||||
|
||||
for (int i = 0; i < filtZipLength; ++i) {
|
||||
p.filter.push_back(filter[i * inputSpacing + phase]);
|
||||
}
|
||||
|
||||
m_phaseData[phase] = p;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_RESAMPLER
|
||||
int cp = 0;
|
||||
int totDrop = 0;
|
||||
for (int i = 0; i < inputSpacing; ++i) {
|
||||
cerr << "phase = " << cp << ", drop = " << m_phaseData[cp].drop
|
||||
<< ", filter length = " << m_phaseData[cp].filter.size()
|
||||
<< ", next phase = " << m_phaseData[cp].nextPhase << endl;
|
||||
totDrop += m_phaseData[cp].drop;
|
||||
cp = m_phaseData[cp].nextPhase;
|
||||
}
|
||||
cerr << "total drop = " << totDrop << endl;
|
||||
#endif
|
||||
|
||||
// The May implementation of this uses a pull model -- we ask the
|
||||
// resampler for a certain number of output samples, and it asks
|
||||
// its source stream for as many as it needs to calculate
|
||||
// those. This means (among other things) that the source stream
|
||||
// can be asked for enough samples up-front to fill the buffer
|
||||
// before the first output sample is generated.
|
||||
//
|
||||
// In this implementation we're using a push model in which a
|
||||
// certain number of source samples is provided and we're asked
|
||||
// for as many output samples as that makes available. But we
|
||||
// can't return any samples from the beginning until half the
|
||||
// filter length has been provided as input. This means we must
|
||||
// either return a very variable number of samples (none at all
|
||||
// until the filter fills, then half the filter length at once) or
|
||||
// else have a lengthy declared latency on the output. We do the
|
||||
// latter. (What do other implementations do?)
|
||||
//
|
||||
// We want to make sure the first "real" sample will eventually be
|
||||
// aligned with the centre sample in the filter (it's tidier, and
|
||||
// easier to do diagnostic calculations that way). So we need to
|
||||
// pick the initial phase and buffer fill accordingly.
|
||||
//
|
||||
// Example: if the inputSpacing is 2, outputSpacing is 3, and
|
||||
// filter length is 7,
|
||||
//
|
||||
// x x x x a b c ... input samples
|
||||
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ...
|
||||
// i j k l ... output samples
|
||||
// [--------|--------] <- filter with centre mark
|
||||
//
|
||||
// Let h be the index of the centre mark, here 3 (generally
|
||||
// int(filterLength/2) for odd-length filters).
|
||||
//
|
||||
// The smallest n such that h + n * outputSpacing > filterLength
|
||||
// is 2 (that is, ceil((filterLength - h) / outputSpacing)), and
|
||||
// (h + 2 * outputSpacing) % inputSpacing == 1, so the initial
|
||||
// phase is 1.
|
||||
//
|
||||
// To achieve our n, we need to pre-fill the "virtual" buffer with
|
||||
// 4 zero samples: the x's above. This is int((h + n *
|
||||
// outputSpacing) / inputSpacing). It's the phase that makes this
|
||||
// buffer get dealt with in such a way as to give us an effective
|
||||
// index for sample a of 9 rather than 8 or 10 or whatever.
|
||||
//
|
||||
// This gives us output latency of 2 (== n), i.e. output samples i
|
||||
// and j will appear before the one in which input sample a is at
|
||||
// the centre of the filter.
|
||||
|
||||
int h = int(m_filterLength / 2);
|
||||
int n = ceil(double(m_filterLength - h) / outputSpacing);
|
||||
|
||||
m_phase = (h + n * outputSpacing) % inputSpacing;
|
||||
|
||||
int fill = (h + n * outputSpacing) / inputSpacing;
|
||||
|
||||
m_latency = n;
|
||||
|
||||
m_buffer = vector<double>(fill, 0);
|
||||
m_bufferOrigin = 0;
|
||||
|
||||
#ifdef DEBUG_RESAMPLER
|
||||
cerr << "initial phase " << m_phase << " (as " << (m_filterLength/2) << " % " << inputSpacing << ")"
|
||||
<< ", latency " << m_latency << endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
double
|
||||
Resampler::reconstructOne()
|
||||
{
|
||||
Phase &pd = m_phaseData[m_phase];
|
||||
double v = 0.0;
|
||||
int n = pd.filter.size();
|
||||
|
||||
assert(n + m_bufferOrigin <= (int)m_buffer.size());
|
||||
|
||||
const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin;
|
||||
const double *const __restrict__ filt = pd.filter.data();
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
// NB gcc can only vectorize this with -ffast-math
|
||||
v += buf[i] * filt[i];
|
||||
}
|
||||
|
||||
m_bufferOrigin += pd.drop;
|
||||
m_phase = pd.nextPhase;
|
||||
return v;
|
||||
}
|
||||
|
||||
int
|
||||
Resampler::process(const double *src, double *dst, int n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i) {
|
||||
m_buffer.push_back(src[i]);
|
||||
}
|
||||
|
||||
int maxout = int(ceil(double(n) * m_targetRate / m_sourceRate));
|
||||
int outidx = 0;
|
||||
|
||||
#ifdef DEBUG_RESAMPLER
|
||||
cerr << "process: buf siz " << m_buffer.size() << " filt siz for phase " << m_phase << " " << m_phaseData[m_phase].filter.size() << endl;
|
||||
#endif
|
||||
|
||||
double scaleFactor = (double(m_targetRate) / m_gcd) / m_peakToPole;
|
||||
|
||||
while (outidx < maxout &&
|
||||
m_buffer.size() >= m_phaseData[m_phase].filter.size() + m_bufferOrigin) {
|
||||
dst[outidx] = scaleFactor * reconstructOne();
|
||||
outidx++;
|
||||
}
|
||||
|
||||
m_buffer = vector<double>(m_buffer.begin() + m_bufferOrigin, m_buffer.end());
|
||||
m_bufferOrigin = 0;
|
||||
|
||||
return outidx;
|
||||
}
|
||||
|
||||
/**
 * Convenience overload: resample n input samples from src and return
 * the output by value. The result is sized to the number of samples
 * the pointer-based process() actually produced.
 */
vector<double>
Resampler::process(const double *src, int n)
{
    // Upper bound on the number of samples a single call can emit.
    const int capacity = int(ceil(double(n) * m_targetRate / m_sourceRate));

    vector<double> result(capacity, 0.0);

    const int produced = process(src, result.data(), n);
    assert(produced <= capacity);

    // Trim the unused tail when fewer samples than the bound came out.
    if (produced < capacity) {
        result.resize(produced);
    }

    return result;
}
|
||||
|
||||
/**
 * One-shot resample of a single block of n samples from sourceRate
 * to targetRate. A temporary Resampler is constructed, the input is
 * followed by zero padding so that the filter latency can be flushed,
 * and the first "latency" output samples are dropped so that the
 * result is latency-compensated.
 *
 * @param sourceRate  sample rate of the input
 * @param targetRate  sample rate wanted for the output
 * @param data        input samples; n values are read
 * @param n           number of input samples
 * @return at most ceil(n * targetRate / sourceRate) output samples
 */
vector<double>
Resampler::resample(int sourceRate, int targetRate, const double *data, int n)
{
    Resampler r(sourceRate, targetRate);

    int latency = r.getLatency();

    // latency is the output latency. We need to provide enough
    // padding input samples at the end of input to guarantee at
    // *least* the latency's worth of output samples, that is:

    int inputPad = int(ceil((double(latency) * sourceRate) / targetRate));

    // that means we are providing this much input in total:

    int n1 = n + inputPad;

    // and obtaining this much output in total:

    int m1 = int(ceil((double(n1) * targetRate) / sourceRate));

    // in order to return this much output to the user:

    int m = int(ceil((double(n) * targetRate) / sourceRate));

#ifdef DEBUG_RESAMPLER
    cerr << "n = " << n << ", sourceRate = " << sourceRate << ", targetRate = " << targetRate << ", m = " << m << ", latency = " << latency << ", inputPad = " << inputPad << ", m1 = " << m1 << ", n1 = " << n1 << ", n1 - n = " << n1 - n << endl;
#endif

    vector<double> pad(n1 - n, 0.0);

    // One spare slot: each of the two process() calls below rounds its
    // own output bound up, so together they may exceed m1 by one.
    vector<double> out(m1 + 1, 0.0);

    int gotData = r.process(data, out.data(), n);
    int gotPad = r.process(pad.data(), out.data() + gotData, int(pad.size()));
    int got = gotData + gotPad;

#ifdef DEBUG_RESAMPLER
    cerr << "resample: " << n << " in, " << pad.size() << " padding, " << got << " out (" << gotData << " data, " << gotPad << " padding, latency = " << latency << ")" << endl;
#endif
#ifdef DEBUG_RESAMPLER_VERBOSE
    int printN = 50;
    cerr << "first " << printN << " in:" << endl;
    for (int i = 0; i < printN && i < n; ++i) {
        if (i % 5 == 0) cerr << endl << i << "... ";
        cerr << data[i] << " ";
    }
    cerr << endl;
#endif

    int toReturn = got - latency;
    if (toReturn > m) toReturn = m;
    // Defensive: if fewer than "latency" samples were produced, return
    // an empty result instead of building a reversed iterator range.
    if (toReturn < 0) toReturn = 0;

    vector<double> sliced(out.begin() + latency,
                          out.begin() + latency + toReturn);

#ifdef DEBUG_RESAMPLER_VERBOSE
    cerr << "first " << printN << " out (after latency compensation), length " << sliced.size() << ":";
    for (int i = 0; i < printN && i < (int)sliced.size(); ++i) {
        if (i % 5 == 0) cerr << endl << i << "... ";
        cerr << sliced[i] << " ";
    }
    cerr << endl;
#endif

    return sliced;
}
|
||||
|
||||
102
libs/qm-dsp/dsp/rateconversion/Resampler.h
Normal file
102
libs/qm-dsp/dsp/rateconversion/Resampler.h
Normal file
@@ -0,0 +1,102 @@
|
||||
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
||||
/*
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file by Chris Cannam.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifndef RESAMPLER_H
|
||||
#define RESAMPLER_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
/**
 * Resampler converts a stream from one integer sample rate to
 * another (arbitrary) integer rate using a kaiser-windowed sinc
 * filter. Results and performance are broadly comparable to
 * libraries such as libsamplerate, but the ratio is fixed at
 * construction: time-varying ratios are not supported.
 *
 * See also Decimator, which is faster and rougher but only handles
 * power-of-two downsampling factors.
 */
class Resampler
{
public:
    /**
     * Create a Resampler converting from sourceRate to targetRate
     * with the default filter parameters.
     */
    Resampler(int sourceRate, int targetRate);

    /**
     * Create a Resampler converting from sourceRate to targetRate
     * with the given filter parameters.
     */
    Resampler(int sourceRate, int targetRate, double snr, double bandwidth);

    virtual ~Resampler();

    /**
     * Consume n input samples from src and write the resampled data
     * to dst. Returns the number of samples written, which is never
     * more than ceil((n * targetRate) / sourceRate). The caller is
     * responsible for making dst large enough for that many samples.
     */
    int process(const double *src, double *dst, int n);

    /**
     * Consume n input samples from src and return the resampled
     * data by value.
     */
    std::vector<double> process(const double *src, int n);

    /**
     * Return the latency, in output samples, introduced by the
     * filter. (That is, the output is delayed by this many samples
     * relative to the input.)
     */
    int getLatency() const { return m_latency; }

    /**
     * Resample a single block of n samples in one call. The
     * returned output is latency-compensated.
     */
    static std::vector<double> resample(int sourceRate, int targetRate,
                                        const double *data, int n);

private:
    int m_sourceRate;
    int m_targetRate;
    int m_gcd;
    int m_filterLength;
    int m_bufferLength;
    int m_latency;      // value reported by getLatency()
    double m_peakToPole;

    // Per-phase data consulted each time one output sample is made.
    struct Phase {
        int nextPhase;              // phase index to use for the following output sample
        std::vector<double> filter; // filter coefficients applied to the buffered input
        int drop;                   // amount added to m_bufferOrigin after each output sample
    };

    Phase *m_phaseData;             // indexed by m_phase
    int m_phase;                    // index of the current entry in m_phaseData
    std::vector<double> m_buffer;   // input samples awaiting filtering
    int m_bufferOrigin;             // offset into m_buffer at which filtering starts

    void initialise(double, double);
    double reconstructOne();
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -51,7 +51,7 @@ void ClusterMeltSegmenter::initialise(int fs)
|
||||
|
||||
if (featureType == FEATURE_TYPE_CONSTQ ||
|
||||
featureType == FEATURE_TYPE_CHROMA) {
|
||||
|
||||
|
||||
// run internal processing at 11025 or thereabouts
|
||||
int internalRate = 11025;
|
||||
int decimationFactor = samplerate / internalRate;
|
||||
@@ -77,11 +77,11 @@ void ClusterMeltSegmenter::initialise(int fs)
|
||||
|
||||
constq = new ConstantQ(config);
|
||||
constq->sparsekernel();
|
||||
|
||||
|
||||
ncoeff = constq->getK();
|
||||
|
||||
fft = new FFTReal(constq->getfftlength());
|
||||
|
||||
|
||||
} else if (featureType == FEATURE_TYPE_MFCC) {
|
||||
|
||||
// run internal processing at 22050 or thereabouts
|
||||
@@ -110,7 +110,7 @@ void ClusterMeltSegmenter::initialise(int fs)
|
||||
}
|
||||
}
|
||||
|
||||
ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
||||
ClusterMeltSegmenter::~ClusterMeltSegmenter()
|
||||
{
|
||||
delete window;
|
||||
delete constq;
|
||||
@@ -164,7 +164,7 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
|
||||
vector<double> cq(ncoeff);
|
||||
|
||||
for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
|
||||
|
||||
|
||||
const double *psource = samples;
|
||||
int pcount = nsamples;
|
||||
|
||||
@@ -174,9 +174,9 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
|
||||
decimator->process(samples, decout);
|
||||
psource = decout;
|
||||
}
|
||||
|
||||
|
||||
int origin = 0;
|
||||
|
||||
|
||||
// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
|
||||
|
||||
int frames = 0;
|
||||
@@ -208,11 +208,11 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
|
||||
}
|
||||
|
||||
window->cut(frame);
|
||||
|
||||
fft->process(false, frame, real, imag);
|
||||
|
||||
|
||||
fft->forward(frame, real, imag);
|
||||
|
||||
constq->process(real, imag, cqre, cqim);
|
||||
|
||||
|
||||
for (int i = 0; i < ncoeff; ++i) {
|
||||
cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
|
||||
}
|
||||
@@ -255,7 +255,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
|
||||
vector<double> cc(ncoeff);
|
||||
|
||||
for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
|
||||
|
||||
|
||||
const double *psource = samples;
|
||||
int pcount = nsamples;
|
||||
|
||||
@@ -287,7 +287,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
|
||||
}
|
||||
|
||||
mfcc->process(frame, ccout);
|
||||
|
||||
|
||||
for (int i = 0; i < ncoeff; ++i) {
|
||||
cc[i] += ccout[i];
|
||||
}
|
||||
@@ -330,44 +330,44 @@ void ClusterMeltSegmenter::segment()
|
||||
decimator = 0;
|
||||
|
||||
if (features.size() < histogramLength) return;
|
||||
/*
|
||||
/*
|
||||
std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
|
||||
<< " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
|
||||
*/
|
||||
// copy the features to a native array and use the existing C segmenter...
|
||||
double** arrFeatures = new double*[features.size()];
|
||||
double** arrFeatures = new double*[features.size()];
|
||||
for (int i = 0; i < features.size(); i++)
|
||||
{
|
||||
if (featureType == FEATURE_TYPE_UNKNOWN) {
|
||||
arrFeatures[i] = new double[features[0].size()];
|
||||
for (int j = 0; j < features[0].size(); j++)
|
||||
arrFeatures[i][j] = features[i][j];
|
||||
arrFeatures[i][j] = features[i][j];
|
||||
} else {
|
||||
arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
|
||||
for (int j = 0; j < ncoeff; j++)
|
||||
arrFeatures[i][j] = features[i][j];
|
||||
arrFeatures[i][j] = features[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
q = new int[features.size()];
|
||||
|
||||
|
||||
if (featureType == FEATURE_TYPE_UNKNOWN ||
|
||||
featureType == FEATURE_TYPE_MFCC)
|
||||
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
||||
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
|
||||
nclusters, neighbourhoodLimit);
|
||||
else
|
||||
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
||||
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
|
||||
nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
|
||||
|
||||
|
||||
// convert the cluster assignment sequence to a segmentation
|
||||
makeSegmentation(q, features.size());
|
||||
|
||||
makeSegmentation(q, features.size());
|
||||
|
||||
// de-allocate arrays
|
||||
delete [] q;
|
||||
for (int i = 0; i < features.size(); i++)
|
||||
delete [] arrFeatures[i];
|
||||
delete [] arrFeatures;
|
||||
|
||||
|
||||
// clear the features
|
||||
clear();
|
||||
}
|
||||
@@ -377,11 +377,11 @@ void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
|
||||
segmentation.segments.clear();
|
||||
segmentation.nsegtypes = nclusters;
|
||||
segmentation.samplerate = samplerate;
|
||||
|
||||
|
||||
Segment segment;
|
||||
segment.start = 0;
|
||||
segment.type = q[0];
|
||||
|
||||
|
||||
for (int i = 1; i < len; i++)
|
||||
{
|
||||
if (q[i] != q[i-1])
|
||||
|
||||
@@ -31,12 +31,12 @@ class ClusterMeltSegmenterParams
|
||||
// defaults are sensible for 11025Hz with 0.2 second hopsize
|
||||
{
|
||||
public:
|
||||
ClusterMeltSegmenterParams() :
|
||||
ClusterMeltSegmenterParams() :
|
||||
featureType(FEATURE_TYPE_CONSTQ),
|
||||
hopSize(0.2),
|
||||
windowSize(0.6),
|
||||
fmin(62),
|
||||
fmax(16000),
|
||||
fmax(16000),
|
||||
nbins(8),
|
||||
ncomponents(20),
|
||||
nHMMStates(40),
|
||||
@@ -72,34 +72,34 @@ public:
|
||||
|
||||
protected:
|
||||
void makeSegmentation(int* q, int len);
|
||||
|
||||
|
||||
void extractFeaturesConstQ(const double *, int);
|
||||
void extractFeaturesMFCC(const double *, int);
|
||||
|
||||
Window<double> *window;
|
||||
FFTReal *fft;
|
||||
ConstantQ* constq;
|
||||
ConstantQ* constq;
|
||||
MFCC* mfcc;
|
||||
model_t* model; // the HMM
|
||||
int* q; // the decoded HMM state sequence
|
||||
vector<vector<double> > histograms;
|
||||
|
||||
feature_types featureType;
|
||||
vector<vector<double> > histograms;
|
||||
|
||||
feature_types featureType;
|
||||
double hopSize; // in seconds
|
||||
double windowSize; // in seconds
|
||||
|
||||
|
||||
// constant-Q parameters
|
||||
int fmin;
|
||||
int fmax;
|
||||
int nbins;
|
||||
int ncoeff;
|
||||
|
||||
|
||||
// PCA parameters
|
||||
int ncomponents;
|
||||
|
||||
|
||||
// HMM parameters
|
||||
int nHMMStates;
|
||||
|
||||
|
||||
// clustering parameters
|
||||
int nclusters;
|
||||
int histogramLength;
|
||||
|
||||
@@ -19,13 +19,13 @@
|
||||
/**
 * Write a Segmentation to a stream as text: one header line, then
 * one line per segment giving the segment type followed by its start
 * and end positions divided by the sample rate, printed in fixed
 * notation with six decimal places.
 */
ostream& operator<<(ostream& os, const Segmentation& s)
{
    os << "structure_name : begin_time end_time\n";

    // Dividing by a double forces floating-point division.
    const double rate = static_cast<double>(s.samplerate);

    for (int idx = 0; idx < s.segments.size(); ++idx)
    {
        const Segment &seg = s.segments[idx];

        os << std::fixed << seg.type << ':' << '\t';
        os << std::setprecision(6) << seg.start / rate << '\t';
        os << std::setprecision(6) << seg.end / rate << "\n";
    }

    return os;
}
|
||||
|
||||
@@ -35,7 +35,7 @@ class Segmentation
|
||||
public:
|
||||
int nsegtypes; // number of segment types, so possible types are {0,1,...,nsegtypes-1}
|
||||
int samplerate;
|
||||
vector<Segment> segments;
|
||||
vector<Segment> segments;
|
||||
};
|
||||
|
||||
ostream& operator<<(ostream& os, const Segmentation& s);
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
virtual void segment() = 0; // call once all the features have been extracted
|
||||
virtual void segment(int m) = 0; // specify desired number of segment-types
|
||||
virtual void clear() { features.clear(); }
|
||||
const Segmentation& getSegmentation() const { return segmentation; }
|
||||
const Segmentation& getSegmentation() const { return segmentation; }
|
||||
protected:
|
||||
vector<vector<double> > features;
|
||||
Segmentation segmentation;
|
||||
|
||||
@@ -25,7 +25,7 @@ double kldist(double* a, double* b, int n) {
|
||||
because a, b represent probability distributions */
|
||||
double q, d;
|
||||
int i;
|
||||
|
||||
|
||||
d = 0;
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
@@ -38,8 +38,8 @@ double kldist(double* a, double* b, int n) {
|
||||
d += b[i] * log(b[i] / q);
|
||||
}
|
||||
}
|
||||
return d;
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
|
||||
double lambda, sum, beta, logsumexp, maxlp;
|
||||
@@ -48,9 +48,9 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
int** nc; /* neighbour counts for each histogram */
|
||||
double** lp; /* soft assignment probs for each histogram */
|
||||
int* oldc; /* previous hard assignments (to check convergence) */
|
||||
|
||||
|
||||
/* NB h is passed as a 1d row major array */
|
||||
|
||||
|
||||
/* parameter values */
|
||||
lambda = DEFAULT_LAMBDA;
|
||||
if (l > 0)
|
||||
@@ -60,22 +60,22 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
B = 2 * limit + 1;
|
||||
maxiter0 = 20; /* number of iterations at initial temperature */
|
||||
maxiter1 = 5; /* number of iterations at subsequent temperatures */
|
||||
|
||||
/* allocate memory */
|
||||
|
||||
/* allocate memory */
|
||||
cl = (double**) malloc(k*sizeof(double*));
|
||||
for (i= 0; i < k; i++)
|
||||
cl[i] = (double*) malloc(m*sizeof(double));
|
||||
|
||||
|
||||
nc = (int**) malloc(n*sizeof(int*));
|
||||
for (i= 0; i < n; i++)
|
||||
nc[i] = (int*) malloc(k*sizeof(int));
|
||||
|
||||
|
||||
lp = (double**) malloc(n*sizeof(double*));
|
||||
for (i= 0; i < n; i++)
|
||||
lp[i] = (double*) malloc(k*sizeof(double));
|
||||
|
||||
|
||||
oldc = (int*) malloc(n * sizeof(int));
|
||||
|
||||
|
||||
/* initialise */
|
||||
for (i = 0; i < k; i++)
|
||||
{
|
||||
@@ -90,40 +90,40 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
{
|
||||
cl[i][j] /= sum; /* normalise */
|
||||
}
|
||||
}
|
||||
}
|
||||
//print_array(cl, k, m);
|
||||
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
c[i] = 1; /* initially assign all histograms to cluster 1 */
|
||||
|
||||
|
||||
for (a = 0; a < t; a++)
|
||||
{
|
||||
beta = Bsched[a];
|
||||
|
||||
|
||||
if (a == 0)
|
||||
maxiter = maxiter0;
|
||||
else
|
||||
maxiter = maxiter1;
|
||||
|
||||
|
||||
for (it = 0; it < maxiter; it++)
|
||||
{
|
||||
//if (it == maxiter - 1)
|
||||
// mexPrintf("hasn't converged after %d iterations\n", maxiter);
|
||||
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
/* save current hard assignments */
|
||||
oldc[i] = c[i];
|
||||
|
||||
|
||||
/* calculate soft assignment logprobs for each cluster */
|
||||
sum = 0;
|
||||
for (j = 0; j < k; j++)
|
||||
{
|
||||
lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
|
||||
|
||||
|
||||
/* update matching neighbour counts for this histogram, based on current hard assignments */
|
||||
/* old version:
|
||||
nc[i][j] = 0;
|
||||
nc[i][j] = 0;
|
||||
if (i >= limit && i <= n - 1 - limit)
|
||||
{
|
||||
for (b = i - limit; b <= i + limit; b++)
|
||||
@@ -144,14 +144,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
for (b = b0; b <= b1; b++)
|
||||
if (c[b] == j+1)
|
||||
nc[i][j]--;
|
||||
|
||||
|
||||
sum += exp(lp[i][j]);
|
||||
}
|
||||
|
||||
|
||||
/* normalise responsibilities and add duration logprior */
|
||||
logsumexp = log(sum);
|
||||
for (j = 0; j < k; j++)
|
||||
lp[i][j] -= logsumexp + lambda * nc[i][j];
|
||||
lp[i][j] -= logsumexp + lambda * nc[i][j];
|
||||
}
|
||||
//print_array(lp, n, k);
|
||||
/*
|
||||
@@ -160,10 +160,10 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
for (j = 0; j < k; j++)
|
||||
mexPrintf("%d ", nc[i][j]);
|
||||
mexPrintf("\n");
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/* update the assignments now that we know the duration priors
|
||||
based on the current assignments */
|
||||
for (i = 0; i < n; i++)
|
||||
@@ -177,14 +177,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
c[i] = j+1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* break if assignments haven't changed */
|
||||
i = 0;
|
||||
while (i < n && oldc[i] == c[i])
|
||||
i++;
|
||||
if (i == n)
|
||||
break;
|
||||
|
||||
|
||||
/* update reference histograms now we know new responsibilities */
|
||||
for (j = 0; j < k; j++)
|
||||
{
|
||||
@@ -194,21 +194,21 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
cl[j][b] += exp(lp[i][j]) * h[i*m+b];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sum = 0;
|
||||
|
||||
sum = 0;
|
||||
for (i = 0; i < n; i++)
|
||||
sum += exp(lp[i][j]);
|
||||
for (b = 0; b < m; b++)
|
||||
cl[j][b] /= sum; /* normalise */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//print_array(cl, k, m);
|
||||
//mexPrintf("\n\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* free memory */
|
||||
for (i = 0; i < k; i++)
|
||||
free(cl[i]);
|
||||
@@ -219,7 +219,7 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
|
||||
for (i = 0; i < n; i++)
|
||||
free(lp[i]);
|
||||
free(lp);
|
||||
free(oldc);
|
||||
free(oldc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
|
||||
int t, b, oct, ix;
|
||||
//double maxchroma; /* max chroma value at each time, for normalisation */
|
||||
//double sum; /* for normalisation */
|
||||
|
||||
|
||||
for (t = 0; t < nframes; t++)
|
||||
{
|
||||
for (b = 0; b < bins; b++)
|
||||
@@ -50,7 +50,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
|
||||
maxchroma = chroma[t][b];
|
||||
if (maxchroma > 0)
|
||||
for (b = 0; b < bins; b++)
|
||||
chroma[t][b] /= maxchroma;
|
||||
chroma[t][b] /= maxchroma;
|
||||
*/
|
||||
}
|
||||
}
|
||||
@@ -62,13 +62,13 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
|
||||
double ss;
|
||||
double env;
|
||||
double maxenv = 0;
|
||||
|
||||
|
||||
/* convert const-Q features to dB scale */
|
||||
for (i = 0; i < nframes; i++)
|
||||
for (j = 0; j < ncoeff; j++)
|
||||
features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
|
||||
|
||||
/* normalise each feature vector and add the norm as an extra feature dimension */
|
||||
|
||||
/* normalise each feature vector and add the norm as an extra feature dimension */
|
||||
for (i = 0; i < nframes; i++)
|
||||
{
|
||||
ss = 0;
|
||||
@@ -80,10 +80,10 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
|
||||
features[i][ncoeff] = env;
|
||||
if (env > maxenv)
|
||||
maxenv = env;
|
||||
}
|
||||
}
|
||||
/* normalise the envelopes */
|
||||
for (i = 0; i < nframes; i++)
|
||||
features[i][ncoeff] /= maxenv;
|
||||
features[i][ncoeff] /= maxenv;
|
||||
}
|
||||
|
||||
/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
|
||||
@@ -94,7 +94,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
|
||||
int i, j, t;
|
||||
double norm;
|
||||
|
||||
for (i = 0; i < nx*m; i++)
|
||||
for (i = 0; i < nx*m; i++)
|
||||
h[i] = 0;
|
||||
|
||||
for (i = hlen/2; i < nx-hlen/2; i++)
|
||||
@@ -109,7 +109,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
|
||||
for (j = 0; j < m; j++)
|
||||
h[i*m+j] /= norm;
|
||||
}
|
||||
|
||||
|
||||
/* duplicate histograms at beginning and end to create one histogram for each data value supplied */
|
||||
for (i = 0; i < hlen/2; i++)
|
||||
for (j = 0; j < m; j++)
|
||||
@@ -120,11 +120,11 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
|
||||
}
|
||||
|
||||
/* segment using HMM and then histogram clustering */
|
||||
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
|
||||
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
|
||||
int histogram_length, int nclusters, int neighbour_limit)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
|
||||
/*****************************/
|
||||
if (0) {
|
||||
/* try just using the predominant bin number as a 'decoded state' */
|
||||
@@ -137,60 +137,60 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
|
||||
maxval = 0;
|
||||
for (j = 0; j < feature_length; j++)
|
||||
{
|
||||
if (features[i][j] > maxval)
|
||||
if (features[i][j] > maxval)
|
||||
{
|
||||
maxval = features[i][j];
|
||||
maxbin = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (maxval > chroma_thresh)
|
||||
q[i] = maxbin;
|
||||
else
|
||||
q[i] = feature_length;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if (1) {
|
||||
/*****************************/
|
||||
|
||||
|
||||
|
||||
|
||||
/* scale all the features to 'balance covariances' during HMM training */
|
||||
double scale = 10;
|
||||
for (i = 0; i < frames_read; i++)
|
||||
for (j = 0; j < feature_length; j++)
|
||||
features[i][j] *= scale;
|
||||
|
||||
|
||||
/* train an HMM on the features */
|
||||
|
||||
|
||||
/* create a model */
|
||||
model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
|
||||
|
||||
|
||||
/* train the model */
|
||||
hmm_train(features, frames_read, model);
|
||||
/*
|
||||
/*
|
||||
printf("\n\nafter training:\n");
|
||||
hmm_print(model);
|
||||
*/
|
||||
*/
|
||||
/* decode the hidden state sequence */
|
||||
viterbi_decode(features, frames_read, model, q);
|
||||
viterbi_decode(features, frames_read, model, q);
|
||||
hmm_close(model);
|
||||
|
||||
|
||||
/*****************************/
|
||||
}
|
||||
/*****************************/
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
fprintf(stderr, "HMM state sequence:\n");
|
||||
for (i = 0; i < frames_read; i++)
|
||||
fprintf(stderr, "%d ", q[i]);
|
||||
fprintf(stderr, "\n\n");
|
||||
*/
|
||||
|
||||
|
||||
/* create histograms of states */
|
||||
double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
|
||||
create_histograms(q, frames_read, nHMM_states, histogram_length, h);
|
||||
|
||||
|
||||
/* cluster the histograms */
|
||||
int nbsched = 20; /* length of inverse temperature schedule */
|
||||
double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
|
||||
@@ -200,39 +200,39 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
|
||||
for (i = 1; i < nbsched; i++)
|
||||
bsched[i] = alpha * bsched[i-1];
|
||||
cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
|
||||
|
||||
|
||||
/* now q holds a sequence of cluster assignments */
|
||||
|
||||
free(h);
|
||||
|
||||
free(h);
|
||||
free(bsched);
|
||||
}
|
||||
|
||||
/* segment constant-Q or chroma features */
|
||||
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
|
||||
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
|
||||
int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
|
||||
{
|
||||
int feature_length;
|
||||
double** chroma;
|
||||
int i;
|
||||
|
||||
|
||||
if (feature_type == FEATURE_TYPE_CONSTQ)
|
||||
{
|
||||
/* fprintf(stderr, "Converting to dB and normalising...\n");
|
||||
*/
|
||||
*/
|
||||
mpeg7_constq(features, frames_read, ncoeff);
|
||||
/*
|
||||
/*
|
||||
fprintf(stderr, "Running PCA...\n");
|
||||
*/
|
||||
*/
|
||||
/* do PCA on the features (but not the envelope) */
|
||||
int ncomponents = 20;
|
||||
pca_project(features, frames_read, ncoeff, ncomponents);
|
||||
|
||||
|
||||
/* copy the envelope so that it immediately follows the chosen components */
|
||||
for (i = 0; i < frames_read; i++)
|
||||
features[i][ncomponents] = features[i][ncoeff];
|
||||
|
||||
features[i][ncomponents] = features[i][ncoeff];
|
||||
|
||||
feature_length = ncomponents + 1;
|
||||
|
||||
|
||||
/**************************************
|
||||
//TEST
|
||||
// feature file name
|
||||
@@ -241,7 +241,7 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
|
||||
strcpy(file_name, dir);
|
||||
strcat(file_name, trackname);
|
||||
strcat(file_name, "_features_c20r8h0.2f0.6.mat");
|
||||
|
||||
|
||||
// get the features from Matlab from mat-file
|
||||
int frames_in_file;
|
||||
readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
|
||||
@@ -254,27 +254,27 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
|
||||
features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
|
||||
--missing_frames;
|
||||
}
|
||||
|
||||
|
||||
free(file_name);
|
||||
******************************************/
|
||||
|
||||
|
||||
cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
|
||||
}
|
||||
|
||||
|
||||
if (feature_type == FEATURE_TYPE_CHROMA)
|
||||
{
|
||||
/*
|
||||
fprintf(stderr, "Converting to chroma features...\n");
|
||||
*/
|
||||
*/
|
||||
/* convert constant-Q to normalised chroma features */
|
||||
chroma = (double**) malloc(frames_read*sizeof(double*));
|
||||
for (i = 0; i < frames_read; i++)
|
||||
chroma[i] = (double*) malloc(bins*sizeof(double));
|
||||
cq2chroma(features, frames_read, ncoeff, bins, chroma);
|
||||
feature_length = bins;
|
||||
|
||||
|
||||
cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
|
||||
|
||||
|
||||
for (i = 0; i < frames_read; i++)
|
||||
free(chroma[i]);
|
||||
free(chroma);
|
||||
|
||||
@@ -38,10 +38,10 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma);
|
||||
|
||||
void create_histograms(int* x, int nx, int m, int hlen, double* h);
|
||||
|
||||
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
|
||||
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
|
||||
int histogram_length, int nclusters, int neighbour_limit);
|
||||
|
||||
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
|
||||
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
|
||||
int nHMM_states, int histogram_length, int nclusters, int neighbour_limit);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -34,10 +34,10 @@ typedef struct segmentation_t
|
||||
segment_t* segments;
|
||||
} segmentation_t;
|
||||
|
||||
/* Identifies the kind of feature vectors handed to the segmenter.
   The numeric values are assigned explicitly and must not change. */
typedef enum
{
    FEATURE_TYPE_UNKNOWN = 0,
    FEATURE_TYPE_CONSTQ  = 1,
    FEATURE_TYPE_CHROMA  = 2,
    FEATURE_TYPE_MFCC    = 3
} feature_types;
|
||||
|
||||
@@ -6,6 +6,14 @@
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
|
||||
Modifications:
|
||||
|
||||
- delta threshold
|
||||
Description: add delta threshold used as offset in the smoothed
|
||||
detection function
|
||||
Author: Mathieu Barthet
|
||||
Date: June 2010
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
@@ -25,7 +33,7 @@
|
||||
DFProcess::DFProcess( DFProcConfig Config )
|
||||
{
|
||||
filtSrc = NULL;
|
||||
filtDst = NULL;
|
||||
filtDst = NULL;
|
||||
m_filtScratchIn = NULL;
|
||||
m_filtScratchOut = NULL;
|
||||
|
||||
@@ -51,13 +59,16 @@ void DFProcess::initialise( DFProcConfig Config )
|
||||
filtSrc = new double[ m_length ];
|
||||
filtDst = new double[ m_length ];
|
||||
|
||||
|
||||
|
||||
//Low Pass Smoothing Filter Config
|
||||
m_FilterConfigParams.ord = Config.LPOrd;
|
||||
m_FilterConfigParams.ACoeffs = Config.LPACoeffs;
|
||||
m_FilterConfigParams.BCoeffs = Config.LPBCoeffs;
|
||||
|
||||
|
||||
m_FiltFilt = new FiltFilt( m_FilterConfigParams );
|
||||
|
||||
//add delta threshold
|
||||
m_delta = Config.delta;
|
||||
}
|
||||
|
||||
void DFProcess::deInitialise()
|
||||
@@ -115,7 +126,7 @@ void DFProcess::medianFilter(double *src, double *dst)
|
||||
{
|
||||
if (index >= m_length) break;
|
||||
|
||||
|
||||
|
||||
l = 0;
|
||||
for( j = i; j < ( i + m_winPost + m_winPre + 1); j++)
|
||||
{
|
||||
@@ -139,15 +150,17 @@ void DFProcess::medianFilter(double *src, double *dst)
|
||||
|
||||
l++;
|
||||
}
|
||||
|
||||
scratch[ index++ ] = MathUtilities::median( y, l);
|
||||
|
||||
scratch[ index++ ] = MathUtilities::median( y, l);
|
||||
}
|
||||
|
||||
|
||||
for( i = 0; i < m_length; i++ )
|
||||
{
|
||||
val = src[ i ] - scratch[ i ];// - 0.033;
|
||||
|
||||
//add a delta threshold used as an offset when computing the smoothed detection function
|
||||
//(helps to discard noise when detecting peaks)
|
||||
val = src[ i ] - scratch[ i ] - m_delta;
|
||||
|
||||
if( m_isMedianPositive )
|
||||
{
|
||||
if( val > 0 )
|
||||
@@ -164,7 +177,7 @@ void DFProcess::medianFilter(double *src, double *dst)
|
||||
dst[ i ] = val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
delete [] y;
|
||||
delete [] scratch;
|
||||
}
|
||||
@@ -180,8 +193,8 @@ void DFProcess::removeDCNormalize( double *src, double*dst )
|
||||
|
||||
MathUtilities::getAlphaNorm( src, m_length, m_alphaNormParam, &DFAlphaNorm );
|
||||
|
||||
for(int i = 0; i< m_length; i++)
|
||||
for( unsigned int i = 0; i< m_length; i++)
|
||||
{
|
||||
dst[ i ] = ( src[ i ] - DFMin ) / DFAlphaNorm;
|
||||
dst[ i ] = ( src[ i ] - DFMin ) / DFAlphaNorm;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,14 @@
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
|
||||
Modifications:
|
||||
|
||||
- delta threshold
|
||||
Description: add delta threshold used as offset in the smoothed
|
||||
detection function
|
||||
Author: Mathieu Barthet
|
||||
Date: June 2010
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
@@ -20,17 +28,31 @@
|
||||
#include "FiltFilt.h"
|
||||
|
||||
struct DFProcConfig{
|
||||
unsigned int length;
|
||||
unsigned int LPOrd;
|
||||
double *LPACoeffs;
|
||||
double *LPBCoeffs;
|
||||
unsigned int length;
|
||||
unsigned int LPOrd;
|
||||
double *LPACoeffs;
|
||||
double *LPBCoeffs;
|
||||
unsigned int winPre;
|
||||
unsigned int winPost;
|
||||
unsigned int winPost;
|
||||
double AlphaNormParam;
|
||||
bool isMedianPositive;
|
||||
float delta; //delta threshold used as an offset when computing the smoothed detection function
|
||||
|
||||
DFProcConfig() :
|
||||
length(0),
|
||||
LPOrd(0),
|
||||
LPACoeffs(NULL),
|
||||
LPBCoeffs(NULL),
|
||||
winPre(0),
|
||||
winPost(0),
|
||||
AlphaNormParam(0),
|
||||
isMedianPositive(false),
|
||||
delta(0)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
class DFProcess
|
||||
class DFProcess
|
||||
{
|
||||
public:
|
||||
DFProcess( DFProcConfig Config );
|
||||
@@ -38,7 +60,7 @@ public:
|
||||
|
||||
void process( double* src, double* dst );
|
||||
|
||||
|
||||
|
||||
private:
|
||||
void initialise( DFProcConfig Config );
|
||||
void deInitialise();
|
||||
@@ -59,11 +81,12 @@ private:
|
||||
double* m_filtScratchIn;
|
||||
double* m_filtScratchOut;
|
||||
|
||||
FiltFiltConfig m_FilterConfigParams;
|
||||
FilterConfig m_FilterConfigParams;
|
||||
|
||||
FiltFilt* m_FiltFilt;
|
||||
|
||||
bool m_isMedianPositive;
|
||||
float m_delta; //add delta threshold
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -19,12 +19,12 @@
|
||||
// Construction/Destruction
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
FiltFilt::FiltFilt( FiltFiltConfig Config )
|
||||
FiltFilt::FiltFilt( FilterConfig Config )
|
||||
{
|
||||
m_filtScratchIn = NULL;
|
||||
m_filtScratchOut = NULL;
|
||||
m_ord = 0;
|
||||
|
||||
|
||||
initialise( Config );
|
||||
}
|
||||
|
||||
@@ -33,13 +33,13 @@ FiltFilt::~FiltFilt()
|
||||
deInitialise();
|
||||
}
|
||||
|
||||
void FiltFilt::initialise( FiltFiltConfig Config )
|
||||
void FiltFilt::initialise( FilterConfig Config )
|
||||
{
|
||||
m_ord = Config.ord;
|
||||
m_filterConfig.ord = Config.ord;
|
||||
m_filterConfig.ACoeffs = Config.ACoeffs;
|
||||
m_filterConfig.BCoeffs = Config.BCoeffs;
|
||||
|
||||
|
||||
m_filter = new Filter( m_filterConfig );
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ void FiltFilt::deInitialise()
|
||||
|
||||
|
||||
void FiltFilt::process(double *src, double *dst, unsigned int length)
|
||||
{
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
if (length == 0) return;
|
||||
@@ -62,8 +62,8 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
|
||||
m_filtScratchIn = new double[ nExt ];
|
||||
m_filtScratchOut = new double[ nExt ];
|
||||
|
||||
|
||||
for( i = 0; i< nExt; i++ )
|
||||
|
||||
for( i = 0; i< nExt; i++ )
|
||||
{
|
||||
m_filtScratchIn[ i ] = 0.0;
|
||||
m_filtScratchOut[ i ] = 0.0;
|
||||
@@ -89,21 +89,21 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
|
||||
{
|
||||
m_filtScratchIn[ i + nFact ] = src[ i ];
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////
|
||||
// Do 0Ph filtering
|
||||
m_filter->process( m_filtScratchIn, m_filtScratchOut, nExt);
|
||||
|
||||
// reverse the series for FILTFILT
|
||||
|
||||
// reverse the series for FILTFILT
|
||||
for ( i = 0; i < nExt; i++)
|
||||
{
|
||||
{
|
||||
m_filtScratchIn[ i ] = m_filtScratchOut[ nExt - i - 1];
|
||||
}
|
||||
|
||||
// do FILTER again
|
||||
// do FILTER again
|
||||
m_filter->process( m_filtScratchIn, m_filtScratchOut, nExt);
|
||||
|
||||
// reverse the series back
|
||||
|
||||
// reverse the series back
|
||||
for ( i = 0; i < nExt; i++)
|
||||
{
|
||||
m_filtScratchIn[ i ] = m_filtScratchOut[ nExt - i - 1 ];
|
||||
@@ -117,7 +117,7 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
|
||||
for( i = 0; i < length; i++ )
|
||||
{
|
||||
dst[ index++ ] = m_filtScratchOut[ i + nFact ];
|
||||
}
|
||||
}
|
||||
|
||||
delete [] m_filtScratchIn;
|
||||
delete [] m_filtScratchOut;
|
||||
|
||||
@@ -18,23 +18,22 @@
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
struct FiltFiltConfig{
|
||||
unsigned int ord;
|
||||
double* ACoeffs;
|
||||
double* BCoeffs;
|
||||
};
|
||||
|
||||
class FiltFilt
|
||||
/**
|
||||
* Zero-phase digital filter, implemented by processing the data
|
||||
* through a filter specified by the given FilterConfig structure (see
|
||||
* Filter) and then processing it again in reverse.
|
||||
*/
|
||||
class FiltFilt
|
||||
{
|
||||
public:
|
||||
FiltFilt( FiltFiltConfig Config );
|
||||
FiltFilt( FilterConfig Config );
|
||||
virtual ~FiltFilt();
|
||||
|
||||
void reset();
|
||||
void process( double* src, double* dst, unsigned int length );
|
||||
|
||||
private:
|
||||
void initialise( FiltFiltConfig Config );
|
||||
void initialise( FilterConfig Config );
|
||||
void deInitialise();
|
||||
|
||||
unsigned int m_ord;
|
||||
|
||||
@@ -20,13 +20,22 @@
|
||||
#define NULL 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Filter specification. For a filter of order ord, the ACoeffs and
|
||||
* BCoeffs arrays must point to ord+1 values each. ACoeffs provides
|
||||
* the denominator and BCoeffs the numerator coefficients of the
|
||||
* filter.
|
||||
*/
|
||||
struct FilterConfig{
|
||||
unsigned int ord;
|
||||
double* ACoeffs;
|
||||
double* BCoeffs;
|
||||
};
|
||||
|
||||
class Filter
|
||||
/**
|
||||
* Digital filter specified through FilterConfig structure.
|
||||
*/
|
||||
class Filter
|
||||
{
|
||||
public:
|
||||
Filter( FilterConfig Config );
|
||||
@@ -36,7 +45,6 @@ public:
|
||||
|
||||
void process( double *src, double *dst, unsigned int length );
|
||||
|
||||
|
||||
private:
|
||||
void initialise( FilterConfig Config );
|
||||
void deInitialise();
|
||||
|
||||
@@ -44,14 +44,14 @@ void Framer::configure( unsigned int frameLength, unsigned int hop )
|
||||
|
||||
if( m_dataFrame != NULL )
|
||||
{
|
||||
delete [] m_dataFrame;
|
||||
delete [] m_dataFrame;
|
||||
m_dataFrame = NULL;
|
||||
}
|
||||
m_dataFrame = new double[ m_frameLength ];
|
||||
|
||||
if( m_strideFrame != NULL )
|
||||
{
|
||||
delete [] m_strideFrame;
|
||||
delete [] m_strideFrame;
|
||||
m_strideFrame = NULL;
|
||||
}
|
||||
m_strideFrame = new double[ m_stepSize ];
|
||||
@@ -64,8 +64,8 @@ void Framer::getFrame(double *dst)
|
||||
{
|
||||
for( unsigned int u = 0; u < m_frameLength; u++)
|
||||
{
|
||||
dst[ u ] = m_srcBuffer[ m_ulSrcIndex++ ];
|
||||
}
|
||||
dst[ u ] = m_srcBuffer[ m_ulSrcIndex++ ];
|
||||
}
|
||||
m_ulSrcIndex -= ( m_frameLength - m_stepSize );
|
||||
}
|
||||
else
|
||||
@@ -77,7 +77,7 @@ void Framer::getFrame(double *dst)
|
||||
{
|
||||
dst[ u ] = m_srcBuffer[ m_ulSrcIndex++ ];
|
||||
}
|
||||
|
||||
|
||||
for( unsigned int u = 0; u < zero; u++ )
|
||||
{
|
||||
dst[ rem + u ] = 0;
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
class Framer
|
||||
class Framer
|
||||
{
|
||||
public:
|
||||
void setSource( double* src, unsigned int length );
|
||||
|
||||
@@ -44,7 +44,10 @@ DownBeat::DownBeat(float originalSampleRate,
|
||||
// 16x decimation, which is our expected normal situation)
|
||||
m_beatframesize = MathUtilities::nextPowerOfTwo
|
||||
(int((m_rate / decimationFactor) * 1.3));
|
||||
// std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
|
||||
if (m_beatframesize < 2) {
|
||||
m_beatframesize = 2;
|
||||
}
|
||||
// std::cerr << "rate = " << m_rate << ", dec = " << decimationFactor << ", bfs = " << m_beatframesize << std::endl;
|
||||
m_beatframe = new double[m_beatframesize];
|
||||
m_fftRealOut = new double[m_beatframesize];
|
||||
m_fftImagOut = new double[m_beatframesize];
|
||||
@@ -122,7 +125,7 @@ DownBeat::pushAudioBlock(const float *audio)
|
||||
// std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
|
||||
m_buffill += m_increment / m_factor;
|
||||
}
|
||||
|
||||
|
||||
const float *
|
||||
DownBeat::getBufferedAudio(size_t &length) const
|
||||
{
|
||||
@@ -192,9 +195,9 @@ DownBeat::findDownBeats(const float *audio,
|
||||
}
|
||||
|
||||
// Now FFT beat frame
|
||||
|
||||
m_fft->process(false, m_beatframe, m_fftRealOut, m_fftImagOut);
|
||||
|
||||
|
||||
m_fft->forward(m_beatframe, m_fftRealOut, m_fftImagOut);
|
||||
|
||||
// Calculate magnitudes
|
||||
|
||||
for (size_t j = 0; j < m_beatframesize/2; ++j) {
|
||||
@@ -257,7 +260,7 @@ DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
|
||||
{
|
||||
// JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES
|
||||
|
||||
unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
|
||||
unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
|
||||
if (SPECSIZE > oldspec.size()/4) {
|
||||
SPECSIZE = oldspec.size()/4;
|
||||
}
|
||||
@@ -266,37 +269,37 @@ DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
|
||||
|
||||
double sumnew = 0.;
|
||||
double sumold = 0.;
|
||||
|
||||
|
||||
for (unsigned int i = 0;i < SPECSIZE;i++)
|
||||
{
|
||||
newspec[i] +=EPS;
|
||||
oldspec[i] +=EPS;
|
||||
|
||||
|
||||
sumnew+=newspec[i];
|
||||
sumold+=oldspec[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (unsigned int i = 0;i < SPECSIZE;i++)
|
||||
{
|
||||
newspec[i] /= (sumnew);
|
||||
oldspec[i] /= (sumold);
|
||||
|
||||
|
||||
// IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1
|
||||
if (newspec[i] == 0)
|
||||
{
|
||||
newspec[i] = 1.;
|
||||
}
|
||||
|
||||
|
||||
if (oldspec[i] == 0)
|
||||
{
|
||||
oldspec[i] = 1.;
|
||||
}
|
||||
|
||||
|
||||
// JENSEN-SHANNON CALCULATION
|
||||
sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
|
||||
sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
|
||||
SD = SD + (-sd1*log(sd1)) + (0.5*(oldspec[i]*log(oldspec[i]))) + (0.5*(newspec[i]*log(newspec[i])));
|
||||
}
|
||||
|
||||
|
||||
return SD;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#define DOWNBEAT_H
|
||||
|
||||
#include <vector>
|
||||
#include <cstddef>
|
||||
|
||||
#include "dsp/rateconversion/Decimator.h"
|
||||
|
||||
@@ -28,7 +29,7 @@ class FFTReal;
|
||||
* This class takes an input audio signal and a sequence of beat
|
||||
* locations (calculated e.g. by TempoTrackV2) and estimates which of
|
||||
* the beat locations are downbeats (first beat of the bar).
|
||||
*
|
||||
*
|
||||
* The input audio signal is expected to have been downsampled to a
|
||||
* very low sampling rate (e.g. 2700Hz). A utility function for
|
||||
* downsampling and buffering incoming block-by-block audio is
|
||||
@@ -56,7 +57,7 @@ public:
|
||||
|
||||
/**
|
||||
* Estimate which beats are down-beats.
|
||||
*
|
||||
*
|
||||
* audio contains the input audio stream after downsampling, and
|
||||
* audioLength contains the number of samples in this downsampled
|
||||
* stream.
|
||||
@@ -83,18 +84,18 @@ public:
|
||||
* and the region following it.
|
||||
*/
|
||||
void getBeatSD(vector<double> &beatsd) const;
|
||||
|
||||
|
||||
/**
|
||||
* For your downsampling convenience: call this function
|
||||
* repeatedly with input audio blocks containing dfIncrement
|
||||
* samples at the original sample rate, to decimate them to the
|
||||
* downsampled rate and buffer them within the DownBeat class.
|
||||
*
|
||||
*
|
||||
* Call getBufferedAudio() to retrieve the results after all
|
||||
* blocks have been processed.
|
||||
*/
|
||||
void pushAudioBlock(const float *audio);
|
||||
|
||||
|
||||
/**
|
||||
* Retrieve the accumulated audio produced by pushAudioBlock calls.
|
||||
*/
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,11 +5,11 @@
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file 2005-2006 Christian Landone.
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License as
|
||||
published by the Free Software Foundation; either version 2 of the
|
||||
License, or (at your option) any later version. See the file
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
@@ -31,7 +31,7 @@ using std::vector;
|
||||
struct WinThresh
|
||||
{
|
||||
unsigned int pre;
|
||||
unsigned int post;
|
||||
unsigned int post;
|
||||
};
|
||||
|
||||
struct TTParams
|
||||
|
||||
@@ -91,10 +91,17 @@ TempoTrackV2::filter_df(d_vec_t &df)
|
||||
}
|
||||
|
||||
|
||||
// MEPD 28/11/12
|
||||
// This function now allows for a user to specify an inputtempo (in BPM)
|
||||
// and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
|
||||
// with a gaussian which is centered around the input tempo
|
||||
// Note, if inputtempo = 120 and constraintempo = false, then functionality is
|
||||
// as it was before
|
||||
void
|
||||
TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
|
||||
vector<double> &beat_period,
|
||||
vector<double> &tempi)
|
||||
vector<double> &tempi,
|
||||
double inputtempo, bool constraintempo)
|
||||
{
|
||||
// to follow matlab.. split into 512 sample frames with a 128 hop size
|
||||
// calculate the acf,
|
||||
@@ -103,13 +110,42 @@ TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
|
||||
// and get best path
|
||||
|
||||
unsigned int wv_len = 128;
|
||||
double rayparam = 43.;
|
||||
|
||||
// MEPD 28/11/12
|
||||
// the default value of inputtempo in the beat tracking plugin is 120
|
||||
// so if the user specifies a different inputtempo, the rayparam will be updated
|
||||
// accordingly.
|
||||
// note: 60*44100/512 is a magic number
|
||||
// this might (will?) break if a user specifies a different frame rate for the onset detection function
|
||||
double rayparam = (60*44100/512)/inputtempo;
|
||||
|
||||
// these debug statements can be removed.
|
||||
// std::cerr << "inputtempo" << inputtempo << std::endl;
|
||||
// std::cerr << "rayparam" << rayparam << std::endl;
|
||||
// std::cerr << "constraintempo" << constraintempo << std::endl;
|
||||
|
||||
// make rayleigh weighting curve
|
||||
d_vec_t wv(wv_len);
|
||||
for (unsigned int i=0; i<wv.size(); i++)
|
||||
|
||||
// check whether or not to use rayleigh weighting (if constraintempo is false)
|
||||
// or gaussian weighting (if constraintempo is true)
|
||||
if (constraintempo)
|
||||
{
|
||||
wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
|
||||
for (unsigned int i=0; i<wv.size(); i++)
|
||||
{
|
||||
// MEPD 28/11/12
|
||||
// do a gaussian weighting instead of rayleigh
|
||||
wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned int i=0; i<wv.size(); i++)
|
||||
{
|
||||
// MEPD 28/11/12
|
||||
// standard rayleigh weighting over periodicities
|
||||
wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
|
||||
}
|
||||
}
|
||||
|
||||
// beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
|
||||
@@ -397,10 +433,14 @@ TempoTrackV2::normalise_vec(d_vec_t &df)
|
||||
}
|
||||
}
|
||||
|
||||
// MEPD 28/11/12
|
||||
// this function has been updated to allow the "alpha" and "tightness" parameters
|
||||
// of the dynamic program to be set by the user
|
||||
// the default value of alpha = 0.9 and tightness = 4
|
||||
void
|
||||
TempoTrackV2::calculateBeats(const vector<double> &df,
|
||||
const vector<double> &beat_period,
|
||||
vector<double> &beats)
|
||||
vector<double> &beats, double alpha, double tightness)
|
||||
{
|
||||
if (df.empty() || beat_period.empty()) return;
|
||||
|
||||
@@ -414,8 +454,12 @@ TempoTrackV2::calculateBeats(const vector<double> &df,
|
||||
backlink[i] = -1;
|
||||
}
|
||||
|
||||
double tightness = 4.;
|
||||
double alpha = 0.9;
|
||||
//double tightness = 4.;
|
||||
//double alpha = 0.9;
|
||||
// MEPD 28/11/12
|
||||
// debug statements that can be removed.
|
||||
// std::cerr << "alpha" << alpha << std::endl;
|
||||
// std::cerr << "tightness" << tightness << std::endl;
|
||||
|
||||
// main loop
|
||||
for (unsigned int i=0; i<localscore.size(); i++)
|
||||
@@ -462,7 +506,7 @@ TempoTrackV2::calculateBeats(const vector<double> &df,
|
||||
int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
|
||||
|
||||
// can happen if no results obtained earlier (e.g. input too short)
|
||||
if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
|
||||
if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
|
||||
|
||||
// USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
|
||||
// BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
|
||||
|
||||
@@ -18,8 +18,7 @@
|
||||
#define TEMPOTRACKV2_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
using std::vector;
|
||||
using namespace std;
|
||||
|
||||
//!!! Question: how far is this actually sample rate dependent? I
|
||||
// think it does produce plausible results for e.g. 48000 as well as
|
||||
@@ -40,15 +39,35 @@ public:
|
||||
TempoTrackV2(float sampleRate, size_t dfIncrement);
|
||||
~TempoTrackV2();
|
||||
|
||||
// Returned beat periods are given in df increment units; tempi in bpm
|
||||
// Returned beat periods are given in df increment units; inputtempo and tempi in bpm
|
||||
void calculateBeatPeriod(const vector<double> &df,
|
||||
vector<double> &beatPeriod,
|
||||
vector<double> &tempi);
|
||||
vector<double> &tempi) {
|
||||
calculateBeatPeriod(df, beatPeriod, tempi, 120.0, false);
|
||||
}
|
||||
|
||||
// Returned beat periods are given in df increment units; inputtempo and tempi in bpm
|
||||
// MEPD 28/11/12 Expose inputtempo and constraintempo parameters
|
||||
// Note, if inputtempo = 120 and constraintempo = false, then functionality is as it was before
|
||||
void calculateBeatPeriod(const vector<double> &df,
|
||||
vector<double> &beatPeriod,
|
||||
vector<double> &tempi,
|
||||
double inputtempo, bool constraintempo);
|
||||
|
||||
// Returned beat positions are given in df increment units
|
||||
void calculateBeats(const vector<double> &df,
|
||||
const vector<double> &beatPeriod,
|
||||
vector<double> &beats);
|
||||
vector<double> &beats) {
|
||||
calculateBeats(df, beatPeriod, beats, 0.9, 4.0);
|
||||
}
|
||||
|
||||
// Returned beat positions are given in df increment units
|
||||
// MEPD 28/11/12 Expose alpha and tightness parameters
|
||||
// Note, if alpha = 0.9 and tightness = 4, then functionality is as it was before
|
||||
void calculateBeats(const vector<double> &df,
|
||||
const vector<double> &beatPeriod,
|
||||
vector<double> &beats,
|
||||
double alpha, double tightness);
|
||||
|
||||
private:
|
||||
typedef vector<int> i_vec_t;
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
#include "ChangeDetectionFunction.h"
|
||||
|
||||
#ifndef PI
|
||||
#define PI (3.14159265358979323846)
|
||||
#define PI (3.14159265358979323846) // pi to 20 decimal places (same digits as POSIX M_PI)
|
||||
#endif
|
||||
|
||||
|
||||
@@ -34,20 +34,20 @@ ChangeDetectionFunction::~ChangeDetectionFunction()
|
||||
void ChangeDetectionFunction::setFilterWidth(const int iWidth)
|
||||
{
|
||||
m_iFilterWidth = iWidth*2+1;
|
||||
|
||||
|
||||
// it is assumed that the gaussian is 0 outside of +/- FWHM
|
||||
// => filter width = 2*FWHM = 2*2.3548*sigma
|
||||
m_dFilterSigma = double(m_iFilterWidth) / double(2*2.3548);
|
||||
m_vaGaussian.resize(m_iFilterWidth);
|
||||
|
||||
|
||||
double dScale = 1.0 / (m_dFilterSigma*sqrt(2*PI));
|
||||
|
||||
|
||||
for (int x = -(m_iFilterWidth-1)/2; x <= (m_iFilterWidth-1)/2; x++)
|
||||
{
|
||||
double w = dScale * std::exp ( -(x*x)/(2*m_dFilterSigma*m_dFilterSigma) );
|
||||
m_vaGaussian[x + (m_iFilterWidth-1)/2] = w;
|
||||
}
|
||||
|
||||
|
||||
#ifdef DEBUG_CHANGE_DETECTION_FUNCTION
|
||||
std::cerr << "Filter sigma: " << m_dFilterSigma << std::endl;
|
||||
std::cerr << "Filter width: " << m_iFilterWidth << std::endl;
|
||||
@@ -59,37 +59,37 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
|
||||
{
|
||||
ChangeDistance retVal;
|
||||
retVal.resize(rTCSGram.getSize(), 0.0);
|
||||
|
||||
|
||||
TCSGram smoothedTCSGram;
|
||||
|
||||
for (int iPosition = 0; iPosition < rTCSGram.getSize(); iPosition++)
|
||||
{
|
||||
int iSkipLower = 0;
|
||||
|
||||
|
||||
int iLowerPos = iPosition - (m_iFilterWidth-1)/2;
|
||||
int iUpperPos = iPosition + (m_iFilterWidth-1)/2;
|
||||
|
||||
|
||||
if (iLowerPos < 0)
|
||||
{
|
||||
iSkipLower = -iLowerPos;
|
||||
iLowerPos = 0;
|
||||
}
|
||||
|
||||
|
||||
if (iUpperPos >= rTCSGram.getSize())
|
||||
{
|
||||
int iMaxIndex = rTCSGram.getSize() - 1;
|
||||
iUpperPos = iMaxIndex;
|
||||
}
|
||||
|
||||
|
||||
TCSVector smoothedVector;
|
||||
|
||||
// for every bin of the vector, calculate the smoothed value
|
||||
for (int iPC = 0; iPC < 6; iPC++)
|
||||
{
|
||||
{
|
||||
size_t j = 0;
|
||||
double dSmoothedValue = 0.0;
|
||||
TCSVector rCV;
|
||||
|
||||
|
||||
for (int i = iLowerPos; i <= iUpperPos; i++)
|
||||
{
|
||||
rTCSGram.getTCSVector(i, rCV);
|
||||
@@ -98,7 +98,7 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
|
||||
|
||||
smoothedVector[iPC] = dSmoothedValue;
|
||||
}
|
||||
|
||||
|
||||
smoothedTCSGram.addTCSVector(smoothedVector);
|
||||
}
|
||||
|
||||
@@ -109,10 +109,10 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
|
||||
if the current estimate is not confident enough, look further into the future/the past
|
||||
e.g., High frequency content, zero crossing rate, spectral flatness
|
||||
*/
|
||||
|
||||
|
||||
TCSVector nextTCS;
|
||||
TCSVector previousTCS;
|
||||
|
||||
|
||||
int iWindow = 1;
|
||||
|
||||
// while (previousTCS.magnitude() < 0.1 && (iPosition-iWindow) > 0)
|
||||
@@ -121,9 +121,9 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
|
||||
// std::cout << previousTCS.magnitude() << std::endl;
|
||||
iWindow++;
|
||||
}
|
||||
|
||||
|
||||
iWindow = 1;
|
||||
|
||||
|
||||
// while (nextTCS.magnitude() < 0.1 && (iPosition+iWindow) < (rTCSGram.getSize()-1) )
|
||||
{
|
||||
smoothedTCSGram.getTCSVector(iPosition+iWindow, nextTCS);
|
||||
@@ -136,7 +136,7 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
|
||||
{
|
||||
distance += std::pow(nextTCS[j] - previousTCS[j], 2.0);
|
||||
}
|
||||
|
||||
|
||||
retVal[iPosition] = std::pow(distance, 0.5);
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ public:
|
||||
ChangeDistance process(const TCSGram& rTCSGram);
|
||||
private:
|
||||
void setFilterWidth(const int iWidth);
|
||||
|
||||
|
||||
private:
|
||||
valarray<double> m_vaGaussian;
|
||||
double m_dFilterSigma;
|
||||
|
||||
@@ -34,7 +34,7 @@ TCSGram::~TCSGram()
|
||||
|
||||
void TCSGram::getTCSVector(int iPosition, TCSVector& rTCSVector) const
|
||||
{
|
||||
if (iPosition < 0)
|
||||
if (iPosition < 0)
|
||||
rTCSVector = TCSVector();
|
||||
else if (iPosition >= m_VectorList.size())
|
||||
rTCSVector = TCSVector();
|
||||
@@ -52,10 +52,10 @@ void TCSGram::addTCSVector(const TCSVector& rTCSVector)
|
||||
{
|
||||
size_t uSize = m_VectorList.size();
|
||||
long lMilliSeconds = static_cast<long>(uSize*m_dFrameDurationMS);
|
||||
std::pair<long, TCSVector> p;
|
||||
std::pair<long, TCSVector> p;
|
||||
p.first = lMilliSeconds;
|
||||
p.second = rTCSVector;
|
||||
|
||||
|
||||
m_VectorList.push_back(p);
|
||||
}
|
||||
|
||||
@@ -68,7 +68,7 @@ long TCSGram::getDuration() const
|
||||
void TCSGram::printDebug()
|
||||
{
|
||||
vectorlist_t::iterator vectorIterator = m_VectorList.begin();
|
||||
|
||||
|
||||
while (vectorIterator != m_VectorList.end())
|
||||
{
|
||||
vectorIterator->second.printDebug();
|
||||
|
||||
@@ -26,7 +26,7 @@ typedef std::vector<std::pair<long, TCSVector> > vectorlist_t;
|
||||
|
||||
class TCSGram
|
||||
{
|
||||
public:
|
||||
public:
|
||||
TCSGram();
|
||||
~TCSGram();
|
||||
void getTCSVector(int, TCSVector&) const;
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#include <iostream>
|
||||
|
||||
#ifndef PI
|
||||
#define PI (3.14159265358979323846)
|
||||
#define PI (3.14159265358979323846) // pi to 20 decimal places (same digits as POSIX M_PI)
|
||||
#endif
|
||||
|
||||
TonalEstimator::TonalEstimator()
|
||||
@@ -27,15 +27,15 @@ TonalEstimator::TonalEstimator()
|
||||
m_Basis.resize(6);
|
||||
|
||||
int i = 0;
|
||||
|
||||
|
||||
|
||||
|
||||
// circle of fifths
|
||||
m_Basis[i].resize(12);
|
||||
for (int iP = 0; iP < 12; iP++)
|
||||
{
|
||||
m_Basis[i][iP] = std::sin( (7.0 / 6.0) * iP * PI);
|
||||
}
|
||||
|
||||
|
||||
i++;
|
||||
|
||||
m_Basis[i].resize(12);
|
||||
@@ -43,17 +43,17 @@ TonalEstimator::TonalEstimator()
|
||||
{
|
||||
m_Basis[i][iP] = std::cos( (7.0 / 6.0) * iP * PI);
|
||||
}
|
||||
|
||||
|
||||
i++;
|
||||
|
||||
|
||||
|
||||
|
||||
// circle of major thirds
|
||||
m_Basis[i].resize(12);
|
||||
for (int iP = 0; iP < 12; iP++)
|
||||
{
|
||||
m_Basis[i][iP] = 0.6 * std::sin( (2.0 / 3.0) * iP * PI);
|
||||
}
|
||||
|
||||
|
||||
i++;
|
||||
|
||||
m_Basis[i].resize(12);
|
||||
@@ -71,7 +71,7 @@ TonalEstimator::TonalEstimator()
|
||||
{
|
||||
m_Basis[i][iP] = 1.1 * std::sin( (3.0 / 2.0) * iP * PI);
|
||||
}
|
||||
|
||||
|
||||
i++;
|
||||
|
||||
m_Basis[i].resize(12);
|
||||
@@ -90,7 +90,7 @@ TCSVector TonalEstimator::transform2TCS(const ChromaVector& rVector)
|
||||
{
|
||||
TCSVector vaRetVal;
|
||||
vaRetVal.resize(6, 0.0);
|
||||
|
||||
|
||||
for (int i = 0; i < 6; i++)
|
||||
{
|
||||
for (int iP = 0; iP < 12; iP++)
|
||||
@@ -98,6 +98,6 @@ TCSVector TonalEstimator::transform2TCS(const ChromaVector& rVector)
|
||||
vaRetVal[i] += m_Basis[i][iP] * rVector[iP];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return vaRetVal;
|
||||
}
|
||||
|
||||
@@ -27,24 +27,24 @@ class ChromaVector : public std::valarray<double>
|
||||
public:
|
||||
ChromaVector(size_t uSize = 12) : std::valarray<double>()
|
||||
{ resize(uSize, 0.0f); }
|
||||
|
||||
|
||||
virtual ~ChromaVector() {};
|
||||
|
||||
|
||||
void printDebug()
|
||||
{
|
||||
for (int i = 0; i < size(); i++)
|
||||
{
|
||||
std::cout << (*this)[i] << ";";
|
||||
}
|
||||
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
void normalizeL1()
|
||||
{
|
||||
// normalize the chroma vector (L1 norm)
|
||||
double dSum = 0.0;
|
||||
|
||||
|
||||
for (size_t i = 0; i < 12; (dSum += std::abs((*this)[i++]))) ;
|
||||
for (size_t i = 0; i < 12; dSum > 0.0000001?((*this)[i] /= dSum):(*this)[i]=0.0, i++) ;
|
||||
|
||||
@@ -54,8 +54,8 @@ public:
|
||||
{
|
||||
for (size_t i = 0; i < 12; ++i) (*this)[i] = 0.0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
class TCSVector : public std::valarray<double>
|
||||
@@ -63,7 +63,7 @@ class TCSVector : public std::valarray<double>
|
||||
public:
|
||||
TCSVector() : std::valarray<double>()
|
||||
{ resize(6, 0.0f); }
|
||||
|
||||
|
||||
virtual ~TCSVector() {};
|
||||
|
||||
void printDebug()
|
||||
@@ -72,19 +72,19 @@ public:
|
||||
{
|
||||
std::cout << (*this)[i] << ";";
|
||||
}
|
||||
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
double magnitude() const
|
||||
{
|
||||
double dMag = 0.0;
|
||||
|
||||
|
||||
for (size_t i = 0; i < 6; i++)
|
||||
{
|
||||
dMag += std::pow((*this)[i], 2.0);
|
||||
}
|
||||
|
||||
|
||||
return std::sqrt(dMag);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,178 +4,199 @@
|
||||
QM DSP Library
|
||||
|
||||
Centre for Digital Music, Queen Mary, University of London.
|
||||
This file is based on Don Cross's public domain FFT implementation.
|
||||
*/
|
||||
|
||||
#include "FFT.h"
|
||||
|
||||
#include "maths/MathUtilities.h"
|
||||
|
||||
#include "kiss_fft.h"
|
||||
#include "kiss_fftr.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
FFT::FFT(unsigned int n) :
|
||||
m_n(n),
|
||||
m_private(0)
|
||||
#include <stdexcept>
|
||||
|
||||
class FFT::D
|
||||
{
|
||||
if( !MathUtilities::isPowerOfTwo(m_n) )
|
||||
{
|
||||
std::cerr << "ERROR: FFT: Non-power-of-two FFT size "
|
||||
<< m_n << " not supported in this implementation"
|
||||
<< std::endl;
|
||||
return;
|
||||
public:
|
||||
D(int n) : m_n(n) {
|
||||
m_planf = kiss_fft_alloc(m_n, 0, NULL, NULL);
|
||||
m_plani = kiss_fft_alloc(m_n, 1, NULL, NULL);
|
||||
m_kin = new kiss_fft_cpx[m_n];
|
||||
m_kout = new kiss_fft_cpx[m_n];
|
||||
}
|
||||
|
||||
~D() {
|
||||
kiss_fft_free(m_planf);
|
||||
kiss_fft_free(m_plani);
|
||||
delete[] m_kin;
|
||||
delete[] m_kout;
|
||||
}
|
||||
|
||||
void process(bool inverse,
|
||||
const double *ri,
|
||||
const double *ii,
|
||||
double *ro,
|
||||
double *io) {
|
||||
|
||||
for (int i = 0; i < m_n; ++i) {
|
||||
m_kin[i].r = ri[i];
|
||||
m_kin[i].i = (ii ? ii[i] : 0.0);
|
||||
}
|
||||
|
||||
if (!inverse) {
|
||||
|
||||
kiss_fft(m_planf, m_kin, m_kout);
|
||||
|
||||
for (int i = 0; i < m_n; ++i) {
|
||||
ro[i] = m_kout[i].r;
|
||||
io[i] = m_kout[i].i;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
kiss_fft(m_plani, m_kin, m_kout);
|
||||
|
||||
double scale = 1.0 / m_n;
|
||||
|
||||
for (int i = 0; i < m_n; ++i) {
|
||||
ro[i] = m_kout[i].r * scale;
|
||||
io[i] = m_kout[i].i * scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int m_n;
|
||||
kiss_fft_cfg m_planf;
|
||||
kiss_fft_cfg m_plani;
|
||||
kiss_fft_cpx *m_kin;
|
||||
kiss_fft_cpx *m_kout;
|
||||
};
|
||||
|
||||
FFT::FFT(int n) :
|
||||
m_d(new D(n))
|
||||
{
|
||||
}
|
||||
|
||||
FFT::~FFT()
|
||||
{
|
||||
|
||||
delete m_d;
|
||||
}
|
||||
|
||||
FFTReal::FFTReal(unsigned int n) :
|
||||
m_n(n),
|
||||
m_private_real(0)
|
||||
void
|
||||
FFT::process(bool inverse,
|
||||
const double *p_lpRealIn, const double *p_lpImagIn,
|
||||
double *p_lpRealOut, double *p_lpImagOut)
|
||||
{
|
||||
m_d->process(inverse,
|
||||
p_lpRealIn, p_lpImagIn,
|
||||
p_lpRealOut, p_lpImagOut);
|
||||
}
|
||||
|
||||
class FFTReal::D
|
||||
{
|
||||
public:
|
||||
D(int n) : m_n(n) {
|
||||
if (n % 2) {
|
||||
throw std::invalid_argument
|
||||
("nsamples must be even in FFTReal constructor");
|
||||
}
|
||||
m_planf = kiss_fftr_alloc(m_n, 0, NULL, NULL);
|
||||
m_plani = kiss_fftr_alloc(m_n, 1, NULL, NULL);
|
||||
m_c = new kiss_fft_cpx[m_n];
|
||||
}
|
||||
|
||||
~D() {
|
||||
kiss_fftr_free(m_planf);
|
||||
kiss_fftr_free(m_plani);
|
||||
delete[] m_c;
|
||||
}
|
||||
|
||||
void forward(const double *ri, double *ro, double *io) {
|
||||
|
||||
kiss_fftr(m_planf, ri, m_c);
|
||||
|
||||
for (int i = 0; i <= m_n/2; ++i) {
|
||||
ro[i] = m_c[i].r;
|
||||
io[i] = m_c[i].i;
|
||||
}
|
||||
|
||||
for (int i = 0; i + 1 < m_n/2; ++i) {
|
||||
ro[m_n - i - 1] = ro[i + 1];
|
||||
io[m_n - i - 1] = -io[i + 1];
|
||||
}
|
||||
}
|
||||
|
||||
void forwardMagnitude(const double *ri, double *mo) {
|
||||
|
||||
double *io = new double[m_n];
|
||||
|
||||
forward(ri, mo, io);
|
||||
|
||||
for (int i = 0; i < m_n; ++i) {
|
||||
mo[i] = sqrt(mo[i] * mo[i] + io[i] * io[i]);
|
||||
}
|
||||
|
||||
delete[] io;
|
||||
}
|
||||
|
||||
void inverse(const double *ri, const double *ii, double *ro) {
|
||||
|
||||
// kiss_fftr.h says
|
||||
// "input freqdata has nfft/2+1 complex points"
|
||||
|
||||
for (int i = 0; i < m_n/2 + 1; ++i) {
|
||||
m_c[i].r = ri[i];
|
||||
m_c[i].i = ii[i];
|
||||
}
|
||||
|
||||
kiss_fftri(m_plani, m_c, ro);
|
||||
|
||||
double scale = 1.0 / m_n;
|
||||
|
||||
for (int i = 0; i < m_n; ++i) {
|
||||
ro[i] *= scale;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
int m_n;
|
||||
kiss_fftr_cfg m_planf;
|
||||
kiss_fftr_cfg m_plani;
|
||||
kiss_fft_cpx *m_c;
|
||||
};
|
||||
|
||||
FFTReal::FFTReal(int n) :
|
||||
m_d(new D(n))
|
||||
{
|
||||
m_private_real = new FFT(m_n);
|
||||
}
|
||||
|
||||
FFTReal::~FFTReal()
|
||||
{
|
||||
delete (FFT *)m_private_real;
|
||||
delete m_d;
|
||||
}
|
||||
|
||||
void
|
||||
FFTReal::process(bool inverse,
|
||||
const double *realIn,
|
||||
double *realOut, double *imagOut)
|
||||
FFTReal::forward(const double *ri, double *ro, double *io)
|
||||
{
|
||||
((FFT *)m_private_real)->process(inverse, realIn, 0, realOut, imagOut);
|
||||
}
|
||||
|
||||
// Return the zero-based position of the lowest set bit of p_nSamples,
// or 0 when p_nSamples is less than 2.
//
// For a power-of-two sample count this is log2(p_nSamples), i.e. the
// number of index bits that need to be reversed for that transform size.
static unsigned int numberOfBitsNeeded(unsigned int p_nSamples)
{
    if( p_nSamples < 2 )
    {
        return 0;
    }

    // p_nSamples is non-zero here, so a set bit is always found before
    // the shift count reaches the width of unsigned int. The mask is
    // unsigned so that probing the top bit does not overflow a signed int.
    unsigned int i = 0;

    while( !(p_nSamples & (1u << i)) )
    {
        ++i;
    }

    return i;
}
|
||||
|
||||
static unsigned int reverseBits(unsigned int p_nIndex, unsigned int p_nBits)
|
||||
{
|
||||
unsigned int i, rev;
|
||||
|
||||
for(i=rev=0; i < p_nBits; i++)
|
||||
{
|
||||
rev = (rev << 1) | (p_nIndex & 1);
|
||||
p_nIndex >>= 1;
|
||||
}
|
||||
|
||||
return rev;
|
||||
m_d->forward(ri, ro, io);
|
||||
}
|
||||
|
||||
void
|
||||
FFT::process(bool p_bInverseTransform,
|
||||
const double *p_lpRealIn, const double *p_lpImagIn,
|
||||
double *p_lpRealOut, double *p_lpImagOut)
|
||||
FFTReal::forwardMagnitude(const double *ri, double *mo)
|
||||
{
|
||||
if (!p_lpRealIn || !p_lpRealOut || !p_lpImagOut) return;
|
||||
|
||||
// std::cerr << "FFT::process(" << m_n << "," << p_bInverseTransform << ")" << std::endl;
|
||||
|
||||
unsigned int NumBits;
|
||||
unsigned int i, j, k, n;
|
||||
unsigned int BlockSize, BlockEnd;
|
||||
|
||||
double angle_numerator = 2.0 * M_PI;
|
||||
double tr, ti;
|
||||
|
||||
if( !MathUtilities::isPowerOfTwo(m_n) )
|
||||
{
|
||||
std::cerr << "ERROR: FFT::process: Non-power-of-two FFT size "
|
||||
<< m_n << " not supported in this implementation"
|
||||
<< std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
if( p_bInverseTransform ) angle_numerator = -angle_numerator;
|
||||
|
||||
NumBits = numberOfBitsNeeded ( m_n );
|
||||
|
||||
|
||||
for( i=0; i < m_n; i++ )
|
||||
{
|
||||
j = reverseBits ( i, NumBits );
|
||||
p_lpRealOut[j] = p_lpRealIn[i];
|
||||
p_lpImagOut[j] = (p_lpImagIn == 0) ? 0.0 : p_lpImagIn[i];
|
||||
}
|
||||
|
||||
|
||||
BlockEnd = 1;
|
||||
for( BlockSize = 2; BlockSize <= m_n; BlockSize <<= 1 )
|
||||
{
|
||||
double delta_angle = angle_numerator / (double)BlockSize;
|
||||
double sm2 = -sin ( -2 * delta_angle );
|
||||
double sm1 = -sin ( -delta_angle );
|
||||
double cm2 = cos ( -2 * delta_angle );
|
||||
double cm1 = cos ( -delta_angle );
|
||||
double w = 2 * cm1;
|
||||
double ar[3], ai[3];
|
||||
|
||||
for( i=0; i < m_n; i += BlockSize )
|
||||
{
|
||||
|
||||
ar[2] = cm2;
|
||||
ar[1] = cm1;
|
||||
|
||||
ai[2] = sm2;
|
||||
ai[1] = sm1;
|
||||
|
||||
for ( j=i, n=0; n < BlockEnd; j++, n++ )
|
||||
{
|
||||
|
||||
ar[0] = w*ar[1] - ar[2];
|
||||
ar[2] = ar[1];
|
||||
ar[1] = ar[0];
|
||||
|
||||
ai[0] = w*ai[1] - ai[2];
|
||||
ai[2] = ai[1];
|
||||
ai[1] = ai[0];
|
||||
|
||||
k = j + BlockEnd;
|
||||
tr = ar[0]*p_lpRealOut[k] - ai[0]*p_lpImagOut[k];
|
||||
ti = ar[0]*p_lpImagOut[k] + ai[0]*p_lpRealOut[k];
|
||||
|
||||
p_lpRealOut[k] = p_lpRealOut[j] - tr;
|
||||
p_lpImagOut[k] = p_lpImagOut[j] - ti;
|
||||
|
||||
p_lpRealOut[j] += tr;
|
||||
p_lpImagOut[j] += ti;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
BlockEnd = BlockSize;
|
||||
|
||||
}
|
||||
|
||||
|
||||
if( p_bInverseTransform )
|
||||
{
|
||||
double denom = (double)m_n;
|
||||
|
||||
for ( i=0; i < m_n; i++ )
|
||||
{
|
||||
p_lpRealOut[i] /= denom;
|
||||
p_lpImagOut[i] /= denom;
|
||||
}
|
||||
}
|
||||
m_d->forwardMagnitude(ri, mo);
|
||||
}
|
||||
|
||||
void
|
||||
FFTReal::inverse(const double *ri, const double *ii, double *ro)
|
||||
{
|
||||
m_d->inverse(ri, ii, ro);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -9,34 +9,97 @@
|
||||
#ifndef FFT_H
|
||||
#define FFT_H
|
||||
|
||||
class FFT
|
||||
class FFT
|
||||
{
|
||||
public:
|
||||
FFT(unsigned int nsamples);
|
||||
virtual ~FFT();
|
||||
/**
|
||||
* Construct an FFT object to carry out complex-to-complex
|
||||
* transforms of size nsamples. nsamples does not have to be a
|
||||
* power of two.
|
||||
*/
|
||||
FFT(int nsamples);
|
||||
~FFT();
|
||||
|
||||
/**
|
||||
* Carry out a forward or inverse transform (depending on the
|
||||
* value of inverse) of size nsamples, where nsamples is the value
|
||||
* provided to the constructor above.
|
||||
*
|
||||
* realIn and (where present) imagIn should contain nsamples each,
|
||||
* and realOut and imagOut should point to enough space to receive
|
||||
* nsamples each.
|
||||
*
|
||||
* imagIn may be NULL if the signal is real, but the other
|
||||
* pointers must be valid.
|
||||
*
|
||||
* The inverse transform is scaled by 1/nsamples.
|
||||
*/
|
||||
void process(bool inverse,
|
||||
const double *realIn, const double *imagIn,
|
||||
double *realOut, double *imagOut);
|
||||
|
||||
|
||||
private:
|
||||
unsigned int m_n;
|
||||
void *m_private;
|
||||
class D;
|
||||
D *m_d;
|
||||
};
|
||||
|
||||
class FFTReal
|
||||
{
|
||||
public:
|
||||
FFTReal(unsigned int nsamples);
|
||||
/**
|
||||
* Construct an FFT object to carry out real-to-complex transforms
|
||||
* of size nsamples. nsamples does not have to be a power of two,
|
||||
* but it does have to be even. (Use the complex-complex FFT above
|
||||
* if you need an odd FFT size. This constructor will throw
|
||||
* std::invalid_argument if nsamples is odd.)
|
||||
*/
|
||||
FFTReal(int nsamples);
|
||||
~FFTReal();
|
||||
|
||||
void process(bool inverse,
|
||||
const double *realIn,
|
||||
/**
|
||||
* Carry out a forward real-to-complex transform of size nsamples,
|
||||
* where nsamples is the value provided to the constructor above.
|
||||
*
|
||||
* realIn, realOut, and imagOut must point to (enough space for)
|
||||
* nsamples values. For consistency with the FFT class above, and
|
||||
* compatibility with existing code, the conjugate half of the
|
||||
* output is returned even though it is redundant.
|
||||
*/
|
||||
void forward(const double *realIn,
|
||||
double *realOut, double *imagOut);
|
||||
|
||||
/**
|
||||
* Carry out a forward real-to-complex transform of size nsamples,
|
||||
* where nsamples is the value provided to the constructor
|
||||
* above. Return only the magnitudes of the complex output values.
|
||||
*
|
||||
* realIn and magOut must point to (enough space for) nsamples
|
||||
* values. For consistency with the FFT class above, and
|
||||
* compatibility with existing code, the conjugate half of the
|
||||
* output is returned even though it is redundant.
|
||||
*/
|
||||
void forwardMagnitude(const double *realIn, double *magOut);
|
||||
|
||||
/**
|
||||
* Carry out an inverse real transform (i.e. complex-to-real) of
|
||||
* size nsamples, where nsamples is the value provided to the
|
||||
* constructor above.
|
||||
*
|
||||
* realIn and imagIn should point to at least nsamples/2+1 values;
|
||||
* if more are provided, only the first nsamples/2+1 values of
|
||||
* each will be used (the conjugate half will always be deduced
|
||||
* from the first nsamples/2+1 rather than being read from the
|
||||
* input data). realOut should point to enough space to receive
|
||||
* nsamples values.
|
||||
*
|
||||
* The inverse transform is scaled by 1/nsamples.
|
||||
*/
|
||||
void inverse(const double *realIn, const double *imagIn,
|
||||
double *realOut);
|
||||
|
||||
private:
|
||||
unsigned int m_n;
|
||||
void *m_private_real;
|
||||
};
|
||||
class D;
|
||||
D *m_d;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -13,10 +13,6 @@
|
||||
COPYING included with this distribution for more information.
|
||||
*/
|
||||
|
||||
#ifdef COMPILER_MSVC
|
||||
#pragma warning(disable:4305)
|
||||
#endif
|
||||
|
||||
#include "Wavelet.h"
|
||||
|
||||
#include <cassert>
|
||||
@@ -81,11 +77,11 @@ Wavelet::createDecompositionFilters(Type wavelet,
|
||||
lpd.clear();
|
||||
hpd.clear();
|
||||
|
||||
unsigned int flength = 0;
|
||||
|
||||
int flength = 0;
|
||||
|
||||
switch (wavelet) {
|
||||
|
||||
case Haar:
|
||||
case Haar:
|
||||
lpd.push_back(0.70710678118655);
|
||||
lpd.push_back(0.70710678118655);
|
||||
hpd.push_back(-0.70710678118655);
|
||||
@@ -103,7 +99,7 @@ Wavelet::createDecompositionFilters(Type wavelet,
|
||||
hpd.push_back(-0.22414386804186);
|
||||
hpd.push_back(-0.12940952255092);
|
||||
flength = 4;
|
||||
break;
|
||||
break;
|
||||
|
||||
case Daubechies_3:
|
||||
lpd.push_back(0.03522629188210);
|
||||
@@ -592,7 +588,7 @@ Wavelet::createDecompositionFilters(Type wavelet,
|
||||
hpd.push_back(-0.00000000000000);
|
||||
flength = 80;
|
||||
break;
|
||||
|
||||
|
||||
case Symlet_2:
|
||||
lpd.push_back(-0.12940952255092);
|
||||
lpd.push_back(0.22414386804186);
|
||||
@@ -692,7 +688,7 @@ Wavelet::createDecompositionFilters(Type wavelet,
|
||||
hpd.push_back(0.01540410932703);
|
||||
flength = 12;
|
||||
break;
|
||||
|
||||
|
||||
case Symlet_7:
|
||||
lpd.push_back(0.00268181456826);
|
||||
lpd.push_back(-0.00104738488868);
|
||||
|
||||
Reference in New Issue
Block a user