update qm-dsp library

This commit is contained in:
Robin Gareus
2016-10-06 00:16:44 +02:00
parent 2a27cc4758
commit f68d2e06bc
100 changed files with 58968 additions and 55091 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -27,14 +27,14 @@ Chromagram::Chromagram( ChromaConfig Config ) :
}
int Chromagram::initialise( ChromaConfig Config )
{
{
m_FMin = Config.min; // min freq
m_FMax = Config.max; // max freq
m_BPO = Config.BPO; // bins per octave
m_normalise = Config.normalise; // if frame normalisation is required
// No. of constant Q bins
m_uK = ( unsigned int ) ceil( m_BPO * log(m_FMax/m_FMin)/log(2.0));
m_uK = ( unsigned int ) ceil( m_BPO * log(m_FMax/m_FMin)/log(2.0));
// Create array for chroma result
m_chromadata = new double[ m_BPO ];
@@ -49,7 +49,7 @@ int Chromagram::initialise( ChromaConfig Config )
ConstantQConfig.max = m_FMax;
ConstantQConfig.BPO = m_BPO;
ConstantQConfig.CQThresh = Config.CQThresh;
// Initialise ConstantQ operator
m_ConstantQ = new ConstantQ( ConstantQConfig );
@@ -57,7 +57,7 @@ int Chromagram::initialise( ChromaConfig Config )
m_frameSize = m_ConstantQ->getfftlength();
m_hopSize = m_ConstantQ->gethop();
// Initialise FFT object
// Initialise FFT object
m_FFT = new FFTReal(m_frameSize);
m_FFTRe = new double[ m_frameSize ];
@@ -124,7 +124,7 @@ void Chromagram::unityNormalise(double *src)
double* Chromagram::process( const double *data )
{
if (!m_skGenerated) {
// Generate CQ Kernel
// Generate CQ Kernel
m_ConstantQ->sparsekernel();
m_skGenerated = true;
}
@@ -139,8 +139,7 @@ double* Chromagram::process( const double *data )
}
m_window->cut(m_windowbuf);
// FFT of current frame
m_FFT->process(false, m_windowbuf, m_FFTRe, m_FFTIm);
m_FFT->forward(m_windowbuf, m_FFTRe, m_FFTIm);
return process(m_FFTRe, m_FFTIm);
}
@@ -148,7 +147,7 @@ double* Chromagram::process( const double *data )
double* Chromagram::process( const double *real, const double *imag )
{
if (!m_skGenerated) {
// Generate CQ Kernel
// Generate CQ Kernel
m_ConstantQ->sparsekernel();
m_skGenerated = true;
}
@@ -158,16 +157,15 @@ double* Chromagram::process( const double *real, const double *imag )
double cmax = 0.0;
double cval = 0;
// Calculate ConstantQ frame
m_ConstantQ->process( real, imag, m_CQRe, m_CQIm );
// add each octave of cq data into Chromagram
const unsigned octaves = (int)floor(double( m_uK/m_BPO))-1;
for (unsigned octave = 0; octave <= octaves; octave++)
for (unsigned octave = 0; octave <= octaves; octave++)
{
unsigned firstBin = octave*m_BPO;
for (unsigned i = 0; i < m_BPO; i++)
for (unsigned i = 0; i < m_BPO; i++)
{
m_chromadata[i] += kabs( m_CQRe[ firstBin + i ], m_CQIm[ firstBin + i ]);
}

View File

@@ -29,20 +29,20 @@ struct ChromaConfig{
MathUtilities::NormaliseType normalise;
};
class Chromagram
class Chromagram
{
public:
public:
Chromagram( ChromaConfig Config );
~Chromagram();
double* process( const double *data ); // time domain
double* process( const double *real, const double *imag ); // frequency domain
void unityNormalise( double* src );
// Complex arithmetic
double kabs( double real, double imag );
// Results
unsigned int getK() { return m_uK;}
unsigned int getFrameSize() { return m_frameSize; }
@@ -54,7 +54,7 @@ private:
Window<double> *m_window;
double *m_windowbuf;
double* m_chromadata;
double m_FMin;
double m_FMax;

View File

@@ -96,7 +96,7 @@ void ConstantQ::sparsekernel()
double* transfHammingWindowRe = new double [ m_FFTLength ];
double* transfHammingWindowIm = new double [ m_FFTLength ];
for (unsigned u=0; u < m_FFTLength; u++)
for (unsigned u=0; u < m_FFTLength; u++)
{
hammingWindowRe[u] = 0;
hammingWindowIm[u] = 0;
@@ -109,28 +109,28 @@ void ConstantQ::sparsekernel()
sk->js.reserve( m_FFTLength*2 );
sk->real.reserve( m_FFTLength*2 );
sk->imag.reserve( m_FFTLength*2 );
// for each bin value K, calculate temporal kernel, take its fft to
//calculate the spectral kernel then threshold it to make it sparse and
//calculate the spectral kernel then threshold it to make it sparse and
//add it to the sparse kernels matrix
double squareThreshold = m_CQThresh * m_CQThresh;
FFT m_FFT(m_FFTLength);
for (unsigned k = m_uK; k--; )
for (unsigned k = m_uK; k--; )
{
for (unsigned u=0; u < m_FFTLength; u++)
for (unsigned u=0; u < m_FFTLength; u++)
{
hammingWindowRe[u] = 0;
hammingWindowIm[u] = 0;
}
// Computing a hamming window
const unsigned hammingLength = (int) ceil( m_dQ * m_FS / ( m_FMin * pow(2,((double)(k))/(double)m_BPO)));
unsigned origin = m_FFTLength/2 - hammingLength/2;
for (unsigned i=0; i<hammingLength; i++)
for (unsigned i=0; i<hammingLength; i++)
{
const double angle = 2*PI*m_dQ*i/hammingLength;
const double real = cos(angle);
@@ -148,17 +148,17 @@ void ConstantQ::sparsekernel()
hammingWindowIm[i] = hammingWindowIm[i + m_FFTLength/2];
hammingWindowIm[i + m_FFTLength/2] = temp;
}
//do fft of hammingWindow
m_FFT.process( 0, hammingWindowRe, hammingWindowIm, transfHammingWindowRe, transfHammingWindowIm );
for (unsigned j=0; j<( m_FFTLength ); j++)
for (unsigned j=0; j<( m_FFTLength ); j++)
{
// perform thresholding
const double squaredBin = squaredModule( transfHammingWindowRe[ j ], transfHammingWindowIm[ j ]);
if (squaredBin <= squareThreshold) continue;
// Insert non-zero position indexes, doubled because they are floats
sk->is.push_back(j);
sk->js.push_back(k);
@@ -241,7 +241,7 @@ void ConstantQ::sparsekernel()
cout << "}" << endl;
*/
// std::cerr << "done\n -> is: " << sk->is.size() << ", js: " << sk->js.size() << ", reals: " << sk->real.size() << ", imags: " << sk->imag.size() << std::endl;
m_sparseKernel = sk;
return;
}
@@ -256,7 +256,7 @@ double* ConstantQ::process( const double* fftdata )
SparseKernel *sk = m_sparseKernel;
for (unsigned row=0; row<2*m_uK; row++)
for (unsigned row=0; row<2*m_uK; row++)
{
m_CQdata[ row ] = 0;
m_CQdata[ row+1 ] = 0;
@@ -266,7 +266,7 @@ double* ConstantQ::process( const double* fftdata )
const double *real = &(sk->real[0]);
const double *imag = &(sk->imag[0]);
const unsigned int sparseCells = sk->real.size();
for (unsigned i = 0; i<sparseCells; i++)
{
const unsigned row = cqbin[i];
@@ -324,7 +324,7 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
SparseKernel *sk = m_sparseKernel;
for (unsigned row=0; row<m_uK; row++)
for (unsigned row=0; row<m_uK; row++)
{
CQRe[ row ] = 0;
CQIm[ row ] = 0;
@@ -335,7 +335,7 @@ void ConstantQ::process(const double *FFTRe, const double* FFTIm,
const double *real = &(sk->real[0]);
const double *imag = &(sk->imag[0]);
const unsigned int sparseCells = sk->real.size();
for (unsigned i = 0; i<sparseCells; i++)
{
const unsigned row = cqbin[i];

View File

@@ -29,7 +29,7 @@ struct CQConfig{
};
class ConstantQ {
//public functions incl. sparsekernel so can keep out of loop in main
public:
void process( const double* FFTRe, const double* FFTIm,
@@ -46,7 +46,7 @@ public:
double out = 0.54 - 0.46*cos(2*PI*n/len);
return(out);
}
int getnumwin() { return m_numWin;}
double getQ() { return m_dQ;}
int getK() {return m_uK ;}
@@ -56,7 +56,7 @@ public:
private:
void initialise( CQConfig Config );
void deInitialise();
double* m_CQdata;
unsigned int m_FS;
double m_FMin;

View File

@@ -1,5 +1,14 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Copyright (c) 2005 Centre for Digital Music ( C4DM )
Queen Mary University of London
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
// GetKeyMode.cpp: implementation of the CGetKeyMode class.
//
//////////////////////////////////////////////////////////////////////

View File

@@ -1,17 +1,12 @@
/*
* Author: c.landone
* Description:
*
* Syntax: C++
*
* Copyright (c) 2005 Centre for Digital Music ( C4DM )
* Queen Mary Univesrity of London
*
*
* This program is not free software; you cannot redistribute it
* without the explicit authorization from the centre for digital music,
* queen mary university of london
*
Copyright (c) 2005 Centre for Digital Music ( C4DM )
Queen Mary University of London
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef GETKEYMODE_H

View File

@@ -27,75 +27,75 @@ MFCC::MFCC(MFCCConfig config)
/* Calculate at startup */
double *freqs, *lower, *center, *upper, *triangleHeight, *fftFreqs;
lowestFrequency = 66.6666666;
linearFilters = 13;
linearSpacing = 66.66666666;
logFilters = 27;
logSpacing = 1.0711703;
/* FFT and analysis window sizes */
fftSize = config.fftsize;
fft = new FFTReal(fftSize);
totalFilters = linearFilters + logFilters;
logPower = config.logpower;
samplingRate = config.FS;
/* The number of cepstral components */
nceps = config.nceps;
/* Set if user want C0 */
WANT_C0 = (config.want_c0 ? 1 : 0);
/* Allocate space for feature vector */
if (WANT_C0 == 1) {
ceps = (double*)calloc(nceps+1, sizeof(double));
} else {
ceps = (double*)calloc(nceps, sizeof(double));
}
/* Allocate space for local vectors */
mfccDCTMatrix = (double**)calloc(nceps+1, sizeof(double*));
for (i = 0; i < nceps+1; i++) {
mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
mfccDCTMatrix[i]= (double*)calloc(totalFilters, sizeof(double));
}
mfccFilterWeights = (double**)calloc(totalFilters, sizeof(double*));
for (i = 0; i < totalFilters; i++) {
mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
mfccFilterWeights[i] = (double*)calloc(fftSize, sizeof(double));
}
freqs = (double*)calloc(totalFilters+2,sizeof(double));
lower = (double*)calloc(totalFilters,sizeof(double));
center = (double*)calloc(totalFilters,sizeof(double));
upper = (double*)calloc(totalFilters,sizeof(double));
triangleHeight = (double*)calloc(totalFilters,sizeof(double));
fftFreqs = (double*)calloc(fftSize,sizeof(double));
for (i = 0; i < linearFilters; i++) {
freqs[i] = lowestFrequency + ((double)i) * linearSpacing;
}
for (i = linearFilters; i < totalFilters+2; i++) {
freqs[i] = freqs[linearFilters-1] *
freqs[i] = freqs[linearFilters-1] *
pow(logSpacing, (double)(i-linearFilters+1));
}
/* Define lower, center and upper */
memcpy(lower, freqs,totalFilters*sizeof(double));
memcpy(center, &freqs[1],totalFilters*sizeof(double));
memcpy(upper, &freqs[2],totalFilters*sizeof(double));
for (i=0;i<totalFilters;i++){
triangleHeight[i] = 2./(upper[i]-lower[i]);
}
for (i=0;i<fftSize;i++){
fftFreqs[i] = ((double) i / ((double) fftSize ) *
fftFreqs[i] = ((double) i / ((double) fftSize ) *
(double) samplingRate);
}
@@ -103,12 +103,12 @@ MFCC::MFCC(MFCCConfig config)
for (i=0;i<totalFilters;i++){
for (j=0;j<fftSize;j++) {
if ((fftFreqs[j] > lower[i]) && (fftFreqs[j] <= center[i])) {
mfccFilterWeights[i][j] = triangleHeight[i] *
(fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
mfccFilterWeights[i][j] = triangleHeight[i] *
(fftFreqs[j]-lower[i]) / (center[i]-lower[i]);
}
else
{
@@ -118,7 +118,7 @@ MFCC::MFCC(MFCCConfig config)
if ((fftFreqs[j]>center[i]) && (fftFreqs[j]<upper[i])) {
mfccFilterWeights[i][j] = mfccFilterWeights[i][j]
+ triangleHeight[i] * (upper[i]-fftFreqs[j])
+ triangleHeight[i] * (upper[i]-fftFreqs[j])
/ (upper[i]-center[i]);
}
else
@@ -130,15 +130,15 @@ MFCC::MFCC(MFCCConfig config)
}
/*
* We calculate now mfccDCT matrix
* We calculate now mfccDCT matrix
* NB: +1 because of the DC component
*/
const double pi = 3.14159265358979323846264338327950288;
for (i = 0; i < nceps+1; i++) {
for (j = 0; j < totalFilters; j++) {
mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
mfccDCTMatrix[i][j] = (1./sqrt((double) totalFilters / 2.))
* cos((double) i * ((double) j + 0.5) / (double) totalFilters * pi);
}
}
@@ -146,7 +146,7 @@ MFCC::MFCC(MFCCConfig config)
for (j = 0; j < totalFilters; j++){
mfccDCTMatrix[0][j] = (sqrt(2.)/2.) * mfccDCTMatrix[0][j];
}
/* The analysis window */
window = new Window<double>(config.window, fftSize);
@@ -156,7 +156,7 @@ MFCC::MFCC(MFCCConfig config)
earMag = (double*)calloc(totalFilters, sizeof(double));
fftMag = (double*)calloc(fftSize/2, sizeof(double));
free(freqs);
free(lower);
free(center);
@@ -168,27 +168,27 @@ MFCC::MFCC(MFCCConfig config)
MFCC::~MFCC()
{
int i;
/* Free the structure */
for (i = 0; i < nceps+1; i++) {
free(mfccDCTMatrix[i]);
}
free(mfccDCTMatrix);
for (i = 0; i < totalFilters; i++) {
free(mfccFilterWeights[i]);
}
free(mfccFilterWeights);
/* Free the feature vector */
free(ceps);
/* The analysis window */
delete window;
free(earMag);
free(fftMag);
/* Free the FFT */
free(realOut);
free(imagOut);
@@ -198,19 +198,19 @@ MFCC::~MFCC()
/*
*
* Extract the MFCC on the input frame
*
*/
*
* Extract the MFCC on the input frame
*
*/
int MFCC::process(const double *inframe, double *outceps)
{
double *inputData = (double *)malloc(fftSize * sizeof(double));
for (int i = 0; i < fftSize; ++i) inputData[i] = inframe[i];
window->cut(inputData);
/* Calculate the fft on the input frame */
fft->process(0, inputData, realOut, imagOut);
fft->forward(inputData, realOut, imagOut);
free(inputData);
@@ -244,14 +244,14 @@ int MFCC::process(const double *real, const double *imag, double *outceps)
}
/*
*
* Calculate now the cepstral coefficients
*
* Calculate now the cepstral coefficients
* with or without the DC component
*
*/
if (WANT_C0 == 1) {
for (i = 0; i < nceps+1; i++) {
double tmp = 0.;
for (j = 0; j < totalFilters; j++){
@@ -260,8 +260,8 @@ int MFCC::process(const double *real, const double *imag, double *outceps)
outceps[i] = tmp;
}
}
else
{
else
{
for (i = 1; i < nceps+1; i++) {
double tmp = 0.;
for (j = 0; j < totalFilters; j++){
@@ -270,7 +270,7 @@ int MFCC::process(const double *real, const double *imag, double *outceps)
outceps[i-1] = tmp;
}
}
return nceps;
}

View File

@@ -57,31 +57,31 @@ public:
private:
/* Filter bank parameters */
double lowestFrequency;
int linearFilters;
double lowestFrequency;
int linearFilters;
double linearSpacing;
int logFilters;
double logSpacing;
/* FFT length */
int fftSize;
int totalFilters;
double logPower;
/* Misc. */
int samplingRate;
int nceps;
/* MFCC vector */
double *ceps;
double **mfccDCTMatrix;
double **mfccFilterWeights;
/* The analysis window */
Window<double> *window;
/* For the FFT */
double *realOut;
double *imagOut;

View File

@@ -40,10 +40,11 @@ DetectionFunction::~DetectionFunction()
void DetectionFunction::initialise( DFConfig Config )
{
m_dataLength = Config.frameLength;
m_halfLength = m_dataLength/2;
m_halfLength = m_dataLength/2 + 1;
m_DFType = Config.DFType;
m_stepSize = Config.stepSize;
m_dbRise = Config.dbRise;
m_whiten = Config.adaptiveWhitening;
m_whitenRelaxCoeff = Config.whiteningRelaxCoeff;
@@ -53,7 +54,7 @@ void DetectionFunction::initialise( DFConfig Config )
m_magHistory = new double[ m_halfLength ];
memset(m_magHistory,0, m_halfLength*sizeof(double));
m_phaseHistory = new double[ m_halfLength ];
memset(m_phaseHistory,0, m_halfLength*sizeof(double));
@@ -63,15 +64,14 @@ void DetectionFunction::initialise( DFConfig Config )
m_magPeaks = new double[ m_halfLength ];
memset(m_magPeaks,0, m_halfLength*sizeof(double));
// See note in process(const double *) below
int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
m_phaseVoc = new PhaseVocoder(actualLength);
m_phaseVoc = new PhaseVocoder(m_dataLength, m_stepSize);
m_DFWindowedFrame = new double[ m_dataLength ];
m_magnitude = new double[ m_halfLength ];
m_thetaAngle = new double[ m_halfLength ];
m_unwrapped = new double[ m_halfLength ];
m_window = new Window<double>(HanningWindow, m_dataLength);
m_windowed = new double[ m_dataLength ];
}
void DetectionFunction::deInitialise()
@@ -83,47 +83,31 @@ void DetectionFunction::deInitialise()
delete m_phaseVoc;
delete [] m_DFWindowedFrame;
delete [] m_magnitude;
delete [] m_thetaAngle;
delete [] m_windowed;
delete [] m_unwrapped;
delete m_window;
}
double DetectionFunction::process( const double *TDomain )
double DetectionFunction::processTimeDomain(const double *samples)
{
m_window->cut( TDomain, m_DFWindowedFrame );
m_window->cut(samples, m_windowed);
// Our own FFT implementation supports power-of-two sizes only.
// If we have to use this implementation (as opposed to the
// version of process() below that operates on frequency domain
// data directly), we will have to use the next smallest power of
// two from the block size. Results may vary accordingly!
unsigned int actualLength = MathUtilities::previousPowerOfTwo(m_dataLength);
if (actualLength != m_dataLength) {
// Pre-fill mag and phase vectors with zero, as the FFT output
// will not fill the arrays
for (unsigned int i = actualLength/2; i < m_dataLength/2; ++i) {
m_magnitude[i] = 0;
m_thetaAngle[0] = 0;
}
}
m_phaseVoc->process(m_DFWindowedFrame, m_magnitude, m_thetaAngle);
m_phaseVoc->processTimeDomain(m_windowed,
m_magnitude, m_thetaAngle, m_unwrapped);
if (m_whiten) whiten();
return runDF();
}
double DetectionFunction::process( const double *magnitudes, const double *phases )
double DetectionFunction::processFrequencyDomain(const double *reals,
const double *imags)
{
for (size_t i = 0; i < m_halfLength; ++i) {
m_magnitude[i] = magnitudes[i];
m_thetaAngle[i] = phases[i];
}
m_phaseVoc->processFrequencyDomain(reals, imags,
m_magnitude, m_thetaAngle, m_unwrapped);
if (m_whiten) whiten();
@@ -152,15 +136,19 @@ double DetectionFunction::runDF()
case DF_HFC:
retVal = HFC( m_halfLength, m_magnitude);
break;
case DF_SPECDIFF:
retVal = specDiff( m_halfLength, m_magnitude);
break;
case DF_PHASEDEV:
// Using the instantaneous phases here actually provides the
// same results (for these calculations) as if we had used
// unwrapped phases, but without the possible accumulation of
// phase error over time
retVal = phaseDev( m_halfLength, m_thetaAngle);
break;
case DF_COMPLEXSD:
retVal = complexSD( m_halfLength, m_magnitude, m_thetaAngle);
break;
@@ -169,7 +157,7 @@ double DetectionFunction::runDF()
retVal = broadband( m_halfLength, m_magnitude);
break;
}
return retVal;
}
@@ -195,7 +183,7 @@ double DetectionFunction::specDiff(unsigned int length, double *src)
for( i = 0; i < length; i++)
{
temp = fabs( (src[ i ] * src[ i ]) - (m_magHistory[ i ] * m_magHistory[ i ]) );
diff= sqrt(temp);
// (See note in phaseDev below.)
@@ -230,15 +218,14 @@ double DetectionFunction::phaseDev(unsigned int length, double *srcPhase)
// does significantly damage its ability to work with quieter
// music, so I'm removing it and counting the result always.
// Same goes for the spectral difference measure above.
tmpVal = fabs(dev);
val += tmpVal ;
m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
m_phaseHistory[ i ] = srcPhase[ i ];
}
return val;
}
@@ -250,7 +237,7 @@ double DetectionFunction::complexSD(unsigned int length, double *srcMagnitude, d
double tmpPhase = 0;
double tmpReal = 0;
double tmpImag = 0;
double dev = 0;
ComplexData meas = ComplexData( 0, 0 );
ComplexData j = ComplexData( 0, 1 );
@@ -259,14 +246,14 @@ double DetectionFunction::complexSD(unsigned int length, double *srcMagnitude, d
{
tmpPhase = (srcPhase[ i ]- 2*m_phaseHistory[ i ]+m_phaseHistoryOld[ i ]);
dev= MathUtilities::princarg( tmpPhase );
meas = m_magHistory[i] - ( srcMagnitude[ i ] * exp( j * dev) );
tmpReal = real( meas );
tmpImag = imag( meas );
val += sqrt( (tmpReal * tmpReal) + (tmpImag * tmpImag) );
m_phaseHistoryOld[ i ] = m_phaseHistory[ i ] ;
m_phaseHistory[ i ] = srcPhase[ i ];
m_magHistory[ i ] = srcMagnitude[ i ];
@@ -287,7 +274,7 @@ double DetectionFunction::broadband(unsigned int length, double *src)
m_magHistory[i] = sqrmag;
}
return val;
}
}
double* DetectionFunction::getSpectrumMagnitude()
{

View File

@@ -29,7 +29,7 @@
struct DFConfig{
unsigned int stepSize; // DF step in samples
unsigned int frameLength; // DF analysis window - usually 2*step
unsigned int frameLength; // DF analysis window - usually 2*step. Must be even!
int DFType; // type of detection function ( see defines )
double dbRise; // only used for broadband df (and required for it)
bool adaptiveWhitening; // perform adaptive whitening
@@ -37,14 +37,24 @@ struct DFConfig{
double whiteningFloor; // if < 0, a sensible default will be used
};
class DetectionFunction
class DetectionFunction
{
public:
double* getSpectrumMagnitude();
DetectionFunction( DFConfig Config );
virtual ~DetectionFunction();
double process( const double* TDomain );
double process( const double* magnitudes, const double* phases );
/**
* Process a single time-domain frame of audio, provided as
* frameLength samples.
*/
double processTimeDomain(const double* samples);
/**
* Process a single frequency-domain frame, provided as
* frameLength/2+1 real and imaginary component values.
*/
double processFrequencyDomain(const double* reals, const double* imags);
private:
void whiten();
@@ -55,7 +65,7 @@ private:
double phaseDev(unsigned int length, double *srcPhase);
double complexSD(unsigned int length, double *srcMagnitude, double *srcPhase);
double broadband(unsigned int length, double *srcMagnitude);
private:
void initialise( DFConfig Config );
void deInitialise();
@@ -74,12 +84,13 @@ private:
double* m_phaseHistoryOld;
double* m_magPeaks;
double* m_DFWindowedFrame; // Array for windowed analysis frame
double* m_windowed; // Array for windowed analysis frame
double* m_magnitude; // Magnitude of analysis frame ( frequency domain )
double* m_thetaAngle;// Phase of analysis frame ( frequency domain )
double* m_unwrapped; // Unwrapped phase of analysis frame
Window<double> *m_window;
PhaseVocoder* m_phaseVoc; // Phase Vocoder
};
#endif
#endif

View File

@@ -6,11 +6,19 @@
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
Modifications:
- delta threshold
Description: add delta threshold used as offset in the smoothed
detection function
Author: Mathieu Barthet
Date: June 2010
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
COPYING included with this distribution for more information.
*/
#include "PeakPicking.h"
@@ -41,15 +49,16 @@ void PeakPicking::initialise( PPickParams Config )
Qfilta = Config.QuadThresh.a ;
Qfiltb = Config.QuadThresh.b ;
Qfiltc = Config.QuadThresh.c ;
m_DFProcessingParams.length = m_DFLength;
m_DFProcessingParams.LPOrd = Config.LPOrd;
m_DFProcessingParams.LPACoeffs = Config.LPACoeffs;
m_DFProcessingParams.LPBCoeffs = Config.LPBCoeffs;
m_DFProcessingParams.length = m_DFLength;
m_DFProcessingParams.LPOrd = Config.LPOrd;
m_DFProcessingParams.LPACoeffs = Config.LPACoeffs;
m_DFProcessingParams.LPBCoeffs = Config.LPBCoeffs;
m_DFProcessingParams.winPre = Config.WinT.pre;
m_DFProcessingParams.winPost = Config.WinT.post;
m_DFProcessingParams.winPost = Config.WinT.post;
m_DFProcessingParams.AlphaNormParam = Config.alpha;
m_DFProcessingParams.isMedianPositive = false;
m_DFProcessingParams.delta = Config.delta; //add the delta threshold as an adjustable parameter
m_DFSmoothing = new DFProcess( m_DFProcessingParams );
@@ -68,19 +77,19 @@ void PeakPicking::process( double* src, unsigned int len, vector<int> &onsets )
{
if (len < 4) return;
vector <double> m_maxima;
vector <double> m_maxima;
// Signal conditioning
// Signal conditioning
m_DFSmoothing->process( src, m_workBuffer );
for( unsigned int u = 0; u < len; u++)
{
m_maxima.push_back( m_workBuffer[ u ] );
m_maxima.push_back( m_workBuffer[ u ] );
}
quadEval( m_maxima, onsets );
for(unsigned int b = 0; b < m_maxima.size(); b++)
for( int b = 0; b < (int)m_maxima.size(); b++)
{
src[ b ] = m_maxima[ b ];
}
@@ -92,7 +101,7 @@ int PeakPicking::quadEval( vector<double> &src, vector<int> &idx )
vector <int> m_maxIndex;
vector <int> m_onsetPosition;
vector <double> m_maxFit;
vector <double> m_poly;
vector <double> m_err;
@@ -123,7 +132,7 @@ int PeakPicking::quadEval( vector<double> &src, vector<int> &idx )
for (int k = -2; k <= 2; ++k)
{
selMax = src[ m_maxIndex[j] + k ] ;
m_maxFit.push_back(selMax);
m_maxFit.push_back(selMax);
}
TPolyFit::PolyFit2(m_err, m_maxFit, m_poly);
@@ -135,7 +144,7 @@ int PeakPicking::quadEval( vector<double> &src, vector<int> &idx )
{
idx.push_back(m_maxIndex[j]);
}
m_maxFit.clear();
}

View File

@@ -6,6 +6,14 @@
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
Modifications:
- delta threshold
Description: add delta threshold used as offset in the smoothed
detection function
Author: Mathieu Barthet
Date: June 2010
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
@@ -29,6 +37,12 @@ struct PPWinThresh
{
unsigned int pre;
unsigned int post;
PPWinThresh(unsigned int x, unsigned int y) :
pre(x),
post(y)
{
}
};
struct QFitThresh
@@ -36,12 +50,19 @@ struct QFitThresh
double a;
double b;
double c;
QFitThresh(double x, double y, double z) :
a(x),
b(y),
c(z)
{
}
};
struct PPickParams
{
unsigned int length; //Detection FunctionLength
double tau; // time resolution of the detection function:
double tau; // time resolution of the detection function
unsigned int alpha; //alpha-norm parameter
double cutoff;//low-pass Filter cutoff freq
unsigned int LPOrd; // low-pass Filter order
@@ -49,14 +70,29 @@ struct PPickParams
double* LPBCoeffs; //low pass Filter num coefficients
PPWinThresh WinT;//window size in frames for adaptive thresholding [pre post]:
QFitThresh QuadThresh;
float delta; //delta threshold used as an offset when computing the smoothed detection function
PPickParams() :
length(0),
tau(0),
alpha(0),
cutoff(0),
LPOrd(0),
LPACoeffs(NULL),
LPBCoeffs(NULL),
WinT(0,0),
QuadThresh(0,0,0),
delta(0)
{
}
};
class PeakPicking
class PeakPicking
{
public:
PeakPicking( PPickParams Config );
virtual ~PeakPicking();
void process( double* src, unsigned int len, vector<int> &onsets );
@@ -64,7 +100,7 @@ private:
void initialise( PPickParams Config );
void deInitialise();
int quadEval( vector<double> &src, vector<int> &idx );
DFProcConfig m_DFProcessingParams;
unsigned int m_DFLength ;
@@ -74,7 +110,7 @@ private:
double* m_workBuffer;
DFProcess* m_DFSmoothing;
};

View File

@@ -4,7 +4,7 @@
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
This file 2005-2006 Christian Landone, copyright 2013 QMUL.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
@@ -15,30 +15,47 @@
#include "PhaseVocoder.h"
#include "dsp/transforms/FFT.h"
#include "maths/MathUtilities.h"
#include <math.h>
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
#include <cassert>
PhaseVocoder::PhaseVocoder(unsigned int n) :
m_n(n)
#include <iostream>
using std::cerr;
using std::endl;
PhaseVocoder::PhaseVocoder(int n, int hop) :
m_n(n),
m_hop(hop)
{
m_fft = new FFTReal(m_n);
m_realOut = new double[m_n];
m_imagOut = new double[m_n];
m_time = new double[m_n];
m_real = new double[m_n];
m_imag = new double[m_n];
m_phase = new double[m_n/2 + 1];
m_unwrapped = new double[m_n/2 + 1];
for (int i = 0; i < m_n/2 + 1; ++i) {
m_phase[i] = 0.0;
m_unwrapped[i] = 0.0;
}
reset();
}
PhaseVocoder::~PhaseVocoder()
{
delete [] m_realOut;
delete [] m_imagOut;
delete[] m_unwrapped;
delete[] m_phase;
delete[] m_real;
delete[] m_imag;
delete[] m_time;
delete m_fft;
}
void PhaseVocoder::FFTShift(unsigned int size, double *src)
void PhaseVocoder::FFTShift(double *src)
{
const int hs = size/2;
const int hs = m_n/2;
for (int i = 0; i < hs; ++i) {
double tmp = src[i];
src[i] = src[i + hs];
@@ -46,34 +63,73 @@ void PhaseVocoder::FFTShift(unsigned int size, double *src)
}
}
void PhaseVocoder::process(double *src, double *mag, double *theta)
void PhaseVocoder::processTimeDomain(const double *src,
double *mag, double *theta,
double *unwrapped)
{
FFTShift( m_n, src);
m_fft->process(0, src, m_realOut, m_imagOut);
getMagnitude( m_n/2, mag, m_realOut, m_imagOut);
getPhase( m_n/2, theta, m_realOut, m_imagOut);
for (int i = 0; i < m_n; ++i) {
m_time[i] = src[i];
}
FFTShift(m_time);
m_fft->forward(m_time, m_real, m_imag);
getMagnitudes(mag);
getPhases(theta);
unwrapPhases(theta, unwrapped);
}
void PhaseVocoder::getMagnitude(unsigned int size, double *mag, double *real, double *imag)
void PhaseVocoder::processFrequencyDomain(const double *reals,
const double *imags,
double *mag, double *theta,
double *unwrapped)
{
unsigned int j;
for (int i = 0; i < m_n/2 + 1; ++i) {
m_real[i] = reals[i];
m_imag[i] = imags[i];
}
getMagnitudes(mag);
getPhases(theta);
unwrapPhases(theta, unwrapped);
}
for( j = 0; j < size; j++)
{
mag[ j ] = sqrt( real[ j ] * real[ j ] + imag[ j ] * imag[ j ]);
void PhaseVocoder::reset()
{
    // m_phase holds the "previous" frame's phase. Starting every bin one
    // hop behind means a signal whose initial phase is zero yields the
    // expected values on the first frame. This has no analytical
    // significance; it only keeps the first results tidy.
    const int bins = m_n/2 + 1;
    for (int bin = 0; bin < bins; ++bin) {
        const double initial = -((2 * M_PI * m_hop * bin) / m_n);
        m_phase[bin] = initial;
        m_unwrapped[bin] = initial;
    }
}
void PhaseVocoder::getPhase(unsigned int size, double *theta, double *real, double *imag)
{
unsigned int k;
// Phase Angle "matlab" style
//Watch out for quadrant mapping !!!
for( k = 0; k < size; k++)
{
theta[ k ] = atan2( -imag[ k ], real[ k ]);
void PhaseVocoder::getMagnitudes(double *mag)
{
    // Writes the magnitude of each of the m_n/2 + 1 stored complex bins
    // (m_real, m_imag) into mag.
    const int bins = m_n/2 + 1;
    for (int bin = 0; bin < bins; ++bin) {
        mag[bin] = sqrt(m_real[bin] * m_real[bin] + m_imag[bin] * m_imag[bin]);
    }
}
void PhaseVocoder::getPhases(double *theta)
{
    // Writes the phase angle atan2(imag, real) of each of the
    // m_n/2 + 1 stored complex bins into theta.
    const int bins = m_n/2 + 1;
    for (int bin = 0; bin < bins; ++bin) {
        theta[bin] = atan2(m_imag[bin], m_real[bin]);
    }
}
void PhaseVocoder::unwrapPhases(double *theta, double *unwrapped)
{
    // For each of the m_n/2 + 1 bins: predict the phase from the previous
    // frame's phase plus the per-hop advance (omega), pass the deviation of
    // the measured phase from that prediction through
    // MathUtilities::princarg, and accumulate omega plus that deviation
    // onto the running unwrapped phase. The stored previous phase and
    // unwrapped phase are updated for the next call.
    const int bins = m_n/2 + 1;
    for (int bin = 0; bin < bins; ++bin) {
        const double omega = (2 * M_PI * m_hop * bin) / m_n;
        const double predicted = m_phase[bin] + omega;
        const double deviation = MathUtilities::princarg(theta[bin] - predicted);

        unwrapped[bin] = m_unwrapped[bin] + omega + deviation;

        m_phase[bin] = theta[bin];
        m_unwrapped[bin] = unwrapped[bin];
    }
}

View File

@@ -4,7 +4,7 @@
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
This file 2005-2006 Christian Landone, copyright 2013 QMUL.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
@@ -18,25 +18,63 @@
class FFTReal;
class PhaseVocoder
class PhaseVocoder
{
public:
PhaseVocoder( unsigned int size );
PhaseVocoder(int size, int hop);
virtual ~PhaseVocoder();
void process( double* src, double* mag, double* theta);
/**
* Given one frame of time-domain samples, FFT and return the
* magnitudes, instantaneous phases, and unwrapped phases.
*
* src must have size values (where size is the frame size value
* as passed to the PhaseVocoder constructor), and should have
* been windowed as necessary by the caller (but not fft-shifted).
*
* mag, phase, and unwrapped must each be non-NULL and point to
* enough space for size/2 + 1 values. The redundant conjugate
* half of the output is not returned.
*/
void processTimeDomain(const double *src,
double *mag, double *phase, double *unwrapped);
/**
* Given one frame of frequency-domain samples, return the
* magnitudes, instantaneous phases, and unwrapped phases.
*
* reals and imags must each contain size/2+1 values (where size
* is the frame size value as passed to the PhaseVocoder
* constructor).
*
* mag, phase, and unwrapped must each be non-NULL and point to
* enough space for size/2+1 values.
*/
void processFrequencyDomain(const double *reals, const double *imags,
double *mag, double *phase, double *unwrapped);
/**
* Reset the stored phases to zero. Note that this may be
* necessary occasionally (depending on the application) to avoid
* loss of floating-point precision in the accumulated unwrapped
* phase values as they grow.
*/
void reset();
protected:
void getPhase(unsigned int size, double *theta, double *real, double *imag);
// void coreFFT( unsigned int NumSamples, double *RealIn, double* ImagIn, double *RealOut, double *ImagOut);
void getMagnitude( unsigned int size, double* mag, double* real, double* imag);
void FFTShift( unsigned int size, double* src);
void FFTShift(double *src);
void getMagnitudes(double *mag);
void getPhases(double *theta);
void unwrapPhases(double *theta, double *unwrapped);
unsigned int m_n;
int m_n;
int m_hop;
FFTReal *m_fft;
double *m_imagOut;
double *m_realOut;
double *m_time;
double *m_imag;
double *m_real;
double *m_phase;
double *m_unwrapped;
};
#endif

View File

@@ -199,10 +199,15 @@ void Decimator::doAntiAlias(const float *src, double *dst, unsigned int length)
void Decimator::process(const double *src, double *dst)
{
if( m_decFactor != 1 )
{
doAntiAlias( src, decBuffer, m_inputLength );
if (m_decFactor == 1) {
for( unsigned int i = 0; i < m_outputLength; i++ ) {
dst[i] = src[i];
}
return;
}
doAntiAlias( src, decBuffer, m_inputLength );
unsigned idx = 0;
for( unsigned int i = 0; i < m_outputLength; i++ )
@@ -213,10 +218,15 @@ void Decimator::process(const double *src, double *dst)
void Decimator::process(const float *src, float *dst)
{
if( m_decFactor != 1 )
{
doAntiAlias( src, decBuffer, m_inputLength );
if (m_decFactor == 1) {
for( unsigned int i = 0; i < m_outputLength; i++ ) {
dst[i] = src[i];
}
return;
}
doAntiAlias( src, decBuffer, m_inputLength );
unsigned idx = 0;
for( unsigned int i = 0; i < m_outputLength; i++ )

View File

@@ -15,12 +15,15 @@
#ifndef DECIMATOR_H
#define DECIMATOR_H
class Decimator
/**
* Decimator carries out a fast downsample by a power-of-two
* factor. Only a limited number of factors are supported, from two to
* whatever getHighestSupportedFactor() returns. This is much faster
* than Resampler but has a worse signal-noise ratio.
*/
class Decimator
{
public:
void process( const double* src, double* dst );
void process( const float* src, float* dst );
/**
* Construct a Decimator to operate on input blocks of length
* inLength, with decimation factor decFactor. inLength should be
@@ -34,11 +37,28 @@ public:
Decimator( unsigned int inLength, unsigned int decFactor );
virtual ~Decimator();
/**
* Process inLength samples (as supplied to constructor) from src
* and write inLength / decFactor samples to dst. Note that src
* and dst may be the same or overlap (an intermediate buffer is
* used).
*/
void process( const double* src, double* dst );
/**
* Process inLength samples (as supplied to constructor) from src
* and write inLength / decFactor samples to dst. Note that src
* and dst may be the same or overlap (an intermediate buffer is
* used).
*/
void process( const float* src, float* dst );
int getFactor() const { return m_decFactor; }
static int getHighestSupportedFactor() { return 8; }
private:
void resetFilter();
private:
void deInitialise();
void initialise( unsigned int inLength, unsigned int decFactor );
void doAntiAlias( const double* src, double* dst, unsigned int length );
@@ -55,8 +75,8 @@ private:
double a[ 9 ];
double b[ 9 ];
double* decBuffer;
};
#endif //
#endif //

View File

@@ -0,0 +1,160 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "DecimatorB.h"
#include "maths/MathUtilities.h"
#include <iostream>
using std::vector;
/**
 * Put every member into a known empty state, then hand over to
 * initialise() to validate the parameters and allocate buffers.
 */
DecimatorB::DecimatorB(int inLength, int decFactor) :
    m_inputLength(0),
    m_outputLength(0),
    m_decFactor(1),
    m_aaBuffer(0),
    m_tmpBuffer(0)
{
    initialise(inLength, decFactor);
}
// Destructor: releases the working buffers via deInitialise().
DecimatorB::~DecimatorB()
{
deInitialise();
}
/**
 * Validate the construction parameters, allocate the working buffers
 * and set up the filter coefficients and per-stage filter state.
 *
 * decFactor must be a power of two and at least 2, and inLength must
 * be a multiple of it. If either check fails an error is written to
 * std::cerr, m_decFactor is set to 0 (which process() treats as
 * "do nothing") and no buffers are allocated.
 */
void DecimatorB::initialise(int inLength, int decFactor)
{
    m_inputLength = inLength;
    m_decFactor = decFactor;

    // Do not derive the output length until the factor has been
    // validated: dividing first would be a division by zero for
    // decFactor == 0.
    m_outputLength = 0;

    if (m_decFactor < 2 || !MathUtilities::isPowerOfTwo(m_decFactor)) {
        std::cerr << "ERROR: DecimatorB::initialise: Decimation factor must be a power of 2 and at least 2 (was: " << m_decFactor << ")" << std::endl;
        m_decFactor = 0;
        return;
    }

    if (m_inputLength % m_decFactor != 0) {
        std::cerr << "ERROR: DecimatorB::initialise: inLength must be a multiple of decimation factor (was: " << m_inputLength << ", factor is " << m_decFactor << ")" << std::endl;
        m_decFactor = 0;
        return;
    }

    m_outputLength = m_inputLength / m_decFactor;

    m_aaBuffer = new double[m_inputLength];
    m_tmpBuffer = new double[m_inputLength];

    // Order 6 Butterworth lowpass filter
    // Calculated using e.g. MATLAB butter(6, 0.5, 'low')

    m_b[0] = 0.029588223638661;
    m_b[1] = 0.177529341831965;
    m_b[2] = 0.443823354579912;
    m_b[3] = 0.591764472773216;
    m_b[4] = 0.443823354579912;
    m_b[5] = 0.177529341831965;
    m_b[6] = 0.029588223638661;

    m_a[0] = 1.000000000000000;
    m_a[1] = 0.000000000000000;
    m_a[2] = 0.777695961855673;
    m_a[3] = 0.000000000000000;
    m_a[4] = 0.114199425062434;
    m_a[5] = 0.000000000000000;
    m_a[6] = 0.001750925956183;

    // One set of six filter state values per halving stage, so that
    // each stage of the repeated decimate-by-2 keeps its own history.
    for (int factor = m_decFactor; factor > 1; factor /= 2) {
        m_o.push_back(std::vector<double>(6, 0.0));
    }
}
// Free the two working buffers allocated with new[] in initialise().
// Both pointers are set to 0 by the constructor, so this is also safe
// when initialise() rejected its parameters and allocated nothing.
void DecimatorB::deInitialise()
{
delete [] m_aaBuffer;
delete [] m_tmpBuffer;
}
void DecimatorB::doAntiAlias(const double *src, double *dst, int length,
int filteridx)
{
vector<double> &o = m_o[filteridx];
for (int i = 0; i < length; i++) {
double input = src[i];
double output = input * m_b[0] + o[0];
o[0] = input * m_b[1] - output * m_a[1] + o[1];
o[1] = input * m_b[2] - output * m_a[2] + o[2];
o[2] = input * m_b[3] - output * m_a[3] + o[3];
o[3] = input * m_b[4] - output * m_a[4] + o[4];
o[4] = input * m_b[5] - output * m_a[5] + o[5];
o[5] = input * m_b[6] - output * m_a[6];
dst[i] = output;
}
}
/**
 * Decimate the contents of m_tmpBuffer in place by m_decFactor.
 *
 * Works as a cascade of decimate-by-2 stages: each stage lowpass
 * filters the current data from m_tmpBuffer into m_aaBuffer using
 * that stage's own filter state, then copies every second filtered
 * sample back to the front of m_tmpBuffer. On return the first
 * m_inputLength / m_decFactor values of m_tmpBuffer are the output.
 */
void DecimatorB::doProcess()
{
    int filteridx = 0;
    int factorDone = 1;

    while (factorDone < m_decFactor) {

        // Number of valid samples currently at the front of m_tmpBuffer
        const int stageLength = m_inputLength / factorDone;

        doAntiAlias(m_tmpBuffer, m_aaBuffer, stageLength, filteridx);

        filteridx ++;
        factorDone *= 2;

        // Keep every second filtered sample
        const int keptLength = m_inputLength / factorDone;
        for (int i = 0; i < keptLength; ++i) {
            m_tmpBuffer[i] = m_aaBuffer[i * 2];
        }
    }
}
/**
 * Decimate one block of m_inputLength double samples from src,
 * writing m_outputLength samples to dst. The input is copied to an
 * internal buffer before any output is written. Does nothing if the
 * construction parameters were rejected (m_decFactor == 0).
 */
void DecimatorB::process(const double *src, double *dst)
{
    if (m_decFactor == 0) {
        return;
    }

    // Stage the input in the internal working buffer
    int pos = 0;
    while (pos < m_inputLength) {
        m_tmpBuffer[pos] = src[pos];
        ++pos;
    }

    doProcess();

    // The decimated result sits at the front of the working buffer
    pos = 0;
    while (pos < m_outputLength) {
        dst[pos] = m_tmpBuffer[pos];
        ++pos;
    }
}
/**
 * Decimate one block of m_inputLength float samples from src,
 * writing m_outputLength samples to dst. Processing is carried out
 * in double precision in an internal buffer and converted back to
 * float on output. Does nothing if the construction parameters were
 * rejected (m_decFactor == 0).
 */
void DecimatorB::process(const float *src, float *dst)
{
    if (m_decFactor == 0) {
        return;
    }

    // Widen the input into the internal double-precision buffer
    int pos = 0;
    while (pos < m_inputLength) {
        m_tmpBuffer[pos] = static_cast<double>(src[pos]);
        ++pos;
    }

    doProcess();

    // Narrow the decimated result back to float
    pos = 0;
    while (pos < m_outputLength) {
        dst[pos] = static_cast<float>(m_tmpBuffer[pos]);
        ++pos;
    }
}

View File

@@ -0,0 +1,64 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef DECIMATORB_H
#define DECIMATORB_H
#include <vector>
/**
 * DecimatorB carries out a fast downsample by a power-of-two
 * factor. It only knows how to decimate by a factor of 2, and will
 * use repeated decimation for higher factors. A Butterworth filter of
 * order 6 is used for the lowpass filter.
 *
 * A DecimatorB owns heap-allocated working buffers and cannot be
 * copied.
 */
class DecimatorB
{
public:
    /**
     * Process inLength samples (as supplied to the constructor) from
     * src and write inLength / decFactor samples to dst. Does nothing
     * if the constructor parameters were rejected.
     */
    void process( const double* src, double* dst );

    /**
     * As above, for single-precision input and output. Filtering is
     * still carried out in double precision internally.
     */
    void process( const float* src, float* dst );

    /**
     * Construct a DecimatorB to operate on input blocks of length
     * inLength, with decimation factor decFactor. inLength should be
     * a multiple of decFactor. Output blocks will be of length
     * inLength / decFactor.
     *
     * decFactor must be a power of two.
     */
    DecimatorB(int inLength, int decFactor);
    virtual ~DecimatorB();

    /**
     * Return the decimation factor in use. This is 0 if the
     * parameters passed to the constructor were rejected.
     */
    int getFactor() const { return m_decFactor; }

private:
    // Not copyable: the object owns m_aaBuffer and m_tmpBuffer and
    // frees them in its destructor, so a member-wise copy would free
    // the same memory twice. Declared but intentionally not defined.
    DecimatorB(const DecimatorB &);
    DecimatorB &operator=(const DecimatorB &);

    void deInitialise();
    void initialise(int inLength, int decFactor);
    void doAntiAlias(const double* src, double* dst, int length, int filteridx);
    void doProcess();

    int m_inputLength;
    int m_outputLength;
    int m_decFactor;

    // Filter state, one vector per decimate-by-2 stage
    std::vector<std::vector<double> > m_o;

    // Lowpass filter coefficients
    double m_a[7];
    double m_b[7];

    // Working buffers, each of m_inputLength values
    double *m_aaBuffer;
    double *m_tmpBuffer;
};
#endif

View File

@@ -0,0 +1,416 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This file by Chris Cannam.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#include "Resampler.h"
#include "maths/MathUtilities.h"
#include "base/KaiserWindow.h"
#include "base/SincWindow.h"
#include "thread/Thread.h"
#include <iostream>
#include <vector>
#include <map>
#include <cassert>
using std::vector;
using std::map;
using std::cerr;
using std::endl;
//#define DEBUG_RESAMPLER 1
//#define DEBUG_RESAMPLER_VERBOSE 1
// Construct a resampler from sourceRate to targetRate using the
// default filter parameters: initialise() is called with snr = 100
// and bandwidth = 0.02.
Resampler::Resampler(int sourceRate, int targetRate) :
m_sourceRate(sourceRate),
m_targetRate(targetRate)
{
initialise(100, 0.02);
}
// Construct a resampler from sourceRate to targetRate with
// caller-supplied filter parameters; snr and bandwidth are passed
// straight through to initialise().
Resampler::Resampler(int sourceRate, int targetRate,
double snr, double bandwidth) :
m_sourceRate(sourceRate),
m_targetRate(targetRate)
{
initialise(snr, bandwidth);
}
// Destructor: frees the per-phase filter table that initialise()
// allocated with new[].
Resampler::~Resampler()
{
delete[] m_phaseData;
}
// Cache of filters that have already been computed, shared by all
// Resampler instances in this translation unit. Keyed as
// peakToPole -> length -> beta -> window
static map<double, map<int, map<double, vector<double> > > >
knownFilters;
// Locked by Resampler::initialise() around every access to
// knownFilters.
static Mutex
knownFilterMutex;
// Set up everything process() needs for the rate pair given to the
// constructor:
//
//  - design (or fetch from the knownFilters cache) a Kaiser-windowed
//    sinc filter, using snr and bandwidth as passed to
//    KaiserWindow::parametersForBandwidth;
//  - split that filter into one sub-filter per phase (m_phaseData),
//    recording for each phase the next phase and how many input
//    samples to drop after producing an output sample;
//  - choose the initial phase, pre-fill the input buffer with zeros,
//    and record the resulting output latency.
void
Resampler::initialise(double snr, double bandwidth)
{
int higher = std::max(m_sourceRate, m_targetRate);
int lower = std::min(m_sourceRate, m_targetRate);
m_gcd = MathUtilities::gcd(lower, higher);
m_peakToPole = higher / m_gcd;
if (m_targetRate < m_sourceRate) {
// antialiasing filter, should be slightly below nyquist
m_peakToPole = m_peakToPole / (1.0 - bandwidth/2.0);
}
KaiserWindow::Parameters params =
KaiserWindow::parametersForBandwidth(snr, bandwidth, higher / m_gcd);
// Force an odd filter length (the discussion below relies on the
// filter having a single centre sample)
params.length =
(params.length % 2 == 0 ? params.length + 1 : params.length);
// Cap the filter length at 200001 points
params.length =
(params.length > 200001 ? 200001 : params.length);
m_filterLength = params.length;
vector<double> filter;
// NOTE(review): lock()/unlock() are paired manually here, so the
// mutex stays locked if anything in between throws (e.g. an
// allocation failure) -- consider a scoped lock if Thread.h
// provides one.
knownFilterMutex.lock();
if (knownFilters[m_peakToPole][m_filterLength].find(params.beta) ==
knownFilters[m_peakToPole][m_filterLength].end()) {
KaiserWindow kw(params);
SincWindow sw(m_filterLength, m_peakToPole * 2);
filter = vector<double>(m_filterLength, 0.0);
// Start from all ones and apply both windows to it; presumably
// cut() scales the data in place, leaving the product of the sinc
// and Kaiser shapes -- confirm against the window classes.
for (int i = 0; i < m_filterLength; ++i) filter[i] = 1.0;
sw.cut(filter.data());
kw.cut(filter.data());
knownFilters[m_peakToPole][m_filterLength][params.beta] = filter;
}
filter = knownFilters[m_peakToPole][m_filterLength][params.beta];
knownFilterMutex.unlock();
// Spacing of input and output samples in units of the filter
// resolution (see the explanation below)
int inputSpacing = m_targetRate / m_gcd;
int outputSpacing = m_sourceRate / m_gcd;
#ifdef DEBUG_RESAMPLER
cerr << "resample " << m_sourceRate << " -> " << m_targetRate
<< ": inputSpacing " << inputSpacing << ", outputSpacing "
<< outputSpacing << ": filter length " << m_filterLength
<< endl;
#endif
// Now we have a filter of (odd) length flen in which the lower
// sample rate corresponds to every n'th point and the higher rate
// to every m'th where n and m are higher and lower rates divided
// by their gcd respectively. So if x coordinates are on the same
// scale as our filter resolution, then source sample i is at i *
// (targetRate / gcd) and target sample j is at j * (sourceRate /
// gcd).
// To reconstruct a single target sample, we want a buffer (real
// or virtual) of flen values formed of source samples spaced at
// intervals of (targetRate / gcd), in our example case 3. This
// is initially formed with the first sample at the filter peak.
//
// 0 0 0 0 a 0 0 b 0
//
// and of course we have our filter
//
// f1 f2 f3 f4 f5 f6 f7 f8 f9
//
// We take the sum of products of non-zero values from this buffer
// with corresponding values in the filter
//
// a * f5 + b * f8
//
// Then we drop (sourceRate / gcd) values, in our example case 4,
// from the start of the buffer and fill until it has flen values
// again
//
// a 0 0 b 0 0 c 0 0
//
// repeat to reconstruct the next target sample
//
// a * f1 + b * f4 + c * f7
//
// and so on.
//
// Above I said the buffer could be "real or virtual" -- ours is
// virtual. We don't actually store all the zero spacing values,
// except for padding at the start; normally we store only the
// values that actually came from the source stream, along with a
// phase value that tells us how many virtual zeroes there are at
// the start of the virtual buffer. So the two examples above are
//
// 0 a b [ with phase 1 ]
// a b c [ with phase 0 ]
//
// Having thus broken down the buffer so that only the elements we
// need to multiply are present, we can also unzip the filter into
// every-nth-element subsets at each phase, allowing us to do the
// filter multiplication as a simple vector multiply. That is, rather
// than store
//
// f1 f2 f3 f4 f5 f6 f7 f8 f9
//
// we store separately
//
// f1 f4 f7
// f2 f5 f8
// f3 f6 f9
//
// Each time we complete a multiply-and-sum, we need to work out
// how many (real) samples to drop from the start of our buffer,
// and how many to add at the end of it for the next multiply. We
// know we want to drop enough real samples to move along by one
// computed output sample, which is our outputSpacing number of
// virtual buffer samples. Depending on the relationship between
// input and output spacings, this may mean dropping several real
// samples, one real sample, or none at all (and simply moving to
// a different "phase").
m_phaseData = new Phase[inputSpacing];
for (int phase = 0; phase < inputSpacing; ++phase) {
Phase p;
// Phase that follows this one after advancing by one output
// sample, wrapped into the range [0, inputSpacing)
p.nextPhase = phase - outputSpacing;
while (p.nextPhase < 0) p.nextPhase += inputSpacing;
p.nextPhase %= inputSpacing;
// Number of real input samples to discard when leaving this phase
p.drop = int(ceil(std::max(0.0, double(outputSpacing - phase))
/ inputSpacing));
// Every inputSpacing'th filter value starting at offset phase
int filtZipLength = int(ceil(double(m_filterLength - phase)
/ inputSpacing));
for (int i = 0; i < filtZipLength; ++i) {
p.filter.push_back(filter[i * inputSpacing + phase]);
}
m_phaseData[phase] = p;
}
#ifdef DEBUG_RESAMPLER
int cp = 0;
int totDrop = 0;
for (int i = 0; i < inputSpacing; ++i) {
cerr << "phase = " << cp << ", drop = " << m_phaseData[cp].drop
<< ", filter length = " << m_phaseData[cp].filter.size()
<< ", next phase = " << m_phaseData[cp].nextPhase << endl;
totDrop += m_phaseData[cp].drop;
cp = m_phaseData[cp].nextPhase;
}
cerr << "total drop = " << totDrop << endl;
#endif
// The May implementation of this uses a pull model -- we ask the
// resampler for a certain number of output samples, and it asks
// its source stream for as many as it needs to calculate
// those. This means (among other things) that the source stream
// can be asked for enough samples up-front to fill the buffer
// before the first output sample is generated.
//
// In this implementation we're using a push model in which a
// certain number of source samples is provided and we're asked
// for as many output samples as that makes available. But we
// can't return any samples from the beginning until half the
// filter length has been provided as input. This means we must
// either return a very variable number of samples (none at all
// until the filter fills, then half the filter length at once) or
// else have a lengthy declared latency on the output. We do the
// latter. (What do other implementations do?)
//
// We want to make sure the first "real" sample will eventually be
// aligned with the centre sample in the filter (it's tidier, and
// easier to do diagnostic calculations that way). So we need to
// pick the initial phase and buffer fill accordingly.
//
// Example: if the inputSpacing is 2, outputSpacing is 3, and
// filter length is 7,
//
// x x x x a b c ... input samples
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 ...
// i j k l ... output samples
// [--------|--------] <- filter with centre mark
//
// Let h be the index of the centre mark, here 3 (generally
// int(filterLength/2) for odd-length filters).
//
// The smallest n such that h + n * outputSpacing > filterLength
// is 2 (that is, ceil((filterLength - h) / outputSpacing)), and
// (h + 2 * outputSpacing) % inputSpacing == 1, so the initial
// phase is 1.
//
// To achieve our n, we need to pre-fill the "virtual" buffer with
// 4 zero samples: the x's above. This is int((h + n *
// outputSpacing) / inputSpacing). It's the phase that makes this
// buffer get dealt with in such a way as to give us an effective
// index for sample a of 9 rather than 8 or 10 or whatever.
//
// This gives us output latency of 2 (== n), i.e. output samples i
// and j will appear before the one in which input sample a is at
// the centre of the filter.
int h = int(m_filterLength / 2);
int n = ceil(double(m_filterLength - h) / outputSpacing);
m_phase = (h + n * outputSpacing) % inputSpacing;
int fill = (h + n * outputSpacing) / inputSpacing;
m_latency = n;
// Input buffer starts out holding only the zero pre-fill
m_buffer = vector<double>(fill, 0);
m_bufferOrigin = 0;
#ifdef DEBUG_RESAMPLER
cerr << "initial phase " << m_phase << " (as " << (m_filterLength/2) << " % " << inputSpacing << ")"
<< ", latency " << m_latency << endl;
#endif
}
double
Resampler::reconstructOne()
{
Phase &pd = m_phaseData[m_phase];
double v = 0.0;
int n = pd.filter.size();
assert(n + m_bufferOrigin <= (int)m_buffer.size());
const double *const __restrict__ buf = m_buffer.data() + m_bufferOrigin;
const double *const __restrict__ filt = pd.filter.data();
for (int i = 0; i < n; ++i) {
// NB gcc can only vectorize this with -ffast-math
v += buf[i] * filt[i];
}
m_bufferOrigin += pd.drop;
m_phase = pd.nextPhase;
return v;
}
/**
 * Append n input samples from src to the internal buffer and write
 * as many resampled output samples to dst as are both available and
 * within the limit of ceil(n * targetRate / sourceRate).
 *
 * Returns the number of samples written to dst. Input that has been
 * fully consumed is discarded from the buffer before returning.
 */
int
Resampler::process(const double *src, double *dst, int n)
{
    // Append the new input in one go rather than sample by sample
    if (n > 0) {
        m_buffer.insert(m_buffer.end(), src, src + n);
    }

    int maxout = int(ceil(double(n) * m_targetRate / m_sourceRate));
    int outidx = 0;

#ifdef DEBUG_RESAMPLER
    cerr << "process: buf siz " << m_buffer.size() << " filt siz for phase " << m_phase << " " << m_phaseData[m_phase].filter.size() << endl;
#endif

    double scaleFactor = (double(m_targetRate) / m_gcd) / m_peakToPole;

    // Keep producing output while the buffer holds enough samples
    // (from the current origin) to cover the current phase's filter
    while (outidx < maxout &&
           m_buffer.size() >= m_phaseData[m_phase].filter.size() + m_bufferOrigin) {
        dst[outidx] = scaleFactor * reconstructOne();
        outidx++;
    }

    // Discard consumed input in place, keeping the existing storage,
    // instead of building a fresh vector on every call
    m_buffer.erase(m_buffer.begin(), m_buffer.begin() + m_bufferOrigin);
    m_bufferOrigin = 0;

    return outidx;
}
/**
 * Convenience form of process(): consume n input samples from src
 * and return the resampled output by value, sized to the number of
 * samples actually produced.
 */
vector<double>
Resampler::process(const double *src, int n)
{
    // Upper bound on the number of samples the other overload writes
    const double exactOut = double(n) * m_targetRate / m_sourceRate;
    int capacity = int(ceil(exactOut));

    vector<double> result(capacity, 0.0);

    int produced = process(src, result.data(), n);
    assert(produced <= capacity);

    // Trim off whatever was not filled
    if (produced < capacity) {
        result.resize(produced);
    }

    return result;
}
/**
 * One-off resample of a single block of n samples from sourceRate to
 * targetRate, using a temporary Resampler with default filter
 * parameters.
 *
 * The input is followed by enough zero padding to flush the filter
 * latency, and the leading latency samples are removed from the
 * output, so the returned data is latency-compensated. At most
 * ceil(n * targetRate / sourceRate) samples are returned.
 */
vector<double>
Resampler::resample(int sourceRate, int targetRate, const double *data, int n)
{
    Resampler r(sourceRate, targetRate);

    int latency = r.getLatency();

    // latency is the output latency. We need to provide enough
    // padding input samples at the end of input to guarantee at
    // *least* the latency's worth of output samples. that is,

    int inputPad = int(ceil((double(latency) * sourceRate) / targetRate));

    // that means we are providing this much input in total:

    int n1 = n + inputPad;

    // and obtaining this much output in total:

    int m1 = int(ceil((double(n1) * targetRate) / sourceRate));

    // in order to return this much output to the user:

    int m = int(ceil((double(n) * targetRate) / sourceRate));

#ifdef DEBUG_RESAMPLER
    cerr << "n = " << n << ", sourceRate = " << sourceRate << ", targetRate = " << targetRate << ", m = " << m << ", latency = " << latency << ", inputPad = " << inputPad << ", m1 = " << m1 << ", n1 = " << n1 << ", n1 - n = " << n1 - n << endl;
#endif

    vector<double> pad(n1 - n, 0.0);

    // One extra slot: the two process() calls below each round their
    // own output limit up, so together they may exceed m1 by one
    vector<double> out(m1 + 1, 0.0);

    int gotData = r.process(data, out.data(), n);
    int gotPad = r.process(pad.data(), out.data() + gotData, int(pad.size()));
    int got = gotData + gotPad;

#ifdef DEBUG_RESAMPLER
    cerr << "resample: " << n << " in, " << pad.size() << " padding, " << got << " out (" << gotData << " data, " << gotPad << " padding, latency = " << latency << ")" << endl;
#endif
#ifdef DEBUG_RESAMPLER_VERBOSE
    int printN = 50;
    cerr << "first " << printN << " in:" << endl;
    for (int i = 0; i < printN && i < n; ++i) {
        if (i % 5 == 0) cerr << endl << i << "... ";
        cerr << data[i] << " ";
    }
    cerr << endl;
#endif

    int toReturn = got - latency;
    if (toReturn > m) toReturn = m;

    // If fewer samples than the latency came back (e.g. for a very
    // short or empty input), there is nothing to return. Without
    // this the slice below would be built from an inverted iterator
    // range.
    if (toReturn < 0) toReturn = 0;

    vector<double> sliced(out.begin() + latency,
                          out.begin() + latency + toReturn);

#ifdef DEBUG_RESAMPLER_VERBOSE
    cerr << "first " << printN << " out (after latency compensation), length " << sliced.size() << ":";
    for (int i = 0; i < printN && i < sliced.size(); ++i) {
        if (i % 5 == 0) cerr << endl << i << "... ";
        cerr << sliced[i] << " ";
    }
    cerr << endl;
#endif

    return sliced;
}

View File

@@ -0,0 +1,102 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This file by Chris Cannam.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
#ifndef RESAMPLER_H
#define RESAMPLER_H
#include <vector>
/**
 * Resampler resamples a stream from one integer sample rate to
 * another (arbitrary) rate, using a kaiser-windowed sinc filter. The
 * results and performance are pretty similar to libraries such as
 * libsamplerate, though this implementation does not support
 * time-varying ratios (the ratio is fixed on construction).
 *
 * See also Decimator, which is faster and rougher but supports only
 * power-of-two downsampling factors.
 *
 * A Resampler owns heap-allocated filter data and cannot be copied.
 */
class Resampler
{
public:
    /**
     * Construct a Resampler to resample from sourceRate to
     * targetRate.
     */
    Resampler(int sourceRate, int targetRate);

    /**
     * Construct a Resampler to resample from sourceRate to
     * targetRate, using the given filter parameters.
     */
    Resampler(int sourceRate, int targetRate,
              double snr, double bandwidth);

    virtual ~Resampler();

    /**
     * Read n input samples from src and write resampled data to
     * dst. The return value is the number of samples written, which
     * will be no more than ceil((n * targetRate) / sourceRate). The
     * caller must ensure the dst buffer has enough space for the
     * samples returned.
     */
    int process(const double *src, double *dst, int n);

    /**
     * Read n input samples from src and return resampled data by
     * value.
     */
    std::vector<double> process(const double *src, int n);

    /**
     * Return the number of samples of latency at the output due to
     * the filter. (That is, the output will be delayed by this number
     * of samples relative to the input.)
     */
    int getLatency() const { return m_latency; }

    /**
     * Carry out a one-off resample of a single block of n
     * samples. The output is latency-compensated.
     */
    static std::vector<double> resample
    (int sourceRate, int targetRate, const double *data, int n);

private:
    // Not copyable: the object owns m_phaseData and frees it in its
    // destructor, so a member-wise copy would free the same memory
    // twice. Declared but intentionally not defined.
    Resampler(const Resampler &);
    Resampler &operator=(const Resampler &);

    int m_sourceRate;
    int m_targetRate;
    int m_gcd;
    int m_filterLength;
    int m_bufferLength;
    int m_latency;
    double m_peakToPole;

    // Per-phase data: the sub-filter to apply in that phase, the
    // phase that follows it, and the number of input samples to
    // drop from the buffer on leaving it
    struct Phase {
        int nextPhase;
        std::vector<double> filter;
        int drop;
    };

    Phase *m_phaseData;      // array with one entry per phase
    int m_phase;             // current phase
    std::vector<double> m_buffer;   // buffered input samples
    int m_bufferOrigin;      // index in m_buffer of first unconsumed sample

    void initialise(double, double);
    double reconstructOne();
};
#endif

View File

@@ -51,7 +51,7 @@ void ClusterMeltSegmenter::initialise(int fs)
if (featureType == FEATURE_TYPE_CONSTQ ||
featureType == FEATURE_TYPE_CHROMA) {
// run internal processing at 11025 or thereabouts
int internalRate = 11025;
int decimationFactor = samplerate / internalRate;
@@ -77,11 +77,11 @@ void ClusterMeltSegmenter::initialise(int fs)
constq = new ConstantQ(config);
constq->sparsekernel();
ncoeff = constq->getK();
fft = new FFTReal(constq->getfftlength());
} else if (featureType == FEATURE_TYPE_MFCC) {
// run internal processing at 22050 or thereabouts
@@ -110,7 +110,7 @@ void ClusterMeltSegmenter::initialise(int fs)
}
}
ClusterMeltSegmenter::~ClusterMeltSegmenter()
ClusterMeltSegmenter::~ClusterMeltSegmenter()
{
delete window;
delete constq;
@@ -164,7 +164,7 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
vector<double> cq(ncoeff);
for (int i = 0; i < ncoeff; ++i) cq[i] = 0.0;
const double *psource = samples;
int pcount = nsamples;
@@ -174,9 +174,9 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
decimator->process(samples, decout);
psource = decout;
}
int origin = 0;
// std::cerr << "nsamples = " << nsamples << ", pcount = " << pcount << std::endl;
int frames = 0;
@@ -208,11 +208,11 @@ void ClusterMeltSegmenter::extractFeaturesConstQ(const double* samples, int nsam
}
window->cut(frame);
fft->process(false, frame, real, imag);
fft->forward(frame, real, imag);
constq->process(real, imag, cqre, cqim);
for (int i = 0; i < ncoeff; ++i) {
cq[i] += sqrt(cqre[i] * cqre[i] + cqim[i] * cqim[i]);
}
@@ -255,7 +255,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
vector<double> cc(ncoeff);
for (int i = 0; i < ncoeff; ++i) cc[i] = 0.0;
const double *psource = samples;
int pcount = nsamples;
@@ -287,7 +287,7 @@ void ClusterMeltSegmenter::extractFeaturesMFCC(const double* samples, int nsampl
}
mfcc->process(frame, ccout);
for (int i = 0; i < ncoeff; ++i) {
cc[i] += ccout[i];
}
@@ -330,44 +330,44 @@ void ClusterMeltSegmenter::segment()
decimator = 0;
if (features.size() < histogramLength) return;
/*
/*
std::cerr << "ClusterMeltSegmenter::segment: have " << features.size()
<< " features with " << features[0].size() << " coefficients (ncoeff = " << ncoeff << ", ncomponents = " << ncomponents << ")" << std::endl;
*/
// copy the features to a native array and use the existing C segmenter...
double** arrFeatures = new double*[features.size()];
double** arrFeatures = new double*[features.size()];
for (int i = 0; i < features.size(); i++)
{
if (featureType == FEATURE_TYPE_UNKNOWN) {
arrFeatures[i] = new double[features[0].size()];
for (int j = 0; j < features[0].size(); j++)
arrFeatures[i][j] = features[i][j];
arrFeatures[i][j] = features[i][j];
} else {
arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope
for (int j = 0; j < ncoeff; j++)
arrFeatures[i][j] = features[i][j];
arrFeatures[i][j] = features[i][j];
}
}
q = new int[features.size()];
if (featureType == FEATURE_TYPE_UNKNOWN ||
featureType == FEATURE_TYPE_MFCC)
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength,
nclusters, neighbourhoodLimit);
else
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType,
nHMMStates, histogramLength, nclusters, neighbourhoodLimit);
// convert the cluster assignment sequence to a segmentation
makeSegmentation(q, features.size());
makeSegmentation(q, features.size());
// de-allocate arrays
delete [] q;
for (int i = 0; i < features.size(); i++)
delete [] arrFeatures[i];
delete [] arrFeatures;
// clear the features
clear();
}
@@ -377,11 +377,11 @@ void ClusterMeltSegmenter::makeSegmentation(int* q, int len)
segmentation.segments.clear();
segmentation.nsegtypes = nclusters;
segmentation.samplerate = samplerate;
Segment segment;
segment.start = 0;
segment.type = q[0];
for (int i = 1; i < len; i++)
{
if (q[i] != q[i-1])

View File

@@ -31,12 +31,12 @@ class ClusterMeltSegmenterParams
// defaults are sensible for 11025Hz with 0.2 second hopsize
{
public:
ClusterMeltSegmenterParams() :
ClusterMeltSegmenterParams() :
featureType(FEATURE_TYPE_CONSTQ),
hopSize(0.2),
windowSize(0.6),
fmin(62),
fmax(16000),
fmax(16000),
nbins(8),
ncomponents(20),
nHMMStates(40),
@@ -72,34 +72,34 @@ public:
protected:
void makeSegmentation(int* q, int len);
void extractFeaturesConstQ(const double *, int);
void extractFeaturesMFCC(const double *, int);
Window<double> *window;
FFTReal *fft;
ConstantQ* constq;
ConstantQ* constq;
MFCC* mfcc;
model_t* model; // the HMM
int* q; // the decoded HMM state sequence
vector<vector<double> > histograms;
feature_types featureType;
vector<vector<double> > histograms;
feature_types featureType;
double hopSize; // in seconds
double windowSize; // in seconds
// constant-Q parameters
int fmin;
int fmax;
int nbins;
int ncoeff;
// PCA parameters
int ncomponents;
// HMM parameters
int nHMMStates;
// clustering parameters
int nclusters;
int histogramLength;

View File

@@ -19,13 +19,13 @@
ostream& operator<<(ostream& os, const Segmentation& s)
{
os << "structure_name : begin_time end_time\n";
for (int i = 0; i < s.segments.size(); i++)
{
Segment seg = s.segments[i];
os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate)
os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast<double>(s.samplerate)
<< '\t' << std::setprecision(6) << seg.end / static_cast<double>(s.samplerate) << "\n";
}
return os;
}

View File

@@ -35,7 +35,7 @@ class Segmentation
public:
int nsegtypes; // number of segment types, so possible types are {0,1,...,nsegtypes-1}
int samplerate;
vector<Segment> segments;
vector<Segment> segments;
};
ostream& operator<<(ostream& os, const Segmentation& s);
@@ -52,7 +52,7 @@ public:
virtual void segment() = 0; // call once all the features have been extracted
virtual void segment(int m) = 0; // specify desired number of segment-types
virtual void clear() { features.clear(); }
const Segmentation& getSegmentation() const { return segmentation; }
const Segmentation& getSegmentation() const { return segmentation; }
protected:
vector<vector<double> > features;
Segmentation segmentation;

View File

@@ -25,7 +25,7 @@ double kldist(double* a, double* b, int n) {
because a, b represent probability distributions */
double q, d;
int i;
d = 0;
for (i = 0; i < n; i++)
{
@@ -38,8 +38,8 @@ double kldist(double* a, double* b, int n) {
d += b[i] * log(b[i] / q);
}
}
return d;
}
return d;
}
void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) {
double lambda, sum, beta, logsumexp, maxlp;
@@ -48,9 +48,9 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
int** nc; /* neighbour counts for each histogram */
double** lp; /* soft assignment probs for each histogram */
int* oldc; /* previous hard assignments (to check convergence) */
/* NB h is passed as a 1d row major array */
/* parameter values */
lambda = DEFAULT_LAMBDA;
if (l > 0)
@@ -60,22 +60,22 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
B = 2 * limit + 1;
maxiter0 = 20; /* number of iterations at initial temperature */
maxiter1 = 5; /* number of iterations at subsequent temperatures */
/* allocate memory */
/* allocate memory */
cl = (double**) malloc(k*sizeof(double*));
for (i= 0; i < k; i++)
cl[i] = (double*) malloc(m*sizeof(double));
nc = (int**) malloc(n*sizeof(int*));
for (i= 0; i < n; i++)
nc[i] = (int*) malloc(k*sizeof(int));
lp = (double**) malloc(n*sizeof(double*));
for (i= 0; i < n; i++)
lp[i] = (double*) malloc(k*sizeof(double));
oldc = (int*) malloc(n * sizeof(int));
/* initialise */
for (i = 0; i < k; i++)
{
@@ -90,40 +90,40 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
{
cl[i][j] /= sum; /* normalise */
}
}
}
//print_array(cl, k, m);
for (i = 0; i < n; i++)
c[i] = 1; /* initially assign all histograms to cluster 1 */
for (a = 0; a < t; a++)
{
beta = Bsched[a];
if (a == 0)
maxiter = maxiter0;
else
maxiter = maxiter1;
for (it = 0; it < maxiter; it++)
{
//if (it == maxiter - 1)
// mexPrintf("hasn't converged after %d iterations\n", maxiter);
for (i = 0; i < n; i++)
{
/* save current hard assignments */
oldc[i] = c[i];
/* calculate soft assignment logprobs for each cluster */
sum = 0;
for (j = 0; j < k; j++)
{
lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m);
/* update matching neighbour counts for this histogram, based on current hard assignments */
/* old version:
nc[i][j] = 0;
nc[i][j] = 0;
if (i >= limit && i <= n - 1 - limit)
{
for (b = i - limit; b <= i + limit; b++)
@@ -144,14 +144,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (b = b0; b <= b1; b++)
if (c[b] == j+1)
nc[i][j]--;
sum += exp(lp[i][j]);
}
/* normalise responsibilities and add duration logprior */
logsumexp = log(sum);
for (j = 0; j < k; j++)
lp[i][j] -= logsumexp + lambda * nc[i][j];
lp[i][j] -= logsumexp + lambda * nc[i][j];
}
//print_array(lp, n, k);
/*
@@ -160,10 +160,10 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (j = 0; j < k; j++)
mexPrintf("%d ", nc[i][j]);
mexPrintf("\n");
}
}
*/
/* update the assignments now that we know the duration priors
based on the current assignments */
for (i = 0; i < n; i++)
@@ -177,14 +177,14 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
c[i] = j+1;
}
}
/* break if assignments haven't changed */
i = 0;
while (i < n && oldc[i] == c[i])
i++;
if (i == n)
break;
/* update reference histograms now we know new responsibilities */
for (j = 0; j < k; j++)
{
@@ -194,21 +194,21 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (i = 0; i < n; i++)
{
cl[j][b] += exp(lp[i][j]) * h[i*m+b];
}
}
}
sum = 0;
sum = 0;
for (i = 0; i < n; i++)
sum += exp(lp[i][j]);
for (b = 0; b < m; b++)
cl[j][b] /= sum; /* normalise */
}
}
//print_array(cl, k, m);
//mexPrintf("\n\n");
}
}
/* free memory */
for (i = 0; i < k; i++)
free(cl[i]);
@@ -219,7 +219,7 @@ void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l,
for (i = 0; i < n; i++)
free(lp[i]);
free(lp);
free(oldc);
free(oldc);
}

View File

@@ -25,7 +25,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
int t, b, oct, ix;
//double maxchroma; /* max chroma value at each time, for normalisation */
//double sum; /* for normalisation */
for (t = 0; t < nframes; t++)
{
for (b = 0; b < bins; b++)
@@ -50,7 +50,7 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
maxchroma = chroma[t][b];
if (maxchroma > 0)
for (b = 0; b < bins; b++)
chroma[t][b] /= maxchroma;
chroma[t][b] /= maxchroma;
*/
}
}
@@ -62,13 +62,13 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
double ss;
double env;
double maxenv = 0;
/* convert const-Q features to dB scale */
for (i = 0; i < nframes; i++)
for (j = 0; j < ncoeff; j++)
features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
/* normalise each feature vector and add the norm as an extra feature dimension */
/* normalise each feature vector and add the norm as an extra feature dimension */
for (i = 0; i < nframes; i++)
{
ss = 0;
@@ -80,10 +80,10 @@ void mpeg7_constq(double** features, int nframes, int ncoeff)
features[i][ncoeff] = env;
if (env > maxenv)
maxenv = env;
}
}
/* normalise the envelopes */
for (i = 0; i < nframes; i++)
features[i][ncoeff] /= maxenv;
features[i][ncoeff] /= maxenv;
}
/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length hlen (MUST BE ODD) */
@@ -94,7 +94,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
int i, j, t;
double norm;
for (i = 0; i < nx*m; i++)
for (i = 0; i < nx*m; i++)
h[i] = 0;
for (i = hlen/2; i < nx-hlen/2; i++)
@@ -109,7 +109,7 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
for (j = 0; j < m; j++)
h[i*m+j] /= norm;
}
/* duplicate histograms at beginning and end to create one histogram for each data value supplied */
for (i = 0; i < hlen/2; i++)
for (j = 0; j < m; j++)
@@ -120,11 +120,11 @@ void create_histograms(int* x, int nx, int m, int hlen, double* h)
}
/* segment using HMM and then histogram clustering */
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
int histogram_length, int nclusters, int neighbour_limit)
{
int i, j;
/*****************************/
if (0) {
/* try just using the predominant bin number as a 'decoded state' */
@@ -137,60 +137,60 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
maxval = 0;
for (j = 0; j < feature_length; j++)
{
if (features[i][j] > maxval)
if (features[i][j] > maxval)
{
maxval = features[i][j];
maxbin = j;
}
}
}
if (maxval > chroma_thresh)
q[i] = maxbin;
else
q[i] = feature_length;
}
}
if (1) {
/*****************************/
/* scale all the features to 'balance covariances' during HMM training */
double scale = 10;
for (i = 0; i < frames_read; i++)
for (j = 0; j < feature_length; j++)
features[i][j] *= scale;
/* train an HMM on the features */
/* create a model */
model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
/* train the model */
hmm_train(features, frames_read, model);
/*
/*
printf("\n\nafter training:\n");
hmm_print(model);
*/
*/
/* decode the hidden state sequence */
viterbi_decode(features, frames_read, model, q);
viterbi_decode(features, frames_read, model, q);
hmm_close(model);
/*****************************/
}
/*****************************/
/*
fprintf(stderr, "HMM state sequence:\n");
for (i = 0; i < frames_read; i++)
fprintf(stderr, "%d ", q[i]);
fprintf(stderr, "\n\n");
*/
/* create histograms of states */
double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
create_histograms(q, frames_read, nHMM_states, histogram_length, h);
/* cluster the histograms */
int nbsched = 20; /* length of inverse temperature schedule */
double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
@@ -200,39 +200,39 @@ void cluster_segment(int* q, double** features, int frames_read, int feature_len
for (i = 1; i < nbsched; i++)
bsched[i] = alpha * bsched[i-1];
cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
/* now q holds a sequence of cluster assignments */
free(h);
free(h);
free(bsched);
}
/* segment constant-Q or chroma features */
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
{
int feature_length;
double** chroma;
int i;
if (feature_type == FEATURE_TYPE_CONSTQ)
{
/* fprintf(stderr, "Converting to dB and normalising...\n");
*/
*/
mpeg7_constq(features, frames_read, ncoeff);
/*
/*
fprintf(stderr, "Running PCA...\n");
*/
*/
/* do PCA on the features (but not the envelope) */
int ncomponents = 20;
pca_project(features, frames_read, ncoeff, ncomponents);
/* copy the envelope so that it immediately follows the chosen components */
for (i = 0; i < frames_read; i++)
features[i][ncomponents] = features[i][ncoeff];
features[i][ncomponents] = features[i][ncoeff];
feature_length = ncomponents + 1;
/**************************************
//TEST
// feature file name
@@ -241,7 +241,7 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
strcpy(file_name, dir);
strcat(file_name, trackname);
strcat(file_name, "_features_c20r8h0.2f0.6.mat");
// get the features from Matlab from mat-file
int frames_in_file;
readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
@@ -254,27 +254,27 @@ void constq_segment(int* q, double** features, int frames_read, int bins, int nc
features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
--missing_frames;
}
free(file_name);
******************************************/
cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
}
if (feature_type == FEATURE_TYPE_CHROMA)
{
/*
fprintf(stderr, "Converting to chroma features...\n");
*/
*/
/* convert constant-Q to normalised chroma features */
chroma = (double**) malloc(frames_read*sizeof(double*));
for (i = 0; i < frames_read; i++)
chroma[i] = (double*) malloc(bins*sizeof(double));
cq2chroma(features, frames_read, ncoeff, bins, chroma);
feature_length = bins;
cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
for (i = 0; i < frames_read; i++)
free(chroma[i]);
free(chroma);

View File

@@ -38,10 +38,10 @@ void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma);
void create_histograms(int* x, int nx, int m, int hlen, double* h);
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
int histogram_length, int nclusters, int neighbour_limit);
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
int nHMM_states, int histogram_length, int nclusters, int neighbour_limit);
#ifdef __cplusplus

View File

@@ -34,10 +34,10 @@ typedef struct segmentation_t
segment_t* segments;
} segmentation_t;
typedef enum
{
FEATURE_TYPE_UNKNOWN = 0,
FEATURE_TYPE_CONSTQ = 1,
typedef enum
{
FEATURE_TYPE_UNKNOWN = 0,
FEATURE_TYPE_CONSTQ = 1,
FEATURE_TYPE_CHROMA = 2,
FEATURE_TYPE_MFCC = 3
} feature_types;

View File

@@ -6,6 +6,14 @@
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
Modifications:
- delta threshold
Description: add delta threshold used as offset in the smoothed
detection function
Author: Mathieu Barthet
Date: June 2010
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
@@ -25,7 +33,7 @@
DFProcess::DFProcess( DFProcConfig Config )
{
filtSrc = NULL;
filtDst = NULL;
filtDst = NULL;
m_filtScratchIn = NULL;
m_filtScratchOut = NULL;
@@ -51,13 +59,16 @@ void DFProcess::initialise( DFProcConfig Config )
filtSrc = new double[ m_length ];
filtDst = new double[ m_length ];
//Low Pass Smoothing Filter Config
m_FilterConfigParams.ord = Config.LPOrd;
m_FilterConfigParams.ACoeffs = Config.LPACoeffs;
m_FilterConfigParams.BCoeffs = Config.LPBCoeffs;
m_FiltFilt = new FiltFilt( m_FilterConfigParams );
//add delta threshold
m_delta = Config.delta;
}
void DFProcess::deInitialise()
@@ -115,7 +126,7 @@ void DFProcess::medianFilter(double *src, double *dst)
{
if (index >= m_length) break;
l = 0;
for( j = i; j < ( i + m_winPost + m_winPre + 1); j++)
{
@@ -139,15 +150,17 @@ void DFProcess::medianFilter(double *src, double *dst)
l++;
}
scratch[ index++ ] = MathUtilities::median( y, l);
scratch[ index++ ] = MathUtilities::median( y, l);
}
for( i = 0; i < m_length; i++ )
{
val = src[ i ] - scratch[ i ];// - 0.033;
//add a delta threshold used as an offset when computing the smoothed detection function
//(helps to discard noise when detecting peaks)
val = src[ i ] - scratch[ i ] - m_delta;
if( m_isMedianPositive )
{
if( val > 0 )
@@ -164,7 +177,7 @@ void DFProcess::medianFilter(double *src, double *dst)
dst[ i ] = val;
}
}
delete [] y;
delete [] scratch;
}
@@ -180,8 +193,8 @@ void DFProcess::removeDCNormalize( double *src, double*dst )
MathUtilities::getAlphaNorm( src, m_length, m_alphaNormParam, &DFAlphaNorm );
for(int i = 0; i< m_length; i++)
for( unsigned int i = 0; i< m_length; i++)
{
dst[ i ] = ( src[ i ] - DFMin ) / DFAlphaNorm;
dst[ i ] = ( src[ i ] - DFMin ) / DFAlphaNorm;
}
}

View File

@@ -6,6 +6,14 @@
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
Modifications:
- delta threshold
Description: add delta threshold used as offset in the smoothed
detection function
Author: Mathieu Barthet
Date: June 2010
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
@@ -20,17 +28,31 @@
#include "FiltFilt.h"
struct DFProcConfig{
unsigned int length;
unsigned int LPOrd;
double *LPACoeffs;
double *LPBCoeffs;
unsigned int length;
unsigned int LPOrd;
double *LPACoeffs;
double *LPBCoeffs;
unsigned int winPre;
unsigned int winPost;
unsigned int winPost;
double AlphaNormParam;
bool isMedianPositive;
float delta; //delta threshold used as an offset when computing the smoothed detection function
DFProcConfig() :
length(0),
LPOrd(0),
LPACoeffs(NULL),
LPBCoeffs(NULL),
winPre(0),
winPost(0),
AlphaNormParam(0),
isMedianPositive(false),
delta(0)
{
}
};
class DFProcess
class DFProcess
{
public:
DFProcess( DFProcConfig Config );
@@ -38,7 +60,7 @@ public:
void process( double* src, double* dst );
private:
void initialise( DFProcConfig Config );
void deInitialise();
@@ -59,11 +81,12 @@ private:
double* m_filtScratchIn;
double* m_filtScratchOut;
FiltFiltConfig m_FilterConfigParams;
FilterConfig m_FilterConfigParams;
FiltFilt* m_FiltFilt;
bool m_isMedianPositive;
float m_delta; //add delta threshold
};
#endif

View File

@@ -19,12 +19,12 @@
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
FiltFilt::FiltFilt( FiltFiltConfig Config )
FiltFilt::FiltFilt( FilterConfig Config )
{
m_filtScratchIn = NULL;
m_filtScratchOut = NULL;
m_ord = 0;
initialise( Config );
}
@@ -33,13 +33,13 @@ FiltFilt::~FiltFilt()
deInitialise();
}
void FiltFilt::initialise( FiltFiltConfig Config )
void FiltFilt::initialise( FilterConfig Config )
{
m_ord = Config.ord;
m_filterConfig.ord = Config.ord;
m_filterConfig.ACoeffs = Config.ACoeffs;
m_filterConfig.BCoeffs = Config.BCoeffs;
m_filter = new Filter( m_filterConfig );
}
@@ -50,7 +50,7 @@ void FiltFilt::deInitialise()
void FiltFilt::process(double *src, double *dst, unsigned int length)
{
{
unsigned int i;
if (length == 0) return;
@@ -62,8 +62,8 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
m_filtScratchIn = new double[ nExt ];
m_filtScratchOut = new double[ nExt ];
for( i = 0; i< nExt; i++ )
for( i = 0; i< nExt; i++ )
{
m_filtScratchIn[ i ] = 0.0;
m_filtScratchOut[ i ] = 0.0;
@@ -89,21 +89,21 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
{
m_filtScratchIn[ i + nFact ] = src[ i ];
}
////////////////////////////////
// Do 0Ph filtering
m_filter->process( m_filtScratchIn, m_filtScratchOut, nExt);
// reverse the series for FILTFILT
// reverse the series for FILTFILT
for ( i = 0; i < nExt; i++)
{
{
m_filtScratchIn[ i ] = m_filtScratchOut[ nExt - i - 1];
}
// do FILTER again
// do FILTER again
m_filter->process( m_filtScratchIn, m_filtScratchOut, nExt);
// reverse the series back
// reverse the series back
for ( i = 0; i < nExt; i++)
{
m_filtScratchIn[ i ] = m_filtScratchOut[ nExt - i - 1 ];
@@ -117,7 +117,7 @@ void FiltFilt::process(double *src, double *dst, unsigned int length)
for( i = 0; i < length; i++ )
{
dst[ index++ ] = m_filtScratchOut[ i + nFact ];
}
}
delete [] m_filtScratchIn;
delete [] m_filtScratchOut;

View File

@@ -18,23 +18,22 @@
#include "Filter.h"
struct FiltFiltConfig{
unsigned int ord;
double* ACoeffs;
double* BCoeffs;
};
class FiltFilt
/**
* Zero-phase digital filter, implemented by processing the data
* through a filter specified by the given FilterConfig structure (see
* Filter) and then processing it again in reverse.
*/
class FiltFilt
{
public:
FiltFilt( FiltFiltConfig Config );
FiltFilt( FilterConfig Config );
virtual ~FiltFilt();
void reset();
void process( double* src, double* dst, unsigned int length );
private:
void initialise( FiltFiltConfig Config );
void initialise( FilterConfig Config );
void deInitialise();
unsigned int m_ord;

View File

@@ -20,13 +20,22 @@
#define NULL 0
#endif
/**
* Filter specification. For a filter of order ord, the ACoeffs and
* BCoeffs arrays must point to ord+1 values each. ACoeffs provides
* the denominator and BCoeffs the numerator coefficients of the
* filter.
*/
// Plain parameter bundle describing a digital filter.
// NOTE(review): ownership/lifetime of the coefficient arrays is not visible
// here — presumably the caller keeps them alive; confirm against Filter.
struct FilterConfig{
unsigned int ord; // filter order; each coefficient array must hold ord+1 values
double* ACoeffs; // denominator coefficients (ord+1 values)
double* BCoeffs; // numerator coefficients (ord+1 values)
};
class Filter
/**
* Digital filter specified through FilterConfig structure.
*/
class Filter
{
public:
Filter( FilterConfig Config );
@@ -36,7 +45,6 @@ public:
void process( double *src, double *dst, unsigned int length );
private:
void initialise( FilterConfig Config );
void deInitialise();

View File

@@ -44,14 +44,14 @@ void Framer::configure( unsigned int frameLength, unsigned int hop )
if( m_dataFrame != NULL )
{
delete [] m_dataFrame;
delete [] m_dataFrame;
m_dataFrame = NULL;
}
m_dataFrame = new double[ m_frameLength ];
if( m_strideFrame != NULL )
{
delete [] m_strideFrame;
delete [] m_strideFrame;
m_strideFrame = NULL;
}
m_strideFrame = new double[ m_stepSize ];
@@ -64,8 +64,8 @@ void Framer::getFrame(double *dst)
{
for( unsigned int u = 0; u < m_frameLength; u++)
{
dst[ u ] = m_srcBuffer[ m_ulSrcIndex++ ];
}
dst[ u ] = m_srcBuffer[ m_ulSrcIndex++ ];
}
m_ulSrcIndex -= ( m_frameLength - m_stepSize );
}
else
@@ -77,7 +77,7 @@ void Framer::getFrame(double *dst)
{
dst[ u ] = m_srcBuffer[ m_ulSrcIndex++ ];
}
for( unsigned int u = 0; u < zero; u++ )
{
dst[ rem + u ] = 0;

View File

@@ -21,7 +21,7 @@
#include <stdio.h>
class Framer
class Framer
{
public:
void setSource( double* src, unsigned int length );

View File

@@ -44,7 +44,10 @@ DownBeat::DownBeat(float originalSampleRate,
// 16x decimation, which is our expected normal situation)
m_beatframesize = MathUtilities::nextPowerOfTwo
(int((m_rate / decimationFactor) * 1.3));
// std::cerr << "rate = " << m_rate << ", bfs = " << m_beatframesize << std::endl;
if (m_beatframesize < 2) {
m_beatframesize = 2;
}
// std::cerr << "rate = " << m_rate << ", dec = " << decimationFactor << ", bfs = " << m_beatframesize << std::endl;
m_beatframe = new double[m_beatframesize];
m_fftRealOut = new double[m_beatframesize];
m_fftImagOut = new double[m_beatframesize];
@@ -122,7 +125,7 @@ DownBeat::pushAudioBlock(const float *audio)
// std::cerr << "pushAudioBlock: rms in " << sqrt(rmsin) << ", out " << sqrt(rmsout) << std::endl;
m_buffill += m_increment / m_factor;
}
const float *
DownBeat::getBufferedAudio(size_t &length) const
{
@@ -192,9 +195,9 @@ DownBeat::findDownBeats(const float *audio,
}
// Now FFT beat frame
m_fft->process(false, m_beatframe, m_fftRealOut, m_fftImagOut);
m_fft->forward(m_beatframe, m_fftRealOut, m_fftImagOut);
// Calculate magnitudes
for (size_t j = 0; j < m_beatframesize/2; ++j) {
@@ -257,7 +260,7 @@ DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
{
// JENSEN-SHANNON DIVERGENCE BETWEEN SPECTRAL FRAMES
unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
unsigned int SPECSIZE = 512; // ONLY LOOK AT FIRST 512 SAMPLES OF SPECTRUM.
if (SPECSIZE > oldspec.size()/4) {
SPECSIZE = oldspec.size()/4;
}
@@ -266,37 +269,37 @@ DownBeat::measureSpecDiff(d_vec_t oldspec, d_vec_t newspec)
double sumnew = 0.;
double sumold = 0.;
for (unsigned int i = 0;i < SPECSIZE;i++)
{
newspec[i] +=EPS;
oldspec[i] +=EPS;
sumnew+=newspec[i];
sumold+=oldspec[i];
}
}
for (unsigned int i = 0;i < SPECSIZE;i++)
{
newspec[i] /= (sumnew);
oldspec[i] /= (sumold);
// IF ANY SPECTRAL VALUES ARE 0 (SHOULDN'T BE ANY!) SET THEM TO 1
if (newspec[i] == 0)
{
newspec[i] = 1.;
}
if (oldspec[i] == 0)
{
oldspec[i] = 1.;
}
// JENSEN-SHANNON CALCULATION
sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
sd1 = 0.5*oldspec[i] + 0.5*newspec[i];
SD = SD + (-sd1*log(sd1)) + (0.5*(oldspec[i]*log(oldspec[i]))) + (0.5*(newspec[i]*log(newspec[i])));
}
return SD;
}

View File

@@ -17,6 +17,7 @@
#define DOWNBEAT_H
#include <vector>
#include <cstddef>
#include "dsp/rateconversion/Decimator.h"
@@ -28,7 +29,7 @@ class FFTReal;
* This class takes an input audio signal and a sequence of beat
* locations (calculated e.g. by TempoTrackV2) and estimates which of
* the beat locations are downbeats (first beat of the bar).
*
*
* The input audio signal is expected to have been downsampled to a
* very low sampling rate (e.g. 2700Hz). A utility function for
* downsampling and buffering incoming block-by-block audio is
@@ -56,7 +57,7 @@ public:
/**
* Estimate which beats are down-beats.
*
*
* audio contains the input audio stream after downsampling, and
* audioLength contains the number of samples in this downsampled
* stream.
@@ -83,18 +84,18 @@ public:
* and the region following it.
*/
void getBeatSD(vector<double> &beatsd) const;
/**
* For your downsampling convenience: call this function
* repeatedly with input audio blocks containing dfIncrement
* samples at the original sample rate, to decimate them to the
* downsampled rate and buffer them within the DownBeat class.
*
*
* Call getBufferedAudio() to retrieve the results after all
* blocks have been processed.
*/
void pushAudioBlock(const float *audio);
/**
* Retrieve the accumulated audio produced by pushAudioBlock calls.
*/

File diff suppressed because it is too large Load Diff

View File

@@ -5,11 +5,11 @@
Centre for Digital Music, Queen Mary, University of London.
This file 2005-2006 Christian Landone.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
*/
@@ -31,7 +31,7 @@ using std::vector;
struct WinThresh
{
unsigned int pre;
unsigned int post;
unsigned int post;
};
struct TTParams

View File

@@ -91,10 +91,17 @@ TempoTrackV2::filter_df(d_vec_t &df)
}
// MEPD 28/11/12
// This function now allows for a user to specify an inputtempo (in BPM)
// and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
// with a gaussian which is centered around the input tempo
// Note, if inputtempo = 120 and constraintempo = false, then functionality is
// as it was before
void
TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
vector<double> &beat_period,
vector<double> &tempi)
vector<double> &tempi,
double inputtempo, bool constraintempo)
{
// to follow matlab.. split into 512 sample frames with a 128 hop size
// calculate the acf,
@@ -103,13 +110,42 @@ TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
// and get best path
unsigned int wv_len = 128;
double rayparam = 43.;
// MEPD 28/11/12
// the default value of inputtempo in the beat tracking plugin is 120
// so if the user specifies a different inputtempo, the rayparam will be updated
// accordingly.
// note: 60*44100/512 is a magic number
// this might (will?) break if a user specifies a different frame rate for the onset detection function
double rayparam = (60*44100/512)/inputtempo;
// these debug statements can be removed.
// std::cerr << "inputtempo" << inputtempo << std::endl;
// std::cerr << "rayparam" << rayparam << std::endl;
// std::cerr << "constraintempo" << constraintempo << std::endl;
// make rayleigh weighting curve
d_vec_t wv(wv_len);
for (unsigned int i=0; i<wv.size(); i++)
// choose the weighting curve: rayleigh weighting if constraintempo is false,
// or gaussian weighting if constraintempo is true
if (constraintempo)
{
wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
for (unsigned int i=0; i<wv.size(); i++)
{
// MEPD 28/11/12
// do a gaussian weighting instead of rayleigh
wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
}
}
else
{
for (unsigned int i=0; i<wv.size(); i++)
{
// MEPD 28/11/12
// standard rayleigh weighting over periodicities
wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
}
}
// beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
@@ -397,10 +433,14 @@ TempoTrackV2::normalise_vec(d_vec_t &df)
}
}
// MEPD 28/11/12
// this function has been updated to allow the "alpha" and "tightness" parameters
// of the dynamic program to be set by the user
// the default value of alpha = 0.9 and tightness = 4
void
TempoTrackV2::calculateBeats(const vector<double> &df,
const vector<double> &beat_period,
vector<double> &beats)
vector<double> &beats, double alpha, double tightness)
{
if (df.empty() || beat_period.empty()) return;
@@ -414,8 +454,12 @@ TempoTrackV2::calculateBeats(const vector<double> &df,
backlink[i] = -1;
}
double tightness = 4.;
double alpha = 0.9;
//double tightness = 4.;
//double alpha = 0.9;
// MEPD 28/11/12
// debug statements that can be removed.
// std::cerr << "alpha" << alpha << std::endl;
// std::cerr << "tightness" << tightness << std::endl;
// main loop
for (unsigned int i=0; i<localscore.size(); i++)
@@ -462,7 +506,7 @@ TempoTrackV2::calculateBeats(const vector<double> &df,
int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
// can happen if no results obtained earlier (e.g. input too short)
if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
// USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
// BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0

View File

@@ -18,8 +18,7 @@
#define TEMPOTRACKV2_H
#include <vector>
using std::vector;
using namespace std;
//!!! Question: how far is this actually sample rate dependent? I
// think it does produce plausible results for e.g. 48000 as well as
@@ -40,15 +39,35 @@ public:
TempoTrackV2(float sampleRate, size_t dfIncrement);
~TempoTrackV2();
// Returned beat periods are given in df increment units; tempi in bpm
// Returned beat periods are given in df increment units; inputtempo and tempi in bpm
void calculateBeatPeriod(const vector<double> &df,
vector<double> &beatPeriod,
vector<double> &tempi);
vector<double> &tempi) {
calculateBeatPeriod(df, beatPeriod, tempi, 120.0, false);
}
// Returned beat periods are given in df increment units; inputtempo and tempi in bpm
// MEPD 28/11/12 Expose inputtempo and constraintempo parameters
// Note, if inputtempo = 120 and constraintempo = false, then functionality is as it was before
void calculateBeatPeriod(const vector<double> &df,
vector<double> &beatPeriod,
vector<double> &tempi,
double inputtempo, bool constraintempo);
// Returned beat positions are given in df increment units
void calculateBeats(const vector<double> &df,
const vector<double> &beatPeriod,
vector<double> &beats);
vector<double> &beats) {
calculateBeats(df, beatPeriod, beats, 0.9, 4.0);
}
// Returned beat positions are given in df increment units
// MEPD 28/11/12 Expose alpha and tightness parameters
// Note, if alpha = 0.9 and tightness = 4, then functionality is as it was before
void calculateBeats(const vector<double> &df,
const vector<double> &beatPeriod,
vector<double> &beats,
double alpha, double tightness);
private:
typedef vector<int> i_vec_t;

View File

@@ -16,7 +16,7 @@
#include "ChangeDetectionFunction.h"
#ifndef PI
#define PI (3.14159265358979323846)
// pi to 21 significant digits. The digit order matters: a transposed
// "...8979232846" rounds to a double 2 ULP below the true value of pi.
#define PI (3.14159265358979323846)
#endif
@@ -34,20 +34,20 @@ ChangeDetectionFunction::~ChangeDetectionFunction()
void ChangeDetectionFunction::setFilterWidth(const int iWidth)
{
m_iFilterWidth = iWidth*2+1;
// it is assumed that the gaussian is 0 outside of +/- FWHM
// => filter width = 2*FWHM = 2*2.3548*sigma
m_dFilterSigma = double(m_iFilterWidth) / double(2*2.3548);
m_vaGaussian.resize(m_iFilterWidth);
double dScale = 1.0 / (m_dFilterSigma*sqrt(2*PI));
for (int x = -(m_iFilterWidth-1)/2; x <= (m_iFilterWidth-1)/2; x++)
{
double w = dScale * std::exp ( -(x*x)/(2*m_dFilterSigma*m_dFilterSigma) );
m_vaGaussian[x + (m_iFilterWidth-1)/2] = w;
}
#ifdef DEBUG_CHANGE_DETECTION_FUNCTION
std::cerr << "Filter sigma: " << m_dFilterSigma << std::endl;
std::cerr << "Filter width: " << m_iFilterWidth << std::endl;
@@ -59,37 +59,37 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
{
ChangeDistance retVal;
retVal.resize(rTCSGram.getSize(), 0.0);
TCSGram smoothedTCSGram;
for (int iPosition = 0; iPosition < rTCSGram.getSize(); iPosition++)
{
int iSkipLower = 0;
int iLowerPos = iPosition - (m_iFilterWidth-1)/2;
int iUpperPos = iPosition + (m_iFilterWidth-1)/2;
if (iLowerPos < 0)
{
iSkipLower = -iLowerPos;
iLowerPos = 0;
}
if (iUpperPos >= rTCSGram.getSize())
{
int iMaxIndex = rTCSGram.getSize() - 1;
iUpperPos = iMaxIndex;
}
TCSVector smoothedVector;
// for every bin of the vector, calculate the smoothed value
for (int iPC = 0; iPC < 6; iPC++)
{
{
size_t j = 0;
double dSmoothedValue = 0.0;
TCSVector rCV;
for (int i = iLowerPos; i <= iUpperPos; i++)
{
rTCSGram.getTCSVector(i, rCV);
@@ -98,7 +98,7 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
smoothedVector[iPC] = dSmoothedValue;
}
smoothedTCSGram.addTCSVector(smoothedVector);
}
@@ -109,10 +109,10 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
if the current estimate is not confident enough, look further into the future/the past
e.g., High frequency content, zero crossing rate, spectral flatness
*/
TCSVector nextTCS;
TCSVector previousTCS;
int iWindow = 1;
// while (previousTCS.magnitude() < 0.1 && (iPosition-iWindow) > 0)
@@ -121,9 +121,9 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
// std::cout << previousTCS.magnitude() << std::endl;
iWindow++;
}
iWindow = 1;
// while (nextTCS.magnitude() < 0.1 && (iPosition+iWindow) < (rTCSGram.getSize()-1) )
{
smoothedTCSGram.getTCSVector(iPosition+iWindow, nextTCS);
@@ -136,7 +136,7 @@ ChangeDistance ChangeDetectionFunction::process(const TCSGram& rTCSGram)
{
distance += std::pow(nextTCS[j] - previousTCS[j], 2.0);
}
retVal[iPosition] = std::pow(distance, 0.5);
}

View File

@@ -38,7 +38,7 @@ public:
ChangeDistance process(const TCSGram& rTCSGram);
private:
void setFilterWidth(const int iWidth);
private:
valarray<double> m_vaGaussian;
double m_dFilterSigma;

View File

@@ -34,7 +34,7 @@ TCSGram::~TCSGram()
void TCSGram::getTCSVector(int iPosition, TCSVector& rTCSVector) const
{
if (iPosition < 0)
if (iPosition < 0)
rTCSVector = TCSVector();
else if (iPosition >= m_VectorList.size())
rTCSVector = TCSVector();
@@ -52,10 +52,10 @@ void TCSGram::addTCSVector(const TCSVector& rTCSVector)
{
size_t uSize = m_VectorList.size();
long lMilliSeconds = static_cast<long>(uSize*m_dFrameDurationMS);
std::pair<long, TCSVector> p;
std::pair<long, TCSVector> p;
p.first = lMilliSeconds;
p.second = rTCSVector;
m_VectorList.push_back(p);
}
@@ -68,7 +68,7 @@ long TCSGram::getDuration() const
void TCSGram::printDebug()
{
vectorlist_t::iterator vectorIterator = m_VectorList.begin();
while (vectorIterator != m_VectorList.end())
{
vectorIterator->second.printDebug();

View File

@@ -26,7 +26,7 @@ typedef std::vector<std::pair<long, TCSVector> > vectorlist_t;
class TCSGram
{
public:
public:
TCSGram();
~TCSGram();
void getTCSVector(int, TCSVector&) const;

View File

@@ -19,7 +19,7 @@
#include <iostream>
#ifndef PI
#define PI (3.14159265358979323846)
// pi to 21 significant digits. The digit order matters: a transposed
// "...8979232846" rounds to a double 2 ULP below the true value of pi.
#define PI (3.14159265358979323846)
#endif
TonalEstimator::TonalEstimator()
@@ -27,15 +27,15 @@ TonalEstimator::TonalEstimator()
m_Basis.resize(6);
int i = 0;
// circle of fifths
m_Basis[i].resize(12);
for (int iP = 0; iP < 12; iP++)
{
m_Basis[i][iP] = std::sin( (7.0 / 6.0) * iP * PI);
}
i++;
m_Basis[i].resize(12);
@@ -43,17 +43,17 @@ TonalEstimator::TonalEstimator()
{
m_Basis[i][iP] = std::cos( (7.0 / 6.0) * iP * PI);
}
i++;
// circle of major thirds
m_Basis[i].resize(12);
for (int iP = 0; iP < 12; iP++)
{
m_Basis[i][iP] = 0.6 * std::sin( (2.0 / 3.0) * iP * PI);
}
i++;
m_Basis[i].resize(12);
@@ -71,7 +71,7 @@ TonalEstimator::TonalEstimator()
{
m_Basis[i][iP] = 1.1 * std::sin( (3.0 / 2.0) * iP * PI);
}
i++;
m_Basis[i].resize(12);
@@ -90,7 +90,7 @@ TCSVector TonalEstimator::transform2TCS(const ChromaVector& rVector)
{
TCSVector vaRetVal;
vaRetVal.resize(6, 0.0);
for (int i = 0; i < 6; i++)
{
for (int iP = 0; iP < 12; iP++)
@@ -98,6 +98,6 @@ TCSVector TonalEstimator::transform2TCS(const ChromaVector& rVector)
vaRetVal[i] += m_Basis[i][iP] * rVector[iP];
}
}
return vaRetVal;
}

View File

@@ -27,24 +27,24 @@ class ChromaVector : public std::valarray<double>
public:
ChromaVector(size_t uSize = 12) : std::valarray<double>()
{ resize(uSize, 0.0f); }
virtual ~ChromaVector() {};
// Write every element of this vector to std::cout, each followed by ';',
// and finish the line with a newline (flushed via std::endl).
void printDebug()
{
    // size() is unsigned, so the index is a size_t to avoid a
    // signed/unsigned comparison in the loop condition.
    for (size_t i = 0; i < size(); i++)
    {
        std::cout << (*this)[i] << ";";
    }
    std::cout << std::endl;
}
void normalizeL1()
{
// normalize the chroma vector (L1 norm)
double dSum = 0.0;
for (size_t i = 0; i < 12; (dSum += std::abs((*this)[i++]))) ;
for (size_t i = 0; i < 12; dSum > 0.0000001?((*this)[i] /= dSum):(*this)[i]=0.0, i++) ;
@@ -54,8 +54,8 @@ public:
{
for (size_t i = 0; i < 12; ++i) (*this)[i] = 0.0;
}
};
class TCSVector : public std::valarray<double>
@@ -63,7 +63,7 @@ class TCSVector : public std::valarray<double>
public:
TCSVector() : std::valarray<double>()
{ resize(6, 0.0f); }
virtual ~TCSVector() {};
void printDebug()
@@ -72,19 +72,19 @@ public:
{
std::cout << (*this)[i] << ";";
}
std::cout << std::endl;
}
// Euclidean (L2) length of the vector, computed over its first six
// components.
double magnitude() const
{
    double sumOfSquares = 0.0;
    size_t k = 0;
    while (k < 6)
    {
        const double component = (*this)[k];
        sumOfSquares += std::pow(component, 2.0);
        ++k;
    }
    return std::sqrt(sumOfSquares);
}

View File

@@ -4,178 +4,199 @@
QM DSP Library
Centre for Digital Music, Queen Mary, University of London.
This file is based on Don Cross's public domain FFT implementation.
*/
#include "FFT.h"
#include "maths/MathUtilities.h"
#include "kiss_fft.h"
#include "kiss_fftr.h"
#include <cmath>
#include <iostream>
FFT::FFT(unsigned int n) :
m_n(n),
m_private(0)
#include <stdexcept>
class FFT::D
{
if( !MathUtilities::isPowerOfTwo(m_n) )
{
std::cerr << "ERROR: FFT: Non-power-of-two FFT size "
<< m_n << " not supported in this implementation"
<< std::endl;
return;
public:
// Set up a complex transform of size n: one kissfft plan for the forward
// direction (m_planf), one for the inverse direction (m_plani), and two
// n-element complex work buffers used by process() for input and output.
D(int n) : m_n(n) {
m_planf = kiss_fft_alloc(m_n, 0, NULL, NULL);
m_plani = kiss_fft_alloc(m_n, 1, NULL, NULL);
m_kin = new kiss_fft_cpx[m_n];
m_kout = new kiss_fft_cpx[m_n];
}
// Release both kissfft plans and the two complex work buffers acquired in
// the constructor.
~D() {
kiss_fft_free(m_planf);
kiss_fft_free(m_plani);
delete[] m_kin;
delete[] m_kout;
}
// Run a complex transform of m_n points.
//
//   inverse - false for the forward transform, true for the inverse
//   ri, ii  - real and imaginary input, m_n values each; ii may be null,
//             in which case the imaginary input is taken as zero
//   ro, io  - real and imaginary output, m_n values each
//
// The inverse result is scaled by 1/m_n; the forward result is unscaled.
void process(bool inverse,
             const double *ri, const double *ii,
             double *ro, double *io) {

    // Pack the split real/imaginary input into the interleaved work buffer.
    if (ii) {
        for (int k = 0; k < m_n; ++k) {
            m_kin[k].r = ri[k];
            m_kin[k].i = ii[k];
        }
    } else {
        for (int k = 0; k < m_n; ++k) {
            m_kin[k].r = ri[k];
            m_kin[k].i = 0.0;
        }
    }

    if (inverse) {
        kiss_fft(m_plani, m_kin, m_kout);
        const double scale = 1.0 / m_n;
        for (int k = 0; k < m_n; ++k) {
            ro[k] = m_kout[k].r * scale;
            io[k] = m_kout[k].i * scale;
        }
        return;
    }

    kiss_fft(m_planf, m_kin, m_kout);
    for (int k = 0; k < m_n; ++k) {
        ro[k] = m_kout[k].r;
        io[k] = m_kout[k].i;
    }
}
private:
int m_n;
kiss_fft_cfg m_planf;
kiss_fft_cfg m_plani;
kiss_fft_cpx *m_kin;
kiss_fft_cpx *m_kout;
};
// Construct a complex FFT of size n. All state is held by a heap-allocated
// private implementation object (D), stored in m_d.
FFT::FFT(int n) :
m_d(new D(n))
{
}
// Destroy the private implementation object held in m_d.
FFT::~FFT()
{
delete m_d;
}
FFTReal::FFTReal(unsigned int n) :
m_n(n),
m_private_real(0)
// Forward or inverse complex transform (selected by `inverse`). This is a
// pure pass-through: every argument is handed unchanged to the private
// implementation object, which does the actual work.
void
FFT::process(bool inverse,
const double *p_lpRealIn, const double *p_lpImagIn,
double *p_lpRealOut, double *p_lpImagOut)
{
m_d->process(inverse,
p_lpRealIn, p_lpImagIn,
p_lpRealOut, p_lpImagOut);
}
// Private implementation of FFTReal: real-input forward transforms and
// complex-to-real inverse transforms of a fixed, even size, built on the
// kissfft real-FFT routines.
class FFTReal::D
{
public:
    // Prepare forward and inverse plans plus a complex scratch buffer for
    // transforms of n samples. Throws std::invalid_argument when n is odd;
    // the check runs before anything is allocated.
    D(int n) : m_n(n) {
        if (n % 2) {
            throw std::invalid_argument
                ("nsamples must be even in FFTReal constructor");
        }
        m_planf = kiss_fftr_alloc(m_n, 0, NULL, NULL);
        m_plani = kiss_fftr_alloc(m_n, 1, NULL, NULL);
        m_c = new kiss_fft_cpx[m_n];
    }

    // Release both plans and the scratch buffer.
    ~D() {
        kiss_fftr_free(m_planf);
        kiss_fftr_free(m_plani);
        delete[] m_c;
    }

    // Forward transform of the m_n real samples in ri. ro and io each
    // receive m_n values: bins 0..m_n/2 are copied from the transform
    // output, and the remaining bins are filled in as their complex
    // conjugates so that callers get a full-length spectrum.
    void forward(const double *ri, double *ro, double *io) {
        kiss_fftr(m_planf, ri, m_c);
        for (int i = 0; i <= m_n/2; ++i) {
            ro[i] = m_c[i].r;
            io[i] = m_c[i].i;
        }
        for (int i = 0; i + 1 < m_n/2; ++i) {
            ro[m_n - i - 1] = ro[i + 1];
            io[m_n - i - 1] = -io[i + 1];
        }
    }

    // Forward transform of ri, writing only the magnitude of each of the
    // m_n bins to mo.
    //
    // Magnitudes are computed straight from the scratch buffer m_c rather
    // than through forward() and a temporary imaginary array, so no memory
    // is allocated per call. A bin and its conjugate have the same
    // magnitude, so the upper half is a plain mirror of the lower half.
    void forwardMagnitude(const double *ri, double *mo) {
        kiss_fftr(m_planf, ri, m_c);
        for (int i = 0; i <= m_n/2; ++i) {
            mo[i] = sqrt(m_c[i].r * m_c[i].r + m_c[i].i * m_c[i].i);
        }
        for (int i = 0; i + 1 < m_n/2; ++i) {
            mo[m_n - i - 1] = mo[i + 1];
        }
    }

    // Inverse (complex-to-real) transform. Only the first m_n/2 + 1 values
    // of ri and ii are read; ro receives m_n real samples scaled by 1/m_n.
    void inverse(const double *ri, const double *ii, double *ro) {
        // kiss_fftr.h says
        // "input freqdata has nfft/2+1 complex points"
        for (int i = 0; i < m_n/2 + 1; ++i) {
            m_c[i].r = ri[i];
            m_c[i].i = ii[i];
        }

        kiss_fftri(m_plani, m_c, ro);

        double scale = 1.0 / m_n;
        for (int i = 0; i < m_n; ++i) {
            ro[i] *= scale;
        }
    }

private:
    int m_n;                // transform size in samples (even)
    kiss_fftr_cfg m_planf;  // forward plan
    kiss_fftr_cfg m_plani;  // inverse plan
    kiss_fft_cpx *m_c;      // complex scratch buffer, m_n elements
};
FFTReal::FFTReal(int n) :
m_d(new D(n))
{
m_private_real = new FFT(m_n);
}
FFTReal::~FFTReal()
{
delete (FFT *)m_private_real;
delete m_d;
}
void
FFTReal::process(bool inverse,
const double *realIn,
double *realOut, double *imagOut)
FFTReal::forward(const double *ri, double *ro, double *io)
{
((FFT *)m_private_real)->process(inverse, realIn, 0, realOut, imagOut);
}
// Returns 0 when p_nSamples is less than 2. Otherwise returns the position
// of the lowest set bit of p_nSamples, which is log2(p_nSamples) whenever
// p_nSamples is a power of two.
static unsigned int numberOfBitsNeeded(unsigned int p_nSamples)
{
    if (p_nSamples < 2) {
        return 0;
    }

    unsigned int bit = 0;
    while ((p_nSamples & (1u << bit)) == 0) {
        ++bit;
    }
    return bit;
}
static unsigned int reverseBits(unsigned int p_nIndex, unsigned int p_nBits)
{
unsigned int i, rev;
for(i=rev=0; i < p_nBits; i++)
{
rev = (rev << 1) | (p_nIndex & 1);
p_nIndex >>= 1;
}
return rev;
m_d->forward(ri, ro, io);
}
void
FFT::process(bool p_bInverseTransform,
const double *p_lpRealIn, const double *p_lpImagIn,
double *p_lpRealOut, double *p_lpImagOut)
FFTReal::forwardMagnitude(const double *ri, double *mo)
{
if (!p_lpRealIn || !p_lpRealOut || !p_lpImagOut) return;
// std::cerr << "FFT::process(" << m_n << "," << p_bInverseTransform << ")" << std::endl;
unsigned int NumBits;
unsigned int i, j, k, n;
unsigned int BlockSize, BlockEnd;
double angle_numerator = 2.0 * M_PI;
double tr, ti;
if( !MathUtilities::isPowerOfTwo(m_n) )
{
std::cerr << "ERROR: FFT::process: Non-power-of-two FFT size "
<< m_n << " not supported in this implementation"
<< std::endl;
return;
}
if( p_bInverseTransform ) angle_numerator = -angle_numerator;
NumBits = numberOfBitsNeeded ( m_n );
for( i=0; i < m_n; i++ )
{
j = reverseBits ( i, NumBits );
p_lpRealOut[j] = p_lpRealIn[i];
p_lpImagOut[j] = (p_lpImagIn == 0) ? 0.0 : p_lpImagIn[i];
}
BlockEnd = 1;
for( BlockSize = 2; BlockSize <= m_n; BlockSize <<= 1 )
{
double delta_angle = angle_numerator / (double)BlockSize;
double sm2 = -sin ( -2 * delta_angle );
double sm1 = -sin ( -delta_angle );
double cm2 = cos ( -2 * delta_angle );
double cm1 = cos ( -delta_angle );
double w = 2 * cm1;
double ar[3], ai[3];
for( i=0; i < m_n; i += BlockSize )
{
ar[2] = cm2;
ar[1] = cm1;
ai[2] = sm2;
ai[1] = sm1;
for ( j=i, n=0; n < BlockEnd; j++, n++ )
{
ar[0] = w*ar[1] - ar[2];
ar[2] = ar[1];
ar[1] = ar[0];
ai[0] = w*ai[1] - ai[2];
ai[2] = ai[1];
ai[1] = ai[0];
k = j + BlockEnd;
tr = ar[0]*p_lpRealOut[k] - ai[0]*p_lpImagOut[k];
ti = ar[0]*p_lpImagOut[k] + ai[0]*p_lpRealOut[k];
p_lpRealOut[k] = p_lpRealOut[j] - tr;
p_lpImagOut[k] = p_lpImagOut[j] - ti;
p_lpRealOut[j] += tr;
p_lpImagOut[j] += ti;
}
}
BlockEnd = BlockSize;
}
if( p_bInverseTransform )
{
double denom = (double)m_n;
for ( i=0; i < m_n; i++ )
{
p_lpRealOut[i] /= denom;
p_lpImagOut[i] /= denom;
}
}
m_d->forwardMagnitude(ri, mo);
}
// Inverse (complex-to-real) transform. This is a pure pass-through: the
// arguments are handed unchanged to the private implementation object.
void
FFTReal::inverse(const double *ri, const double *ii, double *ro)
{
m_d->inverse(ri, ii, ro);
}

View File

@@ -9,34 +9,97 @@
#ifndef FFT_H
#define FFT_H
class FFT
class FFT
{
public:
FFT(unsigned int nsamples);
virtual ~FFT();
/**
* Construct an FFT object to carry out complex-to-complex
* transforms of size nsamples. nsamples does not have to be a
* power of two.
*/
FFT(int nsamples);
~FFT();
/**
* Carry out a forward or inverse transform (depending on the
* value of inverse) of size nsamples, where nsamples is the value
* provided to the constructor above.
*
* realIn and (where present) imagIn should contain nsamples each,
* and realOut and imagOut should point to enough space to receive
* nsamples each.
*
* imagIn may be NULL if the signal is real, but the other
* pointers must be valid.
*
* The inverse transform is scaled by 1/nsamples.
*/
void process(bool inverse,
const double *realIn, const double *imagIn,
double *realOut, double *imagOut);
private:
unsigned int m_n;
void *m_private;
class D;
D *m_d;
};
class FFTReal
{
public:
FFTReal(unsigned int nsamples);
/**
* Construct an FFT object to carry out real-to-complex transforms
* of size nsamples. nsamples does not have to be a power of two,
* but it does have to be even. (Use the complex-complex FFT above
* if you need an odd FFT size. This constructor will throw
* std::invalid_argument if nsamples is odd.)
*/
FFTReal(int nsamples);
~FFTReal();
void process(bool inverse,
const double *realIn,
/**
* Carry out a forward real-to-complex transform of size nsamples,
* where nsamples is the value provided to the constructor above.
*
* realIn, realOut, and imagOut must point to (enough space for)
* nsamples values. For consistency with the FFT class above, and
* compatibility with existing code, the conjugate half of the
* output is returned even though it is redundant.
*/
void forward(const double *realIn,
double *realOut, double *imagOut);
/**
* Carry out a forward real-to-complex transform of size nsamples,
* where nsamples is the value provided to the constructor
* above. Return only the magnitudes of the complex output values.
*
* realIn and magOut must point to (enough space for) nsamples
* values. For consistency with the FFT class above, and
* compatibility with existing code, the conjugate half of the
* output is returned even though it is redundant.
*/
void forwardMagnitude(const double *realIn, double *magOut);
/**
* Carry out an inverse real transform (i.e. complex-to-real) of
* size nsamples, where nsamples is the value provided to the
* constructor above.
*
* realIn and imagIn should point to at least nsamples/2+1 values;
* if more are provided, only the first nsamples/2+1 values of
* each will be used (the conjugate half will always be deduced
* from the first nsamples/2+1 rather than being read from the
* input data). realOut should point to enough space to receive
* nsamples values.
*
* The inverse transform is scaled by 1/nsamples.
*/
void inverse(const double *realIn, const double *imagIn,
double *realOut);
private:
unsigned int m_n;
void *m_private_real;
};
class D;
D *m_d;
};
#endif

View File

@@ -13,10 +13,6 @@
COPYING included with this distribution for more information.
*/
#ifdef COMPILER_MSVC
#pragma warning(disable:4305)
#endif
#include "Wavelet.h"
#include <cassert>
@@ -81,11 +77,11 @@ Wavelet::createDecompositionFilters(Type wavelet,
lpd.clear();
hpd.clear();
unsigned int flength = 0;
int flength = 0;
switch (wavelet) {
case Haar:
case Haar:
lpd.push_back(0.70710678118655);
lpd.push_back(0.70710678118655);
hpd.push_back(-0.70710678118655);
@@ -103,7 +99,7 @@ Wavelet::createDecompositionFilters(Type wavelet,
hpd.push_back(-0.22414386804186);
hpd.push_back(-0.12940952255092);
flength = 4;
break;
break;
case Daubechies_3:
lpd.push_back(0.03522629188210);
@@ -592,7 +588,7 @@ Wavelet::createDecompositionFilters(Type wavelet,
hpd.push_back(-0.00000000000000);
flength = 80;
break;
case Symlet_2:
lpd.push_back(-0.12940952255092);
lpd.push_back(0.22414386804186);
@@ -692,7 +688,7 @@ Wavelet::createDecompositionFilters(Type wavelet,
hpd.push_back(0.01540410932703);
flength = 12;
break;
case Symlet_7:
lpd.push_back(0.00268181456826);
lpd.push_back(-0.00104738488868);