forked from Sneeds-Feed-and-Seed/sneedacity
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVoiceKey.h
104 lines (74 loc) · 3.48 KB
/
VoiceKey.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/***************************************************************************
Sneedacity: A Digital Audio Editor
VoiceKey.h: a class implementing a voice key
(c) 2002-2005 Shane T. Mueller
Distributed under the terms of the GPL Version 2 or later.
***************************************************************************/
#ifndef __SNEEDACITY_VOICEKEY__
#define __SNEEDACITY_VOICEKEY__
// Fallback definition of pi, used only when M_PI has not already been defined
// (M_PI is not guaranteed by the C or C++ standards).
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
#include "sneedacity/Types.h"
// Forward declaration: this header only refers to WaveTrack by const reference.
class WaveTrack;
// Identifiers for the kinds of voice-key statistic.
// Apart from VKT_NONE (zero), every value is a distinct single bit.
// NOTE(review): presumably meant to be OR-ed together as flags; no use of
// this enum is visible in this header -- confirm against callers.
enum VoiceKeyTypes
{
   VKT_NONE                   = 0,
   VKT_ENERGY                 = 1 << 0,
   VKT_SIGN_CHANGES_LOW       = 1 << 1,
   VKT_SIGN_CHANGES_HIGH      = 1 << 2,
   VKT_DIRECTION_CHANGES_LOW  = 1 << 3,
   VKT_DIRECTION_CHANGES_HIGH = 1 << 4
};
// VoiceKey: a class implementing a voice key.
//
// Only the interface is declared here; the member functions are defined
// elsewhere. NOTE(review): the remarks on individual functions below are
// inferred from their names and signatures -- confirm against the
// implementation before relying on them.
class VoiceKey
{
public:
   VoiceKey();
   ~VoiceKey();

   // Onset ("On") and offset ("Off") searches, each taking a track, a starting
   // sample and a length, and returning a sampleCount.
   // Presumably the Forward/Backward suffix names the scan direction and the
   // result is the sample position that was found -- TODO confirm.
   sampleCount OnForward(const WaveTrack &t, sampleCount start, sampleCount len);
   sampleCount OnBackward(const WaveTrack &t, sampleCount start, sampleCount len);
   sampleCount OffForward(const WaveTrack &t, sampleCount start, sampleCount len);
   sampleCount OffBackward(const WaveTrack &t, sampleCount start, sampleCount len);

   // Presumably derives the *Mean / *SD noise statistics held below from the
   // given stretch of the track -- TODO confirm.
   void CalibrateNoise(const WaveTrack &t, sampleCount start, sampleCount len);

   // Presumably updates mThresholdAdjustment (and the thresholds that depend
   // on it) -- TODO confirm.
   void AdjustThreshold(double t);

   // Presumably reports whether the given stretch of the track exceeds the
   // currently enabled thresholds -- TODO confirm.
   bool AboveThreshold(const WaveTrack &t, sampleCount start, sampleCount len);

   // Takes one flag per statistic, in the same order as the mUse* members
   // below (energy, sign changes low/high, direction changes low/high).
   // Presumably each flag is stored in the matching member -- TODO confirm.
   void SetKeyType(bool erg,
                   bool scLow, bool scHigh,
                   bool dcLow, bool dcHigh);

private:
   // Size of the analysis window, in milliseconds.
   double mWindowSize;

   // User-accessible sensitivity calibration variable.
   double mThresholdAdjustment;

   // Mean and standard deviation of each statistic.
   double mEnergyMean;
   double mEnergySD;
   double mSignChangesMean;
   double mSignChangesSD;
   double mDirectionChangesMean;
   double mDirectionChangesSD;

   // Thresholds. As noted by the original author:
   // Threshold[*] is equal to [*]Mean + [*]SD * ThresholdAdjustment.
   double mThresholdEnergy;
   double mThresholdSignChangesLower;
   double mThresholdSignChangesUpper;
   double mThresholdDirectionChangesLower;
   double mThresholdDirectionChangesUpper;

   // Switches selecting which statistics should be used.
   bool mUseEnergy;
   bool mUseSignChangesLow;
   bool mUseSignChangesHigh;
   bool mUseDirectionChangesLow;
   bool mUseDirectionChangesHigh;

   // Time, in milliseconds, of below-threshold windows required for silence.
   double mSilentWindowSize;

   // Time, in milliseconds, of above-threshold windows required for speech.
   double mSignalWindowSize;

   // Per-statistic tests over a stretch of the track; each returns a double.
   // Presumably the value of the statistic over that stretch -- TODO confirm.
   double TestEnergy(const WaveTrack &t, sampleCount start, sampleCount len);
   double TestSignChanges(const WaveTrack &t, sampleCount start, sampleCount len);
   double TestDirectionChanges(const WaveTrack &t, sampleCount start, sampleCount len);

   // Update helpers; the first argument of each is a non-const reference and
   // so may be modified by the call.
   // Presumably they adjust a running statistic as the window slides, using
   // the samples leaving and entering it -- TODO confirm.
   void TestEnergyUpdate(double &prevErg, int length,
                         const float &drop, const float &add);

   void TestSignChangesUpdate(double &currentsignchanges, int length,
                              const float &a1, const float &a2,
                              const float &z1, const float &z2);

   void TestDirectionChangesUpdate(double &currentdirectionchanges, int length,
                                   int &atrend, const float &a1, const float &a2,
                                   int &ztrend, const float &z1, const float &z2);
};
// Sign of an integer as -1 or +1. Zero is treated as positive: the result is
// never 0.
inline int sgn(int number)
{
   if (number < 0)
      return -1;
   return 1;
}
//This returns a logistic density based on a z-score.
// A logistic distribution with scale s has variance (pi*s)^2/3.
//inline float inline float logistic(float z){ return fexp(-1 * z/(pi / sqrt(3)) / (1 + pow(fexp(-1 * z(pi / sqrt(3))),2)));}
#endif