Skip to content

Commit 15e5db8

Browse files
authored
AC: Added overlapping while clipping audio (openvinotoolkit#1160)
* Overlapping while clipping audio * Delete confusing redundant parameters * Extract processing of the 'duration' and 'overlap' values into methods
1 parent a8e5d68 commit 15e5db8

File tree

2 files changed

+90
-13
lines changed

2 files changed

+90
-13
lines changed

tools/accuracy_checker/accuracy_checker/preprocessor/README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,9 @@ Accuracy Checker supports following set of preprocessors:
122122
* `resample_audio` - converts audio to new sample rate
123123
* `sample_rate` - sets new sample rate
124124
* `clip_audio` - slices audio into several parts with equal duration
125-
* `duration` - sets duration in seconds
126-
* `max_clips` - sets the maximum number of clips (by default `1`)
125+
* `duration` - sets duration of each clip in seconds or samples (use `samples` suffix), e.g. `1.5`, `16000samples`
126+
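* `overlap` - sets the overlap between consecutive clips as a percentage of the clip length or as a number of samples (use the `%` or `samples` suffix respectively), e.g. `25%`, `4000samples`; a plain number strictly between 0 and 1 is treated as a fraction of the clip length (clips do not overlap by default)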
127+
* `max_clips` - sets the maximum number of clips (by default the whole record is clipped)
127128
* `audio_normalization` - normalizes the audio record by subtracting the mean of the samples and dividing by their standard deviation.
128129
* `similarity_transform_box` - applies a similarity transformation to the image to get the rectangular region stored in annotation metadata.
129130
* `box_scale` - box scale factor (Optional, default 1).

tools/accuracy_checker/accuracy_checker/preprocessor/audio_preprocessing.py

+87-11
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
import numpy as np
1818

19-
from ..config import NumberField
19+
from ..config import BaseField, NumberField, ConfigError
2020
from ..preprocessor import Preprocessor
2121

2222

@@ -64,20 +64,28 @@ class ClipAudio(Preprocessor):
6464
def parameters(cls):
6565
parameters = super().parameters()
6666
parameters.update({
67-
'duration': NumberField(
68-
value_type=int, min_value=0,
69-
description="Length of audio clip in seconds."
67+
'duration': BaseField(
68+
description="Length of audio clip in seconds or samples (with 'samples' suffix)."
7069
),
7170
'max_clips': NumberField(
72-
value_type=int, min_value=1, default=1,
71+
value_type=int, min_value=1, optional=True,
7372
description="Maximum number of clips per audiofile."
7473
),
74+
'overlap': BaseField(
75+
optional=True,
76+
description="Overlapping part for each clip."
77+
),
7578
})
7679
return parameters
7780

7881
def configure(self):
79-
self.duration = self.get_value_from_config('duration')
80-
self.max_clips = self.get_value_from_config('max_clips')
82+
duration = self.get_value_from_config('duration')
83+
self._parse_duration(duration)
84+
85+
self.max_clips = self.get_value_from_config('max_clips') or np.inf
86+
87+
overlap = self.get_value_from_config('overlap')
88+
self._parse_overlap(overlap)
8189

8290
def process(self, image, annotation_meta=None):
8391
data = image.data
@@ -86,18 +94,86 @@ def process(self, image, annotation_meta=None):
8694
raise RuntimeError('Operation "{}" failed: required "sample rate" in metadata.'.
8795
format(self.__provider__))
8896
audio_duration = data.shape[1]
89-
clip_duration = self.duration * sample_rate
97+
clip_duration = self.duration if self.is_duration_in_samples else int(self.duration * sample_rate)
9098
clipped_data = []
91-
for i in range(self.max_clips):
92-
if (i + 1) * clip_duration > audio_duration:
99+
if self.is_overlap_in_samples:
100+
hop = clip_duration - self.overlap_in_samples
101+
else:
102+
hop = int((1 - self.overlap) * clip_duration)
103+
104+
if hop > clip_duration:
105+
raise ConfigError("Preprocessor {}: clip overlapping exceeds clip length.".format(self.__provider__))
106+
107+
for clip_no, clip_start in enumerate(range(0, audio_duration, hop)):
108+
if clip_start + clip_duration > audio_duration or clip_no >= self.max_clips:
93109
break
94-
clip = data[:, i * clip_duration: (i+1) * clip_duration]
110+
clip = data[:, clip_start: clip_start + clip_duration]
95111
clipped_data.append(clip)
112+
96113
image.data = clipped_data
97114
image.metadata['multi_infer'] = True
98115

99116
return image
100117

118+
def _parse_overlap(self, overlap):
    """Parse the 'overlap' config value and store the result on the instance.

    Accepted forms:
      * None - the optional parameter is not set, clips do not overlap;
      * '<number>%' - overlap as a percentage of the clip length, in [0, 100);
      * '<int>samples' - overlap as an absolute number of samples, at least 1;
      * a bare number - overlap as a fraction of the clip length, strictly
        between 0 and 1.

    Sets self.overlap (fraction of the clip length), self.overlap_in_samples
    (absolute overlap) and self.is_overlap_in_samples (which of the two applies).

    Raises:
        ConfigError: if the value cannot be parsed or is out of range.
    """
    samples_suffix = 'samples'

    def invalid_value():
        # Every rejection path reports the raw config value with the same message.
        return ConfigError("Preprocessor {}: invalid value for 'overlap' - {}."
                           .format(self.__provider__, overlap))

    self.is_overlap_in_samples = False
    self.overlap = 0
    # Always defined, so readers never hit an AttributeError in the fractional modes.
    self.overlap_in_samples = 0

    # configure() passes the config value through unchanged; an unset optional
    # parameter is assumed to arrive as None (TODO confirm against
    # get_value_from_config). float(None) would raise an uncaught TypeError,
    # so keep the "no overlap" defaults instead.
    if overlap is None:
        return

    if isinstance(overlap, str):
        if overlap.endswith('%'):
            try:
                self.overlap = float(overlap[:-1]) / 100
            except ValueError as error:
                raise invalid_value() from error
            # A fraction >= 1 makes the hop computed in process() zero or
            # negative, and a negative one makes it exceed the clip length.
            # '0%' stays accepted, as before. The chained comparison also
            # rejects NaN.
            if not 0 <= self.overlap < 1:
                raise invalid_value()
        elif overlap.endswith(samples_suffix):
            try:
                self.overlap_in_samples = int(overlap[:-len(samples_suffix)])
            except ValueError as error:
                raise invalid_value() from error
            if self.overlap_in_samples < 1:
                raise invalid_value()
            self.is_overlap_in_samples = True
        else:
            raise invalid_value()
        return

    try:
        self.overlap = float(overlap)
    except (TypeError, ValueError) as error:
        # TypeError covers non-numeric, non-string config values (e.g. a list).
        raise invalid_value() from error
    # Same bounds as before (0 and 1 excluded); written this way so NaN is rejected too.
    if not 0 < self.overlap < 1:
        raise invalid_value()
150+
151+
def _parse_duration(self, duration):
    """Parse the 'duration' config value and store the result on the instance.

    Accepted forms:
      * '<int>samples' - clip length as a positive number of samples;
      * a bare number - clip length in seconds, must be positive.

    Sets self.duration and self.is_duration_in_samples (True when the value is
    a sample count, False when it is in seconds).

    Raises:
        ConfigError: if the value cannot be parsed or is not positive.
    """
    samples_suffix = 'samples'

    def invalid_value(cause=None):
        # Raised for anything that cannot be interpreted as a duration at all.
        error = ConfigError("Preprocessor {}: invalid value for duration - {}."
                            .format(self.__provider__, duration))
        error.__cause__ = cause
        return error

    def not_positive():
        # Reports the parsed value, not the raw config entry.
        return ConfigError("Preprocessor {}: duration should be positive value - {}."
                           .format(self.__provider__, self.duration))

    self.is_duration_in_samples = False

    if isinstance(duration, str):
        # The only string form supported is '<int>samples'.
        if not duration.endswith(samples_suffix):
            raise invalid_value()
        try:
            self.duration = int(duration[:-len(samples_suffix)])
        except ValueError as error:
            raise invalid_value(error)
        # A one-sample clip is positive and therefore valid; the previous
        # '<= 1' check rejected it despite the "positive value" message.
        if self.duration < 1:
            raise not_positive()
        self.is_duration_in_samples = True
        return

    try:
        self.duration = float(duration)
    except (TypeError, ValueError) as error:
        # TypeError covers None and other non-numeric config values.
        raise invalid_value(error)
    # 'not > 0' instead of '<= 0' so that NaN is rejected as well.
    if not self.duration > 0:
        raise not_positive()
176+
101177

102178
class NormalizeAudio(Preprocessor):
103179
__provider__ = 'audio_normalization'

0 commit comments

Comments
 (0)