forked from rpuntaie/syncstart
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsyncstart.py
executable file
·222 lines (195 loc) · 6.42 KB
/
syncstart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python3
"""
The steps taken by ``syncstart``:
- extract start audio as ``.wav`` using ffmpeg
- optionally normalize, denoise, lowpass the two ``.wav``
- compute offset via correlation using scipy ifft/fft
- print result and optionally show in diagrams
Requirements:
- ffmpeg installed
- Python3 with tk (tk is separate on Ubuntu: python3-tk)
References:
- https://ffmpeg.org/ffmpeg-all.html
- https://github.com/slhck/ffmpeg-normalize
- https://dsp.stackexchange.com/questions/736/how-do-i-implement-cross-correlation-to-prove-two-audio-files-are-similar
Within Python:
from syncstart import file_offset
file_offset
"""
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import numpy as np
from scipy import fft
from scipy.io import wavfile
import tempfile
import os
import pathlib
import sys
__version__ = "1.0.1"
__author__ = """Roland Puntaier"""
__email__ = 'roland.puntaier@gmail.com'
#global
ax = None
take = 20
normalize = False
denoise = False
lowpass = 0
ffmpegwav = 'ffmpeg -i "{}" -t %s -c:a pcm_s16le -map 0:a "{}"'
ffmpegnormalize = ('ffmpeg -y -nostdin -i "{}" -filter_complex ' +
"'[0:0]loudnorm=i=-23.0:lra=7.0:tp=-2.0:offset=4.45:linear=true:print_format=json[norm0]' " +
"-map_metadata 0 -map_metadata:s:a:0 0:s:a:0 -map_chapters 0 -c:v copy -map '[norm0]' " +
'-c:a:0 pcm_s16le -c:s copy "{}"')
ffmpegdenoise = 'ffmpeg -i "{}" -af'+" 'afftdn=nf=-25' "+'"{}"'
ffmpeglow = 'ffmpeg -i "{}" -af'+" 'lowpass=f=%s' "+'"{}"'
o = lambda x: '%s%s'%(x,'.wav')
def in_out(command,infile,outfile):
hdr = '-'*len(command)
print("%s\n%s\n%s"%(hdr,command,hdr))
ret = os.system(command.format(infile,outfile))
if 0 != ret:
sys.exit(ret)
def normalize_denoise(infile,outname):
with tempfile.TemporaryDirectory() as tempdir:
outfile = o(pathlib.Path(tempdir)/outname)
in_out(ffmpegwav%take,infile,outfile)
if normalize:
infile, outfile = outfile,o(outfile)
in_out(ffmpegnormalize,infile,outfile)
if denoise:
infile, outfile = outfile,o(outfile)
in_out(ffmpegdenoise,infile,outfile)
infile, outfile = outfile,o(outfile)
in_out(ffmpegdenoise,infile,outfile)
if int(lowpass):
infile, outfile = outfile,o(outfile)
in_out(ffmpeglow%lowpass,infile,outfile)
r,s = wavfile.read(outfile)
if len(s.shape)>1: #stereo
s = s[:,0]
return r,s
def fig1(title=None):
fig = plt.figure(1)
plt.margins(0, 0.1)
plt.grid(True, color='0.7', linestyle='-', which='major', axis='both')
plt.grid(True, color='0.9', linestyle='-', which='minor', axis='both')
plt.title(title or 'Signal')
plt.xlabel('Time [seconds]')
plt.ylabel('Amplitude')
axs = fig.get_axes()
global ax
ax = axs[0]
def show1(fs, s, color=None, title=None, v=None):
if not color: fig1(title)
if ax and v: ax.axvline(x=v,color='green')
plt.plot(np.arange(len(s))/fs, s, color or 'black')
if not color: plt.show()
def show2(fs,s1,s2,title=None):
fig1(title)
show1(fs,s1,'blue')
show1(fs,s2,'red')
plt.show()
def read_normalized(in1,in2):
global normalize
r1,s1 = normalize_denoise(in1,'out1')
r2,s2 = normalize_denoise(in2,'out2')
if r1 != r2:
old,normalize = normalize,True
r1,s1 = normalize_denoise(in1,'out1')
r2,s2 = normalize_denoise(in2,'out2')
normalize = old
assert r1 == r2, "not same sample rate"
fs = r1
return fs,s1,s2
def corrabs(s1,s2):
ls1 = len(s1)
ls2 = len(s2)
padsize = ls1+ls2+1
padsize = 2**(int(np.log(padsize)/np.log(2))+1)
s1pad = np.zeros(padsize)
s1pad[:ls1] = s1
s2pad = np.zeros(padsize)
s2pad[:ls2] = s2
corr = fft.ifft(fft.fft(s1pad)*np.conj(fft.fft(s2pad)))
ca = np.absolute(corr)
xmax = np.argmax(ca)
return ls1,ls2,padsize,xmax,ca
def cli_parser(**ka):
import argparse
parser = argparse.ArgumentParser(description=file_offset.__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('--version', action='version', version = __version__)
if 'in1' not in ka:
parser.add_argument(
'in1',
help='First media file to sync with second, using audio.')
if 'in2' not in ka:
parser.add_argument(
'in2',
help='Second media file to sync with first, using audio.')
if 'take' not in ka:
parser.add_argument(
'-t','--take',
dest='take',
action='store',
default=20,
help='Take X seconds of the inputs to look at. (default: 20)')
if 'show' not in ka:
parser.add_argument(
'-s','--show',
dest='show',
action='store_false',
default=True,
help='Turn off "show diagrams", in case you are confident.')
if 'normalize' not in ka:
parser.add_argument(
'-n','--normalize',
dest='normalize',
action='store_true',
default=False,
help='Turn on normalize. It turns on by itself in a second pass, if sampling rates differ.')
if 'denoise' not in ka:
parser.add_argument(
'-d','--denoise',
dest='denoise',
action='store_true',
default=False,
help='Turns on denoise, as experiment in case of failure.')
if 'lowpass' not in ka:
parser.add_argument(
'-l','--lowpass',
dest='lowpass',
action='store',
default=0,
help="lowpass, just in case, because like with manual sync'ing,\
the low frequencies matter more. 0 == off. (default: 0)")
return parser
def file_offset(**ka):
"""CLI interface to sync two media files using their audio streams.
ffmpeg needs to be available.
"""
parser = cli_parser(**ka)
args = parser.parse_args().__dict__
ka.update(args)
global take,normalize,denoise,lowpass
in1,in2,take,show = ka['in1'],ka['in2'],ka['take'],ka['show']
normalize,denoise,lowpass = ka['normalize'],ka['denoise'],ka['lowpass']
fs,s1,s2 = read_normalized(in1,in2)
ls1,ls2,padsize,xmax,ca = corrabs(s1,s2)
if show: show1(fs,ca,title='Correlation',v=xmax/fs)
sync_text = """
==============================================================================
%s needs 'ffmpeg -ss %s' cut to get in sync
==============================================================================
"""
if xmax > padsize // 2:
if show: show2(fs,s1,s2[padsize-xmax:],title='1st=blue;2nd=red=cut(%s;%s)'%(in1,in2))
file,offset = in2,(padsize-xmax)/fs
else:
if show: show2(fs,s1[xmax:],s2,title='1st=blue=cut;2nd=red (%s;%s)'%(in1,in2))
file,offset = in1,xmax/fs
print(sync_text%(file,offset))
return file,offset
main = file_offset
if __name__ == '__main__':
main()