-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathdcpostprocessing.py
346 lines (288 loc) · 14 KB
/
dcpostprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
"""
The functions contained in this file are used to post-process previously
computed raw experimental dispersion data.
Functions:
computeDCstats:
Bin dispersion data and compute statistics (e.g., mean and std. dev.).
plotDCforRmv:
Plot dispersion data for various source-offsets along with mean and std.
deviation dispersion data.
rmvDCpoints:
This program is used to manually remove "bad" dispersion data (e.g.,
offline-noise, near-field effects, higher-modes, etc.).
This code was developed at the University of Texas at Austin.
Copyright (C) 2016 David P. Teague, Clinton M. Wood, and Brady R. Cox
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
# Specify backend for interactive plotting
import matplotlib as mpl
mpl.use('Qt4Agg')
# Import modules
import numpy as np
import tkinter as tk
import matplotlib.pyplot as plt
import dctypes
import shotgathers
# Width and height (in) of plots
mwdth = 10
mhght = 6
fsize = 11
# Function to bin dispersion data from one or more arrays or offsets and compute
# statistics for each bin.
def computeDCstats( rawDC, minP=5, maxP=100, numbins=96, binScale="linear", binType="frequency", arrayWeights=[] ):
# Combine all dispersion data into single vector for freq., vel., and wavel.
vel = rawDC.velocity[0]
fr = rawDC.frequency[0]
if len(arrayWeights)!=0:
wt = arrayWeights[0]*np.ones(len(rawDC.velocity[0]))
if len(vel)>1:
for p in range( 1, len(rawDC.velocity) ):
vel = np.concatenate((vel, rawDC.velocity[p]))
fr = np.concatenate((fr, rawDC.frequency[p]))
if len(arrayWeights)!=0:
wt = np.concatenate((wt, arrayWeights[p]*np.ones(len(rawDC.velocity[p]))))
wl = vel / fr
# Bin edges
if str.lower(binScale)=="linear":
binEdges = np.linspace( minP, maxP, numbins+1 )
elif str.lower(binScale)=="log":
binEdges = np.logspace( np.log10(minP), np.log10(maxP), numbins+1 )
else:
raise ValueError("Invalid binScale")
# Determine how many frequencies or wavelengths falls into each bin
if str.lower(binType)=="frequency":
whichBin = np.digitize( fr, binEdges )
elif str.lower(binType)=="wavelength":
whichBin = np.digitize( wl, binEdges )
else:
raise ValueError("Invalid binType")
# Initialize variables
weightPoints = np.zeros(numbins)
binWeight = np.zeros(numbins)
velMean = np.zeros(numbins)
velStd = np.zeros(numbins)
slowMean = np.zeros(numbins)
slowStd = np.zeros(numbins)
freqMean = np.zeros(numbins)
waveMean = np.zeros(numbins)
# Compute statistics for each bin
for g in range( numbins ):
# Flag points in current bin
flagPoints = np.where( whichBin == (g+1) )[0]
freqPoints = fr[flagPoints]
wavePoints = wl[flagPoints]
velPoints = vel[flagPoints]
slowPoints = 1/velPoints
# Compute averages and standard deviations
# Set values equal to NaN if no points fall within current bin
# Weighted calculations
if len(flagPoints)!=0:
if len(arrayWeights)!=0:
weightPoints[g] = wt[flagPoints]
binWeight[g] = sum(weightPoints)
velMean[g] = float(sum(velPoints*weightPoints)) / sum(weightPoints)
velStd[g] = np.sqrt( 1.0/sum(weightPoints) * sum( ((velPoints-velMean[g])**2) *weightPoints ) )
slowMean[g] = float(sum(slowPoints*weightPoints)) / sum(weightPoints)
slowStd[g] = np.sqrt( 1.0/sum(weightPoints) * sum( ((slowPoints-slowMean[g])**2) *weightPoints ) )
freqMean[g] = float(sum(freqPoints*weightPoints)) / sum(weightPoints)
waveMean[g] = float(sum(wavePoints*weightPoints)) / sum(weightPoints)
# Unweighted calculations
# (use unbiased sample standard deviation, with a normalization of 1/(n-1) or a ddof=1 )
else:
binWeight[g] = len(velPoints)
velMean[g] = np.average(velPoints)
slowMean[g] = np.average(slowPoints)
if binWeight[g] > 1:
velStd[g] = np.std(velPoints, ddof=1)
slowStd[g] = np.std(slowPoints, ddof=1)
freqMean[g] = np.average(freqPoints)
waveMean[g] = np.average(wavePoints)
# Remove zeros
ids=np.where( freqMean>0 )[0]
freqMean = freqMean[ids]
velMean = velMean[ids]
velStd = velStd[ids]
slowMean = slowMean[ids]
slowStd = slowStd[ids]
waveMean = waveMean[ids]
binWeight = binWeight[ids]
# Calculate the coefficient of variation
cov = velStd / velMean
# Combine results into a single matrix
meanDisp = np.vstack((freqMean, velMean, velStd, slowMean, slowStd, waveMean, binWeight, cov))
meanDisp = meanDisp.transpose()
# Remove rows corresponding to empty bins (meanFreq==0)
z_ids=np.where( meanDisp[:,0]==0 )[0]
meanDisp = np.delete(meanDisp, z_ids, 0)
return meanDisp
# Function to plot dispersion data along with averages and standard deviations.
# (Note that the kmin and kmax curves are used for passive-source FK processing,
# thus, kmin and kmax are set equal to NaN for MASW testing to avoid plotting.)
def plotDCforRmv( rawDC, meanDisp, setLeg, markType=[], colorSpec=[], xScaleType="log", kmin=float('nan'), kmax=float('nan') ):
n_off = len(rawDC.velocity)
# Default markers and colors
if not markType:
markType = ['o']*n_off
if not colorSpec:
colorSpec = shotgathers.create_ColorMap( n_off )
# Set figure size equal to 2/3 screen size
# Get screen size in mm and convert to in (25.4 mm per inch)
#root = tk.Tk()
#width = root.winfo_screenmmwidth() / 25.4 * 0.66
#height = root.winfo_screenmmheight() / 25.4 * 0.66
#cfig = plt.figure( figsize=(width,height) )
cfig = plt.figure( figsize=(mwdth,mhght) )
# Curves for kmin and kmax (if kmin and kmax are provided)
minF = np.min(meanDisp[:,0])
maxF = np.max(meanDisp[:,0])
maxV = 0
maxW = 0
for k in range( n_off ):
if max(rawDC.velocity[k]) > maxV:
maxV = max(rawDC.velocity[k])
if max(rawDC.velocity[k]/rawDC.frequency[k]) > maxW:
maxW = max(rawDC.velocity[k]/rawDC.frequency[k])
# kmin and kmax curves for frequency vs velocity
freq_klim = np.logspace( np.log10(minF), np.log10(maxF), 100 )
vel_klimF = np.vstack( ( 2*np.pi*freq_klim/kmax, 2*np.pi*freq_klim/(kmax/2), 2*np.pi*freq_klim/kmin, 2*np.pi*freq_klim/(kmin/2) ) )
vel_klimF = vel_klimF.transpose()
# Don't plot higher than maximum velocity of dispersion data
if not(np.isnan(kmax)):
for j in range( np.shape(vel_klimF)[1] ):
rmvID = np.where( vel_klimF[:,j] > maxV )[0]
vel_klimF[rmvID,j] = float('nan')
# kmin and kmax curves for wavelength vs velocity
wave_lim = np.hstack( ( 2*np.pi/kmax*np.array([[1],[1]]), 2*np.pi/(kmax/2)*np.array([[1],[1]]), 2*np.pi/kmin*np.array([[1],[1]]), 2*np.pi/(kmin/2)*np.array([[1],[1]]) ) )
vel_klimW = np.array([0,maxV])
# Velocity vs frequency plot
axf = cfig.add_subplot(1,2,1)
for r in range( len(rawDC.velocity) ):
axf.plot( rawDC.frequency[r], rawDC.velocity[r], marker=markType[r], markersize=5, markeredgecolor=colorSpec[r], markerfacecolor="none", linestyle="none" )
axf.errorbar( meanDisp[:,0], meanDisp[:,1], meanDisp[:,2], marker="o", markersize=5, color="k", linestyle="none" )
# kmin and kmax lines
if not np.isnan(kmin):
axf.plot( freq_klim, vel_klimF[:,0], linestyle=":" )
axf.plot( freq_klim, vel_klimF[:,1], linestyle="-" )
axf.plot( freq_klim, vel_klimF[:,2], linestyle="--" )
axf.plot( freq_klim, vel_klimF[:,3], linestyle="-." )
axf.set_xlabel( "Frequency (Hz)", fontsize=fsize, fontname="arial" )
axf.set_ylabel( "Velocity (m/s)", fontsize=fsize, fontname="arial" )
axf.set_xticklabels(axf.get_xticks(), fontsize=fsize, fontname="arial" )
axf.set_yticklabels(axf.get_yticks(), fontsize=fsize, fontname="arial" )
axf.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
axf.set_xscale( xScaleType )
# Velocity vs wavelength
axw = cfig.add_subplot(1,2,2)
# Raw data and error bars
for r in range( len(rawDC.velocity) ):
axw.plot( rawDC.velocity[r]/rawDC.frequency[r], rawDC.velocity[r], marker=markType[r], markersize=5, markeredgecolor=colorSpec[r], markerfacecolor="none", linestyle="none", label=setLeg[r] )
axw.errorbar( meanDisp[:,5], meanDisp[:,1], meanDisp[:,2], marker="o", markersize=5, color="k", linestyle="none" )
# kmin and kmax lines
if not np.isnan(kmin):
axw.plot( wave_lim[:,0], vel_klimW, linestyle=":", label='kmax' )
axw.plot( wave_lim[:,1], vel_klimW, linestyle="-", label='kmax/2' )
axw.plot( wave_lim[:,2], vel_klimW, linestyle="--", label='kmin' )
axw.plot( wave_lim[:,3], vel_klimW, linestyle="-.", label='kmin/2' )
handles, labels = axw.get_legend_handles_labels()
axw.legend(handles, labels, loc='upper left')
axw.set_xlabel( "Wavelength (m)", fontsize=fsize, fontname="arial" )
axw.set_xscale( xScaleType )
axw.set_xticklabels(axw.get_xticks(), fontsize=fsize, fontname="arial" )
axw.set_yticklabels(axw.get_yticks(), fontsize=fsize, fontname="arial" )
axw.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
cfig.show()
axf.set_autoscale_on(False)
axw.set_autoscale_on(False)
# Return figure handle
return cfig
# Function to prompt user to draw a box on a dispersion curve figure. Data points
# inside of the box are removed and data points outside of the box are kept.
def rmvDCpoints( rawDC, cfig ):
frequency = rawDC.frequency
velocity = rawDC.velocity
offset = rawDC.offset
wavelength = []
for m in range(len(frequency)):
wavelength.append( velocity[m]/frequency[m] )
# Point elimination loop
while True:
# Determine which axis was clicked
axclick = []
def on_click(event):
if event.inaxes is not None:
if len(axclick)<2:
axclick.append(event.inaxes)
cid = cfig.canvas.mpl_connect('button_press_event', on_click)
# Draw box, determine upper and lower bounds
rawBounds = np.asarray(cfig.ginput(2, timeout=0))
cfig.canvas.mpl_disconnect(cid)
xmin = np.min(rawBounds[:,0])
xmax = np.max(rawBounds[:,0])
ymin = np.min(rawBounds[:,1])
ymax = np.max(rawBounds[:,1])
# Total removed points
n_rmv = 0
for g in range( len(frequency) ):
# Arrays containing data for current offset
f = frequency[g]
v = velocity[g]
w = wavelength[g]
# Create arrays to store indices of data that will be kept and removed
rmv_id = np.zeros( len(f), int )
keep_id = np.zeros( len(f), int )
# If user clicked on two different axes, warn user and return
if axclick[0] != axclick[1]:
print "WARNING: BOTH CLICKS MUST BE ON SAME AXIS"
return
# Identify axes
axf = min(cfig.axes)
axw = max(cfig.axes)
if axf.get_xlabel() != 'Frequency (Hz)':
axf = max(cfig.axes)
axw = min(cfig.axes)
# Determine if points fall within box
for i in range( len(f) ):
condition1 = (axclick[0]==axf) and ( xmin<f[i] and f[i]<xmax and ymin<v[i] and v[i]<ymax )
condition2 = (axclick[0]==axw) and ( xmin<w[i] and w[i]<xmax and ymin<v[i] and v[i]<ymax )
# Points inside of selectd box are removed
if condition1 or condition2:
rmv_id[i] = i+1
# Points outside of selected box are kept
else:
keep_id[i] = i+1
# Remove zeros from rmv_id and keep_id
zid = np.where( rmv_id == 0 )[0]
rmv_id = np.delete( rmv_id, zid, 0 )
zid = np.where( keep_id == 0 )[0]
keep_id = np.delete( keep_id, zid, 0 )
# Removed data
frmv = f[(rmv_id-1)]
vrmv = v[(rmv_id-1)]
wrmv = w[(rmv_id-1)]
n_rmv += len(vrmv)
# Plot deleted data with black x's
axf.plot(frmv, vrmv, marker="x", color="k", markersize=5, linestyle="none")
axw.plot(wrmv, vrmv, marker="x", color="k", markersize=5, linestyle="none")
cfig.canvas.draw_idle()
# Retained data
fnew = f[(keep_id-1)]
vnew = v[(keep_id-1)]
wnew = w[(keep_id-1)]
# Revise velocity, frequency, and wavelength cell arrays
velocity[g] = vnew
frequency[g] = fnew
wavelength[g] = wnew
if n_rmv==0:
break
del cid
rawDC = dctypes.RawDispersion( frequency, velocity, offset )
return