|
1 | 1 | """
|
2 |
| -Note: V 0.9.1 Originally, filling data methods was developed by Eric Alfaro and Javier Soley in SCILAB |
| 2 | +Note: V 0.9.2 Originally, the data-filling methods were developed by Eric Alfaro and Javier Soley in SCILAB |
3 | 3 | Python version was developed by Rolando Duarte and Erick Rivera
|
4 | 4 | Centro de Investigaciones Geofísicas (CIGEFI)
|
5 | 5 | Universidad de Costa Rica (UCR)
|
@@ -154,25 +154,33 @@ def checkPrincipalComponents(self):
|
154 | 154 | upperError: int
|
155 | 155 | Maximum value to choose principal components
|
156 | 156 | """
|
| 157 | + #Scaling the data to get the best performance using PCA |
157 | 158 | scale = StandardScaler()
|
158 | 159 | dfMeanScaled = scale.fit_transform(self.dfMean)
|
159 | 160 | pca = PCA(n_components = self.dfColumns, copy = True, svd_solver = "full", random_state = 0)
|
160 |
| - pca.fit(dfMeanScaled) |
| 161 | + vectorsPCA = pca.fit_transform(dfMeanScaled) |
161 | 162 | explainedVariance = pca.explained_variance_
|
162 |
| - errorExplainedVarience = explainedVariance * sqrt(2 / self.dfRows) |
| 163 | + errorExplainedVarience = [] |
| 164 | + |
| 165 | + #Calculating error bars |
| 166 | + for index in arange(0, len(explainedVariance)): |
| 167 | + dfComponents = DataFrame({"Original": vectorsPCA[:, index]}) |
| 168 | + dfComponents["Shift"] = dfComponents.Original.shift(1) |
| 169 | + corr = dfComponents.corr().iloc[0, 1] |
| 170 | + nEffective = self.dfRows * (1 - corr**2) / (1 + corr**2) |
| 171 | + errorExplainedVarience.append(explainedVariance[index] * sqrt(2 / nEffective)) |
163 | 172 | components = arange(1, len(explainedVariance) + 1)
|
164 | 173 | upperError = len(explainedVariance) - 1
|
165 | 174 |
|
166 |
| - figure(figsize = (20, 20)) |
| 175 | + #Plotting eigenvalues and principal components |
167 | 176 | errorbar(components, explainedVariance,
|
168 |
| - yerr=errorExplainedVarience, fmt="o", color="#9b6dff", |
169 |
| - ecolor="black", capsize=6, |
| 177 | + yerr=errorExplainedVarience, fmt="D", color="green", |
| 178 | + ecolor="red", capsize=10, |
170 | 179 | )
|
171 | 180 | title("Explained variance vs. principal components")
|
172 | 181 | xlabel("Principal components")
|
173 | 182 | ylabel("Explained variance")
|
174 | 183 | show()
|
175 |
| - |
176 | 184 | return upperError
|
177 | 185 |
|
178 | 186 | def PCAMethod(self, components=1, tol=1e-1, itermax=10, valueMin=0.0):
|
@@ -247,7 +255,8 @@ def checkPrincipalComponents(self):
|
247 | 255 | upperError: int
|
248 | 256 | Maximum value to choose principal components
|
249 | 257 | """
|
250 |
| - return PrincipalComponentAnalysis(self.df).checkPrincipalComponents() |
| 258 | + upperError = PrincipalComponentAnalysis(self.df).checkPrincipalComponents() |
| 259 | + return upperError |
251 | 260 |
|
252 | 261 | def FullMethod(self, lags=1, components=1, tol=1e-1, itermax=10, valueMin=0.0):
|
253 | 262 | """
|
|
0 commit comments