This repository has been archived by the owner on Oct 21, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path週K與熱力對照圖範例.py
274 lines (224 loc) · 12.5 KB
/
週K與熱力對照圖範例.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
# =============================================================================
# 嘗試臨時新增欄位 Index 供資料回朔,並新增週 K 與熱力圖的對照圖
# =============================================================================
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import numpy as np
import time
# 讀取數據
df = pd.read_excel('data.xlsx')
print(f"總資料數:{len(df)}")
df['Index'] = range(len(df))
df_Latest = pd.read_excel("data_Latest.xlsx")
#print(df.columns)
feature_names = ['Gold_Close', 'Gold_High', 'CPIAUCNS', 'Gold_Open', 'UNRATE',
'MA_20', 'MA_10', 'USD_Index_Growth_Rate', 'TW_CPI_Rate',
'WILLR', 'Open', 'K', 'RSI_14', 'Gold_Volume',
'Gold_Growth_Rate', 'FEDFUNDS', 'Bollinger Bands lower',
'Bollinger Bands Upper', 'USA_GDP_Rate', 'Index']
# 0.821
def split_stock_data(stock_data, label_column, test_size = 0.3,
random_state = 42):
X = stock_data[feature_names].values
y = stock_data[label_column].values
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size = test_size,
random_state =
random_state)
return X_train, X_test, y_train, y_test, feature_names
label_column = 'LABEL'
# 分割資料
trainX, testX, trainY, testY, feature_names = split_stock_data(df,
label_column)
# 訓練模型
Xgboost = XGBClassifier()
new_trainX = [feature_names.index('Index')] # 排除臨時新增欄位影響
start_time = time.time()
Xgboost.fit(np.delete(trainX, new_trainX, axis = 1),
trainY)
training_time = time.time() - start_time
test_predic = Xgboost.predict(np.delete(testX, new_trainX,
axis = 1)) # 取得預測的結果
test_acc = Xgboost.score(np.delete(testX, new_trainX,
axis = 1), testY)
# 近期數據測試
X_Latest = df_Latest[feature_names[:-1]].values # 原始資料裡並沒有 'Index' 這欄,故需要再次特別指定
y_Latest = df_Latest['LABEL'].values
latest_data_test_acc = Xgboost.score(X_Latest, y_Latest)
# 進行 XOR 運算
xor_result = np.bitwise_xor(test_predic, testY)
# =============================================================================
# test_predic 和 testY 之間的 XOR 運算可以幫助你找出模型預測錯誤的情況:
#
# 如果 test_predic[i] 和 testY[i] 相同(即模型預測正確),則 XOR 結果為 0。
# 如果 test_predic[i] 和 testY[i] 不同(即模型預測錯誤),則 XOR 結果為 1。
# =============================================================================
print('Xgboost測試集準確率 %.3f' % test_acc)
print(f"訓練時間: {training_time // 60:.0f} 分 {training_time % 60:.2f} 秒")
print('Xgboost近期數據測試準確率 %.3f' % latest_data_test_acc)
# 0.821
# 這裡本圖僅參考分布,不具實際應用意義,因 train_test_split 抽選資料為隨機抽取(並
# 不連續),並且輸出的資料完全不可回朔,僅能自行添加欄位回朔,不利於本繪圖的時間資料。
import matplotlib.colors as mcolors
import matplotlib.patches as patches
def HeatmapAndWeeklyCandlestick_darw(testX, feature_names):
# 將 numpy.ndarray 接回成 DataFrame
test_df = pd.DataFrame(testX, columns = feature_names)
test_df['xor_result'] = xor_result
# 使用 'Index' 進行回朔資料
test_df = test_df[['Index', 'Open', 'xor_result']].merge(
df[['DATE', 'Index', 'High', 'Low', 'Close']], on = 'Index',
how = 'left')
test_df.set_index('DATE', inplace = True)
test_df = test_df.sort_values(by = 'DATE', ascending = False) # 依時間排序
# 定義函數計算每個分組的聚合值
def aggregate_group(group):
return pd.Series({
'Open': group['Open'].iloc[0], # 第一筆 Open 值
'High': group['High'].max(), # 最大 High 值
'Low': group['Low'].min(), # 最小 Low 值
'Close': group['Close'].iloc[-1] # 最後一筆 Close 值
})
# 生成分組標籤
group_labels = np.arange(
len(test_df) // 5 + (1 if len(test_df) % 5 > 0 else 0))
group_labels = np.repeat(group_labels, 5)[:len(test_df)] # 重複標籤並修正長度
test_df['Group'] = group_labels
# 根據分組進行聚合
KLine_df = test_df.groupby(by = 'Group').apply(
aggregate_group, include_groups = False).reset_index(drop = True)
# =============================================================================
# # DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
# KLine_df = test_df.groupby(by = 'Group').apply(
# aggregate_group).reset_index(drop = True)
# =============================================================================
# 設定綠漲紅跌
KLine_df['Color'] = KLine_df.apply(lambda row: 'g' if row['Close'] >
row['Open'] else 'r', axis = 1)
# 繪製 K 線圖
plt.rcParams['font.family'] = 'Microsoft JhengHei' # 設置中文字體
plt.figure(figsize = (12, 6), facecolor = 'black')
plt.subplot(2, 1, 1).set_facecolor('black') # 設該子圖背景為黑色
# =========================================================================
# plt.subplot(nrows, ncols, index)
# nrows:子圖的列數,垂直向要切成幾張子圖。
# ncols:子圖的欄數,水平向要切成幾張子圖。
# index:子圖索引,從 1 開始,由左至右的順序排列。
# =========================================================================
# 繪製 K 棒
for i in range(len(KLine_df)):
row = KLine_df.iloc[i]
color = row['Color']
plt.plot([KLine_df.index[i], KLine_df.index[i]],
[row['Low'], row['High']], color = color, linewidth = 1) # 垂直細線
plt.plot([KLine_df.index[i], KLine_df.index[i]],
[row['Open'], row['Close']], color = color, linewidth = 5) # 垂直粗線
plt.xlim(-0.5, len(KLine_df) - 0.5) # 調整距離,與下圖對應
plt.xticks([0, 4] + [i for i in range(9, len(KLine_df), 5)],
[1 if i == 0 else i for i in range(0, len(KLine_df), 5)]) # 不可隱藏,會影響網格直線的繪製
plt.yticks(fontsize = 10, color = 'white')
plt.ylabel('匯率', fontsize = 11, color = 'white')
plt.title('【週 K 線】 與 【模型預測分布】 對照圖', fontsize = 14,
color = 'white', va = 'baseline')
plt.grid(True)
# 添加上圖例(但參考依據是下子圖物件)
colors = ['g', 'r']
legend_labels = ['上漲', '下跌']
legend_elements = [plt.Line2D([], [], linestyle = '-.',
color = colors,
lw = 6,
label = label) for colors,
label in zip(colors, legend_labels)]
plt.legend(handles = legend_elements, loc = 'upper left',
ncol = 2, bbox_to_anchor = (-0.01, 1.2),
facecolor = 'black', labelcolor = 'w')
# =============================================================================
# 繪製熱量圖
result = test_df['xor_result']
num_columns = 5 # 每列 5 筆數據
num_rows = (len(result) + num_columns - 1) // num_columns # 計算需要的行數
data_padded = np.pad(result,
(0, num_rows * num_columns - len(result)),
mode = 'constant', constant_values = -1) # 補齊數據
# 重塑為每列 5 筆數據的二維矩陣
result_2d = data_padded.reshape(num_rows, num_columns).T # 轉置以符合每列顯示的要求
plt.subplot(2, 1, 2)
# =========================================================================
# plt.subplot(nrows, ncols, index)
# nrows:子圖的列數,垂直向要切成幾張子圖。
# ncols:子圖的欄數,水平向要切成幾張子圖。
# index:子圖索引,從 1 開始,由左至右的順序排列。
# =========================================================================
# 設定配色
colors = ['#636363', '#00EB00', '#9c9c9c'] # 黑、綠、灰
cmap = mcolors.ListedColormap(colors) # 自訂顏色映射:補齊、0(對) 、1(錯)
bounds = [-1, 0, 1, 2] # -1:補齊數據, 0:原始數據, 1:補齊數據
# =============================================================================
# bounds 的 2 是用來定義顏色映射的邊界範圍,並且它不會直接影響圖像中的顏色。
# =============================================================================
norm = mcolors.BoundaryNorm(bounds, cmap.N)
# 繪圖
plt.imshow(result_2d, cmap = cmap, norm = norm, interpolation = 'none',
aspect = 'equal')
# 加繪每個方格的邊框
ax = plt.gca()
num_rows, num_columns = result_2d.shape
for i in range(num_rows):
for j in range(num_columns):
rect = patches.Rectangle((j - 0.5, i - 0.55), 1, 1,
linewidth = 0.5,
edgecolor = '#636363',
facecolor = 'none')
ax.add_patch(rect)
# 設置圖表整體邊框顏色和寬度
for spine in ax.spines.values():
spine.set_edgecolor('#636363')
spine.set_linewidth(1)
# 設定刻度
plt.xticks([0, 4] + [i for i in range(9, len(KLine_df), 5)] + \
[len(result) // 5] if len(result) % 5 != 0 else [],
[1 if i == 0 else i for i in range(0, len(KLine_df), 5)] + \
[len(result) // 5 + 1] if len(result) % 5 != 0 else [],
fontsize = 10, color = 'white')
plt.yticks(range(5), ['1st', '2nd', '3rd', '4th', '5th'],
fontsize = 9, color = 'white', ha = 'left')
plt.xlabel("交易日(週)", fontsize = 11, color = 'white') # x 軸的標籤
plt.ylabel("每組交易日次序", fontsize = 11, color = 'white') # x 軸的標籤
plt.tick_params(axis = 'y', pad = 16) # 因 ha = 'left' 時會導致重疊顯示,需校正
# =============================================================================
# axis:指定軸
# pad:偏移量,正值左移參考軸,負值右移。pad 是刻度標籤與刻度線之間的距離
# =============================================================================
# 創建圖例框
legend_labels = ['自動補齊', '正確預測', '錯誤預測']
legend_elements = [plt.Line2D([], [], linestyle = '-.',
color = color,
lw = 6,
label = label) for color,
label in zip(colors[1:], legend_labels[1:])]
# =============================================================================
# plt.Line2D:
#
# plt.Line2D([0], [0], color = color, lw = 4, label = label) 創建了一個線條對象,用於圖例。
# linestyle:'-', '--', '-.', ':', 'None', ' ', '', 'solid', 'dashed',
# 'dashdot', 'dotted'
# color = color 設定線條顏色。
# lw = 4 設定線條寬度。
# label = label 設定圖例標籤。
# =============================================================================
# 添加圖例
plt.legend(handles = legend_elements, loc = 'lower left',
ncol = 3, bbox_to_anchor = (-0.005, -1),
facecolor = 'black', labelcolor = 'w')
# =============================================================================
# loc:圖例於圖表中的位置
# ncol:單一行顯示幾個標籤
# bbox_to_anchor:以 loc 為原點,進階調整圖例位置
# facecolor:圖例背景色
# labelcolor:圖例統一文字顏色
# =============================================================================
plt.subplots_adjust(hspace = -0.1) # 調整子圖之間的垂直間距
# 繪製圖形
HeatmapAndWeeklyCandlestick_darw(testX, feature_names)