-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathai-nose-dataset-curation.py
84 lines (70 loc) · 2.2 KB
/
ai-nose-dataset-curation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import csv
import os
import numpy as np
import pandas as pd
### Settings
# Input: root folder whose sub-folders hold the raw ";"-separated CSV files.
# (The "DATESET" spelling is kept because the rest of the script uses it.)
DATESET_PATH = "dataset"
# Output folders that receive the merged train.csv and test.csv files.
TRAIN_PATH, TEST_PATH = "train", "test"
# Base path; empty and not referenced anywhere in the visible script.
HOME_PATH = ""
### Read every file under DATESET_PATH/<folder>/ into two row lists
# A file whose base name (text before the first ".") is "test" feeds testData;
# any other file feeds trainData, with that base name prepended as the label.
# The first field of every row is dropped in both cases.
trainData = []
testData = []

# sorted() keeps the row order reproducible; os.listdir() order is arbitrary.
for folderName in sorted(os.listdir(DATESET_PATH)):
    filePath = os.path.abspath(os.path.join(DATESET_PATH, folderName))
    # Skip plain files lying next to the data folders: listing them would
    # raise NotADirectoryError.
    if not os.path.isdir(filePath):
        continue
    for file in sorted(os.listdir(filePath)):
        category = file.split(".")[0]
        # newline="" lets the csv module do its own line-ending handling.
        with open(os.path.join(filePath, file), newline="") as f:
            for line in csv.reader(f, delimiter=";"):
                # Blank lines are returned as []; keeping them would create
                # rows of a different length and break the array conversion.
                if not line:
                    continue
                if category == "test":
                    testData.append(line[1:])
                else:
                    trainData.append([category] + line[1:])

# Values stay as strings; no numeric conversion is done here.
trainData = np.array(trainData)
testData = np.array(testData)
# Train Dataset: "category" column followed by sensor1..sensorN -> train.csv
# An empty row list converts to a 1-D array, on which shape[1] raises
# IndexError, so the table is only written when the array is really 2-D.
if trainData.ndim == 2:
    # Column 0 is the label; the remaining columns are numbered from 1.
    trainHeader = ["category"] + [
        f"sensor{j}" for j in range(1, trainData.shape[1])
    ]
    trainDf = pd.DataFrame(trainData, columns=trainHeader)
    trainFilePath = os.path.abspath(TRAIN_PATH)
    # exist_ok=True replaces the separate exists() check.
    os.makedirs(trainFilePath, exist_ok=True)
    trainDf.to_csv(os.path.join(trainFilePath, "train.csv"), index=False)
    print("train data is ready.")
else:
    print("train data is empty or not a table; train.csv was not written.")
# Test Dataset: unlabeled columns sensor1..sensorN -> test.csv
# When no "test" file was read, the array is 1-D and shape[1] would raise
# IndexError, so the table is only written when the array is really 2-D.
if testData.ndim == 2:
    # No label column here, so sensor numbering starts at the first column.
    testHeader = [f"sensor{j}" for j in range(1, testData.shape[1] + 1)]
    testDf = pd.DataFrame(testData, columns=testHeader)
    testFilePath = os.path.abspath(TEST_PATH)
    # exist_ok=True replaces the separate exists() check.
    os.makedirs(testFilePath, exist_ok=True)
    testDf.to_csv(os.path.join(testFilePath, "test.csv"), index=False)
    print("test data is ready.")
else:
    print("test data is empty or not a table; test.csv was not written.")