-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPart 1 (S3 Script).py
108 lines (97 loc) · 3.08 KB
/
Part 1 (S3 Script).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Part 1 MSP
# Retrieve data from S3 bucket
# Save to CSV file
import boto3
# Bucket name and key prefix read by ListFiles (blank placeholders here).
_BUCKET_NAME = ''
_PREFIX = ''

# Low-level S3 client constructed with explicitly passed credentials.
# NOTE(review): the key pair is blank in this file; presumably it is filled
# in locally before running -- avoid committing real keys to source control.
client = boto3.client(
    's3',
    aws_access_key_id='',
    aws_secret_access_key='',
)
def ListFiles(client):
    """Yield the key of every object under the configured bucket and prefix.

    The bucket and prefix are taken from the module-level ``_BUCKET_NAME``
    and ``_PREFIX`` constants.

    Args:
        client: An S3 client exposing ``get_paginator``.

    Yields:
        The ``'Key'`` entry of each listed object, in listing order.
    """
    # A single list call returns only one page of results, so walk every
    # page; otherwise keys beyond the first page are silently dropped.
    paginator = client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=_BUCKET_NAME, Prefix=_PREFIX):
        # Pages with no matching objects carry no 'Contents' entry.
        for content in page.get('Contents', []):
            yield content.get('Key')
# Report every key found under the prefix. ListFiles is a generator, so
# file_list is consumed by this loop.
file_list = ListFiles(client)
for file in file_list:
    print(f'File found: {file}')

# Accessing s3 bucket data
# Resource-style handle, built with the same explicitly passed (blank)
# credentials as the client.
s3 = boto3.resource(
    's3',
    aws_access_key_id='',
    aws_secret_access_key='',
)
import json
import pandas as pd
import boto3
import io
# Bucket handle (name left blank here) and the collection of its objects
# matching the prefix; the sensor sections below loop over prefix_objs.
bucket = s3.Bucket('')
prefix_objs = bucket.objects.filter(Prefix="")
# Read in accelerometer sensor data
# Check the shape
# Save as csv
#
# Each object in prefix_objs is downloaded and parsed as JSON; every file
# that has an 'accelerometer' entry contributes one frame, and the frames
# are stacked and written to accelerometer.csv.
accel_frames = [pd.DataFrame(columns=['Altitude',
                                      'JourneyID',
                                      'lat',
                                      'lng',
                                      'Mode',
                                      'Timestamp'])]
for obj in prefix_objs:
    key = obj.key
    obje = client.get_object(Bucket='',
                             Key=key)
    data = obje['Body'].read().decode()
    json_content = json.loads(data)
    try:
        json_contents = json_content['accelerometer']
    except KeyError:
        # This file has no accelerometer entry. Skip it instead of falling
        # through and re-appending the frame left over from the previous
        # file (or hitting a NameError when the first file lacks the key).
        continue
    dff = pd.DataFrame(json_contents)
    accel_frames.append(dff)
# DataFrame.append was removed in pandas 2.0; a single concat over the
# collected frames gives the same stacked result without re-copying the
# accumulated frame on every iteration.
df_accel = pd.concat(accel_frames)
# Bare expressions: they only display a value in a notebook/REPL session.
df_accel.shape
df_accel.head()
df_accel.to_csv('accelerometer.csv',
                index=False)
# Read in gyroscope sensor data
# Check the shape
# Save as csv
#
# Each object in prefix_objs is downloaded and parsed as JSON; every file
# that has a 'gyroscope' entry contributes one frame, and the frames are
# stacked and written to gyroscope.csv.
#
# The original never created df_gyro before appending to it, which raised
# a NameError. Seed it with the same column layout the accelerometer and
# magnetometer sections use.
gyro_frames = [pd.DataFrame(columns=['Altitude',
                                     'JourneyID',
                                     'lat',
                                     'lng',
                                     'Mode',
                                     'Timestamp'])]
for obj in prefix_objs:
    key = obj.key
    obje = client.get_object(Bucket='',
                             Key=key)
    data = obje['Body'].read().decode()
    json_content = json.loads(data)
    try:
        json_contents = json_content['gyroscope']
    except KeyError:
        # This file has no gyroscope entry. Skip it instead of falling
        # through and re-appending the frame left over from the previous
        # file.
        continue
    dff = pd.DataFrame(json_contents)
    gyro_frames.append(dff)
# DataFrame.append was removed in pandas 2.0; a single concat over the
# collected frames replaces the per-file append.
df_gyro = pd.concat(gyro_frames)
# Bare expressions: they only display a value in a notebook/REPL session.
df_gyro.shape
df_gyro.head()
df_gyro.to_csv('gyroscope.csv',
               index=False)
# Read in magnetometer sensor data
# Check the shape
# Save as csv
#
# Each object in prefix_objs is downloaded and parsed as JSON; every file
# that has a 'magnetometer' entry contributes one frame, and the frames
# are stacked and written to magnetometer.csv.
magneto_frames = [pd.DataFrame(columns=['Altitude',
                                        'JourneyID',
                                        'lat',
                                        'lng',
                                        'Mode',
                                        'Timestamp'])]
for obj in prefix_objs:
    key = obj.key
    obje = client.get_object(Bucket='',
                             Key=key)
    data = obje['Body'].read().decode()
    json_content = json.loads(data)
    try:
        json_contents = json_content['magnetometer']
    except KeyError:
        # This file has no magnetometer entry. Skip it instead of falling
        # through and re-appending the frame left over from the previous
        # file (or hitting a NameError when the first file lacks the key).
        continue
    dff = pd.DataFrame(json_contents)
    magneto_frames.append(dff)
# DataFrame.append was removed in pandas 2.0; a single concat over the
# collected frames gives the same stacked result without re-copying the
# accumulated frame on every iteration.
df_magneto = pd.concat(magneto_frames)
# Bare expressions: they only display a value in a notebook/REPL session.
df_magneto.shape
df_magneto.head()
df_magneto.to_csv('magnetometer.csv',
                  index=False)