-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocessing.js
102 lines (91 loc) · 3.8 KB
/
preprocessing.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
const fs = require('fs')
const d3Dsv = require('d3-dsv')
const d3Array = require('d3-array')
const geolocation = require('./geolocation.js')
// Global settings for this programme; they are read by parseData and writeDataFile.
const settings = {
  // Base path of the output file; writeDataFile appends "_<index>.json" to it
  fileName: "output/jsonData",

  // When true, parseData passes every row through filterProperties
  filterData: true,

  // When true, parseData sets huidigeLocatie (current location) to null on every item
  removeResidenceData: false
}
// Entry point: start the pipeline as soon as the script is run
loadFile()

// loadFile reads input/rawData.csv as UTF-8 text using the fs package.
// On success the file content is handed to parseData; on failure the error
// is logged to the console and nothing else happens.
function loadFile(){
  const onFileRead = (err, data) => {
    if (err) {
      console.log(err)
      return
    }
    parseData(data)
  }

  fs.readFile("input/rawData.csv", {encoding: 'utf-8'}, onFileRead)
}
// parseData takes the raw CSV text and turns it into the records we want to publish.
// Steps, in order: parse and shuffle, optional property filtering, adding an id and
// renaming the two location columns, optional removal of the place of residence,
// obfuscation of the remaining locations, and finally writing the result to disk.
//   source: the CSV text as read from the input file
// Returns nothing; the resulting array is passed on to writeDataFile.
function parseData(source){
  // Column headers of the survey questions that hold the pasted GPS locations
  const residenceColumn = "Woonplaats: plak GPS locatie uit google maps (instructie hier: https://www.lifewire.com/latitude-longitude-coordinates-google-maps-1683398 )"
  const birthplaceColumn = 'Geboorteplaats (plak GPS locatie)'

  // Convert the CSV to row objects with d3's csvParse, then shuffle to help with anonymity
  const rows = d3Dsv.csvParse(source)
  const data = d3Array.shuffle(rows)
  console.log("#Entries in data: ", data.length)

  // With filtering on, every row is replaced by the object filterProperties builds from it
  let selection = data
  if (settings.filterData) {
    selection = data.map(filterProperties)
  }

  // Give each item an id (its position after shuffling) and move the two location
  // columns to shorter property names.
  // NOTE(review): filterProperties does not copy the location columns, so with
  // filterData on both lookups below yield undefined - confirm this is intended.
  for (const [index, item] of selection.entries()) {
    item.id = index
    item.huidigeLocatie = item[residenceColumn]
    item.geboortePlaats = item[birthplaceColumn]
    delete item[residenceColumn]
    delete item[birthplaceColumn]
  }

  // If removeResidenceData is on, blank out the place of residence entirely
  if (settings.removeResidenceData) {
    selection = selection.map((item) => {
      item.huidigeLocatie = null
      return item
    })
  }

  // Every location that is still set is replaced by the result of
  // geolocation.obfuscateLocation (implemented in ./geolocation.js)
  for (const item of selection) {
    for (const field of ["huidigeLocatie", "geboortePlaats"]) {
      if (item[field]) {
        item[field] = geolocation.obfuscateLocation(item[field])
      }
    }
  }

  writeDataFile(selection)
}
// writeDataFile saves data as pretty-printed JSON (4-space indent) to
// "<settings.fileName>_<fileIndex>.json" without ever overwriting an existing file.
// The file is opened with the 'wx' flag, so the write fails with EEXIST when the name
// is already taken. In that case the function calls itself again with the next index,
// which makes it recursive. The default parameter lets callers start at index 0
// without needing an outside counter variable.
//   data:      any JSON-serialisable value
//   fileIndex: number used in the file name for this attempt (defaults to 0)
// Returns nothing; the outcome is reported on the console from the write callback.
function writeDataFile(data, fileIndex = 0)
{
  // Build the path once so the write and the success message always agree
  const filePath = settings.fileName + "_" + fileIndex + ".json"

  fs.writeFile(
    filePath,
    JSON.stringify(data, null, 4),
    { encoding: 'utf8', flag: 'wx' },
    (err) => {
      if (!err) {
        console.log("The file was saved!", filePath)
        return
      }
      // The file name already exists: try again with the number at the end increased by 1
      if (err.code === "EEXIST") {
        writeDataFile(data, fileIndex + 1)
        return
      }
      // Any other error (for example a missing output directory) is only logged
      console.log(err)
    })
}
// filterProperties builds a new object that holds only the survey answers we want in
// the output, stored under short property names instead of the full question texts.
//   item: one parsed row, keyed by the original column headers
// Returns the new object; a column that is missing from item ends up as undefined.
// item itself is not modified.
function filterProperties(item){
  // [output property, original column header] pairs, listed in output order
  const columnByProperty = [
    ["gender", "Gender"],
    ["brothers", "Hoeveel broers heb je?"],
    ["sisters", "Hoeveel zussen heb je?"],
    ["height", "Wat is je lengte? (in centimeters)"],
    ["license", "Heb je een rijbewijs?"],
    ["health", "Welk cijfer geef je je gezondheid?"],
    ["stress", "Geef een cijfer aan je stressniveau (1-10)"],
    ["preference", "Waar liggen je (CMD) voorkeuren?"],
    ["biggestExpense", "Hoogste bedrag dat je ooit aan iets hebt uitgegeven"]
  ]

  const picked = {}
  for (const [property, column] of columnByProperty) {
    picked[property] = item[column]
  }
  return picked
}