-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaggregate_rural_and_urban.py
65 lines (60 loc) · 2.43 KB
/
aggregate_rural_and_urban.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python
# This script takes an input file ("all_cases.csv") that contains location-stratified
# disease time series data, and aggregates the disease incidence based on whether the
# location was rural or urban.
#
# You will be making multiple changes to the code below. Each time you make a
# substantial change, verify that you have not changed the output from the program.
#
# The Exercise:
#
# 1) Read through, understand, and annotate the code. If you do not understand a line,
# investigate it by experimenting, checking python references, and talking to others.
#
# 2) If there is anything simple that can be done to make the code more readable or
# manageable, go ahead and do that--things like renaming variables or reducing the use
# of "magical" numbers.
#
# 3) Break the code up into functional units, and then turn those units into functions.
#
# 4) Write a class that can represent the input data in a useful way. Create a parser
# function in the class that takes a line of text from the file and returns structured
# data that is useful and easy to understand.
# Input file with one header line followed by one row per location and year.
DATA_FILE = "all_cases.csv"

# Years for which weekly totals are accumulated and reported (1995..2011 inclusive).
YEARS = range(1995, 2012)

# Number of weekly counts aggregated and reported per year.
WEEKS_PER_YEAR = 52

# Municipality codes that are treated as urban; every other code counts as rural.
URBAN_MUNICIPALITY_CODES = ('050', '101', '041')

# Column positions within a comma-separated data row.
YEAR_COLUMN = 0
LOCATION_NAME_COLUMN = 3
MUNICIPALITY_COLUMN = 4
FIRST_WEEK_COLUMN = 5


def parse_case_line(line):
    """Parse one CSV data line.

    Returns a tuple ``(year, location_name, municipality_code, weekly_cases)``
    where ``year`` is an int, the name and code are the raw strings from the
    row, and ``weekly_cases`` is the list of ints found from column
    ``FIRST_WEEK_COLUMN`` to the end of the row.

    Raises ValueError if the year or any weekly count is not an integer, and
    IndexError if the row has too few columns.
    """
    parts = line.strip().split(',')
    year = int(parts[YEAR_COLUMN])
    weekly_cases = [int(value) for value in parts[FIRST_WEEK_COLUMN:]]
    return (year, parts[LOCATION_NAME_COLUMN], parts[MUNICIPALITY_COLUMN],
            weekly_cases)


def parse_case_lines(lines):
    """Parse an iterable of CSV lines, discarding the first (header) line."""
    line_iter = iter(lines)
    next(line_iter, None)  # skip the header; harmless when there are no lines
    return [parse_case_line(line) for line in line_iter]


def read_case_records(path):
    """Read the CSV file at ``path`` and return its parsed data rows."""
    # The context manager closes the handle even if a row fails to parse.
    with open(path) as handle:
        return parse_case_lines(handle)


def aggregate_by_location_type(records):
    """Sum weekly cases per year, separately for urban and rural locations.

    Returns ``(urban_ts, rural_ts)``: two dicts mapping every year in ``YEARS``
    to a list of ``WEEKS_PER_YEAR`` totals. Only the first ``WEEKS_PER_YEAR``
    counts of each record contribute.

    Raises KeyError for a record whose year is outside ``YEARS`` and
    IndexError for a record with fewer than ``WEEKS_PER_YEAR`` counts.
    """
    urban_ts = dict((year, [0] * WEEKS_PER_YEAR) for year in YEARS)
    rural_ts = dict((year, [0] * WEEKS_PER_YEAR) for year in YEARS)
    for year, _name, municipality_code, weekly_cases in records:
        if municipality_code in URBAN_MUNICIPALITY_CODES:
            totals = urban_ts[year]
        else:
            totals = rural_ts[year]
        for week in range(WEEKS_PER_YEAR):
            totals[week] += weekly_cases[week]
    return urban_ts, rural_ts


def format_location_totals(records):
    """Return one "name code total" line per record, in input order.

    The total covers every weekly count on the row, not just the first
    ``WEEKS_PER_YEAR``.
    """
    return ["%s %s %d" % (name, municipality_code, sum(weekly_cases))
            for _year, name, municipality_code, weekly_cases in records]


def format_weekly_totals(urban_ts, rural_ts):
    """Return one "year week urban rural" line per year and 1-based week."""
    rows = []
    for year in YEARS:
        for week in range(WEEKS_PER_YEAR):
            rows.append("%d %d %d %d" % (year, week + 1,
                                         urban_ts[year][week],
                                         rural_ts[year][week]))
    return rows


def main():
    """Print per-location totals, then the urban/rural weekly time series."""
    records = read_case_records(DATA_FILE)
    # Aggregate before printing so a bad row aborts before any output appears.
    urban_ts, rural_ts = aggregate_by_location_type(records)

    # NOTE(review): this header names two columns but each row below has three
    # (location, municipality code, total). Left as-is to keep output unchanged.
    print("location total_cases")
    for row in format_location_totals(records):
        print(row)

    # Three blank lines separate the two tables.
    print("")
    print("")
    print("")

    print("year week urban rural")
    for row in format_weekly_totals(urban_ts, rural_ts):
        print(row)


if __name__ == "__main__":
    main()