forked from ashleyrabanales/p1_rabanales
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathP1.py
95 lines (85 loc) · 2.34 KB
/
P1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#Recreate the two graphics in this repo using the gapminder dataset from library(gapminder)
# (get them to match as closely as you can).
#Export the data from R and import it into your Python environment.
#Use plotnine or Altair to mimic the two graphics as close as possible.
#Build a population-weighted average GDP data set; it supplies the black continent-average line on the second plot.
#### Git config
# One-time shell setup: run these in a terminal, not in the Python
# interpreter (as bare statements they are a SyntaxError in this file).
#   git config --global user.name 'ashleyrabanales'
#   git config --global user.email 'arabana2@students.edu'
#%%
# Environment setup cells. Lines starting with `!` are IPython shell escapes,
# so these cells only run in an interactive (Jupyter / VS Code) session.
# `{sys.executable}` expands to the running interpreter, so pip installs into
# the same environment the cell is executing in.
import sys
!{sys.executable} -m pip install altair_saver
#%%
#pip install altair_saver
#%%
#import sys
#!{sys.executable} -m pip install numpy pandas scikit-learn plotnine altair
# %%
!{sys.executable} -m pip install pyarrow
#%%
# NOTE(review): this binds the top-level matplotlib package to `plt`; the
# conventional `plt` alias refers to matplotlib.pyplot -- confirm intent.
import matplotlib as plt
#%%
import sys
# NOTE(review): `X` is not referenced in the visible code (the chart uses
# alt.X), and this path is tied to the v4 schema module -- confirm it is
# needed and that it resolves on the installed Altair version.
from altair.vegalite.v4.schema.channels import X
# Upgrade pip itself in the running interpreter's environment.
!{sys.executable} -m pip install --upgrade pip
# %%
import pandas as pd
import numpy as np
import altair as alt
from plotnine import *
#%%
# Load the gapminder CSV and add `pop100k`: the population expressed in
# units of 100,000 people (used for the bubble size legend).
gapminder = pd.read_csv("gapminder.csv")
gapminder = gapminder.assign(pop100k=gapminder["pop"] / 100000)
#%%
#Python image 1
# Bubble chart of GDP per capita against life expectancy, coloured by
# continent and sized by population, drawn as one panel per year.
p1 = (
    alt.Chart(gapminder)
    .encode(
        x=alt.X(
            'lifeExp',
            title='Life Expectancy',
            # Do not force the x axis to include zero.
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y('gdpPercap', title="GDP per capita"),
        color=alt.Color('continent', title="Continent"),
        size=alt.Size('pop100k', title="Population (100k)"),
    )
    .mark_circle()
    .properties(width=125)
    .facet(column='year')
)
# Bare expression so the interactive cell renders the chart.
p1
#%%
# Altair charts are written with Chart.save(); savefig() is the matplotlib
# method name and does not exist on an Altair chart.
# NOTE(review): PNG output needs a rendering backend (altair_saver is
# installed in an earlier cell) -- confirm for the installed Altair version.
p1.save('p1_python1.png')
#%%
## population-weighted average of gdpPercap for each continent and year
(
    gapminder
    .groupby(['continent', 'year'])
    .apply(lambda grp: np.average(grp['gdpPercap'], weights=grp['pop']))
    .reset_index()
)
#%%
# Unweighted mean of gdpPercap for each continent and year.
# The aggregation is named by string because recent pandas versions warn
# when a NumPy callable such as np.mean is passed to agg().
(
    gapminder
    .groupby(['continent', 'year'])
    .agg(gdpPercap=('gdpPercap', 'mean'))
)
#%%
# Alternative weighted average: sum(gdpPercap * pop) / sum(pop) for each
# continent and year, built from named aggregations.
(
    gapminder
    # The column is spelled `gdpPercap` (lower-case "c").
    .assign(gdppop=lambda x: x['gdpPercap'] * x['pop'])
    .groupby(['continent', 'year'])
    .agg(
        sumgdppop=("gdppop", "sum"),
        totalpop=("pop", "sum"),
    )
    # Turn the group keys back into ordinary columns before deriving the ratio.
    .reset_index()
    .assign(gdpPercapWeighted=lambda x: x.sumgdppop / x.totalpop)
)
#%%
#Python image 2
# Unweighted mean of gdpPercap for each continent and year.
# NOTE(review): the result is only displayed, not stored -- assign it to a
# name if the second plot is meant to use it.
# The aggregation is named by string because recent pandas versions warn
# when a NumPy callable such as np.mean is passed to agg().
(
    gapminder
    .groupby(['continent', 'year'])
    .agg(gdpPercap=('gdpPercap', 'mean'))
)
#%%
# %%