@@ -4,15 +4,16 @@ library(srvyr)
4
4
library(ggplot2 )
5
5
6
6
# pull gss data
7
-
8
7
temp <- tempfile()
9
8
download.file(" https://gss.norc.org/documents/stata/GSS_stata.zip" ,temp )
10
- gss_orig <- haven :: read_dta(unz(temp , filename = " GSS7218_R1.DTA" )) %> %
9
+
10
+ # if this next line errors with "No such file or directory", try
11
+ # incrementing the number after "_R"
12
+ gss_orig <- haven :: read_dta(unz(temp , filename = " GSS7218_R2.DTA" )) %> %
11
13
haven :: as_factor()
12
14
unlink(temp )
13
15
14
16
# select relevant columns
15
-
16
17
gss_small <- gss_orig %> %
17
18
filter(! stringr :: str_detect(sample , " blk oversamp" )) %> % # this is for weighting
18
19
select(year , age , sex , college = degree , partyid , hompop ,
@@ -49,14 +50,13 @@ gss_small <- gss_orig %>%
49
50
)
50
51
)
51
52
52
- # sample 3k of the full data set
53
-
54
- set.seed(20191105 )
53
+ # sample 500 rows, first dropping NAs
54
+ set.seed(20200201 )
55
55
gss <- gss_small %> %
56
- sample_n(3000 )
56
+ drop_na() %> %
57
+ sample_n(500 )
57
58
58
59
# check that the sample is similar unweighted to weighted
59
-
60
60
gss_wt <- srvyr :: as_survey_design(gss , weights = weight )
61
61
62
62
unweighted <- gss %> %
@@ -70,10 +70,7 @@ weighted <- gss_wt %>%
70
70
group_by(year , sex , partyid ) %> %
71
71
summarize(prop = srvyr :: survey_mean())
72
72
73
- # ehhhh close enough until you really drill down, we'll put a disclaimer
74
-
75
73
# save data into package
76
-
77
74
usethis :: use_data(gss , overwrite = TRUE )
78
75
79
76
devtools :: document()
0 commit comments