Introduction

Please load the following packages:

library(tidyverse)
library(stargazer)
library(ggeffects)
library(psych)
library(janitor)
library(fastDummies)
library(gtsummary)

Loading data

Reading in data from the working directory

# Load the Persson and Tabellini dataset from a CSV file located in the
# current working directory
persson_tabellini_original <- read_csv("persson_tabellini_workshop.csv")

Reading data directly from an online source (Alternative method)

# Load the Persson and Tabellini dataset straight from the GitHub repository
# (no local copy of the CSV file is required)
persson_tabellini_original <- read_csv(
  "https://raw.githubusercontent.com/aranganath24/r_primer/main/workshop_data/persson_tabellini_workshop.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   country = col_character(),
##   continent = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

Make a copy of the dataset

# Work on a copy ("pt_copy") so that the original dataset is never altered
pt_copy <- persson_tabellini_original
# Display the contents of "pt_copy" in the console
print(pt_copy)
## # A tibble: 85 × 75
##     oecd country     pind pindo ctrycd col_uk t_indep col_uka col_espa col_otha
##    <dbl> <chr>      <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1     0 Argentina  0     0        213      0     183   0        0.268    0    
##  2     1 Australia  1     1        193      1      98   0.608    0        0    
##  3     1 Austria    0     0        122      0     250   0        0        0    
##  4     0 Bahamas    1     1        313      1      26   0.896    0        0    
##  5     0 Bangladesh 1     1        513      0      28   0        0        0.888
##  6     0 Barbados   1     1        316      1      33   0.868    0        0    
##  7     0 Belarus    1     1        913      0       8   0        0        0.968
##  8     1 Belgium    0     0        124      0     169   0        0        0.324
##  9     0 Belize     1     1        339      1      18   0.928    0        0    
## 10     0 Bolivia    0.116 0.116    218      0     174   0        0.304    0    
## # … with 75 more rows, and 65 more variables: legor_uk <dbl>, legor_so <dbl>,
## #   legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>,
## #   catho80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>, graft <dbl>,
## #   logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, engfrac <dbl>,
## #   eurfrac <dbl>, frankrom <dbl>, latitude <dbl>, gastil <dbl>, cgexp <dbl>,
## #   cgrev <dbl>, ssw <dbl>, rgdph <dbl>, trade <dbl>, prop1564 <dbl>,
## #   prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, yearele <dbl>, …
# Open "pt_copy" in the spreadsheet-style data viewer
View(pt_copy)
oecd country pind pindo ctrycd col_uk t_indep col_uka col_espa col_otha legor_uk legor_so legor_fr legor_ge legor_sc prot80 catho80 confu avelf govef graft logyl loga yrsopen gadp engfrac eurfrac frankrom latitude gastil cgexp cgrev ssw rgdph trade prop1564 prop65 federal eduger spropn yearele yearreg seats maj pres lyp semi majpar majpres propres dem_age lat01 age polityIV spl cpi9500 du_60ctry magn sdm oecd.x mining_gdp gini_8090 con2150 con5180 con81 list maj_bad maj_gin maj_old pres_bad pres_gin pres_old propar lpop continent
0 Argentina 0 0 213 0 183 0.000 0.268 0.000 0 0 1 0 0 2.7 91.6 0 0.1769318 4.475911 5.549095 9.60270 8.39295 0.089 0.579 0.000 0.836 1.723 -36.676 2.333333 14.00048 12.31048 6.7540784 5831.075 18.42008 61.37738 9.293244 1 94.8000 0.0 1983 1983 257.11111 0 1 8.670957 0 0.0000000 0.0000000 1 1983 0.4075111 0.085 7.000000 -0.5086034 6.506667 1 0.0933475 0.1889764 0 1.8993865 NA 0 0 1 257.1111 0.000000 NA 0.000 2.333333 NA 0.085 0 17.35051 laam
1 Australia 1 1 193 1 98 0.608 0.000 0.000 1 0 0 0 0 23.5 29.6 0 0.1127971 2.082613 1.797832 10.30421 8.67103 0.689 0.931 0.950 0.950 1.404 -32.219 1.000000 25.78327 24.22912 8.6001902 15499.723 38.79273 66.80672 11.663297 1 111.3375 0.0 1901 1901 147.66667 1 0 9.648578 0 1.0000000 0.0000000 0 1901 0.3579889 0.495 10.000000 -0.6793869 1.340000 1 1.0000000 0.1904762 1 4.4933643 39.860 0 0 0 0.0000 1.000000 39.860 0.495 0.000000 0 0.000 0 16.69928 other
1 Austria 0 0 122 0 250 0.000 0.000 0.000 0 0 0 1 0 6.5 88.8 0 0.0332123 2.562550 2.085785 10.13165 8.80521 0.778 0.949 0.000 0.980 3.601 48.231 1.000000 40.14563 36.20243 17.8820381 13313.135 78.26205 67.45173 15.100343 1 103.9571 0.0 1945 1945 183.00000 0 0 9.496507 0 0.0000000 0.0000000 0 1945 0.5359000 0.275 10.000000 -4.4101000 2.478333 1 0.0491803 0.0728745 1 0.3989157 29.000 1 0 0 183.0000 0.000000 0.000 0.000 0.000000 0 0.000 1 15.89130 other
0 Bahamas 1 1 313 1 26 0.896 0.000 0.000 1 0 0 0 0 47.2 25.5 0 0.0000000 4.052546 4.005234 NA NA NA 0.614 0.865 0.865 3.638 24.700 1.722222 18.80976 17.46011 0.9501425 11768.360 101.83389 63.95686 4.389884 0 95.3000 0.0 1973 1973 47.00000 1 0 9.373170 0 1.0000000 0.0000000 0 1973 0.2744445 0.135 NA -2.3542581 NA 1 1.0000000 1.0000000 0 0.5927405 45.000 0 1 0 0.0000 1.722222 45.000 0.135 0.000000 0 0.000 0 12.51776 laam
0 Bangladesh 1 1 513 0 28 0.000 0.000 0.888 1 0 0 0 0 0.2 0.2 0 0.0000000 6.129216 5.578219 8.41382 8.65712 0.000 0.313 0.000 0.000 2.333 23.880 3.166667 12.56501 13.79131 0.1292526 1611.785 25.35251 53.81113 3.232165 0 43.6000 0.1 1991 1991 330.00000 1 0 7.385097 0 0.8888889 0.1111111 0 1991 0.2653333 0.045 4.777778 1.1960450 7.710000 0 0.9090909 1.0000000 0 0.0184373 33.635 0 0 1 0.0000 3.166667 33.635 0.045 0.000000 0 0.000 0 NA asiae
0 Barbados 1 1 316 1 33 0.868 0.000 0.000 1 0 0 0 0 33.2 5.9 0 0.0733333 NA NA 9.56870 8.54757 1.000 0.739 1.000 1.000 4.027 13.179 1.000000 32.36219 NA NA 7094.703 116.35070 65.17910 11.119803 0 NA 0.0 1966 1966 27.88889 1 0 8.867104 0 1.0000000 0.0000000 0 1966 0.1464333 0.170 NA NA NA 1 1.0000000 1.0000000 0 0.5067325 49.000 0 1 0 0.0000 1.000000 49.000 0.170 0.000000 0 0.000 0 12.47983 laam

Summary statistics

Generating a table of summary statistics

# Compute summary statistics for the columns of "pt_copy" and store the
# resulting table in a new object named "pt_copy_summarystats1"
pt_copy_summarystats1 <- describe(pt_copy)
# Display the contents of "pt_copy_summarystats1" in the console
print(pt_copy_summarystats1)
##            vars  n    mean      sd  median trimmed     mad     min      max
## oecd          1 85    0.29    0.46    0.00    0.25    0.00    0.00     1.00
## country*      2 85   43.00   24.68   43.00   43.00   31.13    1.00    85.00
## pind          3 85    0.46    0.47    0.42    0.45    0.62    0.00     1.00
## pindo         4 85    0.61    0.46    0.90    0.63    0.15    0.00     1.00
## ctrycd        5 85  430.88  284.89  299.00  406.86  240.18  111.00   968.00
## col_uk        6 85    0.35    0.48    0.00    0.32    0.00    0.00     1.00
## t_indep       7 85  119.73   89.76   92.00  117.48  114.16    6.00   250.00
## col_uka       8 85    0.28    0.39    0.00    0.24    0.00    0.00     0.93
## col_espa      9 85    0.06    0.13    0.00    0.03    0.00    0.00     0.79
## col_otha     10 85    0.22    0.36    0.00    0.16    0.00    0.00     0.98
## legor_uk     11 85    0.39    0.49    0.00    0.36    0.00    0.00     1.00
## legor_so     12 85    0.13    0.34    0.00    0.04    0.00    0.00     1.00
## legor_fr     13 85    0.35    0.48    0.00    0.32    0.00    0.00     1.00
## legor_ge     14 85    0.07    0.26    0.00    0.00    0.00    0.00     1.00
## legor_sc     15 85    0.06    0.24    0.00    0.00    0.00    0.00     1.00
## prot80       16 85   17.46   25.50    2.70   12.24    3.85    0.00    97.80
## catho80      17 85   40.69   38.66   27.60   38.92   39.88    0.00    97.30
## confu        18 85    0.07    0.26    0.00    0.00    0.00    0.00     1.00
## avelf        19 85    0.29    0.26    0.18    0.26    0.21    0.00     0.84
## govef        20 81    4.21    1.75    4.48    4.25    1.99    0.84     7.26
## graft        21 81    4.17    1.89    4.24    4.26    2.18    0.74     6.92
## logyl        22 74    9.23    0.90    9.29    9.30    1.15    6.95    10.48
## loga         23 73    8.17    0.61    8.37    8.23    0.58    6.28     9.02
## yrsopen      24 75    0.52    0.54    0.42    0.47    0.53    0.00     4.09
## gadp         25 75    0.69    0.20    0.68    0.69    0.24    0.31     1.00
## engfrac      26 78    0.14    0.32    0.00    0.06    0.00    0.00     1.00
## eurfrac      27 78    0.40    0.44    0.08    0.38    0.12    0.00     1.00
## frankrom     28 78    2.87    0.84    2.91    2.86    0.78    0.94     5.64
## latitude     29 78   17.96   27.87   17.30   19.13   33.67  -36.89    63.89
## gastil       30 85    2.44    1.23    2.28    2.36    1.65    1.00     4.89
## cgexp        31 82   28.82   10.49   28.90   28.50   13.04    9.74    51.18
## cgrev        32 78   26.49   10.12   24.16   26.05   10.98    8.92    50.85
## ssw          33 71    8.15    6.67    7.17    7.65    8.60    0.13    22.38
## rgdph        34 85 6688.63 5495.17 4400.03 6130.45 4371.13  530.22 20782.81
## trade        35 85   78.77   47.34   68.45   73.50   34.38   17.56   343.39
## prop1564     36 84   62.07    5.76   63.94   62.49    5.31   49.05    71.70
## prop65       37 84    8.45    4.89    6.47    8.26    5.68    2.30    17.43
## federal      38 83    0.16    0.37    0.00    0.07    0.00    0.00     1.00
## eduger       39 82   88.58   17.70   93.48   90.55   14.33   40.05   117.11
## spropn       40 77    0.13    0.25    0.00    0.07    0.00    0.00     1.00
## yearele      41 81 1965.55   36.85 1981.00 1972.62   16.31 1800.00  1994.00
## yearreg      42 81 1961.48   40.16 1978.00 1969.65   20.76 1800.00  1994.00
## seats        43 85  215.45  162.54  166.00  195.25  127.17   15.00   656.00
## maj          44 85    0.39    0.49    0.00    0.36    0.00    0.00     1.00
## pres         45 85    0.39    0.49    0.00    0.36    0.00    0.00     1.00
## lyp          46 85    8.41    0.97    8.39    8.45    1.25    6.27     9.94
## semi         47 85    0.11    0.31    0.00    0.01    0.00    0.00     1.00
## majpar       48 85    0.25    0.42    0.00    0.19    0.00    0.00     1.00
## majpres      49 85    0.13    0.33    0.00    0.04    0.00    0.00     1.00
## propres      50 85    0.26    0.44    0.00    0.20    0.00    0.00     1.00
## dem_age      51 85 1958.34   43.74 1978.00 1967.00   19.27 1800.00  1994.00
## lat01        52 78    0.32    0.19    0.28    0.31    0.20    0.00     0.71
## age          53 85    0.21    0.22    0.11    0.17    0.10    0.03     1.00
## polityIV     54 80    7.17    3.64    8.11    7.92    2.80   -6.00    10.00
## spl          55 74   -2.18    3.48   -1.69   -2.22    2.85  -11.36    12.59
## cpi9500      56 71    4.81    2.38    5.29    4.95    2.46    0.27     8.25
## du_60ctry    57 85    0.71    0.46    1.00    0.75    0.00    0.00     1.00
## magn         58 84    0.47    0.40    0.26    0.46    0.31    0.01     1.00
## sdm          59 77    0.35    0.39    0.16    0.32    0.15    0.01     1.00
## oecd.x       60 85    0.27    0.45    0.00    0.22    0.00    0.00     1.00
## mining_gdp   61 77    4.26    6.72    1.29    2.82    1.51    0.02    37.20
## gini_8090    62 72   39.20   10.41   37.52   38.71   12.23   19.49    62.30
## con2150      63 85    0.11    0.31    0.00    0.01    0.00    0.00     1.00
## con5180      64 85    0.29    0.46    0.00    0.25    0.00    0.00     1.00
## con81        65 85    0.49    0.50    0.00    0.49    0.00    0.00     1.00
## list         66 84  114.48  129.55   77.67   93.16  115.15    0.00   510.33
## maj_bad      67 85    1.06    1.60    0.00    0.77    0.00    0.00     4.89
## maj_gin      68 72   16.40   20.96    0.00   13.86    0.00    0.00    62.00
## maj_old      69 85    0.08    0.19    0.00    0.04    0.00    0.00     1.00
## pres_bad     70 85    1.21    1.69    0.00    0.97    0.00    0.00     4.89
## pres_gin     71 72   16.66   22.90    0.00   13.83    0.00    0.00    62.00
## pres_old     72 85    0.06    0.16    0.00    0.02    0.00    0.00     1.00
## propar       73 85    0.35    0.48    0.00    0.32    0.00    0.00     1.00
## lpop         74 60   15.90    1.92   15.94   15.95    1.47   11.61    20.63
## continent*   75 85    3.04    1.06    3.00    3.16    1.48    1.00     4.00
##               range  skew kurtosis     se
## oecd           1.00  0.89    -1.23   0.05
## country*      84.00  0.00    -1.24   2.68
## pind           1.00  0.13    -1.89   0.05
## pindo          1.00 -0.46    -1.69   0.05
## ctrycd       857.00  0.63    -1.08  30.90
## col_uk         1.00  0.60    -1.65   0.05
## t_indep      244.00  0.26    -1.54   9.74
## col_uka        0.93  0.71    -1.44   0.04
## col_espa       0.79  2.68     9.07   0.01
## col_otha       0.98  1.16    -0.42   0.04
## legor_uk       1.00  0.45    -1.82   0.05
## legor_so       1.00  2.17     2.74   0.04
## legor_fr       1.00  0.60    -1.65   0.05
## legor_ge       1.00  3.29     8.96   0.03
## legor_sc       1.00  3.68    11.71   0.03
## prot80        97.80  1.65     1.99   2.77
## catho80       97.30  0.38    -1.59   4.19
## confu          1.00  3.29     8.96   0.03
## avelf          0.84  0.76    -0.79   0.03
## govef          6.42 -0.22    -1.11   0.19
## graft          6.17 -0.37    -1.14   0.21
## logyl          3.52 -0.57    -0.53   0.10
## loga           2.73 -0.85     0.12   0.07
## yrsopen        4.09  3.72    22.32   0.06
## gadp           0.69  0.11    -1.19   0.02
## engfrac        1.00  1.97     2.04   0.04
## eurfrac        1.00  0.31    -1.80   0.05
## frankrom       4.70  0.25     0.60   0.10
## latitude     100.78 -0.31    -0.98   3.16
## gastil         3.89  0.39    -1.16   0.13
## cgexp         41.44  0.17    -0.99   1.16
## cgrev         41.93  0.34    -0.84   1.15
## ssw           22.26  0.41    -1.16   0.79
## rgdph      20252.59  0.77    -0.71 596.03
## trade        325.82  2.37     9.85   5.13
## prop1564      22.66 -0.60    -0.87   0.63
## prop65        15.13  0.23    -1.60   0.53
## federal        1.00  1.86     1.46   0.04
## eduger        77.06 -0.94     0.59   1.95
## spropn         1.00  2.22     4.33   0.03
## yearele      194.00 -2.13     5.16   4.09
## yearreg      194.00 -1.85     3.28   4.46
## seats        641.00  1.06     0.18  17.63
## maj            1.00  0.45    -1.82   0.05
## pres           1.00  0.45    -1.82   0.05
## lyp            3.67 -0.24    -1.04   0.11
## semi           1.00  2.52     4.39   0.03
## majpar         1.00  1.12    -0.69   0.05
## majpres        1.00  2.17     2.75   0.04
## propres        1.00  1.08    -0.84   0.05
## dem_age      194.00 -1.65     2.05   4.74
## lat01          0.71  0.27    -1.10   0.02
## age            0.97  1.65     2.05   0.02
## polityIV      16.00 -1.62     2.20   0.41
## spl           23.95  0.88     3.85   0.40
## cpi9500        7.98 -0.49    -1.12   0.28
## du_60ctry      1.00 -0.89    -1.23   0.05
## magn           0.99  0.39    -1.64   0.04
## sdm            0.99  0.93    -0.95   0.04
## oecd.x         1.00  1.01    -0.98   0.05
## mining_gdp    37.18  2.41     6.61   0.77
## gini_8090     42.81  0.35    -0.80   1.23
## con2150        1.00  2.52     4.39   0.03
## con5180        1.00  0.89    -1.23   0.05
## con81          1.00  0.02    -2.02   0.05
## list         510.33  1.19     0.79  14.14
## maj_bad        4.89  1.23     0.04   0.17
## maj_gin       62.00  0.63    -1.30   2.47
## maj_old        1.00  3.05     9.57   0.02
## pres_bad       4.89  0.91    -0.81   0.18
## pres_gin      62.00  0.71    -1.33   2.70
## pres_old       1.00  4.26    18.75   0.02
## propar         1.00  0.60    -1.65   0.05
## lpop           9.02 -0.16    -0.32   0.25
## continent*     3.00 -0.72    -0.82   0.12
vars n mean sd median trimmed mad min max range skew kurtosis se
oecd 1 85 0.2941176 0.4583492 0.0000000 0.2463768 0.0000000 0 1 1 0.8877955 -1.225827 0.0497150
country* 2 85 43.0000000 24.6813020 43.0000000 43.0000000 31.1346000 1 85 84 0.0000000 -1.242428 2.6770631
pind 3 85 0.4600902 0.4659720 0.4188713 0.4508357 0.6210186 0 1 1 0.1317631 -1.887050 0.0505418
pindo 4 85 0.6061366 0.4573739 0.9000000 0.6307480 0.1482600 0 1 1 -0.4581688 -1.691084 0.0496092
ctrycd 5 85 430.8823529 284.8886755 299.0000000 406.8550725 240.1812000 111 968 857 0.6276472 -1.082973 30.9005154
col_uk 6 85 0.3529412 0.4807207 0.0000000 0.3188406 0.0000000 0 1 1 0.6046284 -1.653463 0.0521415

Exporting a summary statistics table as a CSV file

# Copy the row names of "pt_copy_summarystats1" (one per summarized variable)
# into an explicit column named "variable"
pt_copy_summarystats1[["variable"]] <- rownames(pt_copy_summarystats1)

# Move the "variable" column to the first position in the table
pt_copy_summarystats1 <- relocate(pt_copy_summarystats1, variable)
# Save the summary statistics table to disk as a CSV file
# NOTE(review): this is an absolute path on one particular machine -- adjust it
# to a directory that exists on your own computer before running
write_csv(
  pt_copy_summarystats1,
  "/Users/adra7980/Documents/git_repositories/r_primer/written_data/pt_copy_summarystats1.csv"
)

Using stargazer to create and export summary statistics

# Convert the dataset (a tibble) into a plain data frame before handing it to
# stargazer
pt_copy_df <- as.data.frame(pt_copy)
# Print a text table of descriptive statistics to the console and save it as a
# text file
stargazer(
  pt_copy_df,
  type = "text",
  title = "Descriptive Statistics",
  digits = 1,
  out = "summary_stats.txt"
)
# Print the same text table to the console and save it as an html file; per
# stargazer's documentation of `out`, the format of the file written follows
# the file extension, while `type` governs what is printed
stargazer(
  pt_copy_df,
  type = "text",
  title = "Descriptive Statistics",
  digits = 1,
  out = "summary_stats.html"
)

Summary statistics and grouping variables

Subsetting summary statistics by group with the describeBy function

# Compute a separate summary statistics table for each value of "continent";
# the per-continent tables are stored in "summary_stats_by_continent"
summary_stats_by_continent <- describeBy(pt_copy, group = pt_copy$continent)
# Extract and display the summary statistics table stored under the name
# "africa" in "summary_stats_by_continent"
print(summary_stats_by_continent[["africa"]])
##            vars  n    mean      sd  median trimmed    mad     min     max
## oecd          1 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## country*      2 11    6.00    3.32    6.00    6.00   4.45    1.00   11.00
## pind          3 11    0.77    0.42    1.00    0.83   0.00    0.00    1.00
## pindo         4 11    0.77    0.42    1.00    0.83   0.00    0.00    1.00
## ctrycd        5 11  647.55  154.90  684.00  685.56  56.34  199.00  754.00
## col_uk        6 11    0.82    0.40    1.00    0.89   0.00    0.00    1.00
## t_indep       7 11   36.64   19.77   35.00   33.89   5.93    9.00   89.00
## col_uka       8 11    0.69    0.35    0.86    0.74   0.02    0.00    0.92
## col_espa      9 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## col_otha     10 11    0.15    0.33    0.00    0.07   0.00    0.00    0.96
## legor_uk     11 11    0.82    0.40    1.00    0.89   0.00    0.00    1.00
## legor_so     12 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## legor_fr     13 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## legor_ge     14 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## legor_sc     15 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## prot80       16 11   22.17   20.23   25.80   19.96  19.57    0.10   64.20
## catho80      17 11   19.46   13.67   18.70   18.07  13.20    1.90   49.60
## confu        18 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## avelf        19 11    0.71    0.14    0.73    0.73   0.15    0.38    0.84
## govef        20 11    5.37    0.82    5.02    5.25   0.68    4.56    7.26
## graft        21 11    5.11    0.77    5.39    5.12   0.80    3.93    6.23
## logyl        22 11    7.93    0.78    7.75    7.90   0.53    6.95    9.13
## loga         23 11    7.38    0.66    7.33    7.37   0.55    6.28    8.58
## yrsopen      24 11    0.21    0.29    0.16    0.15   0.18    0.00    1.00
## gadp         25 11    0.55    0.12    0.54    0.55   0.12    0.37    0.74
## engfrac      26 11    0.02    0.04    0.00    0.02   0.00    0.00    0.09
## eurfrac      27 11    0.07    0.17    0.00    0.03   0.00    0.00    0.57
## frankrom     28 11    2.90    0.51    2.94    2.86   0.56    2.19    3.95
## latitude     29 11   -9.14   15.17  -15.81   -9.58   8.49  -29.13   14.77
## gastil       30 11    3.59    1.16    4.00    3.66   1.32    1.61    4.89
## cgexp        31 10   27.00    7.63   25.50   27.10   8.58   14.65   38.57
## cgrev        32  9   26.15   10.36   23.81   26.15   6.14   17.24   50.85
## ssw          33  6    1.67    1.46    0.94    1.67   0.58    0.44    3.80
## rgdph        34 11 1899.87 1832.60 1116.28 1522.39 738.30  530.22 6666.77
## trade        35 11   77.34   32.13   69.17   76.87  27.13   30.83  128.12
## prop1564     36 11   54.23    4.91   53.23   53.51   2.96   49.05   65.95
## prop65       37 11    3.28    1.16    2.80    3.06   0.65    2.34    6.26
## federal      38 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## eduger       39 11   73.95   23.54   73.55   73.64  25.47   40.05  110.67
## spropn       40 10    0.27    0.42    0.00    0.21   0.00    0.00    1.00
## yearele      41  8 1982.50   13.48 1990.50 1982.50   5.19 1965.00 1994.00
## yearreg      42  8 1982.50   13.48 1990.50 1982.50   5.19 1965.00 1994.00
## seats        43 11  151.20  109.96  122.22  136.21  86.65   37.33  400.00
## maj          44 11    0.73    0.47    1.00    0.78   0.00    0.00    1.00
## pres         45 11    0.64    0.50    1.00    0.67   0.00    0.00    1.00
## lyp          46 11    7.22    0.81    7.02    7.15   0.88    6.27    8.80
## semi         47 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## majpar       48 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## majpres      49 11    0.55    0.52    1.00    0.56   0.00    0.00    1.00
## propres      50 11    0.09    0.30    0.00    0.00   0.00    0.00    1.00
## dem_age      51 11 1975.82   24.77 1989.00 1981.11   7.41 1910.00 1994.00
## lat01        52 11    0.17    0.08    0.18    0.17   0.05    0.00    0.32
## age          53 11    0.12    0.12    0.05    0.09   0.04    0.03    0.45
## polityIV     54 11    2.34    5.56    0.22    2.42   6.75   -6.00   10.00
## spl          55  8   -1.55    4.52   -1.54   -1.55   1.91   -6.77    8.23
## cpi9500      56  9    5.70    1.15    5.90    5.70   1.14    3.93    7.55
## du_60ctry    57 11    0.27    0.47    0.00    0.22   0.00    0.00    1.00
## magn         58 11    0.71    0.41    1.00    0.75   0.00    0.02    1.00
## sdm          59  9    0.71    0.45    1.00    0.71   0.00    0.03    1.00
## oecd.x       60 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## mining_gdp   61 10    8.43   11.70    4.10    5.89   5.71    0.02   37.20
## gini_8090    62  9   50.25    9.95   54.00   50.25  11.86   35.36   62.30
## con2150      63 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## con5180      64 11    0.27    0.47    0.00    0.22   0.00    0.00    1.00
## con81        65 11    0.73    0.47    1.00    0.78   0.00    0.00    1.00
## list         66 11   49.83  119.87    0.00   16.46   0.00    0.00  400.00
## maj_bad      67 11    2.73    2.05    3.83    2.80   1.56    0.00    4.89
## maj_gin      68  9   37.31   22.84   41.35   37.31  18.75    0.00   62.00
## maj_old      69 11    0.06    0.07    0.04    0.06   0.06    0.00    0.17
## pres_bad     70 11    2.63    2.18    3.83    2.67   1.56    0.00    4.89
## pres_gin     71  9   26.72   26.59   35.36   26.72  39.50    0.00   62.00
## pres_old     72 11    0.04    0.05    0.03    0.03   0.04    0.00    0.17
## propar       73 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## lpop         74  3   13.99    0.15   13.92   13.99   0.05   13.88   14.17
## continent*   75 11    1.00    0.00    1.00    1.00   0.00    1.00    1.00
##              range  skew kurtosis     se
## oecd          0.00   NaN      NaN   0.00
## country*     10.00  0.00    -1.53   1.00
## pind          1.00 -1.06    -0.79   0.13
## pindo         1.00 -1.06    -0.79   0.13
## ctrycd      555.00 -2.13     3.44  46.70
## col_uk        1.00 -1.43     0.08   0.12
## t_indep      80.00  1.38     1.88   5.96
## col_uka       0.92 -1.31    -0.14   0.10
## col_espa      0.00   NaN      NaN   0.00
## col_otha      0.96  1.58     0.79   0.10
## legor_uk      1.00 -1.43     0.08   0.12
## legor_so      0.00   NaN      NaN   0.00
## legor_fr      1.00  1.43     0.08   0.12
## legor_ge      0.00   NaN      NaN   0.00
## legor_sc      0.00   NaN      NaN   0.00
## prot80       64.10  0.46    -0.80   6.10
## catho80      47.70  0.71    -0.39   4.12
## confu         0.00   NaN      NaN   0.00
## avelf         0.46 -1.15     0.44   0.04
## govef         2.70  0.97    -0.17   0.25
## graft         2.30 -0.17    -1.62   0.23
## logyl         2.18  0.42    -1.43   0.23
## loga          2.29  0.03    -0.91   0.20
## yrsopen       1.00  1.72     2.15   0.09
## gadp          0.37  0.28    -1.38   0.04
## engfrac       0.09  0.95    -1.09   0.01
## eurfrac       0.57  2.24     3.76   0.05
## frankrom      1.77  0.54    -0.69   0.15
## latitude     43.90  0.44    -1.52   4.57
## gastil        3.28 -0.48    -1.45   0.35
## cgexp        23.92  0.06    -1.30   2.41
## cgrev        33.61  1.40     0.71   3.45
## ssw           3.36  0.52    -1.87   0.60
## rgdph      6136.54  1.50     1.28 552.55
## trade        97.29  0.31    -1.40   9.69
## prop1564     16.90  1.19     0.34   1.48
## prop65        3.92  1.47     1.16   0.35
## federal       0.00   NaN      NaN   0.00
## eduger       70.62  0.08    -1.50   7.10
## spropn        1.00  0.92    -1.07   0.13
## yearele      29.00 -0.41    -2.00   4.77
## yearreg      29.00 -0.41    -2.00   4.77
## seats       362.67  0.92    -0.20  33.16
## maj           1.00 -0.88    -1.31   0.14
## pres          1.00 -0.49    -1.91   0.15
## lyp           2.53  0.53    -1.18   0.25
## semi          1.00  1.43     0.08   0.12
## majpar        1.00  1.43     0.08   0.12
## majpres       1.00 -0.16    -2.15   0.16
## propres       1.00  2.47     4.52   0.09
## dem_age      84.00 -1.57     1.64   7.47
## lat01         0.32 -0.28    -0.38   0.03
## age           0.42  1.57     1.64   0.04
## polityIV     16.00  0.07    -1.63   1.68
## spl          15.00  0.98     0.05   1.60
## cpi9500       3.61  0.01    -1.45   0.38
## du_60ctry     1.00  0.88    -1.31   0.14
## magn          0.98 -0.58    -1.70   0.12
## sdm           0.97 -0.67    -1.63   0.15
## oecd.x        0.00   NaN      NaN   0.00
## mining_gdp   37.18  1.39     0.79   3.70
## gini_8090    26.94 -0.19    -1.71   3.32
## con2150       0.00   NaN      NaN   0.00
## con5180       1.00  0.88    -1.31   0.14
## con81         1.00 -0.88    -1.31   0.14
## list        400.00  2.22     3.64  36.14
## maj_bad       4.89 -0.32    -1.81   0.62
## maj_gin      62.00 -0.71    -1.17   7.61
## maj_old       0.17  0.69    -1.39   0.02
## pres_bad      4.89 -0.30    -1.92   0.66
## pres_gin     62.00  0.04    -1.97   8.86
## pres_old      0.17  1.66     2.10   0.02
## propar        1.00  1.43     0.08   0.12
## lpop          0.28  0.36    -2.33   0.09
## continent*    0.00   NaN      NaN   0.00
# Alternate syntax: extract the "africa" summary statistics table from
# "summary_stats_by_continent" with pluck() instead of [[ ]]
pluck(summary_stats_by_continent, "africa")
##            vars  n    mean      sd  median trimmed    mad     min     max
## oecd          1 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## country*      2 11    6.00    3.32    6.00    6.00   4.45    1.00   11.00
## pind          3 11    0.77    0.42    1.00    0.83   0.00    0.00    1.00
## pindo         4 11    0.77    0.42    1.00    0.83   0.00    0.00    1.00
## ctrycd        5 11  647.55  154.90  684.00  685.56  56.34  199.00  754.00
## col_uk        6 11    0.82    0.40    1.00    0.89   0.00    0.00    1.00
## t_indep       7 11   36.64   19.77   35.00   33.89   5.93    9.00   89.00
## col_uka       8 11    0.69    0.35    0.86    0.74   0.02    0.00    0.92
## col_espa      9 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## col_otha     10 11    0.15    0.33    0.00    0.07   0.00    0.00    0.96
## legor_uk     11 11    0.82    0.40    1.00    0.89   0.00    0.00    1.00
## legor_so     12 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## legor_fr     13 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## legor_ge     14 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## legor_sc     15 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## prot80       16 11   22.17   20.23   25.80   19.96  19.57    0.10   64.20
## catho80      17 11   19.46   13.67   18.70   18.07  13.20    1.90   49.60
## confu        18 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## avelf        19 11    0.71    0.14    0.73    0.73   0.15    0.38    0.84
## govef        20 11    5.37    0.82    5.02    5.25   0.68    4.56    7.26
## graft        21 11    5.11    0.77    5.39    5.12   0.80    3.93    6.23
## logyl        22 11    7.93    0.78    7.75    7.90   0.53    6.95    9.13
## loga         23 11    7.38    0.66    7.33    7.37   0.55    6.28    8.58
## yrsopen      24 11    0.21    0.29    0.16    0.15   0.18    0.00    1.00
## gadp         25 11    0.55    0.12    0.54    0.55   0.12    0.37    0.74
## engfrac      26 11    0.02    0.04    0.00    0.02   0.00    0.00    0.09
## eurfrac      27 11    0.07    0.17    0.00    0.03   0.00    0.00    0.57
## frankrom     28 11    2.90    0.51    2.94    2.86   0.56    2.19    3.95
## latitude     29 11   -9.14   15.17  -15.81   -9.58   8.49  -29.13   14.77
## gastil       30 11    3.59    1.16    4.00    3.66   1.32    1.61    4.89
## cgexp        31 10   27.00    7.63   25.50   27.10   8.58   14.65   38.57
## cgrev        32  9   26.15   10.36   23.81   26.15   6.14   17.24   50.85
## ssw          33  6    1.67    1.46    0.94    1.67   0.58    0.44    3.80
## rgdph        34 11 1899.87 1832.60 1116.28 1522.39 738.30  530.22 6666.77
## trade        35 11   77.34   32.13   69.17   76.87  27.13   30.83  128.12
## prop1564     36 11   54.23    4.91   53.23   53.51   2.96   49.05   65.95
## prop65       37 11    3.28    1.16    2.80    3.06   0.65    2.34    6.26
## federal      38 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## eduger       39 11   73.95   23.54   73.55   73.64  25.47   40.05  110.67
## spropn       40 10    0.27    0.42    0.00    0.21   0.00    0.00    1.00
## yearele      41  8 1982.50   13.48 1990.50 1982.50   5.19 1965.00 1994.00
## yearreg      42  8 1982.50   13.48 1990.50 1982.50   5.19 1965.00 1994.00
## seats        43 11  151.20  109.96  122.22  136.21  86.65   37.33  400.00
## maj          44 11    0.73    0.47    1.00    0.78   0.00    0.00    1.00
## pres         45 11    0.64    0.50    1.00    0.67   0.00    0.00    1.00
## lyp          46 11    7.22    0.81    7.02    7.15   0.88    6.27    8.80
## semi         47 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## majpar       48 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## majpres      49 11    0.55    0.52    1.00    0.56   0.00    0.00    1.00
## propres      50 11    0.09    0.30    0.00    0.00   0.00    0.00    1.00
## dem_age      51 11 1975.82   24.77 1989.00 1981.11   7.41 1910.00 1994.00
## lat01        52 11    0.17    0.08    0.18    0.17   0.05    0.00    0.32
## age          53 11    0.12    0.12    0.05    0.09   0.04    0.03    0.45
## polityIV     54 11    2.34    5.56    0.22    2.42   6.75   -6.00   10.00
## spl          55  8   -1.55    4.52   -1.54   -1.55   1.91   -6.77    8.23
## cpi9500      56  9    5.70    1.15    5.90    5.70   1.14    3.93    7.55
## du_60ctry    57 11    0.27    0.47    0.00    0.22   0.00    0.00    1.00
## magn         58 11    0.71    0.41    1.00    0.75   0.00    0.02    1.00
## sdm          59  9    0.71    0.45    1.00    0.71   0.00    0.03    1.00
## oecd.x       60 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## mining_gdp   61 10    8.43   11.70    4.10    5.89   5.71    0.02   37.20
## gini_8090    62  9   50.25    9.95   54.00   50.25  11.86   35.36   62.30
## con2150      63 11    0.00    0.00    0.00    0.00   0.00    0.00    0.00
## con5180      64 11    0.27    0.47    0.00    0.22   0.00    0.00    1.00
## con81        65 11    0.73    0.47    1.00    0.78   0.00    0.00    1.00
## list         66 11   49.83  119.87    0.00   16.46   0.00    0.00  400.00
## maj_bad      67 11    2.73    2.05    3.83    2.80   1.56    0.00    4.89
## maj_gin      68  9   37.31   22.84   41.35   37.31  18.75    0.00   62.00
## maj_old      69 11    0.06    0.07    0.04    0.06   0.06    0.00    0.17
## pres_bad     70 11    2.63    2.18    3.83    2.67   1.56    0.00    4.89
## pres_gin     71  9   26.72   26.59   35.36   26.72  39.50    0.00   62.00
## pres_old     72 11    0.04    0.05    0.03    0.03   0.04    0.00    0.17
## propar       73 11    0.18    0.40    0.00    0.11   0.00    0.00    1.00
## lpop         74  3   13.99    0.15   13.92   13.99   0.05   13.88   14.17
## continent*   75 11    1.00    0.00    1.00    1.00   0.00    1.00    1.00
##              range  skew kurtosis     se
## oecd          0.00   NaN      NaN   0.00
## country*     10.00  0.00    -1.53   1.00
## pind          1.00 -1.06    -0.79   0.13
## pindo         1.00 -1.06    -0.79   0.13
## ctrycd      555.00 -2.13     3.44  46.70
## col_uk        1.00 -1.43     0.08   0.12
## t_indep      80.00  1.38     1.88   5.96
## col_uka       0.92 -1.31    -0.14   0.10
## col_espa      0.00   NaN      NaN   0.00
## col_otha      0.96  1.58     0.79   0.10
## legor_uk      1.00 -1.43     0.08   0.12
## legor_so      0.00   NaN      NaN   0.00
## legor_fr      1.00  1.43     0.08   0.12
## legor_ge      0.00   NaN      NaN   0.00
## legor_sc      0.00   NaN      NaN   0.00
## prot80       64.10  0.46    -0.80   6.10
## catho80      47.70  0.71    -0.39   4.12
## confu         0.00   NaN      NaN   0.00
## avelf         0.46 -1.15     0.44   0.04
## govef         2.70  0.97    -0.17   0.25
## graft         2.30 -0.17    -1.62   0.23
## logyl         2.18  0.42    -1.43   0.23
## loga          2.29  0.03    -0.91   0.20
## yrsopen       1.00  1.72     2.15   0.09
## gadp          0.37  0.28    -1.38   0.04
## engfrac       0.09  0.95    -1.09   0.01
## eurfrac       0.57  2.24     3.76   0.05
## frankrom      1.77  0.54    -0.69   0.15
## latitude     43.90  0.44    -1.52   4.57
## gastil        3.28 -0.48    -1.45   0.35
## cgexp        23.92  0.06    -1.30   2.41
## cgrev        33.61  1.40     0.71   3.45
## ssw           3.36  0.52    -1.87   0.60
## rgdph      6136.54  1.50     1.28 552.55
## trade        97.29  0.31    -1.40   9.69
## prop1564     16.90  1.19     0.34   1.48
## prop65        3.92  1.47     1.16   0.35
## federal       0.00   NaN      NaN   0.00
## eduger       70.62  0.08    -1.50   7.10
## spropn        1.00  0.92    -1.07   0.13
## yearele      29.00 -0.41    -2.00   4.77
## yearreg      29.00 -0.41    -2.00   4.77
## seats       362.67  0.92    -0.20  33.16
## maj           1.00 -0.88    -1.31   0.14
## pres          1.00 -0.49    -1.91   0.15
## lyp           2.53  0.53    -1.18   0.25
## semi          1.00  1.43     0.08   0.12
## majpar        1.00  1.43     0.08   0.12
## majpres       1.00 -0.16    -2.15   0.16
## propres       1.00  2.47     4.52   0.09
## dem_age      84.00 -1.57     1.64   7.47
## lat01         0.32 -0.28    -0.38   0.03
## age           0.42  1.57     1.64   0.04
## polityIV     16.00  0.07    -1.63   1.68
## spl          15.00  0.98     0.05   1.60
## cpi9500       3.61  0.01    -1.45   0.38
## du_60ctry     1.00  0.88    -1.31   0.14
## magn          0.98 -0.58    -1.70   0.12
## sdm           0.97 -0.67    -1.63   0.15
## oecd.x        0.00   NaN      NaN   0.00
## mining_gdp   37.18  1.39     0.79   3.70
## gini_8090    26.94 -0.19    -1.71   3.32
## con2150       0.00   NaN      NaN   0.00
## con5180       1.00  0.88    -1.31   0.14
## con81         1.00 -0.88    -1.31   0.14
## list        400.00  2.22     3.64  36.14
## maj_bad       4.89 -0.32    -1.81   0.62
## maj_gin      62.00 -0.71    -1.17   7.61
## maj_old       0.17  0.69    -1.39   0.02
## pres_bad      4.89 -0.30    -1.92   0.66
## pres_gin     62.00  0.04    -1.97   8.86
## pres_old      0.17  1.66     2.10   0.02
## propar        1.00  1.43     0.08   0.12
## lpop          0.28  0.36    -2.33   0.09
## continent*    0.00   NaN      NaN   0.00
# Store one group's summary statistics in its own object so they are easy to
# retrieve later; "asiae" is the name of the element being extracted
asia_europe_summary_statistics <- pluck(summary_stats_by_continent, "asiae")
# Display the summary statistics for Asia/Europe
asia_europe_summary_statistics
##            vars  n    mean      sd  median trimmed     mad     min      max
## oecd          1 13    0.00    0.00    0.00    0.00    0.00    0.00     0.00
## country*      2 13    7.00    3.89    7.00    7.00    4.45    1.00    13.00
## pind          3 13    0.87    0.28    1.00    0.94    0.00    0.00     1.00
## pindo         4 13    0.95    0.09    1.00    0.96    0.00    0.78     1.00
## ctrycd        5 13  623.00  141.16  564.00  605.64   32.62  513.00   924.00
## col_uk        6 13    0.54    0.52    1.00    0.55    0.00    0.00     1.00
## t_indep       7 13   74.31   78.67   51.00   62.91   13.34   24.00   250.00
## col_uka       8 13    0.45    0.44    0.79    0.45    0.17    0.00     0.90
## col_espa      9 13    0.06    0.22    0.00    0.00    0.00    0.00     0.79
## col_otha     10 13    0.39    0.44    0.00    0.38    0.00    0.00     0.90
## legor_uk     11 13    0.77    0.44    1.00    0.82    0.00    0.00     1.00
## legor_so     12 13    0.00    0.00    0.00    0.00    0.00    0.00     0.00
## legor_fr     13 13    0.08    0.28    0.00    0.00    0.00    0.00     1.00
## legor_ge     14 13    0.15    0.38    0.00    0.09    0.00    0.00     1.00
## legor_sc     15 13    0.00    0.00    0.00    0.00    0.00    0.00     0.00
## prot80       16 13    9.48   18.15    1.40    5.89    1.78    0.00    58.40
## catho80      17 13   11.38   23.52    2.80    5.81    3.56    0.00    84.10
## confu        18 13    0.38    0.51    0.00    0.36    0.00    0.00     1.00
## avelf        19 13    0.46    0.28    0.45    0.47    0.29    0.00     0.80
## govef        20 12    4.60    1.75    4.86    4.79    1.90    0.84     6.49
## graft        21 12    4.76    1.56    5.29    4.93    1.38    1.10     6.71
## logyl        22 12    8.83    0.66    8.62    8.80    0.82    7.92     9.97
## loga         23 12    8.05    0.52    8.01    8.03    0.58    7.30     8.90
## yrsopen      24 13    0.72    1.09    0.41    0.48    0.60    0.00     4.09
## gadp         25 12    0.61    0.17    0.62    0.61    0.20    0.31     0.86
## engfrac      26 13    0.01    0.02    0.00    0.00    0.00    0.00     0.09
## eurfrac      27 13    0.01    0.02    0.00    0.00    0.00    0.00     0.09
## frankrom     28 13    2.52    0.61    2.59    2.52    0.45    1.19     3.89
## latitude     29 13   14.28   16.21   13.92   15.08   16.83  -17.83    37.55
## gastil       30 13    3.52    0.75    3.33    3.54    0.74    2.17     4.67
## cgexp        31 13   20.44    5.76   18.48   20.22    4.29   12.57    30.81
## cgrev        32 13   19.54    6.81   18.45   19.06    6.91    9.74    34.67
## ssw          33 11    1.27    1.29    0.65    1.05    0.77    0.13     4.39
## rgdph        34 13 4601.57 4223.17 2441.69 3994.20 1919.26 1147.17 14737.04
## trade        35 13   97.78   84.61   80.63   82.35   45.68   21.88   343.39
## prop1564     36 12   60.79    6.17   60.00   60.41    7.40   53.78    71.70
## prop65       37 12    4.27    1.16    3.86    4.23    1.15    2.79     6.23
## federal      38 13    0.15    0.38    0.00    0.09    0.00    0.00     1.00
## eduger       39 12   74.16   20.58   77.72   74.91   20.56   42.15    98.74
## spropn       40 10    0.09    0.12    0.04    0.07    0.06    0.00     0.36
## yearele      41 12 1981.94   14.46 1989.00 1984.10    4.45 1950.00  1992.22
## yearreg      42 12 1979.00   17.19 1988.00 1980.80    5.19 1948.00  1992.00
## seats        43 13  229.54  126.01  202.89  217.97  141.01   66.00   520.33
## maj          44 13    0.69    0.48    1.00    0.73    0.00    0.00     1.00
## pres         45 13    0.31    0.48    0.00    0.27    0.00    0.00     1.00
## lyp          46 13    8.08    0.86    7.80    8.03    0.79    7.05     9.60
## semi         47 13    0.23    0.44    0.00    0.18    0.00    0.00     1.00
## majpar       48 13    0.57    0.49    0.89    0.59    0.16    0.00     1.00
## majpres      49 13    0.15    0.35    0.00    0.09    0.00    0.00     1.00
## propres      50 13    0.16    0.37    0.00    0.10    0.00    0.00     1.00
## dem_age      51 13 1977.92   16.91 1988.00 1979.36    5.93 1948.00  1992.00
## lat01        52 13    0.20    0.13    0.20    0.20    0.16    0.02     0.42
## age          53 13    0.11    0.08    0.06    0.10    0.03    0.04     0.26
## polityIV     54 13    6.24    3.08    6.22    6.65    2.14   -2.00    10.00
## spl          55 13   -1.52    5.32   -1.70   -2.27    4.30   -7.36    12.59
## cpi9500      56  9    5.89    2.16    6.90    5.89    1.42    1.00     7.86
## du_60ctry    57 13    0.62    0.51    1.00    0.64    0.00    0.00     1.00
## magn         58 12    0.76    0.32    0.94    0.80    0.08    0.10     1.00
## sdm          59 11    0.52    0.45    0.39    0.53    0.57    0.01     1.00
## oecd.x       60 13    0.00    0.00    0.00    0.00    0.00    0.00     0.00
## mining_gdp   61 11    2.86    5.07    1.04    1.64    0.96    0.02    16.74
## gini_8090    62 11   38.28    7.66   37.55   38.05    8.62   29.03    49.67
## con2150      63 13    0.15    0.38    0.00    0.09    0.00    0.00     1.00
## con5180      64 13    0.31    0.48    0.00    0.27    0.00    0.00     1.00
## con81        65 13    0.62    0.51    1.00    0.64    0.00    0.00     1.00
## list         66 12   25.74   65.45    0.00    8.39    0.00    0.00   224.89
## maj_bad      67 13    2.50    1.83    3.17    2.53    1.65    0.00     4.67
## maj_gin      68 11   28.37   19.49   32.00   29.15   20.02    0.00    49.67
## maj_old      69 13    0.08    0.09    0.05    0.07    0.07    0.00     0.25
## pres_bad     70 13    1.05    1.71    0.00    0.85    0.00    0.00     4.28
## pres_gin     71 11   10.43   18.14    0.00    7.70    0.00    0.00    45.50
## pres_old     72 13    0.03    0.07    0.00    0.02    0.00    0.00     0.26
## propar       73 13    0.15    0.38    0.00    0.09    0.00    0.00     1.00
## lpop         74  8   16.97    2.07   16.83   16.97    1.68   13.57    20.63
## continent*   75 13    1.00    0.00    1.00    1.00    0.00    1.00     1.00
##               range  skew kurtosis      se
## oecd           0.00   NaN      NaN    0.00
## country*      12.00  0.00    -1.48    1.08
## pind           1.00 -2.31     4.47    0.08
## pindo          0.22 -1.12    -0.77    0.03
## ctrycd       411.00  1.15    -0.50   39.15
## col_uk         1.00 -0.14    -2.13    0.14
## t_indep      226.00  1.64     0.87   21.82
## col_uka        0.90 -0.12    -2.11    0.12
## col_espa       0.79  2.82     6.44    0.06
## col_otha       0.90  0.15    -2.10    0.12
## legor_uk       1.00 -1.13    -0.76    0.12
## legor_so       0.00   NaN      NaN    0.00
## legor_fr       1.00  2.82     6.44    0.08
## legor_ge       1.00  1.70     0.99    0.10
## legor_sc       0.00   NaN      NaN    0.00
## prot80        58.40  1.77     1.64    5.04
## catho80       84.10  2.28     4.10    6.52
## confu          1.00  0.42    -1.96    0.14
## avelf          0.80 -0.31    -1.35    0.08
## govef          5.65 -0.69    -0.68    0.51
## graft          5.60 -0.85    -0.04    0.45
## logyl          2.05  0.30    -1.39    0.19
## loga           1.60  0.04    -1.32    0.15
## yrsopen        4.09  2.19     4.17    0.30
## gadp           0.55 -0.17    -1.31    0.05
## engfrac        0.09  2.63     5.72    0.01
## eurfrac        0.09  2.63     5.72    0.01
## frankrom       2.70  0.07     0.65    0.17
## latitude      55.38 -0.41    -1.06    4.49
## gastil         2.50  0.04    -1.21    0.21
## cgexp         18.25  0.47    -1.34    1.60
## cgrev         24.94  0.62    -0.47    1.89
## ssw            4.26  1.23     0.40    0.39
## rgdph      13589.87  1.11    -0.03 1171.30
## trade        321.50  1.79     2.64   23.47
## prop1564      17.92  0.47    -1.20    1.78
## prop65         3.44  0.37    -1.50    0.33
## federal        1.00  1.70     0.99    0.10
## eduger        56.59 -0.40    -1.42    5.94
## spropn         0.36  1.04    -0.24    0.04
## yearele       42.22 -1.19    -0.18    4.17
## yearreg       44.00 -0.89    -1.10    4.96
## seats        454.33  0.74    -0.21   34.95
## maj            1.00 -0.74    -1.56    0.13
## pres           1.00  0.74    -1.56    0.13
## lyp            2.55  0.40    -1.50    0.24
## semi           1.00  1.13    -0.76    0.12
## majpar         1.00 -0.29    -1.97    0.13
## majpres        1.00  1.69     1.02    0.10
## propres        1.00  1.68     0.94    0.10
## dem_age       44.00 -0.73    -1.30    4.69
## lat01          0.40  0.03    -1.44    0.04
## age            0.22  0.73    -1.30    0.02
## polityIV      12.00 -1.16     1.31    0.86
## spl           19.95  1.19     1.10    1.48
## cpi9500        6.87 -1.11     0.08    0.72
## du_60ctry      1.00 -0.42    -1.96    0.14
## magn           0.90 -0.87    -0.91    0.09
## sdm            0.99  0.06    -2.06    0.14
## oecd.x         0.00   NaN      NaN    0.00
## mining_gdp    16.72  1.87     2.21    1.53
## gini_8090     20.64  0.27    -1.64    2.31
## con2150        1.00  1.70     0.99    0.10
## con5180        1.00  0.74    -1.56    0.13
## con81          1.00 -0.42    -1.96    0.14
## list         224.89  2.32     4.17   18.89
## maj_bad        4.67 -0.45    -1.56    0.51
## maj_gin       49.67 -0.52    -1.45    5.88
## maj_old        0.25  0.79    -0.93    0.02
## pres_bad       4.28  0.97    -0.90    0.48
## pres_gin      45.50  0.97    -1.02    5.47
## pres_old       0.26  2.22     4.11    0.02
## propar         1.00  1.70     0.99    0.10
## lpop           7.06  0.10    -0.80    0.73
## continent*     0.00   NaN      NaN    0.00

The “vars” column in the summary statistics table is an index variable; it can be removed with the following:

# Drop the "vars" index column from "asia_europe_summary_statistics"
asia_europe_summary_statistics <- select(asia_europe_summary_statistics, -vars)
# Display the updated contents of "asia_europe_summary_statistics"
asia_europe_summary_statistics
##             n    mean      sd  median trimmed     mad     min      max    range
## oecd       13    0.00    0.00    0.00    0.00    0.00    0.00     0.00     0.00
## country*   13    7.00    3.89    7.00    7.00    4.45    1.00    13.00    12.00
## pind       13    0.87    0.28    1.00    0.94    0.00    0.00     1.00     1.00
## pindo      13    0.95    0.09    1.00    0.96    0.00    0.78     1.00     0.22
## ctrycd     13  623.00  141.16  564.00  605.64   32.62  513.00   924.00   411.00
## col_uk     13    0.54    0.52    1.00    0.55    0.00    0.00     1.00     1.00
## t_indep    13   74.31   78.67   51.00   62.91   13.34   24.00   250.00   226.00
## col_uka    13    0.45    0.44    0.79    0.45    0.17    0.00     0.90     0.90
## col_espa   13    0.06    0.22    0.00    0.00    0.00    0.00     0.79     0.79
## col_otha   13    0.39    0.44    0.00    0.38    0.00    0.00     0.90     0.90
## legor_uk   13    0.77    0.44    1.00    0.82    0.00    0.00     1.00     1.00
## legor_so   13    0.00    0.00    0.00    0.00    0.00    0.00     0.00     0.00
## legor_fr   13    0.08    0.28    0.00    0.00    0.00    0.00     1.00     1.00
## legor_ge   13    0.15    0.38    0.00    0.09    0.00    0.00     1.00     1.00
## legor_sc   13    0.00    0.00    0.00    0.00    0.00    0.00     0.00     0.00
## prot80     13    9.48   18.15    1.40    5.89    1.78    0.00    58.40    58.40
## catho80    13   11.38   23.52    2.80    5.81    3.56    0.00    84.10    84.10
## confu      13    0.38    0.51    0.00    0.36    0.00    0.00     1.00     1.00
## avelf      13    0.46    0.28    0.45    0.47    0.29    0.00     0.80     0.80
## govef      12    4.60    1.75    4.86    4.79    1.90    0.84     6.49     5.65
## graft      12    4.76    1.56    5.29    4.93    1.38    1.10     6.71     5.60
## logyl      12    8.83    0.66    8.62    8.80    0.82    7.92     9.97     2.05
## loga       12    8.05    0.52    8.01    8.03    0.58    7.30     8.90     1.60
## yrsopen    13    0.72    1.09    0.41    0.48    0.60    0.00     4.09     4.09
## gadp       12    0.61    0.17    0.62    0.61    0.20    0.31     0.86     0.55
## engfrac    13    0.01    0.02    0.00    0.00    0.00    0.00     0.09     0.09
## eurfrac    13    0.01    0.02    0.00    0.00    0.00    0.00     0.09     0.09
## frankrom   13    2.52    0.61    2.59    2.52    0.45    1.19     3.89     2.70
## latitude   13   14.28   16.21   13.92   15.08   16.83  -17.83    37.55    55.38
## gastil     13    3.52    0.75    3.33    3.54    0.74    2.17     4.67     2.50
## cgexp      13   20.44    5.76   18.48   20.22    4.29   12.57    30.81    18.25
## cgrev      13   19.54    6.81   18.45   19.06    6.91    9.74    34.67    24.94
## ssw        11    1.27    1.29    0.65    1.05    0.77    0.13     4.39     4.26
## rgdph      13 4601.57 4223.17 2441.69 3994.20 1919.26 1147.17 14737.04 13589.87
## trade      13   97.78   84.61   80.63   82.35   45.68   21.88   343.39   321.50
## prop1564   12   60.79    6.17   60.00   60.41    7.40   53.78    71.70    17.92
## prop65     12    4.27    1.16    3.86    4.23    1.15    2.79     6.23     3.44
## federal    13    0.15    0.38    0.00    0.09    0.00    0.00     1.00     1.00
## eduger     12   74.16   20.58   77.72   74.91   20.56   42.15    98.74    56.59
## spropn     10    0.09    0.12    0.04    0.07    0.06    0.00     0.36     0.36
## yearele    12 1981.94   14.46 1989.00 1984.10    4.45 1950.00  1992.22    42.22
## yearreg    12 1979.00   17.19 1988.00 1980.80    5.19 1948.00  1992.00    44.00
## seats      13  229.54  126.01  202.89  217.97  141.01   66.00   520.33   454.33
## maj        13    0.69    0.48    1.00    0.73    0.00    0.00     1.00     1.00
## pres       13    0.31    0.48    0.00    0.27    0.00    0.00     1.00     1.00
## lyp        13    8.08    0.86    7.80    8.03    0.79    7.05     9.60     2.55
## semi       13    0.23    0.44    0.00    0.18    0.00    0.00     1.00     1.00
## majpar     13    0.57    0.49    0.89    0.59    0.16    0.00     1.00     1.00
## majpres    13    0.15    0.35    0.00    0.09    0.00    0.00     1.00     1.00
## propres    13    0.16    0.37    0.00    0.10    0.00    0.00     1.00     1.00
## dem_age    13 1977.92   16.91 1988.00 1979.36    5.93 1948.00  1992.00    44.00
## lat01      13    0.20    0.13    0.20    0.20    0.16    0.02     0.42     0.40
## age        13    0.11    0.08    0.06    0.10    0.03    0.04     0.26     0.22
## polityIV   13    6.24    3.08    6.22    6.65    2.14   -2.00    10.00    12.00
## spl        13   -1.52    5.32   -1.70   -2.27    4.30   -7.36    12.59    19.95
## cpi9500     9    5.89    2.16    6.90    5.89    1.42    1.00     7.86     6.87
## du_60ctry  13    0.62    0.51    1.00    0.64    0.00    0.00     1.00     1.00
## magn       12    0.76    0.32    0.94    0.80    0.08    0.10     1.00     0.90
## sdm        11    0.52    0.45    0.39    0.53    0.57    0.01     1.00     0.99
## oecd.x     13    0.00    0.00    0.00    0.00    0.00    0.00     0.00     0.00
## mining_gdp 11    2.86    5.07    1.04    1.64    0.96    0.02    16.74    16.72
## gini_8090  11   38.28    7.66   37.55   38.05    8.62   29.03    49.67    20.64
## con2150    13    0.15    0.38    0.00    0.09    0.00    0.00     1.00     1.00
## con5180    13    0.31    0.48    0.00    0.27    0.00    0.00     1.00     1.00
## con81      13    0.62    0.51    1.00    0.64    0.00    0.00     1.00     1.00
## list       12   25.74   65.45    0.00    8.39    0.00    0.00   224.89   224.89
## maj_bad    13    2.50    1.83    3.17    2.53    1.65    0.00     4.67     4.67
## maj_gin    11   28.37   19.49   32.00   29.15   20.02    0.00    49.67    49.67
## maj_old    13    0.08    0.09    0.05    0.07    0.07    0.00     0.25     0.25
## pres_bad   13    1.05    1.71    0.00    0.85    0.00    0.00     4.28     4.28
## pres_gin   11   10.43   18.14    0.00    7.70    0.00    0.00    45.50    45.50
## pres_old   13    0.03    0.07    0.00    0.02    0.00    0.00     0.26     0.26
## propar     13    0.15    0.38    0.00    0.09    0.00    0.00     1.00     1.00
## lpop        8   16.97    2.07   16.83   16.97    1.68   13.57    20.63     7.06
## continent* 13    1.00    0.00    1.00    1.00    0.00    1.00     1.00     0.00
##             skew kurtosis      se
## oecd         NaN      NaN    0.00
## country*    0.00    -1.48    1.08
## pind       -2.31     4.47    0.08
## pindo      -1.12    -0.77    0.03
## ctrycd      1.15    -0.50   39.15
## col_uk     -0.14    -2.13    0.14
## t_indep     1.64     0.87   21.82
## col_uka    -0.12    -2.11    0.12
## col_espa    2.82     6.44    0.06
## col_otha    0.15    -2.10    0.12
## legor_uk   -1.13    -0.76    0.12
## legor_so     NaN      NaN    0.00
## legor_fr    2.82     6.44    0.08
## legor_ge    1.70     0.99    0.10
## legor_sc     NaN      NaN    0.00
## prot80      1.77     1.64    5.04
## catho80     2.28     4.10    6.52
## confu       0.42    -1.96    0.14
## avelf      -0.31    -1.35    0.08
## govef      -0.69    -0.68    0.51
## graft      -0.85    -0.04    0.45
## logyl       0.30    -1.39    0.19
## loga        0.04    -1.32    0.15
## yrsopen     2.19     4.17    0.30
## gadp       -0.17    -1.31    0.05
## engfrac     2.63     5.72    0.01
## eurfrac     2.63     5.72    0.01
## frankrom    0.07     0.65    0.17
## latitude   -0.41    -1.06    4.49
## gastil      0.04    -1.21    0.21
## cgexp       0.47    -1.34    1.60
## cgrev       0.62    -0.47    1.89
## ssw         1.23     0.40    0.39
## rgdph       1.11    -0.03 1171.30
## trade       1.79     2.64   23.47
## prop1564    0.47    -1.20    1.78
## prop65      0.37    -1.50    0.33
## federal     1.70     0.99    0.10
## eduger     -0.40    -1.42    5.94
## spropn      1.04    -0.24    0.04
## yearele    -1.19    -0.18    4.17
## yearreg    -0.89    -1.10    4.96
## seats       0.74    -0.21   34.95
## maj        -0.74    -1.56    0.13
## pres        0.74    -1.56    0.13
## lyp         0.40    -1.50    0.24
## semi        1.13    -0.76    0.12
## majpar     -0.29    -1.97    0.13
## majpres     1.69     1.02    0.10
## propres     1.68     0.94    0.10
## dem_age    -0.73    -1.30    4.69
## lat01       0.03    -1.44    0.04
## age         0.73    -1.30    0.02
## polityIV   -1.16     1.31    0.86
## spl         1.19     1.10    1.48
## cpi9500    -1.11     0.08    0.72
## du_60ctry  -0.42    -1.96    0.14
## magn       -0.87    -0.91    0.09
## sdm         0.06    -2.06    0.14
## oecd.x       NaN      NaN    0.00
## mining_gdp  1.87     2.21    1.53
## gini_8090   0.27    -1.64    2.31
## con2150     1.70     0.99    0.10
## con5180     0.74    -1.56    0.13
## con81      -0.42    -1.96    0.14
## list        2.32     4.17   18.89
## maj_bad    -0.45    -1.56    0.51
## maj_gin    -0.52    -1.45    5.88
## maj_old     0.79    -0.93    0.02
## pres_bad    0.97    -0.90    0.48
## pres_gin    0.97    -1.02    5.47
## pres_old    2.22     4.11    0.02
## propar      1.70     0.99    0.10
## lpop        0.10    -0.80    0.73
## continent*   NaN      NaN    0.00

Generating group-level summary statistics with the summarize function from dplyr

# Build a continent-level table holding the mean and standard deviation of
# "trade" and "age", plus the number of rows in each continent, and assign it
# to an object named "trade_age_by_continent"
# (note: mean() and sd() return NA for a group that contains missing values
# unless na.rm = TRUE is supplied)
trade_age_by_continent <- pt_copy %>%
  group_by(continent) %>%
  summarise(
    meanTrade = mean(trade),
    sdTrade = sd(trade),
    meanAge = mean(age),
    sdAge = sd(age),
    n = n()
  )
# Display the contents of "trade_age_by_continent"
trade_age_by_continent
## # A tibble: 4 × 6
##   continent meanTrade sdTrade meanAge  sdAge     n
##   <chr>         <dbl>   <dbl>   <dbl>  <dbl> <int>
## 1 africa         77.3    32.1   0.121 0.124     11
## 2 asiae          97.8    84.6   0.110 0.0846    13
## 3 laam           68.6    32.8   0.139 0.153     23
## 4 other          78.8    40.7   0.309 0.263     38

Generating crosstabs

Crosstabs with tabyl

# Cross-tabulate "federal" (rows) against "continent" (columns) to show how
# federal and non-federal countries are distributed across continents
crosstab_federal_continent <- tabyl(pt_copy, federal, continent)
# Display the contents of "crosstab_federal_continent"
crosstab_federal_continent
##  federal africa asiae laam other
##        0     11    11   19    29
##        1      0     2    4     7
##       NA      0     0    0     2
# Cross-tabulate "maj" (rows) against "continent" (columns) to show how
# majoritarian and non-majoritarian countries are distributed across continents
crosstab_majoritarian_continent <- tabyl(pt_copy, maj, continent)
# Display the contents of "crosstab_majoritarian_continent"
crosstab_majoritarian_continent
##  maj africa asiae laam other
##    0      3     4   16    29
##    1      8     9    7     9

Crosstabs with tbl_cross

# Build the federal/non-federal by continent crosstab with "tbl_cross", which
# also reports row and column totals
pt_copy %>%
  tbl_cross(row = federal, col = continent)
Characteristic continent Total
africa asiae laam other
federal
0 11 11 19 29 70
1 0 2 4 7 13
Unknown 0 0 0 2 2
Total 11 13 23 38 85
# Build the majoritarian/non-majoritarian by continent crosstab with
# "tbl_cross", which also reports row and column totals
pt_copy %>%
  tbl_cross(row = maj, col = continent)
Characteristic continent Total
africa asiae laam other
maj
0 3 4 16 29 52
1 8 9 7 9 33
Total 11 13 23 38 85

Basic Data Cleaning and Preparation Tasks

Rearranging columns

# Print the contents of "pt_copy" to inspect the current column order before
# any columns are rearranged
pt_copy
## # A tibble: 85 × 75
##     oecd country     pind pindo ctrycd col_uk t_indep col_uka col_espa col_otha
##    <dbl> <chr>      <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1     0 Argentina  0     0        213      0     183   0        0.268    0    
##  2     1 Australia  1     1        193      1      98   0.608    0        0    
##  3     1 Austria    0     0        122      0     250   0        0        0    
##  4     0 Bahamas    1     1        313      1      26   0.896    0        0    
##  5     0 Bangladesh 1     1        513      0      28   0        0        0.888
##  6     0 Barbados   1     1        316      1      33   0.868    0        0    
##  7     0 Belarus    1     1        913      0       8   0        0        0.968
##  8     1 Belgium    0     0        124      0     169   0        0        0.324
##  9     0 Belize     1     1        339      1      18   0.928    0        0    
## 10     0 Bolivia    0.116 0.116    218      0     174   0        0.304    0    
## # … with 75 more rows, and 65 more variables: legor_uk <dbl>, legor_so <dbl>,
## #   legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>,
## #   catho80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>, graft <dbl>,
## #   logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, engfrac <dbl>,
## #   eurfrac <dbl>, frankrom <dbl>, latitude <dbl>, gastil <dbl>, cgexp <dbl>,
## #   cgrev <dbl>, ssw <dbl>, rgdph <dbl>, trade <dbl>, prop1564 <dbl>,
## #   prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, yearele <dbl>, …
# Move the "country" column to the front of the dataset
pt_copy <- relocate(pt_copy, country)
# Display the updated contents of "pt_copy"
pt_copy
## # A tibble: 85 × 75
##    country     oecd  pind pindo ctrycd col_uk t_indep col_uka col_espa col_otha
##    <chr>      <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1 Argentina      0 0     0        213      0     183   0        0.268    0    
##  2 Australia      1 1     1        193      1      98   0.608    0        0    
##  3 Austria        1 0     0        122      0     250   0        0        0    
##  4 Bahamas        0 1     1        313      1      26   0.896    0        0    
##  5 Bangladesh     0 1     1        513      0      28   0        0        0.888
##  6 Barbados       0 1     1        316      1      33   0.868    0        0    
##  7 Belarus        0 1     1        913      0       8   0        0        0.968
##  8 Belgium        1 0     0        124      0     169   0        0        0.324
##  9 Belize         0 1     1        339      1      18   0.928    0        0    
## 10 Bolivia        0 0.116 0.116    218      0     174   0        0.304    0    
## # … with 75 more rows, and 65 more variables: legor_uk <dbl>, legor_so <dbl>,
## #   legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>,
## #   catho80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>, graft <dbl>,
## #   logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, engfrac <dbl>,
## #   eurfrac <dbl>, frankrom <dbl>, latitude <dbl>, gastil <dbl>, cgexp <dbl>,
## #   cgrev <dbl>, ssw <dbl>, rgdph <dbl>, trade <dbl>, prop1564 <dbl>,
## #   prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, yearele <dbl>, …
# Move the "country", "list", "trade", and "oecd" columns (in that order) to
# the front of the dataset
pt_copy <- relocate(pt_copy, country, list, trade, oecd)

# Display the updated contents of "pt_copy"
pt_copy
## # A tibble: 85 × 75
##    country   list trade  oecd  pind pindo ctrycd col_uk t_indep col_uka col_espa
##    <chr>    <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
##  1 Argenti…  257.  18.4     0 0     0        213      0     183   0        0.268
##  2 Austral…    0   38.8     1 1     1        193      1      98   0.608    0    
##  3 Austria   183   78.3     1 0     0        122      0     250   0        0    
##  4 Bahamas     0  102.      0 1     1        313      1      26   0.896    0    
##  5 Banglad…    0   25.4     0 1     1        513      0      28   0        0    
##  6 Barbados    0  116.      0 1     1        316      1      33   0.868    0    
##  7 Belarus     0  117.      0 1     1        913      0       8   0        0    
##  8 Belgium   184. 132.      1 0     0        124      0     169   0        0    
##  9 Belize      0  113.      0 1     1        339      1      18   0.928    0    
## 10 Bolivia   115.  48.9     0 0.116 0.116    218      0     174   0        0.304
## # … with 75 more rows, and 64 more variables: col_otha <dbl>, legor_uk <dbl>,
## #   legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>,
## #   prot80 <dbl>, catho80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>,
## #   graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>,
## #   engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, latitude <dbl>, gastil <dbl>,
## #   cgexp <dbl>, cgrev <dbl>, ssw <dbl>, rgdph <dbl>, prop1564 <dbl>,
## #   prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, yearele <dbl>, …

Renaming variables

# Rename a variable: "list" becomes "party_list" (new name on the left)
pt_copy <- rename(pt_copy, party_list = list)

# Display the updated contents of "pt_copy"
pt_copy
## # A tibble: 85 × 75
##    country    party_list trade  oecd  pind pindo ctrycd col_uk t_indep col_uka
##    <chr>           <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
##  1 Argentina        257.  18.4     0 0     0        213      0     183   0    
##  2 Australia          0   38.8     1 1     1        193      1      98   0.608
##  3 Austria          183   78.3     1 0     0        122      0     250   0    
##  4 Bahamas            0  102.      0 1     1        313      1      26   0.896
##  5 Bangladesh         0   25.4     0 1     1        513      0      28   0    
##  6 Barbados           0  116.      0 1     1        316      1      33   0.868
##  7 Belarus            0  117.      0 1     1        913      0       8   0    
##  8 Belgium          184. 132.      1 0     0        124      0     169   0    
##  9 Belize             0  113.      0 1     1        339      1      18   0.928
## 10 Bolivia          115.  48.9     0 0.116 0.116    218      0     174   0    
## # … with 75 more rows, and 65 more variables: col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, catho80 <dbl>, confu <dbl>, avelf <dbl>,
## #   govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>,
## #   gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, latitude <dbl>,
## #   gastil <dbl>, cgexp <dbl>, cgrev <dbl>, ssw <dbl>, rgdph <dbl>,
## #   prop1564 <dbl>, prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, …

Sorting a dataset in ascending or descending order with respect to a variable

# Sort the rows from the lowest to the highest value of "trade" (ascending
# order); the result overwrites "pt_copy"
pt_copy <- pt_copy %>%
  arrange(trade)

# Display the re-sorted "pt_copy"
pt_copy
## # A tibble: 85 × 75
##    country    party_list trade  oecd  pind pindo ctrycd col_uk t_indep col_uka
##    <chr>           <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
##  1 Brazil          510.   17.6     0 0     1        223      0     177   0    
##  2 Argentina       257.   18.4     0 0     0        213      0     183   0    
##  3 Japan            67.7  18.8     1 0.867 0.867    158      0     250   0    
##  4 India             0    21.9     0 1     1        534      1      52   0.792
##  5 USA               0    23.0     1 1     1        111      0     250   0    
##  6 Bangladesh        0    25.4     0 1     1        513      0      28   0    
##  7 Peru            153.   25.9     0 0     0        293      0     178   0    
##  8 Uganda            0    30.8     0 1     1        746      1      37   0.852
##  9 Colombia        157.   34.8     0 0     0        233      0     189   0    
## 10 Pakistan          0    38.7     0 1     1        564      1      52   0.792
## # … with 75 more rows, and 65 more variables: col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, catho80 <dbl>, confu <dbl>, avelf <dbl>,
## #   govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>,
## #   gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, latitude <dbl>,
## #   gastil <dbl>, cgexp <dbl>, cgrev <dbl>, ssw <dbl>, rgdph <dbl>,
## #   prop1564 <dbl>, prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, …
# Sort the rows from the highest to the lowest value of "trade" (descending
# order) by wrapping the variable in desc(); the result overwrites "pt_copy"
pt_copy <- pt_copy %>%
  arrange(desc(trade))

# Display the re-sorted "pt_copy"
pt_copy
## # A tibble: 85 × 75
##    country      party_list trade  oecd  pind pindo ctrycd col_uk t_indep col_uka
##    <chr>             <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
##  1 Singapore           0    343.     0     1     1    576      1      34   0.864
##  2 Malta              65.9  190.     0     0     1    181      1      35   0.860
##  3 Luxembourg         60    189.     1     0     1    137      0     160   0    
##  4 Malaysia            0    176.     0     1     1    548      1      42   0.832
##  5 Estonia           101    154.     0     0     1    939      0       8   0    
##  6 Belgium           184.   132.     1     0     0    124      0     169   0    
##  7 Ireland           166    129.     1     0     1    178      1      78   0.688
##  8 Mauritius           0    128.     0     1     1    684      1      31   0.876
##  9 St. Vincent…        0    123.     0     1     1    364      1      20   0.920
## 10 Jamaica             0    122.     0     1     1    343      1      37   0.852
## # … with 75 more rows, and 65 more variables: col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, catho80 <dbl>, confu <dbl>, avelf <dbl>,
## #   govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>,
## #   gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, latitude <dbl>,
## #   gastil <dbl>, cgexp <dbl>, cgrev <dbl>, ssw <dbl>, rgdph <dbl>,
## #   prop1564 <dbl>, prop65 <dbl>, federal <dbl>, eduger <dbl>, spropn <dbl>, …

Creating new variables based on existing variables

# Add a variable named "non_catholic_80", computed by subtracting the Catholic
# share of the population in 1980 ("catho80") from 100, then move "country",
# "catho80", and the new "non_catholic_80" to the front of the dataset
pt_copy <- pt_copy %>%
  mutate(non_catholic_80 = 100 - catho80) %>%
  relocate(country, catho80, non_catholic_80)
# Display the updated "pt_copy"
pt_copy
## # A tibble: 85 × 76
##    country     catho80 non_catholic_80 party_list trade  oecd  pind pindo ctrycd
##    <chr>         <dbl>           <dbl>      <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
##  1 Singapore      4.70           95.3         0    343.     0     1     1    576
##  2 Malta         97.3             2.70       65.9  190.     0     0     1    181
##  3 Luxembourg    93               7          60    189.     1     0     1    137
##  4 Malaysia       2.80           97.2         0    176.     0     1     1    548
##  5 Estonia        2              98         101    154.     0     0     1    939
##  6 Belgium       90              10         184.   132.     1     0     0    124
##  7 Ireland       95.3             4.70      166    129.     1     0     1    178
##  8 Mauritius     31.2            68.8         0    128.     0     1     1    684
##  9 St. Vincen…   19.4            80.6         0    123.     0     1     1    364
## 10 Jamaica        9.60           90.4         0    122.     0     1     1    343
## # … with 75 more rows, and 67 more variables: col_uk <dbl>, t_indep <dbl>,
## #   col_uka <dbl>, col_espa <dbl>, col_otha <dbl>, legor_uk <dbl>,
## #   legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>,
## #   prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>, graft <dbl>,
## #   logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, engfrac <dbl>,
## #   eurfrac <dbl>, frankrom <dbl>, latitude <dbl>, gastil <dbl>, cgexp <dbl>,
## #   cgrev <dbl>, ssw <dbl>, rgdph <dbl>, prop1564 <dbl>, prop65 <dbl>, …

Selecting or deleting variables

Selecting variables

# Keep only the "country", "cgexp", "cgrev", and "trade" variables of
# "pt_copy"; the selection is printed but not stored anywhere
pt_copy %>%
  select(country, cgexp, cgrev, trade)
## # A tibble: 85 × 4
##    country       cgexp cgrev trade
##    <chr>         <dbl> <dbl> <dbl>
##  1 Singapore      18.5  34.7  343.
##  2 Malta          41.0  35.0  190.
##  3 Luxembourg     40.2  45.5  189.
##  4 Malaysia       24.5  26.8  176.
##  5 Estonia        30.0  31.1  154.
##  6 Belgium        47.9  43.7  132.
##  7 Ireland        38.1  34.8  129.
##  8 Mauritius      22.5  21.6  128.
##  9 St. Vincent&G  34.8  28.7  123.
## 10 Jamaica        NA    NA    122.
## # … with 75 more rows
# Keep only the "country", "cgexp", "cgrev", and "trade" variables of
# "pt_copy" and store the selection in a new object, "pt_copy_selection"
pt_copy_selection <- pt_copy %>%
  select(country, cgexp, cgrev, trade)
# Display "pt_copy_selection"
pt_copy_selection
## # A tibble: 85 × 4
##    country       cgexp cgrev trade
##    <chr>         <dbl> <dbl> <dbl>
##  1 Singapore      18.5  34.7  343.
##  2 Malta          41.0  35.0  190.
##  3 Luxembourg     40.2  45.5  189.
##  4 Malaysia       24.5  26.8  176.
##  5 Estonia        30.0  31.1  154.
##  6 Belgium        47.9  43.7  132.
##  7 Ireland        38.1  34.8  129.
##  8 Mauritius      22.5  21.6  128.
##  9 St. Vincent&G  34.8  28.7  123.
## 10 Jamaica        NA    NA    122.
## # … with 75 more rows

Deleting variables

# Drop the "cgrev" variable from "pt_copy_selection" (a minus sign in front of
# a name removes it); the result is printed but not stored
pt_copy_selection %>%
  select(-cgrev)
## # A tibble: 85 × 3
##    country       cgexp trade
##    <chr>         <dbl> <dbl>
##  1 Singapore      18.5  343.
##  2 Malta          41.0  190.
##  3 Luxembourg     40.2  189.
##  4 Malaysia       24.5  176.
##  5 Estonia        30.0  154.
##  6 Belgium        47.9  132.
##  7 Ireland        38.1  129.
##  8 Mauritius      22.5  128.
##  9 St. Vincent&G  34.8  123.
## 10 Jamaica        NA    122.
## # … with 75 more rows
# Drop both "cgexp" and "cgrev" from "pt_copy_selection" by negating a vector
# of names; the result is printed but not stored
pt_copy_selection %>%
  select(-c(cgexp, cgrev))
## # A tibble: 85 × 2
##    country       trade
##    <chr>         <dbl>
##  1 Singapore      343.
##  2 Malta          190.
##  3 Luxembourg     189.
##  4 Malaysia       176.
##  5 Estonia        154.
##  6 Belgium        132.
##  7 Ireland        129.
##  8 Mauritius      128.
##  9 St. Vincent&G  123.
## 10 Jamaica        122.
## # … with 75 more rows
# Drop both "cgexp" and "cgrev" from "pt_copy_selection" and store the result
# in a new object named "pt_copy_trade"
pt_copy_trade <- pt_copy_selection %>%
  select(-c(cgexp, cgrev))
# Display "pt_copy_trade"
pt_copy_trade
## # A tibble: 85 × 2
##    country       trade
##    <chr>         <dbl>
##  1 Singapore      343.
##  2 Malta          190.
##  3 Luxembourg     189.
##  4 Malaysia       176.
##  5 Estonia        154.
##  6 Belgium        132.
##  7 Ireland        129.
##  8 Mauritius      128.
##  9 St. Vincent&G  123.
## 10 Jamaica        122.
## # … with 75 more rows
# Drop both "cgexp" and "cgrev" from "pt_copy_selection" and assign the result
# back to "pt_copy_selection", replacing the previous version of that object
# with one that no longer contains the two variables
pt_copy_selection <- pt_copy_selection %>%
  select(-c(cgexp, cgrev))
# Display the updated "pt_copy_selection"
pt_copy_selection
## # A tibble: 85 × 2
##    country       trade
##    <chr>         <dbl>
##  1 Singapore      343.
##  2 Malta          190.
##  3 Luxembourg     189.
##  4 Malaysia       176.
##  5 Estonia        154.
##  6 Belgium        132.
##  7 Ireland        129.
##  8 Mauritius      128.
##  9 St. Vincent&G  123.
## 10 Jamaica        122.
## # … with 75 more rows

Recoding variables

Creating Dummy Variables from Continuous Numeric Variables

# Derive a dummy variable named "trade_open" from the existing "trade"
# variable: it is 1 when "trade" is greater than or equal to 77 and 0
# otherwise. "country", "trade_open", and "trade" are then moved to the front
# of the dataset, and the result overwrites the existing "pt_copy"

pt_copy <- pt_copy %>%
  mutate(trade_open = ifelse(trade >= 77, 1, 0)) %>%
  relocate(country, trade_open, trade)
# Display the updated "pt_copy"; note the new dummy variable
pt_copy
## # A tibble: 85 × 77
##    country trade_open trade catho80 non_catholic_80 party_list  oecd  pind pindo
##    <chr>        <dbl> <dbl>   <dbl>           <dbl>      <dbl> <dbl> <dbl> <dbl>
##  1 Singap…          1  343.    4.70           95.3         0       0     1     1
##  2 Malta            1  190.   97.3             2.70       65.9     0     0     1
##  3 Luxemb…          1  189.   93               7          60       1     0     1
##  4 Malays…          1  176.    2.80           97.2         0       0     1     1
##  5 Estonia          1  154.    2              98         101       0     0     1
##  6 Belgium          1  132.   90              10         184.      1     0     0
##  7 Ireland          1  129.   95.3             4.70      166       1     0     1
##  8 Maurit…          1  128.   31.2            68.8         0       0     1     1
##  9 St. Vi…          1  123.   19.4            80.6         0       0     1     1
## 10 Jamaica          1  122.    9.60           90.4         0       0     1     1
## # … with 75 more rows, and 68 more variables: ctrycd <dbl>, col_uk <dbl>,
## #   t_indep <dbl>, col_uka <dbl>, col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>,
## #   graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>,
## #   engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, latitude <dbl>, gastil <dbl>,
## #   cgexp <dbl>, cgrev <dbl>, ssw <dbl>, rgdph <dbl>, prop1564 <dbl>, …

Creating categorical variables from continuous numeric variables

# Add a categorical variable named "trade_level" to "pt_copy", coded as:
#   "Low_Trade"          when "trade" is greater than 15 and less than 50
#   "Intermediate_Trade" when "trade" is at least 50 and less than 100
#   "High_Trade"         when "trade" is at least 100
# No condition covers "trade" values of 15 or below. The dataset is then
# reordered so that "country", "trade_level", and "trade" come first
pt_copy <- pt_copy %>%
  mutate(
    trade_level = case_when(
      trade > 15 & trade < 50 ~ "Low_Trade",
      trade >= 50 & trade < 100 ~ "Intermediate_Trade",
      trade >= 100 ~ "High_Trade"
    )
  ) %>%
  relocate(country, trade_level, trade)
# Display the updated "pt_copy"; note the new categorical variable
pt_copy
## # A tibble: 85 × 78
##    country trade_level trade trade_open catho80 non_catholic_80 party_list  oecd
##    <chr>   <chr>       <dbl>      <dbl>   <dbl>           <dbl>      <dbl> <dbl>
##  1 Singap… High_Trade   343.          1    4.70           95.3         0       0
##  2 Malta   High_Trade   190.          1   97.3             2.70       65.9     0
##  3 Luxemb… High_Trade   189.          1   93               7          60       1
##  4 Malays… High_Trade   176.          1    2.80           97.2         0       0
##  5 Estonia High_Trade   154.          1    2              98         101       0
##  6 Belgium High_Trade   132.          1   90              10         184.      1
##  7 Ireland High_Trade   129.          1   95.3             4.70      166       1
##  8 Maurit… High_Trade   128.          1   31.2            68.8         0       0
##  9 St. Vi… High_Trade   123.          1   19.4            80.6         0       0
## 10 Jamaica High_Trade   122.          1    9.60           90.4         0       0
## # … with 75 more rows, and 70 more variables: pind <dbl>, pindo <dbl>,
## #   ctrycd <dbl>, col_uk <dbl>, t_indep <dbl>, col_uka <dbl>, col_espa <dbl>,
## #   col_otha <dbl>, legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>,
## #   legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>, confu <dbl>, avelf <dbl>,
## #   govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>,
## #   gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, latitude <dbl>,
## #   gastil <dbl>, cgexp <dbl>, cgrev <dbl>, ssw <dbl>, rgdph <dbl>, …

Creating dummy variables from categorical variables

# Expand the "trade_level" column into one dummy variable per category, then
# move "country", "trade_level", and the new dummies to the front of the dataset
pt_copy <- pt_copy %>%
  dummy_cols(select_columns = "trade_level") %>%
  relocate(
    country,
    trade_level,
    trade_level_High_Trade,
    trade_level_Intermediate_Trade,
    trade_level_Low_Trade
  )
# Display the updated "pt_copy"
pt_copy
## # A tibble: 85 × 81
##    country  trade_level trade_level_Hig… trade_level_Int… trade_level_Low… trade
##    <chr>    <chr>                  <int>            <int>            <int> <dbl>
##  1 Singapo… High_Trade                 1                0                0  343.
##  2 Malta    High_Trade                 1                0                0  190.
##  3 Luxembo… High_Trade                 1                0                0  189.
##  4 Malaysia High_Trade                 1                0                0  176.
##  5 Estonia  High_Trade                 1                0                0  154.
##  6 Belgium  High_Trade                 1                0                0  132.
##  7 Ireland  High_Trade                 1                0                0  129.
##  8 Mauriti… High_Trade                 1                0                0  128.
##  9 St. Vin… High_Trade                 1                0                0  123.
## 10 Jamaica  High_Trade                 1                0                0  122.
## # … with 75 more rows, and 75 more variables: trade_open <dbl>, catho80 <dbl>,
## #   non_catholic_80 <dbl>, party_list <dbl>, oecd <dbl>, pind <dbl>,
## #   pindo <dbl>, ctrycd <dbl>, col_uk <dbl>, t_indep <dbl>, col_uka <dbl>,
## #   col_espa <dbl>, col_otha <dbl>, legor_uk <dbl>, legor_so <dbl>,
## #   legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>, confu <dbl>,
## #   avelf <dbl>, govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>,
## #   yrsopen <dbl>, gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, …

Subsetting rows of data based on criteria

The filter function

# Keep only the OECD observations of "pt_copy" (rows where "oecd" equals 1),
# move "country" and "oecd" to the front, and store the result in
# "oecd_countries"
oecd_countries <- pt_copy %>%
  filter(oecd == 1) %>%
  relocate(country, oecd)
# Display "oecd_countries"
oecd_countries
## # A tibble: 25 × 81
##    country   oecd trade_level trade_level_Hig… trade_level_Int… trade_level_Low…
##    <chr>    <dbl> <chr>                  <int>            <int>            <int>
##  1 Luxembo…     1 High_Trade                 1                0                0
##  2 Belgium      1 High_Trade                 1                0                0
##  3 Ireland      1 High_Trade                 1                0                0
##  4 Netherl…     1 High_Trade                 1                0                0
##  5 Austria      1 Intermedia…                0                1                0
##  6 Norway       1 Intermedia…                0                1                0
##  7 Switzer…     1 Intermedia…                0                1                0
##  8 Portugal     1 Intermedia…                0                1                0
##  9 Sweden       1 Intermedia…                0                1                0
## 10 Iceland      1 Intermedia…                0                1                0
## # … with 15 more rows, and 75 more variables: trade <dbl>, trade_open <dbl>,
## #   catho80 <dbl>, non_catholic_80 <dbl>, party_list <dbl>, pind <dbl>,
## #   pindo <dbl>, ctrycd <dbl>, col_uk <dbl>, t_indep <dbl>, col_uka <dbl>,
## #   col_espa <dbl>, col_otha <dbl>, legor_uk <dbl>, legor_so <dbl>,
## #   legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>, confu <dbl>,
## #   avelf <dbl>, govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>,
## #   yrsopen <dbl>, gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, …
# Keep only the observations whose "cgrev" (central government revenue as % of
# GDP) is greater than 40, move "country" and "cgrev" to the front, and store
# the result in "high_revenues"
high_revenues <- pt_copy %>%
  filter(cgrev > 40) %>%
  relocate(country, cgrev)
# Display "high_revenues"
high_revenues
## # A tibble: 10 × 81
##    country  cgrev trade_level trade_level_Hig… trade_level_Int… trade_level_Low…
##    <chr>    <dbl> <chr>                  <int>            <int>            <int>
##  1 Luxembo…  45.5 High_Trade                 1                0                0
##  2 Belgium   43.7 High_Trade                 1                0                0
##  3 Netherl…  47.6 High_Trade                 1                0                0
##  4 Botswana  50.8 Intermedia…                0                1                0
##  5 Hungary   45.6 Intermedia…                0                1                0
##  6 Norway    41.1 Intermedia…                0                1                0
##  7 Sweden    40.8 Intermedia…                0                1                0
##  8 Poland    40.3 Low_Trade                  0                0                1
##  9 France    40.9 Low_Trade                  0                0                1
## 10 Italy     41.2 Low_Trade                  0                0                1
## # … with 75 more variables: trade <dbl>, trade_open <dbl>, catho80 <dbl>,
## #   non_catholic_80 <dbl>, party_list <dbl>, oecd <dbl>, pind <dbl>,
## #   pindo <dbl>, ctrycd <dbl>, col_uk <dbl>, t_indep <dbl>, col_uka <dbl>,
## #   col_espa <dbl>, col_otha <dbl>, legor_uk <dbl>, legor_so <dbl>,
## #   legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>, prot80 <dbl>, confu <dbl>,
## #   avelf <dbl>, govef <dbl>, graft <dbl>, logyl <dbl>, loga <dbl>,
## #   yrsopen <dbl>, gadp <dbl>, engfrac <dbl>, eurfrac <dbl>, frankrom <dbl>, …
# Keep only the observations whose "catho80" value is less than or equal to 50,
# move "country" and "catho80" to the front, and store the result in
# "minority_catholic"
minority_catholic <- pt_copy %>%
  filter(catho80 <= 50) %>%
  relocate(country, catho80)
# Display "minority_catholic"
minority_catholic
## # A tibble: 53 × 81
##    country       catho80 trade_level trade_level_High_Trade trade_level_Interme…
##    <chr>           <dbl> <chr>                        <int>                <int>
##  1 Singapore        4.70 High_Trade                       1                    0
##  2 Malaysia         2.80 High_Trade                       1                    0
##  3 Estonia          2    High_Trade                       1                    0
##  4 Mauritius       31.2  High_Trade                       1                    0
##  5 St. Vincent&G   19.4  High_Trade                       1                    0
##  6 Jamaica          9.60 High_Trade                       1                    0
##  7 Gambia           1.90 High_Trade                       1                    0
##  8 Fiji             9    High_Trade                       1                    0
##  9 Belarus         14    High_Trade                       1                    0
## 10 Barbados         5.90 High_Trade                       1                    0
## # … with 43 more rows, and 76 more variables: trade_level_Low_Trade <int>,
## #   trade <dbl>, trade_open <dbl>, non_catholic_80 <dbl>, party_list <dbl>,
## #   oecd <dbl>, pind <dbl>, pindo <dbl>, ctrycd <dbl>, col_uk <dbl>,
## #   t_indep <dbl>, col_uka <dbl>, col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>,
## #   graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, …

Using the & operator

# Keep only the federal OECD countries (rows where "oecd" equals 1 AND
# "federal" equals 1), move "country", "oecd", and "federal" to the front, and
# store the result in a new object named "oecd_federal_countries"
oecd_federal_countries <- pt_copy %>%
  filter(oecd == 1 & federal == 1) %>%
  relocate(country, oecd, federal)
# Display "oecd_federal_countries"
oecd_federal_countries
## # A tibble: 7 × 81
##   country      oecd federal trade_level        trade_level_Hig… trade_level_Int…
##   <chr>       <dbl>   <dbl> <chr>                         <int>            <int>
## 1 Austria         1       1 Intermediate_Trade                0                1
## 2 Switzerland     1       1 Intermediate_Trade                0                1
## 3 Canada          1       1 Intermediate_Trade                0                1
## 4 Germany         1       1 Low_Trade                         0                0
## 5 Mexico          1       1 Low_Trade                         0                0
## 6 Australia       1       1 Low_Trade                         0                0
## 7 USA             1       1 Low_Trade                         0                0
## # … with 75 more variables: trade_level_Low_Trade <int>, trade <dbl>,
## #   trade_open <dbl>, catho80 <dbl>, non_catholic_80 <dbl>, party_list <dbl>,
## #   pind <dbl>, pindo <dbl>, ctrycd <dbl>, col_uk <dbl>, t_indep <dbl>,
## #   col_uka <dbl>, col_espa <dbl>, col_otha <dbl>, legor_uk <dbl>,
## #   legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>, legor_sc <dbl>,
## #   prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>, graft <dbl>,
## #   logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, engfrac <dbl>, …

Using the | operator

# Keep the observations whose "continent" is "africa" OR "asiae", move
# "continent" to the front, and store the result in "asia_europe_africa"
# NOTE(review): the object name suggests "asiae" means Asia/Europe, but the
# "asiae" rows printed below are Asian/Pacific countries -- confirm the
# meaning of this code against the dataset codebook
asia_europe_africa <- pt_copy %>%
  filter(continent == "africa" | continent == "asiae") %>%
  relocate(continent)
# Display "asia_europe_africa"
asia_europe_africa
## # A tibble: 24 × 81
##    continent country       trade_level        trade_level_High… trade_level_Int…
##    <chr>     <chr>         <chr>                          <int>            <int>
##  1 asiae     Singapore     High_Trade                         1                0
##  2 asiae     Malaysia      High_Trade                         1                0
##  3 africa    Mauritius     High_Trade                         1                0
##  4 africa    Gambia        High_Trade                         1                0
##  5 asiae     Fiji          High_Trade                         1                0
##  6 africa    Namibia       High_Trade                         1                0
##  7 asiae     Papua N. Guin High_Trade                         1                0
##  8 asiae     Taiwan        Intermediate_Trade                 0                1
##  9 africa    Botswana      Intermediate_Trade                 0                1
## 10 asiae     Thailand      Intermediate_Trade                 0                1
## # … with 14 more rows, and 76 more variables: trade_level_Low_Trade <int>,
## #   trade <dbl>, trade_open <dbl>, catho80 <dbl>, non_catholic_80 <dbl>,
## #   party_list <dbl>, oecd <dbl>, pind <dbl>, pindo <dbl>, ctrycd <dbl>,
## #   col_uk <dbl>, t_indep <dbl>, col_uka <dbl>, col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>,
## #   graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, …

Filtering for observations that do NOT meet a condition:

# Keep every observation whose "continent" is NOT "africa", move "continent"
# to the front, and store the result in "pt_copy_sans_africa"
pt_copy_sans_africa <- pt_copy %>%
  filter(continent != "africa") %>%
  relocate(continent)
# Display "pt_copy_sans_africa"
pt_copy_sans_africa
## # A tibble: 74 × 81
##    continent country         trade_level trade_level_High_Trade trade_level_Int…
##    <chr>     <chr>           <chr>                        <int>            <int>
##  1 asiae     Singapore       High_Trade                       1                0
##  2 other     Malta           High_Trade                       1                0
##  3 other     Luxembourg      High_Trade                       1                0
##  4 asiae     Malaysia        High_Trade                       1                0
##  5 other     Estonia         High_Trade                       1                0
##  6 other     Belgium         High_Trade                       1                0
##  7 other     Ireland         High_Trade                       1                0
##  8 laam      St. Vincent&G   High_Trade                       1                0
##  9 laam      Jamaica         High_Trade                       1                0
## 10 other     Slovak Republic High_Trade                       1                0
## # … with 64 more rows, and 76 more variables: trade_level_Low_Trade <int>,
## #   trade <dbl>, trade_open <dbl>, catho80 <dbl>, non_catholic_80 <dbl>,
## #   party_list <dbl>, oecd <dbl>, pind <dbl>, pindo <dbl>, ctrycd <dbl>,
## #   col_uk <dbl>, t_indep <dbl>, col_uka <dbl>, col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>,
## #   graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, …

Exploratory visualizations using ggplot

Bar charts

Basic bar chart

# Build a bar chart of the "cgexp" variable (central government expenditure as
# a share of GDP), with countries ordered by their "cgexp" value, and store
# the plot in an object named "cgexp_viz1"
cgexp_viz1 <- pt_copy %>%
  drop_na(cgexp) %>% # rows with a missing "cgexp" are removed before plotting
  ggplot() +
  geom_col(aes(x = reorder(country, cgexp), y = cgexp)) +
  labs(
    title = "Central Govt Expenditure as Pct of GDP (1990-1998 Average)",
    x = "Country Name",
    y = "CGEXP"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5), # centers the title
    axis.text.x = element_text(angle = 90)  # rotates the country labels
  )

# Display "cgexp_viz1"
cgexp_viz1

Inverted bar chart

# Build an inverted bar chart of the "cgexp" variable (countries on the
# vertical axis) and store the plot in an object named "cgexp_viz2"
cgexp_viz2 <- pt_copy %>%
  drop_na(cgexp) %>% # rows with a missing "cgexp" are removed before plotting
  ggplot() +
  geom_col(aes(x = reorder(country, cgexp), y = cgexp)) +
  coord_flip() + # swaps the axes so the bars run horizontally
  labs(
    title = "Central Govt Expenditure as Pct of GDP (1990-1998 Average) ",
    x = "Country Name",
    y = "CGEXP"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centers the title

# Display "cgexp_viz2"
cgexp_viz2

Scatterplots

Basic scatterplot

# Build a scatterplot with the "cgexp" variable on the x-axis and the "trade"
# variable on the y-axis, and store it in "scatter_cgexp_trade"
scatter_cgexp_trade <- pt_copy %>%
  drop_na(cgexp) %>% # rows with a missing "cgexp" are removed before plotting
  ggplot() +
  geom_point(aes(x = cgexp, y = trade)) +
  labs(
    title = "Trade Share of GDP \nas a function of\n Central Govt Expenditure (1990-1998 Average) ",
    x = "Central Government Expenditure (Pct of GDP)",
    y = "Overall Trade (Pct of GDP)"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centers the title

# Display "scatter_cgexp_trade"
scatter_cgexp_trade

Grouped scatterplot

# Build a scatterplot with the "cgexp" variable on the x-axis and the "trade"
# variable on the y-axis, coloring the points by "continent"; the plot is
# stored in "scatter_cgexp_trade_grouped"
scatter_cgexp_trade_grouped <- pt_copy %>%
  drop_na(cgexp) %>% # rows with a missing "cgexp" are removed before plotting
  ggplot() +
  geom_point(aes(x = cgexp, y = trade, color = continent)) +
  labs(
    title = "Trade Share of GDP \nas a function of\n Central Govt Expenditure (1990-1998 Average) ",
    x = "Central Government Expenditure (Pct of GDP)",
    y = "Overall Trade (Pct of GDP)"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centers the title

# Display "scatter_cgexp_trade_grouped"
scatter_cgexp_trade_grouped

Scatterplot with line of best fit

# Build a scatterplot with the "cgexp" variable on the x-axis and the "trade"
# variable on the y-axis, and add a linear (method = "lm") line of best fit;
# the plot is stored in "scatter_cgexp_trade_line"
scatter_cgexp_trade_line <- pt_copy %>%
  drop_na(cgexp) %>% # rows with a missing "cgexp" are removed before plotting
  # The x/y mapping is declared once here so that the point layer and the
  # fitted-line layer both inherit it and cannot drift out of sync
  ggplot(mapping = aes(x = cgexp, y = trade)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Trade Share of GDP \nas a function of\n Central Govt Expenditure (1990-1998 Average) ",
    x = "Central Government Expenditure (Pct of GDP)",
    y = "Overall Trade (Pct of GDP)"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centers the title

# Display "scatter_cgexp_trade_line"
scatter_cgexp_trade_line
## `geom_smooth()` using formula 'y ~ x'
test

test

Elementary Data Analysis

Computing correlations

Computing correlations between two variables in a dataset

# Runs a Pearson correlation test between the "trade" and "cgexp" variables and
# prints the correlation coefficient along with its t statistic, p-value, and
# 95 percent confidence interval. cor.test() has no "use" argument (that
# argument belongs to cor()); it works on the complete pairs of observations
# on its own, so no extra missing-data option is needed here
cor.test(pt_copy$trade, pt_copy$cgexp)
## 
##  Pearson's product-moment correlation
## 
## data:  pt_copy$trade and pt_copy$cgexp
## t = 1.8131, df = 80, p-value = 0.07356
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.01915884  0.39850057
## sample estimates:
##       cor 
## 0.1986743

Generating and exporting a correlation matrix

# Pull out the variables that should appear in the correlation matrix
desired_variables <- pt_copy %>%
  select(trade, cgexp, cgrev, catho80)
# Compute the correlation matrix of "desired_variables" (use = "complete.obs"
# restricts the calculation to complete observations) and store it in
# "cor_matrix"
cor_matrix <- cor(desired_variables, use = "complete.obs")
# Display "cor_matrix"
cor_matrix
##               trade      cgexp      cgrev     catho80
## trade    1.00000000  0.1792884  0.3458730 -0.08442666
## cgexp    0.17928838  1.0000000  0.9094998 -0.07010910
## cgrev    0.34587298  0.9094998  1.0000000 -0.05923500
## catho80 -0.08442666 -0.0701091 -0.0592350  1.00000000
# Export the correlation matrix stored in "cor_matrix" with stargazer: a
# plain-text table (type = "text") rounded to 3 digits is printed, and the
# table is also written to the file named in "out"
stargazer(
  cor_matrix,
  type = "text",
  title = "Correlation Matrix",
  digits = 3,
  out = "corr_table.html"
)
## 
## Correlation Matrix
## ====================================
##         trade  cgexp  cgrev  catho80
## ------------------------------------
## trade     1    0.179  0.346  -0.084 
## cgexp   0.179    1    0.909  -0.070 
## cgrev   0.346  0.909    1    -0.059 
## catho80 -0.084 -0.070 -0.059    1   
## ------------------------------------

Basic regression analysis

Running a regression model

# Fits a linear regression with "cgexp" as the dependent variable and stores
# the fitted model in an object named "regression1"
regression1 <- lm(
  cgexp ~ gastil + lyp + trade + prop1564 + prop65 + federal + oecd,
  data = pt_copy
)
# Displays the regression table for "regression1"
summary(regression1)
## 
## Call:
## lm(formula = cgexp ~ gastil + lyp + trade + prop1564 + prop65 + 
##     federal + oecd, data = pt_copy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.9861  -4.6981  -0.5521   4.4482  16.1124 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 52.17290   16.08572   3.243  0.00179 ** 
## gastil      -2.15202    1.10609  -1.946  0.05561 .  
## lyp         -2.04441    2.00721  -1.019  0.31184    
## trade        0.04978    0.01924   2.587  0.01170 *  
## prop1564    -0.28482    0.26686  -1.067  0.28939    
## prop65       1.58627    0.33548   4.728 1.09e-05 ***
## federal     -4.58101    2.38015  -1.925  0.05822 .  
## oecd         0.96969    2.97171   0.326  0.74514    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.064 on 72 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.5865, Adjusted R-squared:  0.5463 
## F-statistic: 14.59 on 7 and 72 DF,  p-value: 1.137e-11

Working with categorical variables in a regression model

Working with categorical variables by using factors

# Convert the "continent" column of "pt_copy" into a factor
pt_copy[["continent"]] <- as.factor(pt_copy[["continent"]])

# List the levels of the newly created factor
levels(pt_copy[["continent"]])
## [1] "africa" "asiae"  "laam"   "other"
# Make "other" the reference (baseline) level of the "continent" factor
pt_copy[["continent"]] <- relevel(pt_copy[["continent"]], ref = "other")
# Fit the model that adds "continent" and the colonial-origin variables, and
# store it in an object named "regression2"
regression2 <- lm(
  cgexp ~ gastil + lyp + trade + prop1564 + prop65 + federal + continent +
    col_espa + col_uka + col_otha + oecd,
  data = pt_copy
)

# Display the regression table for "regression2"
summary(regression2)
## 
## Call:
## lm(formula = cgexp ~ gastil + lyp + trade + prop1564 + prop65 + 
##     federal + continent + col_espa + col_uka + col_otha + oecd, 
##     data = pt_copy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.3617  -3.9886  -0.3921   4.6050  17.3752 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     44.88833   17.56879   2.555   0.0129 *
## gastil          -2.06438    1.13670  -1.816   0.0739 .
## lyp             -0.12414    2.08305  -0.060   0.9527  
## trade            0.03018    0.02069   1.459   0.1494  
## prop1564        -0.25399    0.27421  -0.926   0.3577  
## prop65           0.98675    0.45822   2.153   0.0349 *
## federal         -4.73466    2.34235  -2.021   0.0473 *
## continentafrica -3.42365    4.58573  -0.747   0.4580  
## continentasiae  -7.72223    4.17322  -1.850   0.0687 .
## continentlaam   -9.03522    4.25535  -2.123   0.0375 *
## col_espa         0.58034    8.05720   0.072   0.9428  
## col_uka          2.68929    3.22769   0.833   0.4077  
## col_otha        -0.80223    3.02997  -0.265   0.7920  
## oecd            -2.37769    3.33814  -0.712   0.4788  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.789 on 66 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.6499, Adjusted R-squared:  0.5809 
## F-statistic: 9.424 on 13 and 66 DF,  p-value: 1.21e-10

Working with categorical variables by creating dummy variables

# Add dummy (indicator) columns derived from the "continent" column
pt_copy <- dummy_cols(pt_copy, select_columns = "continent")
# Fit the same model using the continent dummies directly; the dummy for the
# "other" category is left out of the formula so it acts as the baseline
regression2_alt <- lm(
  cgexp ~ gastil + lyp + trade + prop1564 + prop65 + federal +
    continent_africa + continent_asiae + continent_laam +
    col_espa + col_uka + col_otha + oecd,
  data = pt_copy
)
# Display the regression table for "regression2_alt"
summary(regression2_alt)
## 
## Call:
## lm(formula = cgexp ~ gastil + lyp + trade + prop1564 + prop65 + 
##     federal + continent_africa + continent_asiae + continent_laam + 
##     col_espa + col_uka + col_otha + oecd, data = pt_copy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.3617  -3.9886  -0.3921   4.6050  17.3752 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      44.88833   17.56879   2.555   0.0129 *
## gastil           -2.06438    1.13670  -1.816   0.0739 .
## lyp              -0.12414    2.08305  -0.060   0.9527  
## trade             0.03018    0.02069   1.459   0.1494  
## prop1564         -0.25399    0.27421  -0.926   0.3577  
## prop65            0.98675    0.45822   2.153   0.0349 *
## federal          -4.73466    2.34235  -2.021   0.0473 *
## continent_africa -3.42365    4.58573  -0.747   0.4580  
## continent_asiae  -7.72223    4.17322  -1.850   0.0687 .
## continent_laam   -9.03522    4.25535  -2.123   0.0375 *
## col_espa          0.58034    8.05720   0.072   0.9428  
## col_uka           2.68929    3.22769   0.833   0.4077  
## col_otha         -0.80223    3.02997  -0.265   0.7920  
## oecd             -2.37769    3.33814  -0.712   0.4788  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.789 on 66 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.6499, Adjusted R-squared:  0.5809 
## F-statistic: 9.424 on 13 and 66 DF,  p-value: 1.21e-10

Working with interaction terms in regression models

# Fit a model in which "trade" and "federal" enter with their interaction
# ("trade * federal" expands to both main effects plus "trade:federal")
trade_federal_interaction <- lm(
  cgexp ~ trade * federal,
  data = pt_copy
)
# Display the regression table for "trade_federal_interaction"
summary(trade_federal_interaction)
## 
## Call:
## lm(formula = cgexp ~ trade * federal, data = pt_copy)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.0774  -8.1325   0.5782   7.7004  21.0072 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   26.708234   2.517787  10.608   <2e-16 ***
## trade          0.034512   0.026410   1.307    0.195    
## federal       -4.695595   5.512752  -0.852    0.397    
## trade:federal  0.009965   0.076991   0.129    0.897    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.42 on 77 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.05761,    Adjusted R-squared:  0.02089 
## F-statistic: 1.569 on 3 and 77 DF,  p-value: 0.2037

Plotting marginal effects

# Finds mean value of trade variable and stores it, so that the exact
# (unrounded) value can be reused below instead of being retyped from the
# printed output; na.rm = TRUE keeps this working if "trade" ever has NAs
trade_mean <- mean(pt_copy$trade, na.rm = TRUE)
trade_mean
## [1] 78.7659
# Calculates marginal effects of federalism, with "trade" held at its mean
marginal_effect_federalism <- ggpredict(
  trade_federal_interaction,
  terms = "federal",
  condition = c(trade = trade_mean)
)
# Prints marginal effects table
marginal_effect_federalism
## # Predicted values of cgexp
## 
## federal | Predicted |         95% CI
## ------------------------------------
##       0 |     29.43 | [26.94, 31.91]
##       1 |     25.52 | [18.91, 32.12]
# Plot marginal effects of federalism. This reuses "marginal_effect_federalism"
# so the plotted points and error bars are exactly the predictions computed
# with "trade" fixed through the "condition" argument; calling ggpredict()
# again without "condition" leaves the value of "trade" up to ggeffects, which
# may not match the mean used for the table.
marginal_effect_federalism %>%
  ggplot(aes(x, predicted)) +
  geom_point() +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.05) +
  # "federal" is a 0/1 indicator, so only show those two tick marks
  scale_x_continuous(breaks = c(0, 1)) +
  labs(
    title = "Predicted Effects of Federalism on Gov't Expenditure\n(with trade set to mean)",
    y = "Predicted Expenditure",
    x = "Federalism"
  )

Creating and exporting regression tables

# Collect the fitted models that should appear side by side in the table
model_list <- list(regression1, regression2)
# Build the table in text format and save it to "cgexp_regressions.txt"
stargazer(
  model_list,
  type = "text",
  out = "cgexp_regressions.txt"
)
# Build the table in html format and save it to "cgexp_regressions.html"
stargazer(
  model_list,
  type = "html",
  out = "cgexp_regressions.html"
)

Working with multiple datasets

Merging data

# Load the capital mobility dataset from a csv file in the working directory
capital_mobility <- read_csv(file = "chinn_eto_capitalopenness_summary.csv")

Alternatively,

# Load the capital mobility dataset straight from its Github repository URL
capital_mobility_url <- "https://raw.githubusercontent.com/aranganath24/r_primer/main/workshop_data/chinn_eto_capitalopenness_summary.csv"
capital_mobility <- read_csv(file = capital_mobility_url)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   ccode = col_character(),
##   country_name = col_character(),
##   ctrycd = col_double(),
##   kaopen = col_double(),
##   ka_open = col_double()
## )
# View capital mobility data: typing the object's name prints the tibble,
# showing its dimensions, column types and first rows
capital_mobility
## # A tibble: 182 × 5
##    ccode country_name         ctrycd kaopen ka_open
##    <chr> <chr>                 <dbl>  <dbl>   <dbl>
##  1 ABW   Aruba                   314 -0.607  0.309 
##  2 AFG   Afghanistan             512 -1.92   0     
##  3 AGO   Angola                  614 -1.57   0.0825
##  4 ALB   Albania                 914 -0.148  0.417 
##  5 ANT   Netherlands Antilles    353 -0.104  0.427 
##  6 ARE   United Arab Emirates    466  2.33   1     
##  7 ARG   Argentina               213  0.662  0.607 
##  8 ARM   Armenia                 911  1.17   0.725 
##  9 ATG   Antigua and Barbuda     311  1.98   0.916 
## 10 AUS   Australia               193  2.16   0.960 
## # … with 172 more rows
# Joins "capital_mobility" to "pt_copy" using "ctrycd" as the join field and
# assigns the joined dataset to an object named "pt_capitalmobility".
# inner_join() keeps only countries whose "ctrycd" appears in BOTH datasets:
# countries found only in "capital_mobility" are dropped, and so is any
# "pt_copy" country that has no match in "capital_mobility" (use left_join()
# instead if every "pt_copy" row must be retained).
pt_capitalmobility <- inner_join(pt_copy, capital_mobility, by = "ctrycd")
# prints contents of "pt_capitalmobility"
pt_capitalmobility
## # A tibble: 84 × 89
##    country        kaopen trade_level trade_level_High_Trade trade_level_Interme…
##    <chr>           <dbl> <chr>                        <int>                <int>
##  1 Singapore      2.10   High_Trade                       1                    0
##  2 Malta         -1.06   High_Trade                       1                    0
##  3 Malaysia       1.36   High_Trade                       1                    0
##  4 Estonia        2.08   High_Trade                       1                    0
##  5 Belgium        2.16   High_Trade                       1                    0
##  6 Ireland        1.36   High_Trade                       1                    0
##  7 Mauritius     -0.0671 High_Trade                       1                    0
##  8 St. Vincent&G -0.505  High_Trade                       1                    0
##  9 Jamaica        0.245  High_Trade                       1                    0
## 10 Gambia         1.37   High_Trade                       1                    0
## # … with 74 more rows, and 84 more variables: trade_level_Low_Trade <int>,
## #   trade <dbl>, trade_open <dbl>, catho80 <dbl>, non_catholic_80 <dbl>,
## #   party_list <dbl>, oecd <dbl>, pind <dbl>, pindo <dbl>, ctrycd <dbl>,
## #   col_uk <dbl>, t_indep <dbl>, col_uka <dbl>, col_espa <dbl>, col_otha <dbl>,
## #   legor_uk <dbl>, legor_so <dbl>, legor_fr <dbl>, legor_ge <dbl>,
## #   legor_sc <dbl>, prot80 <dbl>, confu <dbl>, avelf <dbl>, govef <dbl>,
## #   graft <dbl>, logyl <dbl>, loga <dbl>, yrsopen <dbl>, gadp <dbl>, …