library(rtweet)
library(httpuv)
library(tidyverse)
library(tidytext)
Make sure you’re logged in to Twitter in a web browser before running the code below.
# Search for recent tweets tagged #ValentinesDay. Up to 1000 of the most
# recent English-language tweets are requested; retweets and replies are
# excluded. Tweet times are reported in GMT.
valentine_tweets <- search_tweets(
  "#ValentinesDay",
  n = 1000,
  include_rts = FALSE,
  `-filter` = "replies",
  lang = "en"
)

# Display the search results
valentine_tweets
## # A tibble: 991 × 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 266143492 1495089702940073987 2022-02-19 17:35:37 InnatLB "Ignite… Later…
## 2 542918259 1495089698045321223 2022-02-19 17:35:36 ruby_redsky "silver… Twitt…
## 3 542918259 1494795575006568450 2022-02-18 22:06:51 ruby_redsky "silver… Twitt…
## 4 542918259 1494795421138534411 2022-02-18 22:06:15 ruby_redsky "Obsidi… Twitt…
## 5 542918259 1494865769708064772 2022-02-19 02:45:47 ruby_redsky "Wolf t… Twitt…
## 6 542918259 1495043965619515394 2022-02-19 14:33:52 ruby_redsky "Black … Twitt…
## 7 542918259 1494731181904048128 2022-02-18 17:50:59 ruby_redsky "Shooti… Twitt…
## 8 542918259 1495064426482970633 2022-02-19 15:55:10 ruby_redsky "Anklet… Twitt…
## 9 542918259 1494867826338840576 2022-02-19 02:53:57 ruby_redsky "dragon… Twitt…
## 10 542918259 1494756928421044226 2022-02-18 19:33:17 ruby_redsky "twig h… Twitt…
## # … with 981 more rows, and 84 more variables: display_text_width <dbl>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, quote_count <int>,
## # reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## # urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## # media_t.co <list>, media_expanded_url <list>, media_type <list>, …
# Pull tweets from a single account (account timelines do not have the same
# time constraints as hashtag searches).
# Request the last 500 tweets from @VDay, a global activist movement to end
# violence against women that is associated with Valentine's Day. The number
# of rows returned can differ from the number requested (for example because
# of deletions); the run recorded below returned 600 rows.
vday_tweets <- get_timeline(user = "@VDay", n = 500)

# Display the timeline
vday_tweets
## # A tibble: 600 × 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 27031487 1495076927194890243 2022-02-19 16:44:51 VDay "#Afghan… Twitt…
## 2 27031487 1494963941428006914 2022-02-19 09:15:53 VDay "\"Lider… Twitt…
## 3 27031487 1494924854990020608 2022-02-19 06:40:34 VDay "\"Vario… Twitt…
## 4 27031487 1494924798161424385 2022-02-19 06:40:20 VDay "\"Vario… Twitt…
## 5 27031487 1494924739898269696 2022-02-19 06:40:07 VDay "\"Vario… Twitt…
## 6 27031487 1494909659416653833 2022-02-19 05:40:11 VDay "\"#WeOn… Twitt…
## 7 27031487 1494884200922505216 2022-02-19 03:59:01 VDay "\"Atten… Twitt…
## 8 27031487 1494843840171167746 2022-02-19 01:18:39 VDay "\"CARE … Twitt…
## 9 27031487 1494826837452607493 2022-02-19 00:11:05 VDay "This #B… Twitt…
## 10 27031487 1494794626737184769 2022-02-18 22:03:05 VDay "New Mob… Twitt…
## # … with 590 more rows, and 84 more variables: display_text_width <dbl>,
## # reply_to_status_id <chr>, reply_to_user_id <chr>,
## # reply_to_screen_name <chr>, is_quote <lgl>, is_retweet <lgl>,
## # favorite_count <int>, retweet_count <int>, quote_count <int>,
## # reply_count <int>, hashtags <list>, symbols <list>, urls_url <list>,
## # urls_t.co <list>, urls_expanded_url <list>, media_url <list>,
## # media_t.co <list>, media_expanded_url <list>, media_type <list>, …
Use vday_tweets to find the 10 tweets with the most favorites
# Extracts 10 most favorited tweets from "vday_tweets"
# Keep the rows of vday_tweets with the 10 highest favorite counts.
# slice_max() keeps ties by default, so more than 10 rows can come back
# (the run recorded below returned 11).
vday_tweets_most_favorites <- slice_max(
  vday_tweets,
  order_by = favorite_count,
  n = 10
)

# Display the most-favorited tweets
vday_tweets_most_favorites
## # A tibble: 11 × 90
## user_id status_id created_at screen_name text source
## <chr> <chr> <dttm> <chr> <chr> <chr>
## 1 27031487 1493244612063891456 2022-02-14 15:23:53 VDay "\"On 14… Twitt…
## 2 27031487 1493251071208161287 2022-02-14 15:49:33 VDay "TODAY a… Twitt…
## 3 27031487 1491631604346753025 2022-02-10 04:34:22 VDay "\"At 8:… Twitt…
## 4 27031487 1467250501678813189 2021-12-04 21:52:34 VDay "\"Gabri… Twitt…
## 5 27031487 1493135547686010881 2022-02-14 08:10:30 VDay "#1Billi… Twitt…
## 6 27031487 1452073970006593543 2021-10-24 00:46:27 VDay "\"Brave… Twitt…
## 7 27031487 1484337581567332352 2022-01-21 01:30:31 VDay "\"Despi… Twitt…
## 8 27031487 1493017894866669568 2022-02-14 00:22:59 VDay "#1Billi… Twitt…
## 9 27031487 1493079847509258241 2022-02-14 04:29:10 VDay "\"Are y… Twitt…
## 10 27031487 1463190977049333763 2021-11-23 17:01:28 VDay "\"What … Twitt…
## 11 27031487 1449101796325031936 2021-10-15 19:56:05 VDay "\"We ex… Twitt…
## # … with 84 more variables: display_text_width <dbl>, reply_to_status_id <chr>,
## # reply_to_user_id <chr>, reply_to_screen_name <chr>, is_quote <lgl>,
## # is_retweet <lgl>, favorite_count <int>, retweet_count <int>,
## # quote_count <int>, reply_count <int>, hashtags <list>, symbols <list>,
## # urls_url <list>, urls_t.co <list>, urls_expanded_url <list>,
## # media_url <list>, media_t.co <list>, media_expanded_url <list>,
## # media_type <list>, ext_media_url <list>, ext_media_t.co <list>, …
Use vday_tweets to find the 10 tweets with the most retweets
# Extracts 10 most retweeted observations from "vday_tweets"
# Keep the rows of vday_tweets with the 10 highest retweet counts, then
# narrow the result to the columns needed to read it. slice_max() keeps ties
# by default, so more than 10 rows can come back (the run recorded below
# returned 11).
top_retweeted_rows <- slice_max(vday_tweets, order_by = retweet_count, n = 10)
vday_tweets_most_retweeted <- select(
  top_retweeted_rows,
  created_at, screen_name, text, retweet_count
)
rm(top_retweeted_rows) # drop the temporary so no extra object is left behind

# Display the most-retweeted tweets
vday_tweets_most_retweeted
## # A tibble: 11 × 4
## created_at screen_name text retweet_count
## <dttm> <chr> <chr> <int>
## 1 2022-02-14 08:10:30 VDay "#1BillionRising activists eve… 14
## 2 2022-01-21 01:30:31 VDay "\"Despite the achievements in… 13
## 3 2022-02-10 04:34:22 VDay "\"At 8:30am SLT, 11 Feb, at t… 11
## 4 2021-12-13 01:20:42 VDay "\"Major press conference to a… 11
## 5 2021-12-27 21:59:40 VDay "\"Thank you Berenice Leila fo… 10
## 6 2021-11-25 19:22:43 VDay "Reasons why you shouldn't sto… 10
## 7 2022-02-14 15:23:53 VDay "\"On 14 Feb, we dance & R… 9
## 8 2022-02-09 06:50:18 VDay "\"I feel happy & grateful… 9
## 9 2022-02-02 02:24:41 VDay "CALL TO ACTION: Call For Inte… 9
## 10 2022-01-30 21:13:05 VDay "\"#MexicoCity will Rise for t… 9
## 11 2021-11-23 17:01:28 VDay "\"What a great day w the offi… 9
Use ggplot to visualize Twitter data
# creates new column that adds #
# Count the hashtags that appear alongside #ValentinesDay.
# The visible script never creates ValentinesDay_coinciding_hashtags, so the
# mutate() step below would stop with "object not found". Build it here from
# the search results unless an earlier step has already defined it.
if (!exists("ValentinesDay_coinciding_hashtags")) {
  ValentinesDay_coinciding_hashtags <- valentine_tweets %>%
    select(hashtags) %>%
    unnest(hashtags) %>% # hashtags is a list column: one row per hashtag
    filter(!is.na(hashtags)) %>%
    mutate(hashtags = tolower(hashtags)) %>% # merge case variants
    filter(hashtags != "valentinesday") %>% # drop the searched hashtag itself
    count(hashtags, sort = TRUE) %>%
    slice_head(n = 15) # keep the 15 most frequent
}

# Add a display column that puts "#" in front of each hashtag.
# NOTE(review): the "CancelStudentDebt_" prefix looks like a leftover from a
# different hashtag example; the name is kept because the plotting code that
# follows refers to it.
CancelStudentDebt_coinciding_hashtags <- ValentinesDay_coinciding_hashtags %>%
  mutate(hashtag = paste0("#", hashtags))
# Horizontal bar chart of the co-occurring hashtags: one bar per hashtag,
# ordered by frequency, with bar length given by the count n.
coincident_hashtags_plot <- CancelStudentDebt_coinciding_hashtags %>%
  ggplot(aes(x = reorder(hashtag, n), y = n)) +
  geom_col() +
  coord_flip() + # flip axes so the hashtag labels read horizontally
  labs(
    x = "",
    y = "Frequency",
    title = "Hashtags Most Frequently Used Along With #ValentinesDay",
    caption = "Data Collected from Twitter REST API via rtweet"
  )

# Display the chart
coincident_hashtags_plot
# Hourly time series of the #ValentinesDay tweets. The subtitle is built from
# the earliest and latest created_at values in the data, so it always shows
# the date range actually covered by the search results.
ts_plot(valentine_tweets, by = "hours") +
  labs(
    title = "Frequency of tweets with #ValentinesDay",
    subtitle = paste(
      format(range(valentine_tweets$created_at), "%d %B %Y"),
      collapse = " to "
    ),
    caption = "Data collected from Twitter's REST API via rtweet",
    x = NULL,
    y = NULL
  ) +
  theme_minimal()
Student Exercise: Select a hashtag, and make a visualization of the 15 hashtags most frequently used along with your chosen hashtag.