The United States Geological Survey continuously monitors earthquakes and makes the corresponding data available to the public. A dataset containing all worldwide earthquakes for a time frame of 30 days is available at https://raw.githubusercontent.com/Stat579-at-ISU/stat579-at-isu.github.io/master/exams/data/earthquakes.csv.
You can find the accompanying codebook at the US Geological Survey website (you should be able to answer all questions in this exam without the codebook).
# Read the 30-day earthquake data set directly from the course repository.
eq <- read.csv(
  file = "https://raw.githubusercontent.com/Stat579-at-ISU/stat579-at-isu.github.io/master/exams/data/earthquakes.csv"
)

# Time frame: parse the Date column (year-month-day text) into Date values
# so that summaries and min/max work chronologically.
eq[["Date"]] <- lubridate::ymd(eq[["Date"]])
summary(eq[["Date"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2012-09-05" "2012-09-13" "2012-09-21" "2012-09-20" "2012-09-27" "2012-10-05"

# First day covered by the data: Sep 5 2012
min(eq[["Date"]])
## [1] "2012-09-05"
# Last day covered by the data: Oct 5 2012
max(eq[["Date"]])
## [1] "2012-10-05"

# Number of earthquakes = number of rows in the data set.
nrow(eq)
## [1] 7162
# Row index of the strongest earthquake; which.max() returns the first row
# that attains the maximum magnitude.
strongest_row <- which.max(eq[["Magnitude"]])

# When and where did it happen?
eq[strongest_row, c("Date", "Location")]
## Date Location
## 952 2012-09-30 9km WNW of San Agustin

# How strong was it?
max(eq[["Magnitude"]])
## [1] 7.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Treat Country as a categorical variable.
eq$Country <- factor(eq$Country)

# Rename the 11th factor level to "California".
# NOTE(review): the positional index 11 depends on the alphabetical level
# order of this particular data set; presumably it targets a misspelled or
# abbreviated California label -- confirm with levels(eq$Country) before
# reusing this on other data.
levels(eq$Country)[11] <- "California"

# Five countries/states with the most recorded earthquakes.
country_counts <- table(eq$Country)
sort(country_counts, decreasing = TRUE)[seq_len(5)]
##
## California Alaska British Virgin Islands
## 2957 1907 479
## Nevada Washington
## 242 207
library(ggplot2)
library(forcats)
# Bar chart of the number of earthquakes per country/state. The factor levels
# are ordered by frequency first, and the axes are flipped so the (many)
# country labels are laid out along the vertical axis.
countries_by_frequency <- eq %>%
  mutate(Country = fct_infreq(Country))

ggplot(countries_by_frequency, aes(x = Country)) +
  geom_bar() +
  coord_flip()
# Step 1: tabulate the earthquake frequency by country (ignoring rows with a
# missing Country) and keep the ten most frequent ones.
# NOTE(review): slice_max() keeps ties by default, so more than ten rows may
# come back if several countries share the 10th-largest count -- confirm this
# is acceptable.
top10 <- eq %>%
  filter(!is.na(Country)) %>%
  group_by(Country) %>%
  tally() %>%
  slice_max(order_by = n, n = 10)

# Step 2: add Country10, which repeats the country name for the top-ten
# countries and collapses everything else (including missing countries)
# into "Other".
eq$Country10 <- as.character(eq$Country)
eq$Country10[!(eq$Country %in% top10$Country)] <- "Other"
# Side-by-side boxplots of Magnitude for each Country10 group. The groups are
# ordered with fct_reorder() using its default summary of Magnitude
# (presumably the median -- confirm against the installed forcats version),
# and the axes are flipped so the group labels sit on the vertical axis.
magnitude_by_group <- eq %>%
  mutate(Country10 = fct_reorder(Country10, Magnitude))

ggplot(magnitude_by_group, aes(x = Country10, y = Magnitude)) +
  geom_boxplot() +
  coord_flip()
# Distribution of magnitudes, in bins of width 0.1.
ggplot(eq, aes(x = Magnitude)) +
  geom_histogram(binwidth = 0.1)

# Use a magnitude of 4 as the cutoff between 'small' and 'large' earthquakes:
# magnitudes of 4 or more are "large", everything below is "small".
eq$size <- ifelse(eq$Magnitude >= 4, "large", "small")
Load the `maps` package and extract a world map (hint: think of `map_data`). Plot the world map using a `polygon` layer. Set the fill color to `grey50`. Add a layer of `points` to the map showing the locations of earthquakes; use color to distinguish between small and large earthquakes. Describe what you see.
library(maps)
# Outline data for the world map; the long, lat and group columns are what
# the polygon layer below is drawn from.
world <- map_data("world")

# Base map: one grey polygon per group.
worldmap <- ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "grey50")

# Earthquake locations on top of the map, coloured by size class. The layer
# uses the earthquake data instead of the map data, and group = 1 overrides
# the polygon grouping inherited from the base plot.
quake_layer <- geom_point(
  mapping = aes(x = Longitude, y = Latitude, colour = size, group = 1),
  data = eq
)
worldmap + quake_layer
# Observation: most of the small earthquakes are located in the US.
For each day, find:
- the number of earthquakes,
- their average magnitude,
- the name of the country/state in which most of them happened that day.

Based on the summary data, draw a single chart that incorporates all of the above information.
# Most frequent value of x: tabulate, sort the counts in decreasing order and
# take the name of the first entry.
most_frequent <- function(x) {
  names(sort(table(x), decreasing = TRUE))[1]
}

# One row per day with
#   n         - number of earthquakes that day,
#   Magnitude - their average magnitude,
#   Country   - the country/state with the most earthquakes that day.
eq.stats <- eq %>%
  group_by(Date) %>%
  summarize(
    n = n(),
    Magnitude = mean(Magnitude),
    Country = most_frequent(Country)
  )
# Single chart of the daily summary: one point per day, placed at the number
# of earthquakes, sized by the average magnitude and coloured by the
# country/state with the most earthquakes that day.
ggplot(
  data = eq.stats,
  mapping = aes(x = Date, y = n, colour = Country, size = Magnitude)
) +
  geom_point()