Chapter 3 Plotting
In this chapter, we will work through several time series examples. The aim is to help you grasp the main ideas behind plotting time series data in R.
3.1 Example Data
Click OSU_enroll.csv to download data.
# Read the OSU enrollment data; character columns are converted to factors
osu.enroll <- read.csv(file = "OSU_enroll.csv", stringsAsFactors = TRUE)
# Preview the first rows of the data
head(osu.enroll)
## t Semester Year Enrollment date
## 1 1 Fall 1989 20110 8/31/1989
## 2 2 Spring 1990 19128 2/1/1990
## 3 3 Summer 1990 7553 6/1/1990
## 4 4 Fall 1990 19591 8/31/1990
## 5 5 Spring 1991 18361 2/1/1991
## 6 6 Summer 1991 6702 6/1/1991
# Preview the last rows of the data
tail(osu.enroll)
## t Semester Year Enrollment date
## 35 35 Spring 2001 20004 2/1/2001
## 36 36 Summer 2001 7558 6/1/2001
## 37 37 Fall 2001 21872 8/31/2001
## 38 38 Spring 2002 20922 2/1/2002
## 39 39 Summer 2002 7868 6/1/2002
## 40 40 Fall 2002 22992 8/31/2002
# Extract the enrollment series
x <- osu.enroll$Enrollment

# One way to do the plot
dev.new(width = 8, height = 6, pointsize = 10)
# Only x is supplied, so R puts x on the y-axis and the observation
# number (time) on the x-axis
plot(x = x, ylab = "OSU Enrollment",
     xlab = "t (time)", type = "l", col = "red",
     main = "OSU Enrollment from Fall 1989 to Fall 2002",
     panel.first = grid(col = "gray", lty = "dotted"))
# Overlay the individual observations on the line
points(x = x, pch = 20, col = "blue")
# Variant of the plot: type = "o" draws the line and the points in one call
plot(
  x = x,
  type = "o",
  col = "red",
  xlab = "t (time)",
  ylab = "OSU Enrollment",
  main = "OSU enrollment data",
  panel.first = grid(col = "gray", lty = "dotted")
)
# Open a new graphics device (width 8, height 6)
dev.new(width = 8, height = 6, pointsize = 10)
# No y argument is given: R draws x on the y-axis against the
# observation number on the x-axis
plot(
  x = x,
  type = "l",
  col = "red",
  xlab = "t (time)",
  ylab = "OSU Enrollment",
  main = "OSU Enrollment from Fall 1989 to Fall 2002",
  panel.first = grid(col = "gray", lty = "dotted")
)
# Mark each observation with a blue dot on top of the line
points(x = osu.enroll$Enrollment, pch = 20, col = "blue")
Alternatively, you can do the same thing using ggplot.
library(ggplot2)
# Create a data frame
df <- data.frame(osu.enroll)

# Create the plot
ggplot(df, aes(x = t, y = Enrollment)) +
  geom_line(colour = "red") + # Line plot
  geom_point(shape = 20, colour = "blue") + # Add points
  labs(x = "t (time)", y = "OSU Enrollment",
       title = "OSU Enrollment from Fall 1989 to Fall 2002") + # Axis labels and title
  theme_bw() + # White background with black lines
  # Gray dotted major gridlines
  theme(panel.grid.major = element_line(colour = "gray", linetype = "dotted"))
When only x is specified in the plot() function, R plots those values on the y-axis and uses the observation number on the x-axis.
Compare this to the next plot below, where both the x and y arguments are specified.
# More complicated plot: one line per semester
# Split the data into one data frame per semester
fall <- osu.enroll[osu.enroll$Semester == "Fall", ]
spring <- osu.enroll[osu.enroll$Semester == "Spring", ]
summer <- osu.enroll[osu.enroll$Semester == "Summer", ]

# Start with the fall series; ylim spans 0 to the overall maximum so the
# spring and summer series added below fit in the same plotting region
plot(y = fall$Enrollment, x = fall$t,
     ylab = "OSU Enrollment", xlab = "t (time)",
     col = "blue",
     main = "OSU Enrollment from Fall 1989 to Fall 2002",
     panel.first = grid(col = "gray", lty = "dotted"),
     pch = 1, type = "o", ylim = c(0, max(osu.enroll$Enrollment)))
# Add the other two semesters to the existing plot
lines(y = spring$Enrollment, x = spring$t, col = "red",
      type = "o", pch = 2)
lines(y = summer$Enrollment, x = summer$t, col = "darkgreen",
      type = "o", pch = 3)
# Legend entries match the colors and plotting symbols used above
legend(x = "center", legend = c("Fall", "Spring", "Summer"),
       pch = c(1, 2, 3), lty = c(1, 1, 1),
       col = c("blue", "red", "darkgreen"), bty = "n")
# Another way to do the plot: actual calendar dates on the x-axis
plot(
  x = as.Date(osu.enroll$date, format = "%m/%d/%Y"),
  y = osu.enroll$Enrollment,
  type = "l",
  col = "red",
  xlab = "Time",
  ylab = "OSU Enrollment",
  main = "OSU Enrollment from Fall 1989 to Fall 2002"
)
points(
  x = as.Date(osu.enroll$date, format = "%m/%d/%Y"),
  y = osu.enroll$Enrollment,
  pch = 20,
  col = "blue"
)

# Hand-made gridlines: v gives vertical line positions, h horizontal ones
abline(
  v = as.Date(c(
    "1990/1/1", "1992/1/1", "1994/1/1", "1996/1/1",
    "1998/1/1", "2000/1/1", "2002/1/1"
  )),
  col = "lightgray",
  lty = "dotted"
)
abline(h = c(10000, 15000, 20000), col = "lightgray", lty = "dotted")
# Autocorrelation: estimate the ACF of the enrollment series and keep the result
rho.x <- acf(x = x, type = "correlation", main = "OSU Enrollment series")
# Print the estimated autocorrelations by lag
rho.x
##
## Autocorrelations of series 'x', by lag
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1.000 -0.470 -0.425 0.909 -0.438 -0.395 0.822 -0.403 -0.358 0.739 -0.367
## 11 12 13 14 15 16
## -0.327 0.655 -0.337 -0.297 0.581 -0.309
# First nine estimated autocorrelations (lags 0 through 8)
rho.x$acf[1:9]
## [1] 1.0000000 -0.4702315 -0.4253427 0.9087421 -0.4377336 -0.3946048 0.8224660
## [8] -0.4025871 -0.3584216
3.2 S&P500 Index
Click SP500weekly.csv to download data.
# Read the weekly S&P 500 data; character columns are converted to factors
SP500 <- read.csv(file = "SP500weekly.csv", stringsAsFactors = TRUE)
# Preview the first rows of the data
head(SP500)
## WeekStart Open High Low Close AdjClose Volume
## 1 1/1/1995 459.21 462.49 457.20 460.68 460.68 1199080000
## 2 1/8/1995 460.67 466.43 458.65 465.97 465.97 1627330000
## 3 1/15/1995 465.97 470.43 463.99 464.78 464.78 1667400000
## 4 1/22/1995 464.78 471.36 461.14 470.39 470.39 1628110000
## 5 1/29/1995 470.39 479.91 467.49 478.65 478.65 1888560000
## 6 2/5/1995 478.64 482.60 478.36 481.46 481.46 1579920000
# Preview the last rows of the data
tail(SP500)
## WeekStart Open High Low Close AdjClose Volume
## 1395 9/19/2021 4402.95 4465.40 4305.91 4455.48 4455.48 15697030000
## 1396 9/26/2021 4442.12 4457.30 4288.52 4357.04 4357.04 15555390000
## 1397 10/3/2021 4348.84 4429.97 4278.94 4391.34 4391.34 14795520000
## 1398 10/10/2021 4385.44 4475.82 4329.92 4471.37 4471.37 13758090000
## 1399 10/17/2021 4463.72 4559.67 4447.47 4544.90 4544.90 13966070000
## 1400 10/24/2021 4553.69 4608.08 4537.36 4605.38 4605.38 16206040000
# Extract the weekly closing values
x <- SP500$Close

# One way to do the plot
dev.new(width = 8, height = 6, pointsize = 10)
# Again, only x is supplied, so R plots x on the y-axis against the
# observation number on the x-axis.
# The title is kept on one line: wrapping inside the string literal would
# embed a newline in the plotted title.
plot(x = x, ylab = "S&P 500 Index", xlab = "t (time)",
     type = "l", col = "red",
     main = "S&P 500 Index from 1/1/1995 to 10/25/2021 (weekly)",
     panel.first = grid(col = "gray", lty = "dotted"))
# Another way to do the plot: actual calendar dates on the x-axis
plot(
  x = as.Date(SP500$WeekStart, format = "%m/%d/%Y"),
  y = x,
  type = "l",
  col = "red",
  xlab = "Time",
  ylab = "S&P 500 Index",
  main = "S&P 500 Index from 1/1/1995 to 10/25/2021 (weekly)"
)

# Hand-made gridlines: one vertical line every five years,
# one horizontal line every 1000 index points
abline(
  v = as.Date(c(
    "1995/1/1", "2000/1/1", "2005/1/1",
    "2010/1/1", "2015/1/1", "2020/1/1"
  )),
  col = "lightgray",
  lty = "dotted"
)
abline(
  h = seq(from = 0, to = 5000, by = 1000),
  col = "lightgray",
  lty = "dotted"
)
# One more way, with fine control of the date axis.
# xaxt = "n" suppresses the default x-axis so a custom one can be drawn.
# The title is kept on one line: wrapping inside the string literal would
# embed a newline in the plotted title.
plot(y = x, x = as.Date(SP500$WeekStart, format = "%m/%d/%Y"),
     xlab = "Time", type = "l", col = "red",
     main = "S&P 500 Index from 1/1/1995 to 10/25/2021 (weekly)",
     ylab = "S&P 500 Index", xaxt = "n")

# Yearly tick positions, computed once and reused for the tick labels
year.ticks <- seq(from = as.Date("1995/1/1"), to = as.Date("2021/12/31"),
                  by = "years")
axis.Date(side = 1, at = year.ticks,
          labels = format(x = year.ticks, format = "%b%y"),
          las = 2) # las changes orientation of labels

# Create own gridlines
abline(v = as.Date(c("1995/1/1", "2000/1/1", "2005/1/1",
                     "2010/1/1", "2015/1/1", "2020/1/1")),
       lty = "dotted", col = "lightgray")
abline(h = seq(from = 0, to = 5000, by = 1000),
       lty = "dotted", col = "lightgray")
3.3 Sunspots
Click SN_y_tot_V2.0.csv to download data.
# Read the semicolon-separated yearly sunspot file and name its five columns
sunspots <- read.table(file = "SN_y_tot_V2.0.csv", sep = ";",
                       col.names = c("Mid.year", "Mean.total",
                                     "Mean.SD.total", "Numb.obs.used",
                                     "Definitive"))
# Preview the first rows of the data
head(sunspots)
## Mid.year Mean.total Mean.SD.total Numb.obs.used Definitive
## 1 1700.5 8.3 -1 -1 1
## 2 1701.5 18.3 -1 -1 1
## 3 1702.5 26.7 -1 -1 1
## 4 1703.5 38.3 -1 -1 1
## 5 1704.5 60.0 -1 -1 1
## 6 1705.5 96.7 -1 -1 1
# Preview the last rows of the data
tail(sunspots)
## Mid.year Mean.total Mean.SD.total Numb.obs.used Definitive
## 316 2015.5 69.8 6.4 8903 1
## 317 2016.5 39.8 3.9 9940 1
## 318 2017.5 21.7 2.5 11444 1
## 319 2018.5 7.0 1.1 12611 1
## 320 2019.5 3.6 0.5 12884 1
## 321 2020.5 8.8 4.1 14440 1
dev.new(width = 8, height = 6, pointsize = 10)
# Again, only x is supplied, so R plots the values on the y-axis against
# the observation number on the x-axis.
# The y-axis label is kept on one line: wrapping inside the string literal
# would embed a newline in the plotted label.
plot(x = sunspots$Mean.total, ylab = "Number of sunspots",
     xlab = "t (time)", type = "l", col = "red",
     main = "Sunspots per year from 1700 to 2020",
     panel.first = grid(col = "gray", lty = "dotted"))
# Overlay the individual observations on the line
points(x = sunspots$Mean.total, pch = 20, col = "blue")
# Same plot with the mid-year value on the x-axis instead of the index
plot(
  x = sunspots$Mid.year,
  y = sunspots$Mean.total,
  type = "l",
  col = "red",
  xlab = "Year",
  ylab = "Number of sunspots",
  main = "Sunspots per year from 1700 to 2020",
  panel.first = grid(col = "gray", lty = "dotted")
)
# Mark each yearly observation with a blue dot
points(
  x = sunspots$Mid.year,
  y = sunspots$Mean.total,
  pch = 20,
  col = "blue"
)
# Convert to an object of class "ts": one observation per year, starting in 1700
x <- ts(data = sunspots$Mean.total, start = 1700, frequency = 1)
# Print the time series
x
## Time Series:
## Start = 1700
## End = 2020
## Frequency = 1
## [1] 8.3 18.3 26.7 38.3 60.0 96.7 48.3 33.3 16.7 13.3 5.0 0.0
## [13] 0.0 3.3 18.3 45.0 78.3 105.0 100.0 65.0 46.7 43.3 36.7 18.3
## [25] 35.0 66.7 130.0 203.3 171.7 121.7 78.3 58.3 18.3 8.3 26.7 56.7
## [37] 116.7 135.0 185.0 168.3 121.7 66.7 33.3 26.7 8.3 18.3 36.7 66.7
## [49] 100.0 134.8 139.0 79.5 79.7 51.2 20.3 16.0 17.0 54.0 79.3 90.0
## [61] 104.8 143.2 102.0 75.2 60.7 34.8 19.0 63.0 116.3 176.8 168.0 136.0
## [73] 110.8 58.0 51.0 11.7 33.0 154.2 257.3 209.8 141.3 113.5 64.2 38.0
## [85] 17.0 40.2 138.2 220.0 218.2 196.8 149.8 111.0 100.0 78.2 68.3 35.5
## [97] 26.7 10.7 6.8 11.3 24.2 56.7 75.0 71.8 79.2 70.3 46.8 16.8
## [109] 13.5 4.2 0.0 2.3 8.3 20.3 23.2 59.0 76.3 68.3 52.9 38.5
## [121] 24.2 9.2 6.3 2.2 11.4 28.2 59.9 83.0 108.5 115.2 117.4 80.8
## [133] 44.3 13.4 19.5 85.8 192.7 227.3 168.7 143.0 105.5 63.3 40.3 18.1
## [145] 25.1 65.8 102.7 166.3 208.3 182.5 126.3 122.0 102.7 74.1 39.0 12.7
## [157] 8.2 43.4 104.4 178.3 182.2 146.6 112.1 83.5 89.2 57.8 30.7 13.9
## [169] 62.8 123.6 232.0 185.3 169.2 110.1 74.5 28.3 18.9 20.7 5.7 10.0
## [181] 53.7 90.5 99.0 106.1 105.8 86.3 42.4 21.8 11.2 10.4 11.8 59.5
## [193] 121.7 142.0 130.0 106.6 69.4 43.8 44.4 20.2 15.7 4.6 8.5 40.8
## [205] 70.1 105.5 90.1 102.8 80.9 73.2 30.9 9.5 6.0 2.4 16.1 79.0
## [217] 95.0 173.6 134.6 105.7 62.7 43.5 23.7 9.7 27.9 74.0 106.5 114.7
## [229] 129.7 108.2 59.4 35.1 18.6 9.2 14.6 60.2 132.8 190.6 182.6 148.0
## [241] 113.0 79.2 50.8 27.1 16.1 55.3 154.3 214.7 193.0 190.7 118.9 98.3
## [253] 45.0 20.1 6.6 54.2 200.7 269.3 261.7 225.1 159.0 76.4 53.4 39.9
## [265] 15.0 22.0 66.8 132.9 150.0 149.4 148.0 94.4 97.6 54.1 49.2 22.5
## [277] 18.4 39.3 131.0 220.1 218.9 198.9 162.4 91.0 60.5 20.6 14.8 33.9
## [289] 123.0 211.1 191.8 203.3 133.0 76.1 44.9 25.1 11.6 28.9 88.3 136.3
## [301] 173.9 170.4 163.6 99.3 65.3 45.8 24.7 12.6 4.2 4.8 24.9 80.8
## [313] 84.5 94.0 113.3 69.8 39.8 21.7 7.0 3.6 8.8
# Check the class of the converted object
class(x)
## [1] "ts"
# Check the class of the original data column for comparison
class(sunspots$Mean.total)
## [1] "numeric"
3.3.1 plot.ts()
plot() is a generic function: when x is an object of class "ts", it uses the plot.ts() method function.
# Only x is supplied here as well, but x is now a "ts" object.
# The y-axis label is kept on one line: wrapping inside the string literal
# would embed a newline in the plotmath label.
plot(x = x, ylab = expression(paste(x[t], " (Number of sunspots)")),
     xlab = "Year", type = "o", col = "red",
     main = "Sunspots per year from 1700 to 2020")
# The same plot, calling the plot.ts() method directly
plot.ts(x = x, ylab = expression(paste(x[t], " (Number of sunspots)")),
        xlab = "Year", type = "o", col = "red",
        main = "Sunspots per year from 1700 to 2020")
# type = "b" also works for "both" points and lines, but it leaves spaces
# between the points and lines