Difference between revisions of "Univariate data analysis (2014)"
Jump to navigation
Jump to search
Kevin Dunn (talk | contribs) |
Kevin Dunn (talk | contribs) |
||
(26 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
__NOTOC__{{ | __NOTOC__{{ClassSidebarYouTube | ||
| date = 13 to 23 January 2014 | | date = 13 to 23 January 2014 | ||
| dates_alt_text = | | dates_alt_text = | ||
| vimeoID1 = | | vimeoID1 = 0a1YeaheSXc | ||
| vimeoID2 = | | vimeoID2 = 2ffZAlWUUAE | ||
| vimeoID3 = | | vimeoID3 = fWcvYScLSC4 | ||
| vimeoID4 = | | vimeoID4 = 8OJGWGlP0Ok | ||
| vimeoID5 = | | vimeoID5 = q5wzW8k2TIE | ||
| vimeoID6 = | | vimeoID6 = aGr4LVOgVhk | ||
| vimeoID7 = | | vimeoID7 = 58487266 <!-- replace later --> | ||
| vimeoID8 = | | vimeoID8 = f7KkIy9wZco | ||
| course_notes_PDF = 2014-4C3-6C3-Univariate-data-analysis.pdf | | course_notes_PDF = 2014-4C3-6C3-Univariate-data-analysis.pdf | ||
| course_notes_alt = Course slides | | course_notes_alt = Course slides | ||
Line 18: | Line 18: | ||
| video_download_link_MP4_size = 343 M | | video_download_link_MP4_size = 343 M | ||
| video_notes1 = | | video_notes1 = | ||
| video_download_link2_MP4 = http://learnche.mcmaster.ca/media/ | | video_download_link2_MP4 = http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02B.mp4 | ||
| video_download_link2_MP4_size = M | | video_download_link2_MP4_size = 327 M | ||
| video_notes2 = | | video_notes2 = | ||
| video_download_link3_MP4 = http://learnche.mcmaster.ca/media/ | | video_download_link3_MP4 = http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02C.mp4 | ||
| video_download_link3_MP4_size = M | | video_download_link3_MP4_size = 347 M | ||
| video_notes3 = | | video_notes3 = | ||
| video_download_link4_MP4 = http://learnche.mcmaster.ca/media/ | | video_download_link4_MP4 = http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03A.mp4 | ||
| video_download_link4_MP4_size = M | | video_download_link4_MP4_size = 344 M | ||
| video_notes4 = | | video_notes4 = | ||
| video_download_link5_MP4 = http://learnche.mcmaster.ca/media/ | | video_download_link5_MP4 = http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03B.mp4 | ||
| video_download_link5_MP4_size = M | | video_download_link5_MP4_size = 262 M | ||
| video_notes5 = | | video_notes5 = | ||
| video_download_link6_MP4 = http://learnche.mcmaster.ca/media/ | | video_download_link6_MP4 = http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03C.mp4 | ||
| video_download_link6_MP4_size = M | | video_download_link6_MP4_size = 293 M | ||
| video_notes6 = | | video_notes6 = | ||
| video_download_link7_MP4 = http://learnche.mcmaster.ca/media/ | | video_download_link7_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-04A.mp4 | ||
| video_download_link7_MP4_size = M | | video_download_link7_MP4_size = 357 M | ||
| video_notes7 = | | video_notes7 = | ||
| video_download_link8_MP4 = http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-04B.mp4 | |||
| video_download_link8_MP4_size = 180 M | |||
| video_notes8 = | |||
}} | }} | ||
== Class materials == | == Class materials == | ||
{| class="wikitable" style="text-align: center;" | {| class="wikitable" style="text-align: center;" | ||
Line 56: | Line 58: | ||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02A.mp3 Audio] (42 M) | | [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02A.mp3 Audio] (42 M) | ||
|[[Media:Demo.R|R demo file]] | |[[Media:Demo.R|R demo file]] | ||
| rowspan=" | | rowspan="8"|[http://learnche.mcmaster.ca/pid/?source=Univariate Chapter 2] | ||
| rowspan=" | | rowspan="8"|[[Image:Nuvola_mimetypes_pdf.png|20px|link=Media:2014-4C3-6C3-Univariate-data-analysis.pdf]] [[Media:2014-4C3-6C3-Univariate-data-analysis.pdf|Slides for class]] | ||
|- | |- | ||
| 15 January | | 15 January | ||
Line 63: | Line 65: | ||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02B.mp4 Video] (327 M) | | [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02B.mp4 Video] (327 M) | ||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02B.mp3 Audio] (42 M) | | [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02B.mp3 Audio] (42 M) | ||
| | | See code below | ||
|- | |- | ||
| 16 January | | 16 January | ||
| 02C | | 02C | ||
| | | [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02C.mp4 Video] (347 M) | ||
| | | [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-02C.mp3 Audio] (42 M) | ||
| See code below | |||
|- | |||
| 20 January | |||
| 03A | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03A.mp4 Video] (347 M) | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03A.mp3 Audio] (42 M) | |||
| [[Tables_of_the_normal_and_t-distribution| Using tables of the normal distribution]] | |||
|- | |||
| 22 January | |||
| 03B | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03B.mp4 Video] (262 M) | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03B.mp3 Audio] (42 M) | |||
| [[Tables_of_the_normal_and_t-distribution | Using tables of the t-distribution]] | |||
|- | |||
| 23 January | |||
| 03C | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03C.mp4 Video] (293 M) | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-03C.mp3 Audio] (41 M) | |||
| None | |||
|- | |||
| 27 January | |||
| 04A | |||
| [http://learnche.mcmaster.ca/media/4C3-2013-Class-04A.mp4 Video from 2013] (357M) | |||
| [http://learnche.mcmaster.ca/media/4C3-2013-Class-04A.mp3 Audio from 2013] (43M) | |||
| See code below | |||
|- | |||
| 29 January | |||
| 04B | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-04B.mp4 Video] (180 M) | |||
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-04B.mp3 Audio] (24 M) | |||
| None | | None | ||
|} | |} | ||
Line 76: | Line 108: | ||
Please follow the [[Software_tutorial|software tutorial]] to install and run the course software. You should be able to quickly read, understand and use the material in steps 1 to 13. | Please follow the [[Software_tutorial|software tutorial]] to install and run the course software. You should be able to quickly read, understand and use the material in steps 1 to 13. | ||
< | == Class example, 15 Jan == | ||
Seeing the Central Limit Theorem in action: ''rolling dice''. [http://www.r-fiddle.org/#/fiddle?id=vGjkcRws&version=1 Run this code in your browser] (no need to install/run in R) | |||
<syntaxhighlight lang="rsplus"> | |||
N = 500 | |||
m <- t(matrix(seq(1,6), 3, 2)) | |||
layout(m) | |||
s1 <- as.integer(runif(N, 1, 7)) | |||
s2 <- as.integer(runif(N, 1, 7)) | |||
s3 <- as.integer(runif(N, 1, 7)) | |||
s4 <- as.integer(runif(N, 1, 7)) | |||
s5 <- as.integer(runif(N, 1, 7)) | |||
s6 <- as.integer(runif(N, 1, 7)) | |||
s7 <- as.integer(runif(N, 1, 7)) | |||
s8 <- as.integer(runif(N, 1, 7)) | |||
s9 <- as.integer(runif(N, 1, 7)) | |||
s10 <- as.integer(runif(N, 1, 7)) | |||
hist(s1, main="", xlab="One throw", breaks=seq(0,6)+0.5) | |||
bins = 8 | |||
hist((s1+s2)/2, breaks=bins, main="", xlab="Average of two throws") | |||
hist((s1+s2+s3+s4)/4, breaks=bins, main="", xlab="Average of 4 throws") | |||
hist((s1+s2+s3+s4+s5+s6)/6, breaks=bins, main="", xlab="Average of 6 throws") | |||
bins=12 | |||
hist((s1+s2+s3+s4+s5+s6+s7+s8)/8, breaks=bins, main="", xlab="Average of 8 throws") | |||
hist((s1+s2+s3+s4+s5+s6+s7+s8+s9+s10)/10, breaks=bins, main="", xlab="Average of 10 throws") | |||
</syntaxhighlight> | |||
== Class example, 16 Jan == | |||
<syntaxhighlight lang="rsplus"> | <syntaxhighlight lang="rsplus"> | ||
Line 85: | Line 145: | ||
Code used to illustrate how the q-q plot is constructed: | Code used to illustrate how the q-q plot is constructed: | ||
[http://www.r-fiddle.org/#/fiddle?id=mBl1TMyP&version=1 Run this code in your browser] (no need to install/run in R) | |||
<syntaxhighlight lang="rsplus"> | <syntaxhighlight lang="rsplus"> | ||
N <- 10 | N <- 10 | ||
Line 122: | Line 184: | ||
Code used to illustrate the central limit theorem's reduction in variance: | Code used to illustrate the central limit theorem's reduction in variance: | ||
[http://www.r-fiddle.org/#/fiddle?id=MHIU665v&version=1 Run this code in your browser] (no need to install/run in R) | |||
<syntaxhighlight lang="rsplus"> | <syntaxhighlight lang="rsplus"> | ||
# Show the 3 plots side by side | # Show the 3 plots side by side | ||
Line 155: | Line 219: | ||
</syntaxhighlight> | </syntaxhighlight> | ||
== Class example, 27 Jan == | |||
Test for differences: plotting the raw data. | |||
[http://www.r-fiddle.org/#/fiddle?id=deSYk0W1&version=2 Run this code in a web-browser] | |||
<syntaxhighlight lang="rsplus"> | <syntaxhighlight lang="rsplus"> | ||
data <- | # Analysis of the data here: | ||
dilution <- c(11, 26, 18, 16, 20, 12, 8, 26, 12, 17, 14) | |||
manometric <- c(25, 3, 27, 30, 33, 16, 28, 27, 12, 32, 16) | |||
mean(manometric) | |||
mean(dilution) | |||
plot(c(dilution, manometric), ylab="BOD values", xaxt='n') | |||
plot( | text(5.5,3, "Dilution") | ||
text(18,3, "Manometric") | |||
abline(v=11.5) | |||
# | par(mar=c(4.2, 4.2, 0.2, 0.2)) # (bottom, left, top, right); defaults are par(mar=c(5, 4, 4, 2) + 0.1) | ||
plot( | plot(dilution, type="p", pch=4, | ||
lines( | cex=2, cex.lab=1.5, cex.main=1.8, cex.sub=1.8, cex.axis=1.8, | ||
ylab="BOD values", xlab="Sample number", | |||
ylim=c(0,35), xlim=c(0,11.5), col="darkgreen") | |||
lines(manometric, type="p", pch=16, cex=2, col="blue") | |||
lines(rep(0, N), dilution, type="p", pch=4, cex=2, col="darkgreen") | |||
lines(rep(0, N), manometric, type="p", pch=16, cex=2, col="blue") | |||
abline(v=0.5) | |||
legend(8, 5, pch=c(4, 16), c("Dilution", "Manometric"), col=c("darkgreen", "blue"), pt.cex=2) | |||
par(mar=c(4.2, 4.2, 0.2, 0.2)) # (bottom, left, top, right); defaults are par(mar=c(5, 4, 4, 2) + 0.1) | |||
plot(dilution-manometric, type="p", ylab="Dilution - Manometric", xlab="Sample number", | |||
cex.lab=1.5, cex.main=1.8, cex.sub=1.8, cex.axis=1.8, cex=2) | |||
abline(h=0, col="grey60") | |||
</syntaxhighlight> | </syntaxhighlight> | ||
Latest revision as of 06:54, 4 January 2017
Class date(s): | 13 to 23 January 2014 | ||||
(PDF) | Course slides | ||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
Class materials
Date | Class number | Video and audio files | Other materials | Reading (PID) | Slides | |
---|---|---|---|---|---|---|
13 January | 02A | Video (343 M) | Audio (42 M) | R demo file | Chapter 2 | Slides for class |
15 January | 02B | Video (327 M) | Audio (42 M) | See code below | ||
16 January | 02C | Video (347 M) | Audio (42 M) | See code below | ||
20 January | 03A | Video (347 M) | Audio (42 M) | Using tables of the normal distribution | ||
22 January | 03B | Video (262 M) | Audio (42 M) | Using tables of the t-distribution | ||
23 January | 03C | Video (293 M) | Audio (41 M) | None | ||
27 January | 04A | Video from 2013 (357M) | Audio from 2013 (43M) | See code below | ||
29 January | 04B | Video (180 M) | Audio (24 M) | None |
Software source code
Please follow the software tutorial to install and run the course software. You should be able to quickly read, understand and use the material in steps 1 to 13.
Class example, 15 Jan
Seeing the Central Limit Theorem in action: rolling dice. Run this code in your browser (no need to install/run in R)
# Central Limit Theorem demo: histograms of the average of 1, 2, 4, 6, 8 and
# 10 simulated dice throws, drawn in a 2-by-3 grid of panels.
N <- 500

# Panels are numbered 1 to 6 and filled row by row.
layout(matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE))

# One column per die, N simulated throws in each column. A uniform draw
# between 1 and 7, truncated to an integer, gives one of the faces 1 to 6.
throws <- matrix(as.integer(runif(10 * N, 1, 7)), nrow = N, ncol = 10)

# Average of the first k dice, computed separately for each of the N rows.
average.of <- function(k) {
  rowSums(throws[, seq_len(k), drop = FALSE]) / k
}

# A single die: one bar per face, with breaks half-way between the integers.
hist(throws[, 1], main = "", xlab = "One throw", breaks = seq(0, 6) + 0.5)

# Averages of a few dice: ask for roughly 8 bins.
bins <- 8
hist(average.of(2), breaks = bins, main = "", xlab = "Average of two throws")
hist(average.of(4), breaks = bins, main = "", xlab = "Average of 4 throws")
hist(average.of(6), breaks = bins, main = "", xlab = "Average of 6 throws")

# Averages of more dice: ask for roughly 12 bins.
bins <- 12
hist(average.of(8), breaks = bins, main = "", xlab = "Average of 8 throws")
hist(average.of(10), breaks = bins, main = "", xlab = "Average of 10 throws")
Class example, 16 Jan
# Load the batch yields data set directly from its web address
batch.url <- 'http://datasets.connectmv.com/file/batch-yields.csv'
batch <- read.csv(batch.url)
Code used to illustrate how the q-q plot is constructed:
Run this code in your browser (no need to install/run in R)
# Building a normal q-q plot by hand for a sample of N values.
N <- 10

# The sampled data, with its mean and standard deviation
yields <- c(86.2, 85.7, 71.9, 95.3, 77.1, 71.4, 68.9, 78.9, 86.9, 78.4)
mean.yield <- mean(yields)  # about 80.1
sd.yield <- sd(yields)      # about 8.35

# Theoretical side: quantiles of the standard normal distribution at the
# cumulative probabilities (i - 0.5)/N, for i = 1, ..., N
P <- (seq_len(N) - 0.5) / N
theoretical.quantity <- qnorm(P)

# Sample side: express each yield as a z-value (centre on the mean, divide
# by the standard deviation); display them, then put them in increasing order
yields.z <- (yields - mean.yield) / sd.yield
yields.z
yields.z.sorted <- sort(yields.z)

# Side-by-side comparison as text ...
yields.z.sorted
theoretical.quantity

# ... and as a plot; the reference line has intercept 0 and slope 1
plot(theoretical.quantity, yields.z.sorted, asp = 1)
abline(a = 0, b = 1)

# R's built-in functions carry out all of the steps above
qqnorm(yields)
qqline(yields)

# A better function: see http://learnche.mcmaster.ca/4C3/Software_tutorial/Extending_R_with_packages
library(car)
qqPlot(yields)
Code used to illustrate the central limit theorem's reduction in variance:
Run this code in your browser (no need to install/run in R)
# Central limit theorem: averaging subgroups of the raw data reduces the
# spread. The three panels sit side by side and share one y-axis range so
# that the spreads can be compared directly.
layout(matrix(1:3, nrow = 1, ncol = 3))

# Sample the population, then display the sample mean and standard deviation
N <- 100
x <- rnorm(N, mean = 80, sd = 5)
mean(x)
sd(x)

# Means of consecutive, non-overlapping subgroups of `subsize` values of `v`
subgroup.means <- function(v, subsize) {
  vapply(
    seq_len(length(v) / subsize),
    function(g) mean(v[((g - 1) * subsize + 1):(g * subsize)]),
    numeric(1)
  )
}

# Panel 1: the raw data; its range fixes the y-axis for every panel
x.range <- range(x)
plot(x, ylim = x.range, main = 'Raw data')

# Panel 2: averages of pairs
x.2 <- subgroup.means(x, 2)
plot(x.2, ylim = x.range, main = 'Subgroups of 2')

# Panel 3: averages of groups of four
x.4 <- subgroup.means(x, 4)
plot(x.4, ylim = x.range, main = 'Subgroups of 4')
Class example, 27 Jan
Test for differences: plotting the raw data.
Run this code in a web browser
# Test for differences: three ways of plotting the raw BOD values obtained
# with the dilution method and the manometric method.
dilution <- c(11, 26, 18, 16, 20, 12, 8, 26, 12, 17, 14)
manometric <- c(25, 3, 27, 30, 33, 16, 28, 27, 12, 32, 16)

# Number of samples, taken from the data itself. The original code used an
# `N` that this script never defines, so the strip of points at x = 0 could
# not be drawn.
n.samples <- length(dilution)

# The sample-by-sample difference plotted at the end needs equal lengths.
stopifnot(length(manometric) == n.samples)

mean(manometric)
mean(dilution)

# Plot 1: both groups in one sequence, dilution first; the vertical line
# at 11.5 falls between the last dilution and the first manometric value.
plot(c(dilution, manometric), ylab = "BOD values", xaxt = "n")
text(5.5, 3, "Dilution")
text(18, 3, "Manometric")
abline(v = 11.5)

# Plot 2: the two methods overlaid against sample number.
# par(mar) is (bottom, left, top, right); defaults are c(5, 4, 4, 2) + 0.1
par(mar = c(4.2, 4.2, 0.2, 0.2))
plot(dilution, type = "p", pch = 4,
     cex = 2, cex.lab = 1.5, cex.main = 1.8, cex.sub = 1.8, cex.axis = 1.8,
     ylab = "BOD values", xlab = "Sample number",
     ylim = c(0, 35), xlim = c(0, 11.5), col = "darkgreen")
points(manometric, pch = 16, cex = 2, col = "blue")

# Repeat every value at x = 0, left of the separator line at x = 0.5, so the
# spread of each method can be seen without the sample number.
points(rep(0, n.samples), dilution, pch = 4, cex = 2, col = "darkgreen")
points(rep(0, n.samples), manometric, pch = 16, cex = 2, col = "blue")
abline(v = 0.5)
legend(8, 5, legend = c("Dilution", "Manometric"), pch = c(4, 16),
       col = c("darkgreen", "blue"), pt.cex = 2)

# Plot 3: difference between the two methods for each sample, with a grey
# reference line at zero.
par(mar = c(4.2, 4.2, 0.2, 0.2))
plot(dilution - manometric, type = "p",
     ylab = "Dilution - Manometric", xlab = "Sample number",
     cex.lab = 1.5, cex.main = 1.8, cex.sub = 1.8, cex.axis = 1.8, cex = 2)
abline(h = 0, col = "grey60")