Difference between revisions of "Worksheets/Week9"

From Statistics for Engineering
Jump to navigation Jump to search
Line 5: Line 5:
<html><div data-datacamp-exercise data-lang="r" data-height="auto">
<html><div data-datacamp-exercise data-lang="r" data-height="auto">
     <code data-type="sample-code">
     <code data-type="sample-code">
center = 36
center = 36
range = 24
range = 24
Line 82: Line 81:
p
p


# Try a new point at +2: that is RW = coded * 0.5 * range + center; 60 hours
#------------
# y = 66.
# Try a new point at +2: that is RW = coded * 0.5 * range + center; x = 60 hours
y0 <- c(28, 63)
x.test <- data.frame(coded.x=coded.x)
rw.x <- c(24, 48, 36, 36, 60)
coded.x <- (rw.x - center)/(0.5*range)
predict(model.1.quad, newdata=x.test)  # predicts ~54
 
# Acutal y = 66. Therefore, our model isn't so good. Improve it with the new point
rw.x <- c(24, 48, 36, 36, 60)
rw.x <- c(24, 48, 36, 36, 60)
y2  <- c(28, 63, 55, 54, 66)
y2  <- c(28, 63, 55, 54, 66)
Line 91: Line 95:
summary(model.2.quad)
summary(model.2.quad)


# Predict value first.
# Then run the experiment.
# Plot it again:
# Plot it again:
DO: add the new point and response here
raw_data <- data.frame(coded.x = coded.x, y = y2)
plot_data <- data.frame(coded.x = seq(-2, +3, 0.1))
plot_data <- data.frame(coded.x = seq(-2, +3, 0.1))
plot_data$y <- predict(model.2.quad, newdata=plot_data)
plot_data$y <- predict(model.2.quad, newdata=plot_data)
p <- p + geom_point(aes(x=coded.x, y=y, color='purple', size=5), data=raw_data)
p <- p + geom_line(data=plot_data, color="purple", size=1)
p
#------------
#------------
# Reset our frame of reference; keep our range the same (24 hours)
# -1: 36
# +1: 60
#  0: 48 hours
center = 48
range = 24
rw.x <- c(48, 36, 36, 60)
y3  <- c(63, 55, 54, 66)
# Rebuild the model, and start the plots again
model.3 <- lm(y3 ~ rw.x + I(1/(rw.x)))
summary(model.3)
x.min = 35
x.max = 105
# Basic plot of everything so far:
raw_data <- data.frame(rw.x = rw.x, y = y3)
p <- ggplot(data=raw_data, aes(x=rw.x, y=y))  + 
  geom_point(size=5) +
  xlab("Real-world values: time") +
  scale_x_continuous(breaks=seq(x.min, x.max,5)) +
  ylab("Outcome variable") +
  scale_y_continuous(breaks=seq(0, 150, 10)) +
  theme_bw() +
  #theme(axis.text=element_text(size=18), legend.position = "none") +
  theme(axis.title=element_text(face="bold", size=14)) +
  expand_limits(x = c(x.min, x.max)) +
  expand_limits(y = c(50, 80))
p
plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1))
plot_data$y <- predict(model.3, newdata=plot_data)
p <- p + geom_line(data=plot_data, color="blue", size=1)
p
# Run experiment at +2 [72 hours; use 75 hours - just a little more]: predict it first
rw.x <- c(48, 36, 36, 60, 75)
coded.x <- (rw.x - center)/(0.5*range)
x.test <- data.frame(coded.x=coded.x)
predict(model.3, newdata=x.test)
# Expect a predicted value of 66 in the output. Actual: 79. So about 12 units difference.
# Update model and plot
rw.x <- c(48, 36, 36, 60, 75)
y4  <- c(63, 55, 54, 66, 79)
# Rebuild the model, and start the plots again
model.4 <- lm(y4 ~ rw.x + I(rw.x^2))
summary(model.4)
# The models are not working well for us. Let's try at around 90 hours.
# Expect an outcome of around 85. Got a value of 76. Stabilized? Try again
# at 95 hours
# Point 7: Try 90 hours. Stabilizing?                    : 76
# Point 8. Try 95 hours. Seems to confirm stabilization  : 81
# Point 9: Overshoot(?) of 105                            : 72
rw.x <- c(48, 36, 36, 60, 75, 90, 95, 105)
y5  <- c(63, 55, 54, 66, 79, 76, 81, 72)
# Rebuild the model, and start the plots again
model.5 <- lm(y5 ~ rw.x + I(rw.x^2))
summary(model.5)
raw_data <- data.frame(rw.x = rw.x, y = y5)
plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1))
plot_data$y <- predict(model.5, newdata=plot_data)
p <- p + geom_point(aes(x=rw.x, y=y, color='red', size=5), data=raw_data)
p <- p + geom_line(data=plot_data, color="red", size=1)
p
# Rebuild the model with a differt structure, and start the plots again
model.6 <- lm(y5 ~ rw.x + I(1/sqrt(rw.x)))
summary(model.6)
plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1))
plot_data$y <- predict(model.6, newdata=plot_data)
p <- p + geom_line(data=plot_data, color="purple", size=1)  
p <- p + geom_line(data=plot_data, color="purple", size=1)  
p
p

Revision as of 05:23, 5 May 2019

Part 1

Description here

center = 36 range = 24 rw.x <- c(24, 48) coded.x <- (rw.x - center)/(0.5*range) y0 <- c(28, 63) model.0 <- lm(y0 ~ coded.x) summary(model.0) # What is the interpretation of the # * slope? # * intercept # * why are R2 and SE where they are? # Basic plot of everything so far: raw_data <- data.frame(coded.x = coded.x, y = y0) library(ggplot2) p <- ggplot(data=raw_data, aes(x=coded.x, y=y)) + geom_point(size=5) + xlab("Coded value for x_A") + scale_x_continuous(breaks=seq(-2, 5, 1)) + ylab("Outcome variable") + scale_y_continuous(breaks=seq(0, 150, 10)) + theme_bw() + theme(axis.text=element_text(size=18), legend.position = "none") + theme(axis.title=element_text(face="bold", size=14)) p # Run experiment at center point: predict it first rw.x <- c(24, 48, 36) coded.x <- (rw.x - center)/(0.5*range) x.test <- data.frame(coded.x=coded.x) predict(model.0, newdata=x.test) # Expect a predicted value of 45.5 in the output. Actual: 54 and 55. So about # 10 units difference. # Add the linear fit: through the 2 points plot_data <- data.frame(coded.x = seq(-2, +5, 0.1)) plot_data$y <- predict(model.0, newdata=plot_data) p <- p + geom_line(data=plot_data, color="blue", size=1) p # Try fitting a linear model now through all the data points: rw.x <- c(24, 48, 36, 36) y1 <- c(28, 63, 55, 54) coded.x <- (rw.x - center)/(0.5*range) model.1 <- lm(y1 ~ coded.x) summary(model.1) # Show the linear fit with the extra data point (center points) raw_data <- data.frame(coded.x = coded.x, y = y1) plot_data$y <- predict(model.1, newdata=plot_data) p <- p + geom_point(aes(x=coded.x, y=y, color='darkgreen', size=5), data=raw_data) p <- p + geom_line(data=plot_data, color="darkgreen", size=1) p # A a quadratic component through all the data points: rw.x <- c(24, 48, 36, 36) y1 <- c(28, 63, 55, 54) coded.x <- (rw.x - center)/(0.5*range) model.1.quad <- lm(y1 ~ coded.x + I(coded.x^2)) summary(model.1.quad) # Show the quadratic fit between -2 and +2 in coded units # In real-world units this corresonds to _____ and _____ plot_data <- data.frame(coded.x = seq(-2, +3, 0.1)) plot_data$y <- predict(model.1.quad, newdata=plot_data) p <- p + geom_line(data=plot_data, color="red", size=1) p #------------ # Try a new point at +2: that is RW = coded * 0.5 * range + center; x = 60 hours x.test <- data.frame(coded.x=coded.x) rw.x <- c(24, 48, 36, 36, 60) coded.x <- (rw.x - center)/(0.5*range) predict(model.1.quad, newdata=x.test) # predicts ~54 # Acutal y = 66. Therefore, our model isn't so good. Improve it with the new point rw.x <- c(24, 48, 36, 36, 60) y2 <- c(28, 63, 55, 54, 66) coded.x <- (rw.x - center)/(0.5*range) model.2.quad <- lm(y2 ~ coded.x + I(coded.x^2)) summary(model.2.quad) # Plot it again: raw_data <- data.frame(coded.x = coded.x, y = y2) plot_data <- data.frame(coded.x = seq(-2, +3, 0.1)) plot_data$y <- predict(model.2.quad, newdata=plot_data) p <- p + geom_point(aes(x=coded.x, y=y, color='purple', size=5), data=raw_data) p <- p + geom_line(data=plot_data, color="purple", size=1) p #------------ #------------ # Reset our frame of reference; keep our range the same (24 hours) # -1: 36 # +1: 60 # 0: 48 hours center = 48 range = 24 rw.x <- c(48, 36, 36, 60) y3 <- c(63, 55, 54, 66) # Rebuild the model, and start the plots again model.3 <- lm(y3 ~ rw.x + I(1/(rw.x))) summary(model.3) x.min = 35 x.max = 105 # Basic plot of everything so far: raw_data <- data.frame(rw.x = rw.x, y = y3) p <- ggplot(data=raw_data, aes(x=rw.x, y=y)) + geom_point(size=5) + xlab("Real-world values: time") + scale_x_continuous(breaks=seq(x.min, x.max,5)) + ylab("Outcome variable") + scale_y_continuous(breaks=seq(0, 150, 10)) + theme_bw() + #theme(axis.text=element_text(size=18), legend.position = "none") + theme(axis.title=element_text(face="bold", size=14)) + expand_limits(x = c(x.min, x.max)) + expand_limits(y = c(50, 80)) p plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1)) plot_data$y <- predict(model.3, newdata=plot_data) p <- p + geom_line(data=plot_data, color="blue", size=1) p # Run experiment at +2 [72 hours; use 75 hours - just a little more]: predict it first rw.x <- c(48, 36, 36, 60, 75) coded.x <- (rw.x - center)/(0.5*range) x.test <- data.frame(coded.x=coded.x) predict(model.3, newdata=x.test) # Expect a predicted value of 66 in the output. Actual: 79. So about 12 units difference. # Update model and plot rw.x <- c(48, 36, 36, 60, 75) y4 <- c(63, 55, 54, 66, 79) # Rebuild the model, and start the plots again model.4 <- lm(y4 ~ rw.x + I(rw.x^2)) summary(model.4) # The models are not working well for us. Let's try at around 90 hours. # Expect an outcome of around 85. Got a value of 76. Stabilized? Try again # at 95 hours # Point 7: Try 90 hours. Stabilizing?  : 76 # Point 8. Try 95 hours. Seems to confirm stabilization  : 81 # Point 9: Overshoot(?) of 105  : 72 rw.x <- c(48, 36, 36, 60, 75, 90, 95, 105) y5 <- c(63, 55, 54, 66, 79, 76, 81, 72) # Rebuild the model, and start the plots again model.5 <- lm(y5 ~ rw.x + I(rw.x^2)) summary(model.5) raw_data <- data.frame(rw.x = rw.x, y = y5) plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1)) plot_data$y <- predict(model.5, newdata=plot_data) p <- p + geom_point(aes(x=rw.x, y=y, color='red', size=5), data=raw_data) p <- p + geom_line(data=plot_data, color="red", size=1) p # Rebuild the model with a differt structure, and start the plots again model.6 <- lm(y5 ~ rw.x + I(1/sqrt(rw.x))) summary(model.6) plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1)) plot_data$y <- predict(model.6, newdata=plot_data) p <- p + geom_line(data=plot_data, color="purple", size=1) p