@@ Line 5: / Line 5: @@
 <html><div data-datacamp-exercise data-lang="r" data-height="500">
      <code data-type="sample-code">
+# Copy/paste the code piece by piece.
+# Start small: with just 2 experiments
 center = 36
 range = 24
 rw.x <- c(24, 48)
 coded.x <- (rw.x - center)/(0.5*range)
 y0 <- c(28, 63)
+# Coded -1 : 24 hours
+# Coded +1 : 48 hours
 model.0 <- lm(y0 ~ coded.x)
@@ Line 18: / Line 23: @@
 # * slope?
 # * intercept
-# * why are R2 and SE where they are?
+# * why are R2 and SE the values they are?
 # Basic plot of everything so far:
@@ Line 29: / Line 34: @@
    ylab("Outcome variable") +
    scale_y_continuous(breaks=seq(0, 150, 10)) +
-  theme_bw() +
    theme(axis.text=element_text(size=18), legend.position = "none") +
    theme(axis.title=element_text(face="bold", size=14))
 p
+# Add the linear fit: through the 2 points
+plot_data <- data.frame(coded.x = seq(-2, +5, 0.1))
+plot_data$y <- predict(model.0, newdata=plot_data)
+p <- p + geom_line(data=plot_data, color="blue", size=1)
+p
 # Run experiment at center point: predict it first
@@ Line 41: / Line 50: @@
 predict(model.0, newdata=x.test)
-# Expect a predicted value of 45.5 in the output. Actual: 54 and 55. So about
+# Expect a predicted value of 45.5 in the output. Actual: 55.
-# 10 units difference.
+# Run a second experiment at the center, to get a feeling for spread. 54
+# Similar values, and about 10 units difference from linear model.
-# Add the linear fit: through the 2 points
-plot_data <- data.frame(coded.x = seq(-2, +5, 0.1))
-plot_data$y <- predict(model.0, newdata=plot_data)
-p <- p + geom_line(data=plot_data, color="blue", size=1)
-p
-# Try fitting a linear model now through all the data points:
+# Try fitting a linear model now through all the available data points:
 rw.x <- c(24, 48, 36, 36)
 y1   <- c(28, 63, 55, 54)
@@ Line 58: / Line 61: @@
 summary(model.1)
-# Show the linear fit with the extra data point (center points)
+# Show the linear fit with the extra data point (center points): green line
 raw_data <- data.frame(coded.x = coded.x, y = y1)
 plot_data$y <- predict(model.1, newdata=plot_data)
@@ Line 65: / Line 68: @@
 p
+# Seems like a quadratic model through all the data points could approximate it:
-# A a quadratic component through all the data points:
 rw.x <- c(24, 48, 36, 36)
 y1   <- c(28, 63, 55, 54)
@@ Line 74: / Line 75: @@
 summary(model.1.quad)
-# Show the quadratic fit between -2 and +2 in coded units
+# Show the quadratic fit between -2 and +3 in coded units: red line
 # In real-world units this corresonds to _____ and _____
 plot_data <- data.frame(coded.x = seq(-2, +3, 0.1))
@@ Line 83: / Line 84: @@
 #------------
 # Try a new point at +2: that is RW = coded * 0.5 * range + center; x = 60 hours
-x.test <- data.frame(coded.x=coded.x)
 rw.x <- c(24, 48, 36, 36, 60)
 coded.x <- (rw.x - center)/(0.5*range)
-predict(model.1.quad, newdata=x.test)  # predicts ~54
+x.test <- data.frame(coded.x=coded.x)
+predict(model.1.quad, newdata=x.test)  # predicts 53.5
 # Acutal y = 66. Therefore, our model isn't so good. Improve it with the new point
@@ Line 95: / Line 96: @@
 summary(model.2.quad)
-# Plot it again:
+# Plot it again: purple
 raw_data <- data.frame(coded.x = coded.x, y = y2)
 plot_data <- data.frame(coded.x = seq(-2, +3, 0.1))
@@ Line 105: / Line 106: @@
 #------------
-# Reset our frame of reference; keep our range the same (24 hours)
+# Normally at this point I would reset the frame of reference;
-# -1: 36
+# keep our range the same (24 hours)
-# +1: 60
+# -1: 36 hours [prior coded "0" now becomes "-1]
-#  0: 48 hours
+#  0: 48 hours [prior coded "+1" now becomes "0"]
+# +1: 60 hours [prior coded "+2" now becomes "+1"]
 center = 48
-range = 24
+range = 24  # unchanged, but could also have been chosen to be smaller or bigger
+# At this point I will also switch the model type back to real-world units.
+# For 2 reasons: there is only 1 variable that is interesting (duration), so
+# coded units don't matter; secondly the model types we will use cannot be
+# built when the coded x-value is negative.
+rw.x <- c(24, 48, 36, 36, 60)
+y3   <- c(28, 63, 55, 54, 66)
-rw.x <- c(48, 36, 36, 60)
-y3   <- c(63, 55, 54, 66)
-# Rebuild the model, and start the plots again
+# Rebuild the model, and start the plots again. Try different model type
-model.3 <- lm(y3 ~ rw.x + I(1/(rw.x)))
+model.3 <- lm(y3 ~ I(1/(rw.x)))
 summary(model.3)
-x.min = 35
+x.min = 20
 x.max = 105
 # Basic plot of everything so far:
@@ Line 129: / Line 137: @@
    ylab("Outcome variable") +
    scale_y_continuous(breaks=seq(0, 150, 10)) +
-  theme_bw() +
    #theme(axis.text=element_text(size=18), legend.position = "none") +
    theme(axis.title=element_text(face="bold", size=14)) +
@@ Line 136: / Line 143: @@
 p
-plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1))
+plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.5))
 plot_data$y <- predict(model.3, newdata=plot_data)
 p <- p + geom_line(data=plot_data, color="blue", size=1)
 p
-# Run experiment at +2 [72 hours; use 75 hours - just a little more]: predict it first
+# Run experiment at use 75 hours - just a little more]: predict it first
-rw.x <- c(48, 36, 36, 60, 75)
+rw.x <- c(24, 48, 36, 36, 60, 75)
-coded.x <- (rw.x - center)/(0.5*range)
+x.test <- data.frame(rw.x=rw.x)
-x.test <- data.frame(coded.x=coded.x)
 predict(model.3, newdata=x.test)
-# Expect a predicted value of 66 in the output. Actual: 79. So about 12 units difference.
+# Expect a predicted value of 74.3 in the output. # Actual result: 79.
-# Update model and plot
+# So about 5 units difference. Not too bad.
-rw.x <- c(48, 36, 36, 60, 75)
+rw.x <- c(24, 48, 36, 36, 60, 75)
-y4   <- c(63, 55, 54, 66, 79)
+y4   <- c(28, 63, 55, 54, 66, 79)
-# Rebuild the model, and start the plots again
+# Rebuild the model, and start the plots again. Different model type: x + x^2
-model.4 <- lm(y4 ~ rw.x + I(rw.x^2))
+model.4 <- lm(y4 ~  I(1/(rw.x)))
 summary(model.4)
+# Plot it again: purple
+raw_data <- data.frame(rw.x = rw.x, y = y4)
+plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.5))
+plot_data$y <- predict(model.4, newdata=plot_data)
+p <- p + geom_point(aes(x=rw.x, y=y, color='purple', size=5), data=raw_data)
+p <- p + geom_line(data=plot_data, color="purple", size=1)
+p
+# -------
+# Let's try at around 90 hours. Expect an outcome of around 80.
+# Got a value of 76 instead. Stabilized?
+rw.x <- c(24, 48, 36, 36, 60, 75, 90)
+y5   <- c(28, 63, 55, 54, 66, 79, 76)
+# Rebuild the model, and start the plots again. Different model type: x + x^2
+model.5 <- lm(y5 ~  I(1/(rw.x)))
+summary(model.5)
-# The models are not working well for us. Let's try at around 90 hours.
+# Plot it again: green
-# Expect an outcome of around 85. Got a value of 76. Stabilized? Try again
+raw_data <- data.frame(rw.x = rw.x, y = y5)
-# at 95 hours
+plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.5))
-# Point 7: Try 90 hours. Stabilizing?                     : 76
+plot_data$y <- predict(model.5, newdata=plot_data)
-# Point 8. Try 95 hours. Seems to confirm stabilization   : 81
+p <- p + geom_point(aes(x=rw.x, y=y, color='green', size=5), data=raw_data)
-# Point 9: Overshoot(?) of 105                            : 72
+p <- p + geom_line(data=plot_data, color="green", size=1)
+p
+# Try adding few more points:
+# Point 8. Try 95 hours. Seems to confirm stabilization. Predict 79; actual: 81
+# Point 9: Overshoot(?) to 105 hour to see decline.      Predict 81; actual: 72
+# Build a model with all values now
 rw.x <- c(48, 36, 36, 60, 75, 90, 95, 105)
-y5   <- c(63, 55, 54, 66, 79, 76, 81, 72)
+y6   <- c(63, 55, 54, 66, 79, 76, 81, 72)
 # Rebuild the model, and start the plots again
-model.5 <- lm(y5 ~ rw.x + I(rw.x^2))
+model.6 <- lm(y6 ~ I(1/(rw.x)))
-summary(model.5)
+summary(model.6)
+raw_data <- data.frame(rw.x = rw.x, y = y6)
-raw_data <- data.frame(rw.x = rw.x, y = y5)
 plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1))
 plot_data$y <- predict(model.5, newdata=plot_data)
@@ Line 180: / Line 209: @@
-# Rebuild the model with a differt structure, and start the plots again
+# The current model structure does not allow for decrease/stabilization.
-model.6 <- lm(y5 ~ rw.x + I(1/sqrt(rw.x)))
+# Rebuild it with a different structure
-summary(model.6)
+# Try various ones: x + 1/sqrt(x)     SE = 4.11
+# Try various ones: x + 1/x           SE = 4.24
+# Try various ones: x + log(x)        SE = 3.98
+# Try various ones: x + x^2           SE = 3.41
+model.6.revised <- lm(y6 ~ rw.x + I((rw.x)^2)) #
+summary(model.6.revised)
-plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.1))
+x.min = 35
-plot_data$y <- predict(model.6, newdata=plot_data)
+x.max = 105
-p <- p + geom_line(data=plot_data, color="purple", size=1)
+raw_data <- data.frame(rw.x = rw.x, y = y6)
+p <- ggplot(data=raw_data, aes(x=rw.x, y=y))  +
+  geom_point(size=5) +
+  xlab("Real-world values: time") +
+  scale_x_continuous(breaks=seq(x.min, x.max,5)) +
+  ylab("Outcome variable") +
+  scale_y_continuous(breaks=seq(0, 150, 10)) +
+  theme(axis.title=element_text(face="bold", size=14)) +
+  expand_limits(x = c(x.min, x.max)) +
+  expand_limits(y = c(50, 80))
 p
+plot_data <- data.frame(rw.x = seq(x.min, x.max, 0.5))
+plot_data$y <- predict(model.6.revised, newdata=plot_data)
+p <- p + geom_line(data=plot_data, color="orange", size=1)
+p
      </code>
 </div></html>

Difference between revisions of "Worksheets/Week9"

Revision as of 04:32, 6 May 2019

Part 1

Navigation menu

Search