Difference between revisions of "Design and analysis of experiments (2013)"
Kevin Dunn (talk | contribs) (Created page with "__NOTOC__{{ClassSidebar | date = 08 March 2013 | dates_alt_text = | vimeoID1 = | vimeoID2 = | vimeoID3 = | vimeoID4 = | vimeoID5 = | vimeoID6 = | vimeoID7 = | vimeoID8 = | co...") |
Kevin Dunn (talk | contribs) |
||
(39 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
__NOTOC__{{ClassSidebar | __NOTOC__{{ClassSidebar | ||
| date = 08 March 2013 | | date = 08 March 2013 to 09 April 2013 | ||
| dates_alt_text = | | dates_alt_text = | ||
| vimeoID1 = | | vimeoID1 = 61377497 | ||
| vimeoID2 = | | vimeoID2 = 61649809 | ||
| vimeoID3 = | | vimeoID3 = 61742616 | ||
| vimeoID4 = | | vimeoID4 = 61919772 | ||
| vimeoID5 = | | vimeoID5 = 62193115 | ||
| vimeoID6 = | | vimeoID6 = 62283505 | ||
| vimeoID7 = | | vimeoID7 = 62725017 | ||
| vimeoID8 = | | vimeoID8 = 62809022 | ||
| vimeoID9 = 63190676 | |||
| vimeoID10 = 63267610 | |||
| vimeoID11 = 63425141 | |||
| vimeoID12 = 63679543 | |||
| course_notes_PDF = 2013-4C3-Overheads-Design-and-analysis-of-Experiments.pdf | | course_notes_PDF = 2013-4C3-Overheads-Design-and-analysis-of-Experiments.pdf | ||
| course_notes_alt = Course slides | | course_notes_alt = Course slides | ||
Line 15: | Line 19: | ||
| assignment_instructions = | | assignment_instructions = | ||
| assignment_solutions = | | assignment_solutions = | ||
| video_download_link_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class- | | video_download_link_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-08C.mp4 | ||
| video_download_link_MP4_size = | | video_download_link_MP4_size = 328 M | ||
| video_notes1 = | | video_notes1 = | ||
| video_download_link2_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class- | | video_download_link2_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-09A.mp4 | ||
| video_download_link2_MP4_size = | | video_download_link2_MP4_size = 385 M | ||
| video_notes2 = | | video_notes2 = | ||
| video_download_link3_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class- | | video_download_link3_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-09B.mp4 | ||
| video_download_link3_MP4_size = | | video_download_link3_MP4_size = 360 M | ||
| video_notes3 = | | video_notes3 = | ||
| video_download_link4_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class- | | video_download_link4_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-09C.mp4 | ||
| video_download_link4_MP4_size = | | video_download_link4_MP4_size = 389 M | ||
| video_notes4 = | | video_notes4 = | ||
| video_download_link5_MP4 =http://learnche.mcmaster.ca/media/4C3-2013-Class- | | video_download_link5_MP4 =http://learnche.mcmaster.ca/media/4C3-2013-Class-10A.mp4 | ||
| video_download_link5_MP4_size = | | video_download_link5_MP4_size = 237 M | ||
| video_notes5 = | | video_notes5 = | ||
| video_download_link6_MP4 = | | video_download_link6_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-10B.mp4 | ||
| video_download_link6_MP4_size = M | | video_download_link6_MP4_size = 325 M | ||
| video_notes6 = | | video_notes6 = | ||
| video_download_link7_MP4 = | | video_download_link7_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-11A.mp4 | ||
| video_download_link7_MP4_size = M | | video_download_link7_MP4_size = 217 M | ||
| video_notes7 = | | video_notes7 = | ||
| video_download_link8_MP4 = | | video_download_link8_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-11B.mp4 | ||
| video_download_link8_MP4_size = M | | video_download_link8_MP4_size = 313 M | ||
| video_notes8 = | | video_notes8 = | ||
}} | | video_download_link9_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-12A.mp4 | ||
| video_download_link9_MP4_size = 374 M | |||
| video_notes9 = | |||
| video_download_link10_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-12B.mp4 | |||
| video_download_link10_MP4_size = 383 M | |||
| video_notes10 = | |||
| video_download_link11_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-12C.mp4 | |||
| video_download_link11_MP4_size = 504 M | |||
| video_notes11 = | |||
| video_download_link12_MP4 = http://learnche.mcmaster.ca/media/4C3-2013-Class-13A-part1.mp4 | |||
| video_download_link12_MP4_size = 234 M | |||
| video_notes12 =}} | |||
* [http://learnche.mcmaster.ca/pid/?source= | <span style="color:#900000">{{Huge|This page is out of date.}}</span> {{Huge|Please see the [[Design_and_analysis_of_experiments |latest version of these notes]].}} | ||
== Course notes and slides == | |||
* [http://learnche.mcmaster.ca/pid/?source=DOE Course textbook] (print chapter 5) | |||
* [[Image:Nuvola_mimetypes_pdf.png|20px|link=Media:2013-4C3-Overheads-Design-and-analysis-of-Experiments.pdf]] [[Media:2013-4C3-Overheads-Design-and-analysis-of-Experiments.pdf|Slides for class]] | * [[Image:Nuvola_mimetypes_pdf.png|20px|link=Media:2013-4C3-Overheads-Design-and-analysis-of-Experiments.pdf]] [[Media:2013-4C3-Overheads-Design-and-analysis-of-Experiments.pdf|Slides for class]] | ||
== Class materials == | |||
* 08 Mar 2013 (Class 08C): [http://learnche.mcmaster.ca/media/4C3-2013-Class-08C.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-08C.mp4 video] and [[Media:4C03-6C03-DOE-slides-Emily-Nichols.pdf|guest speaker's slides]] (Emily Nichols) | |||
* 12 Mar 2013 (Class 09A): [http://learnche.mcmaster.ca/media/4C3-2013-Class-09A.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-09A.mp4 video] | |||
* 13 Mar 2013 (Class 09B): [http://learnche.mcmaster.ca/media/4C3-2013-Class-09B.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-09B.mp4 video] | |||
* 15 Mar 2013 (Class 09C): [http://learnche.mcmaster.ca/media/4C3-2013-Class-09C.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-09C.mp4 video] | |||
* 19 Mar 2013 (Class 10A): [http://learnche.mcmaster.ca/media/4C3-2013-Class-10A.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-10A.mp4 video] | |||
* 20 Mar 2013 (Class 10B): [http://learnche.mcmaster.ca/media/4C3-2013-Class-10B.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-10B.mp4 video] | |||
* 26 Mar 2013 (Class 11A): [http://learnche.mcmaster.ca/media/4C3-2013-Class-11A.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-11A.mp4 video] | |||
* 27 Mar 2013 (Class 11B): [http://learnche.mcmaster.ca/media/4C3-2013-Class-11B.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-11B.mp4 video] | |||
* 02 Apr 2013 (Class 12A): [http://learnche.mcmaster.ca/media/4C3-2013-Class-12A.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-12A.mp4 video] | |||
* 03 Apr 2013 (Class 12B): [http://learnche.mcmaster.ca/media/4C3-2013-Class-12B.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-12B.mp4 video] | |||
* 05 Apr 2013 (Class 12C): [http://learnche.mcmaster.ca/media/4C3-2013-Class-12C.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-12C.mp4 video] | |||
* 09 Apr 2013 (Class 13A-part1): [http://learnche.mcmaster.ca/media/4C3-2013-Class-13A-part1.mp3 Audio] and [http://learnche.mcmaster.ca/media/4C3-2013-Class-13A-part1.mp4 video] | |||
== Software source code == | |||
Source code to estimate the DOE model (''scroll down if the code is not visible'') | |||
{| class="wikitable" | |||
|- | |||
! R | |||
! MATLAB | |||
|- | |||
| | |||
<syntaxhighlight lang="R"> | |||
T <- c(-1, +1, -1, +1) # centered and scaled temperature | |||
S <- c(-1, -1, +1, +1) # centered and scaled substrate concentration | |||
y <- c(69, 60, 64, 53) # conversion is the response, y | |||
mod <- lm(y ~ T + S + T * S) | |||
summary(mod) | |||
Call: | |||
lm(formula = y ~ T + S + T * S) | |||
Residuals: | |||
ALL 4 residuals are 0: no residual degrees of freedom! | |||
Coefficients: | |||
Estimate Std. Error t value Pr(>|t|) | |||
(Intercept) 61.5 NA NA NA | |||
T -5.0 NA NA NA | |||
S -3.0 NA NA NA | |||
T:S -0.5 NA NA NA | |||
Residual standard error: NaN on 0 degrees of freedom | |||
Multiple R-squared: 1, Adjusted R-squared: NaN | |||
F-statistic: NaN on 3 and 0 DF, p-value: NA | |||
</syntaxhighlight> | |||
| valign="top"| | |||
<syntaxhighlight lang="matlab"> | |||
% Set up the X matrix: | |||
n = 4; | |||
temperature = [-1, +1, -1, +1]; | |||
substrate = [-1, -1, +1, +1]; | |||
X = [ ones(n, 1), temperature', substrate', (temperature .* substrate)']; | |||
% or you can type it in directly (but this is error prone) | |||
X = [+1 -1 -1 +1; ... | |||
+1 +1 -1 -1; ... | |||
+1 -1 +1 -1; ... | |||
+1 +1 +1 +1]; | |||
% Conversion is your y-variable | |||
y = [69, 60, 64, 53]'; | |||
% Regression coefficients = b = [Intercept, b_T, b_S, b_{TS}] | |||
b = inv(X'*X)*X'*y | |||
</syntaxhighlight> | |||
|} | |||
''3-factor example'' | |||
<rst> | |||
<rst-options: 'toc' = False/> | |||
The data are from a plastics molding factory which must treat its waste before discharge. The :math:`y` variable represents the average amount of pollutant discharged [lb per day], while the 3 factors that were varied were: | |||
- :math:`C`: the chemical compound added [A or B] | |||
- :math:`T`: the treatment temperature [72°F or 100°F] | |||
- :math:`S`: the stirring speed [200 rpm or 400 rpm] | |||
- :math:`y`: the amount of pollutant discharged [lb per day] | |||
.. tabularcolumns:: |l|l||c|c|c||c| | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| Experiment| Order | :math:`C` | :math:`T` [°F] | :math:`S` [rpm] | :math:`y` [lb] | | |||
+===========+=======+===============+=================+=================+=================+ | |||
| 1 | 5 | A | 72 | 200 | 5 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 2 | 6 | B | 72 | 200 | 30 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 3 | 1 | A | 100 | 200 | 6 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 4 | 4 | B | 100 | 200 | 33 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 5 | 2 | A | 72 | 400 | 4 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 6 | 7 | B | 72 | 400 | 3 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 7 | 3 | A | 100 | 400 | 5 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
| 8 | 8 | B | 100 | 400 | 4 | | |||
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | |||
We showed the cube plot for this system in the class on 10 March. From the cube plot we could already see the main factors, and even the **CS** interaction was noticeable. | |||
* **C effect**: There are 4 estimates of :math:`C = \displaystyle \frac{(+25) + (+27) + (-1) + (-1)}{4} = \frac{50}{4} = \bf{12.5}` | |||
* **T effect**: There are 4 estimates of :math:`T = \displaystyle \frac{(+1) + (+3) + (+1) + (+1)}{4} = \frac{6}{4} = \bf{1.5}` | |||
* **S effect**: There are 4 estimates of :math:`S = \displaystyle \frac{(-27) + (-1) + (-29) + (-1)}{4} = \frac{-58}{4} = \bf{-14.5}` | |||
* **CT interaction**: There are 2 estimates of :math:`CT`. Recall that interactions are calculated as the half difference going from high to low. Consider the change in :math:`C` when | |||
- :math:`T_\text{high}` (at :math:`S` high) = 4 - 5 = -1 | |||
- :math:`T_\text{low}` (at :math:`S` high) = 3 - 4 = -1 | |||
- First estimate = [(-1) - (-1)]/2 = 0 | |||
- :math:`T_\text{high}` (at :math:`S` low) = 33 - 6 = +27 | |||
- :math:`T_\text{low}` (at :math:`S` low) = 30 - 5 = +25 | |||
- Second estimate = [(+27) - (+25)]/2 = +1 | |||
- Average **CT** interaction = (0 + 1)/2 = **0.5** | |||
- You can interchange :math:`C` and :math:`T` and still get the same result. | |||
* **CS interaction**: There are 2 estimates of :math:`CS`. Consider the change in :math:`C` when | |||
- :math:`S_\text{high}` (at :math:`T` high) = 4 - 5 = -1 | |||
- :math:`S_\text{low}` (at :math:`T` high) = 33 - 6 = +27 | |||
- First estimate = [(-1) - (+27)]/2 = -14 | |||
- :math:`S_\text{high}` (at :math:`T` low) = 3 - 4 = -1 | |||
- :math:`S_\text{low}` (at :math:`T` low) = 30 - 5 = +25 | |||
- Second estimate = [(-1) - (+25)]/2 = -13 | |||
- Average **CS** interaction = (-13 - 14)/2 = **-13.5** | |||
- You can interchange :math:`C` and :math:`S` and still get the same result. | |||
* **ST interaction**: There are 2 estimates of :math:`ST`: (-1 + 0)/2 = **-0.5**, calculate in the same way as above. | |||
* **CTS interaction**: There is only a single estimate of :math:`CTS`: | |||
- :math:`CT` effect at high :math:`S` = 0 | |||
- :math:`CT` effect at low :math:`S` = +1 | |||
- :math:`CTS` interaction = [(0) - (+1)] / 2 = **-0.5** | |||
- You can calculate this also by considering the :math:`CS` effect at the two levels of :math:`T` | |||
- Or, you can calculate this by considering the :math:`ST` effect at the two levels of :math:`C`. | |||
- All 3 approaches give the same result. | |||
Next, use computer software to verify that | |||
.. math:: | |||
y = 11.25 + 6.25x_C + 0.75x_T -7.25x_S + 0.25 x_C x_T -6.75 x_C x_S -0.25 x_T x_S - 0.25 x_C x_T x_S | |||
The :math:`\mathbf{X}` matrix and :math:`\mathbf{y}` vector used to calculate the least squares model: | |||
.. math:: | |||
\begin{bmatrix} 5\\30\\6\\33\\4\\3\\5\\4 \end{bmatrix} &= \begin{bmatrix} +1 & -1 & -1 & -1 & +1 & +1 & +1 & -1\\ +1 & +1 & -1 & -1 & -1 & -1 & +1 & +1\\ +1 & -1 & +1 & -1 & -1 & +1 & -1 & +1\\ +1 & +1 & +1 & -1 & +1 & -1 & -1 & -1\\ +1 & -1 & -1 & +1 & +1 & -1 & -1 & +1\\ +1 & +1 & -1 & +1 & -1 & +1 & -1 & -1\\ +1 & -1 & +1 & +1 & -1 & -1 & +1 & -1\\ +1 & +1 & +1 & +1 & +1 & +1 & +1 & +1\\ \end{bmatrix} \begin{bmatrix} b_0 \\ b_C \\ b_T \\ b_{S} \\ b_{CT} \\ b_{CS} \\ b_{TS} \\ b_{CTS} \end{bmatrix} \\ \mathbf{y} &= \mathbf{X} \mathbf{b} | |||
</rst> | |||
<syntaxhighlight lang="R"> | |||
# Create the design matrix in a quick way in R | |||
C <- T <- S <- c(-1, +1) | |||
design <- expand.grid(C=C, T=T, S=S) | |||
design | |||
C T S | |||
1 -1 -1 -1 | |||
2 1 -1 -1 | |||
3 -1 1 -1 | |||
4 1 1 -1 | |||
5 -1 -1 1 | |||
6 1 -1 1 | |||
7 -1 1 1 | |||
8 1 1 1 | |||
C <- design$C | |||
T <- design$T | |||
S <- design$S | |||
y <- c(5, 30, 6, 33, 4, 3, 5, 4) | |||
# Full factorial model | |||
# mod.full <- lm(y ~ C + T + S + C*T + C*S + S*T + C*T*S) | |||
# This powerful notation will expand all terms up to the 3rd order interactions | |||
mod.full <- lm( y ~ (C + T + S)^3 ) | |||
summary(mod.full) | |||
Call: | |||
lm(formula = y ~ C + T + S + C * T + C * S + S * T + C * T * S) | |||
Residuals: | |||
ALL 8 residuals are 0: no residual degrees of freedom! | |||
Coefficients: | |||
Estimate Std. Error t value Pr(>|t|) | |||
(Intercept) 11.25 NA NA NA | |||
C 6.25 NA NA NA | |||
T 0.75 NA NA NA | |||
S -7.25 NA NA NA | |||
C:T 0.25 NA NA NA | |||
C:S -6.75 NA NA NA | |||
T:S -0.25 NA NA NA | |||
C:T:S -0.25 NA NA NA | |||
# Guide to estimating significant coefficients, ignoring the intercept (1st coefficient) | |||
coeff.full <- coef(mod.full)[2:length(coef(mod.full))] | |||
# Pareto plot of the absolute coefficients | |||
library(lattice) | |||
coeff <- sort(abs(coeff.full), index.return=TRUE) | |||
barchart(coeff$x, | |||
xlim=c(0, max(abs(coeff.full))+0.1), | |||
xlab=list("Magnitude of effect", cex=1.5), | |||
ylab = list("Effect", cex=1.5), | |||
groups=(coeff.full>0)[coeff$ix], col=c("lightblue", "orange"), | |||
scales=list(cex=1.5) | |||
) | |||
# Eliminate some factors: use the regular linear model notation | |||
# Eliminate C*T*S, C*T and S*T | |||
mod.sub <- lm(y ~ C + T + S + C*S) | |||
summary(mod.sub) | |||
confint(mod.sub) | |||
coeff.sub <- coef(mod.sub)[2:length(coef(mod.sub))] | |||
coeff <- sort(abs(coeff.sub), index.return=TRUE) | |||
barchart(coeff$x, | |||
xlim=c(0, max(abs(coeff.sub))+0.1), | |||
xlab=list("Magnitude of effect", cex=1.5), | |||
ylab = list("Effect", cex=1.5), | |||
groups=(coeff.sub>0)[coeff$ix], col=c("lightblue", "orange"), | |||
scales=list(cex=1.5) | |||
) | |||
</syntaxhighlight> | |||
Fractional factorial code in R (see Saturated Design example in the slides) | |||
<syntaxhighlight lang="R"> | |||
A <- B <- C <- c(-1, +1) | |||
design <- expand.grid(A=A, B=B, C=C) | |||
design | |||
# A B C | |||
# 1 -1 -1 -1 | |||
# 2 1 -1 -1 | |||
# 3 -1 1 -1 | |||
# 4 1 1 -1 | |||
# 5 -1 -1 1 | |||
# 6 1 -1 1 | |||
# 7 -1 1 1 | |||
# 8 1 1 1 | |||
A <- design$A | |||
B <- design$B | |||
C <- design$C | |||
# Specify the other factors 4: up to 7 factors + intercept can be estimated | |||
# from the 8 experiments. | |||
D <- A*B | |||
E <- A*C | |||
F <- B*C | |||
G <- A*B*C | |||
y <- c(77.1, 68.9, 75.5, 72.5, 67.9, 68.5, 71.5, 63.7) | |||
mod <- lm(y ~ A + B + C + D + E + F + G) | |||
summary(mod) | |||
library(lattice) | |||
coeff.full <- coef(mod)[2:length(coef(mod))] | |||
coeff <- sort(abs(coeff.full), index.return=TRUE) | |||
barchart(coeff$x, | |||
xlim=c(0, max(abs(coeff.full))+0.1), | |||
xlab=list("Magnitude of effect", cex=1.5), | |||
ylab = list("Effect", cex=1.5), | |||
groups=(coeff.full>0)[coeff$ix], col=c("lightblue", "orange"), | |||
scales=list(cex=1.5) | |||
) | |||
# Factors B, D and F are not important. Remove and rebuild. | |||
mod.update.1 <- lm(y ~ A + C + E + G) | |||
confint(mod.update.1) | |||
# The confidence interval for E spans zero. Might be of practical relevance, | |||
# but it is not of statistical relevance. | |||
mod.update.2 <- lm(y ~ A + C + G) | |||
confint(mod.update.2) | |||
</syntaxhighlight> | |||
Code for contour plots in response surface methods | |||
{| class="wikitable" | |||
|- | |||
! R | |||
! MATLAB | |||
|- | |||
| | |||
<syntaxhighlight lang="R"> | |||
# Initial set of 4 runs | |||
T <- c(-1, +1, -1, +1, 0) | |||
S <- c(-1, -1, +1, +1, 0) | |||
y <- c(193, 310, 468, 571, 407) | |||
# Visualize the surface | |||
# ------------------------------------------------------ | |||
mod <- lm(y ~ T + S + T*S) | |||
summary(mod) | |||
N <- 50 # resolution of surface (higher values give smoother plots) | |||
# The lower and upper bounds, in coded units, over which we want | |||
# to visualize the surface | |||
bound <- 3 | |||
T_plot <- seq(-bound, bound ,length=N) | |||
S_plot <- seq(-bound, bound, length=N) | |||
grd <- expand.grid(T=T_plot, S=S_plot) | |||
# Predict directly from least squares model | |||
grd$y <- predict(mod, grd) | |||
library(lattice) | |||
contourplot(y ~ T * S, | |||
data = grd, | |||
cuts = 10, | |||
region = TRUE, | |||
col.regions = terrain.colors, | |||
xlab = "Temperature", | |||
ylab = "Substrate", | |||
main = "Predicted response to factors T and S, in coded units" | |||
) | |||
trellis.focus("panel", 1, 1, highlight=FALSE) | |||
lpoints(T, S, pch="O", col="red", cex=3) | |||
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3) | |||
lpoints(1, 2.44, col="red", cex=5, pch="*") | |||
ltext(0.4, 2.44, "Expt 5", cex=2, col="red") | |||
trellis.unfocus() | |||
</syntaxhighlight> | |||
| valign="top"| | |||
<syntaxhighlight lang="MATLAB"> | |||
X = [+1 +1 +1 +1 +1; ... | |||
-1 +1 -1 +1 0; ... | |||
-1 -1 +1 +1 0; ... | |||
+1 -1 -1 +1 0]'; | |||
% Profit values recorded from the factorial experiments | |||
profit = [193 310 468 571 407]'; | |||
% Coefficient order = [Intercept, b_T, b_S, b_{TS}] | |||
b = inv(X'*X)*X'*profit | |||
% What does the model surface look like? | |||
[T, S] = meshgrid(-2:0.1:2, -2:0.1:2); | |||
Y = b(1) + b(2).*T + b(3).*S + b(4).*T.*S; | |||
subplot(1, 2, 1) | |||
surf(T, S, Y) | |||
xlabel('T') | |||
ylabel('S') | |||
grid('on') | |||
subplot(1, 2, 2) | |||
contour(T, S, Y) | |||
xlabel('T') | |||
ylabel('S') | |||
grid('on') | |||
axis equal | |||
colorbar | |||
</syntaxhighlight> | |||
|} | |||
Extending the response surface around the next factorial point 6 | |||
<syntaxhighlight lang="R"> | |||
# Factorial at point 6 | |||
T <- c(-1, +1, -1, +1, 0) | |||
S <- c(-1, -1, +1, +1, 0) | |||
y <- c(694, 725, 620, 642, 688) | |||
# Build a least squares model for the local region | |||
mod <- lm(y ~ T + S + T*S) | |||
summary(mod) | |||
next.12 <- data.frame(T=+1, S=-3.0) | |||
pred.12 <- predict(mod, next.12) # predicted $811, but actual was $716: far off! | |||
# Visualize the surface | |||
N <- 100 # resolution of surface (higher values give smoother plots) | |||
# The lower and upper bounds, in coded units, over which we want | |||
# to visualize the surface | |||
bound <- 3.1 | |||
T_plot <- seq(-bound, bound ,length=N) | |||
S_plot <- seq(-bound, bound, length=N) | |||
grd <- expand.grid(T=T_plot, S=S_plot) | |||
# Predict directly from least squares model | |||
grd$y <- predict(mod, grd) | |||
library(lattice) | |||
contourplot(y ~ T * S, | |||
data = grd, | |||
cuts = 10, | |||
region = TRUE, | |||
col.regions = terrain.colors, | |||
xlab = "Temperature", | |||
ylab = "Substrate", | |||
main = "Predicted response to factors T and S, in coded units" | |||
) | |||
trellis.focus("panel", 1, 1, highlight=FALSE) | |||
lpoints(T, S, pch="O", col="red", cex=3) | |||
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3) | |||
lpoints(1, -3, col="red", cex=5, pch="*") | |||
ltext(1.2, -3, "12", cex=2, col="red") | |||
trellis.unfocus() | |||
# Central composite design: experiments 8 to 11, 6, and 13 to 16 [9 expts] | |||
T <- c(-1, +1, -1, +1, 0, 0, +sqrt(2), 0, -sqrt(2)) | |||
S <- c(-1, -1, +1, +1, 0, -sqrt(2), 0, sqrt(2), 0) | |||
y <- c(694, 725, 620, 642, 688, 720, 699, 610, 663) | |||
# Add quadratic terms to the model | |||
mod.quad <- lm(y ~ T + S + T*S + I(S^2) + I(T^2)) | |||
summary(mod.quad) | |||
confint(mod.quad) | |||
# Predict directly from least squares model | |||
grd$y <- predict(mod.quad, grd) | |||
library(lattice) | |||
contourplot(y ~ T * S, | |||
data = grd, | |||
cuts = 100, # use more contour lines | |||
region = TRUE, | |||
col.regions = terrain.colors, | |||
xlab = "Temperature", | |||
ylab = "Substrate", | |||
main = "Predicted response to factors T and S, in coded units" | |||
) | |||
trellis.focus("panel", 1, 1, highlight=FALSE) | |||
lpoints(T, S, pch="O", col="red", cex=3) | |||
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3) | |||
llines(c(0, 0), c(-sqrt(2), sqrt(2)), col="red", lwd=3) | |||
llines(c(-sqrt(2), sqrt(2)), c(0, 0), col="red", lwd=3) | |||
trellis.unfocus() | |||
# Predicted profit for next experiment, number 17 at | |||
next.17 <- data.frame(T=2.0, S=-1.9) | |||
pred.17 <- predict(mod.quad, next.17) # $736; the actual recorded profit was $738, so close! | |||
</syntaxhighlight> |
Latest revision as of 19:01, 27 January 2017
Class date(s): | 08 March 2013 to 09 April 2013 | ||||
(PDF) | Course slides | ||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
| |||||
This page is out of date. Please see the latest version of these notes.
Course notes and slides
- Course textbook (print chapter 5)
- Slides for class
Class materials
- 08 Mar 2013 (Class 08C): Audio and video and guest speaker's slides (Emily Nichols)
- 12 Mar 2013 (Class 09A): Audio and video
- 13 Mar 2013 (Class 09B): Audio and video
- 15 Mar 2013 (Class 09C): Audio and video
- 19 Mar 2013 (Class 10A): Audio and video
- 20 Mar 2013 (Class 10B): Audio and video
- 26 Mar 2013 (Class 11A): Audio and video
- 27 Mar 2013 (Class 11B): Audio and video
- 02 Apr 2013 (Class 12A): Audio and video
- 03 Apr 2013 (Class 12B): Audio and video
- 05 Apr 2013 (Class 12C): Audio and video
- 09 Apr 2013 (Class 13A-part1): Audio and video
Software source code
Source code to estimate the DOE model (scroll down if the code is not visible)
R | MATLAB |
---|---|
T <- c(-1, +1, -1, +1) # centered and scaled temperature
S <- c(-1, -1, +1, +1) # centered and scaled substrate concentration
y <- c(69, 60, 64, 53) # conversion is the response, y
mod <- lm(y ~ T + S + T * S)
summary(mod)
Call:
lm(formula = y ~ T + S + T * S)
Residuals:
ALL 4 residuals are 0: no residual degrees of freedom!
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 61.5 NA NA NA
T -5.0 NA NA NA
S -3.0 NA NA NA
T:S -0.5 NA NA NA
Residual standard error: NaN on 0 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: NaN
F-statistic: NaN on 3 and 0 DF, p-value: NA
|
% Set up the X matrix:
n = 4;
temperature = [-1, +1, -1, +1];
substrate = [-1, -1, +1, +1];
X = [ ones(n, 1), temperature', substrate', (temperature .* substrate)'];
% or you can type it in directly (but this is error prone)
X = [+1 -1 -1 +1; ...
+1 +1 -1 -1; ...
+1 -1 +1 -1; ...
+1 +1 +1 +1];
% Conversion is your y-variable
y = [69, 60, 64, 53]';
% Regression coefficients = b = [Intercept, b_T, b_S, b_{TS}]
b = inv(X'*X)*X'*y
|
3-factor example <rst> <rst-options: 'toc' = False/> The data are from a plastics molding factory which must treat its waste before discharge. The :math:`y` variable represents the average amount of pollutant discharged [lb per day], while the 3 factors that were varied were:
- :math:`C`: the chemical compound added [A or B]
- :math:`T`: the treatment temperature [72°F or 100°F] - :math:`S`: the stirring speed [200 rpm or 400 rpm] - :math:`y`: the amount of pollutant discharged [lb per day]
.. tabularcolumns:: |l|l||c|c|c||c|
+-----------+-------+---------------+-----------------+-----------------+-----------------+ | Experiment| Order | :math:`C` | :math:`T` [°F] | :math:`S` [rpm] | :math:`y` [lb] | +===========+=======+===============+=================+=================+=================+ | 1 | 5 | A | 72 | 200 | 5 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 2 | 6 | B | 72 | 200 | 30 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 3 | 1 | A | 100 | 200 | 6 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 4 | 4 | B | 100 | 200 | 33 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 5 | 2 | A | 72 | 400 | 4 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 6 | 7 | B | 72 | 400 | 3 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 7 | 3 | A | 100 | 400 | 5 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 8 | 8 | B | 100 | 400 | 4 | +-----------+-------+---------------+-----------------+-----------------+-----------------+
We showed the cube plot for this system in the class on 10 March. From the cube plot we could already see the main factors, and even the **CS** interaction was noticeable.
- **C effect**: There are 4 estimates of :math:`C = \displaystyle \frac{(+25) + (+27) + (-1) + (-1)}{4} = \frac{50}{4} = \bf{12.5}`
- **T effect**: There are 4 estimates of :math:`T = \displaystyle \frac{(+1) + (+3) + (+1) + (+1)}{4} = \frac{6}{4} = \bf{1.5}`
- **S effect**: There are 4 estimates of :math:`S = \displaystyle \frac{(-27) + (-1) + (-29) + (-1)}{4} = \frac{-58}{4} = \bf{-14.5}`
- **CT interaction**: There are 2 estimates of :math:`CT`. Recall that interactions are calculated as the half difference going from high to low. Consider the change in :math:`C` when
- :math:`T_\text{high}` (at :math:`S` high) = 4 - 5 = -1 - :math:`T_\text{low}` (at :math:`S` high) = 3 - 4 = -1 - First estimate = [(-1) - (-1)]/2 = 0 - :math:`T_\text{high}` (at :math:`S` low) = 33 - 6 = +27 - :math:`T_\text{low}` (at :math:`S` low) = 30 - 5 = +25 - Second estimate = [(+27) - (+25)]/2 = +1
- Average **CT** interaction = (0 + 1)/2 = **0.5** - You can interchange :math:`C` and :math:`T` and still get the same result.
- **CS interaction**: There are 2 estimates of :math:`CS`. Consider the change in :math:`C` when
- :math:`S_\text{high}` (at :math:`T` high) = 4 - 5 = -1 - :math:`S_\text{low}` (at :math:`T` high) = 33 - 6 = +27 - First estimate = [(-1) - (+27)]/2 = -14 - :math:`S_\text{high}` (at :math:`T` low) = 3 - 4 = -1 - :math:`S_\text{low}` (at :math:`T` low) = 30 - 5 = +25 - Second estimate = [(-1) - (+25)]/2 = -13
- Average **CS** interaction = (-13 - 14)/2 = **-13.5** - You can interchange :math:`C` and :math:`S` and still get the same result.
- **ST interaction**: There are 2 estimates of :math:`ST`: (-1 + 0)/2 = **-0.5**, calculate in the same way as above.
- **CTS interaction**: There is only a single estimate of :math:`CTS`:
- :math:`CT` effect at high :math:`S` = 0 - :math:`CT` effect at low :math:`S` = +1 - :math:`CTS` interaction = [(0) - (+1)] / 2 = **-0.5**
- You can calculate this also by considering the :math:`CS` effect at the two levels of :math:`T` - Or, you can calculate this by considering the :math:`ST` effect at the two levels of :math:`C`. - All 3 approaches give the same result.
Next, use computer software to verify that
.. math::
y = 11.25 + 6.25x_C + 0.75x_T -7.25x_S + 0.25 x_C x_T -6.75 x_C x_S -0.25 x_T x_S - 0.25 x_C x_T x_S
The :math:`\mathbf{X}` matrix and :math:`\mathbf{y}` vector used to calculate the least squares model:
.. math::
\begin{bmatrix} 5\\30\\6\\33\\4\\3\\5\\4 \end{bmatrix} &= \begin{bmatrix} +1 & -1 & -1 & -1 & +1 & +1 & +1 & -1\\ +1 & +1 & -1 & -1 & -1 & -1 & +1 & +1\\ +1 & -1 & +1 & -1 & -1 & +1 & -1 & +1\\ +1 & +1 & +1 & -1 & +1 & -1 & -1 & -1\\ +1 & -1 & -1 & +1 & +1 & -1 & -1 & +1\\ +1 & +1 & -1 & +1 & -1 & +1 & -1 & -1\\ +1 & -1 & +1 & +1 & -1 & -1 & +1 & -1\\ +1 & +1 & +1 & +1 & +1 & +1 & +1 & +1\\ \end{bmatrix} \begin{bmatrix} b_0 \\ b_C \\ b_T \\ b_{S} \\ b_{CT} \\ b_{CS} \\ b_{TS} \\ b_{CTS} \end{bmatrix} \\ \mathbf{y} &= \mathbf{X} \mathbf{b} </rst>
# Create the design matrix in a quick way in R
C <- T <- S <- c(-1, +1)
design <- expand.grid(C=C, T=T, S=S)
design
C T S
1 -1 -1 -1
2 1 -1 -1
3 -1 1 -1
4 1 1 -1
5 -1 -1 1
6 1 -1 1
7 -1 1 1
8 1 1 1
C <- design$C
T <- design$T
S <- design$S
y <- c(5, 30, 6, 33, 4, 3, 5, 4)
# Full factorial model
# mod.full <- lm(y ~ C + T + S + C*T + C*S + S*T + C*T*S)
# This powerful notation will expand all terms up to the 3rd order interactions
mod.full <- lm( y ~ (C + T + S)^3 )
summary(mod.full)
Call:
lm(formula = y ~ C + T + S + C * T + C * S + S * T + C * T * S)
Residuals:
ALL 8 residuals are 0: no residual degrees of freedom!
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.25 NA NA NA
C 6.25 NA NA NA
T 0.75 NA NA NA
S -7.25 NA NA NA
C:T 0.25 NA NA NA
C:S -6.75 NA NA NA
T:S -0.25 NA NA NA
C:T:S -0.25 NA NA NA
# Guide to estimating significant coefficients, ignoring the intercept (1st coefficient)
coeff.full <- coef(mod.full)[2:length(coef(mod.full))]
# Pareto plot of the absolute coefficients
library(lattice)
coeff <- sort(abs(coeff.full), index.return=TRUE)
barchart(coeff$x,
xlim=c(0, max(abs(coeff.full))+0.1),
xlab=list("Magnitude of effect", cex=1.5),
ylab = list("Effect", cex=1.5),
groups=(coeff.full>0)[coeff$ix], col=c("lightblue", "orange"),
scales=list(cex=1.5)
)
# Eliminate some factors: use the regular linear model notation
# Eliminate C*T*S, C*T and S*T
mod.sub <- lm(y ~ C + T + S + C*S)
summary(mod.sub)
confint(mod.sub)
coeff.sub <- coef(mod.sub)[2:length(coef(mod.sub))]
coeff <- sort(abs(coeff.sub), index.return=TRUE)
barchart(coeff$x,
xlim=c(0, max(abs(coeff.sub))+0.1),
xlab=list("Magnitude of effect", cex=1.5),
ylab = list("Effect", cex=1.5),
groups=(coeff.sub>0)[coeff$ix], col=c("lightblue", "orange"),
scales=list(cex=1.5)
)
Fractional factorial code in R (see Saturated Design example in the slides)
A <- B <- C <- c(-1, +1)
design <- expand.grid(A=A, B=B, C=C)
design
# A B C
# 1 -1 -1 -1
# 2 1 -1 -1
# 3 -1 1 -1
# 4 1 1 -1
# 5 -1 -1 1
# 6 1 -1 1
# 7 -1 1 1
# 8 1 1 1
A <- design$A
B <- design$B
C <- design$C
# Specify the other factors 4: up to 7 factors + intercept can be estimated
# from the 8 experiments.
D <- A*B
E <- A*C
F <- B*C
G <- A*B*C
y <- c(77.1, 68.9, 75.5, 72.5, 67.9, 68.5, 71.5, 63.7)
mod <- lm(y ~ A + B + C + D + E + F + G)
summary(mod)
library(lattice)
coeff.full <- coef(mod)[2:length(coef(mod))]
coeff <- sort(abs(coeff.full), index.return=TRUE)
barchart(coeff$x,
xlim=c(0, max(abs(coeff.full))+0.1),
xlab=list("Magnitude of effect", cex=1.5),
ylab = list("Effect", cex=1.5),
groups=(coeff.full>0)[coeff$ix], col=c("lightblue", "orange"),
scales=list(cex=1.5)
)
# Factors B, D and F are not important. Remove and rebuild.
mod.update.1 <- lm(y ~ A + C + E + G)
confint(mod.update.1)
# The confidence interval for E spans zero. Might be of practical relevance,
# but it is not of statistical relevance.
mod.update.2 <- lm(y ~ A + C + G)
confint(mod.update.2)
Code for contour plots in response surface methods
R | MATLAB |
---|---|
# Initial set of 4 runs
T <- c(-1, +1, -1, +1, 0)
S <- c(-1, -1, +1, +1, 0)
y <- c(193, 310, 468, 571, 407)
# Visualize the surface
# ------------------------------------------------------
mod <- lm(y ~ T + S + T*S)
summary(mod)
N <- 50 # resolution of surface (higher values give smoother plots)
# The lower and upper bounds, in coded units, over which we want
# to visualize the surface
bound <- 3
T_plot <- seq(-bound, bound ,length=N)
S_plot <- seq(-bound, bound, length=N)
grd <- expand.grid(T=T_plot, S=S_plot)
# Predict directly from least squares model
grd$y <- predict(mod, grd)
library(lattice)
contourplot(y ~ T * S,
data = grd,
cuts = 10,
region = TRUE,
col.regions = terrain.colors,
xlab = "Temperature",
ylab = "Substrate",
main = "Predicted response to factors T and S, in coded units"
)
trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(T, S, pch="O", col="red", cex=3)
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3)
lpoints(1, 2.44, col="red", cex=5, pch="*")
ltext(0.4, 2.44, "Expt 5", cex=2, col="red")
trellis.unfocus()
|
X = [+1 +1 +1 +1 +1; ...
-1 +1 -1 +1 0; ...
-1 -1 +1 +1 0; ...
+1 -1 -1 +1 0]';
% Profit values recorded from the factorial experiments
profit = [193 310 468 571 407]';
% Coefficient order = [Intercept, b_T, b_S, b_{TS}]
b = inv(X'*X)*X'*profit
% What does the model surface look like?
[T, S] = meshgrid(-2:0.1:2, -2:0.1:2);
Y = b(1) + b(2).*T + b(3).*S + b(4).*T.*S;
subplot(1, 2, 1)
surf(T, S, Y)
xlabel('T')
ylabel('S')
grid('on')
subplot(1, 2, 2)
contour(T, S, Y)
xlabel('T')
ylabel('S')
grid('on')
axis equal
colorbar
|
Extending the response surface around the next factorial point 6
# Factorial at point 6
T <- c(-1, +1, -1, +1, 0)
S <- c(-1, -1, +1, +1, 0)
y <- c(694, 725, 620, 642, 688)
# Build a least squares model for the local region
mod <- lm(y ~ T + S + T*S)
summary(mod)
next.12 <- data.frame(T=+1, S=-3.0)
pred.12 <- predict(mod, next.12) # predicted $811, but actual was $716: far off!
# Visualize the surface
N <- 100 # resolution of surface (higher values give smoother plots)
# The lower and upper bounds, in coded units, over which we want
# to visualize the surface
bound <- 3.1
T_plot <- seq(-bound, bound ,length=N)
S_plot <- seq(-bound, bound, length=N)
grd <- expand.grid(T=T_plot, S=S_plot)
# Predict directly from least squares model
grd$y <- predict(mod, grd)
library(lattice)
contourplot(y ~ T * S,
data = grd,
cuts = 10,
region = TRUE,
col.regions = terrain.colors,
xlab = "Temperature",
ylab = "Substrate",
main = "Predicted response to factors T and S, in coded units"
)
trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(T, S, pch="O", col="red", cex=3)
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3)
lpoints(1, -3, col="red", cex=5, pch="*")
ltext(1.2, -3, "12", cex=2, col="red")
trellis.unfocus()
# Central composite design: experiments 8 to 11, 6, and 13 to 16 [9 expts]
T <- c(-1, +1, -1, +1, 0, 0, +sqrt(2), 0, -sqrt(2))
S <- c(-1, -1, +1, +1, 0, -sqrt(2), 0, sqrt(2), 0)
y <- c(694, 725, 620, 642, 688, 720, 699, 610, 663)
# Add quadratic terms to the model
mod.quad <- lm(y ~ T + S + T*S + I(S^2) + I(T^2))
summary(mod.quad)
confint(mod.quad)
# Predict directly from least squares model
grd$y <- predict(mod.quad, grd)
library(lattice)
contourplot(y ~ T * S,
data = grd,
cuts = 100, # use more contour lines
region = TRUE,
col.regions = terrain.colors,
xlab = "Temperature",
ylab = "Substrate",
main = "Predicted response to factors T and S, in coded units"
)
trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(T, S, pch="O", col="red", cex=3)
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3)
llines(c(0, 0), c(-sqrt(2), sqrt(2)), col="red", lwd=3)
llines(c(-sqrt(2), sqrt(2)), c(0, 0), col="red", lwd=3)
trellis.unfocus()
# Predicted profit for next experiment, number 17 at
next.17 <- data.frame(T=2.0, S=-1.9)
pred.17 <- predict(mod.quad, next.17) # $736; the actual recorded profit was $738, so close!