Difference between revisions of "Design and analysis of experiments (2014)"

From Statistics for Engineering
Jump to navigation Jump to search
 
(6 intermediate revisions by the same user not shown)
Line 1: Line 1:
__NOTOC__{{ClassSidebar
__NOTOC__{{ClassSidebarYouTube
| date = 27 February to 2731March 2014
| date = 27 February to 31March 2014
| dates_alt_text =  
| dates_alt_text =  
| vimeoID1 = 87882235
| vimeoID1 = Pe2KF9YOHXk
| vimeoID2 = 88125382
| vimeoID2 = d1sOgXVKO8E
| vimeoID3 = 88302027
| vimeoID3 = vkxiLRpB4Kw
| vimeoID4 = 88402005
| vimeoID4 = SqfsZ1AINsY
| vimeoID5 = 88755732
| vimeoID5 = MFs4Hym2Fao
| vimeoID6 = 89095717
| vimeoID6 = DaEVzZuR5mk
| vimeoID7 = 89425542
| vimeoID7 = APpAFh8Iv2A
| vimeoID8 = 89560903
| vimeoID8 = E9VyHoz1RLQ
| vimeoID9 = 89710171
| vimeoID9 = mlea4XngK5M
| vimeoID10 = 90119700
| vimeoID10 = 0Fx3l2Tnk8s
| vimeoID11 = 90230249
| vimeoID11 = kRcCawq8Aho
| vimeoID12 = 90274891
| vimeoID12 = LTvwsunC7zo
| vimeoID13 =  
| vimeoID13 = tkLF3120OOE
| course_notes_PDF = 2014-4C3-6C3-Design-and-analysis-of-Experiments.pdf
| course_notes_PDF = 2014-4C3-6C3-Design-and-analysis-of-Experiments.pdf
| course_notes_alt = Course slides
| course_notes_alt = Course slides
Line 60: Line 60:
| video_notes13 =
| video_notes13 =
}}
}}
<span style="color:#900000">{{Huge|This page is out of date.}}</span> {{Huge|Please see the [[Design_and_analysis_of_experiments |latest version of these notes]].}}


= Course notes and slides =
= Course notes and slides =
Line 80: Line 85:
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-07C.mp3 Audio] (40 M)
| [http://learnche.mcmaster.ca/media/2014-4C3-6C3-Class-07C.mp3 Audio] (40 M)
| None
| None
| rowspan="13"|[http://learnche.mcmaster.ca/pid/?source=DOE Chapter 5]
| rowspan="13"|[http://learnche.org/pid/?source=DOE Chapter 5]
| rowspan="13"|[[Image:Nuvola_mimetypes_pdf.png|20px|link=Media:2014-4C3-6C3-Design-and-analysis-of-Experiments.pdf]] [[Media:2014-4C3-6C3-Design-and-analysis-of-Experiments.pdf|Slides for class]]  
| rowspan="13"|[[Image:Nuvola_mimetypes_pdf.png|20px|link=Media:2014-4C3-6C3-Design-and-analysis-of-Experiments.pdf]] [[Media:2014-4C3-6C3-Design-and-analysis-of-Experiments.pdf|Slides for class]]  
|-
|-
Line 159: Line 164:
Source code to estimate the DOE model
Source code to estimate the DOE model


<syntaxhighlight lang="sas">
<syntaxhighlight lang="R">
T <- c(-1, +1, -1, +1)  # centered and scaled temperature
T <- c(-1, +1, -1, +1)  # centered and scaled temperature
S <- c(-1, -1, +1, +1)  # centered and scaled substrate concentration
S <- c(-1, -1, +1, +1)  # centered and scaled substrate concentration
Line 275: Line 280:


== Class 08C, 06 March ==
== Class 08C, 06 March ==
<syntaxhighlight lang="rsplus">
<syntaxhighlight lang="R">
# Create the design matrix in a quick way in R
# Create the design matrix in a quick way in R
C <- T <- S <- c(-1, +1)
x <- c(-1, +1)
design <- expand.grid(C=C, T=T, S=S)
design <- expand.grid(C=x, T=x, S=x)
design
design
   C  T  S
   C  T  S
Line 299: Line 304:


# This powerful notation will expand all terms up to the 3rd order interactions
# This powerful notation will expand all terms up to the 3rd order interactions
mod.full <- lm( y ~ (C + T + S)^3 )  
mod.full <- lm( y ~ C*T*S )  


summary(mod.full)
summary(mod.full)
Line 351: Line 356:
== Class 10A, 17 March ==
== Class 10A, 17 March ==
To find all the 2-factor and 3-factor coefficients in a linear model
To find all the 2-factor and 3-factor coefficients in a linear model
<syntaxhighlight lang="rsplus">
<syntaxhighlight lang="R">
terms.formula( y ~ (A+B+C+D)^2 )
terms.formula( y ~ (A+B+C+D)^2 )
terms.formula( y ~ (A+B+C+D)^3 )
terms.formula( y ~ (A+B+C+D)^3 )
Line 365: Line 370:
|-
|-
|  
|  
<syntaxhighlight lang="rsplus">
<syntaxhighlight lang="R">
# Initial set of 4 runs
# Initial set of 4 runs
T <- c(-1,    +1,  -1,  +1,    0)
T <- c(-1,    +1,  -1,  +1,    0)
Line 448: Line 453:
== Class 11C, 27 March ==
== Class 11C, 27 March ==


<syntaxhighlight lang="rsplus">
<syntaxhighlight lang="R">
# Factorial at point 6
# Factorial at point 6
T <- c(-1,    +1,  -1,  +1,    0)
T <- c(-1,    +1,  -1,  +1,    0)

Latest revision as of 05:52, 20 September 2018

Class date(s): 27 February to 31March 2014
Nuvola mimetypes pdf.png (PDF) Course slides














This page is out of date. Please see the latest version of these notes.


Course notes and slides

Date Class number Video and audio files Other materials Reading (PID) Slides
27 February 07C Video (227 M) Audio (40 M) None Chapter 5 Nuvola mimetypes pdf.png Slides for class
03 March 08A Video (299 M) Audio (40 M) None
05 March 08B Video (296 M) Audio (42 M) None
06 March 08C Video (286 M) Audio (42 M) See the code and derivations below
10 March 09A Video (311 M) Audio (42 M)
13 March 09B Video (296 M) Audio (42 M)
17 March 10A Video (280 M) Audio (42 M) See the source code below.
19 March 10B Video (305 M) Audio (42 M) None
20 March 10C Video (301 M) Audio (42 M) None
24 March 11A Video (309 M) Audio (43 M) None
26 March 11B Video (245 M) Audio (42 M) See the source code below.
27 March 11C Video (251M) Audio (M)
27 March 12A Video (118 M) Audio (20 M) None

Software source code

Class 08C, 06 March

Source code to estimate the DOE model

T <- c(-1, +1, -1, +1)  # centered and scaled temperature
S <- c(-1, -1, +1, +1)  # centered and scaled substrate concentration
y <- c(69, 60, 64, 53)  # conversion is the response, y
mod <- lm(y ~ T + S + T * S)
summary(mod)

Call:
lm(formula = y ~ T + S + T * S)

Residuals:
ALL 4 residuals are 0: no residual degrees of freedom!

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)     61.5         NA      NA       NA
T               -5.0         NA      NA       NA
S               -3.0         NA      NA       NA
T:S             -0.5         NA      NA       NA

Residual standard error: NaN on 0 degrees of freedom
Multiple R-squared:     1,	Adjusted R-squared:   NaN 
F-statistic:   NaN on 3 and 0 DF,  p-value: NA

Class 08C, 06 March

3-factor example <rst> <rst-options: 'toc' = False/> The data are from a plastics molding factory which must treat its waste before discharge. The :math:`y` variable represents the average amount of pollutant discharged [lb per day], while the 3 factors that were varied were:

	-	:math:`C`: the chemical compound added [A or B]

- :math:`T`: the treatment temperature [72°F or 100°F] - :math:`S`: the stirring speed [200 rpm or 400 rpm] - :math:`y`: the amount of pollutant discharged [lb per day]

.. tabularcolumns:: |l|l||c|c|c||c|

+-----------+-------+---------------+-----------------+-----------------+-----------------+ | Experiment| Order | :math:`C` | :math:`T` [°F] | :math:`S` [rpm] | :math:`y` [lb] | +===========+=======+===============+=================+=================+=================+ | 1 | 5 | A | 72 | 200 | 5 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 2 | 6 | B | 72 | 200 | 30 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 3 | 1 | A | 100 | 200 | 6 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 4 | 4 | B | 100 | 200 | 33 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 5 | 2 | A | 72 | 400 | 4 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 6 | 7 | B | 72 | 400 | 3 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 7 | 3 | A | 100 | 400 | 5 | +-----------+-------+---------------+-----------------+-----------------+-----------------+ | 8 | 8 | B | 100 | 400 | 4 | +-----------+-------+---------------+-----------------+-----------------+-----------------+

We showed the cube plot for this system in the class on 10 March. From the cube plot we could already see the main factors, and even the **CS** interaction was noticeable.

  • **C effect**: There are 4 estimates of :math:`C = \displaystyle \frac{(+25) + (+27) + (-1) + (-1)}{4} = \frac{50}{4} = \bf{12.5}`
  • **T effect**: There are 4 estimates of :math:`T = \displaystyle \frac{(+1) + (+3) + (+1) + (+1)}{4} = \frac{6}{4} = \bf{1.5}`
  • **S effect**: There are 4 estimates of :math:`S = \displaystyle \frac{(-27) + (-1) + (-29) + (-1)}{4} = \frac{-58}{4} = \bf{-14.5}`
  • **CT interaction**: There are 2 estimates of :math:`CT`. Recall that interactions are calculated as the half difference going from high to low. Consider the change in :math:`C` when

- :math:`T_\text{high}` (at :math:`S` high) = 4 - 5 = -1 - :math:`T_\text{low}` (at :math:`S` high) = 3 - 4 = -1 - First estimate = [(-1) - (-1)]/2 = 0 - :math:`T_\text{high}` (at :math:`S` low) = 33 - 6 = +27 - :math:`T_\text{low}` (at :math:`S` low) = 30 - 5 = +25 - Second estimate = [(+27) - (+25)]/2 = +1

- Average **CT** interaction = (0 + 1)/2 = **0.5** - You can interchange :math:`C` and :math:`T` and still get the same result.

  • **CS interaction**: There are 2 estimates of :math:`CS`. Consider the change in :math:`C` when

- :math:`S_\text{high}` (at :math:`T` high) = 4 - 5 = -1 - :math:`S_\text{low}` (at :math:`T` high) = 33 - 6 = +27 - First estimate = [(-1) - (+27)]/2 = -14 - :math:`S_\text{high}` (at :math:`T` low) = 3 - 4 = -1 - :math:`S_\text{low}` (at :math:`T` low) = 30 - 5 = +25 - Second estimate = [(-1) - (+25)]/2 = -13

- Average **CS** interaction = (-13 - 14)/2 = **-13.5** - You can interchange :math:`C` and :math:`S` and still get the same result.

  • **ST interaction**: There are 2 estimates of :math:`ST`: (-1 + 0)/2 = **-0.5**, calculate in the same way as above.
  • **CTS interaction**: There is only a single estimate of :math:`CTS`:

- :math:`CT` effect at high :math:`S` = 0 - :math:`CT` effect at low :math:`S` = +1 - :math:`CTS` interaction = [(0) - (+1)] / 2 = **-0.5**

- You can calculate this also by considering the :math:`CS` effect at the two levels of :math:`T` - Or, you can calculate this by considering the :math:`ST` effect at the two levels of :math:`C`. - All 3 approaches give the same result.


Next, use computer software to verify that

.. math::

y = 11.25 + 6.25x_C + 0.75x_T -7.25x_S + 0.25 x_C x_T -6.75 x_C x_S -0.25 x_T x_S - 0.25 x_C x_T x_S


The :math:`\mathbf{X}` matrix and :math:`\mathbf{y}` vector used to calculate the least squares model:

.. math::

\begin{bmatrix} 5\\30\\6\\33\\4\\3\\5\\4 \end{bmatrix} &= \begin{bmatrix} +1 & -1 & -1 & -1 & +1 & +1 & +1 & -1\\ +1 & +1 & -1 & -1 & -1 & -1 & +1 & +1\\ +1 & -1 & +1 & -1 & -1 & +1 & -1 & +1\\ +1 & +1 & +1 & -1 & +1 & -1 & -1 & -1\\ +1 & -1 & -1 & +1 & +1 & -1 & -1 & +1\\ +1 & +1 & -1 & +1 & -1 & +1 & -1 & -1\\ +1 & -1 & +1 & +1 & -1 & -1 & +1 & -1\\ +1 & +1 & +1 & +1 & +1 & +1 & +1 & +1\\ \end{bmatrix} \begin{bmatrix} b_0 \\ b_C \\ b_T \\ b_{S} \\ b_{CT} \\ b_{CS} \\ b_{TS} \\ b_{CTS} \end{bmatrix} \\ \mathbf{y} &= \mathbf{X} \mathbf{b} </rst>

Class 08C, 06 March

# Create the design matrix in a quick way in R
x <- c(-1, +1)
design <- expand.grid(C=x, T=x, S=x)
design
   C  T  S
1 -1 -1 -1
2  1 -1 -1
3 -1  1 -1
4  1  1 -1
5 -1 -1  1
6  1 -1  1
7 -1  1  1
8  1  1  1

C <- design$C
T <- design$T
S <- design$S
y <- c(5, 30, 6, 33, 4, 3, 5, 4)

# Full factorial model (you could make errors typing this in)
mod.full <- lm(y ~ C + T + S + C*T + C*S + S*T + C*T*S)

# This powerful notation will expand all terms up to the 3rd order interactions
mod.full <- lm( y ~ C*T*S ) 

summary(mod.full)
Call:
lm(formula = y ~ C + T + S + C * T + C * S + S * T + C * T * S)

Residuals:
ALL 8 residuals are 0: no residual degrees of freedom!

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)    11.25         NA      NA       NA
C               6.25         NA      NA       NA
T               0.75         NA      NA       NA
S              -7.25         NA      NA       NA
C:T             0.25         NA      NA       NA
C:S            -6.75         NA      NA       NA
T:S            -0.25         NA      NA       NA
C:T:S          -0.25         NA      NA       NA

# Guide to estimating significant coefficients, ignoring the intercept (1st coefficient)
coeff.full <- coef(mod.full)[2:length(coef(mod.full))]

# Pareto plot of the absolute coefficients
library(lattice)
coeff <- sort(abs(coeff.full), index.return=TRUE)
barchart(coeff$x, 
         xlim=c(0, max(abs(coeff.full))+0.1),
         xlab=list("Magnitude of effect", cex=1.5), 
         ylab = list("Effect", cex=1.5),
         groups=(coeff.full>0)[coeff$ix], col=c("lightblue", "orange"),
         scales=list(cex=1.5)
)

# Eliminate some factors: use the regular linear model notation
# Eliminate C*T*S, C*T and S*T
mod.sub <- lm(y ~ C + T + S + C*S)
summary(mod.sub)
confint(mod.sub)
coeff.sub <- coef(mod.sub)[2:length(coef(mod.sub))]
coeff <- sort(abs(coeff.sub), index.return=TRUE)
barchart(coeff$x, 
         xlim=c(0, max(abs(coeff.sub))+0.1),
         xlab=list("Magnitude of effect", cex=1.5), 
         ylab = list("Effect", cex=1.5),
         groups=(coeff.sub>0)[coeff$ix], col=c("lightblue", "orange"),
         scales=list(cex=1.5)
)

Class 10A, 17 March

To find all the 2-factor and 3-factor coefficients in a linear model

terms.formula( y ~ (A+B+C+D)^2 )
terms.formula( y ~ (A+B+C+D)^3 )

Class 11B, 26 March

Code for contour plots in response surface methods

R MATLAB
# Initial set of 4 runs
T <- c(-1,    +1,   -1,   +1,    0)
S <- c(-1,    -1,   +1,   +1,    0)
y <- c(193,  310,  468,  571,  407)

# Visualize the surface
# ------------------------------------------------------
mod <- lm(y ~ T + S + T*S)
summary(mod)

N <- 50  # resolution of surface
         #  (higher values give smoother plots)

# The lower and upper bounds, in coded units, 
# over which we want
# to visualize the surface
bound <- 3 
T_plot <- seq(-bound, bound ,length=N)
S_plot <- seq(-bound, bound, length=N)
grd <- expand.grid(T=T_plot, S=S_plot)

# Predict directly from least squares model
grd$y <- predict(mod, grd)

library(lattice)
contourplot(y ~ T * S, 
            data = grd,
            cuts = 10, 
            region = TRUE,
            col.regions = terrain.colors,
            xlab = "Temperature",
            ylab = "Substrate",
            main = "Predicted response"
)

trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(T, S, pch="O", col="red", cex=3)
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), 
    col="red", lwd=3)
lpoints(1, 2.44, col="red", cex=5, pch="*")
ltext(0.4, 2.44, "Expt 5", cex=2, col="red")

lpoints(2, 4.88, col="red", cex=5, pch="*")
ltext(1.2, 4.88, "Expt 6", cex=2, col="red")

trellis.unfocus()
X = [+1 +1 +1 +1 +1; ...
     -1 +1 -1 +1 0; ...
     -1 -1 +1 +1 0; ...
     +1 -1 -1 +1 0]';
 
% Profit values recorded from the factorial experiments
profit = [193 310 468 571 407]';
% Coefficient order = [Intercept, b_T, b_S, b_{TS}]
b = inv(X'*X)*X'*profit
 
% What does the model surface look like?
[T, S] = meshgrid(-2:0.1:2, -2:0.1:2);
Y = b(1) + b(2).*T + b(3).*S + b(4).*T.*S;
 
subplot(1, 2, 1)
surf(T, S, Y)
xlabel('T')
ylabel('S')
grid('on')
 
subplot(1, 2, 2)
contour(T, S, Y)
xlabel('T')
ylabel('S')
grid('on')
axis equal
colorbar

Class 11C, 27 March

# Factorial at point 6
T <- c(-1,    +1,   -1,   +1,    0)
S <- c(-1,    -1,   +1,   +1,    0)
y <- c(694, 725, 620, 642, 688)

# Build a least squares model for the local region
mod <- lm(y ~ T + S + T*S)
summary(mod)

next.12 <- data.frame(T=+1, S=-3.0)
pred.12 <- predict(mod, next.12)   # predicted $811, but actual was $716: far off!

# Visualize the surface
N <- 50  # resolution of surface (higher values give smoother plots)

# The lower and upper bounds, in coded units, over which we want
# to visualize the surface
bound <- 3.1
T_plot <- seq(-bound, bound ,length=N)
S_plot <- seq(-bound, bound, length=N)
grd <- expand.grid(T=T_plot, S=S_plot)

# Predict directly from least squares model
grd$y <- predict(mod, grd)

library(lattice)
contourplot(y ~ T * S, 
            data = grd,
            cuts = 10, 
            region = TRUE,
            col.regions = terrain.colors,
            xlab = "Temperature",
            ylab = "Substrate",
            main = "Predicted response to factors T and S, in coded units"
)

trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(T, S, pch="O", col="red", cex=3)
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3)
lpoints(1, -3, col="red", cex=5, pch="*")
ltext(1.2, -3, "12", cex=2, col="red")



# Central composite design: experiments 8 to 11, 6, and 13 to 16 [9 expts]

T <- c(-1,    +1,   -1,   +1,    0,        0, +sqrt(2),       0, -sqrt(2))
S <- c(-1,    -1,   +1,   +1,    0, -sqrt(2),        0, sqrt(2),        0)
y <- c(694,  725,  620,  642,  688,      720,      699,     610,      663)

# Add quadratic terms to the model
mod.quad <- lm(y ~ T + S + T*S + I(S^2) + I(T^2))
summary(mod.quad)
confint(mod.quad)

# Predict directly from least squares model
grd$y <- predict(mod.quad, grd)
library(lattice)
contourplot(y ~ T * S, 
            data = grd,
            cuts = 50,  # use more contour lines 
            region = TRUE,
            col.regions = terrain.colors,
            xlab = "Temperature",
            ylab = "Substrate",
            main = "Predicted response to factors T and S, in coded units"
)
trellis.focus("panel", 1, 1, highlight=FALSE)
lpoints(T, S, pch="O", col="red", cex=3)
llines(c(-1, +1, +1, -1, -1), c(-1, -1, +1, +1, -1), col="red", lwd=3)
llines(c(0, 0), c(-sqrt(2), sqrt(2)), col="red", lwd=3)
llines(c(-sqrt(2), sqrt(2)), c(0, 0),  col="red", lwd=3)
#trellis.unfocus()

# Predicted profit for next experiment, number 17 at
next.17 <- data.frame(T=2.0, S=-2.0)
pred.17 <- predict(mod.quad, next.17)   # $736; the actual recorded profit was $738, so close!