Project 1. The following data is the estimated number of plankton caught in six hauls each with two nets,
Kind I 4 7 23 223 2 75 4 35 2 30
Kind II 257 225 554 707 2450 151 1070 616 399 384
Kind III 531 410 834 232 103 821 201 233 873 1072
Rweb:
SprVsLevel <- function(...)
{
x <- list(...)
z <- sapply(x, function(z){u<-letval2(z);c(u[1,1],u[2,5])})
z <- t(z)
b <- lsfit(log(z[, 1]), log(z[, 2]))$coef
boxplot(...)
plot(log(z))
abline(b)
lz <- list()
for(i in 1:length(x)) lz[[i]]
<- x[[i]]^(1-b[2])
# lz <- sapply(x,
function(u, uu=1-b[2]) u^uu)
boxplot(lz)
1 - b[2]
}
letval2 <- function(x, k =
4) {
LV <- c("M", "F", "E",
"D", "C", "B", "A", "Z",
"Y", "X", "W")
out <- array(NA, c(k, 6))
lx <- rx <- sort(x)
dimnames(out) <-
list(LV[1:k],c("LOWER","UPPER","DEPTH","MID","SPREAD","TAIL"))
for(i in 1:k) {
out[i, 1:2] <- c(median(lx), median(rx))
nn <- (length(lx) + 1)/2
lx <- lx[1:nn]
rx <- rev(rev(rx)[1:nn])
out[i, 3] <- nn
}
out[, 4] <- (out[, 1] + out[, 2])/2
out[, 5] <- out[, 2] - out[, 1]
out[, 6] <- c(0, out[-1, 5]/2/qnorm(1 - 1/2^(2:k)))
out
}
par(mfrow=c(2,2))
x1 <- c(4 ,
7 , 23 , 223 ,2
,75 ,4,35,2,30)
x2 <- c(257,225,
554,707,2450,151 ,1070 ,616,399,384)
x3 <-
c(531,410,834,232,103,821,201,233,873,1072)
SprVsLevel(x1,x2,x3)
letval2(x1)
letval2(x2)
letval2(x3)
SprVsLevel(x1^.5,x2^.5,x3^.5)
letval2(x1^0.5)
letval2(x2^0.5)
letval2(x3^0.5)
SAS:
PROC IML;
START LETVAL;
/* DATA IS IN X, M IS THE NUMBER OF
LETTER VALUES*/
M = 6;
N = NROW(X);
/* SORT X */
A = X;
X[RANK(X),]= A ;
/* SET THE TABLE OF LETTER VALUES
*/
LV = REPEAT(0,M,6);
/* D IS THE LENGTH OF THE SUBSAMPLE
FOR THE LV */
D = N;
DO I=1 TO M;
IF(D > 0) THEN DO;
/* SORT THE RIGHT SUBSET */
B = X[1:D,];
/* SORT THE LEFT SUBSET */
C = X[(N-D+1):N,];
/* THE MEDIANS*/
DD = INT((D+1)/2);
IF( D = (2*DD -1)) THEN DO;
LV[I,1] = B[DD];
LV[I,3] = C[DD];
END;
ELSE DO;
LV[I,1] = (B[DD]+B[DD+1])/2;
LV[I,3] = (C[DD]+C[DD+1])/2;
END;
/*CALCULATE THE MID VALUES*/
LV[I, 2] = (LV[I, 1] + LV[I, 3])/2;
/*CALCULATE THE SPREAD*/
LV[I, 4] = LV[I, 3] - LV[I, 1];
LV[I, 5] = LV[I, 2] - LV[1, 1] ;
LV[I, 6] = ((LV[I, 1] - LV[1, 1])**2 + (LV[I, 3] - LV[1,
1])**2)/
4/LV[1, 1];
D = DD;
END;
END;
FINISH;
/* INITIALIZE THINGS */
h1 = {4 ,7,23,223,2,75,4,35,2,30};
h2 = {257,225,
554,707,2450,151,1070,616,399,384};
h3 =
{531,410,834,232,103,821,201,233,873,1072};
XY = REPEAT(0,3,2);
/* COMPUTE THE LETTER VALUE
DISPLAYS */
X = h1;
run letval;
RN = {"M" "F"
"E" "D" "C"} ;
CN = {"LOWER"
"MID" "UPPER" "SPREAD" "Y" "X"};
PRINT LV[ ROWNAME=RN COLNAME=CN];
XY[1,1] = LV[1,2];
XY[1,2] = LV[2,4];
X = h2;
run letval;
RN = {"M" "F"
"E" "D" "C"} ;
CN = {"LOWER"
"MID" "UPPER" "SPREAD" "Y" "X"};
PRINT LV[ ROWNAME=RN COLNAME=CN];
XY[2,1] = LV[1,2];
XY[2,2] = LV[2,4];
X = h3;
run letval;
RN = {"M" "F"
"E" "D" "C"} ;
CN = {"LOWER"
"MID" "UPPER" "SPREAD" "Y" "X"};
PRINT LV[ ROWNAME=RN COLNAME=CN];
XY[3,1] = LV[1,2];
XY[3,2] = LV[2,4];
call gstart;
run gxyplot(log(XY[,1]) , log(XY[,2]));
* run gxyplot(log(LV[,6]) , log(LV[,5]));
run;
QUIT;
Method for calculating
p for the power transformation:
For several letter values define
Y = ((XU - M)2 - (M- XL)2
)/(4M)
Z = (XU + XL)/2 - M
Plot Y VS Z
b is the slope of Y vs Z
If the result is linear then p = 1-b is the power for the
tansformation:
T(X) = Xp or
maybe k Xp
Rweb:
letval2 <-
function(x, k = 4)
{
LV <- c("M", "F",
"E", "D", "C", "B", "A",
"Z", "Y", "X", "W")
out <- array(NA, c(k, 6))
lx <- rx <- sort(x)
dimnames(out) <-
list(LV[1:k],c("LOWER","UPPER","DEPTH","MID","SPREAD","TAIL"))
for(i in 1:k) {
out[i, 1:2] <- c(median(lx),
median(rx))
nn <- (length(lx) + 1)/2
lx <- lx[1:nn]
rx <- rev(rev(rx)[1:nn])
out[i, 3] <- nn
}
out[, 4] <- (out[, 1] + out[, 2])/2
out[, 5] <- out[, 2] - out[, 1]
out[, 6] <- c(0, out[-1, 5]/2/qnorm(1 -
1/2^(2:k)))
out
}
sym <-
function(x, k = 7) {
z
<- letval2(x, k)
M <- z[1,1]
u
<- ((z[-1,2]-M)^2 + (z[-1,1]-M)^2)/(4*M)
v
<- z[-1, 4] - M
plot(u,
v)
b
<- lsfit(u, v, int = F)$coef
abline(0,
b)
hist(x^(1
- b))
1
- b
}
par(mfrow=c(2,2))
data(rivers)
hist(rivers)
sym(rivers,4)
hist(log(rivers))