Project 1. The following data is the estimated number of plankton caught in six hauls each with two nets,

Kind I      4    7   23  223    2   75    4   35    2   30
Kind II   257  225  554  707 2450  151 1070  616  399  384
Kind III  531  410  834  232  103  821  201  233  873 1072

Rweb:

SprVsLevel <- function(...) {

 x <- list(...)

 z <- sapply(x, function(z){u<-letval2(z);c(u[1,1],u[2,5])})

 z <- t(z)

 b <- lsfit(log(z[, 1]), log(z[, 2]))$coef

 boxplot(...)

 plot(log(z))

 abline(b)

lz <- list()

for(i in 1:length(x)) lz[[i]] <- x[[i]]^(1-b[2])

# lz <- sapply(x, function(u, uu=1-b[2]) u^uu)

 boxplot(lz)

 1 - b[2]

}

 

letval2 <- function(x, k = 4) {

 LV <- c("M", "F", "E", "D", "C", "B", "A", "Z", "Y", "X", "W")

 out <- array(NA, c(k, 6))

 lx <- rx <- sort(x)

 dimnames(out) <- list(LV[1:k],c("LOWER","UPPER","DEPTH","MID","SPREAD","TAIL"))

 for(i in 1:k) {

  out[i, 1:2] <- c(median(lx), median(rx))

  nn <- (length(lx) + 1)/2

  lx <- lx[1:nn]

  rx <- rev(rev(rx)[1:nn])

  out[i, 3] <- nn

 }

 out[, 4] <- (out[, 1] + out[, 2])/2

 out[, 5] <- out[, 2] - out[, 1]

 out[, 6] <- c(0, out[-1, 5]/2/qnorm(1 - 1/2^(2:k)))

 out

}

 

par(mfrow=c(2,2))

x1 <- c(4  ,  7  , 23 , 223    ,2   ,75    ,4,35,2,30)

x2 <- c(257,225, 554,707,2450,151 ,1070  ,616,399,384)

x3 <- c(531,410,834,232,103,821,201,233,873,1072)

SprVsLevel(x1,x2,x3)

letval2(x1)

letval2(x2)

letval2(x3)

SprVsLevel(x1^.5,x2^.5,x3^.5)

letval2(x1^0.5)

letval2(x2^0.5)

letval2(x3^0.5)

SAS:

PROC IML;

START LETVAL;

/* DATA IS IN X, M IS THE NUMBER OF LETTER VALUES*/

M = 6;

N = NROW(X);

 

/* SORT X */

A = X;

X[RANK(X),]= A ;

 

/* SET THE TABLE OF LETTER VALUES */

LV = REPEAT(0,M,6);

 

/* D IS THE LENGTH OF THE SUBSAMPLE FOR THE LV */

D = N;

DO I=1 TO M;

     IF(D > 0) THEN DO;

 

/* SORT THE RIGHT SUBSET */

           B = X[1:D,];

 

/* SORT THE LEFT  SUBSET */

           C = X[(N-D+1):N,];

 

/* THE MEDIANS*/

           DD = INT((D+1)/2);

        IF( D = (2*DD -1)) THEN DO;

           LV[I,1] = B[DD];

           LV[I,3] = C[DD];

        END;

        ELSE DO;

           LV[I,1] = (B[DD]+B[DD+1])/2;

           LV[I,3] = (C[DD]+C[DD+1])/2;

        END;

 

/*CALCULATE THE MID VALUES*/

        LV[I, 2] = (LV[I, 1] + LV[I, 3])/2;

 

/*CALCULATE THE SPREAD*/

        LV[I, 4] = LV[I, 3] - LV[I, 1];

        LV[I, 5] = LV[I, 2] - LV[1, 1] ;

        LV[I, 6] = ((LV[I, 1] - LV[1, 1])**2 + (LV[I, 3] - LV[1, 1])**2)/

                                      4/LV[1, 1];

        D = DD;

     END;

 

END;

FINISH;

 

/* INITIALIZE THINGS */

h1 = {4 ,7,23,223,2,75,4,35,2,30};

h2 = {257,225, 554,707,2450,151,1070,616,399,384};

h3 = {531,410,834,232,103,821,201,233,873,1072};

XY = REPEAT(0,3,2);

 

/* COMPUTE THE LETTER VALUE DISPLAYS */

X =  h1;

run letval;

RN = {"M" "F" "E" "D" "C"} ;

CN = {"LOWER" "MID" "UPPER" "SPREAD" "Y" "X"};

PRINT LV[ ROWNAME=RN COLNAME=CN];

XY[1,1] = LV[1,2];

XY[1,2] = LV[2,4];

 

X = h2;

run letval;

RN = {"M" "F" "E" "D" "C"} ;

CN = {"LOWER" "MID" "UPPER" "SPREAD" "Y" "X"};

PRINT LV[ ROWNAME=RN COLNAME=CN];

XY[2,1] = LV[1,2];

XY[2,2] = LV[2,4];

X = h3;

run letval;

RN = {"M" "F" "E" "D" "C"} ;

CN = {"LOWER" "MID" "UPPER" "SPREAD" "Y" "X"};

PRINT LV[ ROWNAME=RN COLNAME=CN];

XY[3,1] = LV[1,2];

XY[3,2] = LV[2,4];

call gstart;

run gxyplot(log(XY[,1]) , log(XY[,2]));

* run gxyplot(log(LV[,6]) , log(LV[,5]));

run;

QUIT;

Method for calculating p for the power transformation:
For several letter values define

Y = ((XU - M)2 - (M- XL)2 )/(4M)
Z = (XU + XL)/2 - M

 Plot Y VS Z

b is the slope of Y vs Z

 If the result is linear then p = 1-b is the power for the tansformation:

        T(X) = Xp     or maybe   k Xp

Rweb:

letval2 <- function(x, k = 4)

{

  LV <- c("M", "F", "E", "D", "C", "B", "A", "Z", "Y", "X", "W")

  out <- array(NA, c(k, 6))

  lx <- rx <- sort(x)

  dimnames(out) <- list(LV[1:k],c("LOWER","UPPER","DEPTH","MID","SPREAD","TAIL"))

  for(i in 1:k) {

    out[i, 1:2] <- c(median(lx), median(rx))

    nn <- (length(lx) + 1)/2

    lx <- lx[1:nn]

    rx <- rev(rev(rx)[1:nn])

    out[i, 3] <- nn

   }

  out[, 4] <- (out[, 1] + out[, 2])/2

  out[, 5] <- out[, 2] - out[, 1]

  out[, 6] <- c(0, out[-1, 5]/2/qnorm(1 - 1/2^(2:k)))

  out

}

sym <- function(x, k = 7) {

        z <- letval2(x, k)

        M <- z[1,1]

        u <- ((z[-1,2]-M)^2 + (z[-1,1]-M)^2)/(4*M)

        v <- z[-1, 4] - M

        plot(u, v)

        b <- lsfit(u, v, int = F)$coef

        abline(0, b)

        hist(x^(1 - b))

        1 - b

}

par(mfrow=c(2,2))

data(rivers)

hist(rivers)

sym(rivers,4)

hist(log(rivers))