I've just started playing around in this competition, so I thought I'd revive the thread and contribute a few R functions, using what are hopefully fairly obvious names for the data objects. They are: a submission function, a training function for tuning random forest parameters,
a quick variable importance function, and a couple of faster alternatives to table and tapply that may work for you, depending on what you are doing.

# Write a submission CSV from a fitted model.
#
# Predictions come from predict(model, newdata = newdata, ...) and are
# transformed with exp(p) - 1 (presumably the model was fit on a log(1 + y)
# target -- confirm against the training code). They are validated and then
# written with the columns MemberID, ClaimsTruncated and DaysInHospital.
#
# Arguments:
#   model          fitted model object that has a predict() method
#   newdata        data to predict on; passed to predict() as `newdata`
#   filename       path of the CSV file to write
#   ...            further arguments forwarded to predict()
#   id_data        data frame supplying the MemberID and ClaimsTruncated
#                  columns, one row per prediction; defaults to the global
#                  `daysY4` (must be passed by name)
#   expected_rows  number of rows the submission must have; defaults to
#                  70942 (must be passed by name)
#
# Stops with an error when any transformed prediction is missing, infinite or
# negative, when the number of predictions differs from nrow(id_data), or when
# the assembled table does not have `expected_rows` rows.
#
# Called for its side effect of writing `filename`.
submit.csv <- function(model, newdata, filename, ...,
                       id_data = daysY4, expected_rows = 70942) {
  preds <- predict(model, newdata = newdata, ...)
  preds <- exp(preds) - 1

  if (any(is.na(preds))) stop("missing values in predictions")
  if (any(is.infinite(preds))) stop("infinite values in predictions")
  if (any(preds < 0)) stop("negative values in predictions")

  # data.frame() silently recycles a vector whose length divides the number
  # of rows, so a too-short prediction vector must be rejected explicitly.
  if (NROW(preds) != nrow(id_data)) {
    stop("number of predictions does not match number of rows in id data")
  }

  out <- data.frame(
    MemberID = id_data$MemberID,
    ClaimsTruncated = id_data$ClaimsTruncated,
    DaysInHospital = round(preds, 6)
  )

  if (nrow(out) != expected_rows) stop("incorrect number of rows")

  write.csv(out, filename, row.names = FALSE, quote = FALSE)
}

# Grid-search random forest settings against a held-out test set.
#
# Every combination of `sampsize`, `mtry`, `ntree` and `nodesize` (built with
# expand.grid) is fitted `reps` times with randomForest() on the global
# training objects `daysY2MM` / `TargetY2`, using the globals `daysY3MM` /
# `TargetY3` as xtest / ytest. For each fit the square root of the test MSE
# at the final tree is recorded.
#
# Arguments:
#   sampsize, mtry, ntree, nodesize  numeric vectors of any length; candidate
#                                    values forwarded to randomForest()
#   reps                             number of repeated fits per combination
#
# Returns a data frame with one row per parameter combination: the four
# parameter columns followed by one column per repetition holding the test
# RMSE rounded to 5 decimals.
rf.train <- function(sampsize = 10000, mtry = 10, ntree = 100, nodesize = 50,
                     reps = 3) {
  argm <- expand.grid(sampsize = sampsize, mtry = mtry, ntree = ntree,
                      nodesize = nodesize)
  mt <- matrix(NA, ncol = reps, nrow = nrow(argm))

  # seq_len() rather than 1:n, so an empty grid or reps = 0 performs no fits
  # instead of iterating over c(1, 0).
  for (i in seq_len(nrow(argm))) {
    for (j in seq_len(reps)) {
      rfFita <- randomForest(
        daysY2MM, TargetY2,
        ntree = argm$ntree[i], mtry = argm$mtry[i],
        replace = FALSE, sampsize = argm$sampsize[i],
        nodesize = argm$nodesize[i], maxnodes = NULL,
        xtest = daysY3MM, ytest = TargetY3,
        importance = FALSE, localImp = FALSE, keep.forest = TRUE
      )
      # test$mse is indexed by number of trees grown; take the entry for the
      # complete forest.
      mt[i, j] <- sqrt(rfFita$test$mse)[argm$ntree[i]]
    }
  }

  cbind(argm, round(mt, 5))
}

# Quick univariate variable importance.
#
# For every column of `prdata`, fits the simple regression lm(target ~ column)
# and reports the absolute t statistic of the slope, rounded to 1 decimal.
#
# Arguments:
#   prdata  numeric matrix with no missing values and no zero-variance
#           columns
#   target  response vector with no missing values
#   sort    if TRUE (default), order the result by decreasing t statistic
#   ...     currently unused
#
# Returns a one-column data frame (`tstat`) whose row names are the column
# names of `prdata`.
qvarimp <- function(prdata, target, sort = TRUE, ...) {
  # Row 2 of the coefficient table is the slope; row 1 is the intercept.
  impFunc <- function(x, y) abs(coef(summary(lm(y ~ x)))[2, "t value"])

  ret <- data.frame(tstat = round(apply(prdata, 2, impFunc, y = target), 1))

  if (sort) {
    # drop = FALSE keeps the one-column data frame (and with it the variable
    # names in the row names); without it the result collapses to a bare
    # unnamed vector.
    ret <- ret[sort.list(ret$tstat, decreasing = TRUE), , drop = FALSE]
  }

  ret
}

# Lightweight stand-in for tapply(): apply FUN to each group of X.
#
# Arguments:
#   X         vector to be split into groups
#   INDEX     grouping variable; should be a factor
#   FUN       function (or its name) applied to every group
#   ...       extra arguments handed on to FUN
#   simplify  if TRUE, flatten the per-group results with unlist();
#             otherwise return the list as produced by lapply()
#
# Returns the per-group results, named by the levels of INDEX.
qtapply <- function(X, INDEX, FUN, ..., simplify = TRUE) {
  fn <- match.fun(FUN)
  groups <- split(X, INDEX)
  per_group <- lapply(groups, fn, ...)
  if (simplify) unlist(per_group) else per_group
}

# Lightweight stand-in for table() on a single factor.
#
# Arguments:
#   fac    the factor to count; should be a factor
#   names  if TRUE, label the counts with the factor levels
#
# Returns an integer vector with one count per level of `fac`, in level
# order (levels that never occur get 0).
qtable <- function(fac, names = FALSE) {
  counts <- tabulate(fac, nlevels(fac))
  if (names) {
    names(counts) <- levels(fac)
  }
  counts
}

with —