#### R instruction session for Lynch Lab

#### What is R?
# Open source (and free) implementation of S
# Written, maintained, and expanded by a community of volunteer developers and users
# An object oriented programing environment
# Allows amazing graphs to be produced

# Pros and Cons of CLI
# Cons
# learning curve is steep (can't beat around this)
# syntax not always intuitive (practice helps though)

# Pros
# unlike point and click menu GUIs, you leave a written trace of what you did, so...
# it is easy to save your code from an analysis, and easily adapt to repeat the analysis on new data (almost) automatically
# you can even write your own custom functions for common repeated tasks

#### Resources for using/learning R
# JGR (Java Gui for R) - Java based GUI installs across platforms
# Rseek.org - a google powered search engine for R. Makes finding help easier.
# Statistics: An Introduction using R. Michael J Crawley. 2005. John Wiley and Sons. Good basic intro, I often refer to this.
# Simple R http://cran.r-project.org/doc/contrib/Verzani-SimpleR.pdf
# R help files (try ?lm or ?rep) - they often contain examples with code that you can run, and adapt as needed.
# R graph gallery http://addictedtor.free.fr/graphiques/allgraph.php
# More advanced: Mixed Effects Models and Extensions in Ecology with R (2009). Zuur, Ieno, Walker, Saveliev and Smith. Springer (http://www.highstat.com/book2.htm) - PSU library has pdf version of this.
#
# Installing R - Go to http://cran.r-project.org/ and download the appropriate file
# I suggest installing JGR (http://jgr.markushelbig.org/JGR.html) - installation is not super easy (b/c you have to have a Java run time environment - JRE - installed), but it is worth it.
#
#### Basic R
# Creating and accessing variables
# rep(), seq(), c(), and read.table()
# data types - as.factor(), as.numeric()
# assign operator <- vs ==
#
#### Basic analysis
# Exploratory analysis
## read in data from .txt file
BRGA<-read.table("BRGA-R.txt",sep="\t",header=T)
## find out about what kind of data it is
names(BRGA)
dim(BRGA)
summary(BRGA)
summary(BRGA$Genotype)
summary(BRGA[,1])
BRGA$Genotype<-factor(BRGA$Genotype)
# see what objects exist in my R session
ls()
# make it easy to access BRGA
## ! use with care!
attach(BRGA)

## examin distribution of a variable
hist(W1RGA)

## plot is a generic fucntion
plot(Sample,TotDM.g)
plot(Field,TotDM.g)
plot(Treatment,TotDM.g)
# pretty this up a bit
plot(Treatment,TotDM.g,xlab="Treatment", ylab=)

## 2 panel plot (here used with hist, can be extended to other cases)
op <- par(mfrow = c(1, 2)) # create a 1 row 2 col plot
hist(W1RGA);hist(W2RGA)
par(op) # reset plotting to defaults

# regression
M1<-lm(W2RGA~W1RGA)
summary(M1)
plot(M1)

M2<-lm(TotDM.g~W1RGA); summary(M2)
plot(M2)

lnTotDM<-log(TotDM.g) # create log transformed biomass
M3<-lm(lnTotDM~W1RGA); summary(M3)
plot(M3)

## WARNING: R provides Type I sequential SS, not the default Type III marginal SS reported by SAS and SPSS. In a nonorthogonal design with more than one term on the right hand side of the equation order will matter (i.e., A+B and B+A will produce different results)! We will need use the drop1( ) function to produce the familiar Type III results. It will compare each term with the full model. Alternatively, we can use anova(fit.model1, fit.model2) to compare nested models directly.

## ANCOVA
M4<-lm(TotDM.g~W1RGA*Treatment); summary(M4)
plot(M4)

M5<-lm(lnTotDM~W1RGA*Treatment); summary(M5)
anova(M5,M6) # compare 2 (or more) models

## unbalanced data
BRGA<-BRGA[-(385:388),]

## more complex models (this data has too many NAs)
## so we'll use some fake data
BRGA<-read.table("BRGA-fake-R.txt",sep="\t",header=T)
attach(BRGA)
BRGA$Field<-factor(BRGA$Field)
summary(BRGA)
summary(BRGA$Field)

table(Treatment, Field)
## should we include a block effect?

## add a block effect
Block<-BRGA$Field
summary(Block)
Block<-replace(Block, Block==2,1)
Block<-replace(Block, Block==3|Block==4,2) # | is "or"
Block<-replace(Block, Block==5|Block==6,3)
Block<-replace(Block, Block==7|Block==8,4)
Block<-as.factor(as.numeric(Block))
summary(Block)
table(Field, Block)
BRGA[,11]<-Block # make Block the 11th column
names(BRGA)[11]<-"Block" #add the name
## add log transformed total biomass
lnTotDM<-log(BRGA$TotDM.g)
ncol(BRGA) #how many columns in the data frame
BRGA[,12]<-lnTotDM
names(BRGA2)[12]<-"lnTotDM"

## add the block to our ANCOVA
M6<-lm(lnTotDM~Block+Treatment*W1RGA);summary(M6)

## here we recognize that this is a split plot
## user beware! Know what your error terms should be!!
M7<-aov(lnTotDM~Treatment*Genotype + Error(Block/Treatment), data=BRGA); summary(M7)

## split plot with the root angle covariate
M8<-aov(lnTotDM~Treatment*Genotype + W1RGA + Error(Block/Treatment), data=BRGA); summary(M8)

#split plot with a random block effect
M9<-lme(lnTotDM~Treatment*W1RGA,random= ~1|Block,na.action=na.omit,data=BRGA); summary(M9)

Spotlight