R Notes

The following are some notes on the syntax, commands, etc. for R programming and plotting.


           R PROGRAMMING AND SYNTAX
---Directories----
getwd()               # print the working directory
setwd("test")         # sets working directory
dir.create("test")    # Like unix 'mkdir test'
list.files()          # Like unix ls
system("ls")          # Run a command from the OS
sessionInfo()         # packages loaded, version and so on
citation()            # to cite the R package

----Help----
help(seq)             # prints help documentation for command 'seq'
?seq                  # another way to invoke help 
??seq                 # look for 'seq' throughout help pages

----Run R commands from a file---
source("command_file") # it is ok to split a long command onto two lines. '#' is a comment

----Assigning variables-----
x=2                   #for a single variable
x <- c(23.,-12.,43.5) # The c() function assigns numbers to a vector
x=c(23.,-12,43.5)     # or do it this way
y=c(x,1,2*x)          # does what you think it should, y has 7 elements if x has 3
z=y-x                 # also has 7 elements. the 3 elements x get repeated to fill up the vector
z=1:10                # assigns values 1 through 10 to vector z
z=seq(1,10,by=2)      # odd numbers 1 through 10
z=seq(40,1,-5)        # start at 40, stop when <=1, increments of -5
z=seq(0.05,3,len=500) # uniform sampling of length 500 between 0.05 and 3
z[4]=5                # 4th value of vector z
z[z<5]                # all values of z that are < 5
z=sample(1:10)        # randomly sample from 1:10 w/o replacement
z=1e-3*sample(0:999,3,replace=TRUE) # 3 random numbers in [0,1) to 3 decimal places
z=rnorm(30,mean=10,sd=2) # 30 random samples from normal distribution with mu=10 sigma=2
z=c("aa","a_string")  # creating a vector of strings
nam=as.character(p)   # convert number to a string for plotting (useful when looping movies)
text(0.02,1.8,nam,col="black") #....and now plot that string. You could update the number p in a loop
z=c(TRUE,TRUE,FALSE)  # a logical vector
rm(x)                 # deletes the variable
rm(list=ls())         # deletes all variables
result=array(0,c(20,3)) # make an array of 20 rows, 3 columns, values all =0
z=matrix(c(4,6,12,9),2,2) # populate z, a 2x2 matrix, with values

----Store multiple plots as foo*.png files for later insertion as frames into movies
png("foo%04d.png",width=1400,height=700,pointsize=20,type=c("quartz"))
 ...commands to generate plots...
dev.off()
ffmpeg -f image2 -i 'foo%04d.png' try.mpeg  #unix command to make movie from frames; view in Quicktime


----Variables Format----
str(z)                # returns structure of object z, e.g. character, numeric, logical
class(z)              # similar to str
is.matrix(z)          # returns TRUE or FALSE depending on if z is a matrix

----Tabular data and subsamples------
rbind(x,y)            # combine objects into a table by rows
cbind(x,y)            # combine objects by columns
xy=as.data.frame(cbind(x,y)) # creates data frame of named variables. 
names(xy)=c("xval","yval")   # change column names
z=read.table("filename",T)   # read ascii table into a data frame, 'T' reads labels on first line
attach(z)             # puts variables within z into the search path of the program
z1=subset(z,M_A==2 & Dcool<75 & Dcool>50) #choose subset of data in frame z
                      # where  M_A and Dcool are variables. Then attach(z1) 

----Mathematical expressions-----
z=3*sqrt(x)           # expressions work
z=x**3.2              # powers, ^ also works
z=cos((pi/4))         # trig uses radians; acos, tan, etc
z=log10(x)            # log is natural ln 
z=exp(x)              # gives e^x
trunc(x)              # greatest integer less than or equal to
length(z)             # number of elements in z
max(z)                # maximum value; min(z) also works
abs(z)                # absolute value
sort(z)               # order vector from low to high
round(x,4)            # round off variable x to 4 digits

----Plots, Labels----
plot(x,y)             # plot x vs. y (resize window to change aspect ratio)
points(x,y)           # overplot points on existing graph (use lines to overplot lines)
plot(x,pch=20)        # pch=one of the 26 point types; cex is point size
plot(x,xlim=c(0,3),ylim=c(-0.5,4.5),xaxs="i",yaxs="i") # force user-specified limits on x and y
par()                 # show plotting parameters
par(mfrow=c(1,2))     # set up 2-panel figure
par(mar=c(4,4,1,1))   # size of margin around plot (bottom, left, top, right). Smaller numbers make
                      # the graph fill more of the available space. You can change these along with
                      # mgp to shift the graph relative to the labels.
par(mgp=c(1.8,0.3,0)) # offsets for text labels, numbers, and ticks. Usually ticks=0. The number offset
                      # can be negative to shift values closer to axis, 0.3 works well for tck=0.03 (see below)
title(xlab=expression(paste("something complicated"),line=2) # good for offsetting complicated axis labels
                      # set xlab = " " in the plot command
frac({1},{2})   # write the fraction 1/2. This can be made complicated with Greek letters, subscripts etc. For example:
                  frac({"[N I]"[lambda*lambda*5198+5200]},{"[N II]"[lambda*lambda*6548+6583]})
plot(x,y,tck=0.03,las=1,log='x',pch=M_A,cex=0.9,col="red",xlab=expression(paste("log"["10"]," d"["COOL"]," (AU)")))
                # tck=0.03 tic size, positive number moves it inside the graphic box
                # las=1 axis labels are all vertical
                # log='x' log scale on x (or 'y', 'xy')
                # pch is point type, here chosen from column labeled M_A
                # cex is size of points; use cex.lab=1.3, cex.axis=1.2 to increase label size by 30%, tic labels by 20%
                # col is color 
                # xlab is for x labels:
                # expression/paste syntax for subscripts [] and superscripts ^
                #  [] are followed by a comma. ^ are similar but use only one of them: e.g. paste("cos(45"^"o",")")
plot(x,y,xlab=expression(paste(lambda,"F",lambda))) # for Greek letters in labels 
text(0.5,0.9,"This is a label",0) # Put label at x=0.5, y=0.9.  The last parameter is an offset
                  # Use mtext to place xlabels and ylabels in margins
                  # las=1 makes y-axis (side=2) label vertical, 'at' puts it at y=1.5, and line=2 gives an offset
mtext(expression(paste(gamma)),side=2,cex=1.5,las=1,at=(1.5),line=2) 
axis(1,at=seq(0,1,0.01),labels=FALSE,tcl=0.2) # add some minor tics to the x-axis, x=0 to 1, intervals 0.01
arrows(m1$r,m1$l-20,m1$r,m1$l+20,length=0.05,angle=90,code=3,lwd=2.0) # draw some errorbars +-20 in y
legend("topright", c("M=2","M=5","M=10"), pch=c(2,5,10))       
                # legend location 'topright' (can be a coordinate)
                # The 'M=2' 'M=5' M=10' are text items for the legend
                # pch are the point types for the text items, or use lty and lwd for line type or width
hist(log10(M_A),breaks=c(0.165,0.500,0.835,1.170,1.505,1.84,2.175,2.51),axes=FALSE,xlab="",ylab="",main="")
                # Histogram plot. The breaks define the left and right edges of each bin. Here, M_A is
                # a variable already defined. Label the bins afterward, for example with 
                # axis(1,at=c(0.33,0.67,1.0,1.33,1.67,2.0),labels=c("2","5","10","20","50","100"),tcl=0.0,lwd=0)
                # Then use mtext to label the axes. Fiddle with mar and mgp before drawing the axis, 
                # for example, par(mar=c(0,3,1,1),mgp=c(2,0.8,0))
boxplot(z,range=0.3)  # boxplots, has various arguments, range is for the whiskers

---Statistics---
mean(x)               # mean of vector x
sd(x)                 # standard deviation
var(x)                # variance
range(x)              # range of x
summary(x)            # info on mean, median, max, min, quartiles
ecdf(x)               # cumulative distribution function
t.test(x,y)           # t-test for means being the same. A low p-value rejects hypothesis
cframeor.test(x,y,method='s') # rank correlation. p-value is prob of getting correlation
                              # that good from random data. s=spearman, k=kendall, p=pearson
fisher.test(x,conf.level=0.90,alternative="less") # one-sided contingency table test
                              # can also use exact2x2 as alternative
pairs(~x+y+t+s,data=z) # Correlation plots of variables x, y, t, and s against each other 
SmoothScatter(xy,lwd=4,pch=20,cex=0.2) # smooth the data and make a density map
                                       # points are plotted in low density regions. 

---Fitting Models, drawing lines---
fit=lm(y~x+I(x^2))      # y=c0+c1*x+c2*x^2; lm is 'linear model'
xx=seq(40,80,0.1)       # define continuous range of points xx=40 to 80 for drawing curve
yy=fit$coefficients[1]+fit$coefficients[2]*xx+fit$coefficients[3]*xx^2  # now you have (xx,yy) pairs
lines(xx,yy)            # draws the curve

fit=lm(y ~ log10(x))    # fit y to log10 of x
fit                     # print out the coeficients
summary(fit)            # prints fit, residuals, quartiles, errors and what-not
abline(fit)             # overplot line fit
abline(h=0)             # draw horizontal line at y=0
abline(v=0)             # draw vertical line at x=0
lines(sort(x),yfit$fit) # also plots the fitted line
z=smooth.spline(x,y)    # spline fit 
lines(z,lty=2,lw=3)     # draw fit on plot with heavy(lw=3) dotted (lty=2) line.

---Looping---
 x=c(50,60,70) # example plots a quadratic 20 times with normal dist. spread of errors in the 3 coefs
 z=c(22,49,65)
 plot(x,z,xlim=c(40,75),ylim=c(-5,80) #plot the points
 a=rnorm(20,0,2)                      # a,b,c have 20 values, mean=0 and sigma=2
 b=rnorm(20,0,2)
 c=rnorm(20,0,2)
 for(i in 1:20){                      #do the loop
 z=c(22+c[i],49+b[i],65+a[i])
 fit=lm(z ~ x + I(x^2))
 zz=fit$coefficients[1]+fit$coefficients[2]*xx+fit$coefficients[3]*xx^2
 lines(xx,zz)                         #plot the fits
 }

----Printing to Screen, Output Files----
dim(x)                # print dimensions of variable x (row,column,slice,etc.)
print(x)              # print values of a vector x
x                     # also prints values of x
x[3]                  # prints values of third column of x
head(z,n=10)          # print first 10 lines of array z to screen
tail(z,n=10)          # print last 10 lines of array z to screen
ls()                  # list the defined vectors
write(file='output',z)   # store vector z to an output file
write.table(rbind(x,y),file='out.dat')  # store a table in a file
dev.copy2eps(file='out.eps')            # store figure as an eps file; or just go to menu and save as a pdf!