## R Notes

The following are some notes on the syntax, commands, etc. for R programming and plotting.

```
R PROGRAMMING AND SYNTAX
---Directories, packages----
getwd()               # print the working directory
setwd("test")         # sets working directory
dir.create("test")    # Like unix 'mkdir test'
list.files()          # Like unix ls
system("ls")          # Run a command from the OS
sessionInfo()         # packages loaded, version and so on
install.packages("plotrix") # searches internet and installs package 'plotrix'
# sometimes these add-ons are exactly what you need
library(plotrix)      # have to do this each session for added packages
citation()            # to cite the R package

----Help----
help(seq)             # prints help documentation for command 'seq'
?seq                  # another way to invoke help
??seq                 # look for 'seq' throughout help pages

----Run R commands from a file---
source("command_file") # it is ok to split a long command onto two lines. '#' is a comment

----Assigning variables-----
x=2                   #for a single variable
x <- c(23.,-12.,43.5) # The c() function assigns numbers to a vector
x=c(23.,-12,43.5)     # or do it this way
y=c(x,1,2*x)          # does what you think it should, y has 7 elements if x has 3
z=y-x                 # also has 7 elements. the 3 elements x get repeated to fill up the vector
z=1:10                # assigns values 1 through 10 to vector z
z=seq(1,10,by=2)      # odd numbers 1 through 10
z=seq(40,1,-5)        # start at 40, stop when <=1, increments of -5
z=seq(0.05,3,len=500) # uniform sampling of length 500 between 0.05 and 3
z=5                # 4th value of vector z
z[z<5]                # all values of z that are < 5
z=sample(1:10)        # randomly sample from 1:10 w/o replacement
z=1e-3*sample(0:999,3,replace=TRUE) # 3 random numbers in [0,1) to 3 decimal places
z=rnorm(30,mean=10,sd=2) # 30 random samples from normal distribution with mu=10 sigma=2
z=c("aa","a_string")  # creating a vector of strings
nam=as.character(p)   # convert number to a string for plotting (useful when looping movies)
text(0.02,1.8,nam,col="black") #....and now plot that string. You could update the number p in a loop
z=c(TRUE,TRUE,FALSE)  # a logical vector
rm(x)                 # deletes the variable
rm(list=ls())         # deletes all variables
result=array(0,c(20,3)) # make an array of 20 rows, 3 columns, values all =0
z=matrix(c(4,6,12,9),2,2) # populate z, a 2x2 matrix, with values
zm3=zm2%*%zm1         # matrix multiplication of zm2 and zm1
matrix(4,3,3)         # populate a 3x3 matrix with 4's
diag(3)               # 3x3 matrix with 1's on diagonals, 0 elsewhere
t(z)                  # transpose of matrix z
cov(z)                # covariance of data matrix z; variables in columns, data points in rows,
# uses n-1 in denominator to get sigma^2(xy)
cor(z)                # correlation matrix of z

----Store multiple plots as foo*.png files for later insertion as frames into movies
png("foo%04d.png",width=1400,height=700,pointsize=20,type=c("quartz"))
...commands to generate plots...
dev.off()
ffmpeg -f image2 -i 'foo%04d.png' try.mpeg  #unix command to make movie from frames; view in Quicktime

----Variables Format----
str(z)                # returns structure of object z, e.g. character, numeric, logical
class(z)              # similar to str
is.matrix(z)          # returns TRUE or FALSE depending on if z is a matrix

----Tabular data and subsamples------
rbind(x,y)            # combine objects into a table by rows
cbind(x,y)            # combine objects by columns
c(xl1,xl2)            # combine two variables together into a single longer one
xy=as.data.frame(cbind(x,y)) # creates data frame of named variables.
names(xy)=c("xval","yval")   # change column names
attach(z)             # puts variables within z into the search path of the program
z1=subset(z,M_A==2 & Dcool<75 & Dcool>50) #choose subset of data in frame z
# where  M_A and Dcool are variables. Then attach(z1)

----Mathematical expressions-----
z=3*sqrt(x)           # expressions work
z=x**3.2              # powers, ^ also works
z=cos((pi/4))         # trig uses radians; acos, tan, etc
z=log10(x)            # log is natural ln
z=exp(x)              # gives e^x
trunc(x)              # cuts off the fraction part, leaving just the integer
length(z)             # number of elements in z
max(z)                # maximum value; min(z) also works
abs(z)                # absolute value
sort(z)               # order vector from low to high
round(x,4)            # round off variable x to 4 digits

----Plots, Labels----
plot(x,y)             # plot x vs. y (resize window to change aspect ratio)
points(x,y)           # overplot points on existing graph (use lines to overplot lines)
plot(x,pch=20)        # pch=one of the 26 point types; cex is point size
plot(x,xlim=c(0,3),ylim=c(-0.5,4.5),xaxs="i",yaxs="i") # force user-specified limits on x and y
par()                 # show plotting parameters
par(mfrow=c(1,2))     # set up 2-panel figure
par(mar=c(0,0,0,0),oma=c(4,4,0.5,0.5) #for multiple panels with no space between them
plot(p,a/23968,type='S',ylim=c(-0.05,1.05),xlim=c(-5,10.5),axes=FALSE,frame.plot=TRUE)
# no tics, but plot the box. Useful for multiple panels.
par(mar=c(4,4,1,1))   # size of margin around plot (bottom, left, top, right). Smaller numbers make
# the graph fill more of the available space. You can change these along with
# mgp to shift the graph relative to the labels.
par(mgp=c(1.8,0.3,0)) # offsets for text labels, numbers, and ticks. Usually ticks=0. The number offset
# can be negative to shift values closer to axis, 0.3 works well for tck=0.03 (see below)
title(xlab=expression(paste("something complicated"),line=2) # good for offsetting complicated axis labels
# set xlab = " " in the plot command
frac({1},{2})   # write the fraction 1/2. This can be made complicated with Greek letters, subscripts etc. For example:
frac({"[N I]"[lambda*lambda*5198+5200]},{"[N II]"[lambda*lambda*6548+6583]})
plot(x,y,tck=0.03,las=1,log='x',pch=M_A,cex=0.9,col="red",xlab=expression(paste("log"["10"]," d"["COOL"]," (AU)")))
# tck=0.03 tic size, positive number moves it inside the graphic box
# las=1 axis labels are all vertical
# log='x' log scale on x (or 'y', 'xy')
# pch is point type, here chosen from column labeled M_A
# cex is size of points; use cex.lab=1.3, cex.axis=1.2 to increase label size by 30%, tic labels by 20%
# col is color
# xlab is for x labels:
# expression/paste syntax for subscripts [] and superscripts ^
#  [] are followed by a comma. ^ are similar but use only one of them: e.g. paste("cos(45"^"o",")")
plot(x,y,xlab=expression(paste(lambda,"F",lambda))) # for Greek letters in labels
text(0.5,0.9,"This is a label",0) # Put label at x=0.5, y=0.9.  The last parameter is an offset
# Use mtext to place xlabels and ylabels in margins
# las=1 makes y-axis (side=2) label vertical, 'at' puts it at y=1.5, and line=2 gives an offset
mtext(expression(paste(gamma)),side=2,cex=1.5,las=1,at=(1.5),line=2)
axis(1,at=seq(0,1,0.01),labels=FALSE,tcl=0.2) # add some minor tics to the x-axis, x=0 to 1, intervals 0.01
grid(12,13,lty=3,lwd=2) # draw a dotted grid, 12 lines on x-axis and 13 on the y.  Works best when xaxs='i', yaxs='i'
arrows(m1\$r,m1\$l-20,m1\$r,m1\$l+20,length=0.05,angle=90,code=3,lwd=2.0) # draw some errorbars +-20 in y
legend("topright", c("M=2","M=5","M=10"), pch=c(2,5,10))
# legend location 'topright' (can be a coordinate)
# The 'M=2' 'M=5' M=10' are text items for the legend
# pch are the point types for the text items, or use lty and lwd for line type or width
hist(log10(M_A),breaks=c(0.165,0.500,0.835,1.170,1.505,1.84,2.175,2.51),axes=FALSE,xlab="",ylab="",main="")
# Histogram plot. The breaks define the left and right edges of each bin. Here, M_A is
# a variable already defined. Label the bins afterward, for example with
# axis(1,at=c(0.33,0.67,1.0,1.33,1.67,2.0),labels=c("2","5","10","20","50","100"),tcl=0.0,lwd=0)
# Then use mtext to label the axes. Fiddle with mar and mgp before drawing the axis,
# for example, par(mar=c(0,3,1,1),mgp=c(2,0.8,0))
boxplot(z,range=0.3)  # boxplots, has various arguments, range is for the whiskers
polar.plot(dist,angle,rp.type='sr') # polar coordinate plot in plotrix package, s is symbol, r is radial line

---Statistics---
mean(x)               # mean of vector x
sd(x)                 # standard deviation
var(x)                # variance
range(x)              # range of x
summary(x)            # info on mean, median, max, min, quartiles
ecdf(x)               # cumulative distribution function
t.test(x,y)           # t-test for means being the same. A low p-value rejects hypothesis
cframeor.test(x,y,method='s') # rank correlation. p-value is prob of getting correlation
# that good from random data. s=spearman, k=kendall, p=pearson
fisher.test(x,conf.level=0.90,alternative="less") # one-sided contingency table test
# can also use exact2x2 as alternative
pairs(~x+y+t+s,data=z) # Correlation plots of variables x, y, t, and s against each other
SmoothScatter(xy,lwd=4,pch=20,cex=0.2) # smooth the data and make a density map
# points are plotted in low density regions.

---Fitting Models, drawing lines---
fit=lm(y~x+I(x^2))      # y=c0+c1*x+c2*x^2; lm is 'linear model'
xx=seq(40,80,0.1)       # define continuous range of points xx=40 to 80 for drawing curve
yy=fit\$coefficients+fit\$coefficients*xx+fit\$coefficients*xx^2  # now you have (xx,yy) pairs
lines(xx,yy)            # draws the curve

fit=lm(y ~ log10(x))    # fit y to log10 of x
fit                     # print out the coeficients
summary(fit)            # prints fit, residuals, quartiles, errors and what-not
abline(fit)             # overplot line fit
abline(h=0)             # draw horizontal line at y=0
abline(v=0)             # draw vertical line at x=0
lines(sort(x),yfit\$fit) # also plots the fitted line
z=smooth.spline(x,y)    # spline fit
lines(z,lty=2,lw=3)     # draw fit on plot with heavy(lw=3) dotted (lty=2) line.

---Bootstrap---
library(boot)
sample_mean=function(data,index){   #First define the statistic to bootstrap
xbar=mean(data[index]) #in this case the mean
return(xbar)
}
results=boot(x,sample_mean,R=10000) # 10000 bootstraps on data x
#results for each returned statistic are in t1, t2, t3, etc.
hist(results\$t[,1],main="Mean of 10,000 Bootstrap Samples",xlab="X") #plot them\
boot.ci(boot.out=results,conf=0.95,type="bca") #Confidence intervals

---Smoothing, Image display---
bin=hexbin(z1\$xcen,z1\$ycen,xbins=50)  #make hexagonal density bin plot; larger xbins -> more resolution
plot(bin,main="Density map of Circle Centers",xlab="X (pixels)",ylab="Y (pixels)",
colorcut=c(seq(0,1,0.1)^1.5),legend=1,lcex=1,colramp=function(n) rainbow(12))  # change stretch via power (here=1.5)
# this is the hexbin plot command, differs from plot. Can also try style="nested.centroids" for fun
ker=kde2d(z1\$xcen,z1\$ycen,lims=c(-20,20,-10,30),n=100) ; image(ker)  #make and plot kernel density map

---Looping---
x=c(50,60,70) # example plots a quadratic 20 times with normal dist. spread of errors in the 3 coefs
z=c(22,49,65)
plot(x,z,xlim=c(40,75),ylim=c(-5,80) #plot the points
a=rnorm(20,0,2)                      # a,b,c have 20 values, mean=0 and sigma=2
b=rnorm(20,0,2)
c=rnorm(20,0,2)
for(i in 1:20){                      #do the loop
z=c(22+c[i],49+b[i],65+a[i])
fit=lm(z ~ x + I(x^2))
zz=fit\$coefficients+fit\$coefficients*xx+fit\$coefficients*xx^2
lines(xx,zz)                         #plot the fits
}

---- if/then/else ----
if(i>3){
x[i] = 4.5
} else if (i<-2){
x[i]=6.
} else {
x[i]=-6.
}

----Printing to Screen, Output Files----
dim(x)                # print dimensions of variable x (row,column,slice,etc.)
print(x)              # print values of a vector x
x                     # also prints values of x
x                  # prints values of third column of x
head(z,n=10)          # print first 10 lines of array z to screen
tail(z,n=10)          # print last 10 lines of array z to screen
ls()                  # list the defined vectors
write(file='output',z)   # store vector z to an output file
write.table(rbind(x,y),file='out.dat')  # store a table in a file
dev.copy2eps(file='out.eps')            # store figure as an eps file; or just go to menu and save as a pdf!

----A nice reference for syntax and the like----
https://www.stat.berkeley.edu/~spector/Rcourse.pdf

```