R PROGRAMMING AND SYNTAX ---Directories, packages---- getwd() # print the working directory setwd("test") # sets working directory dir.create("test") # Like unix 'mkdir test' list.files() # Like unix ls system("ls") # Run a command from the OS sessionInfo() # packages loaded, version and so on install.packages("plotrix") # searches internet and installs package 'plotrix' # sometimes these add-ons are exactly what you need library(plotrix) # have to do this each session for added packages citation() # to cite the R package ----Help---- help(seq) # prints help documentation for command 'seq' ?seq # another way to invoke help ??seq # look for 'seq' throughout help pages ----Run R commands from a file--- source("command_file") # it is ok to split a long command onto two lines. '#' is a comment ----Assigning variables----- x=2 #for a single variable x <- c(23.,-12.,43.5) # The c() function assigns numbers to a vector x=c(23.,-12,43.5) # or do it this way y=c(x,1,2*x) # does what you think it should, y has 7 elements if x has 3 z=y-x # also has 7 elements. the 3 elements x get repeated to fill up the vector z=1:10 # assigns values 1 through 10 to vector z z=seq(1,10,by=2) # odd numbers 1 through 10 z=seq(40,1,-5) # start at 40, stop when <=1, increments of -5 z=seq(0.05,3,len=500) # uniform sampling of length 500 between 0.05 and 3 z[4]=5 # 4th value of vector z z[z<5] # all values of z that are < 5 z=sample(1:10) # randomly sample from 1:10 w/o replacement z=1e-3*sample(0:999,3,replace=TRUE) # 3 random numbers in [0,1) to 3 decimal places z=rnorm(30,mean=10,sd=2) # 30 random samples from normal distribution with mu=10 sigma=2 z=c("aa","a_string") # creating a vector of strings nam=as.character(p) # convert number to a string for plotting (useful when looping movies) text(0.02,1.8,nam,col="black") #....and now plot that string. You could update the number p in a loop z=c(TRUE,TRUE,FALSE) # a logical vector rm(x) # deletes the variable rm(list=ls()) # deletes all variables result=array(0,c(20,3)) # make an array of 20 rows, 3 columns, values all =0 z=matrix(c(4,6,12,9),2,2) # populate z, a 2x2 matrix, with values zm3=zm2%*%zm1 # matrix multiplication of zm2 and zm1 matrix(4,3,3) # populate a 3x3 matrix with 4's diag(3) # 3x3 matrix with 1's on diagonals, 0 elsewhere t(z) # transpose of matrix z cov(z) # covariance of data matrix z; variables in columns, data points in rows, # uses n-1 in denominator to get sigma^2(xy) cor(z) # correlation matrix of z ----Store multiple plots as foo*.png files for later insertion as frames into movies png("foo%04d.png",width=1400,height=700,pointsize=20,type=c("quartz")) ...commands to generate plots... dev.off() ffmpeg -f image2 -i 'foo%04d.png' try.mpeg #unix command to make movie from frames; view in Quicktime ----Variables Format---- str(z) # returns structure of object z, e.g. character, numeric, logical class(z) # similar to str is.matrix(z) # returns TRUE or FALSE depending on if z is a matrix ----Tabular data and subsamples------ rbind(x,y) # combine objects into a table by rows cbind(x,y) # combine objects by columns c(xl1,xl2) # combine two variables together into a single longer one xy=as.data.frame(cbind(x,y)) # creates data frame of named variables. names(xy)=c("xval","yval") # change column names z=read.table("filename",T) # read ascii table into a data frame, 'T' reads labels on first line attach(z) # puts variables within z into the search path of the program z1=subset(z,M_A==2 & Dcool<75 & Dcool>50) #choose subset of data in frame z # where M_A and Dcool are variables. Then attach(z1) ----Mathematical expressions----- z=3*sqrt(x) # expressions work z=x**3.2 # powers, ^ also works z=cos((pi/4)) # trig uses radians; acos, tan, etc z=log10(x) # log is natural ln z=exp(x) # gives e^x trunc(x) # cuts off the fraction part, leaving just the integer length(z) # number of elements in z max(z) # maximum value; min(z) also works abs(z) # absolute value sort(z) # order vector from low to high round(x,4) # round off variable x to 4 digits ----Plots, Labels---- plot(x,y) # plot x vs. y (resize window to change aspect ratio) points(x,y) # overplot points on existing graph (use lines to overplot lines) plot(x,pch=20) # pch=one of the 26 point types; cex is point size plot(x,xlim=c(0,3),ylim=c(-0.5,4.5),xaxs="i",yaxs="i") # force user-specified limits on x and y par() # show plotting parameters par(mfrow=c(1,2)) # set up 2-panel figure par(mar=c(0,0,0,0),oma=c(4,4,0.5,0.5) #for multiple panels with no space between them plot(p,a/23968,type='S',ylim=c(-0.05,1.05),xlim=c(-5,10.5),axes=FALSE,frame.plot=TRUE) # no tics, but plot the box. Useful for multiple panels. par(mar=c(4,4,1,1)) # size of margin around plot (bottom, left, top, right). Smaller numbers make # the graph fill more of the available space. You can change these along with # mgp to shift the graph relative to the labels. par(mgp=c(1.8,0.3,0)) # offsets for text labels, numbers, and ticks. Usually ticks=0. The number offset # can be negative to shift values closer to axis, 0.3 works well for tck=0.03 (see below) title(xlab=expression(paste("something complicated"),line=2) # good for offsetting complicated axis labels # set xlab = " " in the plot command frac({1},{2}) # write the fraction 1/2. This can be made complicated with Greek letters, subscripts etc. For example: frac({"[N I]"[lambda*lambda*5198+5200]},{"[N II]"[lambda*lambda*6548+6583]}) plot(x,y,tck=0.03,las=1,log='x',pch=M_A,cex=0.9,col="red",xlab=expression(paste("log"["10"]," d"["COOL"]," (AU)"))) # tck=0.03 tic size, positive number moves it inside the graphic box # las=1 axis labels are all vertical # log='x' log scale on x (or 'y', 'xy') # pch is point type, here chosen from column labeled M_A # cex is size of points; use cex.lab=1.3, cex.axis=1.2 to increase label size by 30%, tic labels by 20% # col is color # xlab is for x labels: # expression/paste syntax for subscripts [] and superscripts ^ # [] are followed by a comma. ^ are similar but use only one of them: e.g. paste("cos(45"^"o",")") plot(x,y,xlab=expression(paste(lambda,"F",lambda))) # for Greek letters in labels text(0.5,0.9,"This is a label",0) # Put label at x=0.5, y=0.9. The last parameter is an offset # Use mtext to place xlabels and ylabels in margins # las=1 makes y-axis (side=2) label vertical, 'at' puts it at y=1.5, and line=2 gives an offset mtext(expression(paste(gamma)),side=2,cex=1.5,las=1,at=(1.5),line=2) mtext("peak = 23968",side=3,line=-2.0,adj=0.8,cex=0.8) # adj moves the string along direction you read it axis(1,at=seq(0,1,0.01),labels=FALSE,tcl=0.2) # add some minor tics to the x-axis, x=0 to 1, intervals 0.01 grid(12,13,lty=3,lwd=2) # draw a dotted grid, 12 lines on x-axis and 13 on the y. Works best when xaxs='i', yaxs='i' arrows(m1$r,m1$l-20,m1$r,m1$l+20,length=0.05,angle=90,code=3,lwd=2.0) # draw some errorbars +-20 in y legend("topright", c("M=2","M=5","M=10"), pch=c(2,5,10)) # legend location 'topright' (can be a coordinate) # The 'M=2' 'M=5' M=10' are text items for the legend # pch are the point types for the text items, or use lty and lwd for line type or width hist(log10(M_A),breaks=c(0.165,0.500,0.835,1.170,1.505,1.84,2.175,2.51),axes=FALSE,xlab="",ylab="",main="") # Histogram plot. The breaks define the left and right edges of each bin. Here, M_A is # a variable already defined. Label the bins afterward, for example with # axis(1,at=c(0.33,0.67,1.0,1.33,1.67,2.0),labels=c("2","5","10","20","50","100"),tcl=0.0,lwd=0) # Then use mtext to label the axes. Fiddle with mar and mgp before drawing the axis, # for example, par(mar=c(0,3,1,1),mgp=c(2,0.8,0)) boxplot(z,range=0.3) # boxplots, has various arguments, range is for the whiskers polar.plot(dist,angle,rp.type='sr') # polar coordinate plot in plotrix package, s is symbol, r is radial line ---Statistics--- mean(x) # mean of vector x sd(x) # standard deviation var(x) # variance range(x) # range of x summary(x) # info on mean, median, max, min, quartiles ecdf(x) # cumulative distribution function t.test(x,y) # t-test for means being the same. A low p-value rejects hypothesis cframeor.test(x,y,method='s') # rank correlation. p-value is prob of getting correlation # that good from random data. s=spearman, k=kendall, p=pearson fisher.test(x,conf.level=0.90,alternative="less") # one-sided contingency table test # can also use exact2x2 as alternative pairs(~x+y+t+s,data=z) # Correlation plots of variables x, y, t, and s against each other SmoothScatter(xy,lwd=4,pch=20,cex=0.2) # smooth the data and make a density map # points are plotted in low density regions. ---Fitting Models, drawing lines--- fit=lm(y~x+I(x^2)) # y=c0+c1*x+c2*x^2; lm is 'linear model' xx=seq(40,80,0.1) # define continuous range of points xx=40 to 80 for drawing curve yy=fit$coefficients[1]+fit$coefficients[2]*xx+fit$coefficients[3]*xx^2 # now you have (xx,yy) pairs lines(xx,yy) # draws the curve fit=lm(y ~ log10(x)) # fit y to log10 of x fit # print out the coeficients summary(fit) # prints fit, residuals, quartiles, errors and what-not abline(fit) # overplot line fit abline(h=0) # draw horizontal line at y=0 abline(v=0) # draw vertical line at x=0 lines(sort(x),yfit$fit) # also plots the fitted line z=smooth.spline(x,y) # spline fit lines(z,lty=2,lw=3) # draw fit on plot with heavy(lw=3) dotted (lty=2) line. ---Bootstrap--- library(boot) sample_mean=function(data,index){ #First define the statistic to bootstrap xbar=mean(data[index]) #in this case the mean return(xbar) } results=boot(x,sample_mean,R=10000) # 10000 bootstraps on data x #results for each returned statistic are in t1, t2, t3, etc. hist(results$t[,1],main="Mean of 10,000 Bootstrap Samples",xlab="X") #plot them\ boot.ci(boot.out=results,conf=0.95,type="bca") #Confidence intervals ---Smoothing, Image display--- bin=hexbin(z1$xcen,z1$ycen,xbins=50) #make hexagonal density bin plot; larger xbins -> more resolution plot(bin,main="Density map of Circle Centers",xlab="X (pixels)",ylab="Y (pixels)", colorcut=c(seq(0,1,0.1)^1.5),legend=1,lcex=1,colramp=function(n) rainbow(12)) # change stretch via power (here=1.5) # this is the hexbin plot command, differs from plot. Can also try style="nested.centroids" for fun ker=kde2d(z1$xcen,z1$ycen,lims=c(-20,20,-10,30),n=100) ; image(ker) #make and plot kernel density map ---Looping--- x=c(50,60,70) # example plots a quadratic 20 times with normal dist. spread of errors in the 3 coefs z=c(22,49,65) plot(x,z,xlim=c(40,75),ylim=c(-5,80) #plot the points a=rnorm(20,0,2) # a,b,c have 20 values, mean=0 and sigma=2 b=rnorm(20,0,2) c=rnorm(20,0,2) for(i in 1:20){ #do the loop z=c(22+c[i],49+b[i],65+a[i]) fit=lm(z ~ x + I(x^2)) zz=fit$coefficients[1]+fit$coefficients[2]*xx+fit$coefficients[3]*xx^2 lines(xx,zz) #plot the fits } ---- if/then/else ---- if(i>3){ x[i] = 4.5 } else if (i<-2){ x[i]=6. } else { x[i]=-6. } ----Printing to Screen, Output Files---- dim(x) # print dimensions of variable x (row,column,slice,etc.) print(x) # print values of a vector x x # also prints values of x x[3] # prints values of third column of x head(z,n=10) # print first 10 lines of array z to screen tail(z,n=10) # print last 10 lines of array z to screen ls() # list the defined vectors write(file='output',z) # store vector z to an output file write.table(rbind(x,y),file='out.dat') # store a table in a file dev.copy2eps(file='out.eps') # store figure as an eps file; or just go to menu and save as a pdf! ----A nice reference for syntax and the like---- https://www.stat.berkeley.edu/~spector/Rcourse.pdf