R PROGRAMMING AND SYNTAX ---Directories---- getwd() # print the working directory setwd("test") # sets working directory dir.create("test") # Like unix 'mkdir test' list.files() # Like unix ls system("ls") # Run a command from the OS sessionInfo() # packages loaded, version and so on citation() # to cite the R package ----Help---- help(seq) # prints help documentation for command 'seq' ?seq # another way to invoke help ??seq # look for 'seq' throughout help pages ----Run R commands from a file--- source("command_file") # it is ok to split a long command onto two lines. '#' is a comment ----Assigning variables----- x=2 #for a single variable x <- c(23.,-12.,43.5) # The c() function assigns numbers to a vector x=c(23.,-12,43.5) # or do it this way y=c(x,1,2*x) # does what you think it should, y has 7 elements if x has 3 z=y-x # also has 7 elements. the 3 elements x get repeated to fill up the vector z=1:10 # assigns values 1 through 10 to vector z z=seq(1,10,by=2) # odd numbers 1 through 10 z=seq(40,1,-5) # start at 40, stop when <=1, increments of -5 z=seq(0.05,3,len=500) # uniform sampling of length 500 between 0.05 and 3 z[4]=5 # 4th value of vector z z[z<5] # all values of z that are < 5 z=sample(1:10) # randomly sample from 1:10 w/o replacement z=1e-3*sample(0:999,3,replace=TRUE) # 3 random numbers in [0,1) to 3 decimal places z=rnorm(30,mean=10,sd=2) # 30 random samples from normal distribution with mu=10 sigma=2 z=c("aa","a_string") # creating a vector of strings nam=as.character(p) # convert number to a string for plotting (useful when looping movies) text(0.02,1.8,nam,col="black") #....and now plot that string. You could update the number p in a loop z=c(TRUE,TRUE,FALSE) # a logical vector rm(x) # deletes the variable rm(list=ls()) # deletes all variables result=array(0,c(20,3)) # make an array of 20 rows, 3 columns, values all =0 z=matrix(c(4,6,12,9),2,2) # populate z, a 2x2 matrix, with values ----Store multiple plots as foo*.png files for later insertion as frames into movies png("foo%04d.png",width=1400,height=700,pointsize=20,type=c("quartz")) ...commands to generate plots... dev.off() ffmpeg -f image2 -i 'foo%04d.png' try.mpeg #unix command to make movie from frames; view in Quicktime ----Variables Format---- str(z) # returns structure of object z, e.g. character, numeric, logical class(z) # similar to str is.matrix(z) # returns TRUE or FALSE depending on if z is a matrix ----Tabular data and subsamples------ rbind(x,y) # combine objects into a table by rows cbind(x,y) # combine objects by columns xy=as.data.frame(cbind(x,y)) # creates data frame of named variables. names(xy)=c("xval","yval") # change column names z=read.table("filename",T) # read ascii table into a data frame, 'T' reads labels on first line attach(z) # puts variables within z into the search path of the program z1=subset(z,M_A==2 & Dcool<75 & Dcool>50) #choose subset of data in frame z # where M_A and Dcool are variables. Then attach(z1) ----Mathematical expressions----- z=3*sqrt(x) # expressions work z=x**3.2 # powers, ^ also works z=cos((pi/4)) # trig uses radians; acos, tan, etc z=log10(x) # log is natural ln z=exp(x) # gives e^x trunc(x) # greatest integer less than or equal to length(z) # number of elements in z max(z) # maximum value; min(z) also works abs(z) # absolute value sort(z) # order vector from low to high round(x,4) # round off variable x to 4 digits ----Plots, Labels---- plot(x,y) # plot x vs. y (resize window to change aspect ratio) points(x,y) # overplot points on existing graph (use lines to overplot lines) plot(x,pch=20) # pch=one of the 26 point types; cex is point size plot(x,xlim=c(0,3),ylim=c(-0.5,4.5),xaxs="i",yaxs="i") # force user-specified limits on x and y par() # show plotting parameters par(mfrow=c(1,2)) # set up 2-panel figure par(mar=c(4,4,1,1)) # size of margin around plot (bottom, left, top, right). Smaller numbers make # the graph fill more of the available space. You can change these along with # mgp to shift the graph relative to the labels. par(mgp=c(1.8,0.3,0)) # offsets for text labels, numbers, and ticks. Usually ticks=0. The number offset # can be negative to shift values closer to axis, 0.3 works well for tck=0.03 (see below) title(xlab=expression(paste("something complicated"),line=2) # good for offsetting complicated axis labels # set xlab = " " in the plot command frac({1},{2}) # write the fraction 1/2. This can be made complicated with Greek letters, subscripts etc. For example: frac({"[N I]"[lambda*lambda*5198+5200]},{"[N II]"[lambda*lambda*6548+6583]}) plot(x,y,tck=0.03,las=1,log='x',pch=M_A,cex=0.9,col="red",xlab=expression(paste("log"["10"]," d"["COOL"]," (AU)"))) # tck=0.03 tic size, positive number moves it inside the graphic box # las=1 axis labels are all vertical # log='x' log scale on x (or 'y', 'xy') # pch is point type, here chosen from column labeled M_A # cex is size of points; use cex.lab=1.3, cex.axis=1.2 to increase label size by 30%, tic labels by 20% # col is color # xlab is for x labels: # expression/paste syntax for subscripts [] and superscripts ^ # [] are followed by a comma. ^ are similar but use only one of them: e.g. paste("cos(45"^"o",")") plot(x,y,xlab=expression(paste(lambda,"F",lambda))) # for Greek letters in labels text(0.5,0.9,"This is a label",0) # Put label at x=0.5, y=0.9. The last parameter is an offset # Use mtext to place xlabels and ylabels in margins # las=1 makes y-axis (side=2) label vertical, 'at' puts it at y=1.5, and line=2 gives an offset mtext(expression(paste(gamma)),side=2,cex=1.5,las=1,at=(1.5),line=2) axis(1,at=seq(0,1,0.01),labels=FALSE,tcl=0.2) # add some minor tics to the x-axis, x=0 to 1, intervals 0.01 arrows(m1$r,m1$l-20,m1$r,m1$l+20,length=0.05,angle=90,code=3,lwd=2.0) # draw some errorbars +-20 in y legend("topright", c("M=2","M=5","M=10"), pch=c(2,5,10)) # legend location 'topright' (can be a coordinate) # The 'M=2' 'M=5' M=10' are text items for the legend # pch are the point types for the text items, or use lty and lwd for line type or width hist(log10(M_A),breaks=c(0.165,0.500,0.835,1.170,1.505,1.84,2.175,2.51),axes=FALSE,xlab="",ylab="",main="") # Histogram plot. The breaks define the left and right edges of each bin. Here, M_A is # a variable already defined. Label the bins afterward, for example with # axis(1,at=c(0.33,0.67,1.0,1.33,1.67,2.0),labels=c("2","5","10","20","50","100"),tcl=0.0,lwd=0) # Then use mtext to label the axes. Fiddle with mar and mgp before drawing the axis, # for example, par(mar=c(0,3,1,1),mgp=c(2,0.8,0)) boxplot(z,range=0.3) # boxplots, has various arguments, range is for the whiskers ---Statistics--- mean(x) # mean of vector x sd(x) # standard deviation var(x) # variance range(x) # range of x summary(x) # info on mean, median, max, min, quartiles ecdf(x) # cumulative distribution function t.test(x,y) # t-test for means being the same. A low p-value rejects hypothesis cframeor.test(x,y,method='s') # rank correlation. p-value is prob of getting correlation # that good from random data. s=spearman, k=kendall, p=pearson fisher.test(x,conf.level=0.90,alternative="less") # one-sided contingency table test # can also use exact2x2 as alternative pairs(~x+y+t+s,data=z) # Correlation plots of variables x, y, t, and s against each other SmoothScatter(xy,lwd=4,pch=20,cex=0.2) # smooth the data and make a density map # points are plotted in low density regions. ---Fitting Models, drawing lines--- fit=lm(y~x+I(x^2)) # y=c0+c1*x+c2*x^2; lm is 'linear model' xx=seq(40,80,0.1) # define continuous range of points xx=40 to 80 for drawing curve yy=fit$coefficients[1]+fit$coefficients[2]*xx+fit$coefficients[3]*xx^2 # now you have (xx,yy) pairs lines(xx,yy) # draws the curve fit=lm(y ~ log10(x)) # fit y to log10 of x fit # print out the coeficients summary(fit) # prints fit, residuals, quartiles, errors and what-not abline(fit) # overplot line fit abline(h=0) # draw horizontal line at y=0 abline(v=0) # draw vertical line at x=0 lines(sort(x),yfit$fit) # also plots the fitted line z=smooth.spline(x,y) # spline fit lines(z,lty=2,lw=3) # draw fit on plot with heavy(lw=3) dotted (lty=2) line. ---Looping--- x=c(50,60,70) # example plots a quadratic 20 times with normal dist. spread of errors in the 3 coefs z=c(22,49,65) plot(x,z,xlim=c(40,75),ylim=c(-5,80) #plot the points a=rnorm(20,0,2) # a,b,c have 20 values, mean=0 and sigma=2 b=rnorm(20,0,2) c=rnorm(20,0,2) for(i in 1:20){ #do the loop z=c(22+c[i],49+b[i],65+a[i]) fit=lm(z ~ x + I(x^2)) zz=fit$coefficients[1]+fit$coefficients[2]*xx+fit$coefficients[3]*xx^2 lines(xx,zz) #plot the fits } ----Printing to Screen, Output Files---- dim(x) # print dimensions of variable x (row,column,slice,etc.) print(x) # print values of a vector x x # also prints values of x x[3] # prints values of third column of x head(z,n=10) # print first 10 lines of array z to screen tail(z,n=10) # print last 10 lines of array z to screen ls() # list the defined vectors write(file='output',z) # store vector z to an output file write.table(rbind(x,y),file='out.dat') # store a table in a file dev.copy2eps(file='out.eps') # store figure as an eps file; or just go to menu and save as a pdf!