Suraj Joshi's Blog: R commands

# Set working directory
# NOTE: machine-specific absolute path; adjust before running elsewhere.
setwd('/Users/surajjoshi/R-repo/')

# Set seed so that the random sampling calls give reproducible results
set.seed(100)

# Import libraries
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at first use.
library(dplyr)
library(ggplot2)
library(scales)

# Download file

# from web
library(downloader)

# NOTE(review): the opening quote and URL scheme were lost in the original
# text; "https://" is assumed.
url <- "https://raw.githubusercontent.com/genomicsclass/dagdata/master/inst/extdata/msleep_ggplot2.csv"
filename <- 'msleep_ggplot2.csv'

# Only fetch the file when it is not already present in the working directory
if (!file.exists(filename)) download(url, filename)
msleep <- read.csv(filename)

# from working directory
msleep <- read.csv(filename)

# Survey of data
# `df`, `main`, `cln2` and `PCs` are assumed to be data frames created
# elsewhere; they are not defined in this section.

head(df)
dim(df)
nrow(df) # row only
ncol(df) # column only
rownames(df)
colnames(df) # for a data frame this is the same as names(df)
class(df$rank)
glimpse(df)
summary(df)
str(df)
# View(df)

# as_tibble() replaces the deprecated tbl_df()
as_tibble(df)

# unique(df$name)
count(df, smoke)

# length() of a data frame is its number of columns, not its number of rows
length(subset(df, readmitted == '<30')) # gives the number of columns
unique(df)

# ggplot2 scale: legend title and labels for a discrete shape aesthetic
scale_shape_discrete(name = 'Admit', labels = c('No', 'Yes'))

# find the number of records
glimpse(main[main$Math > 80, ])
nrow(main[main$Math > 80, ])

# which() drops NA comparisons, so rows with a missing value are not returned
cln2[which(cln2$dose == 100), ]
EUR <- PCs[which(PCs$V13 < 9 & PCs$V13 > 3), ]

# File and directory navigation

dir() # lists files and folders in the current directory
getwd() # returns the current directory
setwd('/Users/surajjoshi/R-repo/') # sets the working directory to the given path
ls() # lists the objects in the current environment
options() # shows the current global options
history() # shows recent commands (interactive sessions only)

# NOTE(review): these look like arguments lifted from a read.csv()/read.table()
# call whose surrounding call is not shown; kept here as standalone vectors.
col.names <- c('abc', 'def')
colClasses <- c(
  "character", "character", "complex",
  "factor", "factor", "character", "integer",
  "integer", "numeric", "character", "character",
  "Date", "integer", "logical"
)

# Exploratory Analysis

# three ways to do one task: base indexing, subset(), and dplyr::filter()
# NOTE(review): the comparison operator was lost when this text was extracted;
# `== 3` is assumed. Confirm against the original post.
highrank <- df[df$rank == 3, ]
subsetrank <- subset(df, rank == 3)
filterrank <- filter(df, rank == 3)

# Number of rows with age between 19 and 20 inclusive
filter(df, age <= 20 & age >= 19) %>% nrow()

# Random sampling
sample_n(df, 10, replace = TRUE)
sample_frac(df, .1, replace = TRUE) %>% dim()
count(df, smoke)

# Apply a function to select columns
apply(df[, c(2, 3, 4)], 1, sum, na.rm = TRUE) # adds each row
apply(df[, c(2, 3, 4)], 2, sum, na.rm = TRUE) # adds each column

# Transformation
# Derive a Division label from Percentage. Conditions are checked in order:
# >= 75, then < 40, then >= 60, then >= 50; anything left is 'Third'.
main2 <- transform(
  main,
  Division = ifelse(Percentage >= 75, 'Distinction',
    ifelse(Percentage < 40, 'Fail',
      ifelse(Percentage >= 60, 'First',
        ifelse(Percentage >= 50, 'Second', 'Third'))))
)
main2

# Merge
# NOTE(review): the argument order of rbind() was lost when this text was
# extracted; rbind() needs all inputs to share the same columns. Confirm.
mainFinal <- rbind(main, main2, main3)
mainFinal2 <- merge(mainFinal, main4, by = "ID")
mainFinal2 <- select(mainFinal2, -Total, -Percentage, -Division)
head(mainFinal2)

# Renaming column name
head(mainFinal2)
colnames(mainFinal2)[3] <- 'Math'
colnames(mainFinal2)[4] <- 'Sci'
head(mainFinal2)

# Adding a column
df$new_column_name <- 1:60 # requires df to have exactly 60 rows

# Removing a column
df$new_column_name <- NULL

# Select
nth(main$Eng, 2) # selecting 2nd row in Eng column

# Selecting rows with certain values
mainFinal2[mainFinal2$Division == 'First', ]

# Writing csv file
# write.csv(carbon, file="carbon.csv")

# Subset

# Filter

# Aggregate

# Visualization

## Basic Visualization

# Positions in `main` whose value equals 80
which(main == 80)

# For each Math score: 1 where it equals 80, NA otherwise
match(main$Math, 80)

# Distinct Eng scores
unique(main$Eng)

# Plot columns 2 to 6 of `main` against ID
plot(main[, 2:6], main$ID)

data("mtcars")

# Histogram of mpg on the density scale (20 is passed positionally as
# `breaks`), with a kernel density estimate drawn on top
hist(mtcars$mpg, 20, freq = FALSE)
lines(density(mtcars$mpg), col = 'blue', lwd = 2)

# Simple linear regression of mpg on hp, with the fitted line on a scatterplot
fit1 <- lm(mpg ~ hp, mtcars)
summary(fit1)
plot(mpg ~ hp, mtcars)
abline(reg = fit1)

# Missing-value checks on hp
summary(mtcars$hp)
any(is.na(mtcars$hp))
sum(is.na(mtcars$hp))
sum(!is.na(mtcars$hp))

# Set the hp == 335 observation to NA, then refit; lm() drops rows with
# missing values under the default na.action
is.na(mtcars$hp) <- which(mtcars$hp == 335)
fit2 <- lm(mpg ~ hp, mtcars)
summary(fit2)
plot(mpg ~ hp, mtcars)
abline(reg = fit2)

# Diagnostic plots for the refitted model
plot(fit2)

# Group main3 by Division
grouped <- group_by(main3, Division)

# NOTE(review): summarise() is given the bare function `last` rather than a
# summary expression such as last(Percentage); confirm the intent.
summarise(grouped, last)
last(grouped$Percentage)

# Within each State, keep the rows whose Value equals the 3rd smallest Value.
# The old `%.%` chain operator was removed from dplyr; `%>%` replaces it.
df %>%
  group_by(State) %>%
  arrange(Value) %>%
  filter(Value == nth(Value, 3))

# Same pattern: rows per Division whose Percentage equals the first
# Percentage after sorting
main3 %>%
  group_by(Division) %>%
  arrange(Percentage) %>%
  filter(Percentage == nth(Percentage, 1))

# Mark values in a range

# NOTE(review): the order of the population values was lost when this text was
# extracted (only the leading 100 is certain); confirm against the original.
table <- data.frame(
  population = c(100, 300, 5000, 2000, 900, 2500),
  habitat = c(1, 2, 3, 4, 5, 6)
)

# Manual binning: one assignment per range
table$size[table$population < 500] <- 1
table$size[table$population >= 500 & table$population <= 1000] <- 2
table$size[table$population > 1000 & table$population <= 3000] <- 3
table$size[table$population > 3000] <- 4

# Same idea with findInterval(): returns the index of the interval each value
# falls in (0 when below the first break). The breaks must be sorted, and
# rightmost.closed = TRUE puts a value equal to the last break in the last
# interval rather than past it.
table$size2 <- findInterval(table$population, c(500, 1000, 2000, 3000, 5000),
                            rightmost.closed = TRUE)

# Overwrites the manually assigned `size` column with the findInterval() bins
table$size <- findInterval(table$population, c(500, 1000, 2000, 3000, 5000),
                           rightmost.closed = TRUE)

# Remove Duplicates

# NOTE(review): the order of the three ranges was lost when this text was
# extracted; confirm against the original post.
x <- c(1:5, 3:7, 0:8)

# Keep the first occurrence of each value; the outer parentheses print the
# result as well as assigning it
(xu <- x[!duplicated(x)])

R commands

No comments: