# Variables: assigning to an existing name replaces the earlier value.
var1 <- 5
var1 <- 50
print(var1)
#[1] 50
var <- 55
# var4 must exist before the first expression that reads it, otherwise R
# stops with "object 'var4' not found".
var4 <- 55
print(var1 + var4)
#[1] 105
# print() shows one object; its second positional argument is `digits`,
# which is why passing two values fails:
#print(var1, var4)
#Error in print.default(var1, var4) : invalid printing digits 55
# cat() accepts several values and writes them separated by a space.
cat(var1, var4) #50 55
# Inside quotes the expression is plain text and is not evaluated.
print("var1 + var4")
cat("var1 + var4=", var1 + var4)
#var1 + var4= 105
#class
# class() reports the type of the value a variable currently holds.
# var2 has to be assigned before it is inspected.
var2 <- 6
print(class(var2)) #[1] "numeric"
var2 <- 6.0
print(class(var2)) #[1] "numeric"
var2 <- 6L # the L suffix makes an integer literal
print(class(var2)) #[1] "integer"
var2 <- "6L" # quoted, so this is text and not a number
print(class(var2)) #[1] "character"
var2 <- TRUE
print(class(var2)) #[1] "logical"
# Arithmetic operators.
# %% is modulo: the remainder left after dividing var1 by var2.
var2 <- 15
for (var1 in c(10, 100, 95)) {
  print(var1 %% var2)
}
# ^ raises var1 to the power var2.
var1 <- 5
print(var1^var2)

# Relational (comparison) operators: the result is always logical.
var1 <- 15
var2 <- 20
var3 <- 15
print(var1 > var2)  # is var1 greater than var2? FALSE
print(var1 >= var3)
print(var1 <= var3)
print(var1 == var3) # == asks "is it equal?"; a single = would assign
print(var1 != var3)

# Logical operators: operands and result are both logical.
# "I will do work 1 AND work 2 today"; only work 1 got done => promise broken.
# "I will do work 1 OR work 2 today";  only work 1 got done => promise kept.
print((var1 == var3) | (var1 != var3)) # OR: one true side is enough
print((var1 == var3) & (var1 != var3)) # AND: both sides must be true
#CONDITIONAL STATEMENTS
var1 <- 0
# Plain if: the body runs only when the condition is TRUE, so nothing is
# printed for 0.
if (var1 > 0) {
  print("Its positive")
}
# if / else: exactly one of the two branches runs.
if (var1 > 0) {
  print("Its positive")
} else {
  print("Its not positive")
}
# if / else if / else: conditions are tried top to bottom and the first
# TRUE one wins; the final else catches everything that is left (here: zero).
if (var1 > 0) {
  print("Its positive")
} else if (var1 < 0) {
  print("Its negative")
} else {
  print("Its zero")
}
#Collections: Vectors, Lists, Matrices, Arrays, Factors & DataFrames
#Vectors: store multiple values of the same data type
vec1 <- c(45, 56, 36)
print(vec1)
#List: elements may have different data types (even a whole vector)
list1 <- list(45, 56, "Hello", c(2, 4, 6))
print(list1)
#Matrix: 2-D; byrow = FALSE fills column by column
mat1 <- matrix(c(2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 11, 11),
               nrow = 3, ncol = 4, byrow = FALSE)
print(mat1)
#Arrays: more than 2-D. dim gives 2 * 4 * 2 * 2 = 32 cells, so the 12 values
# supplied are recycled to fill them.
arr1 <- array(c(2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 11, 11), dim = c(2, 4, 2, 2))
print(arr1)
#Factors: categorical values
gender <- factor(c("M", "M", "M", "F", "F", "F"))
print(class(gender))
print(nlevels(gender)) # number of distinct categories
#DataFrame: a table whose columns are equal-length vectors
players_stats <- data.frame(
  ID = c(10, 20, 30),
  Name = c("Sachin", "Virat", "Dhoni")
)
print(players_stats)
#membership: %in% checks whether the left-hand value occurs on the right
# NOTE(review): the vector holds "Delhii" (double i), so the lookup of
# "Delhi" prints FALSE. Confirm the spelling is intentional.
cities <- c("Delhii", "New York", "London")
print("Delhi" %in% cities)
avg <- 98
## Grade by average: >=80: A, 70-80: B, 60-70: C, 50-60: D, 40-50: E, <40: Failed
# The chain is tried from the highest band down, so each branch only needs a
# lower bound. The extra awards are nested inside the A branch.
if (avg >= 80) {
  print("Grade: A")
  if (avg >= 90) {
    print("You win special certificate!")
    if (avg >= 95) {
      print("You win medal")
    }
  }
} else if (avg >= 70) {
  print("Grade: B")
} else if (avg >= 60) {
  print("Grade: C")
} else if (avg >= 50) {
  print("Grade: D")
} else if (avg >= 40) {
  print("Grade: E")
} else {
  print("Failed")
}
# With a number as first argument, switch() returns the alternative at that
# position (3 selects the third one).
result <- 3
val1 <- switch(result,
               "Grade A", "Grade B", "Grade C", "Grade D", "Grade E", "Grade F")
cat("Result – ", val1)
#Loops: run the same statements several times
# repeat: loops until break is reached; the test sits inside the body, so the
#         body always runs at least once (exit controlled)
# while:  tests the condition before every pass (entry controlled)
# for:    runs once per element of a sequence

# repeat: prints 1..10; leaves before the increment once 10 has been shown
start <- 1
repeat {
  print(start)
  if (start != 10) {
    start <- start + 1
  } else {
    break
  }
}

# while: prints 11..20
start <- 11
while (!(start > 20)) {
  print(start)
  start <- start + 1
}

#For loop over a character vector: the first five capital letters
words <- LETTERS[1:5]
for (i in words) {
  print(i)
}

# For loop over a numeric sequence: 1, 4, 7, 10
numbers <- seq(from = 1, to = 10, by = 3)
for (i in numbers) {
  print(i)
}
# Primality test by trial division, written with repeat.
num <- 30
start <- 2
# Numbers below 2 are not prime, so they start out as FALSE.
isPrime <- num >= 2
repeat {
  # Once start^2 exceeds num no divisor can remain. This exit also covers
  # num = 2 (prime) and num < 2, which would otherwise be misreported or
  # loop forever.
  if (start * start > num) {
    break
  }
  if (num %% start == 0) {
    isPrime <- FALSE
    break
  }
  start <- start + 1
}
if (isPrime) {
  print("Number is Prime")
} else {
  print("Number is not Prime")
}
## Assignment 1: Do the above with WHILE and FOR
## Assignment 2: Extend the same logic (one of the 3) to generate prime numbers
## between 1000 and 1500
# Print every prime in 10..20.
for (num in 10:20) {
  Isprime <- TRUE
  # Test the loop variable itself, and start dividing at 2 so that even
  # numbers are rejected. 2:(num - 1) is a valid ascending range here because
  # num is never below 10.
  for (a in 2:(num - 1)) {
    if (num %% a == 0) {
      Isprime <- FALSE
      break # one divisor is enough
    }
  }
  if (Isprime) {
    print(num)
  }
}
########################
#Built-in function
# print(x) needs the object to show as its argument (its "parameter").
# A bare print() stops the script with:
#   argument "x" is missing, with no default
#print() #parameter
# Report whether `num` is a prime number.
#
# Prints "num is prime" or "num is not Prime". The text is fixed; the value
# of `num` is not inserted into it. print() hands its argument back
# invisibly, so that same string is also the value of the function.
#
# num: a single number to test. Values below 2 are reported as not prime.
myfunc.generatePrime <- function(num) {
  isPrime <- num >= 2
  if (isPrime) {
    # Candidate divisors 2..floor(sqrt(num)). seq_len(k)[-1] is empty for
    # k < 2, so 2 and 3 skip the loop instead of running a descending range.
    for (i in seq_len(floor(sqrt(num)))[-1]) {
      if (num %% i == 0) {
        isPrime <- FALSE
        break # one divisor settles it
      }
    }
  }
  if (isPrime) {
    print("num is prime")
  } else {
    print("num is not Prime")
  }
}
# mean() is a built-in that returns a value; store it in val and show it.
print(val <- mean(1:100))
# Call the user-defined function, passing the argument by name.
myfunc.generatePrime(num = 30)
# Test whether `num` is a prime number.
#
# num: a single number.
# Returns TRUE when num is prime, FALSE otherwise (including every value
# below 2). Nothing is printed, so the result can be used in conditions.
myfunc.checkPrime2 <- function(num) {
  if (num < 2) {
    return(FALSE)
  }
  # Candidate divisors 2..floor(sqrt(num)). seq_len(k)[-1] is empty for
  # k < 2, so 2 and 3 fall straight through to TRUE.
  for (i in seq_len(floor(sqrt(num)))[-1]) {
    if (num %% i == 0) {
      return(FALSE)
    }
  }
  TRUE
}
# Because myfunc.checkPrime2() returns a logical, the caller decides what to
# do with the answer.
output <- myfunc.checkPrime2(53)
if (output) {
  print("num is prime")
} else {
  print("num is not Prime")
}
# Print every prime from 1000 to 1300.
# NOTE(review): the assignment text earlier in the file asks for 1000 to
# 1500. Confirm which upper bound is intended.
for (num in 1000:1300) {
  output <- myfunc.checkPrime2(num)
  if (output) {
    print(num)
  }
}
###### ##################### ################
#built in functions
# seq(10, 90) is the sequence 10, 11, ..., 90; show it, then its largest
# value and its average. local() keeps the helper name out of the workspace.
local({
  tens_to_ninety <- seq(10, 90)
  print(tens_to_ninety)
  print(max(tens_to_ninety))
  print(mean(tens_to_ninety))
})
#user defined functions
# Echo the four arguments, then print the product of the first two.
#
# num1, num2, num3, num4: numbers with defaults 1, 2, 4 and 6. All four are
#   echoed with cat(); only num1 and num2 enter the result.
# Returns num1 * num2 invisibly (the value handed back by print()).
#
# NOTE(review): despite the name, the result is a product and not a sum, and
# num3 / num4 do not contribute. Confirm this is intended.
sum.func <- function(num1 = 1, num2 = 2, num3 = 4, num4 = 6) {
  cat("Number 1 = ", num1)
  cat("\n Number 2 = ", num2, "\n")
  cat("Number 3 = ", num3)
  cat("\n Number 4 = ", num4, "\n")
  result <- num1 * num2
  print(result)
}
# Positional call: values are matched to parameters left to right, so these
# two fill num1 and num2 and the remaining parameters keep their defaults.
sum.func(num1 = 40, num2 = 30)
# Call by name: any subset of parameters, in any order; the others keep
# their defaults.
sum.func(num4 = 30, num2 = 40)
## Assignments: Logic built using loops- convert them to
## functions
# #####################
# Strings: double quotes let an apostrophe appear unescaped.
a <- "Whats your name"
b <- "What's your name?"
print(paste(a, b, sep = ":")) # join with a separator
print(substring(a, 2, 6))     # characters 2 through 6
print(tolower(a))
print(toupper(a))
# A vector holds a single type, so mixing text, logical and numeric values
# turns every element into character.
vector1 <- c("Monday", TRUE, 5, "Thursday")
print(vector1)
print(vector1[2])       # second element
print(vector1[-2])      # everything except the second element
print(vector1[c(2, 4)]) # second and fourth elements
# A list keeps each element's own type.
list1 <- list("Monday", TRUE, 5, "Thursday")
print(list1)
library(ggplot2)
# Bar chart of revenue per city. stat = "identity" makes the bar height the
# revenue value itself rather than a count of rows.
dataset2 <- data.frame(
  city = c("City A", "City B", "City C"),
  revenue = c(200, 220, 190)
)
ggplot(dataset2, aes(x = city, y = revenue)) +
  geom_bar(stat = "identity")
##############################
# VECTORS
# One text element turns the whole vector into character.
vec1 <- c(2, 4, "HELLO", 5, 6)
print(vec1)
#built-in: the colon operator counts in steps of 1
vec2 <- 5:50
print(vec2)
# With fractional ends it starts at 5.4 and stops at the last value that
# does not exceed 30.8 (30.4).
vec2 <- 5.4:30.8
print(vec2)
#start, end and increment by
vec3 <- seq(5, 30.2, by = 0.9)
print(vec3)
# Indexing with a vector of positions
vec1 <- c(2, 4, "HELLO", 5, 6, 9, 11)
print(vec1[c(2, 3, 6)])
# Arithmetic works element by element
vec1 <- c(2, 4, 6, 8, 10)
vec2 <- c(1, 2, 1, 2, 0)
print(vec1 + vec2)
# The shorter vector is recycled: 1, 2, 1, 2, 1, 2
vec1 <- c(2, 4, 6, 8, 10, 12)
vec2 <- c(1, 2)
print(vec1 + vec2)
# Sorting, ascending then descending
vec1 <- c(2, 4, 16, 18, 10, 12)
vec3 <- sort(vec1)
print(vec3)
vec3 <- sort(vec1, decreasing = TRUE)
print(vec3)
## LIST
# Elements of a list may be of different types, including whole vectors.
list1 <- list(55, "Hello", c(2, 4, 6), 5.4)
print(list1)
print(list1[c(1, 3)]) # single brackets return a shorter list
# c() on two lists joins them into one list.
list2 <- list(33, 99)
mergedlist <- c(list1, list2)
print(mergedlist)
###MATRICES
# Same six values, filled column by column (byrow = FALSE) and row by row
# (byrow = TRUE).
mat1 <- matrix(c(2, 4, 6, 8, 10, 12), nrow = 3, byrow = FALSE)
print(mat1)
mat2 <- matrix(c(2, 4, 6, 8, 10, 12), nrow = 3, byrow = TRUE)
print(mat2)
# These operators work cell by cell on matrices of the same shape.
# (* is the element-wise product, not matrix multiplication.)
print(mat1 + mat2)
print(mat1 - mat2)
print(mat1 * mat2)
print(mat1 / mat2)
## ARRAY
# A 2 x 2 x 2 array has 8 cells; only the first 8 of the supplied values
# (2..9) are used.
arr1 <- array(2:20, dim = rep(2, times = 3))
print(arr1)
# Single cell at row 1, column 2, layer 1
print(arr1[1, 2, 1])
# An empty index keeps every row: the whole of column 2 in layer 1
print(arr1[, 2, 1])
# index order: c(row, column, layer), e.g. c(1,2,1)
## Factors
# A factor stores categorical values.
regions <- factor(c("N", "S", "S", "W", "N", "E", "E", "E"))
print(is.factor(regions))
# Data frame: one row per quarter, one column per variable.
dataset1 <- data.frame(
  quarter = c("Q1", "Q2", "Q3", "Q4"),
  revenue = c(100, 150, 200, 170),
  fruits = c("Apple", "Banana", "Mango", "Oranges")
)
print(dataset1)
# [rows, columns]: leaving one side empty keeps all of that dimension.
shorterrow <- dataset1[2:3, ]  # rows 2 and 3, every column
print(shorterrow)
print(dataset1[, c(2, 3)])     # every row, columns 2 and 3
# Data pre-processing walk-through.
# NOTE(review): setwd() ties the script to one machine's folder layout;
# consider passing a full path to read.csv() instead.
setwd("D:\\dataset")
dataset <- read.csv("1_Data_PreProcessing.csv")
print(dataset)
# Step 1: replace missing values with the mean of the non-missing values in
# the same column.
dataset$Salesperson[is.na(dataset$Salesperson)] <-
  mean(dataset$Salesperson, na.rm = TRUE)
dataset$Quotation[is.na(dataset$Quotation)] <-
  mean(dataset$Quotation, na.rm = TRUE)
#connecting to SQL Server
#ipaddress, username, password, dbname
#install and run library - RODBC
#sql_connection = odbcConnect("SQLSERVERODBC")
#sqlQuery(sql_connection, "Select * from table1")
# Step 2: handling the categorical value
dataset$Region <- factor(dataset$Region)
# Step 3: breaking into training and test set
# The split is random; call set.seed() first if the same split is needed on
# every run.
library(caTools)
split <- sample.split(dataset$Win, SplitRatio = 0.8)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)
# Step 4: Feature Scaling
# to bring the columns into a similar range
### 1. divide the column with higher values (here Quotation) by 1000
### 2. Min-Max Scaling - values range between 0 and 1
### 3. Z Score normalization - preferred
# Columns 2:3 are standardised. The test set must be transformed with the
# mean and standard deviation learned from the training set; scaling it with
# its own statistics puts the two sets on different scales.
scaled_train <- scale(training_set[, 2:3])
training_set[, 2:3] <- scaled_train
test_set[, 2:3] <- scale(
  test_set[, 2:3],
  center = attr(scaled_train, "scaled:center"),
  scale = attr(scaled_train, "scaled:scale")
)
# print() explicitly so the table also shows when the file is run via source()
print(test_set)
# Simple linear regression: marks obtained as a function of hours studied.
# NOTE(review): setwd() ties the script to one machine's folder layout.
setwd("D:\\dataset")
dataset <- read.csv("2_Marks_Data.csv")
scatter.smooth(x = dataset$Hours, y = dataset$Marks,
               main = "Hours Studied v Marks Obtained")
#split the dataset into training set and test set
library(caTools)
split <- sample.split(dataset$Marks, SplitRatio = 0.8)
# The filter needs the comparison operator ==. Written as split=TRUE it is
# passed to subset() as an unused named argument and every row is kept, so
# both sets would be the full data.
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)
#create regression object
regressor <- lm(formula = Marks ~ Hours, data = training_set)
summary(regressor)
# Example fit from one run: y = 20.76 + 7.57x
# (coefficients depend on the random split, so they vary between runs)
#
# While solving machine learning problem –
## 1. Is my data in a ready state to run the algorithm
## 2. Run the algorithm and check the values
#### 2.1. Is this the best performance of this model (can I improve this model)
#### 2.2: Is this the best model
## 3. Evaluate the performance of the algorithm
## RMSE and R-squared (0 to 1) – closer to 1 means better performance
## training performance v test performance – over fitting and under fitting