##### Learn R Programming

# Variables: assigning to an existing name overwrites the earlier value.
var1 <- 5
var1 <- 50
print(var1)
# 50

var <- 55

# var4 must exist before any expression reads it, so it is assigned ahead of
# the first print() that uses it.
var4 <- 55
print(var1 + var4)
# 105

# print() displays one object; a second positional argument is taken as
# `digits`, which is why the call below fails:
# print(var1, var4)
# Error in print.default(var1, var4) : invalid printing digits 55

# cat() accepts several values and writes them separated by a space.
cat(var1, var4)  # 50 55

# A quoted expression is plain text; it is printed, not evaluated.
print("var1 + var4")

cat("var1 + var4=", var1 + var4)
# var1 + var4= 105

# class() reports the type of the value a name currently refers to.

# NOTE(review): the assignment that produced the recorded "list" output is
# missing from this file; a one-element list is assumed here - confirm.
var2 <- list(6)
print(class(var2))  # "list"

var2 <- 6
print(class(var2))  # "numeric"

var2 <- 6.0
print(class(var2))  # "numeric"

# The L suffix makes the literal an integer instead of a double.
var2 <- 6L
print(class(var2))  # "integer"

# Inside quotes the same characters are just text.
var2 <- "6L"
print(class(var2))  # "character"

var2 <- TRUE
print(class(var2))  # "logical"

# %% is the modulo operator: the remainder left after dividing the left
# operand by the right operand.
var1 <- 10
var2 <- 15
print(var1 %% var2)  # 10 is smaller than 15, so the remainder is 10

var1 <- 100
print(var1 %% var2)  # 100 = 6 * 15 + 10

var1 <- 95
var2 <- 15
print(var1 %% var2)  # 95 = 6 * 15 + 5

# ^ raises the left operand to the power of the right operand.
var1 <- 5
var2 <- 15
print(var1^var2)

var1 <- 15
var2 <- 20
var3 <- 15

# Relational (comparison) operators: the result is always logical.
print(var1 > var2)   # is var1 greater than var2? FALSE
print(var1 >= var3)
print(var1 <= var3)
print(var1 == var3)  # the doubled = asks "are these equal?"
print(var1 != var3)

# Logical operators: both the inputs and the output are logical.
#   "I will do work 1 AND work 2 today" - did only work 1 => No
#   "I will do work 1 OR work 2 today"  - did only work 1 => Yes
print((var1 == var3) | (var1 != var3))
print((var1 == var3) & (var1 != var3))

# CONDITIONAL STATEMENTS
var1 <- 0

# Is it positive or not? A plain `if` does nothing when the test is FALSE.
if (var1 > 0) {
  print("Its positive")
}

# if / else: exactly one of the two branches runs.
if (var1 > 0) {
  print("Its positive")
} else {
  print("Its not positive")
}

# if / else if / else: the first condition that holds wins.
if (var1 > 0) {
  print("Its positive")
} else if (var1 < 0) {
  print("Its negative")
} else {
  print("Its zero")
}

# Collections: Vectors, Lists, Matrices, Arrays, Factors & DataFrames

# Vectors: store multiple values of the same data type.
vec1 <- c(45, 56, 36)
print(vec1)

# List: elements may have different data types.
list1 <- list(45, 56, "Hello", c(2, 4, 6))
print(list1)

# Matrix: two dimensions; byrow = FALSE fills column by column.
mat1 <- matrix(c(2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 11, 11),
               nrow = 3, ncol = 4, byrow = FALSE)
print(mat1)

# Arrays: more than 2-D. The 12 values are recycled to fill the
# 2 x 4 x 2 x 2 = 32 cells.
arr1 <- array(c(2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 11, 11), dim = c(2, 4, 2, 2))
print(arr1)

# Factors: categorical values.
gender <- factor(c("M", "M", "M", "F", "F", "F"))
print(class(gender))
print(nlevels(gender))

# DataFrame: columns of equal length, each with its own type.
players_stats <- data.frame(
  ID = c(10, 20, 30),
  Name = c("Sachin", "Virat", "Dhoni")
)
print(players_stats)

# Membership: %in% checks whether the left-hand value occurs on the right.
cities <- c("Delhii", "New York", "London")
# Prints FALSE: the vector holds "Delhii", which is not equal to "Delhi".
print("Delhi" %in% cities)

avg <- 98

## Grades by average: >=80 A, 70-80 B, 60-70 C, 50-60 D, 40-50 E, <40 Failed
if (avg >= 80) {
  print("Grade A")
  # Nested conditions: extra awards inside the top grade band.
  if (avg >= 90) {
    print("You win special certificate!")
    if (avg >= 95) {
      print("You win medal")
    }
  }
} else if (avg >= 70) {
  print("Grade B")
} else if (avg >= 60) {
  print("Grade C")
} else if (avg >= 50) {
  print("Grade D")
} else if (avg >= 40) {
  print("Grade E")
} else {
  print("Failed")
}

result <- 3

# With a numeric first argument, switch() returns the alternative at that
# position.
# NOTE(review): the original alternatives are missing from this file; the
# labels below are placeholders - replace them with the intended values.
val1 <- switch(result,
  "First",
  "Second",
  "Third",
  "Fourth"
)

cat("Result – ", val1)

# Loops - three ways to repeat work:
#   repeat: runs until a break is reached (exit controlled)
#   while:  tests its condition before every pass (entry controlled)
#   for:    runs once per element of a sequence

# repeat: prints 1 to 10 and leaves the loop once 10 has been printed.
start <- 1
repeat {
  print(start)
  if (start == 10) break
  start <- start + 1
}

# while: prints 11 to 20.
start <- 11
while (start <= 20) {
  print(start)
  start <- start + 1
}

# for over a character vector: the first five capital letters.
words <- head(LETTERS, 5)
for (i in words) print(i)

# for over a numeric sequence: 1, 4, 7, 10.
numbers <- seq(from = 1, to = 10, by = 3)
for (i in numbers) print(i)

# Prime check with a repeat loop: try divisors 2, 3, ... and stop at the first
# one that divides num evenly.
num <- 30
start <- 2

# Numbers below 2 are not prime by definition.
isPrime <- num >= 2

repeat {
  # Once start * start exceeds num no divisor is left to find. This exit also
  # covers num < 4, so 2 and 3 are reported as prime and num = 1 cannot loop
  # forever.
  if (start * start > num) {
    break
  }
  if (num %% start == 0) {
    isPrime <- FALSE
    break
  }
  start <- start + 1
}

if (isPrime) {
  print("Number is Prime")
} else {
  print("Number is not Prime")
}

## Assignment 1: Do the above with WHILE and FOR
## Assignment 2: Extend the same logic (one of the 3) to generate prime numbers
## between 1000 and 1500

# Prints every prime in 10:20. Each candidate `num` is tested itself (not a
# fixed number), and the divisor scan starts at 2 so even numbers are rejected.
for (num in 10:20) {
  Isprime <- TRUE
  # num is at least 10 here, so 2:(num - 1) is always an ascending range.
  for (a in 2:(num - 1)) {
    if (num %% a == 0) {
      Isprime <- FALSE
      break  # one divisor is enough to rule the candidate out
    }
  }
  if (Isprime) {
    print(num)
  }
}

########################

# Built-in function
# print() needs the object to display as its parameter; calling it with no
# argument is an error because `x` has no default.
print("Hello")  # "Hello" is the parameter

# Report whether `num` is a prime number.
#
# Args:
#   num: a single number to test.
#
# Prints "num is prime" or "num is not Prime" and, because print() returns its
# argument invisibly, returns that same message invisibly.
myfunc.generatePrime <- function(num) {
  # Numbers below 2 are never prime; this also avoids scanning a descending
  # 2:(num - 1) range for small inputs.
  isPrime <- num >= 2

  # Trial division up to sqrt(num); stops at the first divisor found.
  i <- 2
  while (isPrime && i * i <= num) {
    if (num %% i == 0) {
      isPrime <- FALSE
    }
    i <- i + 1
  }

  if (isPrime) {
    print("num is prime")
  } else {
    print("num is not Prime")
  }
}

# Average of the whole numbers 1 to 100.
val <- mean(seq_len(100))
print(val)

# Run the user-defined prime report on 30.
myfunc.generatePrime(num = 30)

# Test whether `num` is a prime number.
#
# Args:
#   num: a single number to test.
#
# Returns:
#   TRUE when num is prime, FALSE otherwise (including every num below 2).
myfunc.checkPrime2 <- function(num) {
  # Guard first: for num < 3 the range 2:(num - 1) would count downwards and
  # either misreport 2 or divide by zero.
  if (num < 2) {
    return(FALSE)
  }

  # Trial division up to sqrt(num); leave at the first divisor found.
  i <- 2
  while (i * i <= num) {
    if (num %% i == 0) {
      return(FALSE)
    }
    i <- i + 1
  }
  TRUE
}

# Use the logical returned by myfunc.checkPrime2() to choose the message.
output <- myfunc.checkPrime2(53)
if (output) {
  print("num is prime")
} else {
  print("num is not Prime")
}

# Print every prime from 1000 to 1300.
for (num in 1000:1300) {
  output <- myfunc.checkPrime2(num)
  if (output) {
    print(num)
  }
}

######   #####################  ################

# Built-in functions applied to the whole numbers 10 to 90.
print(10:90)             # the sequence itself
print(max(seq(10, 90)))  # largest value
print(mean(seq(10, 90))) # average value

#user defined functions

# Show four numbers and print their sum.
#
# Args:
#   num1, num2, num3, num4: numbers to add; each has a default, so any subset
#     may be supplied by position or by name.
#
# Prints each argument followed by the total. The total is also returned
# invisibly, since print() returns its argument.
sum.func <- function(num1 = 1, num2 = 2, num3 = 4, num4 = 6) {
  cat("Number 1 = ", num1)
  cat("\n Number 2 = ", num2, "\n")
  cat("Number 3 = ", num3)
  cat("\n Number 4 = ", num4, "\n")

  # Add all four arguments, as the function name promises.
  result <- num1 + num2 + num3 + num4
  print(result)
}

# Positional call: 40 and 30 fill num1 and num2, the rest keep their defaults.
sum.func(40, 30)

# Call by name: only the named parameters are overridden, in any order.
sum.func(num4 = 30, num2 = 40)

## Assignments: Logic built using loops- convert them to

## functions

# #####################

# String helpers.
# NOTE(review): the file never assigns text to `a` and `b`; the sample values
# below are placeholders - replace them with the intended strings.
a <- "Learning"
b <- "R Programming"

print(paste(a, b, sep = ":"))  # join the two strings with ":" between them
print(substring(a, 2, 6))      # characters 2 through 6 of a
print(tolower(a))
print(toupper(a))

# A vector holds one type only: mixing text, logical and numeric values
# coerces every element to character.
vector1 <- c("Monday", TRUE, 5, "Thursday")
print(vector1)

# A negative index drops that position.
print(vector1[-2])

# A vector of indices picks several positions at once.
print(vector1[c(2, 4)])

# A list keeps each element's own type.
list1 <- list("Monday", TRUE, 5, "Thursday")
print(list1)

## VIDEO RECORDING OF THE SESSION

library(ggplot2)

# Revenue per city, drawn as one bar per city.
dataset2 <- data.frame(
  city = c("City A", "City B", "City C"),
  revenue = c(200, 220, 190)
)

# stat = "identity" uses the revenue values as bar heights instead of
# counting rows.
ggplot(dataset2, aes(x = city, y = revenue)) +
  geom_bar(stat = "identity")

##############################

# VECTORS

# One character element turns the whole vector into character.
vec1 <- c(2, 4, "HELLO", 5, 6)
print(vec1)

# Built-in: the colon operator builds a sequence in steps of 1.
vec2 <- 5:50
print(vec2)

# With decimals it starts at 5.4 and stops at the last value not above 30.8.
vec2 <- 5.4:30.8
print(vec2)

# seq(): start, end and increment.
vec3 <- seq(5, 30.2, by = 0.9)
print(vec3)

# Pick elements 2, 3 and 6.
vec1 <- c(2, 4, "HELLO", 5, 6, 9, 11)
print(vec1[c(2, 3, 6)])

# Arithmetic on vectors works element by element.
vec1 <- c(2, 4, 6, 8, 10)
vec2 <- c(1, 2, 1, 2, 0)
print(vec1 + vec2)

# The shorter vector is recycled: 1, 2, 1, 2, 1, 2.
vec1 <- c(2, 4, 6, 8, 10, 12)
vec2 <- c(1, 2)
print(vec1 + vec2)

# Sorting: ascending by default, descending on request.
vec1 <- c(2, 4, 16, 18, 10, 12)
vec3 <- sort(vec1)
print(vec3)
vec3 <- sort(vec1, decreasing = TRUE)
print(vec3)

## LIST

# A list may mix numbers, text and whole vectors.
list1 <- list(55, "Hello", c(2, 4, 6), 5.4)
print(list1)

# Single brackets with several indices return a shorter list.
print(list1[c(1, 3)])

# c() joins two lists into one.
list2 <- list(33, 99)
mergedlist <- c(list1, list2)
print(mergedlist)

###MATRICES

# Same six values, filled column by column ...
mat1 <- matrix(c(2, 4, 6, 8, 10, 12), nrow = 3, byrow = FALSE)
print(mat1)

# ... and filled row by row.
mat2 <- matrix(c(2, 4, 6, 8, 10, 12), nrow = 3, byrow = TRUE)
print(mat2)

# Arithmetic on matrices of equal shape is element by element.
print(mat1 + mat2)
print(mat1 - mat2)
print(mat1 * mat2)
print(mat1 / mat2)

## ARRAY

# A 2 x 2 x 2 array; only the first 8 of the supplied values 2..20 are used.
arr1 <- array(2:20, dim = c(2, 2, 2))
print(arr1)

# One cell: row 1, column 2, layer 1.
print(arr1[1, 2, 1])

# An empty index keeps that whole dimension: column 2 of layer 1.
print(arr1[, 2, 1])

# c(1,2,1)

##  Factors

# Factor: categorical values stored with a fixed set of levels.
regions <- factor(c("N", "S", "S", "W", "N", "E", "E", "E"))
print(is.factor(regions))

# Data frame: one row per quarter.
dataset1 <- data.frame(
  quarter = c("Q1", "Q2", "Q3", "Q4"),
  revenue = c(100, 150, 200, 170),
  fruits = c("Apple", "Banana", "Mango", "Oranges")
)
print(dataset1)

# Rows 2 and 3, all columns.
shorterrow <- dataset1[2:3, ]
print(shorterrow)

# All rows, columns 2 and 3.
print(dataset1[, c(2, 3)])

## Class Video 6 - Introduction to Machine Learning - 2

setwd("D:\\dataset")

# NOTE(review): `dataset` is never created in this file; presumably a
# read.csv() call belongs here - confirm the file name before running.
print(dataset)

# Step 1: replace missing values in a column with that column's mean.
dataset$Salesperson <- ifelse(is.na(dataset$Salesperson),
                              mean(dataset$Salesperson, na.rm = TRUE),
                              dataset$Salesperson)

dataset$Quotation <- ifelse(is.na(dataset$Quotation),
                            mean(dataset$Quotation, na.rm = TRUE),
                            dataset$Quotation)

# Connecting to SQL Server
# install and run library - RODBC
# sql_connection = odbcConnect("SQLSERVERODBC")
# sqlQuery(sql_connection, "Select * from table1")

# Step 2: handling the categorical value
dataset$Region <- factor(dataset$Region)

# Step 3: breaking into training and test set
library(caTools)
split <- sample.split(dataset$Win, SplitRatio = 0.8)
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Step 4: Feature Scaling - to bring the columns into a similar range
### 1. divide the column with higher values, in this case Quotation, by 1000
### 2. Min-Max Scaling - values range between 0 and 1
### 3. Z Score normalization - preferred
# The test set is scaled with the training set's centre and spread so both
# sets share one scale and nothing is learned from the test rows.
# NOTE(review): assumes columns 2:3 are the numeric columns - confirm.
training_scaled <- scale(training_set[, 2:3])
test_set[, 2:3] <- scale(test_set[, 2:3],
                         center = attr(training_scaled, "scaled:center"),
                         scale = attr(training_scaled, "scaled:scale"))
training_set[, 2:3] <- training_scaled

test_set

## COMPLETE MACHINE LEARNING NOTES HERE

setwd("D:\\dataset")

# NOTE(review): `dataset` is never created in this file; presumably a
# read.csv() call belongs here - confirm the file name before running.
scatter.smooth(x = dataset$Hours, y = dataset$Marks,
               main = "Hours Studied v Marks Obtained")

# Split the dataset into training set and test set
library(caTools)
split <- sample.split(dataset$Marks, SplitRatio = 0.8)

# The row filter must be a comparison (==). Written as `split = TRUE` it is
# passed as an unused named argument and every row is kept in both sets.
training_set <- subset(dataset, split == TRUE)
test_set <- subset(dataset, split == FALSE)

# Create regression object: Marks explained by Hours
regressor <- lm(formula = Marks ~ Hours, data = training_set)
summary(regressor)

# y = 20.76 + 7.57x

#

# While solving machine learning problem –

## 1. Is my data in a ready state to run the algorithm

## 2. Run the algorithm and check the values

####  2.1. Is this the best performance of this model (can I improve this model)

####  2.2: Is this the best model

## 3. Evaluate the performance of the algorithm

## RMSE and R-squared (0 to 1) – for R-squared, closer to 1 means better performance

## training performance v test performance – over fitting and under fitting

## Video on Regression with R

setwd("D:\\dataset")

# NOTE(review): `dataset` is never created in this file; presumably a
# read.csv() call belongs here - confirm the file name before running.
print(dataset)

scatter.smooth(x = dataset$Hours, y = dataset$Marks,
               main = "Hours Studied v Marks Obtained")

# Split the dataset into training set and test set
library(caTools)
split <- sample.split(dataset$Marks, SplitRatio = 0.75)

# Logical indexing: rows flagged TRUE go to training, the rest to test.
# training_set = subset(dataset, split == TRUE)
training_set <- dataset[split, ]
print(training_set)

test_set <- dataset[!split, ]
print(test_set)

# Create regression object: Marks explained by Hours
regressor <- lm(formula = Marks ~ Hours, data = training_set)
summary(regressor)

# y = 20.76 + 7.57x

#

# While solving machine learning problem –

## 1. Is my data in a ready state to run the algorithm

## 2. Run the algorithm and check the values

####  2.1. Is this the best performance of this model (can I improve this model)

####  2.2: Is this the best model

## 3. Evaluate the performance of the algorithm

## RMSE and R-squared (0 to 1) – for R-squared, closer to 1 means better performance

## training performance v test performance – over fitting and under fitting

# Evaluate on the test set: predict Marks for rows the model has not seen.
y_predict <- predict(regressor, newdata = test_set)
# y_predict = predict(regressor, newdata = training_set)

# Put actual and predicted values side by side.
comparison <- cbind(test_set, y_predict)
print(comparison)

# Mean squared error: average of the squared differences.
mse <- mean((comparison$Marks - comparison$y_predict)^2)
print(mse)

# Mean absolute percentage error; MAPE() takes predictions first, then the
# actual values.
library(MLmetrics)
mape.value <- MAPE(comparison$y_predict, comparison$Marks)
print(mape.value)

# Evaluate on the training set, for comparison with the test-set figures.
y_predict <- predict(regressor, newdata = training_set)

# The predictions were made for the training rows, so they are bound to
# training_set; binding them to test_set would pair them with the wrong rows.
comparison <- cbind(training_set, y_predict)
print(comparison)

# Mean squared error: average of the squared differences.
mse <- mean((comparison$Marks - comparison$y_predict)^2)
print(mse)

# Mean absolute percentage error; MAPE() takes predictions first, then the
# actual values.
library(MLmetrics)
mape.value <- MAPE(comparison$y_predict, comparison$Marks)
print(mape.value)