Swapnil Saurav

Data Analytics Jan 2023

https://learn.swapnil.pw - Learn and Practice Python

 

Refer Python notes here for installation of software:   https://learn.swapnil.pw

DAY 1 VIDEO HERE

#Scipy - scientific python
import scipy

#Permutation & Combination
## Both are about choosing r things from given n things
## default case replacement is not allowed

## Permutation order is important - n! / (n-r)!
## Combination is where order is not important - n! / ((n-r)! * r!)

## 6 B & 4 G - I need to form a committee with 4 members, there has to be at least one Boy
## 3B - 1G - x1
## 2B - 2 G - x2
## 1B - 3G - x3
## 4B - x4
## total= x1 + x2 + x3 + x4
from scipy.special import comb, perm
# Count the 4-member committees (from 6 boys and 4 girls) that contain at
# least one boy by adding up every allowed (boys, girls) split.
# exact=True makes comb() return Python ints instead of floats, and the total
# lives in its own name so the built-in sum() is not shadowed.
committee_splits = [(3, 1), (2, 2), (1, 3), (4, 0)]  # (boys, girls)
total_committees = 0
for boys, girls in committee_splits:
    # ways to pick the boys * ways to pick the girls (no repetition)
    cnt = comb(6, boys, exact=True, repetition=False) * comb(4, girls, exact=True)
    total_committees += cnt
print("Total combination possible is ", total_committees)

#Permutation
# 4 coats, 5 waist coats, 6 caps - 3 members
#abcd lmnop
cnt1 = perm(4,3)
cnt2 = perm(5,3)
cnt3 = perm(6,3)
print("Total permutation = ", cnt1*cnt2*cnt3)

#####################################
######### OPTIMIZATION PROBLEM #####
#####################################
# There is a company that makes: laptops (profit = 750) and desktops (1000)
#objective is to Maximize profit
# x = no. of laptops = 750x
# y = no. of desktops = 1000y
#solution = 750x + 1000y
## constraint 1 =Processing chips = 10,000 = requires 1 chip each
## ==> x + y <= 10,000
## Memory chipset 1 GB size - Laptops need 1GB memory , Desktops need 2GB
## ==> x + 2y <= 15,000
##Time to assemble 1 laptop = 4min, desktop = 3min, total time 25,000 min available
## ==> 4x + 3y <=25,000

## x+y <= 10
## x+2y <=15
## 4x+3y <=25
import numpy
from scipy.optimize import linprog, minimize,LinearConstraint



# Linear program: choose laptop/desktop counts that maximize profit.
# The single-unit values below are illustrative only; the solver does not use them.
l = 1 #num of laptops
d = 1 #num of desktops
profit_l = 750
profit_d = 1000
total_profit = l*profit_l + d * profit_d
# linprog() only minimizes, so the profits are negated to turn the
# maximization into a minimization problem.
objective = [-profit_l, -profit_d]
## x +  y <= 10,000   (processing chips)
## x + 2y <= 15,000   (memory chipsets)
## 4x + 3y <= 25,000  (assembly minutes)
lhs_cons = [[1, 1],
            [1, 2],
            [4, 3]]
rhs_val = [10000,
           15000,
           25000]
# Both quantities are non-negative with no upper bound.
bnd = [(0, None), (0, None)]
# "highs" is the supported solver; the legacy "revised simplex" method has
# been removed from current SciPy releases.
optimize_sol = linprog(c=objective, A_ub=lhs_cons, b_ub=rhs_val, bounds=bnd, method="highs")
# The result object is always truthy, so test its success flag explicitly.
if optimize_sol.success:
    print(optimize_sol.x[0], optimize_sol.x[1])
    # undo the sign flip applied to the objective
    print("Total profit = ",optimize_sol.fun*-1)
else:
    print("Optimization failed:", optimize_sol.message)


print("==================")
lhs_cons=[]
rhs_val=[]
while True:
l1 = int(input("Enter the value for notebook: "))
l2 = int(input("Enter the value for desktop: "))
y1 = int(input("Enter the value for Y: "))
lhs_cons.append([l1,l2])
rhs_val.append(y1)
ch=input("Do you have more constraints: ")
if ch!="y":
break
print("LHS Constraints = ",lhs_cons)
print("RHS Values = ",rhs_val)

#Pandas - dataframe - is a way to read data in table format (row & column)
import pandas as pd

data = [["Sachin",47],["Virat",33],["Rohit",35]]
df1 = pd.DataFrame(data,columns=['Name','Age'])
print(df1)
import pandas as pd
import sqlite3
con_str = sqlite3.connect("LibraryMS.db")
cursor = con_str.cursor()
q1 = "select * from students"
rows = cursor.execute(q1)
list2 = list(rows.fetchall())

con_str.close()
data_df = pd.DataFrame(list2)
print(data_df)

list1=[["Q1 2022",2300,3400,1900],
["Q2 2022",2300,3400,1900],
["Q3 2022",2300,3400,1900],
["Q4 2022",2300,3400,1900]]
print(list1)
columns=["Quarter","Apple","Banana","Oranges"]
ind=["Jan-March","April-June","Jul-Sep","Oct-Dec"]
data_df = pd.DataFrame(list1, columns=columns,index=ind)
print(data_df)
# df.iloc & loc
print(data_df.iloc[0:3,-3:])
print(data_df.iloc[0:3,[1,3]])

print(data_df.loc[['Jan-March',"Oct-Dec"],['Apple',"Oranges"]])

import pandas as pd

data_df1 = pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_usage.csv")
print(data_df1)
data_df2 = pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_device.csv")
print(data_df2)
import pandas as pd
import unicodedata
import nltk


#remove accent functions
def remove_accent(text):
    """Return *text* with accents removed, keeping only ASCII characters.

    NFKD normalization splits an accented letter into its base letter plus a
    combining mark; encoding to ASCII with errors='ignore' then drops the
    marks (and any other non-ASCII character).
    """
    txt = unicodedata.normalize('NFKD',text).encode('ascii',errors='ignore').decode('utf-8')
    return txt
#getting the stop words set
STOP_WORDS = set(remove_accent(word) for word in nltk.corpus.stopwords.words('portuguese'))

#defining a function to perform NLP processes
def nlp_analysis_1(comment):
    """Normalize one review comment and split it into tokens.

    Steps: lower-case the text, strip accents with remove_accent(), then
    tokenize with nltk.tokenize.word_tokenize().

    Returns the value produced by word_tokenize() for the cleaned text.
    """
    #nlp 1. convert to lowercase
    comments = comment.lower()
    #nlp 2. remove accents
    comments = remove_accent(comments)
    #nlp 3. tokenize the content
    tokens = nltk.tokenize.word_tokenize(comments)
    return tokens

# Every backslash in the Windows path is escaped ("\D" is an invalid escape sequence).
reviews = pd.read_csv("C:\\Users\\Hp\\Downloads\\OnlineRetail-master\\order_reviews.csv")
#print(reviews['review_comment_message'])
#Step 1: removed the null values
comment_text = reviews[reviews['review_comment_message'].notnull()].copy()
print(comment_text.columns)
comment_text['review_comment_message'] = comment_text['review_comment_message'].apply(nlp_analysis_1)
print(comment_text['review_comment_message'])

SQL Learning

livesql.oracle.com

Select * from hr.employees;

 

select first_name, last_name,hire_date,salary from hr.employees;

 

select first_name FirstName, last_name,hire_date,salary from hr.employees;

 

select first_name || ' ' || last_name  FULLNAME,hire_date,salary from hr.employees;

 

select first_name || ' ' || last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY from hr.employees;

 

select first_name || ' ' || last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY from hr.employees order by Last_name;

 

 

select first_name || ' ' || last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY 

from hr.employees 

order by Hire_date, Last_name;

 

select first_name || ' ' || last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY , COMMISSION_PCT

from hr.employees 

order by COMMISSION_PCT NULLS First;

 

 

select first_name "First Name", last_name,hire_date,salary from hr.employees;

 

select first_name, last_name,hire_date,salary from hr.employees where salary>=9000;

 

select first_name, last_name,hire_date,salary from hr.employees 

where salary>=9000 and salary <=12000;

 

select first_name, last_name,hire_date,salary from hr.employees 

where salary BETWEEN 9000 and 12000;

 

select first_name, last_name,hire_date,salary, DEPARTMENT_ID from hr.employees 

where salary>=9000 or DEPARTMENT_ID =80;

 

select distinct salary from hr.employees;

 

-- Tendulkar  9000

-- Tendulkar  15000

January 2023 Evening
#interpreter: Python R
print("Hello")
print(5+4)
print('5+4=',5+4,'so what even 4+5=',4+5)
a=5 # variable
print("type of a in line #5 is ",type(a))
print("a = ",a)
#type of data (datatype) is integer - numbers without decimal point -99999,999
a = 5.0 #data type is float - numbers with decimal point, -999.5, 0.0, 99.9
print("type of a in line #9 is ",type(a))
a = 5j # i in Maths - square root of -1
print("type of a in line #11 is ",type(a))
#square root of -4 = 2i
print("a*a = ",a*a) #
a=9
print("a = ",a)
#function - print(), type()
# ,
#variable - constant
# is comment - Python these are not for you. these for us
a="HELLO" #text - in python type - string str
print("type of a in line #21 is ",type(a))

 

 

a = True #boolean = True or False
#print(type(a))
# String literals need straight quotes; typographic quotes are a SyntaxError.
print("type of a in line #24 is ",type(a))
#compiler: C. C++ Java

#Android – STORY MANTRA – after you login
#on the home page you will see- CATEGORIES -> Technical
#Technical -> Python, R ,

print("Hello")  #fist line
print('irte834t8ejviodjgiodfg0e8ruq34tuidfjgiodafjgodafbj')

 

print(5+3)
# Straight quotes are required; typographic quotes are a SyntaxError.
print('5+3')
print('5+3=',5+3,"and 6+4=",6+4)
#whatever is given to print() shall be displayed on the screen
#syntax – rules (grammar)
#COMMENTS

#print(), type()
#comments
#data types: int, float, str,bool, complex
#variables - will accept alphabets, numbers and _
price = int(51.9876);
quantity = 23;
total_cost = price * quantity;
print(total_cost);
print("Given price is", price,"and quantity bought is",quantity,"so total cost will be",total_cost)

# f string
print(f"Given price is {price:.2f} and quantity bought is {quantity} so total cost will be {total_cost:.2f}")

player = "Sachin"
country = "India"
position = "Opener"
print(f"{player:<15} is a/an {position:>15} and plays for {country:^15} in international matches.")
player = "Mbwangebwe"
country = "Zimbabwe"
position = "Wicket-keeper"
print(f"{player:<15} is a/an {position:>15} and plays for {country:^15} in international matches.")

#Sachin is a/an Opener and plays for India in international matches.
#Mbwangebwe is a/an Wicket-keeper and plays for Zimbabwe in international matches.

#escape sequence \
# \n and \t are real escapes; \w, \y and \z are not, so their backslashes are
# doubled to print them literally without an invalid-escape warning.
print("abcdefghijklm\nopqrs\tuv\\wx\\y\\z")
# \n - newline

# \n is used for newline in Python
print("\\n is used for newline in Python")

# \\n is actually give you \n in Python
print("\\\\n is actually give you \\n in Python")


# Data types - 5 main types
var1 = 5
print(type(var1)) # int - integer -9999 0 5

var1 = 5.0
print(type(var1)) #float - numbers with decimal

var1 = 5j
print(type(var1)) #complex - square root of minus 1

var1 = True #False #bool
print(type(var1))

var1 = "hello" #str - string
print(type(var1))

#input() - is used to read a value from the user
num1 = float(input("Enter a number: "))
print(f"{num1} is the number")
print("Datatype of num1 is ",type(num1))

var2 = "50"

#implicit and explicit conversion

# arithmetic Operations that can be performed on
# numeric (int, float, complex): i/p and o/p both are numbers
num1 = 23
num2 = 32 #assign 32 to num2
print(num1 + num2) #addition
print(num1 - num2) #
print(num1 * num2) #
print(num1 / num2) #
print(num1 // num2) #integer division: it will give you only the integer part
print(num1 ** num2) # Power
print(num1 % num2) # mod modulus - remainder

## comparison operator : input as numbers and output will be bool
## > < == (is it equal?) != , >= <=
num1 = 23
num2 = 32
num3 = 23
print(num2 > num3) # T
print(num3 > num1) # F
print(num2 >= num3) #T - is num2 greater than or equal to num3 ?
print(num3 >= num1) # T
print(num2 < num3) # F
print(num3 < num1) # F
print(num2 <= num3) # F
print(num3 <= num1) # T
print(num2 == num3) # F
print(num3 != num1) # F

# Logical operator: and or not
# prediction 1: Sachin or Saurav will open the batting - T
# prediction 2: Sachin and Saurav will open the batting - F
# actual: Sachin and Sehwag opened the batting

#Truth table - on boolean values
# AND Truth Table:
### T and T => T
### T and F => F
### F and T => F
### F and F => F

# OR Truth Table:
### T or T => T
### T or F => T
### F or T => T
### F or F => F

# NOT
## not True = False
## not False = True

## Assignment 1: Get length and breadth from the user and calculate
## area (l*b) and perimeter (2(l+b))

## Assignment 2: Get radius of a circle from the user and calculate
## area (pi r square) and circumference (2 pi radius)
#Logical operator: works on bool and returns bool only
# and: all values have to be True to get the final result as True
# or: anyone value is True, you get the final result as True
# 5 * 99 * 7 * 151 * 45 * 0 = 0
# 0 + 0 + 0 + 0+1 = 1

print(True and True and False or True or True and True or False or False and True and True or False)
num1 = 5
num2 = 8
print(num1 !=num2 and num1>num2 or num1<=num2 and num2>=num1 or num1==num2 and num1<num2)

num3 = bin(18) #0b10010
print(num3)
print("hex(18) = ",hex(18)) #0x12
print("oct(18): ", oct(18)) #0o22

print("hex(0b1101111) = ",hex(0b1101111))
print("int(0b1101111) = ",int(0b1101111))

#BITWISE Operators
# left shift (<<) / right shift (>>) operators work on only binary numbers
print("56 << 3 = ",56 << 3) #output #111000000
print(bin(56))
print(int(0b111000000)) #448

# Shift amount now matches the printed label (the code shifted by 7).
print("56 >> 4 = ",56>>4) # 0b111000 >> 4 = 0b11 = 3

# & and in bitwise
print("23 & 12 = ",23 & 12) #4
print("23 | 12 = ",23 | 12) #31
print(bin(23)) #10111
print(bin(12)) #01100
#& 00100
print(int(0b100))
# | 11111
print(int(0b11111))

# | or in bitwise

num1 = 10
#positive
#negative

# area of a circle = pi * r**2 (3.14 = pi)
# circunference = 2 * pi * r

# Conditions
avg = 30
if avg >=40:
print("Pass") #indentation
print("Congratulations!")
else: #incase of IF getting False condition
print("You have failed")
print("try again")

#avg > 90 - Grade A
#avg 80 to 90 - Grade B
# avg 70 to 80 - Grade C
#avg 60 to 70 - Grade D
#avg 50 to 60 - Grade E
#avg 40 to 50 - Grade F
#avg <40 - Grade G

avg=30
if avg>=40:
print("Pass") # indentation
print("Congratulations!")

if avg>=90:
print("Grade A")
if avg >=95:
print("You win President Medal")

elif avg>=80:
print("Grade B")
elif avg >=70:
print("Grade C")
elif avg >=60:
print("Grade D")
elif avg >=50:
print("Grade E")
else:
print("Grade F")
else:
print("You have failed")
print("try again")
print("Grade G")


avg = 90
if avg <40:
print("Grade G")
elif avg <50:
print("Grade F")
elif avg <60:
print("Grade E")
elif avg <70:
print("Grade D")
elif avg <80:
print("Grade C")
elif avg <90:
print("Grade B")
else:
print("Grade A")

print("Thank you so much")

num = 5
if num > 0:
print("Number is positive")
if num % 2 == 1:
print("Its Odd")
else:
print("Its even")
if num % 3 == 0:
print("It is divisible by both 2 and 3. It is also divisible by 6")

else:
print("Its divisible by 2 but not 3")

elif num == 0:
print("Neither Positive not negative")
else:
print("Its Negative")

#loops - repeating multiple lines of code
#Python - 2 types of loops- one when you know how many times to repeat - FOR
#repeat until some condition true WHILE
# range(a,b,c) #generates range of values - start from a, go upto b(exlusive), c=increment
#range(2,6,2) = 2,4
#range(5,9) = (2 val indicate a&b - c is default 1) => 5,6,7,8
#range(5) = (its b, a is deafult 0 and c is default 1) = ?

for i in range(3,9,2):
print("HELLO",i)

for i in range(10):
print(i*2+2,end=", ")
print("\n")
for i in range(5):
print("*",end=" ")
print("\n=================")
'''
* * * * *
* * * * *
* * * * *
* * * * *
* * * * *
'''
for j in range(5):
for i in range(5):
print("*",end=" ")
print()

#for loop
for i in range(5):
print(i)
print()

for j in range(5):
for i in range(5):
print("*",end=" ")
print()

'''
*
* *
* * *
* * * *
* * * * *
'''
for j in range(5):
for i in range(j+1):
print("*", end=" ")
print()

'''
* * * * *
* * * *
* * *
* *
*
'''
for j in range(5):
for i in range(5-j):
print("*", end=" ")
print()

num,sum = 5,0
while num <103:
sum+=num
print(num)
num+=5

print("Sum = ",sum)
i=0
while True:
i=i+1
print("Hello, i is ",i)
ch=input("Enter y to stop: ")
if ch=='y':
break
print("One more hello")
if i%5==0:
continue
print("Another hello but not to print when its multiple of five")
a,b,c = 10,12,8
if a>=b:
#either a is greater or equal
if a>=c:
print(f"{a} is greatest value")
else:
print(f"{c} is greatest value")
else:
#b is greater
if b>=c:
print(f"{b} is greatest value")
else:
print(f"{c} is greatest value")
#Assignmnt : Modify the above program to display 3 number is descending order

#loops - repeating
#FOR - how many times
#range(a,b,c) - generates value from a upto b and increasing by c
#range(5,19,4) - 5, 9,13,17
#range(4,19,5) - 4,9,14
#WHILE - repeating based on condition

############################
#Strings
str1 = 'Hello how are you'
str2 = "Im fine"
str3 = '''How are you today
are you fine
hope you feel better'''
str4 = """I am fine today
expecting to do well
I am feeling better now"""
print(str3)

print(str1 + " "+str2)
print(str2*10)
print("Lo" in str1)

#indexing: slicing dicing
print(str1[0])
print(str1[4])
print(str2[-1])
print(str1[6:9])
print("->",str1[-11:-8])
print("First 3 values: ",str1[:3])
print("last 3 values: ",str1[-3:])

#
print(str1.upper())
#string immutable - you cant edit the string
#str1[0]="K" TypeError: 'str' object does not support item assignment
str1 = "K"+str1[1:]
print(str1)
str1 = "hello how are you?"
print("last 3 characters: ",str1[-3:])
for i in str1:
print(i)

print(str1.islower())
print(str1.isupper())
print(str1.isalpha()) #
str1 = "509058585855"
print(str1.isdigit())
print(str1.isspace())
str1 = "hello how are you?"
print(str1.title())
print(str1.lower())
print(str1.upper())

str2 = "I am fine how are you doing today"
target = "aeiou"
count=0
for i in str2:
if i.lower() in target:
count+=1
print("Total vowels: ",count)

result = str1.split()
print(result)
result2 = str1.split('ow')
print(result2)
result3 = "OW".join(result2)
print(result3)

print(str1.find('o',0,5))
print(str1.replace("hello","HELLO"))
print(str1)

#strings are immutable
var1 = 5 # integer
print(type(var1))
var1 = 5.0 # float
print(type(var1))
var1 = "5" # string
print(type(var1))
var1 = 5j # complex
print(type(var1))
var1 = True # bool
print(type(var1))

#Arithematic operations
num1 = 5
num2 = 3
print(num1 + num2)
print(num1 - num2)
print(num1 * num2)
print(num1 / num2)
print(num1 // num2) #integer division
print(num1 % num2) #modulo - remainder
print(num1 ** num2) # power

##Once I had been to the post-office to buy stamps of five rupees,
# two rupees and one rupee. I paid the clerk Rs. 20,
# and since he did not have change, he gave me three more
# stamps of one rupee. If the number of stamps of each type
# that I had ordered initially was more than one,
# what was the total number of stamps that I bought.
total = 30
stamp_5_count = 2 #>=
stamp_2_count = 2 #>=
stamp_1_count = 2+3 #>=
total_by_now = stamp_5_count * 5 + stamp_2_count * 2 + stamp_1_count * 1
print(total_by_now, "is total by now")
accounted_for = total - total_by_now

stamp_5_count = stamp_5_count + accounted_for //5
accounted_for = accounted_for %5

stamp_2_count = stamp_2_count + accounted_for //2
accounted_for = accounted_for %2

stamp_1_count = stamp_1_count + accounted_for

print("You will end up getting:")
print("Number of 5 Rs stamp = ",stamp_5_count)
print("Number of 2 Rs stamp = ",stamp_2_count)
print("Number of 1 Rs stamp = ",stamp_1_count)
total_value = stamp_5_count*5 + stamp_2_count* 2+ stamp_1_count*1
print("Net difference between amount and stamp value: ",total-total_value)

#Comparison operators: < > <= >= == !=
num1 = 7
num2 = 7
print("is num1 equal to num2? ", num1==num2) #== ??
print("is num1 not equal to num2?", num1!=num2)
print("is num1 greater than num2? ", num1>num2)
print("is num1 greater than or equal to num2?", num1>=num2)
print("is num1 less than num2? ", num1<num2)
print("is num1 less than or equal to num2?", num1<=num2)

#Logical operators: and (*) or (+) not
# pred: sachin and sehwag will open the batting
# actual: sachin and sourav opened the batting - wrong

# pred: sachin or sehwag will open the batting
# actual: sachin and sourav opened the batting - right

print(True and True) #True
print(True and False) #rest is all False
print(False and True)
print(False and False)

print(True or True) #True
print(True or False) #True
print(False or True) #True
print(False or False) #False

print(not True) #

num1 = 7
num2 = 7
print("=>",num1==num2 and num1!=num2 or num1>num2 or num1>=num2 and num1<num2 and num1<=num2)
# F
# conditional check
num1 = 100
#perform check - IF condition
if num1>0:
print("Its positive")
#We use conditions when we need to control the flow of the program
avg = 88

#avg: 90 to 100: A , 80-90: B, 70-80: C , 60-70: D
#50 to 60: E, 40 to 50: F, <40: Failed
# if ... elif.. else

if avg >=90:
print("Pass")
print("Grade A")
elif avg>=80:
print("Pass")
print("Grade : B")
elif avg>=70:
print("Pass")
print("Grade : C")
elif avg>=60:
print("Pass")
print("Grade : D")
elif avg>=50:
print("Pass")
print("Grade : E")
elif avg >=40:
print("Pass")
print("Grade : F")
else:
print("Grade : Failed")

## Assignment - use Nested condition: Take 3 numbers and put them
## in increasing order:
## 14, 13 13 => 13,13,14
# 19, 39,29 => 19,29,39
avg = 94
if avg>=40:
print("Pass")
if avg >= 90:
print("Grade A")
if avg>=95:
print("You win President's medal!")
elif avg >= 80:
print("Grade : B")
elif avg >= 70:
print("Grade : C")
elif avg >= 60:
print("Grade : D")
elif avg >= 50:
print("Grade : E")
else:
print("Grade : F")
else:
print("Grade : Failed")

num1= -0
if num1>0:
print("Number is positive")
elif num1<0:
print("Number is negative")
else:
print("0 - neither positive not negative")

# FOR Loop: when you know how many times to run the loop
# range(a,b,c) : start from a (including), go upto b (exclusive), increment by c
range(3,9,2) # 3,5,7 ... 8
print("For loop example 1:")
for i in range(3,9,2):
print(i)

print("For loop example 2:")
for i in range(3, 6): #range(a,b) => c=1 (default)
print(i)

print("For loop example 3:")
for i in range(3): # range(b) => a=0 (default) c=1 (default)
print(i)
# WHILE Loop: you dont know the count but you know when to stop
#LIST:  linear ordered mutable collection

l1 = [5,9,8.5,False,"Hello",[2,4,6,"Welcome"]]
print("Length: ",len(l1))
print(type(l1))
print(type(l1[1]))
print(l1[-3])
print(l1[-2][2])
print(l1[-1][-1][-1])

l2 = [10,20,30]
print(l1+l2)

print(l2 * 3)

files = ['abc.csv','xyz.csv','aaa.csv','bbb.csv','ccc.csv','ddd.csv']
for i in files:
print("I have completed",i)

#inbuilt methods for list
print("1. L2 = ",l2)
l2.append(40)
l2.append(60) #append will add members at the end
#insert()
l2.insert(3,50)
l2.insert(3,70)
print("2. L2 = ",l2)
l2.remove(50) #remove the given element
print("3. L2 = ",l2)
l2.pop(3)
#l2.clear()
print("4. L2 = ",l2)
l2 = [5,10,15,20,25,30,35,40]
print(l2.count(5))
l1 = [100,200,300]
l1.extend(l2) # l1 = l1 + l2
l1[0] = 999
print("L1 = ",l1)
l1.sort(reverse=True)
print("A. List1: ",l1)
l1.reverse()
print("B. List1: ",l1)

l2 = l1 #NOT a copy: this only binds a second name (alias) l2 to the same list object as l1
l3 = l1.copy() #copy method: shallow copy - l3 is a new list object, independent of later appends to l1
print("C1. List 1: ",l1)
print("C1. List 2: ",l2)
print("C1. List 3: ",l3)
l1.append(33)
l2.append(44)
l1.append(55)
print("C2. List 1: ",l1)
print("C2. List 2: ",l2)
print("C2. List 3: ",l3)
str1 = "hello"
str2 = str1.upper()
print(str2)
print(str1)
#l1

m1= int(input("marks 1:"))
m2= int(input("marks 1:"))
m3= int(input("marks 1:"))
m4= int(input("marks 1:"))
m5= int(input("marks 1:"))
total = m1+m2+m3+m4+m5
avg = total/5
print(m1,m2,m3,m4,m5)
print("Total marks = ",total," and Average = ",avg)

total = 0
for i in range(5):
m1 = int(input("marks:"))
total+=m1
avg = total/5
print("Total marks = ",total," and Average = ",avg)

marks=[]
total = 0
for i in range(5):
m1 = int(input("marks:"))
marks.append(m1)
total+=m1
avg = total/5
print(marks[0],marks[1],marks[2],marks[3],marks[4])
for i in marks:
print(i,end=" ")
print("\nTotal marks = ",total," and Average = ",avg)
########################
####
str1 = 'HELLO'
str2 = "I am fine"
str3 = '''Where are you going?
How long will you be here?
What are you going to do?'''
str4 = """I am here
I will be here for next 7 days
I am going to just relax and chill"""
print(type(str1),type(str2),type(str3),type(str4))
print(str1)
print(str2)
print(str3)
print(str4)

# What's you name?
str5 = "What's your name?"
print(str5)
#He asked,"Where are you?"
str6 = 'He asked,"Where are you?"'
print(str6)

#He asked,"What's your name?"
#escape sequence \
print('''He asked,"What's your name?"''')
print("He asked,\"What's your name?\"")

print('nnnnn\nnn\tnn')

print("\FOlder\\newfolder")
# \n is used to print newline in python
print("\\n is used to print newline in python")

# \\n will not print newline in python
print("\\\\n will not print newline in python")

str1 = "Hello You"
str2 = "There"
print(str1 + str2)
print(str1 *5)
for i in str1:
print("Hello")

#indexing
print(str1[2])
# "Hello You" has 9 characters, so the last positive index is 8 (index 4 is 'o').
print("last element: ",str1[8])
print("last element: ",str1[-1])
print("second element: ",str1[-8])
print("ell: ",str1[1:4])
print("ell: ",str1[-8:-5])
print("First 3: ",str1[:3])
print("First 3: ",str1[:-6])
print("Last 3: ",str1[6:])
print("Last 3: ",str1[-3:])

#Methods - exactly same as your functions - only difference is they are linked to a class
import time
str1 = "HELLO"
print(str1.replace("L","X",1))

sub_str = "LL"
str2 = "HELLO HOW WELL ARE YOU LL"
cnt = str2.find(sub_str)
print("Count = ",cnt)

if cnt<0:
print("Sorry, no matching value hence removing")
else:
print("Value found, now replacing")
for i in range(5):
print(". ",end="")
time.sleep(0.5)
print("\n")
print(str2.replace(sub_str,"OOOO"))


out_res = str2.split("LL")
print("Output Result = ",out_res)

out_str = "LL".join(out_res)
print(out_str)

print(str2.title())
print(str2.lower())
print(str2.upper())

str3 = 'hello how well are you ll'
print(str3.islower())
print(str3.isupper())

num1 = input("Enter a number: ")
if num1.isdigit():
num1 = int(num1)
else:
print("Invaid input")

ename = input("Enter your first name: ")
if ename.isalpha():
print("Your name is being saved...")
else:
print("Invaid name")

#WAP to count of vowels in a sentence
para1 = "Work, family, and endless to-do lists can make it tough to find the time to catch up. But you'll never regret taking a break to chat with your friend, Frost reminds us. Everything else will still be there later."
# Version 1: compare every character against each vowel in both cases.
# The last comparison is 'U' (it was mistyped as '3'), and the counter has its
# own name so the built-in sum() is not shadowed.
vowel_count=0
for letter in para1:
    if (letter=='a' or letter=='A' or letter=='e' or letter=='E'
            or letter=='i' or letter=='I' or letter=='o' or letter=='O'
            or letter=='u' or letter=='U'):
        vowel_count+=1
print("Total vowesl = ",vowel_count)
sum=0
for l in para1.lower():
if l=='a' or l=='e' or l=='i' or l=='o' or l=='u':
sum+=1
print("Total vowesl = ",sum)

sum=0
for l in para1.lower():
if l in 'aeiou':
sum+=1
print("Total vowesl = ",sum)

########## LIST
#LIST
#collection of linear ordered items
list1 = [1,2,3,4,5]
print(type(list1))
print("Size = ",len(list1))

print(list1[0])
print(list1[-1])
print(list1[3])
print(list1[:3])
print(list1[-3:])
print(list1[1:4])

for i in list1:
print(i)

print([2,3,4]+[6,4,9])
print([2,3,4]*3)

str2 = "A B C D A B C A B A "
print(str2.count("D"))
print(list1.count(3))

l1 = [2,4,6,8]
print(l1.append(12))
print(l1)
l1[0]=10
print(l1)

l1.insert(2,15)
print(l1)

# Queue: FIFO
# Stack: LIFO

if 16 in l1:
l1.remove(16) #takes in value to remove
l1.remove(15)
print(l1)
l1.pop(1) #index
print(l1)

#################
while False:
print("Queue is: ",l1)
print("1. Add\n2. Remove\n3. Exit")
ch=input("Enter your choice: ")
if ch=="1":
val = input("Enter the value: ")
l1.append(val)
elif ch=="2":
l1.pop(0)
elif ch=="3":
break
else:
print("Try again!")

while False:
print("Stack is: ",l1)
print("1. Add\n2. Remove\n3. Exit")
ch=input("Enter your choice: ")
if ch=="1":
val = input("Enter the value: ")
l1.append(val)
elif ch=="2":
l1.pop(-1)
elif ch=="3":
break
else:
print("Try again!")

l2 = l1 #they become same
l3 = l1.copy()
print("1. List1 = ",l1)
print("1. List2 = ",l2)
print("1. List3 = ",l3)

l1.append(33)
l2.append(44)
l3.append(55)

print("2. List1 = ",l1)
print("2. List2 = ",l2)
print("2. List3 = ",l3)

l1.extend(l3)
print(l1)
print(l1.count(6))

sum=0
marks=[]
for i in range(3):
m = int(input("Enter marks in subject "+str(i+1)+": "))
marks.append(m)
sum+=m
print("Sum is ",sum, "and average is ",sum/3)
print("Marks obtained is ",marks)

#THREE STUDENTS AND THREE SUBJECTS:
allmarks=[]
for j in range(3):
sum=0
marks=[]
for i in range(3):
m = int(input("Enter marks in subject "+str(i+1)+": "))
marks.append(m)
sum+=m
print("Sum is ",sum, "and average is ",sum/3)
print("Marks obtained is ",marks)

allmarks.append(marks)

print("All the marks are: ",allmarks)

# All the marks are: [[88, 66, 77], [99, 44, 66], [44, 99, 88]]
# find the highest marks of each subject

#Tuple - linear order immutable collection
#strings are also immutable

tuple1 = (1,3,1,4,1,5,1,6)
print(type(tuple1))
print(len(tuple1))
print(tuple1.count(1))
print(tuple1.index(4))
print(tuple1[2])
for i in tuple1:
print(i)
t1 = list(tuple1)
t1.append(55)
t1=tuple(t1)
t2 = (2,4,6,8) #packing
#unpacking - the number of names on the left must equal the tuple's length
#(five names for this 4-tuple would raise ValueError)
a,b,c,d = t2
print(a,b,c,d)
#packing
# Dictionary
dict1 = {1: "Sachin Tendulkar","Runs": 50000, 'City':'Mumbai','Teams':['Mumbai','Mumbai Indians','India']}
print(dict1['Teams'])
dict2 = {'100s':[50,20,1]}
dict1.update(dict2)
print(dict1)
print(dict1.values())
print(dict1.keys())
print(dict1.items())

#Dictionary are mutable
dict1.pop('City')
print(dict1)
dict1.popitem()
print(dict1)

dict3 = dict1.copy() #shallow copy: a new dict object; nested values such as the 'Teams' list are still shared
dict4 = dict1 #alias, NOT a copy: dict4 and dict1 are two names for the same dict object

all_details={}
while True:
roll = input("Enter Roll Number of the Student: ")
marks=[]
for i in range(3):
m = int(input("Enter the marks: "))
marks.append(m)
temp={roll:marks}
all_details.update(temp)
ch=bool(input("Enter null to continue: "))
if ch:
break

print("All details: ",all_details)
for i, j in all_details.items():
sum=0
for a in j:
sum+=a
print(f"Total marks obtained by {i} is {sum} and average is {sum/3:.1f}")


### SET
'''
A B C D E
D E F G H
How many total (union): 8
How many common(intersection): 2
Remove set2 values from set1: (set1 - set2): 3

'''
set1 = {2,4,6,8,10,12}  #neither have duplicate values nor there is any order
print(type(set1))
set2 = {3,6,9,12}
print(set1 | set2)
print(set1.union(set2))
#print(set1.update(set2)) #meaning union_update
#print(set1)
print(set1 & set2)
print(set1.intersection(set2))
#print(set1.intersection_update(set2))
#print(set1)
print(set1 - set2)
print(set1.difference(set2))
#print(set1.difference_update(set2))
#print(set1)
print(set1^set2)
print(set1.symmetric_difference(set2))
#print(set1.symmetric_difference_update(set2))
#print(set1)

#####
## Functions

#defining a function
def sometxt():
    """Print a short three-line greeting and return the string "Great"."""
    print("Hello")
    print("how are you?")
    print("I am fine thank you!")
    return "Great"

print(sometxt())
a= sometxt()
print(a)

#functions that return values v doesnt return values
# function taking input arguments /  pass - parameters
def function1(x,y,z): #required positional arguments
    """Print the three arguments; all of them must be supplied by the caller."""
    print("Value of X: ",x)
    print("Value of Y: ", y)
    print("Value of Z: ", z)

def function2(x,y=15,z=30): #default positional arguments
    """Print the three arguments; y and z fall back to 15 and 30 when omitted."""
    print("Value of X: ",x)
    print("Value of Y: ", y)
    print("Value of Z: ", z)

def function3(x,*y,**z):
    """Print x, the extra positional arguments (tuple y) and the keyword arguments (dict z)."""
    print("Value of X: ", x)
    print("Value of Y: ", y)
    print("Value of Z: ", z)

function3(20, 2,4,6,8,10,12,14,16,18,20, fruit="Apple",calorie=150)
a=5
b=6
c=9
function1(a,b,c) #parameters
function2(12)

function2(z=30,x=12) #keywords (non-positional)

VIDEO- Function Types Intro

#Functions

def func1(num1,num2):
    """Print both numbers and their sum, then return the sum.

    Both arguments are required.
    """
    print("Number 1 = ", num1)
    print("Number 2 = ", num2)
    add = num1 + num2
    print("Addition = ",add)
    return add



def func2(num1,num2=100):
    """Print both numbers and their sum, then return the sum.

    num1 is required; num2 defaults to 100 when omitted.
    """
    print("Number 1 = ", num1)
    print("Number 2 = ", num2)
    add = num1 + num2
    print("Addition = ",add)
    return add


#variable length arguments

def alldata(num1, num2, *var1, **var2):
    """Print the two required numbers plus any extra arguments.

    Extra positional arguments are collected in the tuple var1 and extra
    keyword arguments in the dict var2.
    """
    print("Number 1 = ",num1)
    print("Number 2 = ", num2)
    print("Variable 1 = ", var1)
    print("Variable 2 = ", var2)

# Demo calls; they run only when the file is executed directly, not on import.
if __name__ =="__main__":
    result = func1(5, 10) # required positional arguments
    result = func2(5) # num1 is required / num2 is default & positional arguments
    print("Result of addition is", result)

    result = func2(num2=5, num1=25) # keyword arguments (non-positional)
    print("Result of addition is", result)

    alldata(5, 83, 12, 24, 36, 48, 60, name="Sachin", city="Pune")

# class - definition
# class str - defining some properties to it like split(), lower()
# object is the usable form of class
str1 = "hello"

#creating a class called Library
#in that I added a function called printinfo()
# self: indicates function works at object level
class Library:
    """Demo class contrasting class-level data with object-level data."""

    #class level variable
    myClassName = "Library"

    #__init__ - it has predefined meaning (constructor), called automatically
    #when you create an object
    def __init__(self):
        """Prompt for a name and store it on the new object."""
        name = input("Init: What's your name?")
        self.name = name # self.name is object level

    # object level method
    def askinfo(self):
        """Prompt for a name again and store it on this object.

        The assignment is restored here: without it the value that was read
        was thrown away and the method had no effect.
        """
        name = input("What's your name?")
        self.name = name #self.name is object level

    #object level method
    def printinfo(self):
        """Print a greeting built from the class-level name and this object's name."""
        # Local variable only; it does not change Library.myClassName
        # (appears to be kept to illustrate exactly that).
        myClassName="Temp class"
        print(f"{Library.myClassName}, How are you Mr. {self.name}?")

#create object
l1 = Library()
l2 = Library()
l3 = Library()
#l1.askinfo()
#l2.askinfo()
#l3.askinfo()

l2.printinfo()
l3.printinfo()
l1.printinfo()
Quest Learning DS December 2022

Day 1:

a=5
#data type is integer
print(a)
print(type(a))
a='''hello
how are you
I am fine'''
#type is string
print(a)
print(type(a))
a="""hello"""
#type is string
print(a)
print(type(a))

a=5.0
print(a)
print(type(a))

a = True #False
print(a)
print(type(a))

a=5j
print(a);print(type(a));print(a*a)

print("How are you?",end=" -> ")
print("I am doing good.")

quant = 40
price = 10
total_cost = quant * price
print("Product quantity is",quant,"and bought at",price,"will cost total of Rs",total_cost)
print(f"Product quantity is {quant} and bought at {price} will cost total of Rs {total_cost}")

length = 50
breadth = 20
area = length * breadth #calc
#Expected output: A rectangle with length 50 and breadth 20 will have area of area_val and perimeter of perimeter_val

#Operators:
##Arithmetic operators + - * / ** (power) // (integer division) % (remainder)
a = 10
b = 3
print(a + b)
print(a - b)
print(a * b)
print(a / b)
print(a ** b)
print(a // b)
print(a % b)

##Comparison operator: == != > >= < <=
#input as numbers and output will be boolean value
a=10
b=3
print(a==b) #F
print(a!=b) #T
print(a > b)
print(a>=b)
print(a<=b)
print(a<b)

## Logical: and or not
a = 10
b = 3
print(a >b and b !=a) # T and T = T
print( True and True)
print( False and True)
print( False and False)
print( True and False)
print(not a!=b)
print( True or True)
print( False or True)
print( False or False)
print( True or False)

#bitwise: >> << & | ~
a=23
print(bin(a)) #bin - converts to binary( 0b) oct - octal 0c hex - hexadecimal 0x
print(hex(a))

print("23 >> 1: ",23 >> 1) #bitwise: right shift
print("23 >> 2: ",23 >> 2) #bitwise: right shift
print(23 << 2) #bitwise: left shift
print(int(0b1011))
# 10111. 1011

print(" & : ",23 & 12)
# 1 0 1 1 1
# 0 1 1 0 0
#&
#--------------
# 0 0 1 0 0
# 1 1 1 1 1

print(" | : ",23| 12)

a=-5

if a < 0:
print()
print()
print()
b = 6+4
print(b)
print("Thank you 1")

# if / else: exactly one of the two branches runs.
a = -5
if a < 0:
    print("This is a negative number")
else:
    print("This is not a negative number")

# if / elif / else: the conditions are tested top to bottom and only the
# first matching branch runs; else handles everything left over (here: zero).
a = 0
if a < 0:
    print("Negative number")
elif a > 0:
    print("Positive number")
else:
    print("Zero value")

Video Link Day 1

# Nested conditions: the sign is decided first; only for positive numbers
# do we go on to check divisibility by 2 and then by 3.
number = 6

if number < 0:
    print("Its negative")
elif number > 0:
    print("its positive")
    if number % 2 == 0:
        print("Even")
        if number % 3 == 0:
            print("Divisible by 3 and 2 both")
        else:
            print("Its divisible by 2 only")
    else:
        print("Odd")
        if number % 3 == 0:
            print("Divisible by 3 only")
        else:
            print("Its not divisible by either 2 or 3")
else:
    print("Its zero")

########## LOOP
# FOR Loop
#range(a,b,c): a = starting value (inclusive), b=ending value(exclusive), c=increment
#range(2,8,2): 2,4,6
#range(a,b): c is default 1
#range(3,7): 3,4,5,6
#range(3): a=0, c=1 => 0,1,2
for i in range(3):
print(i)

# While Loop
ch="n"
while ch=='y':
print("I am in While")
ch=input("Input your choice: ")

# Nested loops: the outer loop (j) walks the rows, the inner loop (i) prints
# the stars of one row on the same line, and the bare print() ends the row.

# 5 x 5 square
for j in range(5):
    for i in range(5):
        print("*", end=" ")
    print()

# growing triangle: row j has j+1 stars
for j in range(5):
    for i in range(j + 1):
        print("*", end=" ")
    print()

# shrinking triangle: row j has 5-j stars
for j in range(5):
    for i in range(5 - j):
        print("*", end=" ")
    print()

for j in range(5):
    for i in range(5 - j):
        print("*", end=" ")
    print()

for j in range(5):
    for i in range(5 - j):
        print("*", end=" ")
    print()

print("\n\n")
# pyramid: 4-j leading spaces push row j to the right before its j+1 stars
for j in range(5):
    for k in range(4 - j):
        print(" ", end="")
    for i in range(j + 1):
        print("*", end=" ")
    print()
choice = "y"
while choice=='y' or choice=='Y':
print("Hello")
choice = input("Enter Y to continue: ")

while True:
print("Hello")
choice = input("Enter Y to continue: ")
print("Hello 2")
if choice == 'B' or choice == 'b':
continue #Take you to the beginning of loop
print("Hello 3")
if choice!='y' and choice!='Y':
break #break which will throw you out of current loop
print("Hello 4")

print("Hello 5")
val1 =input("Enter your name: ")  #reading input given by the user
print(val1)
print(type(val1))

marks1 = input("Enter your marks in subject 1: ")
marks1 = int(marks1)
marks2 = int(input("Enter your marks in subject 2: "))
marks3 = int(input("Enter your marks in subject 3: "))
sum = marks1 +marks2+marks3
print("Total marks obtained is ",sum)
avg = sum/3
print(f"{val1} has scored a total of {sum} marks with an average of {avg:.2f}")

#<class 'str'> str()
#<class 'int'> int()
#<class 'float'> float()
#<class 'complex'> complex()
#<class 'bool'> bool()

###############
choice = input("Do you want milk (Y/N): ")
if choice =='Y' or choice =='y':
print("Give milk")
print("So you want milk tea")

print("Done")
val1 = "Sachin Tendulkar"
marks1 = input("Enter your marks in subject 1: ")
marks1 = int(marks1)
marks2 = int(input("Enter your marks in subject 2: "))
marks3 = int(input("Enter your marks in subject 3: "))
sum = marks1 +marks2+marks3
print("Total marks obtained is ",sum)
avg = sum/3
print(f"{val1} has scored a total of {sum} marks with an average of {avg:.2f}")

if avg >=90:
print("Congratulations, you won President Medal")

#if avg >=40, Pass and its not then say Fail
if avg >=40:
print("Result: PASS")
else: #default condition , executed only when if is false
print("Result: FAIL")


'''
80 to 100: Grade A - IF
70 to 80: Grade B - ELIF
60 to 70: Grade C - ELIF
50 to 60: Grade D - ELIF
40 to 50: Grade E - ELIF
<40: Grade F - ELSE
'''
#avg = 90
if avg>=80:
print("Grade: A")
elif avg>=70:
print("Grade: B")
elif avg>=60:
print("Grade: C")
elif avg>=50:
print("Grade: D")
elif avg>=40:
print("Grade: E")
else:
print("Grade: F")

number = 11
if number %2==0:
print("Its an even number")
else:
print("Its an odd number")
number = int(input("Enter a number: "))
if number <0:
print("Its a negative number")
elif number >0:
print("Its a positive number")
if number %2==0:
print("Its an even number")
if number %3 ==0:
print("Its divisible by both 2 and 3")
else:
print("Its an odd number")
else:
print("Its Zero")


number = 5
# "and" is true only when BOTH remainders are zero, so the else branch also
# covers numbers divisible by just one of 5 and 3 (such as 5 itself). The
# message therefore must not claim the number is divisible by neither.
if number % 5 == 0 and number % 3 == 0:
    print("Number is divisible by both 5 and 3")
else:
    print("Number is not divisible by both 5 and 3")

# Nested version: testing one divisor at a time distinguishes all four cases.
if number % 5 == 0:
    if number % 3 == 0:
        print("Divisible by both 5 and 3")
    else:
        print("Divisible only by 5")
else:
    if number % 3 == 0:
        print("Divisible by only 3")
    else:
        print("Its neither divisible 5 nor 3")


if number == 0:
    print("Zero")
else:
    print("Its either positive or negative")

# Loops : FOR - you know how many times (boil water for 2 min)
#range(a,b,c): a is the starting value (inclusive), b is the end value (exclusive, so the last value is below b), c is the increment
#range(2,8,2): 2,4,6
#range(2,5): 2 values these are a and b, c is default =1 || 2,3,4
#range(3) : 1 value indicate b, default a=0,c=1 || 0,1,2

#WAP to generate first 10 natural numbers
# Natural numbers are counted from 1 here, so the loop runs over 1..10
# (range's end value is exclusive). The last number ends the line; every
# other number is followed by ", " so the output stays on one line.
for i in range(1, 11):
    if i == 10:
        print(i)
    else:
        print(i, end=', ')


#Loops: WHILE - you know until when (boil water till you see bubble)

counter = -11
while counter <=10:
print(counter)
counter+= 1 # a = a X 5 => a X= 5



# Sum of first 10 natural numbers
sum=0
for i in range(1,11):
sum+=i # sum = sum + i
print("Sum from For loop: ",sum)

# Sum of first 10 natural numbers
sum=0
counter = 1
while counter <=10:
sum+=counter
counter+=1
print("Sum from While loop: ",sum)
 
str1 = 'hello'
str2 = "hi"
str3 = '''Hello there'''
str4 = """Good evening"""
print(str4[-1])
print(str4[:4])
print(str4[1:4])
print(str4[-3:])
print(str1.count('l'))
print(str4.upper())
print(str1.upper().isupper())
num1 = input("Enter a number: ")

print(num1)
#List
list1 = [2,4,6,8.9,"Hello",True,[2,3,4]]
print(type(list1))
print(list1)
print(type(list1[-1]))
var = list1[-1]
print(list1[-1][0])
print(list1[-3:])
print(len(list1[-1]))

for i in list1:
print(i, end=" , ")
print()
l1 = [2,4,6,8]
l2 = [1,3,5,7]
print((l1+l2)*3)

l1.append(19)
print(l1)
l1.insert(2,"Hello")

l1.pop(0) #index / positive to remove
l1.remove(19) #value to remove
print(l1)
l1[1] = 18
print(l1)

l11 = l1
l21 = l1.copy()
print("1")
print("L1: ",l1)
print("L11: ",l11)
print("L21: ",l21)
l11.append(66)
l1.append(55)
l21.append(66)
print("2")
print("L1: ",l1)
print("L11: ",l11)
print("L21: ",l21)

l1.extend(l11) # l1 = l1+l2
print(l1)
#l1.sort()
l1.reverse()
print(l1)
print(l1.count(66))
print(l1.index(8))
t1 = tuple(l1)
t1 = list(t1)
t1 = (2,4,5)
n1,n2,n3 = t1

t1 = (3,)
print(type(t1))
t1 = (3)
print(type(t1))

dict1 = {55:"Sachin", "Name": "Cricket"}
word = "hello"
guess = "ll"
ind = 0
# The third argument of replace() limits the number of replacements, so only
# the first "l" becomes "L".
word1 = word.replace("l", "L", 1)
print(word1)
# Print the index of every occurrence of guess: each search starts one
# position after the previous match.
for i in range(word.count(guess)):
    ind = word.find(guess, ind)
    print(ind)
    ind = ind + 1

# split() cuts the text at every "o"; joining the pieces with "o" again
# rebuilds the original string.
word3 = "How are you doing"
l1 = word3.split("o")
print(l1)
word4 = "o".join(l1)
print(word4)

#Strings
# Guess-the-word game: the word is shown as "* " per letter and each correct
# guess reveals every position where that letter occurs.
word = "hEllo".lower()
print(word)
display_text = "* " * len(word)
print(display_text)
while True:
    guess = input("Guess the character: ")
    # Slicing (instead of guess[0]) keeps an empty answer from raising
    # IndexError; "" then fails isalpha() and is reported as invalid.
    guess = guess[:1].lower()

    if guess.isalpha():
        if guess in word:
            ind = 0
            for i in range(word.count(guess)):
                ind = word.find(guess, ind)
                #now time to reveal
                # letter index -> display index: 0 - 0, 1 - 2, 2 - 4
                # (every letter takes two characters: "*" plus a space)
                display_text = display_text[:ind*2] + guess + display_text[ind*2+1:]
                ind = ind + 1
            print(display_text)

            # no star left means every letter has been revealed
            if "*" not in display_text:
                print("Congratulations!")
                break
        else:
            print("Given character is not in the original word")
    else:
        print("Invalid character")

#List
l1 = [2,4,6.5,"Hello",True,[2,4,6]]
l1.append(11)
l1.insert(1,"Good evening")
l1.pop(0) #removes element from the given position
l1.remove(6.5) #removes the first element equal to the given value

l2 = l1          # second name for the SAME list object
l3 = l1.copy()   # copy() must be called; without () l3 would be the method itself
print("Set 1: ")
print("l1 : ",l1)
print("l2 : ",l2)
print("l3 : ",l3)

print("Set 2: ")
print("l1 : ",l1)
print("l2 : ",l2)
print("l3 : ",l3)
print("######")
'''
22
12
2022

22nd December 2022
'''
month_txt = ["January","February","March","April","May","June","July","August",
"September","October","November","December"]
dt_ending = ["st","nd","rd"]+["th"]*17 +["st","nd","rd"]+["th"]*7 +["st"]
date_user = int(input("Enter Date: "))
month_user = int(input("Enter month:"))
year_user = input("Enter the year: ")
display_txt = str(date_user) +dt_ending[date_user-1]+" " + month_txt[month_user-1]+" " +year_user

print(display_txt)

l1 = [5,10,15,20,25,30]
print(len(l1))


sample = [[1,2,3,4,5],
[2,4,6,8,10],
[3,6,9,12,15]]
#dictionary: mutable collection of key:value pairs (keeps insertion order since Python 3.7)
main_dict = {}
d1 = {"name":"sachin"}
d2 = {"city":"mumbai"}
main_dict.update(d1)
main_dict.update(d2)
key="sports"
val="cricket"
temp={key:val}
main_dict.update(temp)

main_dict2 = main_dict
main_dict3 = main_dict.copy()
print("Set 1")
print("Dict 1: ",main_dict)
print("Dict 2: ",main_dict2)
print("Dict 3: ",main_dict3)
key="marks"
val=[55,44,66,77,88]
temp={key:val}
main_dict.update(temp)
print(main_dict)

print("Set 1")
print("Dict 1: ",main_dict)
print("Dict 2: ",main_dict2)
print("Dict 3: ",main_dict3)

print("Set Mem Loc")
print("Dict 1: ",id(main_dict))
print("Dict 2: ",id(main_dict2))
print("Dict 3: ",id(main_dict3))

main_dict.pop('city')
print("Dict 1: ",main_dict)
main_dict.popitem()
print("Dict 1: ",main_dict)
#keys
for i in main_dict.keys():
print(i)
#values
for i in main_dict.values():
print(i)
#items
for i,j in main_dict.items():
print(i," : ",j)
# List functions

### MAP
list1 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29]
#find cube of all these values

result = list(map(lambda x:x**3,list1))
print("Result = ",result)

### FILTER
result = filter(lambda x:x>=15,list1)
print("Filtered values: ",list(result))

### REDUCE
from functools import reduce
result = reduce(lambda x,y:x+y,list1)
print("Sum is ",result)

result = reduce(lambda x,y:x+y,[1,2,3,4,5,6,7])
print("Sum is ",result)

# 1,2,3,4,5,6,7 =
## 1. x=1, y=2 , x+y = 3
## 2. x=3, y=3, x+y=6
## 3. x=6, y=4, x+y = 10
## 4. x=10, y=5 = 15
## 5. x=15, y=6 = 21
## 6. x=21,y=7 = 28

#how to connect to the database from Python
#SQLITE3 - installed on local machine, nobody can connect from outside
import sqlite3

con_str = sqlite3.connect("classnotes.db")
# try/finally guarantees the connection is closed even if a statement fails.
try:
    cursor = con_str.cursor()

    # IF NOT EXISTS makes the script runnable both on a fresh database and on
    # one where the table was already created by an earlier run.
    q1 = '''Create table if not exists Notes(
    ID int primary key,
    description text,
    subject varchar(30))'''
    cursor.execute(q1)

    # Values are passed separately through ? placeholders instead of being
    # written into the SQL text. OR IGNORE skips the insert when a row with
    # this primary key already exists, so re-running does not fail.
    q2 = '''Insert or ignore into Notes(ID, Description, Subject)
    values(?, ?, ?)'''
    cursor.execute(q2, (2, "This is a sample Maths notes to perform sone action", 'MATHS'))

    q4 = '''UPDATE Notes set subject='Science' where ID=2 '''
    cursor.execute(q4)
    q4 = '''DELETE From Notes where ID=1 '''
    cursor.execute(q4)

    # commit makes the insert/update/delete permanent
    con_str.commit()
    q3 = '''Select * from Notes'''
    recordset = cursor.execute(q3)
    #print(list(recordset))
    # each row is a tuple: print its columns on one line, then end the line
    for i in recordset:
        for j in i:
            print(j,end=" ")
        print()
finally:
    con_str.close()

##########
a=10
b=10
c =a/b
print("A/B = ",c) #ZeroDivisionError: division by zero
num1 = 0
num2 = 0
try:
    num1 = int(input("Enter a number: "))
    num2 = int(input("Enter another number: "))

except ValueError:
    #print("We cant proceed further because input is not valid")
    print("Invalid input, setting both the numbers to zero")
    # Reset explicitly: if only the second input was invalid, num1 would
    # otherwise keep the first value and contradict the message above.
    num1 = 0
    num2 = 0

finally:
    # finally runs whether or not the exception happened
    sum = num1 + num2
    print("Sum is ", sum)
    print("Thank you for using this program. See you soon")


#ValueError: invalid literal for int() with base 10: '8t'
Digitalfirm Nov 2022

Day 1:

Installation:

Python:  https://learn.swapnil.pw/python/pythoninstallation

 Pycharm:  https://learn.swapnil.pw/python/pythonides

 R & RStudio:   https://swapnil.pw/uncategorized/installating-r-studio

https://www.mckinsey.com/featured-insights

 

print("terterterg feererg eryey erytey eytytyt",end='\n')
print(10+5);print("10+5+3+1");print(10+5+3+1)
print("10+5+3+1 =",10+5+3+1, "and 5 + 4 + 95 =", 5+4+95);

# \n is used to move the content to new line
print('Hello',end='. ')
print("How are you",end='. ')
print("Thank you",end='\n') #I am printing thank you here
'this is a sample content print("Thank you")'
'''
multiline
text

'''
print(''' this is a sample text''')
print("\n is use for newline") #printing text is 2 different lines
print("\\n is use for newline") # \\ will be read as \

########################################
num1 = 18+4+2+10
print(num1)
print(num1)

print(num1)
print(num1)

quantity = 13
cost = 19
total = quantity * cost
print(total)
#output: The cost of each pen $19 so the total cost of 13 pens will be $247
print("The cost of each pen $",cost,"so the total cost of",quantity,"pens will be $",total)
#using f string - use variables within strings - variable should be in {}
print(f"The cost of each pen ${cost} so the total cost of {quantity} pens will be ${total}")

# basic variables
#integer
#float
#string
#bool
#complex
#data types
a = 50 #int - numbers without decimal
print("a = 50 => ", type(a))
a = 50.0 #float - numbers decimal
print("a = 50.0 => ", type(a))
a = "50" #str - text
print("a = '50' => ", type(a))
a = True # or False - 2 values , bool
print("a = True => ", type(a))
a = 5j #j is square root of -1
print("a = 5j => ", type(a))
print(a*a)

#Operators
print("Arithematic operations")
a=5
b=8
print(a+b)
print(a-b)
print(a*b)
print(b/a) #float as output
print(a**b) #** power/exponential
print(a//b) #integer division
print(b//a) #integer division
print(a%b) # 5
print(b%a)

print("Conditional Operators")
#input as integer/float - output will be bool
a=8 #assignment, assigning the value 5 to a
b=8
print(a>b) #is a greater than b
print(a<b)
print(a>=b)
print(a<=b)
print(a==b) # == asking a question
print(a!=b) #is a not equal to b
#Logical operators: I/P: Bool and O/P: Bool
# P1: Sachin and Dravid will open the batting
# P2: Sachin or Dravid will open the batting
# A: Sachin and Sehwag opened the batting -
#AND - even one condition is FALSE - entire thing would be False
#OR - even one condition is TRUE - entire thing would be TRUE
a = 10
b = 20
print("a>b or b>a and b!=a: ",not(a>b or b>a and b!=a))
print("not b==a: ",not b==a)
# TRUE

#membership: in
l1 = [3,4,5,6,7]
print(l1)
print(type(l1))
print(5 not in l1)

#convert into different number systems
a= 10 #integer - decimal number system
print(bin(a))
b=0b10
print("b = ",int(b))
#hex for hexadecimal (0x5050) and oct for octal (0o) - number systems
print(oct(b))
print(hex(b))
###### example 1
marks1 = 89
marks2 = 90
marks3 = 56
marks4 = 67
marks5 = 78
sum = marks1 + marks2+marks3 + marks4 + marks5
avg = sum/5
print(f"Total marks obtained in 5 subjects is {sum} and average is {avg} %")

###### example 2
marks1 = input("Enter marks in Subject 1: ")
marks1 = int(marks1)
marks2 = int(input("Enter marks in Subject 2: "))
marks3 = 56
marks4 = 67
marks5 = 78
sum = marks1 + marks2+marks3 + marks4 + marks5
avg = sum/5
print(f"Total marks obtained in 5 subjects is {sum} and average is {avg} %")

# Conditional statements
if avg>=50:
print("You have passed")
print("In IF")
print("Hello")
print("hi")
else:
print("You have failed")

print("Thank you")
#
##Assignment 1: Input a number from the user and Check if the number is positive or not
##Assignment 2: Input number of sides from the user and check if its triangle or not
n=4  #s2 = 0 s1 =5
if n<3:
print("Invalid Shape")
elif n==3:
print("Its a triangle")
elif n==4:
s1 = int(input("Enter length: "))
s2 = int(input("Enter breadth: "))
if s1==s2:
print("Its a square")
if s1==0:
print("Area is not possible")
else:
print("Area is: ",s1*s2)
else:
print("Its a rectangle")
if s1==0:
print("Area is not possible")
else:
if s2==0:
print("Area is not possible")
else:
print("Area is: ",s1*s2)

elif n==5:
print("Its a pentagon")
s1 = int(input("Enter length: "))
s2 = int(input("Enter breadth: "))
if s1 == s2:
print("Its a square")
if s1 == 0:
print("Area is not possible")
else:
print("Area is: ", s1 * s2)
else:
print("Its a rectangle")
if s1 == 0 or s2==0:
print("Area is not possible")
else:
print("Area is: ", s1 * s2)
elif n==6:
print("Its a hexagon")
s1 = int(input("Enter length: "))
s2 = int(input("Enter breadth: "))
if s1 == s2:
print("Its a square")
if s1 == 0:
print("Area is not possible")
else:
print("Area is: ", s1 * s2)
else:
print("Its a rectangle")
if s1 == 0:
print("Area is not possible")
elif s2==0:
print("Area is not possible")
else:
print("Area is: ", s1 * s2)
elif n==7:
print("Its a heptagon")
elif n==8:
print("Its an octagon")
else:
print("Its a complex shape")


#Assignment: Program to find sum and avg of 5 marks
# and assign grade on the basis on:
# avg > 90: A
#avg >75: B
#avg >60: C
#avg >50: D
#avg >40: E
#avg<40: F
#Loops- repeat given block of code

#For loop - exactly how many times to repeat
for i in range(1,10,2): #range(start - included,end-excluded,increment): 1,3,5,7,9
print(i,":Hello")

for i in range(3, 6): # range(start - included,end-excluded,increment=1): 3,4,5
print(i, ":Hello")

for i in range(3): # range(start=0,end-excluded,increment=1): 0,1,2
print(i, ":Hello")
sum=0
for i in range(5):
marks = int(input("Enter marks: "))
sum+=marks
avg = sum/5

for i in range(5):
print("*",end=" ")
print()
'''
* * * * *
* * * * *
* * * * *
* * * * *
* * * * *
'''
for j in range(5):
for i in range(5):
print("*",end=" ")
print()
print("------------------\n")
'''
* * * * *
* * * *
* * *
* *
*
'''
for j in range(5):
for i in range(5-j):
print("*",end=" ")
print()
print("------------------\n")

'''
*
* *
* * *
* * * *
* * * * *
'''
for j in range(5):
for i in range(1+j):
print("*",end=" ")
print()
print("------------------\n")

'''
*
* *
* * *
* * * *
* * * * *
'''
#While
i=0
while i<5:
print("Hello")
i+=1

#adding 2 numbers till user says yes
ch='y'
while ch=='y':
a=30
b=50
print("Sum is 80")
ch=input("type y to continue, anyother key to stop: ")



#rewriting same program using While True
while True:
a = 30
b = 50
print("Sum is 80")
ch = input("type y to continue, anyother key to stop: ")
if ch!='y':
break

#lets write a program to print addition of 2 numbers only when they are even
#otherwise ignore, continue till user wants

while True:
n1 = int(input("Enter first number: "))
if n1%2==1:
continue #continue will take you the beginning of the loop
n2 = int(input("Enter second number: "))
if n2 % 2 == 1:
continue
sum = n1 + n2
print("Sum is ",sum)
ch=input("Hit enter to continue, anyother key to stop: ")
if len(ch)!=0:
break #break will throw you out of the loop

Assignments

1.     # Assignment 1: Modify the Total Avg marks calculation program to do it for 5 students
# Assignment 2: Modify your Voting program (eligible to vote or not) to a repeat it for multiple input until
# user wants to continue

  1. Write a Python program that computes the factorial of an integer.
  2. Program to find sum N natural numbers
  3. Write code to display and count the factors of a number
  4. Program to check if eligible to vote in India
  5. Enter marks of 3 subjects for 5 students and grade them. Check for data validity and use BREAK and CONTINUE where necessary
  6. Check the type of a Triangle: Isosceles, Equilateral, Scalene, Right Angle
  7. Input 3 numbers and re-arrange them in ascending order. Use BOOLEAN
#STRINGS
name1 = "Sachin"
#first character
print(name1[0]) #0 is for first character
print(name1[2]) #3rd character
size = len(name1)
print(name1[size-1]) #last character
print(name1[-1]) #last character
print(name1[1:4]) #2,3,4 th characters
print(name1[:3]) #no val on left of : means its zero
print(name1[3:6]) #last 3 characters
print(name1[size-3:size]) #last 3 characters
print(name1[-6:-3]) #first 3 characters
print(name1[-size:3-size]) #first 3 characters
print(name1[-3:]) #last 3 char - no val on right of :mean go till last

print("For loop")
for i in name1:
print(i)

for i in range(len(name1)):
print(f"the chracter at the index {i} is {name1[i]}")

for i in enumerate(name1):
print(i)

for i,j in enumerate(name1):
print(f"the chracter at the index {i} is {j}")

print("S" in name1)
name2 = "Tendulkar"
print(name1 + " " + name2)

print((name1 +" ")* 4)
#STRINGS
name1 = "Sachin"
#first character
print(name1[0]) #0 is for first character
print(name1[2]) #3rd character
size = len(name1)
print(name1[size-1]) #last character
print(name1[-1]) #last character
print(name1[1:4]) #2,3,4 th characters
print(name1[:3]) #no val on left of : means its zero
print(name1[3:6]) #last 3 characters
print(name1[size-3:size]) #last 3 characters
print(name1[-6:-3]) #first 3 characters
print(name1[-size:3-size]) #first 3 characters
print(name1[-3:]) #last 3 char - no val on right of :mean go till last

print("For loop")
for i in name1:
print(i)

for i in range(len(name1)):
print(f"the chracter at the index {i} is {name1[i]}")

for i in enumerate(name1):
print(i)

for i,j in enumerate(name1):
print(f"the chracter at the index {i} is {j}")

print("S" in name1)
name2 = "Tendulkar"
print(name1 + " " + name2)

print((name1 +" ")* 4)
# String methods
val1 = "Sachin 10Dulkar"
print(val1.isalnum())
print(val1.islower())
print(val1.istitle())
val2 = "12345"
print(val2.isdigit())
#lower upper title
print("Second set of functions")
val3 = "how ARE you doiNG todaY?"
print(val3.upper())
print(val3.lower())
print(val3.title())
#find
txt_to_search = "Are"
val4 = val3.lower()
print(val4.find(txt_to_search.lower()))
print(val3.replace("ARE","is"))
val3 = val3.lower().replace("are","is")
print(val3)

#split and join
val3 = "how ARE you are doiNG todaY?"
print(val3.split())
val4 = "HOW|ARE|YOU|DOING|TODAY"
print(val4.replace("|"," "))
val4_list = val4.split("|")
val6_str = " ".join(val4_list)
print(val6_str)
val7 = " how ARE you doiNG todaY? "
val7_strip = val7.strip()
print(val7_strip)
val_cnt = val3.lower().count("area")
print(val_cnt)
# LIST
str1 = "Hello"
print(str1[1])
# str1[1] = "Y" #this is not possible
# strings are called as immutable data types
list1 = [50, 4, 5.5, "Hello", True]
print(type(list1))
print(len(list1))
print(list1[3][2])
print(type(list1[3]))

for i in list1:
print(i)
for i in range(len(list1)):
print(list1[i])

l1 = [1, 2, 3, 4]
l2 = [10, 20, 30]
l3 = l1 + l2
print("Adding two list: ", l3)
print("Multiply: ", l2 * 3)
print(30 not in l2)

print(l2[2])
l2[2] = "Thank You" # lists are mutable
print(l2[2])

sum = 0
marks = []
for i in range(0):
m1 = int(input("Enter marks: "))
sum += m1
marks.append(m1)

print("Total marks", sum)

# append will add at the end
# insert - pos and value: value is added at the given pos
marks.insert(2, 11) # [11
marks.insert(2, 22) # [11,22]
marks.insert(2, 33) # [11,22,33]
marks.insert(2, 44) # [11,22,44,33]
marks.insert(2, 55) # [11,22,55,44,33]
marks.insert(2, 66) # [11,22,66,55,44,33]
marks.insert(2, 77) # [11,22,77,66,55,44,33]
# marks[7] = 100 - error since index 7 isnt there
print("Marks obtained are: ", marks)
# pop - removes from the given position
# remove - removes given value
val_remove = 77
if val_remove in marks:
marks.remove(val_remove)
else:
print("Value is not present in the list")
print("Marks obtained are: ", marks)
pos_remove = 2
if pos_remove < len(marks):
marks.pop(pos_remove)
else:
print("List doesnt have that index")

print("Marks obtained are: ", marks)
marks.clear()
print("Marks obtained are: ", marks)

Assignment


 

Assignments

1. Write a Python program to sum all the items in a list.

2. Write a Python program to multiply all the items in a list.

3. Write a Python program to get the largest number from a list.

4. Write a Python program to get the smallest number from a list.

5. Write a Python program to count the number of strings where the string length is 2 or more and the first and last character are same from a given list of strings. 

Sample List : [‘abc’, ‘xyz’, ‘aba’, ‘1221’]

Expected Result : 2

6. Write a Python program to get a list, sorted in increasing order by the last element in each tuple from a given list of non-empty tuples. 

Sample List : [(2, 5), (1, 2), (4, 4), (2, 3), (2, 1)]

Expected Result : [(2, 1), (1, 2), (2, 3), (4, 4), (2, 5)]

7. Write a Python program to remove duplicates from a list.

8. Write a Python program to check a list is empty or not.

9. Write a Python program to clone or copy a list.

10. Write a Python program to find the list of words that are longer than n from a given list of words.

11. Write a Python function that takes two lists and returns True if they have at least one common member.

12. Write a Python program to print a specified list after removing the 0th, 4th and 5th elements.

Sample List : [‘Red’, ‘Green’, ‘White’, ‘Black’, ‘Pink’, ‘Yellow’]

Expected Output : [‘Green’, ‘White’, ‘Black’]

13. Write a Python program to generate a 3*4*6 3D array whose each element is *.

14. Write a Python program to print the numbers of a specified list after removing even numbers from it.

15. Write a Python program to shuffle and print a specified list.

16. Write a Python program to generate and print a list of first and last 5 elements where the values are square of numbers between 1 and 30 (both included).

17. Write a Python program to generate and print a list except for the first 5 elements, where the values are square of numbers between 1 and 30 (both included).

18. Write a Python program to generate all permutations of a list in Python.

19. Write a Python program to get the difference between the two lists.

20. Write a Python program to access the index of each element of a list.

DAY 8

 

def myreverse(a):
    """Reverse the list ``a`` in place and return its new first element.

    The caller's list is modified, because ``list.reverse()`` works on the
    object itself rather than on a copy. The list is printed before it is
    reversed.

    Raises:
        IndexError: if ``a`` is empty (there is no first element to return).
    """
    print("A = ", a)
    a.reverse()
    return a[0]


list1 = [50, 4, 5.5, "Hello", True]
list2 = [90, 20, 50, 40, 30, 70]
list2.reverse()
# print(list2)
print("Myreverse: ", myreverse(list2))
print(list2.reverse())
list2.sort()
# print(list2)
list1.extend(list2)
print("New set: ", list1)
a = "5"
# print(int(a))
# Shows the difference between giving a list a second name and copying it:
# after list2.append(10), list3 shows the new element but list4 does not.
print("Learning COPY")
list2 = [90, 20, 50, 40, 30, 70]
list3 = list2 # alias, not a copy: list3 and list2 are the same list object
list4 = list2.copy() # shallow copy: a new list holding the same elements
print("list2: ", list2)
print("list3: ", list3)
print("list4: ", list4)
list2.append(10)
print("list2: ", list2)
print("list3: ", list3)
print("list4: ", list4)

# Menu-driven stack: the end of list_master is the top of the stack, so
# append() pushes and pop() removes the most recently added element.
print("Stack Implementation")
list_master = []
while True:
    print("1. Add to the stack")
    print("2. Remove from the stack")
    print("3. Clear the stack")
    print("4. Quit")
    # A non-numeric answer is treated like any other invalid option instead
    # of ending the program with ValueError.
    try:
        op = int(input("Enter your option"))
    except ValueError:
        print("Invalid option, Try again!")
        continue
    if op == 1:
        try:
            val = int(input("Enter the element to add: "))
        except ValueError:
            print("Invalid element, Try again!")
            continue
        list_master.append(val)
        print("After adding list: ", list_master)
    elif op == 2:
        if list_master:
            list_master.pop()
            print("After removing list: ", list_master)
        else:
            print("List is empty!")
    elif op == 3:
        list_master.clear()
        print("After clearing list: ", list_master)
    elif op == 4:
        print("Thank you for using the program.")
        break
    else:
        print("Invalid option, Try again!")

############### LIST #######################

# TUPLE
t2 = ()
t3=(55,)
t1 = (5, 4, 6, 8, 4)
print(type(t1))
t1 = list(t1)
print(t1.count(8))
# Dictionary
# list: linear ordered mutable collection
# tuple: linear ordered immutable collection
# dictionary: non-linear mutable collection that keeps insertion order (dictionaries were unordered before Python 3.7)
# dictionary is made up of key & value
dict1 = {} # empty dictionary
print(type(dict1))
dict1 = {"Name": "Sachin", "City": "Mumbai", "Runs": 12900, "IsPlaying": False}
print(dict1)
print(dict1["City"])
print(dict1.get("City"))
val = "India"
key = "Country"
t_dict = {key: val}
dict1.update(t_dict)
print("Dictionary after Update: \n", dict1)
print("Size of dictionary: ", len(dict1))

print("keys:", dict1.keys())
for i in dict1.keys():
print(i)
for i in dict1.values():
print(i)
for i in dict1.keys():
print(dict1[i])

for i, j in dict1.items():
print(i)

if "City" in dict1: # default it checks in keys
print("We have City")
if "Mumbai" in dict1.values():
print("We have City Mumbai")
else:
print("Mumbai is not there")

# Three ways of removing entries from dict1, printing the dictionary after each.
print("Dict1: ", dict1)
dict1.pop("City") # pop takes the key to remove
print("Dict1: ", dict1)
dict1.popitem() # popitem takes no key: it removes the last inserted key:value pair
print("Dict1: ", dict1)
print(type(dict1.values()))
# Remove by VALUE: find the position of "Sachin" among the values, take the
# key at the same position, and pop that key.
dict1.pop(list(dict1.keys())[list(dict1.values()).index("Sachin")])
# print()
print("Dict1: ", dict1)
#Set - also mutable
set1 = {"New York"}
print(type(set1))
set1.add("Chicago")

#update
#union
s1 = {1,3,5,7,2,4}
s2 = {2,4,6,8}
print("Union: ",s1.union(s2))
print("Union: ",s1 | s2)
#s1.update(s2)
#print("Union Update: ",s1)

#difference
print("difference :",s1-s2)
print("difference :",s2-s1)
print("Difference: ",s1.difference(s2))
#s1.difference_update(s2)
#print("Difference update: ",s1)
print("Symmetric Difference: ", s1 ^ s2)


#intersection
print("intersection :",s1.intersection(s2))
print("intersection: ",s1 & s2)
print("intersection update: ",s1.intersection_update(s2))
print(s1)
s1.intersection_update(s2)
print("intersection update: ",s1)

print(set1)
l1 = [5,10,10,15,15,15,20,20,25]
l1 = list(set(l1))
print(l1)

### Functions



def myfunc1(name):  # takes ONE input argument and doesn't return anything
    """Print a three-line greeting addressed to ``name``.

    Nothing is returned explicitly, so the call evaluates to ``None``.
    """
    print("Hello ", name)
    print("How are you?")
    print("Where are you going?")

def myfunc2(name):  # takes ONE input argument and returns TWO values
    """Print a three-line greeting addressed to ``name`` and return a farewell.

    Returns:
        The tuple ``("Thank You", "Bye")``; listing several values after
        ``return`` packs them into one tuple.
    """
    print("Hello ", name)
    print("How are you?")
    print("Where are you going?")
    return "Thank You", "Bye"

def myfunc():  # example that takes no input argument and returns nothing
    """Print a fixed three-line greeting."""
    print("Hello")
    print("How are you?")
    print("Where are you going?")

myfunc()
print("second time: ")
myfunc1("Kapil") #1 required positional argument: 'name'
print(myfunc2("Sachin"))


#functions

#required positional arguments
## Function definition
def calculate(a, b):
    """Print both arguments and return their sum, difference, product and quotient.

    Both arguments are required and are matched by position.

    Returns:
        The tuple ``(a + b, a - b, a * b, a / b)``.

    Raises:
        ZeroDivisionError: if ``b`` is zero.
    """
    print("Value of a is ", a)
    print("Value of b is ", b)
    # "total" rather than "sum", so the built-in sum() is not hidden
    total = a + b
    diff = a - b
    mul = a * b
    div = a / b
    return total, diff, mul, div

## Function definition - Default argument
def calculate1(a, b=50):
    """Print both arguments and return their sum, difference, product and quotient.

    ``b`` is a default argument: when the caller leaves it out, 50 is used.

    Returns:
        The tuple ``(a + b, a - b, a * b, a / b)``.

    Raises:
        ZeroDivisionError: if ``b`` is zero.
    """
    print("Value of a is ", a)
    print("Value of b is ", b)
    # "total" rather than "sum", so the built-in sum() is not hidden
    total = a + b
    diff = a - b
    mul = a * b
    div = a / b
    return total, diff, mul, div

result = calculate(30,20)
print("Addition of given 2 values is ", result[0])
result = calculate1(30,5)
print("Addition of given 2 values is ", result[0])
result = calculate1(30)
print("Addition of given 2 values is ", result[0])
#non positional
result = calculate1(b=30,a=5) #nonpositional => Keyword arguments
print("Addition of given 2 values is ", result[0])

#variable name arguments
def mycalculation(a, c, *b, **d):  # * takes multiple values
    """Print how the arguments of a call are distributed over the parameters.

    The first two positional values go to ``a`` and ``c``; any further
    positional values are collected in the tuple ``b``; keyword arguments
    are collected in the dictionary ``d``.
    """
    print("A = ", a)
    print("B = ", b)
    print("C = ", c)
    print("D = ", d)

mycalculation(5,6,7,8,9,10,11,name="Sachin", runs=5000)

def check_prime(a):
    """Return True if ``a`` is a prime number, otherwise False.

    Numbers below 2 are not prime. For the rest, a divisor is searched from 2
    up to the square root of ``a``: if ``a`` has a divisor larger than its
    square root, the matching co-divisor is smaller, so it would already have
    been found.
    """
    if a < 2:
        return False
    i = 2
    # i * i <= a includes the square root itself, so 4, 9, 25 ... are caught
    while i * i <= a:
        if a % i == 0:
            return False
        i += 1
    return True

result = check_prime(1100)
if result:
print("Its a prime number")
else:
print("Its not a prime number")

#generate prime numbers between 500 and 1000
prime_num = []
for i in range(500,1001):
if check_prime(i):
prime_num.append(i)
print("Prime numbers are: ",prime_num)
#Recursive function
def myfunc(number):
    """Print the numbers from ``number`` down to 1, one per line, recursively.

    Returns 0 when called with a value below 1 (the stopping condition).
    For any other value nothing is returned, because the result of the
    recursive call is not passed back.
    """
    if number < 1:
        return 0
    print(number)
    myfunc(number - 1)

def factorial(n):
    """Return n! computed recursively as n * (n-1)!.

    Any ``n`` below 1 returns 1. That covers 0! = 1 and stops the recursion;
    it also means negative inputs return 1 instead of raising an error.
    """
    if n < 1:
        return 1
    return n * factorial(n - 1)


if __name__ =="__main__":
myfunc(100)
# 5! = 5 * 4 * 3 * 2 * 1!
fact = factorial(5)
print("Factorial is ",fact)
class Person:
    """Example of a class-level counter shared by all objects.

    ``population`` belongs to the class and is increased every time
    ``welcome`` is called on any object; ``name`` belongs to the object.
    """

    # class level variable
    population = 0

    def welcome(self, name):
        """Store ``name`` on this object and increase the shared population by 1."""
        self.name = name
        print("Welcome to the world")
        Person.population += 1

    def display(self):
        """Print this object's name and the population count.

        ``welcome`` must have been called on the object first, since that is
        where ``self.name`` is set.
        """
        print("Welcome to ", self.name)
        print("Total Population: ", Person.population)

p1 = Person()
p1.welcome("Sachin")
p3 = Person()
p3.welcome("Laxman")

p2 = Person()
p4 = Person()
p5 = Person()
p2.welcome("Rekha")
p4.welcome("Geeta")
p5.welcome("Rohit")
p3.display()
p4.display()
class Person():
    """Example of public, protected (_) and private (__) methods."""

    def __method1(self):
        """Private method: reachable as ``self.__method1()`` only inside the class."""
        print("Method 1")

    def _method2(self):
        """Protected by convention: the single underscore is not enforced."""
        print("Method 2")

    def method3(self):
        """Public method; it calls the private method from inside the class."""
        print("Method 3")
        self.__method1()


class Student(Person):
    """Subclass of Person: inherits its methods and adds ``read``."""

    def read(self):
        print("I am studying")

p1 = Person()
#p1.__method1() - private members cant be called
p1.method3()
p1._method2()

s1 = Student()
s1.read()
s1.method3()

# public
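#protected _ : by convention it should not be accessed outside the class and its subclasses, but Python does not enforce this, so in practice it behaves like public
#private __ : the name is mangled to _ClassName__name, so the member cannot be reached by its plain name from outside the class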

19 DEC 2022

#Class
# A string literal is an object of the built-in str class, as type() shows.
str1="555"
print(type(str1))
str2 = "Good day"
# upper() is a method provided by the str class and is called on the object.
print(str1.upper())

class Apple:
    """Shows instance attributes (name) next to a class attribute (loc)."""

    # Class attribute shared by every Apple object.
    loc = "World"

    def getvalue(self, name):
        """Store ``name`` on this instance."""
        self.name = name

    def display(self):
        """Print this instance's name; getvalue() must have been called first."""
        print(f"I am {self.name}")

    @classmethod
    def setaddress(cls):
        """Change the shared location for the whole class."""
        cls.loc = "Universe"

    @classmethod
    def address(cls):
        """Print the shared location."""
        print(f"Location: {cls.loc}")


a1 = Apple()
a1.getvalue("Sachin")
a2 = Apple()
a2.getvalue("Kapil")
a3 = Apple()
a3.getvalue("Laxman")
a4 = Apple()
print(type(a1))
a1.display()
# The class method is called through a1 but updates the class, so a2 sees it too.
a1.setaddress()
a2.address()

class MySum:
    """Reads two integers from the user and prints their sum."""

    def getval(self):
        """Prompt for two values and store them as ``num1`` and ``num2``.

        Raises:
            ValueError: If an entered value is not a valid integer.
        """
        self.num1 = int(input("Enter value 1: "))
        self.num2 = int(input("Enter value 2: "))

    def printsum(self):
        """Store ``num1 + num2`` in ``self.sum`` and print it."""
        self.sum = self.num1 + self.num2
        print("Sum of the values: ", self.sum)

# Create a MySum object, read the two numbers from the keyboard, then print their sum.
m1 = MySum()
m1.getval()
m1.printsum()
class Employee:
    """Employee record with a private salary and a class-wide object count."""

    # Class attribute: incremented once per created object.
    population = 0

    def __init__(self, name, age, salary):
        """Store the details and count the new object.

        Args:
            name: Public attribute.
            age: Public attribute.
            salary: Kept in the private attribute ``__salary``.
        """
        self.name = name
        self.age = age
        self.__salary = salary
        Employee.population += 1

    def edit_details(self, name, age, salary):
        """Overwrite name, age and salary of an existing object."""
        self.name = name
        self.age = age
        self.__salary = salary

    def _getsalary(self):
        """Return the private salary (protected accessor by naming convention)."""
        return self.__salary

    @classmethod
    def display_pop(cls):
        """Print how many Employee objects have been created."""
        print("Total Count of Objects = ", cls.population)


p1 = Employee("Sachin", 48, 1500)  # this is calling __init__()
p2 = Employee("Virat", 29, 1400)
p3 = Employee("Rohit", 29, 1300)
#print(p1.__salary)
p1.display_pop()
print(p1._getsalary())
#print(p2.getsalary())
#print(p3.getsalary())

'''
Encapsulation:
access modifiers:
3 types: Public (variablename), Private (__variablename) -only to class, Protected (_variablename)
'''
word = "hello"
guess = "ll"
ind = 0
# replace() with a count of 1 changes only the first matching character
word1 = word.replace("l", "L", 1)
print(word1)
# Repeat the search once for every occurrence reported by count()
for i in range(word.count(guess)):
    ind = word.find(guess, ind)
    print(ind)
    ind = ind + 1  # continue the next search after the position just found

# split() removes the separator; join() with the same separator restores the text
word3 = "How are you doing"
l1 = word3.split("o")
print(l1)
word4 = "o".join(l1)
print(word4)

#Strings
# Guess-the-word game: the word is shown as "* " per letter and every
# correctly guessed letter is revealed at all of its positions.
word = "hEllo".lower()
print(word)
display_text = "* " * len(word)
print(display_text)
while True:
    guess = input("Guess the character: ")
    if not guess:
        # Empty input has no first character; guess[0] would raise IndexError.
        print("Invalid character")
        continue
    guess = guess[0].lower()  # only the first typed character counts

    if guess.isalpha():
        if guess in word:
            ind = 0
            for i in range(word.count(guess)):
                ind = word.find(guess, ind)
                #now time to reveal
                #0 - 0, 1-2, 2-4  (letter index -> position in display_text)
                display_text = display_text[:ind*2] + guess + display_text[ind*2+1:]
                ind = ind + 1
            print(display_text)

            # No "*" left means every letter has been revealed.
            if "*" not in display_text:
                print("Congratulations!")
                break
        else:
            print("Given character is not in the original word")
    else:
        print("Invalid character")

class Book:
    """Book record; the class keeps a count of all books created."""

    # Class attribute: incremented once per created Book.
    book_count = 0

    def __init__(self, author, title, book_id):
        """Store author, title and id, and count the new book."""
        self.author = author
        self.title = title
        self.book_id = book_id
        Book.book_count += 1

    def getbook(self):
        """Print the title together with its author."""
        print(f"{self.title} is written by {self.author}")

    @classmethod
    def getBookCount(cls):
        """Print the number of Book objects created so far."""
        print("Total books available: ", cls.book_count)


book1 = Book('Swapnil Saurav', 'Learn and Practice Python', 9012)
book1.getbook()
book1.getBookCount()
#Inheritance
class School:
    """Base class holding the school name."""

    def __init__(self, schoolname):
        self.schoolname = schoolname

    def _displaydetails(self):
        """Print the school name."""
        print("School name is ", self.schoolname)


class Student(School):
    """Student that belongs to a school."""

    def __init__(self, stname, schoolname):
        super().__init__(schoolname)
        self.stname = stname

    def displaydetails1(self, name=None, age=None):
        """Print the student's name.

        Python has no method overloading: defining the same method several
        times keeps only the last definition. The three earlier variants
        (no argument, ``name``, ``name`` and ``age``) are therefore merged
        into one method with optional parameters, so every call form works.
        ``name`` and ``age`` are accepted but not used.
        """
        print("Student name is ", self.stname)


class Teacher(School):
    """Teacher that belongs to a school."""

    def __init__(self, tname, schoolname=None):
        # Initialise the parent too, so the inherited _displaydetails() has
        # a schoolname attribute to print (None when no school is given).
        super().__init__(schoolname)
        self.tname = tname

    def displaydetails(self):
        """Print the teacher's name."""
        print("Teacher name is ", self.tname)


sc1 = School("ABC International School")
st1 = Student("Sachin Tendulkar", "XYZ International School")
t1 = Teacher("Kapil Dev")
sc1._displaydetails()
st1.displaydetails1()
t1.displaydetails()

'''
Public
Protected _var (single underscore)
Private __var (double underscore)
'''
#1. declare a class calc
#2. initialize function to read 3 variables
#3. create another method to calculate: sum, multiply, minus
#4. Display the result using another method
#5. Create another class to perform arithmetic operators
## that you have learnt in Python: + - * / % ** //

class Calc:
    """Stores three numbers and computes sum, product and difference of the first two."""

    def __init__(self, a, b, c):
        self.n1 = a
        self.n2 = b
        self.n3 = c  # stored, but not used by any calculation below
        # Placeholder texts are shown by display() until calc() has run.
        self.add = "Addition not yet done"
        self.mul = "Multiplication not yet done"
        self.min = "Difference not yet done"

    def calc(self):
        """Compute n1+n2, n1*n2 and n1-n2 and store the results."""
        self.add = self.n1 + self.n2
        self.mul = self.n1 * self.n2
        self.min = self.n1 - self.n2

    def display(self):
        """Print the stored results (or the placeholders)."""
        print("Sum = ", self.add)
        print("Multiplication = ", self.mul)
        print("Difference = ", self.min)


class Arithmatic(Calc):
    """Extends Calc with division, modulus, power and integer division."""

    def __init__(self, a, b, c):
        # The parent initialiser already stores n1, n2 and n3.
        super().__init__(a, b, c)
        self.div = "Division not yet done"
        self.mod = "Modulus not yet done"
        self.pow = "Power not yet done"
        self.intdiv = "Integer Division not yet done"

    def calc(self):
        """Run the parent calculations, then the additional operators.

        Raises:
            ZeroDivisionError: If ``n2`` is 0.
        """
        super().calc()
        self.div = self.n1 / self.n2
        self.mod = self.n1 % self.n2
        self.pow = self.n1 ** self.n2
        self.intdiv = self.n1 // self.n2

    def display(self):
        """Print the parent's results followed by the additional ones."""
        super().display()
        print("Division = ", self.div)
        print("Modulus = ", self.mod)
        print("Power = ", self.pow)
        print("Integer Division = ", self.intdiv)


c1 = Arithmatic(10, 5, 12)
c1.calc()
c1.display()

c2 = Calc(3, 4, 6)
c2.calc()
c2.display()

TYPES OF FUNCTION

 

def myfun1(a, b):
    '''Example of Required Positional Argument.

    Both values must be supplied by the caller. Prints the operands and
    their sum, and returns the sum so that callers printing the result
    no longer get None.
    '''
    print(f"a is {a} and b is {b}")
    total = a + b
    print("Sum: ", total)
    return total

def myfun2(a=16, b=6):
    '''Example of Default Positional Argument.

    Either value may be omitted; a defaults to 16 and b to 6. Prints the
    operands and their sum, and returns the sum so that callers printing
    the result no longer get None.
    '''
    print(f"a is {a} and b is {b}")
    total = a + b
    print("Sum: ", total)
    return total

def myfun3(a, *b, **c):  #variable length arguments
    '''Example of variable length arguments.

    a is required; extra positional values are collected in the tuple b
    and keyword values in the dictionary c.
    '''
    print("a = ", a)
    print("b = ", b)  # * means tuple
    print("c = ", c)  # **- dictionary


myfun3(50, 5, 6, 7, 8, 9, 9, 11, 14, name="sachin", game="Cricket")


#Keyword argument
# Values are matched to parameters by name, so the order in the call is free.
n1,n2=14,26
print(myfun2(a=n2,b=n1))
# only b is given; a keeps its default value
result = myfun2(b=34)


# Positional call: values are matched to parameters from left to right.
n1,n2=14,26
print(myfun2(n1,n2))
# a single positional value goes to the first parameter; the second keeps its default
result = myfun2(34)
#result*=2
# prints whatever myfun2 returned
print(result)

# myfun1 has no defaults, so both values are required.
n1,n2=54,66
print(myfun1(n1,n2))
result = myfun1(34,76)
#result*=2
# prints whatever myfun1 returned
print(result)

#Types of functions based on input parameter:
## 1. Required positional arguments: You have to provide a value for each one, in the same order (left to right)
## 2. Default (positional) arguments: parameters that have a default value may be omitted in the call
import os
# os.name identifies the family of the operating system Python is running on.
print(os.name)
if os.name == "nt":
    print("Its a Windows machine")
elif os.name == "posix":
    print("its a Linux/Mac")
else:
    print("Other OS")

print(os.getcwd())
#os.rmdir("Nov_2")
#os.rename("file1.txt", "file1dec.txt")
print("iterate in folder:")
from pathlib import Path
# NOTE(review): hard-coded Windows folder; iterdir() raises FileNotFoundError when it does not exist.
path_list = Path("C:\\Users\\Hp\\Poems\\")
for i in path_list.iterdir():
    print(i)
# exist_ok=True keeps the script re-runnable: os.mkdir() raised FileExistsError
# once "Test2" had been created by an earlier run.
os.makedirs("Test2", exist_ok=True)

# `with` closes each file even when one of the reads or writes raises.
with open(r"C:\Users\Hp\Poems\Poem1.txt", "r") as fp:  #r for read w for write a append
    content = fp.read(200)  # read at most 200 characters -> str
    print(type(content))
    print(content)
    fp.seek(0)  # go back to the beginning of the file
    content = fp.readline(500)  # read one line (at most 500 characters) -> str
    print(type(content))
    print(content)

    content = fp.readlines()  # read all remaining lines -> list of str
    print(type(content))
    # NOTE(review): raises IndexError when fewer than five lines remain after the first one.
    print(content[4])

# Append the lines read above to a second file.
with open(r"C:\Users\Hp\Poems\testCopy\sample.txt", "a") as fp1:
    if fp1.writable():
        fp1.writelines(content)
#Numpy
import numpy as np
x = range(16)
# range: 0 to upto 16 - 0...15
x = np.reshape(x, (4, 4))
print(type(x))
print(x)
size = x.shape
print("Total rows = ", size[0])
print("Total columns = ", size[1])

#indexing: [row, column]; ":" selects a whole row or column, a:b selects a slice
print(x[1, 2])
print(x[3, 1])
print(x[0, :])
print(x[:, 0])
print(x[1:3, 1:3])

# arrays filled with a constant value
x = np.zeros((3, 3))
print(x)
x = np.ones((3, 3))
print(x)
x = np.full((3, 3), 99)
print(x)

x = np.random.random((3, 3))
print(x)

l1 = [[5, 10, 15], [9, 10, 11], [2, 3, 1]]
print(type(l1))
l1 = np.array(l1, dtype=np.int8)
print(l1)
print(type(l1))
l2 = np.array([[3, 6, 9], [7, 14, 21], [2, 4, 6]])
print(l2)

# Each operator is followed by its function form with the operands in the
# SAME order, so both prints of a pair show the same result (subtraction,
# division and matrix multiplication are not commutative).
#addition
print(l1 + l2)
print(np.add(l1, l2))

#subtraction
print(l1 - l2)
print(np.subtract(l1, l2))

#element-wise division
print(l1 / l2)
print(np.divide(l1, l2))

print("==========================")
print(l1, "\n", l2)
#matrix multiplication
print(l1 @ l2)
print(np.matmul(l1, l2))

# .flat iterates over every element of the array one by one
for i in l1.flat:
    print(i)

x = np.identity(6)
print(x)
print("Printing l1:\n", l1)
print("Printing Transpose of l1:")
l1_t = np.transpose(l1)
print(l1_t)

l1_det = np.linalg.det(l1)
print("Determinant of L1 is ", l1_det)
l1_inv = np.linalg.inv(l1)
print("Inverse of L1 is ", l1_inv)

#A singular matrix has a determinant of zero, so its inverse cannot be computed

# 2x-3y = 8
# 3x-4y = 12
# what is x & y?
# Numpy to solve linear algebra
# 2x +5y + 2z = -38
# 3x - 2y + 4z = 17
# -6x +y -7z = -12
import numpy as np
# Solve Coeff * [x, y, z] = Const through the inverse of the coefficient matrix.
Coeff = [[2, 5, 2], [3, -2, 4], [-6, 1, -7]]
Coeff_mat = np.array(Coeff)
Coeff_det = np.linalg.det(Coeff_mat)
# The determinant is a float, so compare with a tolerance instead of `== 0`:
# rounding can leave a tiny non-zero value for a matrix that is singular.
if np.isclose(Coeff_det, 0):
    print("There are no possible solution for given equations")
else:
    Const = [[-38], [17], [-12]]
    Coeff_inv = np.linalg.inv(Coeff_mat)
    sol = np.matmul(Coeff_inv, Const)
    print("Solution is: \n", sol)
    print(f"x={sol[0,0]}, y={sol[1,0]}, z={sol[2,0]}")
#SETS
set1 = {1,5,9,10,20}
print(type(set1))
set1.add(22)
print(set1)

set2 = set1 #alias, not a copy - set2 and set1 refer to the same set object in memory
set3 = set1.copy() #shallow copy - creates a separate, duplicate set object
print("printing 1: ")
print("Set 1: ",set1)
print("Set 2: ",set2)
print("Set 3: ",set3)
# adding through set2 also changes set1 (same object); set3 is not affected
set2.add(25)
set2.add(29)

print("printing 2: ")
print("Set 1: ",set1)
print("Set 2: ",set2)
print("Set 3: ",set3)
# id() shows the identity of each object: set1 and set2 share one, set3 has its own
print("Set 1: ",id(set1))
print("Set 2: ",id(set2))
print("Set 3: ",id(set3))

#union, intersection, difference, symmetric difference
Set2 = {1, 20, 5, 22, 9, 10, 29, 25}
Set3 = {1, 20, 5, 22, 9, 10,31,35}
# each method has an operator form: union |, intersection &, difference -, symmetric difference ^
print(Set2.union(Set3))
print(Set2 | Set3)

print(Set2.intersection(Set3))
print(Set2 & Set3)

# note: the two lines below are different differences (Set2 minus Set3, then Set3 minus Set2)
print(Set2.difference(Set3))
print(Set3 - Set2)

print(Set2.symmetric_difference(Set3))
print(Set2 ^ Set3)
print("Set 2: ",Set2)
print("Set 3: ",Set3)
# the *_update methods change Set2 in place and return None, so this prints None
print(Set2.symmetric_difference_update(Set3))
print("Set 2: ",Set2)
print("Set 3: ",Set3)

from datetime import datetime
# Measure how long a nested loop of n*n iterations takes.
currenttime = datetime.now()
print("Current time: ", currenttime)

n = 10000
counter = 0
for i in range(n):
    for j in range(n):
        counter += 1
        # True only when counter is an exact multiple of 1% of the n*n iterations
        if counter*100 % (n*n) == 0:
            print(f"{counter*100//(n*n)}% Task Completed")

endtime = datetime.now()
print("Total time taken by the program is ", endtime-currenttime)

from datetime import datetime, timedelta
# datetime.now() returns the current local date and time; its parts are available as attributes
print("Current time: ",datetime.now())
# strftime() formats the value using the given pattern (%Y year, %m month, %d day)
print("Current date: ",datetime.now().strftime("%Y,%m-%d"))
print("Current year: ",datetime.now().year)
print("Current month: ",datetime.now().month)
print("Current day: ",datetime.now().day)
print("Current hour: ",datetime.now().hour)
print("Current minute: ",datetime.now().minute)
print("Current second: ",datetime.now().second)

import time
print("Current time: ",time.strftime("%Y,%m-%d"))
# time.time() is the number of seconds since the epoch, as a float
print("Total time: ",time.time())
# adding a timedelta shifts the datetime by that duration
print("Tomorrow's time: ",datetime.now()+timedelta(days=1))
# pytz is a third-party package and has to be installed separately
from pytz import timezone
print("Current time in US Eastern is",datetime.now(timezone("US/Eastern")).strftime("%Y-%m-%d"))

# random numbers
import random
# a fixed seed makes the sequence of "random" values repeat on every run
random.seed(100)
print("Random = ",random.random()) # random float in the range 0 <= value < 1
print("Random = ",int(random.random()*1000))
# randint() includes both end points
print("Random Integer values: ",random.randint(500,9000))
choices = ["ONE","TWO","THREE","FOUR","FIVE","SIX"]
print("One value from the list: ",random.choice(choices))
# shuffle() reorders the list in place
random.shuffle(choices)
print("random shuffle: ",choices)

#MAP - works with List where you want to apply same calculation to all the numbers
distances = [1100, 1900, 4500, 6500, 3400, 2900, 5400]*500
from datetime import datetime

# Version 1: map() applies the same calculation (value * 3.1) to every element.
start = datetime.now()
dist_ft = list(map(lambda x: 3.1*x, distances))
end = datetime.now()
print("Total time taken by MAP = ", end-start)

# Version 2: the same calculation written as an explicit loop, for comparison.
start = datetime.now()
dist_ft2 = []
for i in distances:
    val = i*3.1
    dist_ft2.append(val)
#print("Output using Loops = ",dist_ft2)
end = datetime.now()
print("Total time taken by LOOP = ", end-start)
Learn Data Science

https://youtu.be/mr15WQQoTvI

19 OCTOBER 2022

Day 1 Video Session

 

print("hello")
print(5+4)
print('5+5')
print("10+frgjdsijgdskmdklfmdfmv4", 5+6, " = ", 11)
# 4 parameters/arguments
#Comment
price = 50 #variable called price is assigned a value 50
quantity = 23
TotalCost = price * quantity
print(TotalCost)

#The total cost of XquantityX pens selling at XpriceX would be XtotalcostX
# String literals need straight quotes; the typographic quotes used here
# before are not valid Python syntax.
print("The total cost of", quantity, "pens selling at", price, "would be", TotalCost)
print(f"The total cost of {quantity} pens selling at {price} would be {TotalCost}")

#going to use format string

 

Session 2: 20 OCT 2022

VIDEO Recording

var1 = 80
var2 = 60
sdfdwfdsg = "var3"
#Arithmetic
s1 = var1 + var2
print(s1, var1 /var2,"Now minus", var1 - var2)
print(var1 - var2)
print(var1 * var2)
print(var1 / var2) #division (result is a float)
print(var1 // var2) #integer division (fraction is dropped)

## Data types: nature of data that we can work with
#integer : -inf to +inf without decimal
#float : decimal -5.0
#string: 'hello'
#bool (boolean): True / False
Val1 = True
#input() #take input from the user
print(type(Val1)) #give you the datatype of the variable
price = 50
quantity =23
totalcost = price * quantity
a=50
b=23
c=a*b

#arithmetic operators:  + - * / //
var1 = 3
var2 = 5
print(var1 ** var2) #power()
print(var1 % var2) #remainder

#Relational operators: will always result in bool output (T/F)
print("var1 < var2: ",var1 < var2) #is var1 less than var2
print(var1 > var2) #is var1 greater than var2
print(var1 == var2) #is var1 equal to var2
print(var1 <= var2) #is var1 less than or equal to var2
print(var1 >= var2) #is var1 greater than or equal to var2
print(var1 != var2) #not equal to

#Logical operators: will have bool input and bool output
# and or not
# and: F and F = F, F and T = F, T and F = F, T and T = T (True only when both are True)
print(True and True)
print(True and False)

#or
# or: T or T = T, F or T = T, T or F = T, F or F = F (False only when both are False)
print("True or True: ",True or True)
print(True or False)

print("Grade A")
print("Grade B")
print("Grade C")
print("Grade D")
print("Grade E")
print()
avg = 40
# NOTE(review): the original indentation was lost; the four statements below
# are assumed to form the body of the if - confirm against the course notes.
if avg >= 50:
    print("i am inside if")
    print()
    total = 5+3  # named `total` so the builtin sum() is not shadowed
    print(total)

# Not indented, so it runs whether or not the condition was True.
print("I am in main")

Session on 21 OCT 2022

# input() always returns a string; convert it before doing arithmetic.
subject1 = input("Enter the marks in subject 1: ")
print(type(subject1))
subject1 = int(subject1)
print(type(subject1))
subject2 = 99
subject3 = 100
avg_marks = (subject1+subject2+subject3)/3
print("Average marks scored is ", avg_marks)
# Only the first matching branch of an if/elif chain is executed.
if avg_marks >= 80:
    print("You got grade A")
    # nested if: checked only for students who already have grade A
    if avg_marks >= 90:
        print("You also win President Medal")
elif avg_marks >= 70:
    print("You got grade B")
elif avg_marks >= 60:
    print("You got grade C")
elif avg_marks >= 50:
    print("You got grade D")
else:
    # everything below 50 is grade E (the message used to read "didnt get")
    print("You got grade E")

print("Thank You")

#Loops - to repeat the steps more than once
#1. For : we use it when we know how many times to execute
#2. While : we dont know how many times but we know condition till when
for i in range(10):  #range(10): starts from zero and goes upto 10 (not included 10)
    print("Hello")

count = 0
while count < 10:
    print("Hello in While")
    count = count+1  # without this update the condition would never become False
#for loop
# Nested loops: the inner loop prints one row of 5 stars, the outer repeats it 5 times.
for j in range(5):
    for i in range(5):
        print("*", end=" ")  # end=" " keeps the stars on the same line
    print()  # finish the current row

print()

# \n - newline
#print("A \n B \n C \n D \n E")
#print has invisible \n at the end

print("Hello\n")
print("Good Morning")

26 OCT 2022

#List methods
list1 = [2, 4, 6, 8, 10, 8, 19, 8]
list1.append(3) #adds at the end of the list
print(list1)
list1.insert(2, 14) #(pos,value)
print(list1)

#remove elements from a list
#pop() - removes element at the given position
list1.pop(1)
#remove() - remove given element (only its first occurrence)
list1.remove(10)
print(list1)

#index() - position of the first occurrence only
print("Index: ", list1.index(8))

# Collect the positions of ALL occurrences of 8.
pos = [position for position, value in enumerate(list1) if value == 8]
print("Position of 8 in the list: ", pos)
list1.pop(pos[-1])  # remove the last 8

list2 = [10, 20, 30, 40]
list3 = list1 + list2  # builds a new list; list1 is unchanged
print(list3)
list1.extend(list2) #list1 = list1 + list2
print(list1)
list1.reverse() #just reverse the elements
print(list1)
list1.sort() #increasing order
print(list1)
list1.sort(reverse=True) #decreasing order
print(list1)

#
list1 = [2, 14, 6, 8, 8, 19, 8, 3]
list1[1] = 4 #we can edit is called MUTABLE
print(list1)
list2 = list1 #alias, not a copy: both names point to the same list
list3 = list1.copy() #shallow copy: a new, separate list
print("1. List1: ", list1)
print("1. List2: ", list2)
print("1. List3: ", list3)
# appending through list2 also shows up in list1, but not in list3
list2.append(22)
print("2. List1: ", list1)
print("2. List2: ", list2)
print("2. List3: ", list3)

27 OCT 2022


#linear ordered mutable collection - List
#linear ordered immutable collection - Tuple

t1 = (1, 2, 3, 4, 5)
print(type(t1))
print(t1[-1])
#[] brackets are used for indexing in all datatypes and also list
#() - for tuple and also for function
print(t1)
#t1[1] = 10 - TypeError: 'tuple' object does not support item assignment
print(t1.index(3))
print(t1.count(3))
n1, n2, n3 = (2, 4, 6) #unpacking
print(n2)

for i in t1:
    print(i)

#comparing: always compares first element and if they are equal
# it goes to the next and so on
#(2,4) (2,4)
# A tuple can be converted to a list (to edit it) and back again.
print(type(t1))
t1 = list(t1)
print(type(t1))
t1 = tuple(t1)

#Dictionary
#mutable collection of key:value pairs, accessed by key instead of by position
#(keeps insertion order since Python 3.7)
d1 = {}
print(type(d1))
d1 = {"fname": "Sachin", "lname": "Tendulkar", "Runs": 130000, "City": "Mumbai"}
d1["lname"] = "TENDULKAR"
print(d1["lname"])
#popitem() removes the pair that was added last
d1.popitem()
print(d1)
d1.pop("fname") #removes the value with the given key
print(d1)

#keys
print(d1.keys())
for i in d1.keys():
    if d1[i] == "TENDULKAR":
        print("Remove this key: ", i)

print(d1.values())
# items() yields (key, value) tuples; index 1 is the value
for i in d1.items():
    print(i[1])

d1 = {"fname": "Sachin", "lname": "Tendulkar", "Runs": 130000, "City": "Mumbai"}
d2 = d1  # alias: same dictionary object
d3 = d1.copy()  # shallow copy: separate dictionary object
print("1. D1 = ", d1)
print("1. D2 = ", d2)
print("1. D3 = ", d3)
# the update is visible through d2 as well, but not in the copy d3
d1.update({"Country": "India"})
print("2. D1 = ", d1)
print("2. D2 = ", d2)
print("3. D3 = ", d3)

# clearing the copy does not affect d1/d2
d3.clear()
print(d2)

#Sets
#un-ordered mutable collection of unique values - sets
# duplicate values in the literal are stored only once
set1 = {1,2,3,3,4,3,4,2,1}
print(type(set1))
print(set1)

s1 = {1,2,3,4,5}
s2 = {3,4,5,6,7}
print(s1|s2) # Union
print(s1 & s2) # Intersection
print(s1 - s2) #diff
print(s2 - s1) #diff
print(s1 ^ s2) #symm diff
print(s1.intersection(s2)) #without update will give a new set
print(s1.update(s2)) #update changes s1 in place and returns None, so None is printed
print(s1)

28 OCT 2022

#Functions
def mystatements():
    """Print three fixed questions; takes no arguments and returns None."""
    print("How are you?")
    print("Whats your name?")
    print("Where are you going?")


# The body of a function runs only when the function is called.
print("Hello")
mystatements()
print("Second")
mystatements()

def myaddition():
    """Ask the user for a number, add 50 to it and print the result.

    Raises:
        ValueError: If the entered text is not a valid integer.
    """
    n1 = int(input("Enter number 1 to add: "))
    n2 = 50
    total = n1 + n2  # named `total` so the builtin sum() is not shadowed
    print("Addition of two numbers is ", total)


myaddition()
num1, num2, num3 = 15, 20, 25
def myaddition2(n1, n3, n2):  #accepting arguments
    """Print the sum of ``n1`` and ``n2`` (required positional arguments).

    All three values must be passed, in the order n1, n3, n2.
    ``n3`` is accepted but not used. The passed ``n2`` is now added;
    it used to be overwritten with 50 inside the function.
    """
    #n1 = int(input("Enter number 1 to add: "))
    total = n1 + n2
    print("Addition of two numbers is ", total)
myaddition2(num1, num2, num3)  #num1 is the argument we are passing
## positional & required
## positional & required


##2. positional & default
def myaddition2(n1, n2, n3=0):  #accepting arguments
    """Print the sum of ``n1`` and ``n2`` (positional arguments with a default).

    ``n3`` may be omitted because it has a default value; it is accepted
    but not used. The passed ``n2`` is now added; it used to be
    overwritten with 50 inside the function.
    """
    #n1 = int(input("Enter number 1 to add: "))
    total = n1 + n2
    print("Addition of two numbers is ", total)
myaddition2(num1,num2)

#3.keyword arguments (not positional)
def myaddition2(n1, n2, n3=0):  #accepting arguments
    """Print the sum of ``n1`` and ``n2``; used to demonstrate keyword arguments.

    When called with ``name=value`` pairs the order of the arguments does
    not matter. ``n3`` is accepted but not used. The passed ``n2`` is now
    added; it used to be overwritten with 50 inside the function.
    """
    #n1 = int(input("Enter number 1 to add: "))
    total = n1 + n2
    print("Addition of two numbers is ", total)
myaddition2(n3=10,n2=num1,n1=num2)

SESSION 2


#4. Function with takes variable number of arguments
def myownfunction(num1, *numbers, **values):
    """Print the received arguments and return the sum of ``numbers``.

    Args:
        num1: Required first argument; printed only, not part of the sum.
        *numbers: Any further positional values, collected into a tuple.
        **values: Any keyword values, collected into a dict.

    Returns:
        The sum of ``numbers``; 0 when none are given.
    """
    print("Num 1 is ", num1)
    print("Numbers : ", numbers)
    print("Values: ", values)
    total = 0  # named `total` so the builtin sum() is not shadowed
    for value in numbers:
        total += value
    return total

def myown2(num1, *numbers, **values):
    """Print the received arguments and add up ``numbers`` without returning.

    Counterpart of myownfunction(): there is deliberately no return
    statement, so the caller always receives None.
    """
    print("Num 1 is ", num1)
    print("Numbers : ", numbers)
    print("Values: ", values)
    total = 0
    for value in numbers:
        total += value

# Compare a function that ends with `return` with one that does not:
# each print shows the value the call hands back to the caller.
print("myownfunction: ",myownfunction(3,4,5))
print("MyOwn2: ",myown2(3,4,5))
out = myown2(3,4,5)
print("OUT = ",out)

# "Hello" is bound to num1, the numbers go to *numbers, the name=value pairs to **values
output = myownfunction("Hello",2,4,6,8,10,12,14,16,18,20, name="Sachin",city="Mumbai",runs=25000)
print("Output is: ",output)

# The same difference exists for built-in methods:
set1= {1,2,3}
set2 = {3,4,5}
print("Union",set1.union(set2)) #returns a new set
print("Union Update",set1.update(set2)) #changes set1 in place and returns None
print("Set1: ",set1)

#
# Class and Objects
#collection of variables and functions (methods) - grouped together to define something
class Dog:
    """Dog with a name and a breed; all dogs share the number of legs."""

    # Class attribute: the same value for every Dog object.
    num_legs = 4

    def __init__(self, name, make):
        """Store ``name`` and keep ``make`` as the dog's breed."""
        self.name = name
        self.breed = make

    def display(self):
        """Print the name and the breed of this dog."""
        print("Name is ", self.name)
        print("Breed is ", self.breed)


mydog1 = Dog("Tiger", "BBB")  #object 1 of class Dog
mydog2 = Dog("Moti", "AAA")  #object 2 of class Dog
#mydog2.initialize()
#mydog1.initialize()
print(mydog1.num_legs)
print(mydog2.num_legs)
mydog1.display()
class FourSides:
    """Base class for four-sided shapes; area() and peri() are placeholders."""

    def __init__(self, a):
        self.side1 = a
        print('FourSides Object is created')

    def _display_4sides(self):
        print("Display in 4 Sides")

    def area(self):
        """Placeholder that subclasses override."""
        print("Sorry, I am not complete")

    def peri(self):
        """Placeholder; not overridden by the subclasses below."""
        print("Sorry, I am not complete")


class Square(FourSides):
    """Square: a single side length."""

    def __init__(self, a):
        super().__init__(a)
        print('Square Object is created')

    def area(self):
        """Print side1 squared."""
        print("Area is ", self.side1**2)


class Rectangle(FourSides):
    """Rectangle: two side lengths."""

    def __init__(self, a, b):
        super().__init__(a)
        self.side2 = b
        print('Rectangle Object is created')

    def area(self):
        """Print side1 times side2."""
        print("Area is ", self.side1*self.side2)


sq = Square(10)
print(sq.side1)
rc = Rectangle(5, 10)
print(rc.side1)
sq.area()
#string
# The same text can be written with single, double or triple quotes.
var1 = "Hello"
var2 = 'Hello'
var1 = '''Hello'''
var1 = """Hello"""
# Triple quotes allow the text to run over several lines.
var1 = """How are you
Where are you doing
When will you be back
Take care"""
print(type(var1))
print(var1)
print(var1[0:3])  # first three characters
print(var1[-4:])  # last four characters

# Two ways to visit every character: directly, or through the index.
for i in var2:
    print(i)
for i in range(len(var2)):
    print(var2[i])

if "e" in var2:
    print("E is in the string")

var3 = "i am fine and am doing good"
# find() returns -1 when nothing is found; index() would raise ValueError,
# so index() is called only after find() has confirmed a match.
find = var3.find("am", 5, 19)
if find != -1:
    var5 = var3.index("am", 5, 19)
    print(var5)
print(" ".isspace())
print(var3.islower())
print("I Am DoinG Good".istitle())
R PROGRAMMING SEP 2022

DATA ANALYSIS WITH R

DAY 1: 10 SEP 2022

#Compiler

 

#interpreter

 

# R strings need straight quotes; typographic quotes are a syntax error.
print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")

print(5 + 3)

print("5 + 3")

hello = 5

print(hello)

print(4) # comment

#Data types - what is the data

#Basic data type: single value

#logical: TRUE / FALSE

# three equivalent ways to assign a value
var1 = TRUE  #FALSE

var1 <- TRUE

TRUE  -> var1

print(class(var1))

#Integer: positive or negative numbers without decimal part (written with the L suffix)

var1 <- 3L

print(class(var1))

#numeric: can take decimal values

var1 <- 3.5

print(class(var1))

#CHARACTER

var1 <- "HEllo"

print(class(var1))

#complex: square root of -1

var1 = 5i   #complex numbers are written with the suffix i (iota)

print(var1 * var1)

print(class(var1))

#Raw

print(charToRaw("l"))

 

#### data structure

#vector : same type of values

hello=68

# "hello" is a string here, not the variable above; a vector holds a single
# type, so print(class(var1)) shows which type the numbers were converted to
var1 = c(34,45,67,"hello")

print(var1)

print(class(var1))

# lists - elements may have different types

var1 <- list(3,5,"Hello", TRUE, c(2,4,8,2,4,6,8,2,4,6,8))

print(var1)

cat("Hello", "there")

#print("Hello", "there")

#Matrices

# byrow = TRUE fills the matrix row by row
mat1 = matrix(c(1,3,5,7,9,11,13,15,18), nrow=3,ncol=3, byrow = TRUE)

print(mat1)

# byrow = FALSE fills the matrix column by column
mat1 = matrix(c(1,3,5,7,9,11,13,15,18), nrow=3,ncol=3, byrow = FALSE)

print(mat1)

# NOTE(review): 18 values are supplied but dim 2x2x2x2 holds only 16 cells - confirm this is intended
var1 = array(c(1,3,5,7,9,11,13,15,18,9,11,13,15,18,21,22,25,28), dim=c(2,2,2,2))

print(var1)

# Factor - stores the distinct values (levels) of a categorical variable

color = c("Red","Green","Blue","Green","Blue","Green","Blue","Green","Blue","Red")

color_f = factor(color)

print(color_f)

# Data Frames - a table in which every column is a vector of equal length

employee <- data.frame(
  Name = c("Sachin","Virat","Rohit"),
  City = c("Mumbai","Delhi","Chennai"),
  Avg = c(113,24,85)
)

print(employee)

 

DAY 2: 11 SEP 2022

#Arithmetic operators

# Arithmetic on vectors works element by element.
v1 = c(1,3,5,7)

v2 = c(2,4,6,8)

print(v1 + v2)

# the minus sign must be the plain ASCII "-" (an en dash is not an operator)
print(v1 - v2)

print(v1 * v2)

print(v1 / v2)

# %% is for remainder

num = 15

rem = num %%2

print(rem)

# integer division or quotient:  %/%

qt = 15 %/% 4

print(qt)

#5 ^ 3 : cube power of

print( 5^ 3)

#Relational Operators: bigger/smaller relation - output is logical

var1 = 55

var2 = 66

print(var1 > var2)  # is var1 greater than var2?

print(var1 < var2)

print(var1 >= var2)

print(var1 <= var2)

print(var1 == var2)

print(var1 != var2)

 

 

#Logical operators: Input is logical and output is also logical

#prediction: Sachin and Laxman will open the batting

#actual: Sachin and Rahul opened the batting

 

#prediction: Sachin or Laxman will open the batting

#actual: Sachin and Rahul opened the batting

 

#  & for and ,  | for or

a=5

b=6

c=7

print(a > b | b < c)  # for OR - even 1 TRUE will make it TRUE

# T & T = T  F & F = F   T & F = F    F&T = F  (multiplication)

# T | T = T  F | F = F   T | F = T    F|T = T  (addition)

print(!TRUE)

#Assignment Operators:

a = 5

a <- 5

a <<- 5  #left assignment

#right assignment:

100 -> b

200 ->> b

c=6

# right assignment: the value of b is stored in c
b -> c

print(b)

print(c)

 

 

####################################################3

## CONDITIONS

 

#if avg >= 90 I want to print COngratulations

avg = 90

if (avg >=90) {
  print("Congratulations")
}

avg =40

if (avg>=50) {
  print("You have passed")
} else {
  print("Sorry, You have failed")
}

# if - else if - else

#avg >= 90: Grade A, avg>=80: Grade B, avg>=70: C, avg >= 60: D, avg >=50: E, <50: F

avg = 90

# val records the number of the branch that matched (1 = A ... 6 = F)
if (avg>=90) {
  print("Grade A")
  val = 1
} else if (avg >=80) {
  print("Grade B")
  val=2
} else if(avg>=70) {
  print("Grade C")
  val = 3
} else if (avg >= 60) {
  print("Grade D")
  val = 4
} else if (avg>=50) {
  print("Grade E")
  val =5
} else {
  print("Grade F")
  val = 6
}

## switch

#switch(expression, case1, case2, ...)

# With a number as expression, switch() returns the alternative at that position.
# NOTE(review): only four alternatives are listed although val can be 5 or 6 - confirm the intended result for those.
result <- switch(
  val,
  "Hello",
  "How are you?",
  "Where are you?",
  "Hows going?"
)

print(result)

 

 

 #loops – repeat block

 ## repeat: exit check

 ## while : entry check

 ## for : when we know how many times to repeat

 

TABLE OF CONTENTS

Unit 1: Getting Started with R.. 2

Getting Started. 2

R Objects and Data Types. 5

R Operators. 9

Decision Making in R. 12

LOOPS in R. 14

STRINGS in R. 15

Unit 2: FUNCTIONS in R.. 17

Built-in Function. 17

User-defined Function. 17

Unit 3: VECTORS, LISTS, ARRAYS & MATRICES. 19

VECTORS. 19

LISTS. 22

MATRICES. 25

ARRAYS. 27

Factors. 29

Data Frames. 34

Unit 4: Working with Files. 45

Working with Excel Files. 46

Unit 5: Working with MSAccess Database. 48

Unit 6: Working with Graphs. 51

Unit 7: Overview of R Packages. 64

Unit 8: Programming Examples. 68

Unit 1: Getting Started with R

R is a free software environment for statistical computing and graphics. It compiles and runs on a wide variety of UNIX platforms, Windows and MacOS. Why R? It’s free, open source, powerful and highly extensible. “You have a lot of prepackaged stuff that’s already available, so you’re standing on the shoulders of giants,” Google’s chief economist told The New York Times back in 2009. There can be little doubt that interest in the R statistics language, especially for data analysis, is soaring.

 

Downloading R

The primary R system is available from the Comprehensive R Archive Network, also known as CRAN. CRAN also hosts many add-on packages that can be used to extend the functionality of R. The “base” R system that you download from CRAN: Linux, Windows, Mac, Source Code

Website to download:  https://cran.r-project.org/mirrors.html

 

The R Foundation for Statistical Computing

The R Foundation is a not-for-profit organization working in the public interest. It was founded by the members of the R Development Core Team in order to:

·        Provide support for the R project and other innovations in statistical computing. We believe that R has become a mature and valuable tool and we would like to ensure its continued development and the development of future innovations in software for statistical and computational research.

·        Provide a reference point for individuals, institutions or commercial enterprises that want to support or interact with the R development community.

·        Hold and administer the copyright of R software and documentation.

 

R functionality is divided into a number of packages:

·        The “base” R system contains, among other things, the base package which is required to run R and contains the most fundamental functions.

·        The other packages contained in the “base” system include utils, stats, datasets, graphics, grDevices, grid, methods, tools, parallel, compiler, splines, tcltk, stats4.

·        There are also “Recommended” packages: boot, class, cluster, codetools, foreign, KernSmooth, lattice, mgcv, nlme, rpart, survival, MASS, spatial, nnet, Matrix.

When you download a fresh installation of R from CRAN, you get all of the above, which represents a substantial amount of functionality. However, there are many other packages available:

·        There are over 4000 packages on CRAN that have been developed by users and programmers around the world.

·        People often make packages available on their personal websites; there is no reliable way to keep track of how many packages are available in this fashion.

·        There are a number of packages being developed on repositories like GitHub and BitBucket but there is no reliable listing of all these packages.

 

 

More details can be found at the R foundation website: https://www.r-project.org/

 

Let’s create our first R Program

Launch R. In Windows you can launch R software using the option shown below under Program Files.

Figure 1: Launch R Programming Window

 

After launching the R interpreter, you will get a prompt > where you can start typing your program. Let’s try our first program:

 

In the Hello World code below, vString is a variable which stores the String value “Hello World” and in the next line we print the value of the vString variable. Please note that R commands are case sensitive. print is the valid command to print the value on the screen.

Figure 2: Hello World

 

# is the syntax used to write comments in the program

Figure 3: R Programming

 

R Basic Syntax

Download and Install R software

When R is run, this will launch R interpreter. You will get a prompt where you can start typing your programs as follows:

Here first statement defines a string variable myString, where we assign a string “Hello, World!” and then next statement print() is being used to print the value stored in variable myString.

 

R Script File

Usually, you will do your programming by writing your programs in script files and then you execute those scripts at your command prompt with the help of R interpreter called Rscript. So let’s start with writing following code in a text file called test.R as under:

Save the above code in a file test.R and execute it at Linux command prompt as given below. Even if you are using Windows or other system, syntax will remain same.

For windows, go to command prompt and browse to the directory where R.exe/Rscript.exe is installed.

Run-> Rscript filename.R     (filename.R is the name of the file which has R program along with the path name.)

 

We will use RStudio for rest of our course example. Download and install R Studio.

 

 

Generally, while doing programming in any programming language, you need to use various variables to store information. Variables are nothing but reserved memory locations to store values. This means that, when you create a variable you reserve some space in memory. In contrast to other programming languages like C and Java, in R the variables are not declared as some data type. The variables are assigned with R-Objects and the data type of the R-object becomes the data type of the variable.

 

R has five basic or “atomic” classes of objects:

·        character

·        numeric (real numbers)

·        integer

·        complex

·        logical (True/False)

 

The frequently used ones are:

Vectors

Lists

Matrices

Arrays

Factors

Data Frames

 

The simplest of these objects is the vector object and there are six data types of these atomic vectors, also termed as six classes of vectors. The other R-Objects are built upon the atomic vectors.

Figure 4: Data Types in R

 

 

Creating Vectors

The c() function can be used to create vectors of objects by concatenating things together.  When you want to create vector with more than one element, you should use c() function which means to combine the elements into a vector. You can also use the vector() function to initialize vectors.

Figure 5: Vector example

 

Lists, Matrices, Arrays

A list is an R-object which can contain many different types of elements inside it like vectors, functions and even another list inside it.

 

A matrix is a two-dimensional rectangular data set. It can be created using a vector input to the matrix function.

 

While matrices are confined to two dimensions, arrays can be of any number of dimensions. The array function takes a dim attribute which creates the required number of dimension. In the below example we create an array with two elements which are 3×3 matrices each.

 

Factors

Factors are used to represent categorical data and can be unordered or ordered. One can think of a factor as an integer vector where each integer has a label. Factors are important in statistical modeling and are treated specially by modelling functions like lm() and glm(). Using factors with labels is better than using integers because factors are self-describing. Having a variable that has values “Male” and “Female” is better than a variable that has values 1 and 2. Factor objects can be created with the factor() function.

Figure 6: List, Matrix and Array example

 

Figure 7: Factors example

 

Data Frames

Data frames are tabular data objects. Unlike a matrix in data frame each column can contain different modes of data. The first column can be numeric while the second column can be character and third column can be logical. It is a list of vectors of equal length. Data Frames are created using the data.frame() function.

Figure 8: Data frames example

 

Mixing Objects

There are occasions when different classes of R objects get mixed together. Sometimes this happens by accident but it can also happen on purpose. In implicit coercion, what R tries to do is find a way to represent all of the objects in the vector in a reasonable fashion. Sometimes this does exactly what you want and sometimes not. For example, combining a numeric object with a character object will create a character vector, because numbers can usually be easily represented as strings.

Figure 9: Mixing and Missing Objects examples

We have the following types of operators in R programming:

·        Arithmetic Operators

·        Relational Operators

·        Logical Operators

·        Assignment Operators

·        Miscellaneous Operators

 

Arithmetic Operators

 

Figure 10: Assignment Operators

 

Relational Operators

Operators

Meaning

> 

Checks if each element of the first vector is greater than the corresponding element of the second vector.

< 

Checks if each element of the first vector is less than the corresponding element of the second vector.

==

Checks if each element of the first vector is equal to the corresponding element of the second vector.

<=

Checks if each element of the first vector is less than or equal to the corresponding element of the second vector.

>=

Checks if each element of the first vector is greater than or equal to the corresponding element of the second vector.

!=

Checks if each element of the first vector is unequal to the corresponding element of the second vector.

 

Logical Operators

Operators

Meaning

&

It is called Element-wise Logical AND operator. It combines each element of the first vector with the corresponding element of the second vector and gives an output TRUE if both the elements are TRUE.

|

It is called Element-wise Logical OR operator. It combines each element of the first vector with the corresponding element of the second vector and gives an output TRUE if at least one of the elements is TRUE.

!

It is called Logical NOT operator. Takes each element of the vector and gives the opposite logical value.

The logical operators && (logical AND) and || (logical OR) consider only the first element of the vectors and give a vector of a single element as output.

 

Readers are encouraged to practice all the operators and see the output.

 

 

 

Assignment Operators

A variable in R can store an atomic vector, group of atomic vectors or a combination of many R objects. The variables can be assigned values using leftward, rightward and equal to operator. The values of the variables can be printed using print() or cat() function. The cat() function combines multiple items into a continuous print output.

In R, a variable itself is not declared of any data type, rather it gets the data type of the R -object assigned to it. So R is called a dynamically typed language, which means that we can change a variable’s data type of the same variable again and again when using it in a program.

Figure 11: Variable assignment

 

Figure 12: Listing and deleting variables

 

Miscellaneous Operators

Operators

Meaning

:

Colon operator. It creates the series of numbers in sequence for a vector.

%in%

This operator is used to identify if an element belongs to a vector.

%*%

This operator performs matrix multiplication (for example, multiplying a matrix by its transpose).

 

 

R provides the following types of decision making statements:

Statement

Description

If statement

An if statement consists of a Boolean expression followed by one or more statements.

If else statement

An if statement can be followed by an optional else statement, which executes when the Boolean expression is false.

Switch statement

A switch statement allows a variable to be tested for equality against a list of values.

 

Figure 13: Example of If Statement

 

Figure 14: Example of If Else Statement

 

Multiple if else

An if statement can be followed by an optional else if…else statement, which is very

useful to test various conditions using single if…else if statement.

 

Syntax

 

When using if, else if, else statements there are few points to keep in mind.

·        An if can have zero or one else and it must come after any else if’s.

·        An if can have zero to many else if’s and they must come before the else.

·        Once an else if succeeds, none of the remaining else if’s or else’s will be tested.

 

SWITCH statement

A switch statement allows a variable to be tested for equality against a list of values. Each value is called a case, and the variable being switched on is checked for each case.

Syntax

 

The following rules apply to a switch statement:

·        If the value of expression is not a character string it is coerced to integer.

·        You can have any number of case statements within a switch. Each case is followed by the value to be compared to and a colon.

·        If the value of the integer is between 1 and nargs()-1 (the maximum number of arguments), then the corresponding element of case condition is evaluated and the result returned.

·        If expression evaluates to a character string then that string is matched (exactly) to the names of the elements.

·        If there is more than one match, the first matching element is returned.

·        No Default argument is available.

·        In the case of no match, if there is a unnamed element of … its value is returned. (If there is more than one such argument an error is returned.)

 

 

Loops are used to repeat a block of code. Being able to have your program repeatedly execute a block of code is one of the most basic but useful tasks in programming- a loop lets you write a very simple statement to produce a significantly greater result simply by repetition. R programming language provides the following kinds of loop to handle looping requirements:

Loop Type

Description

REPEAT loop

Executes a sequence of statements repeatedly until a break statement inside the loop body ends it; there is no loop condition or loop variable.

WHILE loop

Repeats a statement or group of statements while a given condition is true. It tests the condition before executing the loop body.

FOR loop

It executes a block of statements once for each element of a vector or sequence, so the number of repetitions is known in advance.

 

Loop Control Statements

Control Type

Description

BREAK statement

Terminates the loop statement and transfers execution to the statement immediately following the loop.

NEXT statement

The next statement skips the remaining statements of the current iteration and moves on to the next iteration of the loop.

 

REPEAT – loop

The Repeat loop executes the same code again and again until a stop condition is met.

    Syntax:                                                                         Example:

 

 

 

 

 

WHILE – loop

The While loop executes the same code again and again until a stop condition is met.

    Syntax:                                                                         Example:

FOR – loop

A for loop is a repetition control structure that allows you to efficiently write a loop that needs to execute a specific number of times.

    Syntax:                                                                         Example:

Any value written within a pair of single quote or double quotes in R is treated as a string. Internally R stores every string within double quotes, even when you create them with single quote.

 

Rules Applied in String Construction

·     The quotes at the beginning and end of a string should be both double quotes or both single quote. They can not be mixed.

·     Double quotes can be inserted into a string starting and ending with single quote.

·     Single quote can be inserted into a string starting and ending with double quotes.

·     Double quotes can not be inserted into a string starting and ending with double quotes.

·     Single quote can not be inserted into a string starting and ending with single quote.

 

 

 

 

Examples of Strings in R

Formatting numbers & strings – format() function

Numbers and strings can be formatted to a specific style using format()function.

Syntax – The basic syntax for format function is :

 

Following is the description of the parameters used:

·   x is the vector input.

·   digits is the total number of digits displayed.

·   nsmall is the minimum number of digits to the right of the decimal point.

·   scientific is set to TRUE to display scientific notation.

·   width indicates the minimum width to be displayed by padding blanks in the beginning.

·   justify is the display of the string to left, right or center.

 

Other functions

Functions

Functionality

nchar(x)

This function counts the number of characters including spaces in a string.

toupper(x) / tolower(x)

These functions change the case of characters of a string.

substring(x,first,last)

This function extracts parts of a String.

A function is a set of statements organized together to perform a specific task. R has a large number of in-built functions and the user can create their own functions.

The different parts of a function are:

·   Function Name: This is the actual name of the function. It is stored in R environment as an object with this name.

·   Arguments: An argument is a placeholder. When a function is invoked, you pass a value to the argument. Arguments are optional; that is, a function may contain no arguments. Also arguments can have default values.

·   Function Body: The function body contains a collection of statements that defines what the function does.

·   Return Value: The return value of a function is the last expression in the function body to be evaluated.

 

R has many in-built functions which can be directly called in the program without defining them first. Simple examples of in-built functions are seq(), mean(), max(), sum(x) and paste(…) etc.

 

We can also create and use our own functions referred as user defined functions. An R function is created by using the keyword function. The basic syntax of an R function definition is as follows:

 

Example: Calling a function with argument values (by position and by name)

 

Example: Calling a function with default values

 

Lazy Evaluation of Function: Arguments to functions are evaluated lazily, which means they are evaluated only when needed by the function body.

 

 

Vectors are the most basic R data objects and there are six types of atomic vectors. They are logical, integer, double, complex, character and raw. Even when you write just one value in R, it becomes a vector of length 1 and belongs to one of the above vector types.

# Atomic vector of type character.

print(“ABC”);

[1] “ABC”

# Atomic vector of type double.

print (1.2)

[1] 1.2

# Atomic vector of type integer.

print(10L)

[1] 10

# Atomic vector of type logical.

print(TRUE)

[1] TRUE

# Atomic vector of type complex.

print(4+8i)

[1] 4+8i

# Atomic vector of type raw.

print(charToRaw(‘hello’))

[1] 68 65 6c 6c 6f

 

Multiple Elements Vector

Using colon operator with numeric data

# Creating a sequence from 2 to 8.

v <- 2:8

print(v)

[1] 2 3 4 5 6 7 8

# Creating a sequence from 6.6 to 12.6.

v <- 6.6:12.6

print(v)

[1] 6.6 7.6 8.6 9.6 10.6 11.6 12.6

# If the final element specified does not belong to the sequence then it is discarded.

v <- 3.8:11.4

print(v)

[1] 3.8 4.8 5.8 6.8 7.8 8.8 9.8 10.8

 

Using sequence (Seq.) operator

Syntax and example of using Seq. operator:

# # Create vector with elements from 5 to 9 incrementing by 0.4.

print (seq(5, 9, by=0.4))

[1] 5.0 5.4 5.8 6.2 6.6 7.0 7.4 7.8 8.2 8.6 9.0

 

Using the c () function

The non-character values are coerced to character type if one of the elements is a char.

Syntax and example of using c() function:

##  The logical and numeric values are converted to characters.

x <- c(‘apple’, ‘red’, 5, TRUE)

print(x)

[1] “apple” “red” “5” “TRUE”

Accessing Vector Elements

Elements of a Vector are accessed using indexing. The [ ] brackets are used for indexing. Indexing starts with position 1. Giving a negative value in the index drops that element from result. TRUE, FALSE or 0 and 1 can also be used for indexing.

Syntax and example:

# Accessing vector elements using position.

t <- c(“Sun”,”Mon”,”Tue”,”Wed”,”Thurs”,”Fri”,”Sat”)

u <- t[c(2,3,6)]

print(u)

[1] “Mon” “Tue” “Fri”

 

# Accessing vector elements using logical indexing.

v <- t[c(TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE)]

print(v)

[1] “Sun” “Fri”

 

# Accessing vector elements using negative indexing.

x <- t[c(-2,-5)]

print(x)

[1] “Sun” “Tue” “Wed” “Fri” “Sat”

 

# Accessing vector elements using 0/1 indexing.

y <- t[c(0,0,0,0,0,0,1)]

print(y)

[1] “Sun”

 

Vector Manipulation

Vector Arithmetic- Two vectors of same length can be added, subtracted, multiplied or divided giving the result as a vector output.

Syntax and example:

# Create two vectors.

v1 <- c(3,8,4,5,0,11)

v2 <- c(4,11,0,8,1,2)

 

# Vector addition.

add.result <- v1+v2

print(add.result)

[1] 7 19 4 13 1 13

 

# Vector subtraction.

sub.result <- v1-v2

print(sub.result)

[1] -1 -3 4 -3 -1 9

 

# Vector multiplication.

multi.result <- v1*v2

print(multi.result)

[1] 12 88 0 40 0 22

 

# Vector division.

divi.result <- v1/v2

print(divi.result)

[1] 0.7500000 0.7272727 Inf 0.6250000 0.0000000 5.5000000

 

Vector Element Recycling

If we apply arithmetic operations to two vectors of unequal length, then the elements of the shorter vector are recycled to complete the operations.

Syntax and example:

v1 <- c(3,8,4,5,0,11)

v2 <- c(4,11)

# V2 becomes c(4,11,4,11,4,11)

add.result <- v1+v2

print(add.result)

[1] 7 19 8 16 4 22

 

sub.result <- v1-v2

print(sub.result)

[1] -1 -3 0 -6 -4 0

 

Vector Element Sorting

Elements in a vector can be sorted using the sort() function.

Syntax and example:

v <- c(3,8,4,5,0,11, -9, 304)

# Sort the elements of the vector.

sort.result <- sort(v)

print(sort.result)

[1] -9 0 3 4 5 8 11 304

 

# Sort the elements in the reverse order.

revsort.result <- sort(v, decreasing = TRUE)

print(revsort.result)

[1] 304 11 8 5 4 3 0 -9

 

 

# Sorting character vectors.

v <- c(“Red”,”Blue”,”yellow”,”violet”)

sort.result <- sort(v)

print(sort.result)

[1] “Blue” “Red” “violet” “yellow”

 

# Sorting character vectors in reverse order.

revsort.result <- sort(v, decreasing = TRUE)

print(revsort.result)

[1] “yellow” “violet” “Red” “Blue”

 

Lists are the R objects which contain elements of different types like – numbers, strings, vectors and another list inside it. A list can also contain a matrix or a function as its elements. List is created using list() function.

 

Syntax and example:

## Create a list containing strings, numbers, vectors and a logical values.

list_data <- list(“Red”, “Green”, c(21,32,11), TRUE, 51.23, 119.1)

print(list_data)

 

[[1]]

[1] “Red”

[[2]]

[1] “Green”

[[3]]

[1] 21 32 11

[[4]]

[1] TRUE

[[5]]

[1] 51.23

[[6]]

[1] 119.1

 

Naming List Elements

The list elements can be given names and they can be accessed using these names.

 

Manipulating List Elements

We can add, delete and update list elements as shown below. We can add and delete elements only at the end of a list. But we can update any element.

 

Merging Lists

You can merge many lists into one list by placing all the lists inside one list() function.

Converting Lists to Vector

A list can be converted to a vector so that the elements of the vector can be used for further manipulation. All the arithmetic operations on vectors can be applied after the list is converted into vectors. To do this conversion, we use the unlist() function. It takes the list as input and produces a vector.

 

Matrices are the R objects in which the elements are arranged in a two-dimensional

format. They contain elements of the same atomic types. But we use matrices containing numeric elements to be used in mathematical calculations. A Matrix is created using the matrix() function.

 

Syntax

Parameters used:

·        data is the input vector which becomes the data elements of the matrix.

·        nrow is the number of rows to be created.

·        ncol is the number of columns to be created.

·        byrow is a logical clue. If TRUE then the input vector elements are arranged by row.

·        dimnames is the names assigned to the rows and columns.

# Elements are arranged sequentially by row.

M <- matrix(c(3:14), nrow=4, byrow=TRUE)

print(M)

# Elements are arranged sequentially by column.

N <- matrix(c(3:14), nrow=4, byrow=FALSE)

print(N)

# Define the column and row names.

rownames = c(“row1”, “row2”, “row3”, “row4”)

colnames = c(“col1”, “col2”, “col3”)

 

# Accessing Elements of a Matrix

# Access the element at 3rd column and 1st row.

print(N[1,3])

# Access the element at 2nd column and 4th row.

print(N[4,2])

 

# Access only the 2nd row.

print(N[2,])

# Access only the 3rd column.

print(N[,3])

 

Matrix Computations

Various mathematical operations are performed on the matrices using the R operators. The result of the operation is also a matrix. The dimensions (number of rows and columns) should be same for the matrices involved in the operation.

# Create two 2×3 matrices.

matrix1 <- matrix(c(3, 9, -1, 4, 2, 6), nrow=2)

print(matrix1)

matrix2 <- matrix(c(5, 2, 0, 9, 3, 4), nrow=2)

print(matrix2)

# Add the matrices.

result <- matrix1 + matrix2

cat(“Result of addition”,”\n”)

print(result)

# Subtract the matrices

result <- matrix1 - matrix2

cat(“Result of subtraction”,”\n”)

print(result)

 

Matrix Multiplication & Division

# Create two 2×3 matrices.

matrix1 <- matrix(c(3, 9, -1, 4, 2, 6), nrow=2)

print(matrix1)

matrix2 <- matrix(c(5, 2, 0, 9, 3, 4), nrow=2)

print(matrix2)

# Multiply the matrices.

result <- matrix1 * matrix2

cat(“Result of multiplication”,”\n”)

print(result)

# Divide the matrices

result <- matrix1 / matrix2

cat(“Result of division”,”\n”)

print(result)

 

Arrays are the R data objects which can store data in more than two dimensions. For example – if we create an array of dimension (2, 3, 4) then it creates 4 rectangular matrices each with 2 rows and 3 columns. Arrays can store only one data type. An array is created using the array() function. It takes vectors as input and uses the values in the dim parameter to create an array.

 

# Create two vectors of different lengths.

vector1 <- c(5,9,3)

vector2 <- c(10,11,12,13,14,15)

# Take these vectors as input to the array.

result <- array(c(vector1,vector2),dim=c(3,3,2))

print(result)

 

Naming Columns and Rows: We can give names to the rows, columns and matrices in the array by using the dimnames parameter.

# Create two vectors of different lengths.

vector1 <- c(5,9,3)

vector2 <- c(10,11,12,13,14,15)

column.names <- c(“COL1″,”COL2″,”COL3”)

row.names <- c(“ROW1″,”ROW2″,”ROW3”)

matrix.names <- c(“Matrix1″,”Matrix2”)

# Take these vectors as input to the array.

result <- array(c(vector1,vector2),dim=c(3,3,2),dimnames =

                  list(row.names,column.names,matrix.names))

print(result)

 

Accessing Array Elements

# Create two vectors of different lengths.

vector1 <- c(5,9,3)

vector2 <- c(10,11,12,13,14,15)

column.names <- c(“COL1″,”COL2″,”COL3”)

row.names <- c(“ROW1″,”ROW2″,”ROW3”)

matrix.names <- c(“Matrix1″,”Matrix2”)

# Take these vectors as input to the array.

result <- array(c(vector1,vector2),dim=c(3,3,2),dimnames =

                  list(row.names,column.names,matrix.names))

# Print the third row of the second matrix of the array.

print(result[3,,2])

# Print the element in the 1st row and 3rd column of the 1st matrix.

print(result[1,3,1])

# Print the 2nd Matrix.

print(result[,,2])

 

Manipulating Array Elements

As an array is made up of matrices in multiple dimensions, the operations on elements of an array are carried out by accessing elements of the matrices.

# Create two vectors of different lengths.

vector1 <- c(5,9,3)

vector2 <- c(10,11,12,13,14,15)

# Take these vectors as input to the array.

array1 <- array(c(vector1,vector2),dim=c(3,3,2))

# Create two vectors of different lengths.

vector3 <- c(9,1,0)

vector4 <- c(6,0,11,3,14,1,2,6,9)

array2 <- array(c(vector3,vector4),dim=c(3,3,2))

# create matrices from these arrays.

matrix1 <- array1[,,2]

matrix2 <- array2[,,2]

# Add the matrices.

result <- matrix1+matrix2

print(result)

 

Calculations Across Array Elements: We can do calculations across the elements in an array using the apply() function.

 

Syntax

 

Parameters used:

·        x is an array.

·        margin is the dimension (or vector of dimensions) over which the function is applied, e.g. 1 for rows and 2 for columns.

·        fun is the function to be applied across the elements of the array.

 

 

We use the apply() function below to calculate the sum of the elements in the rows of an array across all the matrices.

# Create two vectors of different lengths.

vector1 <- c(5,9,3)

vector2 <- c(10,11,12,13,14,15)

# Take these vectors as input to the array.

new.array <- array(c(vector1,vector2),dim=c(3,3,2))

print(new.array)

# Use apply to calculate the sum of the rows across all the matrices.

result <- apply(new.array, c(1), sum)

print(result)

 

Array indexing. Subsections of an array

Individual elements of an array may be referenced by giving the name of the array followed by

the subscripts in square brackets, separated by commas.

More generally, subsections of an array may be specified by giving a sequence of index vectors

in place of subscripts; however if any index position is given an empty index vector, then the full

range of that subscript is taken.

Continuing the previous example, a[2,,] is a 4 × 2 array with dimension vector c(4,2) and

data vector containing the values

c(a[2,1,1], a[2,2,1], a[2,3,1], a[2,4,1],

a[2,1,2], a[2,2,2], a[2,3,2], a[2,4,2])

in that order. a[,,] stands for the entire array, which is the same as omitting the subscripts

entirely and using a alone.

For any array, say Z, the dimension vector may be referenced explicitly as dim(Z) (on either

side of an assignment).

Also, if an array name is given with just one subscript or index vector, then the corresponding

values of the data vector only are used; in this case the dimension vector is ignored. This is not

the case, however, if the single index is not a vector but itself an array, as we next discuss.

 

Factors are the data objects which are used to categorize the data and store it as levels. They can store both strings and integers. They are useful in the columns which have a limited number of unique values, such as “Male”, “Female” or TRUE, FALSE. They are useful in data analysis for statistical modeling.

A factor is a vector object used to specify a discrete classification (grouping) of the components

of other vectors of the same length. R provides both ordered and unordered factors. While the

“real” application of factors is with model formulae (see Section 11.1.1 [Contrasts], page 53), we

here look at a specific example.

4.1 A specific example

Suppose, for example, we have a sample of 30 tax accountants from all the states and territories

of Australia and their individual state of origin is specified by a character vector of state

mnemonics as

> state <- c(“tas”, “sa”, “qld”, “nsw”, “nsw”, “nt”, “wa”, “wa”,

“qld”, “vic”, “nsw”, “vic”, “qld”, “qld”, “sa”, “tas”,

“sa”, “nt”, “wa”, “vic”, “qld”, “nsw”, “nsw”, “wa”,

“sa”, “act”, “nsw”, “vic”, “vic”, “act”)

Notice that in the case of a character vector, “sorted” means sorted in alphabetical order.

A factor is similarly created using the factor() function:

> statef <- factor(state)

The print() function handles factors slightly differently from other objects:

> statef

[1] tas sa qld nsw nsw nt wa wa qld vic nsw vic qld qld sa

[16] tas sa nt wa vic qld nsw nsw wa sa act nsw vic vic act

Levels: act nsw nt qld sa tas vic wa

To find out the levels of a factor the function levels() can be used.

> levels(statef)

[1] “act” “nsw” “nt” “qld” “sa” “tas” “vic” “wa”

4.2 The function tapply() and ragged arrays

To continue the previous example, suppose we have the incomes of the same tax accountants in

another vector (in suitably large units of money)

> incomes <- c(60, 49, 40, 61, 64, 60, 59, 54, 62, 69, 70, 42, 56,

61, 61, 61, 58, 51, 48, 65, 49, 49, 41, 48, 52, 46,

59, 46, 58, 43)

To calculate the sample mean income for each state we can now use the special function

tapply():

> incmeans <- tapply(incomes, statef, mean)

giving a means vector with the components labelled by the levels

act nsw nt qld sa tas vic wa

44.500 57.333 55.500 53.600 55.000 60.500 56.000 52.250

The function tapply() is used to apply a function, here mean(), to each group of components

of the first argument, here incomes, defined by the levels of the second component, here statef, as if they were separate vector structures. The result is a structure of the same length as the

levels attribute of the factor containing the results. The reader should consult the help document

for more details.

Suppose further we needed to calculate the standard errors of the state income means. To do

this we need to write an R function to calculate the standard error for any given vector. Since

there is a built-in function var() to calculate the sample variance, such a function is a very

simple one liner, specified by the assignment:

> stdError <- function(x) sqrt(var(x)/length(x))

(Writing functions will be considered later in Chapter 10 [Writing your own functions], page 42.

Note that R’s built-in function sd() is something different.) After this assignment, the standard

errors are calculated by

> incster <- tapply(incomes, statef, stdError)

and the values calculated are then

> incster

act nsw nt qld sa tas vic wa

1.5 4.3102 4.5 4.1061 2.7386 0.5 5.244 2.6575

As an exercise you may care to find the usual 95% confidence limits for the state mean

incomes. To do this you could use tapply() once more with the length() function to find

the sample sizes, and the qt() function to find the percentage points of the appropriate t-

distributions. (You could also investigate R’s facilities for t-tests.)

The function tapply() can also be used to handle more complicated indexing of a vector

by multiple categories. For example, we might wish to split the tax accountants by both state

and sex. However in this simple instance (just one factor) what happens can be thought of as

follows. The values in the vector are collected into groups corresponding to the distinct entries

in the factor. The function is then applied to each of these groups individually. The value is a

vector of function results, labelled by the levels attribute of the factor.

The combination of a vector and a labelling factor is an example of what is sometimes called

a ragged array, since the subclass sizes are possibly irregular. When the subclass sizes are all

the same the indexing may be done implicitly and much more efficiently, as we see in the next

section.

4.3 Ordered factors

The levels of factors are stored in alphabetical order, or in the order they were specified to

factor if they were specified explicitly.

Sometimes the levels will have a natural ordering that we want to record and want our

statistical analysis to make use of. The ordered() function creates such ordered factors but

is otherwise identical to factor. For most purposes the only difference between ordered and

unordered factors is that the former are printed showing the ordering of the levels, but the

contrasts generated for them in fitting linear models are different.

 

Factors are created using the factor() function by taking a vector as input.

Factors are categorical variables that are super useful in summary statistics, plots, and regressions. They basically act like dummy variables that R codes for you.  So, let’s start off with some data:

and let’s check out what kinds of variables we have:

 

so we see that Race is a factor variable with three levels.  I can see all the levels this way:

So what this means is that R groups statistics by these levels.  Internally, R stores the integer values 1, 2, and 3, and maps the character strings (in alphabetical order, unless I reorder) to these values, i.e. 1=Black, 2=Hispanic, and 3=White.  Now if I were to do a summary of this variable, it shows me the counts for each category, as below.  R won’t let me do a mean or any other statistic of a factor variable other than a count, so keep that in mind. But you can always change your factor to be numeric.

If I do a plot of age on race, I get a boxplot from the normal plot command since that is what makes sense for a categorical variable:

 

plot(mydata$Age~mydata$Race, xlab=”Race”, ylab=”Age”, main=”Boxplots of Age by Race”)

# Create a vector as input.

data <-

  c(“East”,”West”,”East”,”North”,”North”,”East”,”West”,”West”,”West”,”East”,”North”)

print(data)

print(is.factor(data))

# Apply the factor function.

factor_data <- factor(data)

print(factor_data)

print(is.factor(factor_data))

 

Factors in Data Frame

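On creating any data frame with a column of text data, R versions before 4.0.0 treat the text column as categorical data and create factors on it by default; from R 4.0.0 onwards this happens only when stringsAsFactors = TRUE is passed to data.frame().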

# Create the vectors for data frame.

height <- c(132,151,162,139,166,147,122)

weight <- c(48,49,66,53,67,52,40)

gender <- c(“male”,”male”,”female”,”female”,”male”,”female”,”male”)

# Create the data frame.

input_data <- data.frame(height,weight,gender)

print(input_data)

# Test if the gender column is a factor.

print(is.factor(input_data$gender))

# Print the gender column to see the levels.

print(input_data$gender)

 

Changing the Order of Levels: The order of the levels in a factor can be changed by applying the factor function again with new order of the levels.

data <-

  c(“East”,”West”,”East”,”North”,”North”,”East”,”West”,”West”,”West”,”East”,”North”)

# Create the factors

factor_data <- factor(data)

print(factor_data)

# Apply the factor function with required order of the level.

new_order_data <- factor(factor_data,levels = c(“East”,”West”,”North”))

print(new_order_data)

 

Generating Factor Levels: We can generate factor levels by using the gl() function. It takes two integers as input, which indicate how many levels there are and how many times each level is repeated.

Syntax: gl(n, k, labels)

 

Following is the description of the parameters used:

·        n is an integer giving the number of levels.

·        k is an integer giving the number of replications.

·        labels is a vector of labels for the resulting factor levels.

v <- gl(3, 4, labels = c(“Tampa”, “Seattle”,”Boston”))

print(v)

 

 

A data frame is a table or a two-dimensional array-like structure in which each column contains values of one variable and each row contains one set of values from each column. Following are the characteristics of a data frame:

·        The column names should be non-empty.

·        The row names should be unique.

·        The data stored in a data frame can be of numeric, factor or character type.

·        Each column should contain same number of data items.

 

# Create the data frame.

emp.data <- data.frame(

  emp_id = c (1:5),

  emp_name = c(“Rick”,”Dan”,”Michelle”,”Ryan”,”Gary”),

  salary = c(623.3,515.2,611.0,729.0,843.25),

  start_date = as.Date(c("2012-01-01","2013-09-23","2014-11-15","2014-05-11","2015-03-27")),

  stringsAsFactors=FALSE

  )

# Print the data frame.

print(emp.data)

 

Get the Structure of the Data Frame: The structure of the data frame can be seen by using str() function.

# Create the data frame.

emp.data <- data.frame(

emp_id = c (1:5),

emp_name = c(“Rick”,”Dan”,”Michelle”,”Ryan”,”Gary”),

salary = c(623.3,515.2,611.0,729.0,843.25),

start_date = as.Date(c("2012-01-01","2013-09-23","2014-11-15","2014-05-11","2015-03-27")),

stringsAsFactors=FALSE

)

# Get the structure of the data frame.

str(emp.data)

 

Summary of Data in Data Frame

The statistical summary and nature of the data can be obtained by applying summary() function.

# Create the data frame.

emp.data <- data.frame(

emp_id = c (1:5),

emp_name = c(“Rick”,”Dan”,”Michelle”,”Ryan”,”Gary”),

salary = c(623.3,515.2,611.0,729.0,843.25),

start_date = as.Date(c("2012-01-01","2013-09-23","2014-11-15","2014-05-11","2015-03-27")),

stringsAsFactors=FALSE

)

# Print the summary.

print(summary(emp.data))

 

Extract Data from Data Frame

Extract specific column from a data frame using column name.

# Create the data frame.

emp.data <- data.frame(

  emp_id = c (1:5),

  emp_name = c(“Rick”,”Dan”,”Michelle”,”Ryan”,”Gary”),

  salary = c(623.3,515.2,611.0,729.0,843.25),

  start_date = as.Date(c("2012-01-01","2013-09-23","2014-11-15","2014-05-11","2015-03-27")),

  stringsAsFactors=FALSE

  )

# Extract Specific columns.

result <- data.frame(emp.data$emp_name,emp.data$salary)

print(result)

 

# Extract 3rd and 5th row with 2nd and 4th column.

result <- emp.data[c(3,5),c(2,4)]

print(result)

 

# Extract first two rows.

result <- emp.data[1:2,]

print(result)

 

# Expand Data Frame – A data frame can be expanded by adding columns and rows.

# Add the “dept” column.

emp.data$dept <- c(“IT”,”Operations”,”IT”,”HR”,”Finance”)

v <- emp.data

print(v)

 

 

Add Row

To add more rows permanently to an existing data frame, we need to bring in the new rows in the same structure as the existing data frame and use the rbind() function. In the example below we create a data frame with new rows and merge it with the existing data frame to create the final data frame.

# Create the first data frame.

emp.data <- data.frame(

  emp_id = c (1:5),

  emp_name = c(“Rick”,”Dan”,”Michelle”,”Ryan”,”Gary”),

  salary = c(623.3,515.2,611.0,729.0,843.25),

  start_date = as.Date(c("2012-01-01","2013-09-23","2014-11-15","2014-05-11","2015-03-27")),

  dept=c(“IT”,”Operations”,”IT”,”HR”,”Finance”),

  stringsAsFactors=FALSE

)

# Create the second data frame

emp.newdata <- data.frame(

  emp_id = c (6:8),

  emp_name = c(“Rasmi”,”Pranab”,”Tusar”),

  salary = c(578.0,722.5,632.8),

  start_date = as.Date(c(“2013-05-21″,”2013-07-30″,”2014-06-17”)),

  dept = c(“IT”,”Operations”,”Finance”),

  stringsAsFactors=FALSE

)

# Bind the two data frames.

emp.finaldata <- rbind(emp.data,emp.newdata)

print(emp.finaldata)

 

Unit 4: Simple manipulations; numbers and vectors

Vectors and assignment

R operates on named data structures. The simplest such structure is the numeric vector, which is a single entity consisting of an ordered collection of numbers. To set up a vector named x, say, consisting of five numbers, namely 10.4, 5.6, 3.1, 6.4 and 21.7, use the R command

> x <- c(10.4, 5.6, 3.1, 6.4, 21.7)

 

This is an assignment statement using the function c() which in this context can take an arbitrary number of vector arguments and whose value is a vector got by concatenating its

arguments end to end. A number occurring by itself in an expression is taken as a vector of length one. Notice that the assignment operator (‘<-’), which consists of the two characters ‘<’ (“less than”) and ‘-’ (“minus”) occurring strictly side-by-side and it ‘points’ to the object receiving the value of the expression. In most contexts the ‘=’ operator can be used as an alternative. Assignment can also be made using the function assign(). An equivalent way of making the same assignment as above is with:

> assign(“x”, c(10.4, 5.6, 3.1, 6.4, 21.7))

The usual operator, <-, can be thought of as a syntactic short-cut to this.

Assignments can also be made in the other direction, using the obvious change in the assignment operator. So the same assignment could be made using

> c(10.4, 5.6, 3.1, 6.4, 21.7) -> x

If an expression is used as a complete command, the value is printed and lost. So now if we

were to use the command

> 1/x

the reciprocals of the five values would be printed at the terminal (and the value of x, of course, unchanged).

The further assignment

> y <- c(x, 0, x)

would create a vector y with 11 entries consisting of two copies of x with a zero in the middle

place.

 

Vector arithmetic

Vectors can be used in arithmetic expressions, in which case the operations are performed element by element. Vectors occurring in the same expression need not all be of the same length. If they are not, the value of the expression is a vector with the same length as the longest vector which occurs in the expression. Shorter vectors in the expression are recycled as often as need be (perhaps fractionally) until they match the length of the longest vector. In particular a constant is simply repeated. So with the above assignments the command

> v <- 2*x + y + 1

generates a new vector v of length 11 constructed by adding together, element by element, 2*x repeated 2.2 times, y repeated just once, and 1 repeated 11 times.

 

The elementary arithmetic operators are the usual +, -, *, / and ^ for raising to a power. In

addition all of the common arithmetic functions are available. log, exp, sin, cos, tan, sqrt,

and so on, all have their usual meaning. max and min select the largest and smallest elements of a vector respectively. range is a function whose value is a vector of length two, namely c(min(x), max(x)). length(x) is the number of elements in x, sum(x) gives the total of the elements in x, and prod(x) their product.

Two statistical functions are mean(x) which calculates the sample mean, which is the same

as sum(x)/length(x), and var(x) which gives sum((x-mean(x))^2)/(length(x)-1)

 

or sample variance. If the argument to var() is an n-by-p matrix the value is a p-by-p sample

covariance matrix got by regarding the rows as independent p-variate sample vectors.

sort(x) returns a vector of the same size as x with the elements arranged in increasing order;

however there are other more flexible sorting facilities available (see order() or sort.list()

which produce a permutation to do the sorting).

Note that max and min select the largest and smallest values in their arguments, even if they

are given several vectors. The parallel maximum and minimum functions pmax and pmin return a vector (of length equal to their longest argument) that contains in each element the largest (smallest) element in that position in any of the input vectors.

For most purposes the user will not be concerned if the “numbers” in a numeric vector

are integers, reals or even complex. Internally calculations are done as double precision real

numbers, or double precision complex numbers if the input data are complex.

 

To work with complex numbers, supply an explicit complex part. Thus

sqrt(-17)    :    will give NaN and a warning, but

sqrt(-17+0i)     :    will do the computations as complex numbers.

 

Generating regular sequences

R has a number of facilities for generating commonly used sequences of numbers. For example

1:30 is the vector c(1, 2, …, 29, 30). The colon operator has high priority within an expression,

so, for example 2*1:15 is the vector c(2, 4, …, 28, 30). Put n <- 10 and compare

the sequences 1:n-1 and 1:(n-1).

The construction 30:1 may be used to generate a sequence backwards.

The function seq() is a more general facility for generating sequences. It has five arguments,

only some of which may be specified in any one call. The first two arguments, if given, specify

the beginning and end of the sequence, and if these are the only two arguments given the result is the same as the colon operator. That is seq(2,10) is the same vector as 2:10.

Arguments to seq(), and to many other R functions, can also be given in named form, in

which case the order in which they appear is irrelevant. The first two arguments may be named from=value and to=value; thus seq(1,30), seq(from=1, to=30) and seq(to=30, from=1)

are all the same as 1:30. The next two arguments to seq() may be named by=value and

length=value, which specify a step size and a length for the sequence respectively. If neither

of these is given, the default by=1 is assumed.

For example

> seq(-5, 5, by=.2) -> s3

generates in s3 the vector c(-5.0, -4.8, -4.6, …, 4.6, 4.8, 5.0). Similarly

> s4 <- seq(length=51, from=-5, by=.2)

generates the same vector in s4.

The fifth argument may be named along=vector, which is normally used as the only argument

to create the sequence 1, 2, …, length(vector), or the empty sequence if the vector

is empty (as it can be).

A related function is rep() which can be used for replicating an object in various complicated

ways. The simplest form is

> s5 <- rep(x, times=5)

which will put five copies of x end-to-end in s5. Another useful version is

> s6 <- rep(x, each=5)

which repeats each element of x five times before moving on to the next.

 

Logical vectors

As well as numerical vectors, R allows manipulation of logical quantities. The elements of a

logical vector can have the values TRUE, FALSE, and NA (for “not available”). The

first two are often abbreviated as T and F, respectively. Note however that T and F are just

variables which are set to TRUE and FALSE by default, but are not reserved words and hence can be overwritten by the user. Hence, you should always use TRUE and FALSE.

Logical vectors are generated by conditions. For example

> temp <- x > 13

sets temp as a vector of the same length as x with values FALSE corresponding to elements of x where the condition is not met and TRUE where it is.

The logical operators are <, <=, >, >=, == for exact equality and != for inequality. In addition

if c1 and c2 are logical expressions, then c1 & c2 is their intersection (“and”), c1 | c2 is their

union (“or”), and !c1 is the negation of c1.

Logical vectors may be used in ordinary arithmetic, in which case they are coerced into

numeric vectors, FALSE becoming 0 and TRUE becoming 1. However there are situations where logical vectors and their coerced numeric counterparts are not equivalent, for example see the next subsection.

 

Missing values

In some cases the components of a vector may not be completely known. When an element

or value is “not available” or a “missing value” in the statistical sense, a place within a vector

may be reserved for it by assigning it the special value NA. In general, any operation on an NA

becomes an NA. The motivation for this rule is simply that if the specification of an operation

is incomplete, the result cannot be known and hence is not available.

The function is.na(x) gives a logical vector of the same size as x with value TRUE if and

only if the corresponding element in x is NA.

> z <- c(1:3,NA); ind <- is.na(z)

Notice that the logical expression x == NA is quite different from is.na(x) since NA is not

really a value but a marker for a quantity that is not available. Thus x == NA is a vector of the

same length as x all of whose values are NA as the logical expression itself is incomplete and

hence undecidable.

Note that there is a second kind of “missing” values which are produced by numerical computation, the so-called Not a Number, NaN, values. Examples are

> 0/0

or

> Inf - Inf

which both give NaN since the result cannot be defined sensibly.

In summary, is.na(xx) is TRUE both for NA and NaN values. To differentiate these,

is.nan(xx) is only TRUE for NaNs.

Missing values are sometimes printed as <NA> when character vectors are printed without

quotes.

2.6 Character vectors

Character quantities and character vectors are used frequently in R, for example as plot labels.

Where needed they are denoted by a sequence of characters delimited by the double quote

character, e.g., “x-values”, “New iteration results”.

Character strings are entered using either matching double (“) or single (’) quotes, but are

printed using double quotes (or sometimes without quotes). They use C-style escape sequences,

using \ as the escape character, so \\ is entered and printed as \\, and inside double quotes “

is entered as \”. Other useful escape sequences are \n, newline, \t, tab and \b, backspace—see

?Quotes for a full list.

Character vectors may be concatenated into a vector by the c() function; examples of their

use will emerge frequently.

The paste() function takes an arbitrary number of arguments and concatenates them one by

one into character strings. Any numbers given among the arguments are coerced into character

strings in the evident way, that is, in the same way they would be if they were printed. The

arguments are by default separated in the result by a single blank character, but this can be

changed by the named argument, sep=string, which changes it to string, possibly empty.

For example

> labs <- paste(c(“X”,”Y”), 1:10, sep=””)

makes labs into the character vector

c(“X1”, “Y2”, “X3”, “Y4”, “X5”, “Y6”, “X7”, “Y8”, “X9”, “Y10”)

Note particularly that recycling of short lists takes place here too; thus c(“X”, “Y”) is

repeated 5 times to match the sequence 1:10.

2.7 Index vectors; selecting and modifying subsets of a data set

Subsets of the elements of a vector may be selected by appending to the name of the vector an

index vector in square brackets. More generally any expression that evaluates to a vector may

have subsets of its elements similarly selected by appending an index vector in square brackets

immediately after the expression.

Such index vectors can be any of four distinct types.

1. A logical vector. In this case the index vector is recycled to the same length as the vector

from which elements are to be selected. Values corresponding to TRUE in the index vector

are selected and those corresponding to FALSE are omitted. For example

> y <- x[!is.na(x)]

creates (or re-creates) an object y which will contain the non-missing values of x, in the

same order. Note that if x has missing values, y will be shorter than x. Also

> (x+1)[(!is.na(x)) & x>0] -> z

creates an object z and places in it the values of the vector x+1 for which the corresponding

value in x was both non-missing and positive.

 

2. A vector of positive integral quantities. In this case the values in the index vector must lie

in the set {1, 2, ..., length(x)}. The corresponding elements of the vector are selected and

concatenated, in that order, in the result. The index vector can be of any length and the

result is of the same length as the index vector. For example x[6] is the sixth component

of x and

> x[1:10]

selects the first 10 elements of x (assuming length(x) is not less than 10). Also

> c(“x”,”y”)[rep(c(1,2,2,1), times=4)]

(an admittedly unlikely thing to do) produces a character vector of length 16 consisting of

“x”, “y”, “y”, “x” repeated four times.

3. A vector of negative integral quantities. Such an index vector specifies the values to be

excluded rather than included. Thus

> y <- x[-(1:5)]

gives y all but the first five elements of x.

4. A vector of character strings. This possibility only applies where an object has a names

attribute to identify its components. In this case a sub-vector of the names vector may be

used in the same way as the positive integral labels in item 2 further above.

> fruit <- c(5, 10, 1, 20)

> names(fruit) <- c(“orange”, “banana”, “apple”, “peach”)

> lunch <- fruit[c(“apple”,”orange”)]

The advantage is that alphanumeric names are often easier to remember than numeric

indices. This option is particularly useful in connection with data frames, as we shall see

later.

An indexed expression can also appear on the receiving end of an assignment, in which case

the assignment operation is performed only on those elements of the vector. The expression

must be of the form vector[index_vector] as having an arbitrary expression in place of the

vector name does not make much sense here.

For example

> x[is.na(x)] <- 0

replaces any missing values in x by zeros and

> y[y < 0] <- -y[y < 0]

has the same effect as

> y <- abs(y)

2.8 Other types of objects

Vectors are the most important type of object in R, but there are several others which we will

meet more formally in later sections.

matrices or more generally arrays are multi-dimensional generalizations of vectors. In fact,

they are vectors that can be indexed by two or more indices and will be printed in special

ways. See Chapter 5 [Arrays and matrices], page 18.

factors provide compact ways to handle categorical data. See Chapter 4 [Factors], page 16.

lists are a general form of vector in which the various elements need not be of the same

type, and are often themselves vectors or lists. Lists provide a convenient way to return the

results of a statistical computation. See Section 6.1 [Lists], page 26.

data frames are matrix-like structures, in which the columns can be of different types. Think

of data frames as ‘data matrices’ with one row per observational unit but with (possibly) both numerical and categorical variables. Many experiments are best described by data

frames: the treatments are categorical but the response is numeric. See Section 6.3 [Data

frames], page 27.

functions are themselves objects in R which can be stored in the project’s workspace. This

provides a simple and convenient way to extend R. See Chapter 10 [Writing your own

functions], page 42.

Objects, their modes and attributes

 

Changing the length of an object

An “empty” object may still have a mode. For example

> e <- numeric()

makes e an empty vector structure of mode numeric. Similarly character() is an empty character

vector, and so on. Once an object of any size has been created, new components may be added

to it simply by giving it an index value outside its previous range. Thus

> e[3] <- 17

now makes e a vector of length 3, (the first two components of which are at this point both NA).

This applies to any structure at all, provided the mode of the additional component(s) agrees

with the mode of the object in the first place.

This automatic adjustment of lengths of an object is used often, for example in the scan()

function for input. (see Section 7.2 [The scan() function], page 31.)

Conversely to truncate the size of an object requires only an assignment to do so. Hence if

alpha is an object of length 10, then

> alpha <- alpha[2 * 1:5]

makes it an object of length 5 consisting of just the former components with even index. (The

old indices are not retained, of course.) We can then retain just the first three values by

> length(alpha) <- 3

and vectors can be extended (by missing values) in the same way.

3.3 Getting and setting attributes

The function attributes(object) returns a list of all the non-intrinsic attributes currently

defined for that object. The function attr(object, name) can be used to select a specific

attribute. These functions are rarely used, except in rather special circumstances when some

new attribute is being created for some particular purpose, for example to associate a creation

date or an operator with an R object. The concept, however, is very important.

Some care should be exercised when assigning or deleting attributes since they are an integral

part of the object system used in R.

When it is used on the left hand side of an assignment it can be used either to associate a

new attribute with object or to change an existing one. For example

> attr(z, “dim”) <- c(10,10)

allows R to treat z as if it were a 10-by-10 matrix.

3.4 The class of an object

All objects in R have a class, reported by the function class. For simple vectors this is just the

mode, for example “numeric”, “logical”, “character” or “list”, but “matrix”, “array”,

“factor” and “data.frame” are other possible values.

A special attribute known as the class of the object is used to allow for an object-oriented

style of programming in R. For example if an object has class “data.frame”, it will be printed

in a certain way, the plot() function will display it graphically in a certain way, and other

so-called generic functions such as summary() will react to it as an argument in a way sensitive

to its class.

To remove temporarily the effects of class, use the function unclass(). For example if winter

has the class “data.frame” then

> winter

 

will print it in data frame form, which is rather like a matrix, whereas

> unclass(winter)

will print it as an ordinary list. Only in rather special situations do you need to use this facility,

but one is when you are learning to come to terms with the idea of class and generic functions.

Generic functions and classes will be discussed further in Section 10.9 [Object orientation],

page 48, but only briefly.

 

 

 

 

Importing and manipulating your data are important steps in the data science workflow. R allows for the import of different data formats using specific packages that can make your job easier:

·        readr for importing flat files

·        The readxl package for getting excel files into R

·        The haven package lets you import SAS, STATA and SPSS data files into R.

·        Databases: connect via packages like RMySQL and RPostgreSQL, and access and manipulate via DBI

·        rvest for webscraping

 

Once your data is available in your working environment you are ready to start manipulating it using these packages:

·        The tidyr package for tidying your data.

·        The stringr package for string manipulation.

·        For data frame like objects learn the ins and outs of the dplyr package

·        Need to perform heavy data wrangling tasks? Check out the data.table package

·        Performing time series analysis? Try out packages like zoo, xts and quantmod.

 

Let’s practice

 

# Get and print current working directory.

print(getwd())

 

#Reading a CSV File

data <- read.csv(“input.csv”)

print(data)

 

# Analyzing the CSV File

data <- read.csv(“input.csv”)

print(is.data.frame(data))

print(ncol(data))

print(nrow(data))

 

#Get the maximum salary:

# Create a data frame.

data <- read.csv(“input.csv”)

# Get the max salary from data frame.

sal <- max(data$salary)

print(sal)

 

# Get the max salary from data frame.

sal <- max(data$salary)

# Get the person detail having max salary.

retval <- subset(data, salary == max(salary))

print(retval)

 

#Get the persons in IT department whose salary is greater than 600

info <- subset(data, salary > 600 & dept == “IT”)

print(info)

 

#Get the people who joined after 2014-01-01

retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))

print(retval)

 

Writing into a CSV File

R can create a csv file from an existing data frame. The write.csv() function is used to create the csv file. This file gets created in the working directory.

 

# Create a data frame.

data <- read.csv("input.csv")

retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))

# Write filtered data into a new file.

write.csv(retval,"output.csv")

newdata <- read.csv("output.csv")

print(newdata)

 

retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))

# Write filtered data into a new file.

write.csv(retval,"output.csv", row.names=FALSE)

newdata <- read.csv("output.csv")

print(newdata)

 

 

# Verify the package is installed.

any(grepl("xlsx",installed.packages()))

# Load the library into R workspace.

library("xlsx")

 

Input as XLSX file

Open Microsoft excel. Copy and paste the following data in the work sheet named as sheet1.

Also copy and paste the following data to another worksheet and rename this worksheet to “city”.

 

Save the Excel file as “input.xlsx”. You should save it in the current working directory of the R workspace.

 

Reading the Excel File

The input.xlsx is read by using the read.xlsx() function as shown below. The result is stored as a data frame in the R environment.

# Read the first worksheet in the file input.xlsx.

data <- read.xlsx("input.xlsx", sheetIndex = 1)

print(data)

 

Note: These examples are for 32 bit Windows

 

First, load the RODBC package (you’ll also have to install it if you don’t have it already).

 

# Load RODBC package

 library(RODBC)

 

Next, connect to the Access database. This code creates an object called “channel” that tells R where the Access database is.

 

If you paste the path from windows be sure to change every backslash to a forward slash.

Do not include the file extension (.accdb or .mdb) on the end of the name of the database.

 

# Connect to Access db

 channel <- odbcConnectAccess("C:/Documents/Name_Of_My_Access_Database")

 

Finally, run a SQL query to return the data.

# Get data

data <- sqlQuery( channel , paste ("select *

 from Name_of_table_in_my_database"))

 

Return All Data from One Table

This example shows how to connect to a database in R, query the database DATABASE, and return all of the data (this is specified using the * in SQL) from the table DATATABLE. The table name is preceded by the database schema SCHEMA and separated from it by a period. Each of the words in all caps within the query needs to be replaced so that the query applies to your database.

# Load RODBC package

library(RODBC)

 

# Create a connection to the database called “channel”

# If you are using operating system authentication (the computer already knows who you

# are because you are logged into it) you can leave out the uid="USERNAME", part.

channel <- odbcConnect("DATABASE", uid="USERNAME", pwd="PASSWORD", believeNRows=FALSE)

 

# Check that connection is working (Optional)

odbcGetInfo(channel)

 

# Find out what tables are available (Optional)

Tables <- sqlTables(channel, schema="SCHEMA")

 

# Query the database and put the results into the data frame “dataframe”

 dataframe <- sqlQuery(channel, "

 SELECT *

 FROM

 SCHEMA.DATATABLE")

 

Return Only Specific Fields

This example shows how to connect to a database in R, query the database DATABASE, and pull only the specified fields from the table DATATABLE. Note that loading the RODBC package and creating a connection do not have to be repeated if they were done in the first example.

 

# Load RODBC package

library(RODBC)

 

# Create a connection to the database called “channel”

channel <- odbcConnect("DATABASE", uid="USERNAME", pwd="PASSWORD", believeNRows=FALSE)

 

# Find out what fields are available in the table (Optional)

# as.data.frame coerces the data into a data frame for easy viewing

Columns <- as.data.frame(colnames(sqlFetch(channel, "SCHEMA.DATATABLE")))

 

# Query the database and put the results into the data frame “dataframe”

 dataframe <- sqlQuery(channel, "

 SELECT SCHOOL,

 STUDENT_NAME

 FROM

 SCHEMA.DATATABLE")