print(“6+3=”,6+3,end=“\n“)
print(“How are you?”,end=“\n“)
print(“This line \n for new line”) ;
# integer – int
a=5;
print(type(a))
#string – str
#boolean – bool
#float
#complex
a = 3+5j
print(a*a) # -16 +30j
#####
#1. WAP to find area and perimeter of a rectangle
# input: what you give to the computer
# process: what is the ask from the computer
# output: what you get back in return
# input: length & breadth
# process: area = length * breadth and perimeter as 2*(length + breadth)
# output: print the answers (area and perimeter) on to the screen
length = 25
breadth = 15
area = length * breadth
perimeter = 2*(length + breadth)
print(“Area = “,area)
print(“Perimeter = “,perimeter)
length , breadth,name = 25, 15,“Sachin” #implicit conversion
area = length * breadth
perimeter = 2*(length + breadth)
print(“Area = “,area,“and Perimeter = “,perimeter)
#unpack
#input() – to get input value from the user
length = input(“Enter the length value = “) #implicit into str
length = int(length) #explicit conversion to int
print(“Data type of length is “,type(length))
breadth = int(input(“Enter the breadth value = “))
print(“Data type of breadth is “,type(breadth))
area = length * breadth
perimeter = 2*(length + breadth)
# f string – format string
print(f”A rectangle with length {length} and breadth {breadth} has an area of {area} and perimeter of {perimeter}“)
# f-string expanded to float and str
total_cost = 100
num_units = 33
print(f”Total cost came to Rs {total_cost} for {num_units} pens so the cost of each pen is Rs {total_cost/num_units:.2f}“)
print(f”{3.69:.1f}“)
player = “Virat”
position = “captain”
country = “India”
print(f”Player {player:.<12} is {position:X^15} of {country:->12} team”)
player = “Mbanwaweba”
position = “wicket-keeper”
country = “Zimbabwe”
print(f”Player {player:<12} is {position:^15} of {country:>12} team”)
val1 = 7
val2 = 3
print(val1 + val2) # addition
print(val1 – val2) # subtract
print(val1 * val2) # multiply
print(val1 / val2) # division
print(val1 % val2) # remainder
print(val1 // val2) # integer division
print(val1 ** val2) # power
print(int(10 ** (1/2)))
#binary value
print(bin(10)) #0b1010
print(hex(0o12)) #0xa
print(oct(10)) #0o12
print(int(0b1010))
# binary operators: << >> & |
#Shift
print(10 <<3) # 0b1010 shifted left by 3 -> 0b1010000 = 80
print(int(0b101000)) # 0b101000 = 40, i.e. what 10 << 2 would give
print(10 >> 2) # 0b1010 shifted right by 2 -> 0b10 = 2
#Relational or conditional: > < <= >= == !=
val1 = 10
val2 = 20
val3 = 10
print(val1 > val2) # is val1 greater than val2? – False
print(val1 >= val2) # is val1 greater than or equal to val2? – False
print(val1 >= val3) # is val1 greater than or equal to val3? – True
print(val1 > val3) # is val1 greater than val3? – False
print(val1 < val3) #False
print(val1 <= val3) #True
print(“val1 == val3: “,val1 == val3)
print(val1 != val3)
print(“A40” > “90”)
# Logical operator: and or not
#input and output both are bool values
# and – both have to True to result into True (otherwise its False)
print(“T and T: “, True and True)
print(“T and F: “, True and False)
# or (+)
print(“F or T : “, False or True)
# 2+5*3 = ?
val1 = 10
val2 = 20
val3 = 10
print(val1 <= val3 or val1 > val3 and val1 > val2) # = ?
# T
print(bin(15))
print(“15 & 10 = “, 15&10)
print(“15 | 10 = “, 15|10)
#print(f”15 | 10 = 15{|}10″, )
print(f”val1 <= val3 or val1 > val3 and val1 > val2 = {val1 <= val3 or val1 > val3 and val1 > val2}“)
#print(f”True {and} False”)
# Read both sides of the rectangle from the user.  The length has to be read
# as well: the checks below use `l`, which was never assigned before.
l = int(input("Enter the value of length: "))
b = int(input("Enter the value of breadth: "))
#conditional statements checks the value
# Area and perimeter only make sense when both sides are positive.
if l > 0 and b > 0:
    area = l * b
    perimeter = 2 * (l + b)
    print(f"Rectangle with length {l} and breadth {b} has area of {area} and perimeter of {perimeter}")
#another set of if condition
# Same check again, this time with an else branch reporting invalid sides.
if l > 0 and b > 0:
    area = l * b
    perimeter = 2 * (l + b)
    print(f"Rectangle with length {l} and breadth {b} has area of {area} and perimeter of {perimeter}")
else:
    print("Sides of rectangle doesnt look valid")
#
# Check is a number is positive and if positive chck if divisible by 5
value = 50
if value > 0:
print(f”{value} is positive”,end=” “)
if value %5==0:
print(“and its divisible by 5”)
elif value <0:
print(f”{value} is negative”)
else:
print(f”{value} is neither positive nor negative”)
marks1, marks2,marks3,marks4,marks5 = 96, 96,96,96,95
#assign grade on the basis on avg marks:
# avg>90: A, 75-90: B, 60-75: C, 50-60:D, <50: E
avg = (marks1 + marks2+marks3+marks4+marks5)/5
print(“Average: “,avg)
if avg>=90:
print(“Grade A”)
#print(“Result: You have Passed”)
elif avg>=75:
print(“Grade B”)
#print(“Result: Passed”)
elif avg>=60:
print(“Grade C”)
elif avg>=50:
print(“Grade D”)
else:
print(“Grade E”)
print(“Result: Failed”)
#
print(“===================”)
flag = 0 #didnt win dean’s award
if avg>=50:
#print(“Result: You’ve passed!”)
if avg >= 90:
print(“Grade A”)
if avg >=95:
flag=1
elif avg >= 75:
print(“Grade B”)
elif avg >= 60:
print(“Grade C”)
else:
print(“Grade D”)
print(“Result: You’ve passed!”)
else:
print(“Result: Sorry You’ve failed!”)
print(“Grade E”)
if flag==1:
print(“You win special dean’s award”)
## checking the greater number between 2 values
val1,val2 = 40,20
if val1 > val2:
print(f”{val1} > {val2}“)
elif val1 < val2:
print(f”{val2} > {val1}“)
else:
print(f”{val2} = {val1}“)
#########
val1,val2 , val3 = 50, 90,20
if val1 > val2:
#print(f”{val1} >= {val2}”)
if val1 > val3:
if val3 > val2:
print(f”{val1} >= {val3} >= {val2}“)
else:
print(f”{val1} >= {val2} >= {val3}“)
else:
print(f”{val3} >= {val1} >= {val2}“)
else:
#print(f”{val2} >= {val1}”)
if val2>val3:
if val1 > val3:
print(f”{val2} >= {val1} >= {val3}“)
else:
print(f”{val2} >= {val3} >= {val1}“)
else:
print(f”{val3} >= {val2} >= {val1}“)
#LOOPS- to execute block of code multiple times
#range(a,b,c): a=starting (=), b= ending value (<), c=increment
#range(5,30,5): 5,10,15,20,25
#range(a,b) c is default = 1
#range(4,10): 4,5,6,7,8,9
#range(b), a is default = 0, c is default = 1
#range(4): 0,1,2,3
#FOR Loop – when we know how many times to run
print(“i” in “India”) #True
print(“A” in “India”) #False because A is not in India
for counter in range(5,10,3):
print(“In For Loop:”,counter)
for counter in “India”:
print(“In For Loop:”,counter)
#Run a loop 5 times:
for i in range(5):
print(“i = “,i)
#a way of printing even numbers upto 20
for i in range(0,21,2):
print(i,end=“, “)
print()
for i in range(1,101):
if i %5==0:
print(i,end=“, “)
print()
# WHILE Loop - runs for as long as its condition stays True
ch = "n"
# Entry controlled loop: the condition is tested before every pass, so with
# ch starting as "n" the body is not executed at all.
while ch == "y":
    print("How are you?")
    ch = input("Enter y to continue, anyother key to stop: ")
# Exit controlled loop: the body always runs at least once and the loop is
# left explicitly with break.
while True:
    print("I am fine")
    # Prompt corrected: only "n" stops the loop, any other key continues.
    ch = input("Enter n to stop, anyother key to continue: ")
    if ch == "n":
        break
###
”’
* * * * *
* * * * *
* * * * *
* * * * *
* * * * *
”’
for j in range(5):
for i in range(5):
print(“*”,end=” “)
print()
”’
*
* *
* * *
* * * *
* * * * *
”’
for j in range(5):
for i in range(j+1):
print(“*”,end=” “)
print()
”’
* * * * *
* * * *
* * *
* *
*
”’
num=10
for j in range(num):
for i in range(num-j):
print(“*”,end=” “)
print()
”’
*
* *
* * *
* * * *
* * * * *
”’
for j in range(5):
for i in range(5-j):
print(” “,end=“”)
for i in range(j+1):
print(“*”,end=” “)
print()
#Multiplication table
for j in range(1,11):
for i in range(1,11):
print(f”{i:<2} * {j:<2} = {j*i:<2}“,end=” “)
print()
#match and case
ch = input(“Enter you favorite programming language: “)
match ch:
case “Python”:
print(“You are on Data Scientist track”)
case “Java”:
print(“You are on Mobile App Developer track”)
case “Javascript”:
print(“You are on Web Developer track”)
#Program to take input marks and find avg
ch=“y”
sum=0
counter=0
while ch==“y”:
marks = int(input(“Enter marks: “))
counter+=1
sum+=marks # sum=sum+marks
ch=input(“Do you have more marks to add? y for yes: “)
avg = sum/counter
print(f”Total marks= {sum} and Average marks ={avg}“)
#
# guessing number game – human v computer
import random
num = random.randint(1,100) #both start and end is inclusive
attempt = 0
while True:
val = int(input(“Guess the number (1-100): “))
if val<1 or val>100:
print(“Invalid number!”)
continue #take you to the beginning of the loop
attempt+=1
if val==num:
print(f”You have guessed it correctly in {attempt} attempts”)
break #throw you out of the loop
elif val <num:
print(“Incorrect! Your guess is low”)
else:
print(“Incorrect! Your guess is high”)
##
# guessing number game - computer v computer
# The computer picks a secret number and then guesses it itself, shrinking
# the [start, end] window after every wrong guess.
import random

num = random.randint(1, 100)  # both start and end are inclusive
attempt = 0
start, end = 1, 100  # current search window; it always contains num
while True:
    # The guess is drawn from inside the window, so it can never fall
    # outside 1..100 and no separate range check is needed.
    val = random.randint(start, end)
    attempt += 1
    if val == num:
        print(f"You have guessed it correctly in {attempt} attempts")
        break  # throw you out of the loop
    elif val < num:
        print("Incorrect! Your guess is low")
        start = val + 1  # guess a higher number
    else:
        print("Incorrect! Your guess is high")
        end = val - 1  # guess a lower number
### Using IF Condition in one single line – one line condition
# Ternary operator: condition logic should not be more than 1 line
val1, val2 = 30,40
var1 = val1 if val1 > val2 else val2
print(“1. Higher number is “,var1)
var1 = “val1 is higher” if val1 > val2 else “val2 is higher”
print(“2. Message: “,var1)
### One line for loop
#square the values greater than 5 and cube the values for others
for i in range(10):
if i >5:
val = i**2
else:
val = i**3
print(val)
#Above code can be implemented in one line:
print(“Using one line loop and condition:”)
for i in range(10): print(i**2) if i>5 else print(i**3)
Assignment Programs
Exercise 1: Write a program in Python to display the Factorial of a number.
Exercise 2: Write a Python program to find those numbers which are divisible by 7 and multiples of 5, between 1500 and 2700 (both included).
Exercise 3: Write a Python program to reverse a number.
Exercise 4: Write a program to print n natural number in descending order using a while loop.
Exercise 5: Write a program to display the first 7 multiples of 7.
Exercise 6: Write a Python program to convert temperatures to and from Celsius and Fahrenheit.
[ Formula : c/5 = (f-32)/9, where c = temperature in Celsius and f = temperature in Fahrenheit ]
Expected Output :
60°C is 140 in Fahrenheit
45°F is 7 in Celsius
Exercise 7: Write a Python program that iterates the integers from 1 to 50. For multiples of three print “Fizz” instead of the number and for multiples of five print “Buzz”. For numbers that are multiples of three and five, print “FizzBuzz”.
Sample Output :
fizzbuzz
1
2
fizz
4
buzz
Exercise 8: Write a Python program to print the alphabet pattern ‘A’.
Expected Output:
***
* *
* *
*****
* *
* *
* *
Exercise 9: Write a Python program to print the alphabet pattern ‘G’.
Expected Output:
***
* *
*
* ***
* *
* *
***
name = “Sachin” \
“Cricket God”
name1 = ‘Virat’
name2 = ”’Rohit
Captain of Team India”’
name4=“””Dhoni
won the world
cup for India
in multiple formats of the
game”””
print(name)
print(name2)
print(name4)
#substring- subset of data – indexing
name = “Sachin Tendulkar”
print(name[0])
print(name[1])
print(“SCI => “,name[0]+name[2]+name[4])
print(“First 3 characters: “,name[0:3], name[:3]) #nothing on left on : means start from zero
print(“chin => “,name[2:6])
print(“kar =>> “, name[13:16])
#len()
print(“Total characters in name is”,len(name))
print(name[15], name[len(name)-1], name[-1])
print(“First character using backward indexing: “,name[-16], name[-len(name)])
print(“kar using backward indexing=>> “, name[-3:]) #when left black on the right means go upto end
fname = “Sachin”
age = 49
print(“First name of the player is “+fname)
print(“Age = “+str(age))
for i in range(5):
print(“* “*(5-i))
#Data structures
# list, tuple, dictionary, sets
print(“Print all characters: “,name,name[:])
# Strings are immutable: TypeError: ‘str’ object does not support item assignment
val = “HELLO”
val = “hELLO”
txt = “abade”
for i in txt:
print(“* “*(5– txt.index(i)))
## is…() – return either True or False
txt = “highway route number. I AM driving there. since one month”
print(“txt.isupper: “,txt.isupper())
print(“txt.islower: “,txt.islower())
print(“Alphabet & Numeric”,txt.isalnum())
print(“Alphabet & Numeric”,txt.isalpha())
print(“Alphabet & Numeric”,txt.isdigit())
print(txt.upper())
print(txt.lower())
print(txt.title())
print(txt.capitalize())
# split()
print(txt.split(‘o’))
out = [‘highway’, ‘route’, ‘number.’, ‘I’, ‘AM’, ‘driving’, ‘there.’, ‘since’, ‘one’, ‘month’]
print(“====”)
out2 = ” “.join(out)
print(out2)
txt = “I am driving on highway route on number fifteen.”
print(txt.lower().count(“i”))
start_pos=0
for i in range(txt.lower().count(“i”)):
print(txt.lower().index(“i”,start_pos),end=“, “)
start_pos=txt.lower().index(“i”,start_pos) + 1
print()
print(txt.replace(“on”,“over”,1))
print(txt)
#strings are immutable
#LIST
l1 = [2,3,5.5,True,“Hello”,[4,8,12]]
print(len(l1))
print(type(l1))
print(type(l1[0]))
print(type(l1[-1]))
print(l1[-2].upper())
l2 = [False,5,115]
l3 = l1 + l2
print(l3*3)
#lists are mutable
l2[1] = “How are you?”
print(l2)
for i in l2:
print(i,end=“, “)
print(“\n\n### Methods”)
### Methods
l1 = [2,4,6,8,10,2,14]
print(l1.index(2,2))
print(l1.count(2))
print(“1. Current list = “,l1)
print(l1.pop(2)) #removes index element
print(“2. Current List = “,l1)
print(l1.remove(14)) #removes value element
print(“3. Current List = “,l1)
l1.append(21) #adds at the end of the list
print(“4. Current List = “,l1)
l1.insert(3,31) #takes position and the value
print(“5. Current List = “,l1)
# Below: two ways of getting a second variable from l1
l2 = l1 # NOT a copy: l2 is just another name (alias) for the same list object as l1
l3 = l1.copy() # shallow copy: a new, independent list holding the same elements
print(“1. L1 = “,l1)
print(“1. L2 = “,l2)
print(“1. L3 = “,l3)
l1.append(42)
l2.append(52)
l3.append(62)
print(“2. L1 = “,l1)
print(“2. L2 = “,l2)
print(“2. L3 = “,l3)
l1.extend(l3)
print(l1)
l1.reverse()
print(l1)
l1.sort() #increasing order
print(l1)
l1.sort(reverse=True) #decreasing order
print(l1)
#definition: List is an ordered linear mutable collection
subject_list = [‘Maths’,‘Science’,‘English’,‘Social Science’,‘German’]
marks_list = []
sum=0
for i in range(5):
#marks = int(input(“Enter marks in Subject “+str(i+1)+”: “))
marks = int(input(“Enter marks in Subject ” + subject_list[i] + “: “))
sum+=marks
marks_list.append(marks)
print(“Marks obtained in each subject = “,marks_list,“and total marks = “,sum)
sum=0
for i in marks_list:
sum+=i
print(“Total marks = “,sum)
#Reduce, Map, Filter => later after functions
# TUPLE:
#definition: Tuple is an ordered linear immutable collection
t1 = ()
t1 = (5,)
t1 = (5,4)
t1 = (5,6,7,8.0,“9”)
print(type(t1))
#tuples are converted to list and vice-versa
t1 = list(t1)
t1.append(45)
t1 = tuple(t1)
#tuples are faster than list for reading
#packing
t1 = (3,30,“Hello”) #packing
#unpacking
a,b,c = t1
print(a,b,c)
print((2,3,99) > (3,1)) #checks one by one member untill it finds the greater value
# dictionary – ordered collections – key:value pair
dict1 = {}
print(type(dict1))
dict1 = {“name”:“Sachin”,“city”:“Mumbai”,“Runs”:12345,“name”:“Tendulkar”}
print(dict1)
print(dict1[‘name’])
dict2 = {(“Team IPL”,“Team Ranji”):“Mumbai Indians”}
dict1.update(dict2)
print(dict1)
for i in dict1.keys():
print(i, dict1[i])
print(“Iterating through values:”)
for i in dict1.values():
print(i)
print(“Iterating through items (key,value):”)
for i,j in dict1.items():
print(i,j)
for i in dict1.items():
print(list(i))
print(“Printing the values: “)
print(“Keys :”,dict1.keys())
print(“Values:”,dict1.values())
print(“Items:”,dict1.items())
dict1.pop(‘Runs’)
print(“After pop: “,dict1)
dict2 = dict1 # NOT a copy: dict2 is another name (alias) for the same dict object as dict1
dict3 = dict1.copy() # shallow copy: a new dict, later updates to dict1 do not show up in it
t_dict = {“Country”:“India”}
dict1.update(t_dict)
print(“Printing all 3 Dictionaries: “)
print(“Dict1: “,dict1)
print(“Dict2: “,dict2)
print(“Dict3: “,dict3)
#dict1[‘name’]=”Tendulkar”
t_dict = {“name”:“Tendulkar”}
dict1.update(t_dict)
for i in range(2):
print(“Dict1 before popitem: “,dict1)
dict1.popitem()
print(“Dict1 after popitem: “,dict1)
”’
Write a program to input Roll no. and marks of 3 students in 3 subjects
{101:[], 102:[]}
”’
dict_marks={}
for i in range(3):
t_dict = {}
roll=int(input(“Enter the Roll number:”))
t_list=[]
for j in range(3):
marks=int(input(“Enter the marks for Roll no.”+str(roll)+” in Subject “+str(j+1)+” :”))
t_list.append(marks)
t_dict = {roll:t_list}
dict_marks.update(t_dict)
print(“Final data:”,dict_marks)
”’
Assignment program:
from the below dictionary find the topper for each subject:
{100: [55, 66, 78], 102: [90, 87, 54], 105: [67, 76, 87]}
e.g. Highest in Subject 1: 102 with 90 marks
Highest in Subject 2: 102 with 87 marks
Highest in Subject 3: 105 with 87 marks
”’
#Example 2:
# Collect the details of two clients.  Each client's answers go into their
# own dictionary, which is stored in master_dict under the client number.
# (Merging with update() would overwrite the previous client, because every
# client dictionary uses the same four keys.)
master_dict = {}
for k in range(2):
    t_dict = {"Name": 0, 'Age': 0, 'Email': 0, "Address": 0}
    for i in t_dict.keys():
        j = input("Enter the client’s " + i + " :")
        t_dict[i] = j
    master_dict[k + 1] = t_dict  # client numbers start at 1
print("Master Dictionary information: \n", master_dict)
##################################################
# SETS
set1 = {}
print(type(set1))
set1 = {“Apple”,“Banana”,“Mango”,“Grapes”,“Guava”}
print(type(set1))
set2 = {“Mango”,“Grapes”,“Guava”,“Apple”,“Banana”}
set1 = {1,2,3,4,5,6}
set2 = {1,3,5,7,9}
print(“Union:”)
print(set1 | set2)
print(set1.union(set2))
print(“Intersection:”)
print(set1 & set2)
print(set1.intersection(set2))
print(“Minus – difference”)
print(set1 – set2)
print(set2.difference(set1)) #set2 – set1
print(“Symmetric Difference”)
print(set1 ^ set2)
print(set1.symmetric_difference(set2))
print(“Set1 before pop:”,set1)
set1.pop()
print(“Set1 after pop:”,set1)
list1 = [1,2,3,4,1,2,3,1,2,1]
list1 =list(set(list1))
print(type(list1))
print(list1)
#################### FUNCTIONS ##############################
# Takes no parameters and returns nothing; it only prints three questions.
# The docstring text is kept as written because the script prints it through
# myquestions.__doc__.
def myquestions():
    '''this is a sample function to demonstrate how function works
    it doesnt take any parameter nor does it return anything
    -written on 22nd april'''
    print("whats your name?")
    print("how are you?")
    print("Where are you going?")
def mycalc1(a, b, c):
    """Print the three arguments and then their sum.

    All three parameters are required; they can be passed by position or
    by keyword.  Nothing is returned.
    """
    print("MY CALC 1")
    print(f"A,B and C values are {a},{b},{c}")
    total = a + b + c
    print(total)
def mycalc2(a, b=0, c=9):
    """Print the three arguments and then their sum.

    Only ``a`` is required; ``b`` defaults to 0 and ``c`` defaults to 9.
    Nothing is returned.
    """
    print("MY CALC 2")
    print(f"A,B and C values are {a},{b},{c}")
    total = a + b + c
    print(total)
def myfunc1(a, *b, **c):
    """Show how variable-length arguments are collected.

    ``a`` receives the first positional argument, ``b`` is a tuple holding
    any further positional arguments and ``c`` is a dict holding all keyword
    arguments.  The three values are printed; nothing is returned.
    """
    print("A = ", a)
    print("B = ", b)
    print("C = ", c)
myquestions()
print(myquestions.__doc__)
print(“Doc for print”)
print(input.__doc__)
#doc – first line of code inside the function, must be multiline comment
print(“\n\n\n“)
mycalc1(10,50,90) #required positional arguments
mycalc2(3,7,19) #calling default arguments
mycalc1(c=1,a=7,b=3) #use keywords to avoid positional
print(“Calling variable length arguments:”)
myfunc1(10,1,2,3,4,5,6,7,8,9,0,4,5,66,44,333,33, name=“Sachin”,age=43,runs=19890)
def myfun1(a, b, c):
    """Demonstrate the ``global`` statement.

    Prints the module-level variable ``x``, rebinds it to 5 and prints it
    again.  Because of ``global x`` the assignment changes the module-level
    name, so the caller sees 5 afterwards.  ``x`` must already exist when
    the function is called.  The parameters are accepted but not used.
    """
    global x
    print("X = ", x)
    x = 5
    print("X = ", x)
x=50
myfun1(5,10,15)
print(“in Main x = “,x)
###
def isPrime(n):
    """Return True when the integer ``n`` is a prime number, else False.

    Numbers below 2 (0, 1 and all negatives) are not prime.
    """
    if n < 2:
        return False
    # A composite number always has a divisor no larger than its square
    # root, so trial division can stop once i * i exceeds n.
    i = 2
    while i * i <= n:
        if n % i == 0:
            return False
        i += 1
    return True
check = isPrime(51)
if check:
print(“51 number is prime”)
else:
print(“51 number is not prime”)
#generate list of prime number between 1000 and 2000
for i in range(1000,2001):
out =isPrime(i)
if out:
print(i)
# The def line of this function was missing.  The name myfunc1 is taken from
# the calls in the __main__ block of this lesson, which invoke myfunc1()
# without arguments and print myfunc1.__doc__.
def myfunc1():
    """This is a sample function to see the working of a function"""
    print("What’s your name?")
    print("How are you?")
    print("Where do you live?")
def myfunc2(a, b, c):  # required positional arguments
    """Print the three arguments and their total.

    All three parameters are required; they may also be passed as keyword
    arguments in any order.  Nothing is returned.
    """
    print(f"Values of a,b and c are {a},{b} and {c} respectively")
    total = a + b + c
    print("Total is ", total)
def myfunc3(a, b=0, c=0):  # a is required; b and c are optional (default 0)
    """Print the three arguments and their total.

    Only ``a`` must be supplied; ``b`` and ``c`` fall back to 0 when
    omitted.  Nothing is returned.
    """
    print(f"Values of a,b and c are {a},{b} and {c} respectively")
    total = a + b + c
    print("Total is ", total)
def isPrime(n):
    '''isPrime takes an integer n and returns True if n is a prime
    number, otherwise False. It does not print anything.
    Numbers below 2 (0, 1 and all negatives) are not prime.'''
    if n < 2:
        return False
    # Trial division only needs to go up to the square root of n.
    divisor = 2
    while divisor * divisor <= n:
        if n % divisor == 0:
            return False
        divisor += 1
    return True
if __name__ ==“__main__”:
myfunc1()
print(“————-“)
print(myfunc1.__doc__)
# doc string: a multi line string and first line in the function
print(print.__doc__)
print(int.__doc__)
myfunc2(5, 10, 15) # required positional
print(“Calling My Func3 below:”)
myfunc3(10, 20)
myfunc3(10, 20, 30)
n = 11
out = isPrime(n)
if out:
print(n, “is a prime number”)
else:
print(n, “is not a prime number”)
# I want to print a list of all the values that are prime between 100 and 500
print(“Printing list of prime numbers from 100 to 500:”)
for k in range(100, 501):
if isPrime(k):
print(k)
# We are talking about non-positional (or KEYWORD arguments)
print(“Working on keyword arguments:”)
myfunc2(b=45, c=50, a=70) # for keyword – use same arguments that are already there
# Module:
# P5.py file:
def mytask(n):
    """Recursively count down from ``n`` to 0, printing every value.

    Returns 100 once the countdown reaches 0.  The result of the recursive
    call is passed back up, so the outermost call returns 100 as well
    instead of None.  A value of ``n`` at or below 0 stops immediately
    rather than recursing without end.
    """
    print("Hello : ", n)
    if n <= 0:
        return 100
    return mytask(n - 1)
# 5! = 5 * 4!
def myfacto(n):
    """Return n! (the factorial of ``n``) computed recursively.

    Uses n! = n * (n-1)! with 0! = 1! = 1 as the base case.

    Raises:
        ValueError: if ``n`` is negative, where the factorial is undefined.
    """
    if n < 0:
        raise ValueError("factorial is not defined for negative numbers")
    if n <= 1:
        return 1
    return n * myfacto(n - 1)
###### decorators
def outer():
    """Show that a function can define and call another function inside it.

    ``inner`` exists only while ``outer`` runs; its line is printed between
    the second and third lines of ``outer``.
    """
    print("Line one of outer")

    def inner():
        print("Line 1 of inner")

    print("Line two of outer")
    inner()
    print("Line three of outer")
def myouter11():
    """Print one identifying line; meant to be passed to myouter2 as a callback."""
    print("This is line 1 from myouter 11")
def myouter22():
    """Print one identifying line; meant to be passed to myouter2 as a callback."""
    print("This is line 1 from myouter 22")
def myouter33():
    """Print one identifying line; meant to be passed to myouter2 as a callback."""
    print("This is line 1 from myouter 33")
def myouter2(var1):
    """Call the function passed in as ``var1`` between two printed lines.

    ``var1`` must be callable without arguments.  This is the building block
    for decorators: a function that receives another function and runs it.
    """
    print("This is line 1 from myouter 2")
    var1()
    print("This is line 2 from myouter 2")
# Runs only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    n = 50
    out = myfacto(n)
    # The label now reports the number that was actually passed to myfacto.
    print(f"Factorial of {n} is", out)
    outer()
    myouter2(myouter33)  # pass the function itself, not the result of calling it
# P6.py:
from p5 import mytask, myfacto, myfunc2
from MyPack1 import modul1
modul1.myfunc1()
#SuperFunctions.mytask(7)
mytask(50)
myfacto(10)
print(“Option 1”)
for i in range(5):
print(“Hello”)
print(“Option 2”)
for i in range(5): print(“Hello”)
print(“Option 3”)
list1 = [2,4,6,8,10]
for i in list1: print(i)
print(“Option 4”)
prod=1
for i in range(1,10): prod*=i
print(prod)
print(“Option 5”)
mylist1 = [i for i in range(1,10)]
print(“Mylist1 = “,mylist1)
# one line if condition
print(“Condition Option 1”)
num=-5
if num>0:
print(“Positive”)
else:
print(“Not positive”)
output = “Positive” if num>0 else “Not Positive”
print(“Option 1 output = “,output)
print(“Condition Option 1 with Loops”)
### calculate cube of values between 1 and 9
print([num**3 for num in range(1,10)])
### calculate cube of values between 1 and 9 if the value is odd
print([num**3 for num in range(1,10) if num%2==1])
# one line function
print(“one line function Option 1”)
myfun1 = lambda x,y:print(“Total = “,x+y)
myfun1(10,20)
print(“one line function Option 2”)
friends=[“Rohit”,“Rahul”,“Surya”,“Kohli”]
batting = lambda team: [print(“Now batting:”,x) for x in team]
batting(friends)
#MAPS, FILTER & REDUCE
input = [2000,3000,100,200,5000,6000,3000,900,600,500,230,8000]
# all the three concepts works on list – input is a list and output depends upon the task
## 1. map – if there is a single logic (formula) that you need to apply on entire list values
#example: convert these feet into metres: divide by 3.1
some_func = lambda num:num/3.1
out = list(map(some_func,input))
print(“Output = “,out)
# Filter: if there is a single logic (condition) based on whicch you select subset
## subset of values are created when the condition returns True
out = list(filter(lambda x: (x//100)>=10,input))
print(“Filtered values are: “,out)
# Reduce: takes entire data in a list and reduces them to just 1 single value based on the given formula
from functools import reduce
print(“Sum of all the values are: “,reduce(lambda a,b:a+b, input))
# PROJECT 1: Working with Dictionary
## Dictionary: {Item_code: [“Item_Description”,price]}
## create bills for each individual: {item_code: [quantity, price, total_cost]}
import time
start = time.time()
for i in range(1000000):
out = i**3+500*i**2+9
time.sleep(1)
end = time.time()
print(“Total time taken by the program to run: “,end-start)
from datetime import datetime
currenttime= datetime.now()
print(“Current time: “,currenttime)
print(“Date: “,currenttime.strftime(“%y-%m-%d”))
print(“Get: “,currenttime.year, currenttime.day, currenttime.minute)
print(“out: “,currenttime.today())
print(“Weekday: “,currenttime.weekday())
from datetime import timedelta,datetime
print(“Yesterday: “,currenttime-timedelta(days=1))
print(“Next week: “,currenttime+timedelta(days=7))
# The class header was missing.  The name BookMagazine is taken from the body
# below and from the subclasses that derive from it.
class BookMagazine:
    """Common base for publications: stores a title and a page count."""

    # Two leading underscores make this attribute private through name
    # mangling: it can be read as BookMagazine.__publisher only from inside
    # this class.
    __publisher = "Eka Publishers"

    def __init__(self, title, page):
        """Print the publisher, then store ``title`` and ``page`` (as ``pages``)."""
        print("Publisher is: ", BookMagazine.__publisher)
        self.title = title
        self.pages = page
class Books(BookMagazine):
    """A book: a BookMagazine with an author, counted across all instances."""

    # class level variable - shared by the class and every object
    total_books = 0

    # object level variables are created inside the object methods
    def __init__(self, title, author, page):
        """Store title/pages through the base class, then the author, and count the book."""
        super().__init__(title, page)
        self.author = author
        Books.total_books += 1

    def display_book(self):
        """Print title, author (or a notice when it is empty) and page count."""
        print("Dummy Function: Book Created")
        print("Title = ", self.title)
        if self.author == "":
            print("There is no author name declared")
        else:
            print("Author = ", self.author)
        print("Pages = ", self.pages)

    @classmethod
    def display_count(cls):
        """Print how many books have been created so far."""
        print("Total book count = ", cls.total_books)
##
b1=Books(“Python Programming”,“Swapnil Saurav”,330)
print(“B1 Display”)
b1.display_book()
b2=Books(“Data Science”,“Swapnil Saurav”,550)
print(“B2 Display”)
b2.display_book()
b3=Books(“Data Visualization”,“Swapnil Saurav”,250)
b3.display_book()
print(b1.total_books)
print(Books.total_books)
#Today’s assignment; Using Class and objects perform addition, subtraction, multiplication,
# and division. Each of these should have unique functions. init should take in 2 input values
# from the user
# Implement atleast one of these: class variable and method, object variable and method
class Magazines(BookMagazine):
    """A magazine: a BookMagazine that additionally records a genre."""

    def __init__(self, title, pages, genre):
        """Store title and pages through the base class, then the genre."""
        super().__init__(title, pages)
        self.genre = genre
class Library:
    """Unrelated class used to show which members of Books are reachable from outside."""

    def lib_fun1(self):
        """Print the public class-level book counter of Books."""
        print("Printing from Library class:")
        #print("Publishers = ",BookMagazine.__publisher) #throws error as private members cant be accessed
        print("Total Books = ", Books.total_books)
        #print(Books.__publisher) #throws error as private members cant be accessed
l1= Library()
l1.lib_fun1()
#Access Modifiers:
## private: only the members of the same class can access
## protected: (one _ variable name): _name, _pub => only derived class can be called
### concept of protected is there but practically its not yet updated
## public: any member can call members of any class
# The class header was missing.  The name Books is taken from the body below
# (Books.total_books) and from the Books(...) calls that follow.
class Books:
    """A book with a title and an author; counts how many were created."""

    # class level variable shared by all objects
    total_books = 0

    def __init__(self, title, author):
        """Store ``title`` and ``author`` and increase the shared counter."""
        self.title = title
        self.author = author
        Books.total_books += 1

    def display_data(self):
        """Print the title and author on one line."""
        print(f"Title = {self.title} and Author = {self.author}")
b1 = Books(“Python Programming”,“Saurav”)
print(type(b1))
b1.display_data()
b2 = Books(“Machine Learning”,“Saurav”)
b2.display_data()
l1 = []
print(type(l1))
##############
#Errors
# Syntax errors – when you dont follow Python rules
# print 5
#logical errors – wrong logic.. a + b = 4 *2
# Exception errors – runtime errors
num1=0
try:
num1 = int(input(“Enter a number: “))
#10/0
except (ValueError, ZeroDivisionError):
print(“You have not entered a valid number, hence exiting…”)
except Exception:
print(“Some error has occured, please retry!”)
else:
print(num1) # ValueError
finally:
print(“Error or No error I will be called”)
#10/0: ZeroDivisionError
num1=0
while True:
try:
num1 = int(input(“Enter a number: “))
break
except (ValueError, ZeroDivisionError):
print(“You have not entered a valid number, hence exiting…”)
except Exception:
print(“Some error has occured, please retry!”)
else:
print(num1) # ValueError
finally:
print(“Error or No error I will be called”)
# Assertion Error
def print_data(num):
    """Return ``num`` raised to the power of itself, for ``num`` above 100 only.

    Raises:
        AssertionError: if ``num`` is not greater than 100.  The error is
            raised explicitly instead of through an ``assert`` statement, so
            the check still runs when Python is started with -O (which
            removes assert statements).
    """
    #perform this only when num >100
    if not num > 100:
        raise AssertionError("Value entered is too small to process")
    return num ** num
try:
out = print_data(100)
except AssertionError:
print(“Give larger value and run again”)
else:
print(“Output is “,out)
#########
### create my own exception
class TooSmallValue(Exception):
    """Raised when a value is not above the required minimum.

    Attributes set by the constructor:
        value: the offending value (default 0).
        min: the limit the value has to exceed (default 100).
    """

    def __init__(self, value=0, min=100):
        # `min` shadows the builtin inside this method only; the parameter
        # name is kept so existing keyword calls keep working.
        self.value = value
        self.min = min

    def __str__(self):
        # Gives uncaught errors and log lines a readable message.
        return f"Value {self.value} is too small, it must be greater than {self.min}"
#driving code
value = int(input(“Enter a value >100: “))
try:
if value <=100:
raise TooSmallValue
except TooSmallValue:
print(“TooSmallValue: Give larger value and run again”)
else:
print(“Output is “,value*value)
######### WORKING WITH OPERATING SYSTEMS
import os
os_name = os.name
print(os.name)
if os_name==‘nt’:
print(“You are using a Windows machine”)
elif os_name==‘posix’:
print(“This is Mac or Linux or Unix machine”)
else:
print(“not sure which OS you are using”)
## Some OS specific command
#os.rename(“infy.py”,”infy_apr.py”)
#os.mkdir(“TEST”)
import os
from pathlib import Path  # Path is used below and was never imported

# List every entry of the project folder and report whether it is a directory.
path_loc = Path("C:/Users/HP/PycharmProjects/pythonProject/")
if path_loc.is_dir():
    for p in path_loc.iterdir():
        print(p, " : Is it a directory: ", p.is_dir())
else:
    # iterdir() raises an error for a missing folder; this path is specific
    # to one machine, so report it instead of crashing.
    print(path_loc, "does not exist or is not a directory")
# Text file processing
# Modes: read (r), write (w), append (a); r+ w+ a+ allow reading and writing.
# "a+" creates the file when it is missing, starts positioned at the END of
# the file and always writes at the end.
# The with-statement closes the file even if one of the operations fails.
with open("file1.txt", "a+") as fileobj:
    if fileobj.readable():
        print("read operations")
        fileobj.seek(0)  # a+ starts at the end; rewind or read() returns ""
        content = fileobj.read()
        print("Entire content:")
        print(content)
        fileobj.seek(0)  # move to first character
        print("First 10 characters:")
        print(fileobj.read(10))
        # readline will read maximum one line at a time, starting from the
        # current position
        fileobj.seek(0)
        line = fileobj.readline(1000)
        print("Line: \n", line)
        lines = fileobj.readlines()  # the lines remaining after the first one
        print("==========reading lines")
        print(lines)
        fileobj.seek(0)
        lines = fileobj.readlines()  # read again from the start: all lines
        print("==========reading lines")
        print(lines)
    else:
        print("Its not readable")
    if fileobj.writable():
        lines = ['Twinkle Twinkle Little Star\n', 'How I wonder\n',
                 'What you are\n', 'Up Above the World\n']
        fileobj.writelines(lines)
######### CSV Files
import csv
fileobj = open(“D:/datasets/tcs_stocks.csv”) # default mode is r (read)
csv_file = csv.reader(fileobj, delimiter=“,”)
print(list(csv_file))
fileobj.seek(0)
for i in csv_file:
for j in i[:2]:
print(j, end=” “)
print()
fileobj.close()
# create a csv file
header = [‘Name’,‘Team’,‘Matches’]
row1 = [‘Sachin’,‘Mumbai’,222]
row2 = [‘Laxman’,‘Hyderabad’,212]
row3 = [‘Rahul’,‘Bangalore’,333]
import csv
fileobj = open(“sample1.csv”,‘w’,newline=”)
row_writer = csv.writer(fileobj,delimiter=‘|’)
row_writer.writerow(header)
row_writer.writerow(row1)
row_writer.writerow(row2)
row_writer.writerow(row3)
fileobj.close()
###### JSON #############
#json: load, loads, dump, dumps
import json
fileoj = open(“json1.json”,“r”)
content = json.load(fileoj)
#print to check if we got the content or not – this is not how to
#display the json content
print(type(content))
#we will use json dumps to display on to the screen
print(json.dumps(content, indent=4, sort_keys=True))
fileoj.close()
fileoj = open(“json2.json”,“w”)
content1 = ‘{“Name”:”Virat”,”Game”:”Cricket”}’
print(type(content1))
content1 = json.loads(content1)
print(type(content1))
print(json.dumps(content1, indent=4, sort_keys=True))
json.dump(obj=content1,fp=fileoj, indent=4)
fileoj.close()
############ DATABASE ################
# structured v unstructured
# Name, Age, Country, Runs, Wickets
## Library Application
# Table 1: Books
## Columns: BookID (INTEGER), BookTitle (TEXT), Price (FLOAT), Copies (INTEGER)
# Table 2: Members
#Columns: MemberID (INTEGER), Name (TEXT), Email (TEXT), Phone (TEXT), Address (TEXT)
#Relationship:
# one to one:
# one to many / many to one:
# many to many:
# Table 3: BOOKSMEMBERS
# columns: TID (INTEGER), BOOKID(INTEGER) , MID(INTEGER),
# ISSUEDATE (DATE), RETURNDATE (DATE)
# OLTP – Online Transaction Processing (normal ) – Reading + (Editing done in bulk)
# OLAP – Online Analytical Processing (Analytics) – Reading
# CRUD : Create (INSERT), Read (SELECT), Update (UPDATE), Delete (DELETE)
# Roles in DBMS:
## Admin (DBA)
## Database Design (ER diagram, create tables) – SQL
## Application Developers: SQL – CRUD
”’
# Constraints: Keys (Primary Key, Foreign Key), NOT NULL, UNIQUE, CHECK, DEFAULT
Table 1: Publisher
Create Table Publisher(
PUBID int Primary Key,
Address varchar(100),
Name varchar(25));
INSERT INTO Publisher Values (101, 'Hyderabad', 'Eka Publishers');
INSERT INTO Publisher Values (102, 'Mumbai', 'Best Publishers');
Table 2: Books:
Create Table Books(
BookID int Primary Key,
Author varchar(25) NOT NULL,
Title varchar(25) NOT NULL,
Price float(7,2),
Available bool Default 1,
PubID int,
CHECK(Price>=0.0),
Foreign Key (PubID) references Publisher(PubID)
);
INSERT INTO Books (BookID, Author, Title, PubID) Values (101,’Swapnil’,’Python Programming’,101);
INSERT INTO Books (BookID, Author, Title, PubID) Values (102,’Saurav’,’Machine Learning’,101);
Table 3: Member
Create Table Member(
MEM_ID Int Primary key,
Memb_date Date,
Memb_Type varchar(1),
Address varchar(100),
Name varchar(30),
Expiry_date date);
INSERT INTO MEMBER(MEM_ID, Name) Values(101,’%s’)
INSERT INTO MEMBER(MEM_ID, Name) Values(102,’%s’)
INSERT INTO MEMBER(MEM_ID, Name) Values(103,’%s’)
Table 4: BooksMember
Create table BooksMember(
BMID Int Primary Key,
MEM_ID int,
BOOKID int,
BORROW_DATE Date,
RETURN_DATE Date,
Foreign Key (MEM_ID) References MEMBER(MEM_ID),
Foreign Key (BOOKID) References BOOKS(BOOKID)
);
”’
import pymysql

# DDL for the four library tables. Parent tables (Publisher, Member, Books)
# must exist before BooksMember because of its foreign keys.
tab1 = '''
Create Table Publisher(
PUBID int Primary Key,
Address varchar(100),
Name varchar(25));
'''
tab2 = '''
Create Table Books(
BookID int Primary Key,
Author varchar(25) NOT NULL,
Title varchar(25) NOT NULL,
Price float(7,2),
Available bool Default 1,
PubID int,
CHECK(Price>=0.0),
Foreign Key (PubID) references Publisher(PubID)
);
'''
tab3 = '''
Create Table Member(
MEM_ID Int Primary key,
Memb_date Date,
Memb_Type varchar(1),
Address varchar(100),
Name varchar(30),
Expiry_date date);
'''
tab4 = '''
Create table BooksMember(
BMID Int Primary Key,
BORROW_DATE Date,
RETURN_DATE Date,
MEM_ID int,
BOOKID int,
Foreign Key (MEM_ID) References MEMBER (MEM_ID),
Foreign Key (BOOKID) References BOOKS (BOOKID));
'''

########### CRUD Create (Insert), Read (Select), Update, Delete ###
## Performing Create - using Insert
list_insert = ["INSERT INTO Publisher Values (101, 'Hyderabad','Eka Publishers');",
               "INSERT INTO Publisher Values (102, 'Mumbai','Best Publishers');",
               "INSERT INTO Books (BookID, Author, Title, PubID) Values (101,'Swapnil','Python Programming',101);",
               "INSERT INTO Books (BookID, Author, Title, PubID) Values (102,'Saurav','Machine Learning',101);"]
# The list is emptied on purpose so that re-running the script does not
# re-insert rows whose primary keys already exist. Remove this line on the
# very first run to load the seed data.
list_insert = []

update1 = "Update Member Set Name='Sachin Tendulkar' where mem_id=101"
delete1 = "Delete from Member where mem_id=102"
select1 = "Select * from Member"

# NOTE(review): credentials are hard-coded for the local practice database.
db_connect = pymysql.connect(host='localhost', password='learnSQL', db='library', user='root')
try:
    cursor = db_connect.cursor()

    # One-time setup: uncomment to (re)create the tables.
    # cursor.execute('Drop table Publisher;')
    # cursor.execute(tab1)
    # cursor.execute(tab2)
    # cursor.execute(tab3)
    # cursor.execute(tab4)

    for statement in list_insert:
        cursor.execute(statement)
    db_connect.commit()  # to save the changes

    # Insert by dynamic query
    # Uncomment the lines below to practice. The values are passed as a
    # second argument so the driver escapes them; never build SQL from user
    # input with % or f-strings (SQL injection).
    # name1 = input("Enter the Member 1 name: ")
    # cursor.execute("INSERT INTO MEMBER(MEM_ID, Name) Values(%s, %s)", (101, name1))
    # name2 = input("Enter the Member 2 name: ")
    # cursor.execute("INSERT INTO MEMBER(MEM_ID, Name) Values(%s, %s)", (102, name2))
    # name3 = input("Enter the Member 3 name: ")
    # cursor.execute("INSERT INTO MEMBER(MEM_ID, Name) Values(%s, %s)", (103, name3))
    # db_connect.commit()

    ## Update existing value in Member table
    cursor.execute(update1)
    ## Delete existing member from Member table
    cursor.execute(delete1)
    db_connect.commit()

    ## Reading using Select
    cursor.execute(select1)
    results = cursor.fetchall()
    for r in results:
        print(r)
finally:
    # Always release the connection, even if one of the statements failed.
    db_connect.close()
”’
To practice SELECT Commands, please login to:
https://livesql.oracle.com/
create an account and start practicing
”’
'''
-- Practice queries for the Oracle Live SQL HR sample schema.
-- They are kept inside a string so that this Python file still parses.

-- Reading all the rows and columns
select * from HR.Employees;
-- All the rows but given columns only
Select Employee_ID, FIRST_NAME, EMAIL from HR.Employees;
-- Restricted columns and restricted rows
Select Employee_ID, FIRST_NAME, EMAIL from HR.Employees where Employee_ID =120;
select first_name||' has a salary of $'|| salary "Salary Information" , email from hr.employees order by email;
select first_name||' has a salary of $'|| salary "Salary Information" , email, HIRE_DATE from hr.employees order by HIRE_DATE, email desc;
select first_name, last_name, email, salary from hr.employees where salary > 10000 and salary <18000;
select first_name, last_name, email, salary from hr.employees where salary between 10000 and 18000;
select first_name, last_name, email, salary from hr.employees where salary in (17000, 11000, 13500);
select count(*) from hr.employees;
select avg(salary) from hr.employees;
select * from HR.Employees;
select * from hr.departments;
select first_name, last_name, email, to_char(hire_date, 'Month DD, YYYY'), round(months_between(sysdate, hire_date)) Tenure_in_months from hr.employees;
select * from dual;
select 3+3 from HR.Employees where rownum<2;
select 3+3, abs(-80), to_date('May 14, 2023 08:01 P.M.', 'Month DD, YYYY HH:MI P.M.') from dual;
select Decode(3+5,8,'CORRECT','INCORRECT') from dual;
-- Aggregate functions
select JOB_ID, count(*), round(avg(salary)), round(sum(salary)) TOTAL_SALARY from hr.employees group by JOB_ID having count(*)>=5;
-- JOINING TABLES
select FIRST_NAME , LAST_NAME, t1.DEPARTMENT_ID, t2.DEPARTMENT_ID, Department_name, HIRE_DATE from HR.Employees t1, hr.departments t2 where t1.department_id = t2.department_ID;
-- SUB QUERY
select * from HR.Employees where Employee_ID in (select employee_id from hr.employees);
select * from HR.Employees where Employee_ID = (select employee_id from hr.employees where rownum < 2);
-- SET OPERATIONS
select * from HR.Employees where salary <11000
INTERSECT
select * from HR.Employees where salary >20000;
'''
import numpy as np

# --- Reshaping: a range of 16 values becomes an 8 x 2 ndarray ---
x = range(16)
print(type(x))
x = np.reshape(x, (8, 2))
print(type(x))
print(x)

# --- Indexing and slicing a 4 x 3 array ---
print("Y:")
y = np.array([[3, 4, 5], [2, 1, 2], [9, 0, 1], [2, 2, 2]])
print(y)
print(y[2, 0])      # row 2, column 0
print(y[-2, -3])    # negative indices count from the end
print(y[1:3, 1:])   # rows 1-2, columns 1 onwards

# --- Element-wise arithmetic: operator form and the equivalent function ---
z = np.array([[3, 4, 1], [2, 3, 2], [2, 7, 1], [2, 9, 2]])
print("=======================")
print(z)
print(y)
print(y + z)
print(np.add(y, z))
print(y - z)
print(np.subtract(y, z))
print(y * z)
print(np.multiply(y, z))
print(y / z)
print(np.divide(y, z))

# --- Matrix multiplication: (4 x 4) @ (4 x 3) -> (4 x 3) ---
y = np.array([[3, 4, 5, 3], [2, 1, 2, 2], [9, 0, 1, 1], [2, 2, 2, 4]])  # 4 * 4
z = np.array([[3, 4, 1], [2, 3, 2], [2, 7, 1], [2, 9, 2]])  # 4 * 3
print(y @ z)  # matrix multiplication
print(np.matmul(y, z))

'''
x + y = 35
2x + 3y = 90
Find x and y ?
'''
# coefficient matrix
coeff = np.array([[1, 1], [2, 3]])
# right-hand side of the equations
solution = np.array([[35], [90]])
## coeff * variable = solution
# variable = inv(coeff) * solution
det_coeff = np.linalg.det(coeff)
print("Determinant of Coefficient matrix = ", det_coeff)
# The determinant is a float, so compare with a tolerance instead of "!= 0".
if not np.isclose(det_coeff, 0):
    # solve() is the numerically preferred way to compute inv(coeff) @ solution.
    variable = np.linalg.solve(coeff, solution)
    # round() instead of int(): int() truncates, so 14.999999 would print as 14.
    print("Solution is: x = ", round(float(variable[0, 0])), "and y = ", round(float(variable[1, 0])))
import numpy as np
import pandas as pd

# The same nested list viewed as a NumPy array and as a pandas DataFrame.
y = [[3, 4, 5, 3], [2, 1, 2, 2], [9, 0, 1, 1], [2, 2, 2, 4]]
y1 = np.array(y)
y2 = pd.DataFrame(y)
print(y1)
print(y2)
# .loc selects by label and includes BOTH end points: rows 0..2, columns 1..3.
print("Y2: \n", y2.loc[0:2, 1:3])

# Named columns.
y2 = pd.DataFrame(y, columns=["January", "February", "March", "April"])
print(y2)

# Named columns and a named row index.
y2 = pd.DataFrame(y, columns=["January", "February", "March", "April"],
                  index=["Banana", "Apple", "Guava", "Mango"])
print(y2)
print("Y2: \n", y2.loc[["Guava", "Mango"], ["January", "February", "March"]])
# loc, iloc
import pandas as pd

# NOTE(review): `data` is not defined in this part of the file; presumably a
# list of [month, runs scored, runs given away] rows - confirm.
data_df = pd.DataFrame(data, columns=["Month", "Runs Scored", "Runs Given Away"])
print(data_df)

# Column aggregates.
print(data_df["Runs Scored"].mean())
print(data_df["Runs Given Away"].sum())

# Row filtering with boolean masks.
print(data_df[data_df['Month'] == "March"])
print(data_df[data_df['Month'].isin(["January", "April", "May"])])

# iloc selects by position, loc selects by label.
print(data_df.iloc[0])
print(data_df.loc[[0, 2, 4], ["Month", "Runs Given Away"]])
# pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_device.csv")
device_df = pd.read_csv("D:/datasets/gitdataset/user_device.csv")  # (272, 6)
print(device_df.shape)
usage_df = pd.read_csv("D:/datasets/gitdataset/user_usage.csv")  # (240, 4)
print(usage_df.shape)

# Inner join (the default): only use_id values present in both frames.
new_df = pd.merge(device_df, usage_df, on="use_id")
print(new_df)
# Left join: every row of device_df, usage columns are NaN where unmatched.
new_df = pd.merge(device_df, usage_df, on="use_id", how="left")
print(new_df)
# Right join: every row of usage_df, device columns are NaN where unmatched.
new_df = pd.merge(device_df, usage_df, on="use_id", how="right")
print(new_df)
# Outer join: every row of both frames.
new_df = pd.merge(device_df, usage_df, on="use_id", how="outer")
print(new_df)
# 159+81+113 = 353
# presumably: 159 matched + 81 usage-only (240-159) + 113 device-only (272-159)
# Bivariate: Scatter
import pandas as pd
data = pd.read_csv("D:\\datasets\\gitdataset\\hotel_bookings.csv")
print(data.shape)
print(data.dtypes)

import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of the null mask for the first 30 columns; with a two-colour
# palette the first colour marks present cells and the second missing ones.
data_30 = data.columns[:30]
# print(data_30)
color_list = ["#00FF00", "#FF0000"]
sns.heatmap(data[data_30].isnull(), cmap=sns.color_palette(color_list))
plt.show()

import numpy as np

# To list the missing percentage of every column, drop the threshold test:
# for i in data.columns:
#     missing_cnt = np.mean(data[i].isnull())
#     print(f"{i} has {missing_cnt*100}% of missing values")

# Report only the columns that are more than 80% empty.
for i in data.columns:
    missing_cnt = np.mean(data[i].isnull())
    if missing_cnt > 0.8:
        print(f"{i} has {missing_cnt*100}% of missing values")

## Company has more than 94% missing data - so lets drop it
# axis = 0 - row & axis = 1 - column
data = data.drop(['company'], axis=1)  # column company will be dropped
print("Shape after dropping company: ", data.shape)

# Add a boolean "<column>_ismissing" flag for every column that has gaps.
for i in data.columns:
    missing = data[i].isnull()  # computed once and reused below
    if np.sum(missing) > 0:
        data[f'{i}_ismissing'] = missing
    missing_cnt = np.mean(missing)
    if missing_cnt > 0.8:
        print(f"{i} has {missing_cnt*100}% of missing values")  # company is not there now
print("Shape after adding _ismissing columns: ", data.shape)

# create a new column which will store the missing number of values for each row
is_missing_col = [col for col in data.columns if "ismissing" in col]
data["num_missing_cnt"] = data[is_missing_col].sum(axis=1)
print(data["num_missing_cnt"])

# selecting rows with more than 12 missing values and dropping them
index_missing_col = data[data['num_missing_cnt'] > 12].index
data = data.drop(index_missing_col, axis=0)
print("Shape after removing missing rows: ", data.shape)
# Numeric columns: replace missing values with the column median.
cols_num = data.select_dtypes(include=[np.number])
all_num_cols = cols_num.columns.values
for i in all_num_cols:  # list of columns which are numeric
    missing_cnt = np.mean(data[i].isnull())
    if missing_cnt > 0.00:
        # print(f"{i} has {missing_cnt*100}% of missing values")
        med = data[i].median()
        data[i] = data[i].fillna(med)

# Missing-value report that motivated the handling in this section:
#   children has 2.0498257606219004% of missing values - FLOAT
#   babies has 11.311318858061922% of missing values - FLOAT
#   agent has 13.687005763302507% of missing values - FLOAT
#   meal has 11.467129071170085% of missing values - CAT
#   country has 0.40879238707947996% of missing values - CAT
#   deposit_type has 8.232810615199035% of missing values - CAT

# Categorical columns: replace missing values with the most frequent value.
# Each column is converted from ITS OWN data; the earlier version built
# meal, country and deposit_type from data.agent, overwriting their values.
# NOTE(review): agent is listed as FLOAT above, so the median fill has
# presumably already handled it; it is kept here to match the original flow.
for col in ['agent', 'meal', 'country', 'deposit_type']:
    data[col] = pd.Categorical(data[col])
    mode = data[col].describe()['top']
    data[col] = data[col].fillna(mode)

# Check every column, not only the numeric ones, so that any categorical
# column that still has gaps is reported as well.
print("Final check for missing values:")
for i in data.columns:
    missing_cnt = np.mean(data[i].isnull())
    if missing_cnt > 0.00:
        print(f"{i} has {missing_cnt*100}% of missing values")
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

url = "https://www.hubertiming.com/results/2017GPTR10K"
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Parse inside the with-block so the HTTP response is always closed.
with urlopen(url) as html_code:
    soup = BeautifulSoup(html_code, "lxml")
print(soup.title)

all_a = soup.find_all('a')  # returns as a list of all values
print(all_a)
for link in all_a:
    print(link.get("href"))

import re

# 1. collect the text of every table row
pattern = re.compile(r'<.*?>')  # any HTML tag; compiled once, outside the loop
rows = soup.find_all('tr')
list_rows = []
for row in rows:
    row_td = str(row.find_all('td'))
    # Alternative: row_td = BeautifulSoup(row_td, "lxml").get_text()
    row_td = pattern.sub("", row_td)
    list_rows.append(row_td)

# Data cleaning
list_rows = list_rows[5:]  # removing not required rows
# 2. convert into dataframe
data_df = pd.DataFrame(list_rows)
# 3. split into different columns
data_df = data_df[0].str.split(',', expand=True)
print(data_df)

'''
<tr>
<th> - header
<td> - data
'''
all_headers = []
headers = str(soup.find_all("th"))
headers = BeautifulSoup(headers, "lxml").get_text()
all_headers.append(headers)
header_df = pd.DataFrame(all_headers)
header_df = header_df[0].str.split(',', expand=True)
print(header_df)

# Stack the header row on top of the data rows and promote it to column names.
main_df = pd.concat([header_df, data_df])
print("=============\n\n")
main_df = main_df.rename(columns=main_df.iloc[0])
# Remove the header row by POSITION. After concat both the header row and the
# first data row carry index label 0, so dropping by label would delete both.
main_df = main_df.iloc[1:]
main_df = main_df.dropna(axis=0, how="any")

# remove [ from first col and ] from last column
main_df.rename(columns={'[Place': 'Place', ' Team]': 'Team'}, inplace=True)
main_df['Place'] = main_df['Place'].str.strip('[')
main_df['Team'] = main_df['Team'].str.strip(']')
print(main_df)
print(main_df.info())
import pandas as pd

# Load the order-review dataset from the local CSV and list its columns.
url = "D:\\datasets\\OnlineRetail\\order_reviews.csv"
reviews_df = pd.read_csv(url)
print(list(reviews_df.columns))
## 1. convert entire text to lowercase
## 2. compatibility decomposition
## 3. convert into utf8
## 4. removing accent
## 5. sentences into words
## 6. remove stop words
import unicodedata
import nltk
# nltk.download(‘punkt’)
## Function to perform steps 1 to 6
# Filled on first use so the stop-word corpus is read only once, not once per
# review, and so importing this file does not require the corpus.
_STOP_WORDS_CACHE = {}


def _strip_accents(text):
    """Decompose *text* (NFKD) and drop every character that is not ASCII."""
    return unicodedata.normalize('NFKD', text).encode('ascii', errors='ignore').decode('utf-8')


def _portuguese_stop_words():
    """Return the accent-stripped NLTK Portuguese stop words as a frozenset."""
    if 'portuguese' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['portuguese'] = frozenset(
            _strip_accents(w) for w in nltk.corpus.stopwords.words('portuguese')
        )
    return _STOP_WORDS_CACHE['portuguese']


def basic_nlp_analysis(text):
    """Normalise a review comment and return its meaningful words.

    Steps: lowercase, compatibility decomposition with accents removed,
    tokenise into words, then drop Portuguese stop words and every token
    that is not purely alphabetic.

    Args:
        text: the raw comment as a str.

    Returns:
        A tuple of the remaining words, in their original order.

    Calls nltk.tokenize.word_tokenize and nltk.corpus.stopwords, so the
    corresponding NLTK data must already be downloaded.
    """
    ## 1. convert entire text to lowercase
    ## 2-4. compatibility decomposition, ASCII round trip, accents removed
    text = _strip_accents(text.lower())
    ## 5. sentences into words
    words = nltk.tokenize.word_tokenize(text)
    ## 6. remove stop words
    # The stop words get the same accent stripping as the text; otherwise
    # accented entries could never match the already-stripped tokens.
    stop_words = _portuguese_stop_words()
    return tuple(t for t in words if t not in stop_words and t.isalpha())
# Keep only the reviews that have a comment; copy() so that adding a column
# below does not write into a view of reviews_df.
commented_reviews = reviews_df[reviews_df['review_comment_message'].notnull()].copy()
print(commented_reviews['review_comment_message'])
# will apply basic nlp operations on the column
commented_reviews['review_comment_words'] = commented_reviews['review_comment_message'].apply(basic_nlp_analysis)
print(commented_reviews['review_comment_words'])
INFERENTIAL STATS:
Predicting for the population from the sample data
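Led by probability = number of outcomes of the event you are interested in / total number of possible outcomes
Example – rolling one given face of a six-sided die: 1/6
Example – Head (or Tail) on a coin toss: 1/2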
Discrete and Continuous
Bernoulli trial – p (probability of success) & q (probability of failure)
q = 1-p
Probability (1 time event) and Probability distribution (repetition)
Toss coin one after another: Sample Space: TT, HH, TH, HT = 2/4
Toss 2 coins at the same time: Sample Space: TT, HH, TH