# DS-Weekend-022023

print(“Hello”,5+4,end=” “)
print(“6+3=”,6+3,end=\n)
print(“How are you?”,end=\n)

print(“This line \n for new line”) ;
# integer – int
a=5;
print(type(a))
#string – str

#boolean – bool

#float

#complex

a = 3+5j
print(a*a) # -16 +30j

#####
#1. WAP to find area and perimeter of a rectangle
# input: what you give to the computer
# process: what is the ask from the computer
# output: what you get back in return
# process: area = length * breadth and perimeter as 2*(length + breadth)
# output: print the answers (area and perimeter) on to the screen

length = 25
print(“Area = “,area)
print(“Perimeter = “,perimeter)

length , breadth,name = 25, 15,“Sachin” #implicit conversion
print(“Area = “,area,“and Perimeter = “,perimeter)

#unpack

#input() – to get input value from the user
length = input(“Enter the length value = “) #implicit into str
length = int(length) #explicit conversion to int
print(“Data type of length is “,type(length))
# f string – format string
print(f”A rectangle with length {length} and breadth {breadth} has an area of {area} and perimeter of {perimeter})
# f-string expanded to float and str
total_cost = 100
num_units = 33
print(f”Total cost came to Rs {total_cost} for {num_units} pens so the cost of each pen is Rs {total_cost/num_units:.2f})
print(f”{3.69:.1f})

player = “Virat”
position = “captain”
country = “India”
print(f”Player {player:.<12} is {position:X^15} of {country:->12} team”)

player = “Mbanwaweba”
position = “wicket-keeper”
country = “Zimbabwe”
print(f”Player {player:<12} is {position:^15} of {country:>12} team”)
#Arithematic operations
val1 = 7
val2 = 3
print(val1 – val2) # subtract
print(val1 * val2) # multiply
print(val1 / val2) # division
print(val1 % val2) # remainder
print(val1 // val2) # integer division
print(val1 ** val2) # power
print(int(10 ** (1/2)))

#binary value
print(bin(10)) #0b1010
print(hex(0o12)) #0xa
print(oct(10)) #0o12
print(int(0b1010))

# binary operators: << >> & |
#Shift
print(10 <<3) # 1010
print(int(0b101000))
print(10 >> 2)
#Relational or conditional: > < <= >= == !=
val1 = 10
val2 = 20
val3 = 10
print(val1 > val2) # is val1 greater than val2? – False
print(val1 >= val2) # is val1 greater than or equal to val2? – False
print(val1 >= val3) # is val1 greater than or equal to val3? – True
print(val1 > val3) # is val1 greater than val3? – False
print(val1 < val3) #False
print(val1 <= val3) #True
print(“val1 == val3: “,val1 == val3)
print(val1 != val3)
print(“A40” > “90”)

# Logical operator: and or not
#input and output both are bool values
# and – both have to True to result into True (otherwise its False)
print(“T and T: “, True and True)
print(“T and F: “, True and False)

# or (+)
print(“F or T : “, False or True)
# 2+5*3 = ?
val1 = 10
val2 = 20
val3 = 10
print(val1 <= val3 or val1 > val3 and val1 > val2) # = ?
# T

print(bin(15))
print(“15 & 10 = “, 15&10)
print(“15 | 10 = “, 15|10)

#print(f”15 | 10 = 15{|}10″, )

print(f”val1 <= val3 or val1 > val3 and val1 > val2 = {val1 <= val3 or val1 > val3 and val1 > val2})
#print(f”True {and} False”)
l = int(input(“Enter the value of length: “))
b = int(input(“Enter the value of breadth: “))

#conditional statements checks the value
if l>0 and b>0:
area = l * b
perimeter = 2*(l+b)
print(f”Rectangle with length {l} and breadth {b} has area of {area} and perimeter of {perimeter})

#another set of if condition
if l>0 and b>0:
area = l * b
perimeter = 2*(l+b)
print(f”Rectangle with length {l} and breadth {b} has area of {area} and perimeter of {perimeter})
else:
print(“Sides of rectangle doesnt look valid”)

#
# Check is a number is positive and if positive chck if divisible by 5

value = 50
if value > 0:
print(f”{value} is positive”,end=” “)
if value %5==0:
print(“and its divisible by 5”)
elif value <0:
print(f”{value} is negative”)
else:
print(f”{value} is neither positive nor negative”)

marks1, marks2,marks3,marks4,marks5 = 96, 96,96,96,95

#assign grade on the basis on avg marks:
# avg>90: A, 75-90: B, 60-75: C, 50-60:D, <50: E
avg = (marks1 + marks2+marks3+marks4+marks5)/5
print(“Average: “,avg)
if avg>=90:
#print(“Result: You have Passed”)
elif avg>=75:
#print(“Result: Passed”)
elif avg>=60:
elif avg>=50:
else:
print(“Result: Failed”)

#
print(“===================”)
flag = 0 #didnt win dean’s award
if avg>=50:
#print(“Result: You’ve passed!”)
if avg >= 90:
if avg >=95:
flag=1
elif avg >= 75:
elif avg >= 60:
else:
print(“Result: You’ve passed!”)
else:
print(“Result: Sorry You’ve failed!”)
if flag==1:
print(“You win special dean’s award”)

## checking the greater number between 2 values
val1,val2 = 40,20

if val1 > val2:
print(f”{val1} > {val2})
elif val1 < val2:
print(f”{val2} > {val1})
else:
print(f”{val2} = {val1})

#########
val1,val2 , val3 = 50, 90,20

if val1 > val2:
#print(f”{val1} >= {val2}”)
if val1 > val3:
if val3 > val2:
print(f”{val1} >= {val3} >= {val2})
else:
print(f”{val1} >= {val2} >= {val3})
else:
print(f”{val3} >= {val1} >= {val2})
else:
#print(f”{val2} >= {val1}”)
if val2>val3:
if val1 > val3:
print(f”{val2} >= {val1} >= {val3})
else:
print(f”{val2} >= {val3} >= {val1})
else:
print(f”{val3} >= {val2} >= {val1})

#LOOPS- to execute block of code multiple times

#range(a,b,c): a=starting (=), b= ending value (<), c=increment
#range(5,30,5): 5,10,15,20,25
#range(a,b) c is default = 1
#range(4,10): 4,5,6,7,8,9
#range(b), a is default = 0, c is default = 1
#range(4): 0,1,2,3
#FOR Loop – when we know how many times to run

print(“i” in “India”) #True
print(“A” in “India”) #False because A is not in India

for counter in range(5,10,3):
print(“In For Loop:”,counter)

for counter in “India”:
print(“In For Loop:”,counter)

#Run a loop 5 times:
for i in range(5):
print(“i = “,i)

#a way of printing even numbers upto 20
for i in range(0,21,2):
print(i,end=“, “)
print()

for i in range(1,101):
if i %5==0:
print(i,end=“, “)
print()

# WHILE Loop –
ch=“n”
#extry controlled loop
while ch==“y”: #while will execute if the condition is True
print(“How are you?”)
ch=input(“Enter y to continue, anyother key to stop: “)

#exit controlled
while True:
print(“I am fine”)
ch = input(“Enter n to stop, anyother key to stop: “)
if ch==“n”:
break
###
”’
* * * * *
* * * * *
* * * * *
* * * * *
* * * * *
”’

for j in range(5):
for i in range(5):
print(“*”,end=” “)
print()

”’
*
* *
* * *
* * * *
* * * * *
”’

for j in range(5):
for i in range(j+1):
print(“*”,end=” “)
print()

”’
* * * * *
* * * *
* * *
* *
*
”’
num=10
for j in range(num):
for i in range(num-j):
print(“*”,end=” “)
print()

”’
*
* *
* * *
* * * *
* * * * *
”’
for j in range(5):
for i in range(5-j):
print(” “,end=“”)
for i in range(j+1):
print(“*”,end=” “)
print()

#Multiplication table
for j in range(1,11):
for i in range(1,11):
print(f”{i:<2} * {j:<2} = {j*i:<2},end=” “)
print()

#match and case
ch = input(“Enter you favorite programming language: “)
match ch:
case “Python”:
print(“You are on Data Scientist track”)
case “Java”:
print(“You are on Mobile App Developer track”)
case “Javascript”:
print(“You are on Web Developer track”)

#Program to take input marks and find avg
ch=“y”
sum=0
counter=0
while ch==“y”:
marks = int(input(“Enter marks: “))
counter+=1
sum+=marks # sum=sum+marks
ch=input(“Do you have more marks to add? y for yes: “)
avg = sum/counter
print(f”Total marks= {sum} and Average marks ={avg})

#
# guessing number game – human v computer
import random
num = random.randint(1,100) #both start and end is inclusive
attempt = 0
while True:
val = int(input(“Guess the number (1-100): “))
if val<1 or val>100:
print(“Invalid number!”)
continue #take you to the beginning of the loop
attempt+=1
if val==num:
print(f”You have guessed it correctly in {attempt} attempts”)
break #throw you out of the loop
elif val <num:
else:

##
# guessing number game – computer v computer
import random
num = random.randint(1,100) #both start and end is inclusive
attempt = 0
start,end=1,100
while True:
val = random.randint(start,end) #int(input(“Guess the number (1-100): “))
if val<1 or val>100:
print(“Invalid number!”)
continue #take you to the beginning of the loop
attempt+=1
if val==num:
print(f”You have guessed it correctly in {attempt} attempts”)
break #throw you out of the loop
elif val <num:
start = val + 1 #guess a higher number
else:
end = val – 1 #guess a lower number

### Using IF Condition in one single line – one line condition
# Ternary operator: condition logic should not be more than 1 line

val1, val2 = 30,40

var1 = val1 if val1 > val2 else val2
print(“1. Higher number is “,var1)

var1 = “val1 is higher” if val1 > val2 else “val2 is higher”
print(“2. Message: “,var1)

### One line for loop
#square the values greater than 5 and cube the values for others
for i in range(10):
if i >5:
val = i**2
else:
val = i**3
print(val)

#Above code can be implemented in one line:
print(“Using one line loop and condition:”)
for i in range(10): print(i**2) if i>5 else print(i**3)

## Assignment Programs

Exercise 1: Write a program in Python to display the Factorial of a number.

Exercise 2: Write a Python program to find those numbers which are divisible by 7 and multiples of 5, between 1500 and 2700 (both included).

Exercise 3: Write a Python program to reverse a number.

Exercise 4: Write a program to print n natural number in descending order using a while loop.

Exercise 5: Write a program to display the first 7 multiples of 7.

Exercise 6: Write a Python program to convert temperatures to and from Celsius and Fahrenheit.

[ Formula : c/5 = f-32/9 [ where c = temperature in celsius and f = temperature in fahrenheit ]

Expected Output :

60°C is 140 in Fahrenheit

45°F is 7 in Celsius

Exercise 7: Write a Python program that iterates the integers from 1 to 50. For multiples of three print “Fizz” instead of the number and for multiples of five print “Buzz”. For numbers that are multiples of three and five, print “FizzBuzz”.

Sample Output :

fizzbuzz

1

2

fizz

4

buzz

Exercise 8: Write a Python program to print the alphabet pattern ‘A’.

Expected Output:

***

*   *

*   *

*****

*   *

*   *

*   *

Exercise 9: Write a Python program to print the alphabet pattern ‘G’.

Expected Output:

***

*   *

*

* ***

*   *

*   *

***

#String
name = “Sachin” \
“Cricket God”
name1 = ‘Virat’
name2 = ”’Rohit
Captain of Team India”’
name4=“””Dhoni
won the world
cup for India
in multiple formats of the
game”””

print(name)
print(name2)
print(name4)

#substring- subset of data – indexing
name = “Sachin Tendulkar”
print(name[0])
print(name[1])
print(“SCI => “,name[0]+name[2]+name[4])
print(“First 3 characters: “,name[0:3], name[:3]) #nothing on left on : means start from zero
print(“chin => “,name[2:6])
print(“kar =>> “, name[13:16])
#len()
print(“Total characters in name is”,len(name))
print(name[15], name[len(name)-1], name[-1])
print(“First character using backward indexing: “,name[-16], name[-len(name)])

print(“kar using backward indexing=>> “, name[-3:]) #when left black on the right means go upto end

fname = “Sachin”
age = 49
print(“First name of the player is “+fname)
print(“Age = “+str(age))

for i in range(5):
print(“* “*(5-i))

#Data structures
# list, tuple, dictionary, sets

print(“Print all characters: “,name,name[:])

# Strings are immutable: TypeError: ‘str’ object does not support item assignment
val = “HELLO”
val = “hELLO”

for i in txt:
print(“* “*(5– txt.index(i)))

## is…() – return either True or False
txt = “highway route number. I AM driving there. since one month”
print(“txt.isupper: “,txt.isupper())
print(“txt.islower: “,txt.islower())
print(“Alphabet & Numeric”,txt.isalnum())
print(“Alphabet & Numeric”,txt.isalpha())
print(“Alphabet & Numeric”,txt.isdigit())

print(txt.upper())
print(txt.lower())
print(txt.title())
print(txt.capitalize())

# split()
print(txt.split(‘o’))
out = [‘highway’, ‘route’, ‘number.’, ‘I’, ‘AM’, ‘driving’, ‘there.’, ‘since’, ‘one’, ‘month’]
print(“====”)
out2 = ” “.join(out)
print(out2)

txt = “I am driving on highway route on number fifteen.”
print(txt.lower().count(“i”))
start_pos=0
for i in range(txt.lower().count(“i”)):
print(txt.lower().index(“i”,start_pos),end=“, “)
start_pos=txt.lower().index(“i”,start_pos) + 1
print()

print(txt.replace(“on”,“over”,1))
print(txt)
#strings are immutable

#LIST
l1 = [2,3,5.5,True,“Hello”,[4,8,12]]
print(len(l1))

print(type(l1))
print(type(l1[0]))
print(type(l1[-1]))

print(l1[-2].upper())
l2 = [False,5,115]
l3 = l1 + l2
print(l3*3)

#lists are mutable
l2[1] = “How are you?”
print(l2)

for i in l2:
print(i,end=“, “)
print(\n\n### Methods”)
### Methods
l1 = [2,4,6,8,10,2,14]
print(l1.index(2,2))
print(l1.count(2))
print(“1. Current list = “,l1)
print(l1.pop(2)) #removes index element
print(“2. Current List = “,l1)
print(l1.remove(14)) #removes value element
print(“3. Current List = “,l1)
l1.append(21) #adds at the end of the list
print(“4. Current List = “,l1)
l1.insert(3,31) #takes position and the value
print(“5. Current List = “,l1)
#below creating 2 new lists based on l1 values
l2 = l1 #deep copy
l3 = l1.copy() #shallow copy
print(“1. L1 = “,l1)
print(“1. L2 = “,l2)
print(“1. L3 = “,l3)
l1.append(42)
l2.append(52)
l3.append(62)
print(“2. L1 = “,l1)
print(“2. L2 = “,l2)
print(“2. L3 = “,l3)
l1.extend(l3)
print(l1)
l1.reverse()
print(l1)
l1.sort() #increasing order
print(l1)
l1.sort(reverse=True) #decreasing order
print(l1)

#definition: List is an ordered linear mutable collection
subject_list = [‘Maths’,‘Science’,‘English’,‘Social Science’,‘German’]
marks_list = []
sum=0
for i in range(5):
#marks = int(input(“Enter marks in Subject “+str(i+1)+”: “))
marks = int(input(“Enter marks in Subject ” + subject_list[i] + “: “))
sum+=marks
marks_list.append(marks)
print(“Marks obtained in each subject = “,marks_list,“and total marks = “,sum)

sum=0
for i in marks_list:
sum+=i
print(“Total marks = “,sum)

#Reduce, Map, Filter => later after functions
# TUPLE:
#definition: Tuple is an ordered linear immutable collection
t1 = ()
t1 = (5,)
t1 = (5,4)
t1 = (5,6,7,8.0,“9”)
print(type(t1))
#tuples are converted to list and vice-versa
t1 = list(t1)
t1.append(45)
t1 = tuple(t1)
#tuples are faster than list for reading

#packing
t1 = (3,30,“Hello”) #packing
#unpacking
a,b,c = t1
print(a,b,c)

print((2,3,99) > (3,1)) #checks one by one member untill it finds the greater value

#List – linear ordered mutable collection
# dictionary – ordered collections – key:value pair

dict1 = {}
print(type(dict1))
dict1 = {“name”:“Sachin”,“city”:“Mumbai”,“Runs”:12345,“name”:“Tendulkar”}
print(dict1)
print(dict1[‘name’])
dict2 = {(“Team IPL”,“Team Ranji”):“Mumbai Indians”}
dict1.update(dict2)
print(dict1)

for i in dict1.keys():
print(i, dict1[i])
print(“Iterating through values:”)
for i in dict1.values():
print(i)
print(“Iterating through items (key,value):”)
for i,j in dict1.items():
print(i,j)
for i in dict1.items():
print(list(i))

print(“Printing the values: “)
print(“Keys :”,dict1.keys())
print(“Values:”,dict1.values())
print(“Items:”,dict1.items())

dict1.pop(‘Runs’)
print(“After pop: “,dict1)
dict2 = dict1 #deep copy
dict3 = dict1.copy() # shallow copy
t_dict = {“Country”:“India”}
dict1.update(t_dict)
print(“Printing all 3 Dictionaries: “)
print(“Dict1: “,dict1)
print(“Dict2: “,dict2)
print(“Dict3: “,dict3)
#dict1[‘name’]=”Tendulkar”
t_dict = {“name”:“Tendulkar”}
dict1.update(t_dict)
for i in range(2):
print(“Dict1 before popitem: “,dict1)
dict1.popitem()
print(“Dict1 after popitem: “,dict1)

”’
Write a program to input Roll no. and marks of 3 students in 3 subjects
{101:[], 102:[]}

”’
dict_marks={}

for i in range(3):
t_dict = {}
roll=int(input(“Enter the Roll number:”))
t_list=[]
for j in range(3):
marks=int(input(“Enter the marks for Roll no.”+str(roll)+” in Subject “+str(j+1)+” :”))
t_list.append(marks)
t_dict = {roll:t_list}
dict_marks.update(t_dict)
print(“Final data:”,dict_marks)

”’
Assignment program:
from the below dictionary find the topper for each subject:
{100: [55, 66, 78], 102: [90, 87, 54], 105: [67, 76, 87]}

e.g. Highest in Subject 1: 102 with 90 marks
Highest in Subject 2: 102 with 87 marks
Highest in Subject 3: 105 with 87 marks
”’
#Example 2:
master_dict = {}

for k in range(2):
for i in t_dict.keys():
j=input(“Enter the client’s “+i+” :”)
t_dict[i] = j
master_dict.update(t_dict)
print(“Master Dictionary information: \n,master_dict)

##################################################
# SETS
set1 = {}
print(type(set1))
set1 = {“Apple”,“Banana”,“Mango”,“Grapes”,“Guava”}
print(type(set1))
set2 = {“Mango”,“Grapes”,“Guava”,“Apple”,“Banana”}

set1 = {1,2,3,4,5,6}
set2 = {1,3,5,7,9}
print(“Union:”)
print(set1 | set2)
print(set1.union(set2))
print(“Intersection:”)
print(set1 & set2)
print(set1.intersection(set2))
print(“Minus – difference”)
print(set1 – set2)
print(set2.difference(set1)) #set2 – set1
print(“Symmetric Difference”)
print(set1 ^ set2)
print(set1.symmetric_difference(set2))
print(“Set1 before pop:”,set1)
set1.pop()
print(“Set1 after pop:”,set1)

list1 = [1,2,3,4,1,2,3,1,2,1]
list1 =list(set(list1))
print(type(list1))
print(list1)
#################### FUNCTIONS ##############################
def myquestions():
”’this is a sample function to demonstrate how function works
it doesnt take any parameter nor does it return anything
-written on 22nd april”’
print(“how are you?”)
print(“Where are you going?”)

def mycalc1(a,b,c):
print(“MY CALC 1”)
print(f”A,B and C values are {a},{b},{c})
total = a+b+c
print(total)

def mycalc2(a,b=0,c=9):
print(“MY CALC 2”)
print(f”A,B and C values are {a},{b},{c})
total = a+b+c
print(total)

def myfunc1(a,*b,**c):
print(“A = “,a)
print(“B = “,b)
print(“C = “,c)

myquestions()
print(myquestions.__doc__)
print(“Doc for print”)
print(input.__doc__)
#doc – first line of code inside the function, must be multiline comment
print(\n\n\n)
mycalc1(10,50,90) #required positional arguments
mycalc2(3,7,19) #calling default arguments
mycalc1(c=1,a=7,b=3) #use keywords to avoid positional

print(“Calling variable length arguments:”)
myfunc1(10,1,2,3,4,5,6,7,8,9,0,4,5,66,44,333,33, name=“Sachin”,age=43,runs=19890)

def myfun1(a,b,c):
#
global x
print(“X = “,x)
x = 5
print(“X = “, x)

x=50
myfun1(5,10,15)
print(“in Main x = “,x)
###
def isPrime(n):
check= True
for i in range(2,n//2+1):
if n%i==0:
check=False
break
return check

check = isPrime(51)
if check:
print(“51 number is prime”)
else:
print(“51 number is not prime”)

#generate list of prime number between 1000 and 2000

for i in range(1000,2001):
out =isPrime(i)
if out:
print(i)
def myfunc1():
“””This is a sample function to see the working of a function”””
print(“How are you?”)
print(“Where do you live?”)

def myfunc2(a,b,c): # required positional arguments
print(f”Values of a,b and c are {a},{b} and {c} respectively”)
total = a+b+c
print(“Total is “,total)

def myfunc3(a,b=0,c=0): # a & b required positional and c is positional not required (default)
print(f”Values of a,b and c are {a},{b} and {c} respectively”)
total = a+b+c
print(“Total is “,total)

def isPrime(n):
”’isPrime is a function that takes a parameter n and
check and prints if its a prime number of not”’
prime = True
for i in range(2,n//2 +1):
if n%i==0:
prime=False
break
return prime

if __name__ ==“__main__”:
myfunc1()
print(“————-“)
print(myfunc1.__doc__)
# doc string: a multi line string and first line in the function
print(print.__doc__)
print(int.__doc__)

myfunc2(5, 10, 15) # required positional
print(“Calling My Func3 below:”)
myfunc3(10, 20)
myfunc3(10, 20, 30)

n = 11
out = isPrime(n)
if out:
print(n, “is a prime number”)
else:
print(n, “is not a prime number”)

# I want to print a list of all the values that are prime between 100 and 500
print(“Printing list of prime numbers from 100 to 500:”)
for k in range(100, 501):
if isPrime(k):
print(k)

# We are talking about non-positional (or KEYWORD arguments)
print(“Working on keyword arguments:”)

myfunc2(b=45, c=50, a=70) # for keyword – use same arguments that are already there

# Module:

P5.py file:

from p4 import myfunc2
print(“Hello : “,n)
if n==0:
return 100

# 5! = 5 * 4!

def myfacto(n):
if n==1:
return 1
return (n * myfacto(n-1))

###### decorators

def outer():
print(“Line one of outer”)
def inner():
print(“Line 1 of inner”)
print(“Line two of outer”)
inner()
print(“Line three of outer”)

def myouter11():
print(“This is line 1 from myouter 11”)
def myouter22():
print(“This is line 1 from myouter 22”)
def myouter33():
print(“This is line 1 from myouter 33”)
def myouter2(var1):
print(“This is line 1 from myouter 2”)
var1()
print(“This is line 2 from myouter 2”)

if __name__ ==‘__main__’:
out = myfacto(50)
print(“Factorial of 4 is”, out)
outer()
myouter2(myouter33)

P6.py:

#import p5 as SuperFunctions
from p5 import mytask, myfacto, myfunc2
from MyPack1 import modul1
modul1.myfunc1()

myfacto(10)
# one line for loop
print(“Option 1”)
for i in range(5):
print(“Hello”)
print(“Option 2”)
for i in range(5): print(“Hello”)
print(“Option 3”)
list1 = [2,4,6,8,10]
for i in list1: print(i)
print(“Option 4”)
prod=1
for i in range(1,10): prod*=i
print(prod)
print(“Option 5”)
mylist1 = [i for i in range(1,10)]
print(“Mylist1 = “,mylist1)

# one line if condition
print(“Condition Option 1”)
num=-5
if num>0:
print(“Positive”)
else:
print(“Not positive”)
output = “Positive” if num>0 else “Not Positive”
print(“Option 1 output = “,output)

print(“Condition Option 1 with Loops”)
### calculate cube of values between 1 and 9
print([num**3 for num in range(1,10)])
### calculate cube of values between 1 and 9 if the value is odd
print([num**3 for num in range(1,10) if num%2==1])
# one line function
print(“one line function Option 1”)
myfun1 = lambda x,y:print(“Total = “,x+y)
myfun1(10,20)
print(“one line function Option 2”)
friends=[“Rohit”,“Rahul”,“Surya”,“Kohli”]
batting = lambda team: [print(“Now batting:”,x) for x in team]
batting(friends)

#MAPS, FILTER & REDUCE
input = [2000,3000,100,200,5000,6000,3000,900,600,500,230,8000]
# all the three concepts works on list – input is a list and output depends upon the task
## 1. map – if there is a single logic (formula) that you need to apply on entire list values
#example: convert these feet into metres: divide by 3.1
some_func = lambda num:num/3.1
out = list(map(some_func,input))
print(“Output = “,out)

# Filter: if there is a single logic (condition) based on whicch you select subset
## subset of values are created when the condition returns True
out = list(filter(lambda x: (x//100)>=10,input))
print(“Filtered values are: “,out)

# Reduce: takes entire data in a list and reduces them to just 1 single value based on the given formula
from functools import reduce
print(“Sum of all the values are: “,reduce(lambda a,b:a+b, input))

## PROJECT 1: Working with Dictionary

## Project 1: Create a menu option for billing
## Dictionary: {Item_code: [“Item_Description”,price]}
## create bills for each individual: {item_code: [quantity, price, total_cost]}
from datetime import date, datetime, timedelta, timezone
import time
start = time.time()
for i in range(1000000):
out = i**3+500*i**2+9
time.sleep(1)
end = time.time()

print(“Total time taken by the program to run: “,end-start)

from datetime import datetime
currenttime= datetime.now()
print(“Current time: “,currenttime)
print(“Date: “,currenttime.strftime(“%y-%m-%d”))
print(“Get: “,currenttime.year, currenttime.day, currenttime.minute)
print(“out: “,currenttime.today())
print(“Weekday: “,currenttime.weekday())

from datetime import timedelta,datetime
print(“Yesterday: “,currenttime-timedelta(days=1))
print(“Next week: “,currenttime+timedelta(days=7))
class BookMagazine:
__publisher = “Eka Publishers”
def __init__(self, title,page):
print(“Publisher is: “,BookMagazine.__publisher)
self.title=title
self.pages=page
class Books(BookMagazine):
total_books = 0 # class level variable – all objects and class will return same value
#object level variables will be inside object methods
def __init__(self, title,author,page):
BookMagazine.__init__(self,title, page)
self.author=author
Books.total_books +=1

def display_book(self):
print(“Dummy Function: Book Created”)
print(“Title = “, self.title)
if self.author==“”:
print(“There is no author name declared”)
else:
print(“Author = “, self.author)
print(“Pages = “, self.pages)

@classmethod
def display_count(cls):
print(“Total book count = “,cls.total_books)
##
b1=Books(“Python Programming”,“Swapnil Saurav”,330)
print(“B1 Display”)
b1.display_book()

b2=Books(“Data Science”,“Swapnil Saurav”,550)
print(“B2 Display”)
b2.display_book()

b3=Books(“Data Visualization”,“Swapnil Saurav”,250)
b3.display_book()
print(b1.total_books)

print(Books.total_books)

#Today’s assignment; Using Class and objects perform addition, subtraction, multiplication,
# and division. Each of these should have unique functions. init should take in 2 input values
# from the user
# Implement atleast one of these: class variable and method, object variable and method

class Magazines(BookMagazine):
def __init__(self,title,pages,genre):
BookMagazine.__init__(self,title,pages)
self.genre = genre

class Library:
def lib_fun1(self):
print(“Printing from Library class:”)
#print(“Publishers = “,BookMagazine.__publisher) #throws error as private members cant be accessed
print(“Total Books = “,Books.total_books)

#print(Books.__publisher) #throws error as private members cant be accessed
#print(b2.)
l1= Library()
l1.lib_fun1()
#Access Modifiers:
## private: only the members of the same class can access
## protected: (one _ variable name): _name, _pub => only derived class can be called
### concept of protected is there but practically its not yet updated

## public: any member can call members of any class
class Books:
total_books=0
def __init__(self,title,author):
self.title = title
self.author = author
Books.total_books +=1
def display_data(self):
print(f”Title = {self.title} and Author = {self.author})

b1 = Books(“Python Programming”,“Saurav”)
print(type(b1))
b1.display_data()
b2 = Books(“Machine Learning”,“Saurav”)
b2.display_data()
l1 = []
print(type(l1))

##############
#Errors
# Syntax errors – when you dont follow Python rules
# print 5

#logical errors – wrong logic.. a + b = 4 *2

# Exception errors – runtime errors
num1=0
try:
num1 = int(input(“Enter a number: “))
#10/0
except (ValueError, ZeroDivisionError):
print(“You have not entered a valid number, hence exiting…”)
except Exception:
print(“Some error has occured, please retry!”)
else:
print(num1) # ValueError
finally:
print(“Error or No error I will be called”)
#10/0: ZeroDivisionError
num1=0
while True:
try:
num1 = int(input(“Enter a number: “))
break
except (ValueError, ZeroDivisionError):
print(“You have not entered a valid number, hence exiting…”)
except Exception:
print(“Some error has occured, please retry!”)
else:
print(num1) # ValueError
finally:
print(“Error or No error I will be called”)

# Assertion Error
def print_data(num):
#perform this only when num >100
assert (num>100), “Value entered is too small to process”
return num**num
try:
out = print_data(100)
except AssertionError:
print(“Give larger value and run again”)
else:
print(“Output is “,out)

#########

### create my own exception
class TooSmallValue(Exception):
def __init__(self,value=0,min=100):
self.value = value
self.min = min

#driving code
value = int(input(“Enter a value >100: “))
try:
if value <=100:
raise TooSmallValue
except TooSmallValue:
print(“TooSmallValue: Give larger value and run again”)
else:
print(“Output is “,value*value)

######### WORKING WITH OPERATING SYSTEMS

import os
os_name = os.name
print(os.name)
if os_name==‘nt’:
print(“You are using a Windows machine”)
elif os_name==‘posix’:
print(“This is Mac or Linux or Unix machine”)
else:
print(“not sure which OS you are using”)

## Some OS specific command
#os.rename(“infy.py”,”infy_apr.py”)
#os.mkdir(“TEST”)
from pathlib import Path
import os

path_loc = Path(“C:/Users/HP/PycharmProjects/pythonProject/”)
for p in path_loc.iterdir():
print(p, ” : Is it a directory: “, p.is_dir())

# Text file processing
fileobj=open(“file1.txt”,“a+”) # read(r), write (w), append (a): r+ w+ a+
print(“Entire content:”)
print(content)
fileobj.seek(5) #move to first character
print(“First 10 characters:”)
# readline will read maximum one line at a time that too current
fileobj.seek(0)
print(“Line: \n,line)
print(lines)
fileobj.seek(0)
print(lines)
else:

if fileobj.writable():
lines = [‘Twinkle Twinkle Little Star\n,‘How I wonder\n,
‘What you are\n,‘Up Above the World\n]
fileobj.writelines(lines)
fileobj.close()

######### CSV Files
import csv

fileobj = open(“D:/datasets/tcs_stocks.csv”) # default mode is r (read)
print(list(csv_file))
fileobj.seek(0)
for i in csv_file:
for j in i[:2]:
print(j, end=” “)
print()

fileobj.close()

# create a csv file
row1 = [‘Sachin’,‘Mumbai’,222]
row3 = [‘Rahul’,‘Bangalore’,333]
import csv
fileobj = open(“sample1.csv”,‘w’,newline=)
row_writer = csv.writer(fileobj,delimiter=‘|’)
row_writer.writerow(row1)
row_writer.writerow(row2)
row_writer.writerow(row3)
fileobj.close()

###### JSON #############
import json
fileoj = open(“json1.json”,“r”)
#print to check if we got the content or not – this is not how to
#display the json content
print(type(content))
#we will use json dumps to display on to the screen
print(json.dumps(content, indent=4, sort_keys=True))
fileoj.close()

fileoj = open(“json2.json”,“w”)
content1 = ‘{“Name”:”Virat”,”Game”:”Cricket”}’
print(type(content1))
print(type(content1))
print(json.dumps(content1, indent=4, sort_keys=True))
json.dump(obj=content1,fp=fileoj, indent=4)
fileoj.close()

############ DATABASE ################

# structured v unstructured
# Name, Age, Country, Runs, Wickets

## Library Application
# Table 1: Books
## Columns: BookID (INTEGER), BookTitle (TEXT), Price (FLOAT), Copies (INTEGER)

# Table 2: Members
#Columns: MemberID (INTEGER), Name (TEXT), Email (TEXT), Phone (TEXT), Address (TEXT)

#Relationship:
# one to one:
# one to many / many to one:
# many to many:

# Table 3: BOOKSMEMBERS
# columns: TID (INTEGER), BOOKID(INTEGER) , MID(INTEGER),
# ISSUEDATE (DATE), RETURNDATE (DATE)

# OLTP – Online Transaction Processing (normal ) – Reading + (Editing done in bulk)
# OLAP – Online Analytical Processing (Analytics) – Reading

# CRUD : Create (INSERT), Read (SELECT), Update (UPDATE), Delete (DELETE)

# Roles in DBMS:
## Database Design (ER diagram, create tables) – SQL
## Application Developers: SQL – CRUD
#### Working with Databases #########
”’
# Constraints: Keys (Primary Key, Foreign Key), NOT NULL, UNIQUE, CHECK, DEFAULT

Table 1: Publisher
Create Table Publisher(
PUBID int Primary Key,
Name varchar(25));

INSERT INTO Publisher Values (101, ‘Hyderabad’,’Eka Publishers);
INSERT INTO Publisher Values (102, ‘Mumbai’,’Best Publishers);

Table 2: Books:

Create Table Books(
BookID int Primary Key,
Author varchar(25) NOT NULL,
Title varchar(25) NOT NULL,
Price float(7,2),
Available bool Default 1,
PubID int,
CHECK(Price>=0.0),
Foreign Key (PubID) references Publisher(PubID)
);

INSERT INTO Books (BookID, Author, Title, PubID) Values (101,’Swapnil’,’Python Programming’,101);

INSERT INTO Books (BookID, Author, Title, PubID) Values (102,’Saurav’,’Machine Learning’,101);

Table 3: Member
Create Table Member(
MEM_ID Int Primary key,
Memb_date Date,
Memb_Type varchar(1),
Name varchar(30),
Expiry_date date);

INSERT INTO MEMBER(MEM_ID, Name) Values(101,’%s’)
INSERT INTO MEMBER(MEM_ID, Name) Values(102,’%s’)
INSERT INTO MEMBER(MEM_ID, Name) Values(103,’%s’)

Table 4: BooksMember

Create table BooksMember(
BMID Int Primary Key,
MEMID int Foreign Key References MEMBER(MEMID),
BOOKID int Foreign Key References BOOKS(BOOKID),
BORROW_DATE Date,
RETURN_DATE Date,
);

”’
import pymysql
cursor = db_connect.cursor()
#cursor.execute(‘Drop table Publisher;’)
tab1 = ”’
Create Table Publisher(
PUBID int Primary Key,
Name varchar(25));
”’
#cursor.execute(tab1)
tab2 = ”’
Create Table Books(
BookID int Primary Key,
Author varchar(25) NOT NULL,
Title varchar(25) NOT NULL,
Price float(7,2),
Available bool Default 1,
PubID int,
CHECK(Price>=0.0),
Foreign Key (PubID) references Publisher(PubID)
);
”’
#cursor.execute(tab2)

tab3 = ”’
Create Table Member(
MEM_ID Int Primary key,
Memb_date Date,
Memb_Type varchar(1),
Name varchar(30),
Expiry_date date);
”’
#cursor.execute(tab3)
tab4 = ”’
Create table BooksMember(
BMID Int Primary Key,
BORROW_DATE Date,
RETURN_DATE Date,
MEM_ID int,
BOOKID int,
Foreign Key (MEM_ID) References MEMBER (MEM_ID),
Foreign Key (BOOKID) References BOOKS (BOOKID));
”’
#cursor.execute(tab4)

########### CRUD Create (Insert), Read (Select), Update, Delete ###
## Peforming Create – using Insert
list_insert = [“INSERT INTO Publisher Values (101, ‘Hyderabad’,’Eka Publishers’);”,
“INSERT INTO Publisher Values (102, ‘Mumbai’,’Best Publishers’);”,
“INSERT INTO Books (BookID, Author, Title, PubID) Values (101,’Swapnil’,’Python Programming’,101);”,
“INSERT INTO Books (BookID, Author, Title, PubID) Values (102,’Saurav’,’Machine Learning’,101);”]
list_insert = []
for statement in list_insert:
cursor.execute(statement)
db_connect.commit() # to save the changes

# Insert by dynamic query

#Remove the multi line comment to practice:
”’
name1 = input(“Enter the Member 1 name: “)
insert1 = “INSERT INTO MEMBER(MEM_ID, Name) Values(101,’%s’)”%(name1)
cursor.execute(insert1)
name2 = input(“Enter the Member 2 name: “)
insert2 = “INSERT INTO MEMBER(MEM_ID, Name) Values(102,’%s’)”%(name2)
cursor.execute(insert2)

name3 = input(“Enter the Member 3 name: “)
insert3 = “INSERT INTO MEMBER(MEM_ID, Name) Values(‘%d’,’%s’)”%(103,name3)
cursor.execute(insert3)
db_connect.commit()
”’
## Update existing value in Member table
update1 = “Update Member Set Name=’Sachin Tendulkar’ where mem_id=101”
cursor.execute(update1)
## Delete existing member from Member table
delete1 = “Delete from Member where mem_id=102”
cursor.execute(delete1)
db_connect.commit()
select1 = “Select * from Member”
cursor.execute(select1)
results = cursor.fetchall()
for r in results:
print(r)
db_connect.close()

”’
https://livesql.oracle.com/

create an account and start practicing
”’

— Reading all the rows and columns

select * from HR.Employees;

— All the rows but given columns only

Select Employee_ID, FIRST_NAME, EMAIL from HR.Employees;

— Restricted columns and restricted rows

Select Employee_ID, FIRST_NAME, EMAIL from HR.Employees where Employee_ID =120;

select first_name||’ has a salary of \$’||  salary “Salary Information” , email from hr.employees order by email;

select first_name||’ has a salary of \$’||  salary “Salary Information” , email, HIRE_DATE from hr.employees order by HIRE_DATE, email desc;

select first_name, last_name, email, salary from hr.employees where salary > 10000  and salary <18000;

select first_name, last_name, email, salary from hr.employees where salary between 10000 and 18000;

select first_name, last_name, email, salary from hr.employees where salary in (17000, 11000, 13500);

select count(*) from hr.employees

select avg(salary) from hr.employees

select * from HR.Employees;

select * from hr.departments;

select first_name, last_name, email, to_char(hire_date, ‘Month DD, YYYY’), round(months_between(sysdate, hire_date)) Tenure_in_months from hr.employees;

select * from dual;

select 3+3 from HR.Employees where rownum<2;

select 3+3, abs(-80), to_date(‘May 14, 2023 08:01 P.M.’, ‘Month DD, YYYY HH:MI P.M.’) from dual

select Decode(3+5,8,’CORRECT’,’INCORRECT’)  from dual;

— Aggregate functions

select JOB_ID, count(*), round(avg(salary)), round(sum(salary)) TOTAL_SALARY from hr.employees group by JOB_ID having count(*)>=5;

— JOINING TABLES

select FIRST_NAME , LAST_NAME, t1.DEPARTMENT_ID, t2.DEPARTMENT_ID, Department_name, HIRE_DATE from HR.Employees t1, hr.departments t2 where t1.department_id = t2.department_ID;

— SUB QUERY

select * from HR.Employees where Employee_ID in (select employee_id from hr.employees);

select * from HR.Employees where Employee_ID = (select employee_id from hr.employees where rownum < 2);

— SET OPERATIONS

select * from HR.Employees where salary <11000

INTERSECT

select * from HR.Employees where salary >20000

import numpy as np
x = range(16)
print(type(x))
x = np.reshape(x,(8,2))
print(type(x))
print(x)
print(“Y:”)
y = [[3,4,5],[2,1,2],[9,0,1],[2,2,2]]
y = np.array(y)
print(y)
print(y[2,0])
print(y[-2,-3])

print(y[1:3,1:])
z = [[3,4,1],[2,3,2],[2,7,1],[2,9,2]]
z = np.array(z)
print(“=======================”)
print(z)
print(y)
print(y + z)

print(y – z)
print(np.subtract(y,z))

print(y * z)
print(np.multiply(y,z))

print(y /z)
print(np.divide(y,z))

y = [[3,4,5,3],[2,1,2,2],[9,0,1,1],[2,2,2,4]] # 4 * 4
y = np.array(y)
z = [[3,4,1],[2,3,2],[2,7,1],[2,9,2]] # 4 * 3
z = np.array(z)
print(y @ z) #matrix multiplication
print(np.matmul(y,z))

”’
x + y = 35
2x + 3y = 90
Find x and y ?
”’
# coefficient
coeff = np.array([[1,1],[2,3]])
#variable matrix
solution = np.array([[35],[90]])
## coeff * variable = solution
# variable = inv(coeff) * solution
det_coeff = np.linalg.det(coeff)
print(“Determinant of Coefficient matrix = “,det_coeff)
if det_coeff !=0:
variable = np.linalg.inv(coeff) @ solution
print(“Solution is: x = “,int(variable[0,0]), “and y = “,int(variable[1,0]))

import pandas as pd

y = [[3,4,5,3],[2,1,2,2],[9,0,1,1],[2,2,2,4]]
y1 = np.array(y)
y2 = pd.DataFrame(y)
print(y1)
print(y2)
print(“Y2: \n,y2.loc[0:2,1:3])
y2 = pd.DataFrame(y,columns=[“January”,“February”,“March”,“April”])
print(y2)
y2 = pd.DataFrame(y,columns=[“January”,“February”,“March”,“April”],
index=[“Banana”,“Apple”,“Guava”,“Mango”])
print(y2)
print(“Y2: \n,y2.loc[[“Guava”,“Mango”],[“January”,“February”,“March”]])
# loc, iloc

## Reference Book- Numpy, Pandas and Projects

data = [[“January”,1500,1900],[“February”,1900,1800],[“March”,1500,1800],[“April”,1000,1500],[“May”, 2300,2500]]
import pandas as pd
data_df = pd.DataFrame(data, columns=[“Month”,“Runs Scored”,“Runs Given Away”])
print(data_df)
print(data_df[“Runs Scored”].mean())
print(data_df[“Runs Given Away”].sum())
print(data_df[data_df[‘Month’]==“March”])
print(data_df[data_df[‘Month’].isin([“January”,“April”,“May”])])
print(data_df.iloc[0])
print(data_df.loc[[0,2,4],[“Month”,“Runs Given Away”]])

print(device_df.shape)
print(usage_df.shape)
new_df = pd.merge(device_df, usage_df,on=“use_id”) #how=inner
print(new_df)

new_df = pd.merge(device_df, usage_df,on=“use_id”, how=“left”) #how=inner
print(new_df)
new_df = pd.merge(device_df, usage_df,on=“use_id”, how=“right”) #how=inner
print(new_df)
new_df = pd.merge(device_df, usage_df,on=“use_id”, how=“outer”) #how=inner
print(new_df)
# 159+81+113 = 353

## CLASS NOTES HERE (BELOW CODE)

# Univariate: Histogram, Bar chart,

# Bivariate: Scatter

import pandas as pd
print(data.shape)
print(data.dtypes)

import matplotlib.pyplot as plt
import seaborn as sns
data_30 = data.columns[:30]
#print(data_30)
color_list=[“#00FF00”,“#FF0000”]
sns.heatmap(data[data_30].isnull(), cmap=sns.color_palette(color_list))
plt.show()
import numpy as np
”’
for i in data.columns:
missing_cnt = np.mean(data[i].isnull())
print(f”{i} has {missing_cnt*100}% of missing values”)
”’
for i in data.columns:
missing_cnt = np.mean(data[i].isnull())
if missing_cnt >0.8:
print(f”{i} has {missing_cnt*100}% of missing values”)

## Company has more than 94% missing data – so lets drop it
# axis = 0 – row & axis = 1 – column
data = data.drop([‘company’], axis=1) # column company will be dropped
print(“Shape after dropping company: “,data.shape)
for i in data.columns:
#check for row missing
missing = data[i].isnull()
num_missing = np.sum(missing)
if num_missing > 0:
data[f’{i}_ismissing’] = missing
missing_cnt = np.mean(data[i].isnull())
if missing_cnt >0.8:
print(f”{i} has {missing_cnt*100}% of missing values”) #company is not there now
print(“Shape after adding _ismissing columns: “,data.shape)
# create a new column which will store the missing number of values for each row
is_missing_col = [col for col in data.columns if “ismissing” in col]
data[“num_missing_cnt”] = data[is_missing_col].sum(axis=1)
print(data[“num_missing_cnt”])
# selecting rows with more thab 12 missing values
index_missing_col = data[data[‘num_missing_cnt’]>12].index
data = data.drop(index_missing_col, axis=0)
print(“Shape after removing missing rows: “,data.shape)

# find the missing values:
cols_num = data.select_dtypes(include=[np.number])
all_num_cols = cols_num.columns.values
for i in all_num_cols: #list of columns which are numeric
missing_cnt = np.mean(data[i].isnull())
if missing_cnt >0.00:
#print(f”{i} has {missing_cnt*100}% of missing values”)
med = data[i].median()
data[i] = data[i].fillna(med)

”’
children has 2.0498257606219004% of missing values – FLOAT
babies has 11.311318858061922% of missing values – FLOAT
agent has 13.687005763302507% of missing values – FLOAT

meal has 11.467129071170085% of missing values – CAT
country has 0.40879238707947996% of missing values – CAT
deposit_type has 8.232810615199035% of missing values – CAT

”’

#handle categorical values
data[‘agent’] = pd.Categorical(data.agent)
mode = data[‘agent’].describe()[‘top’]
data[‘agent’] = data[‘agent’].fillna(mode)

data[‘meal’] = pd.Categorical(data.agent)
mode = data[‘meal’].describe()[‘top’]
data[‘meal’] = data[‘meal’].fillna(mode)

data[‘country’] = pd.Categorical(data.agent)
mode = data[‘country’].describe()[‘top’]
data[‘country’] = data[‘country’].fillna(mode)

data[‘deposit_type’] = pd.Categorical(data.agent)
mode = data[‘deposit_type’].describe()[‘top’]
data[‘deposit_type’] = data[‘deposit_type’].fillna(mode)

print(“Final check for missing values:”)
for i in all_num_cols: #list of columns which are numeric
missing_cnt = np.mean(data[i].isnull())
if missing_cnt >0.00:
print(f”{i} has {missing_cnt*100}% of missing values”)

## ADVANCED PYTHON LIBRARY FOR PLOTTING

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

url = “https://www.hubertiming.com/results/2017GPTR10K”
from urllib.request import urlopen
html_code = urlopen(url)

from bs4 import BeautifulSoup
soup = BeautifulSoup(html_code,“lxml”)
print(soup.title)
all_a = soup.find_all(‘a’) # returns as a list of all values
print(all_a)

import re
rows = soup.find_all(‘tr’)
list_rows = []
for row in rows:
row_td = row.find_all(‘td’)
row_td = str(row_td)
#row_td = BeautifulSoup(row_td,”lxml”).get_text()
#print(row_td)
pattern = re.compile(<.*?>)
row_td = (re.sub(pattern,“”,row_td))
list_rows.append(row_td)

#Data clearning
list_rows = list_rows[5:] #removing not required rows
# 2. convert into dataframe
data_df = pd.DataFrame(list_rows)

# 3. split into different columns
data_df = data_df[0].str.split(‘,’,expand=True)
print(data_df)

”’
<tr>
<td> – data
”’
main_df = pd.concat(main_df)
print(“=============\n\n)
main_df = main_df.rename(columns=main_df.iloc[0])
main_df = main_df.drop(main_df.index[0])
main_df = main_df.dropna(axis=0, how=“any”)
#remove [ from first col and ] from last column
main_df.rename(columns={‘[Place’ : ‘Place’}, inplace=True)
main_df.rename(columns={‘ Team]’ : ‘Team’}, inplace=True)
main_df[‘Place’] = main_df[‘Place’].str.strip(‘[‘)
main_df[‘Team’] = main_df[‘Team’].str.strip(‘]’)
print(main_df)
print(main_df.info())

## Above code available here (Click to access)

import numpy as np
import pandas as pd
url=“D:\\datasets\\OnlineRetail\\order_reviews.csv”
print(list(reviews_df.columns))

## 1. convert entire text to lowercase
## 2. compatibility decomposition
## 3. convert into utf8
## 4. removing accent
## 5. sentences into words
## 6. remove stop words

import unicodedata
import nltk

## Function to perform steps 1 to 6
def basic_nlp_analysis(text):
text = text.lower()
#Below code will perform:
## 2. compatibility decomposition
## 3. convert into utf8
## 4. removing accent
text = unicodedata.normalize(‘NFKD’,text).encode(‘ascii’, errors=‘ignore’).decode(‘utf-8’)
## 5. sentences into words
words = nltk.tokenize.word_tokenize(text)

## 6. remove stop words
STOP_WORDS = set(w for w in nltk.corpus.stopwords.words(‘portuguese’))
words = tuple (t for t in words if t not in STOP_WORDS and t.isalpha())

return words

commented_reviews = reviews_df[reviews_df[‘review_comment_message’].notnull()].copy()
print(commented_reviews[‘review_comment_message’])
# will apply basic nlp operations on the column
commented_reviews[‘review_comment_words’] = commented_reviews[‘review_comment_message’].apply(basic_nlp_analysis)

print(commented_reviews[‘review_comment_words’])

## About Probability and Probability distribution - Book

INFERENTIAL STATS:

Predicting for the population from the sample data

Led by probability – an event you are interested in / total possible outcome

1/6

Discrete and Continuous

Bayes theorem – p (probability of success) & q (probability of failure)

q = 1-p

Probability (1 time event) and Probability distribution (repetition)

Toss coin one after another: Sample Space: TT, HH, TH, HT = 2/4

Toss 2 coins at the same time: Sample Space: TT, HH, TH