# Learn Python – Evening Jan 2022 – II
########################
####
# --- String literals: the four ways to quote a string ---
str1 = 'HELLO'
str2 = "I am fine"
# Triple quotes (single or double) let one literal span several lines.
str3 = '''Where are you going?
How long will you be here?
What are you going to do?'''
str4 = """I am here
I will be here for next 7 days
I am going to just relax and chill"""
print(type(str1), type(str2), type(str3), type(str4))
print(str1)
print(str2)
print(str3)
print(str4)

# What's your name?
# The text contains a single quote, so delimit the literal with double quotes.
str5 = "What's your name?"
print(str5)
# He asked,"Where are you?"
# The text contains double quotes, so delimit the literal with single quotes.
str6 = 'He asked,"Where are you?"'
print(str6)

# He asked,"What's your name?"
# Both quote kinds appear in the text: use triple quotes, or escape with a backslash.
# escape sequence \
print('''He asked,"What's your name?"''')
print("He asked,\"What's your name?\"")

# \n is a newline and \t is a tab inside a normal string literal.
print('nnnnn\nnn\tnn')

# A literal backslash is written as \\ (a lone "\F" is an invalid escape and
# triggers a warning on current Python versions).
print("\\FOlder\\newfolder")
# \n is used to print newline in python
print("\\n is used to print newline in python")

# \\n will not print newline in python
print("\\\\n will not print newline in python")

# --- Concatenation, repetition, iteration and indexing ---
str1 = "Hello You"
str2 = "There"
print(str1 + str2)   # + concatenates
print(str1 * 5)      # * repeats
# Iterating a string visits one character at a time; the character itself is
# not used here, so "Hello" is printed once per character.
for _ in str1:
    print("Hello")

#indexing
print(str1[2])
# The last element sits at index len(str1) - 1, which is the same as index -1.
print("last element: ", str1[len(str1) - 1])
print("last element: ", str1[-1])
print("second element: ", str1[-8])
# Slices are [start:stop] with stop excluded; negative indices count from the end.
print("ell: ", str1[1:4])
print("ell: ", str1[-8:-5])
print("First 3: ", str1[:3])
print("First 3: ", str1[:-6])
print("Last 3: ", str1[6:])
print("Last 3: ", str1[-3:])

#Methods - exactly same as your functions - only difference is they are linked to a class
import time
str1 = "HELLO"
# The third argument limits how many occurrences are replaced (here only the first "L").
print(str1.replace("L","X",1))

sub_str = "LL"
str2 = "HELLO HOW WELL ARE YOU LL"
# str.find returns the index of the first match, or -1 when there is none;
# it is a position, not a count (use str.count for the number of matches).
cnt = str2.find(sub_str)
print("First match index = ",cnt)

# cnt holds the result of str2.find(sub_str): negative means "not found".
if cnt<0:
    print("Sorry, no matching value hence removing")
else:
    print("Value found, now replacing")
    # Print five dots, half a second apart, as a small progress animation.
    # flush=True pushes each dot out immediately; without it the output may be
    # buffered until the newline and the dots would all appear at once.
    for _ in range(5):
        print(". ",end="",flush=True)
        time.sleep(0.5)
    print("\n")
    print(str2.replace(sub_str,"OOOO"))


# Split str2 on a separator, then rebuild the text by joining the pieces
# with that same separator.
separator = "LL"
out_res = str2.split(separator)
print("Output Result = ", out_res)

out_str = separator.join(out_res)
print(out_str)

# Case conversions return new strings; str2 itself is left untouched.
for convert in (str.title, str.lower, str.upper):
    print(convert(str2))

# islower / isupper report whether every cased character has that case.
str3 = 'hello how well are you ll'
for check in (str.islower, str.isupper):
    print(check(str3))

# Validate user input with the str "is..." predicates before using it.
num1 = input("Enter a number: ")
# isdigit() is True only when every character is a digit, so signs, spaces
# and decimal points are rejected; num1 stays a string in that case.
if num1.isdigit():
    num1 = int(num1)
else:
    print("Invalid input")

ename = input("Enter your first name: ")
# isalpha() is True only when every character is a letter.
if ename.isalpha():
    print("Your name is being saved...")
else:
    print("Invalid name")

#WAP to count of vowels in a sentence
para1 = "Work, family, and endless to-do lists can make it tough to find the time to catch up. But you'll never regret taking a break to chat with your friend, Frost reminds us. Everything else will still be there later."

# Approach 1: compare each character against every vowel in both cases.
vowels_by_chain = 0
for ch in para1:
    if ch=='a' or ch=='A' or ch=='e' or ch=='E' or ch=='i' or ch=='I' or ch=='o' or ch=='O' or ch=='u' or ch=='U':
        vowels_by_chain += 1
print("Total vowels = ", vowels_by_chain)

# Approach 2: lower-case the text once so only five comparisons are needed.
vowels_by_lower = 0
for ch in para1.lower():
    if ch=='a' or ch=='e' or ch=='i' or ch=='o' or ch=='u':
        vowels_by_lower += 1
print("Total vowels = ", vowels_by_lower)

# Approach 3: a membership test against the string of vowels.
vowels_by_membership = 0
for ch in para1.lower():
    if ch in 'aeiou':
        vowels_by_membership += 1
print("Total vowels = ", vowels_by_membership)

########## LIST
#LIST
#collection of linear ordered items
list1 = [1,2,3,4,5]
print(type(list1))
print("Size = ",len(list1))

# Indexing and slicing work exactly as they do for strings.
print(list1[0])
print(list1[-1])
print(list1[3])
print(list1[:3])
print(list1[-3:])
print(list1[1:4])

for i in list1:
    print(i)

# + concatenates two lists and * repeats one; both build a new list.
print([2,3,4]+[6,4,9])
print([2,3,4]*3)

# count() exists on both strings and lists.
str2 = "A B C D A B C A B A "
print(str2.count("D"))
print(list1.count(3))

# Lists are mutable: the methods below change l1 in place.
l1 = [2,4,6,8]
# append() modifies the list and returns None, so this line prints "None".
print(l1.append(12))
print(l1)
l1[0]=10            # item assignment replaces the element at that index
print(l1)

l1.insert(2,15)     # insert(index, value)
print(l1)

# Queue: FIFO
# Stack: LIFO

# remove() raises ValueError when the value is absent, so test membership first.
if 16 in l1:
    l1.remove(16) #takes in value to remove
l1.remove(15)
print(l1)
l1.pop(1) #index
print(l1)

#################
# Interactive queue demo (FIFO): values are added at the end and removed from
# the front. The loop is disabled by its `while False` condition.
while False:
    print("Queue is: ",l1)
    print("1. Add\n2. Remove\n3. Exit")
    ch=input("Enter your choice: ")
    if ch=="1":
        val = input("Enter the value: ")
        l1.append(val)
    elif ch=="2":
        # pop() on an empty list raises IndexError, so check first.
        if l1:
            l1.pop(0)
        else:
            print("Queue is empty!")
    elif ch=="3":
        break
    else:
        print("Try again!")

# Interactive stack demo (LIFO): values are added at the end and removed from
# the end. Also disabled by `while False`.
while False:
    print("Stack is: ",l1)
    print("1. Add\n2. Remove\n3. Exit")
    ch=input("Enter your choice: ")
    if ch=="1":
        val = input("Enter the value: ")
        l1.append(val)
    elif ch=="2":
        if l1:
            l1.pop(-1)
        else:
            print("Stack is empty!")
    elif ch=="3":
        break
    else:
        print("Try again!")

# Plain assignment binds a second name to the SAME list object, whereas
# copy() creates a separate list holding the same elements.
l2 = l1 #they become same
l3 = l1.copy()
for label, current in (("List1", l1), ("List2", l2), ("List3", l3)):
    print(f"1. {label} = ", current)

# 33 and 44 both end up in the shared list; 55 goes only into the copy.
for target, value in ((l1, 33), (l2, 44), (l3, 55)):
    target.append(value)

for label, current in (("List1", l1), ("List2", l2), ("List3", l3)):
    print(f"2. {label} = ", current)

# extend() appends every element of l3 to l1 in place.
l1.extend(l3)
print(l1)
print(l1.count(6))

# Read the marks of one student and report their sum and average.
# `total` is used as the accumulator so the builtin sum() is not shadowed.
NUM_SUBJECTS = 3
total = 0
marks=[]
for i in range(NUM_SUBJECTS):
    m = int(input("Enter marks in subject "+str(i+1)+": "))
    marks.append(m)
    total += m
# Divide by the number of marks actually collected instead of a literal 3.
print("Sum is ",total, "and average is ",total/len(marks))
print("Marks obtained is ",marks)

#THREE STUDENTS AND THREE SUBJECTS:
# allmarks becomes a list of lists: one inner list of marks per student.
NUM_STUDENTS = 3
NUM_SUBJECTS = 3
allmarks=[]
for j in range(NUM_STUDENTS):
    # Fresh accumulator and marks list for every student.
    total = 0
    marks=[]
    for i in range(NUM_SUBJECTS):
        m = int(input("Enter marks in subject "+str(i+1)+": "))
        marks.append(m)
        total += m
    print("Sum is ",total, "and average is ",total/len(marks))
    print("Marks obtained is ",marks)

    allmarks.append(marks)

print("All the marks are: ",allmarks)

# All the marks are: [[88, 66, 77], [99, 44, 66], [44, 99, 88]]
# find the highest marks of each subject

#Tuple - linear order immutable collection
#strings are also immutable

tuple1 = (1,3,1,4,1,5,1,6)
print(type(tuple1))
print(len(tuple1))
print(tuple1.count(1))
print(tuple1.index(4))   # index of the first occurrence of the value 4
print(tuple1[2])
for i in tuple1:
    print(i)
# A tuple cannot be changed in place: convert to a list, modify, convert back.
t1 = list(tuple1)
t1.append(55)
t1=tuple(t1)
t2 = (2,4,6,8) #packing
#unpacking
# The number of names on the left must equal the number of items in the tuple,
# otherwise Python raises ValueError.
a,b,c,d = t2
print(a,b,c,d)
#packing
import numpy as np
# Build the integers 0..15 and view them as 8 rows x 2 columns.
x = np.arange(16).reshape(8, 2)
print(x)
x2 = np.ones((3,3))        # 3x3 array filled with 1.0
print(x2)
x3 = np.full((4,4),11)     # 4x4 array filled with 11
print(x3)
x4 = [[1,2,1],[1,1,1],[2,2,2],[3,1,1]]
x4 = np.array(x4)          # nested list -> 4x3 array
print(type(x4))
print(x4)

#indexing
# [row slice, column slice]: rows 1-2, columns 1 onwards.
print(x4[1:3,1:])
x5 = np.array([[1,2,1],[1,1,1],[2,2,2],[3,1,1]])
# Each operator is element-wise and has an equivalent np.<function>.
print(x4+x5)
print(np.add(x4,x5))
print(x4-x5)
print(np.subtract(x4,x5))
print(x4 * x5)
print(np.multiply(x4,x5))
print(x4 / x5)
print(np.divide(x4,x5))
print(x4 // x5)
print(np.sqrt(x4))
print(np.mean(x4))
print("Shape of the matrix = ",np.shape(x4))
# (4x3) @ (3x1) -> (4x1)
x6 = [[5],[6],[4]]
print(x4 @ x6) #matrix multiplication
print(np.matmul(x4,x6))



# x=5, y=4
# 3x-2y = 7
# 3x+5y = 35
# A * B = C => B = A inverse * C
A = np.array([[3,-2],[3,5]])
C = np.array([[7],[35]])
#find determinant and if its non zero only then perform inverse
det_A = np.linalg.det(A)
# The determinant is a float, so compare with a tolerance instead of != 0:
# rounding can yield a tiny non-zero value for a singular matrix.
if not np.isclose(det_A, 0):
    Inv_A = np.linalg.inv(A)
    Sol = Inv_A @ C
    print("Solution is: ",Sol)
else:
    print("Solution is not possible")

# SCIPY
import scipy
#

# Numpy Video

#Indigo computers how many laptops and desktops to make
# memory chip: 1L+ 2D <=15000
# processing chip: 1L + 1D <=10000
# machine time: 4L + 3D <=25000
# maximize Profit: 750L + 1000D = ?
import numpy as np
from scipy.optimize import minimize, LinearConstraint, linprog
l,d = 1,1
obj = 750*l + 1000*d
#since we are going to minimize, the obj becomes
# (linprog only minimizes, so the profit coefficients are negated)
obj = -750*l - 1000*d
obj_list = [-750, -1000]

# One row per constraint, one column per variable (L, D); rows are "<=" limits.
lhs_constraint_ineq = [[1,2],
                       [1,1],
                       [4,3]]
rhs_value=[15000,
           10000,
           25000]
# Both quantities must be non-negative and have no upper limit.
val_bounds = [(0, float("inf")),(0, float("inf"))]
# "highs" is the maintained solver; the legacy "revised simplex" method is no
# longer available in current SciPy releases. The bounds are passed explicitly.
opt_sol = linprog(c=obj_list, A_ub=lhs_constraint_ineq, b_ub=rhs_value,
                  bounds=val_bounds, method="highs")
print(opt_sol)


import pandas as pd
# A DataFrame built from a flat list has a single column.
data1 = pd.DataFrame([10,20,30,40,50])
print(data1)

# A list of rows plus explicit column names and row labels.
# Each row gets its own label (the fifth label was a repeat of "Player 4").
data1 = pd.DataFrame([[10, "Sachin"],[20,"Laxman"],[30,"Dhoni"],[40,"Kohli"],[50,"Rohit"]],
                     columns=["Roll No","Name"],
                     index=["Player 1","Player 2","Player 3","Player 4","Player 5"])
print(data1)

# Load a CSV file from the local disk into a DataFrame (the path is machine-specific).
dataset1 = pd.read_csv("D:/datasets/ongc.csv")
print(dataset1)
import pandas as pd
data = [["Sachin","Cricket","Mumbai",19000],["Virat","Cricket","Delhi",10000],
        ["Dhoni","Cricket","Ranchi",11000],["Sunil","Cricket","Mumbai",8000],
        ["Ravi","Cricket","Mumbai", 3000]]
data_df = pd.DataFrame(data, columns=["Name","Sports","City","Runs"],
                       index=["A","B","C","D","E"])
print(data_df)
print(pd.__version__) #2.0.0
# loc selects by label, iloc selects by integer position.
print(data_df.loc["B"]) # loc & iloc
print(data_df.loc[["A","C"]])

print(data_df.iloc[0])
print(data_df.iloc[0,2]) #(row,col)
print(data_df.iloc[[0,2],2])
print(data_df.iloc[0:3,1:3])
print(data_df.iloc[3:,:2])

print("Average of Runs scored: ", data_df["Runs"].mean())
print("Total Runs scored: ", data_df["Runs"].sum())
# Axis = 0 is for Rows, Axis = 1 is for Columns
data_df = data_df.drop(["A"], axis=0)
data_df = data_df.drop(["City"], axis=1)
#
# Drop by position: data_df.index[1] is the label of the second remaining row.
data_df = data_df.drop(data_df.index[1])
# Drop by condition: keep only the rows whose Name is not "Virat".
data_df = data_df[data_df.Name !="Virat"]
print("After Drop")
print(data_df)
import pandas as pd
device_df = pd.read_csv("D:/datasets/gitdataset/user_device.csv") # 272 rows x 6 columns
usage_df = pd.read_csv("D:/datasets/gitdataset/user_usage.csv") # 240 rows x 4 columns
print(usage_df.head(6))

# merge
# Join the two tables on "use_id" with each join type in turn; usage_df is the
# left table. pd.merge defaults to how="inner". After the loop,
# usage_device_df holds the outer (full) join.
for label, how in (("INNER", "inner"), ("LEFT", "left"),
                   ("RIGHT", "right"), ("FULL", "outer")):
    usage_device_df = pd.merge(usage_df, device_df, on="use_id", how=how)
    print(f"{label} \n", usage_device_df)

# 272 & 240 - 159 (159 + 113 + 81 = 353)
print("Number of Rows in Combined tables: ",usage_device_df.shape[0])
print("Number of Columns in Combined tables: ",usage_device_df.shape[1])

hotels_df = pd.read_csv("D:/datasets/gitdataset/hotel_bookings.csv")
print(hotels_df.shape)
print(hotels_df.dtypes)

# Data Analytics steps:
# 1. Collecting data
# 2. Data cleaning: missing data, outliers

# heatmap to check missing values
import matplotlib.pyplot as plt
import seaborn as sns

# Look at the first 30 columns only.
cols_30 = hotels_df.columns[:30]
print(cols_30)
# isnull() yields a True/False table; with this two-colour palette False
# (value present) is drawn green and True (value missing) is drawn red.
sns.heatmap(hotels_df[cols_30].isnull(), cmap=sns.color_palette(["#00FF00", "#FF0000"]))
plt.show()
data = [["January",1500,1900],["February",1900,1800],["March",1500,1800],["April",1000,1500],["May", 2300,2500]]
import pandas as pd
data_df = pd.DataFrame(data, columns=["Month","Runs Scored","Runs Given Away"])
print(data_df)
print(data_df["Runs Scored"].mean())
print(data_df["Runs Given Away"].sum())
# Boolean filtering: keep the rows where the condition is True.
print(data_df[data_df['Month']=="March"])
print(data_df[data_df['Month'].isin(["January","April","May"])])
print(data_df.iloc[0])
# With the default integer index, loc takes the row labels 0, 2 and 4.
print(data_df.loc[[0,2,4],["Month","Runs Given Away"]])

#pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_device.csv")
device_df = pd.read_csv("D:/datasets/gitdataset/user_device.csv") #(272, 6)
print(device_df.shape)
usage_df = pd.read_csv("D:/datasets/gitdataset/user_usage.csv") #(240, 4)
print(usage_df.shape)
# Join on "use_id" with every join type in turn; device_df is the left table.
# pd.merge defaults to how="inner". After the loop, new_df holds the outer join.
for how in ("inner", "left", "right", "outer"):
    new_df = pd.merge(device_df, usage_df, on="use_id", how=how)
    print(new_df)
# 159+81+113 = 353


# read_csv accepts a URL as well as a local path.
link = "https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/baseball_game_logs.csv"

import pandas as pd
data_df = pd.read_csv(link)
print(data_df)
# Natural language processing
import pandas as pd
link = "https://raw.githubusercontent.com/swapnilsaurav/OnlineRetail/master/order_reviews.csv"
reviews_df = pd.read_csv(link)


# Text pre-processing steps:
# 1. convert entire text to lower case
# 2. decomposition on the text (readable text)
# 3. convert accent (language specific words) into ASCII value (ignore the error)
# 4. Tokenization: breaking into words
# 5. Stop words removal (non helpful)

#review_comment_message
import nltk
import unicodedata
# Keep only the rows that have a review comment; copy() makes the result an
# independent DataFrame so new columns can be added to it later.
reviews_df = reviews_df[reviews_df['review_comment_message'].notnull()].copy()
print(reviews_df['review_comment_message'].head())

# Function to translate into English
#pip install googletrans==4.0.0-rc1
# The translation step below is disabled by wrapping it in a string literal.
'''
from googletrans import Translator
translator = Translator()
reviews_df['review_comment_english'] = reviews_df['review_comment_message'].apply(lambda x: translator.translate(x,src="pt", dest='en').text)

print(reviews_df['review_comment_english'])
'''
# function to normalize portuguese text
def normalize_text(text: str) -> str:
    """Return *text* with accents removed and non-ASCII characters dropped.

    NFKD normalization splits an accented letter into its base letter plus
    combining marks; encoding to ASCII with errors='ignore' then discards the
    marks and any other character that has no ASCII form.
    """
    decomposed = unicodedata.normalize('NFKD', text)
    return decomposed.encode('ascii', errors='ignore').decode('utf-8')
# Cache for the normalized Portuguese stop words; filled on first use.
_NORMALIZED_STOP_WORDS = None


def _portuguese_stop_words():
    """Return NLTK's Portuguese stop words, ASCII-normalized, as a frozenset."""
    global _NORMALIZED_STOP_WORDS
    if _NORMALIZED_STOP_WORDS is None:
        # The tokens are compared after normalize_text() has stripped their
        # accents, so the stop words must go through the same normalization;
        # otherwise accented entries in the list could never match.
        # NOTE(review): assumes the NLTK list holds accented entries - confirm.
        _NORMALIZED_STOP_WORDS = frozenset(
            normalize_text(word)
            for word in nltk.corpus.stopwords.words('portuguese')
        )
    return _NORMALIZED_STOP_WORDS


def basic_nlp(text):
    """Return the distinct meaningful words of *text* as a tuple.

    Steps: lower-case, strip accents / non-ASCII characters, tokenize, drop
    duplicates, then drop Portuguese stop words and any token that is not
    purely alphabetic. Words are returned in order of first appearance.
    """
    # steps 1-3: lowercase, then decompose and reduce to ASCII
    text = normalize_text(text.lower())
    # step 4: tokenize; dict.fromkeys removes duplicates while keeping a
    # stable order (a plain set would give an arbitrary order)
    unique_words = dict.fromkeys(nltk.tokenize.word_tokenize(text))
    # step 5: remove stop words (non meaningful words) and non-alphabetic tokens
    stop_words = _portuguese_stop_words()
    return tuple(w for w in unique_words if w not in stop_words and w.isalpha())

# Run the pre-processing on every review and store the resulting word tuple
# in a new column.
reviews_df['review_comment_words'] = reviews_df['review_comment_message'].apply(basic_nlp)
print("===================")
print(reviews_df['review_comment_words'].head())

# Unigram, bigram, trigram