Data Analytics Jan 2023

https://learn.swapnil.pwLearn and Practice Python

 

Refer Python notes here for installation of software:   https://learn.swapnil.pw

DAY 1 VIDEO HERE

#Scipy - scientific python
import scipy

#Permutation & Combination
## Both are about choosing r things from given n things
## default case replacement is not allowed

## Permutation order is important - n! / (n-r)!
## Combination is where order is not important - n! /(n-r)! r!

## 6 B & 4 G - I need to form a committe with 4 members, there has to be atleast a Boy
## 3B - 1G - x1
## 2B - 2 G - x2
## 1B - 3G - x3
## 4B - x4
## total= x1 + x2 + x3 + x4
from scipy.special import comb, perm
sum = 0
cnt = comb(6,3,repetition=False) * comb(4,1)
sum+=cnt
cnt = comb(6,2) * comb(4,2)
sum+=cnt
cnt = comb(6,1) * comb(4,3)
sum+=cnt
cnt = comb(6,4) * comb(4,0)
sum+=cnt
print("Total combination possible is ",sum)

#Permutation
# 4 coats, 5 waist coats, 6 caps - 3 members
#abcd lmnop
cnt1 = perm(4,3)
cnt2 = perm(5,3)
cnt3 = perm(6,3)
print("Total permutation = ", cnt1*cnt2*cnt3)

#####################################
######### OPTIMIZATION PROBLEM #####
#####################################
# There is a company that makes: laptops (profit = 750) and desktops (1000)
#objective is to Maximize profit
# x = no. of laptops = 750x
# y = no. of desktops = 1000x
#solution = 750x + 1000y
## constraint 1 =Processing chips = 10,000 = requires 1 chip each
## ==> x + y <= 10,000
## Memory chipset 1 GB size - Latops need 1GB memory , Desktops need 2GB
## ==> x + 2y <= 15,000
##Time to assemble 1 laptop = 4min, desktop = 3min, total time 25,000 min available
## ==> 4x + 3y <=25,000

## x+y <= 10
## x+2y <=15
## 4x+3y <=25
import numpy
from scipy.optimize import linprog, minimize,LinearConstraint



l = 1 #num of laptops
d = 1 #num of desktops
profit_l = 750
profit_d = 1000
total_profit = l*profit_l + d * profit_d
objective =[-profit_l, -profit_d] #minimization problem
## x+y <= 10
## x+2y <=15
## 4x+3y <=25
lhs_cons = [[1,1],
[1,2],
[4,3]]
rhs_val = [10000,
15000,
25000]
bnd = [(0,float("inf")),(0,float("inf"))]
optimize_sol = linprog(c=objective, A_ub=lhs_cons, b_ub=rhs_val,bounds=bnd,method="revised simplex")
if optimize_sol:
print(optimize_sol.x[0], optimize_sol.x[1])
print("Total profit = ",optimize_sol.fun*-1)


print("==================")
lhs_cons=[]
rhs_val=[]
while True:
l1 = int(input("Enter the value for notebook: "))
l2 = int(input("Enter the value for desktop: "))
y1 = int(input("Enter the value for Y: "))
lhs_cons.append([l1,l2])
rhs_val.append(y1)
ch=input("Do you have more constraints: ")
if ch!="y":
break
print("LHS Constraints = ",lhs_cons)
print("RHS Values = ",rhs_val)

#Pandas - dataframe - is a way to read data in table format (row & column)
import pandas as pd

data = [["Sachin",47],["Virat",33],["Rohit",35]]
df1 = pd.DataFrame(data,columns=['Name','Age'])
print(df1)
import pandas as pd
import sqlite3
con_str = sqlite3.connect("LibraryMS.db")
cursor = con_str.cursor()
q1 = "select * from students"
rows = cursor.execute(q1)
list2 = list(rows.fetchall())

con_str.close()
data_df = pd.DataFrame(list2)
print(data_df)

list1=[["Q1 2022",2300,3400,1900],
["Q2 2022",2300,3400,1900],
["Q3 2022",2300,3400,1900],
["Q4 2022",2300,3400,1900]]
print(list1)
columns=["Quarter","Apple","Banana","Oranges"]
ind=["Jan-March","April-June","Jul-Sep","Oct-Dec"]
data_df = pd.DataFrame(list1, columns=columns,index=ind)
print(data_df)
# df.iloc & loc
print(data_df.iloc[0:3,-3:])
print(data_df.iloc[0:3,[1,3]])

print(data_df.loc[['Jan-March',"Oct-Dec"],['Apple',"Oranges"]])

import pandas as pd

data_df1 = pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_usage.csv")
print(data_df1)
data_df2 = pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_device.csv")
print(data_df2)
import pandas as pd
import unicodedata
import nltk


#remove accent functions
def remove_accent(text):
txt = unicodedata.normalize('NFKD',text).encode('ascii',errors='ignore').decode('utf-8')
return txt
#getting the stop words set
STOP_WORDS = set(remove_accent(word) for word in nltk.corpus.stopwords.words('portuguese'))

#defining a function to perform NLP processes
def nlp_analysis_1(comment):
#nlp 1. convert to lowercase
comments = comment.lower()
#nlp 2. remove accents
comments = remove_accent(comments)
#nl 3. tokenize the content
tokens = nltk.tokenize.word_tokenize(comments)
return tokens

reviews = pd.read_csv("C:\\Users\\Hp\Downloads\\OnlineRetail-master\\order_reviews.csv")
#print(reviews['review_comment_message'])
#Step 1: removed the null values
comment_text = reviews[reviews['review_comment_message'].notnull()].copy()
print(comment_text.columns)
comment_text['review_comment_message'] = comment_text['review_comment_message'].apply(nlp_analysis_1)
print(comment_text['review_comment_message'])

SQL Learning

livesql.oracle.com

Select * from hr.employees;

 

select first_name, last_name,hire_date,salary from hr.employees;

 

select first_name FirstName, last_name,hire_date,salary from hr.employees;

 

select first_name || ‘ ‘|| last_name  FULLNAME,hire_date,salary from hr.employees;

 

select first_name || ‘ ‘|| last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY from hr.employees;

 

select first_name || ‘ ‘|| last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY from hr.employees order by Last_name;

 

 

select first_name || ‘ ‘|| last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY 

from hr.employees 

order by Hire_date, Last_name;

 

select first_name || ‘ ‘|| last_name  FULLNAME,hire_date,salary *12 ANNUAL_SALARY , COMMISSION_PCT

from hr.employees 

order by COMMISSION_PCT NULLS First;

 

 

select first_name “First Name”, last_name,hire_date,salary from hr.employees;

 

select first_name, last_name,hire_date,salary from hr.employees where salary>=9000;

 

select first_name, last_name,hire_date,salary from hr.employees 

where salary>=9000 and salary <=12000;

 

select first_name, last_name,hire_date,salary from hr.employees 

where salary BETWEEN 9000 and 12000;

 

select first_name, last_name,hire_date,salary, DEPARTMENT_ID from hr.employees 

where salary>=9000 or DEPARTMENT_ID =80;

 

select distinct salary from hr.employees;

 

— Tendulkar  9000

— Tendulkar  15000