Data Analytics Jan 2023

Jan 4 2023

https://learn.swapnil.pwLearn and Practice Python

Refer Python notes here for installation of software: https://learn.swapnil.pw

DAY 1 Notes:

 Click here to Read the notes

DAY 1 VIDEO HERE

Click here to read the notes (Chapter 1 for Day 2, Chapter 2 for Day 3 and 4)

DAY 2 VIDEO HERE

DAY 3 VIDEO HERE

DAY 4 Video - Range, IQR, BoxPlot, Variance, Standard Deviation

DAY 5 VIDEO - Click Here to access

#Scipy - scientific python
import scipy

#Permutation & Combination
## Both are about choosing r things from given n things
## default case replacement is not allowed

## Permutation order is important - n! / (n-r)!
## Combination is where order is not important - n! /(n-r)! r!

## 6 B & 4 G - I need to form a committe with 4 members, there has to be atleast a Boy
## 3B - 1G  - x1
## 2B - 2 G - x2
## 1B - 3G - x3
## 4B - x4
## total= x1 + x2 + x3 + x4
from scipy.special import comb, perm
sum = 0
cnt = comb(6,3,repetition=False) * comb(4,1)
sum+=cnt
cnt = comb(6,2) * comb(4,2)
sum+=cnt
cnt = comb(6,1) * comb(4,3)
sum+=cnt
cnt = comb(6,4) * comb(4,0)
sum+=cnt
print("Total combination possible is ",sum)

#Permutation
# 4 coats, 5 waist coats, 6 caps - 3 members
#abcd   lmnop
cnt1 = perm(4,3)
cnt2 = perm(5,3)
cnt3 = perm(6,3)
print("Total permutation = ", cnt1*cnt2*cnt3)

#####################################
######### OPTIMIZATION PROBLEM  #####
#####################################
# There is a company that makes: laptops (profit = 750) and desktops (1000)
#objective is to Maximize profit
# x = no. of laptops =  750x
# y = no. of desktops = 1000x
#solution = 750x + 1000y
## constraint 1 =Processing chips = 10,000 = requires 1 chip each
## ==>   x + y  <= 10,000
## Memory chipset 1 GB size - Latops need 1GB memory , Desktops need 2GB
## ==>   x + 2y <= 15,000
##Time to assemble 1 laptop = 4min, desktop = 3min, total time  25,000 min available
## ==>  4x + 3y <=25,000

##  x+y <= 10
##  x+2y <=15
##  4x+3y <=25

Video 6 - Click here to View

import numpy
from scipy.optimize import linprog, minimize,LinearConstraint



l = 1  #num of laptops
d = 1  #num of desktops
profit_l = 750
profit_d = 1000
total_profit = l*profit_l + d * profit_d
objective =[-profit_l, -profit_d]  #minimization problem
##  x+y <= 10
##  x+2y <=15
##  4x+3y <=25
lhs_cons = [[1,1],
            [1,2],
            [4,3]]
rhs_val = [10000,
           15000,
           25000]
bnd = [(0,float("inf")),(0,float("inf"))]
optimize_sol = linprog(c=objective, A_ub=lhs_cons, b_ub=rhs_val,bounds=bnd,method="revised simplex")
if optimize_sol:
    print(optimize_sol.x[0], optimize_sol.x[1])
    print("Total profit = ",optimize_sol.fun*-1)


print("==================")
lhs_cons=[]
rhs_val=[]
while True:
    l1 = int(input("Enter the value for notebook: "))
    l2 = int(input("Enter the value for desktop: "))
    y1 = int(input("Enter the value for Y: "))
    lhs_cons.append([l1,l2])
    rhs_val.append(y1)
    ch=input("Do you have more constraints: ")
    if ch!="y":
        break
print("LHS Constraints = ",lhs_cons)
print("RHS Values = ",rhs_val)

#Pandas - dataframe - is a way to read data in table format (row & column)
import pandas as pd

data = [["Sachin",47],["Virat",33],["Rohit",35]]
df1 = pd.DataFrame(data,columns=['Name','Age'])
print(df1)

VIDEO 7 - Watch the video here

import pandas as pd
import sqlite3
con_str = sqlite3.connect("LibraryMS.db")
cursor = con_str.cursor()
q1 = "select * from students"
rows = cursor.execute(q1)
list2 = list(rows.fetchall())

con_str.close()
data_df = pd.DataFrame(list2)
print(data_df)

list1=[["Q1 2022",2300,3400,1900],
       ["Q2 2022",2300,3400,1900],
       ["Q3 2022",2300,3400,1900],
       ["Q4 2022",2300,3400,1900]]
print(list1)
columns=["Quarter","Apple","Banana","Oranges"]
ind=["Jan-March","April-June","Jul-Sep","Oct-Dec"]
data_df = pd.DataFrame(list1, columns=columns,index=ind)
print(data_df)
# df.iloc  & loc
print(data_df.iloc[0:3,-3:])
print(data_df.iloc[0:3,[1,3]])

print(data_df.loc[['Jan-March',"Oct-Dec"],['Apple',"Oranges"]])

import pandas as pd

data_df1 = pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_usage.csv")
print(data_df1)
data_df2 = pd.read_csv("https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/user_device.csv")
print(data_df2)

VIDEO DAY 8 - Watch the Video Here

import pandas as pd
import unicodedata
import nltk


#remove accent functions
def remove_accent(text):
    txt = unicodedata.normalize('NFKD',text).encode('ascii',errors='ignore').decode('utf-8')
    return txt
#getting the stop words set
STOP_WORDS = set(remove_accent(word) for word in nltk.corpus.stopwords.words('portuguese'))

#defining a function to perform NLP processes
def nlp_analysis_1(comment):
    #nlp 1. convert to lowercase
    comments = comment.lower()
    #nlp 2. remove accents
    comments = remove_accent(comments)
    #nl 3. tokenize the content
    tokens = nltk.tokenize.word_tokenize(comments)
    return tokens

reviews = pd.read_csv("C:\\Users\\Hp\Downloads\\OnlineRetail-master\\order_reviews.csv")
#print(reviews['review_comment_message'])
#Step 1: removed the null values
comment_text = reviews[reviews['review_comment_message'].notnull()].copy()
print(comment_text.columns)
comment_text['review_comment_message'] = comment_text['review_comment_message'].apply(nlp_analysis_1)
print(comment_text['review_comment_message'])

VIDEO DAY 9

Click here to access the presentation slides

VIDEO DAY 10

SQL Learning

livesql.oracle.com

Select * from hr.employees;

select first_name, last_name,hire_date,salary from hr.employees;

select first_name FirstName, last_name,hire_date,salary from hr.employees;

select first_name || ‘ ‘|| last_name FULLNAME,hire_date,salary from hr.employees;

select first_name || ‘ ‘|| last_name FULLNAME,hire_date,salary *12 ANNUAL_SALARY from hr.employees;

select first_name || ‘ ‘|| last_name FULLNAME,hire_date,salary *12 ANNUAL_SALARY from hr.employees order by Last_name;

select first_name || ‘ ‘|| last_name FULLNAME,hire_date,salary *12 ANNUAL_SALARY

from hr.employees

order by Hire_date, Last_name;

select first_name || ‘ ‘|| last_name FULLNAME,hire_date,salary *12 ANNUAL_SALARY , COMMISSION_PCT

from hr.employees

order by COMMISSION_PCT NULLS First;

select first_name “First Name”, last_name,hire_date,salary from hr.employees;

select first_name, last_name,hire_date,salary from hr.employees where salary>=9000;

select first_name, last_name,hire_date,salary from hr.employees

where salary>=9000 and salary <=12000;

select first_name, last_name,hire_date,salary from hr.employees

where salary BETWEEN 9000 and 12000;

select first_name, last_name,hire_date,salary, DEPARTMENT_ID from hr.employees

where salary>=9000 or DEPARTMENT_ID =80;

select distinct salary from hr.employees;

— Tendulkar 9000

— Tendulkar 15000

VIDEO OF LECTURE

PRACTICE PROBLEM: Click here

21

Share on Facebook

admin 0 Comments