Swapnil Saurav

Object Oriented Programming – Properties using Python
”’
ENCAPSULATION:
1. Giving required access to the user:
a. public access: public members can be called by any class from any where
b. protected access (_member): this behaves like public
c. private access (__member): This is visible only within the class (NOT Outside the class)
”’
class Patient:
total = 0

def getinfo(self, name,age):
self.name = name
self.age = age
self.__yob = 2023-age # private member
print(“Name is: “,self.name)
print(“Age is: “,self.age)
print(“You were born in the year”,self.__yob)
self._yearofbirth()

def _yearofbirth(self): # protected member
print(“Method: You were born in the year”,2023 self.age)


class OutPatient(Patient):
def op_data(self,dept,hospital):
self.dept = dept
self.hospital = hospital
print(“Department: “,self.dept)
print(“Hosipital: “,self.hospital)
def fp_data2(self,a,b):
print(“OutPatient: Patient is from Dubai!”)
class ForeignPatient(OutPatient):

def fp_data(self):
print(“Patient is from Dubai!”)

def fp_data(self,a):
print(“Patient is from Dubai!”)

def fp_data3(self,a,b):
print(“ForeignPatient: Patient is from Dubai!”)

p1 = Patient()
p1.getinfo(“Sachin”,51)
print(p1.name)
#print(p1.__yob)
p1._yearofbirth()
# protected members, by definition it exists in Python but
# practically its like public members
op1 = OutPatient()
op1.op_data(“General Checkup”,“Apollo”)
op1.getinfo(“Virat”,35)

fp1 = ForeignPatient()
fp1.getinfo(“Warner”, 38)

#fp1.fp_data() – this would work in Java & C++ but not in Python
fp1.fp_data2(3,5)
#fp1.fp_data(7) – this would work in Java & C++ but not in Python

Video Link: Object Oriented Programming – Properties Using Python

Data Science Tutorial Aug 2023

MACHINE LEARNING TUTORIAL

## https://github.com/swapnilsaurav/MachineLearning

link =“https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/1_Data_PreProcessing.csv”
import numpy as np
import pandas as pd

df = pd.read_csv(link)
#print(df)

X =df.iloc[:,:-1].values
y =df.iloc[:,-1].values
#print(X)
#print(y)


## 1 Handling missing values:
# a) lot of values in the rows / column – drop/delete them
# b) replace with median / mean – numeric & mode – in case of categorical
## Scikit learn package: This package does everything for machine learning
## pip install scikit-learn
# class SimpleImputer belonging to sklearn (scikit-learn) we will use to replace missing values
from sklearn.impute import SimpleImputer
imputer =SimpleImputer(missing_values=np.nan, strategy= ‘mean’)
imputer = imputer.fit(X[:,1:])
X[:,1:3] = imputer.transform(X[:,1:])
#print(X)

## 2 Handling categorical values
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0])
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(), [0])], remainder=‘passthrough’)
# remainder is to indicate what should columns that are not transformed be
# we are saying, let them be as it is- passthrough
X = transform.fit_transform(X)
y = lc.fit_transform(y) # y will not have ColumnTranformer
X = X[:,1:]
#print(X)

## 3 Handling Outliers

## 4 Creating Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.20)


## 5 Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)
print(X_train, y_train, X_test,y_test)

# K-Fold Cross Validation
from sklearn.model_selection import KFold
kf = KFold(n_splits=3)
for train, test in kf.split(X):
print(“============”)
print(train, \n,test)
#### MODEL



import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv(“D:/datasets/2_Marks_Data.csv”)
print(“Columns are:”,df.columns)
print(“Shape: “,df.shape)
print(“Top 5 rows are: \n,df.head(3))

print(“Describe :\n,df.describe())
plt.scatter(df[“Hours”], df[“Marks”])
plt.show()

X = df.iloc[:,:1].values
y = df.iloc[:,1].values
from sklearn.model_selection import train_test_split
X_train, X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=1)

# running the algorithm- Simple Linear Regression
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()

#train the data
regressor.fit(X_train, y_train)
#
#above model will give the line in the form mx + c
#output values
print(“Coefficient (m / slope): “, regressor.coef_)
print(“Intercept (c / constant): “, regressor.intercept_)
”’
Coefficient (m / slope): [7.47852733]
Intercept (c / constant): 20.591719221277778
Y = 7.5 * X + 20.6
”’
y_pred = regressor.predict(X_test)
result_df = pd.DataFrame({‘Actual’:y_test, ‘Predicted’:y_pred})
print(result_df)

# RMSE:
# Root (Square root)
# Mean (take the mean: sum of the square /n)
# Squared (square the difference)
# Error (difference)

from sklearn import metrics
mse = metrics.mean_squared_error(y_test, y_pred) #like variance
rmse = mse**0.5 # like std dev
print(“RMSE = “,rmse)
”’
Regression: Output (Marks) is a continous variable
Algorithm: Simple (as it has only 1 X column) Linear (assuming that dataset is linear) Regression
X – independent variable(s)
Y – dependent variable
”’
import pandas as pd
import matplotlib.pyplot as plt
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/2_Marks_Data.csv”
df = pd.read_csv(link)
X = df.iloc[:,:1].values
y = df.iloc[:,1].values

”’
# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=’mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)

# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[0])],remainder=’passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
#print(X)
”’

# EDA – Exploratory Data Analysis
plt.scatter(x=df[‘Hours’],y=df[‘Marks’])
plt.show()
”’
Scatter plots – shows relationship between X and Y variables. You can have:
1. Positive correlation:
2. Negative correlation:
3. No Correlation
4. Correlation: 0 to +/- 1
5. Correlation value: 0 to +/- 0.5 : no correlation
6. Strong correlation value will be closer to +/- 1
7. Equation: straight line => y = mx + c
”’
# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=100)
print(X_train)

”’
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’

## RUN THE MODEL
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
# fit – train the model
regressor.fit(X_train, y_train)
print(f”M/Coefficient/Slope = {regressor.coef_} and the Constant = {regressor.intercept_})

# y = 7.5709072 X + 20.1999196152844
# M/Coefficient/Slope = [7.49202113] and the Constant = 21.593606679699406

y_pred = regressor.predict(X_test)
result_df =pd.DataFrame({‘Actual’: y_test, ‘Predicted’: y_pred})
print(result_df)

# Analyze the output
from sklearn import metrics
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
print(“Root Mean Squared Error (Variance) = “,mse**0.5)
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(“Mean Absolute Error = “,mae)
print(“R Square is (Variance)”,metrics.r2_score(y_test, y_pred))

## Bias is based on training data
y_pred_tr = regressor.predict(X_train)
mse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred_tr)
print(“Root Mean Squared Error (Bias) = “,mse**0.5)
print(“R Square is (Bias)”,metrics.r2_score(y_train, y_pred_tr))
## Bias v Variance
import pandas as pd
import matplotlib.pyplot as plt
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/3_Startups.csv”
df = pd.read_csv(link)
print(df.describe())
X = df.iloc[:,:4].values
y = df.iloc[:,4].values

”’
# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=’mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)
”’
# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,3] = lc.fit_transform(X[:,3])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[3])],remainder=‘passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
print(X)


# EDA – Exploratory Data Analysis
plt.scatter(x=df[‘Administration’],y=df[‘Profit’])
plt.show()
plt.scatter(x=df[‘R&D Spend’],y=df[‘Profit’])
plt.show()
plt.scatter(x=df[‘Marketing Spend’],y=df[‘Profit’])
plt.show()

# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=100)
print(X_train)

”’
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’


## RUN THE MODEL
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
# fit – train the model
regressor.fit(X_train, y_train)
print(f”M/Coefficient/Slope = {regressor.coef_} and the Constant = {regressor.intercept_})

# y = -3791.2 x Florida -3090.1 x California + 0.82 R&D – 0.05 Admin + 0.022 Marketing+ 56650


y_pred = regressor.predict(X_test)
result_df =pd.DataFrame({‘Actual’: y_test, ‘Predicted’: y_pred})
print(result_df)

# Analyze the output
from sklearn import metrics
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
print(“Root Mean Squared Error (Variance) = “,mse**0.5)
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(“Mean Absolute Error = “,mae)
print(“R Square is (Variance)”,metrics.r2_score(y_test, y_pred))

## Bias is based on training data
y_pred_tr = regressor.predict(X_train)
mse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred_tr)
print(“Root Mean Squared Error (Bias) = “,mse**0.5)
print(“R Square is (Bias)”,metrics.r2_score(y_train, y_pred_tr))

”’
Case 1: All the columns are taken into account:
Mean Absolute Error = 8696.887641252619
R Square is (Variance) 0.884599945166969
Root Mean Squared Error (Bias) = 7562.5657508560125
R Square is (Bias) 0.9624157828452926
”’
## Testing

import statsmodels.api as sm
import numpy as np
X = np.array(X, dtype=float)
print(“Y:\n,y)
summ1 = sm.OLS(y,X).fit().summary()
print(“Summary of All X \n—————-\n:”,summ1)
import pandas as pd
import matplotlib.pyplot as plt
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/3_Startups.csv”
df = pd.read_csv(link)
print(df.describe())
X = df.iloc[:,:4].values
y = df.iloc[:,4].values

”’
# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=’mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)
”’
# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,3] = lc.fit_transform(X[:,3])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[3])],remainder=‘passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
print(X)

”’
After doing Backward elemination method we realized that all the state columns
are not significantly impacting the analysis hence removing those 2 columns too.
”’
X = X[:,2:] # after backward elemination

# EDA – Exploratory Data Analysis
plt.scatter(x=df[‘Administration’],y=df[‘Profit’])
plt.show()
plt.scatter(x=df[‘R&D Spend’],y=df[‘Profit’])
plt.show()
plt.scatter(x=df[‘Marketing Spend’],y=df[‘Profit’])
plt.show()

# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=100)
print(X_train)

”’
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’


## RUN THE MODEL
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
# fit – train the model
regressor.fit(X_train, y_train)
print(f”M/Coefficient/Slope = {regressor.coef_} and the Constant = {regressor.intercept_})

# y = -3791.2 x Florida -3090.1 x California + 0.82 R&D – 0.05 Admin + 0.022 Marketing+ 56650


y_pred = regressor.predict(X_test)
result_df =pd.DataFrame({‘Actual’: y_test, ‘Predicted’: y_pred})
print(result_df)

# Analyze the output
from sklearn import metrics
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
print(“Root Mean Squared Error (Variance) = “,mse**0.5)
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(“Mean Absolute Error = “,mae)
print(“R Square is (Variance)”,metrics.r2_score(y_test, y_pred))

## Bias is based on training data
y_pred_tr = regressor.predict(X_train)
mse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred_tr)
print(“Root Mean Squared Error (Bias) = “,mse**0.5)
print(“R Square is (Bias)”,metrics.r2_score(y_train, y_pred_tr))

”’
Case 1: All the columns are taken into account:
Mean Absolute Error = 8696.887641252619
R Square is (Variance) 0.884599945166969
Root Mean Squared Error (Bias) = 7562.5657508560125
R Square is (Bias) 0.9624157828452926
”’
## Testing

import statsmodels.api as sm
import numpy as np
X = np.array(X, dtype=float)
#X = X[:,[2,3,4]]
print(“Y:\n,y)
summ1 = sm.OLS(y,X).fit().summary()
print(“Summary of All X \n—————-\n:”,summ1)

## Test for linearity
# 1. All features (X) should be correlated to Y
# 2. Multicollinearity: Within X there should not be any correlation,
# if its there then take any one for the analysis
import pandas as pd
import matplotlib.pyplot as plt
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/4_Position_Salaries.csv”
df = pd.read_csv(link)
print(df.describe())
X = df.iloc[:,1:2].values
y = df.iloc[:,2].values

”’
# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=’mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)
”’
”’
# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,3] = lc.fit_transform(X[:,3])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[3])],remainder=’passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
print(X)

”’
”’
After doing Backward elemination method we realized that all the state columns
are not significantly impacting the analysis hence removing those 2 columns too.

X = X[:,2:] # after backward elemination
”’
”’
# EDA – Exploratory Data Analysis
plt.scatter(x=df[‘Level’],y=df[‘Salary’])
plt.show()
”’

# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=100)
print(X_train)

from sklearn.linear_model import LinearRegression
from sklearn import metrics
”’
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’
”’
#Since dataset is too small, lets take entire data for training
X_train, y_train = X,y
X_test, y_test = X,y
”’
”’
## RUN THE MODEL

regressor = LinearRegression()
# fit – train the model
regressor.fit(X_train, y_train)
print(f”M/Coefficient/Slope = {regressor.coef_} and the Constant = {regressor.intercept_}”)

# y =
y_pred = regressor.predict(X_test)
result_df =pd.DataFrame({‘Actual’: y_test, ‘Predicted’: y_pred})
print(result_df)

# Analyze the output

mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
print(“Root Mean Squared Error (Variance) = “,mse**0.5)
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(“Mean Absolute Error = “,mae)
print(“R Square is (Variance)”,metrics.r2_score(y_test, y_pred))

## Bias is based on training data
y_pred_tr = regressor.predict(X_train)
mse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred_tr)
print(“Root Mean Squared Error (Bias) = “,mse**0.5)
print(“R Square is (Bias)”,metrics.r2_score(y_train, y_pred_tr))

# Plotting the data for output
plt.scatter(x=df[‘Level’],y=df[‘Salary’])
plt.plot(X,y_pred)
plt.xlabel(“Level”)
plt.ylabel(“Salary”)
plt.show()
”’

# 3. Model – Polynomial regression analysis
# y = C + m1 * X + m2 * x square
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

for i in range(1,10):
#prepare the parameters
parameters = [(‘polynomial’, PolynomialFeatures(degree=i)),(‘modal’,LinearRegression())]
pipe = Pipeline(parameters)
pipe.fit(X_train,y_train)
y_pred = pipe.predict(X)
## Bias is based on training data
y_pred_tr = pipe.predict(X_train)
mse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred_tr)
rmse_tr = mse ** 0.5
print(“Root Mean Squared Error (Bias) = “,rmse_tr)
print(“R Square is (Bias)”,metrics.r2_score(y_train, y_pred_tr))

## Variance is based on validation data
y_pred_tt = pipe.predict(X_test)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred_tt)
rmse_tt = mse ** 0.5
print(“Root Mean Squared Error (Variance) = “, rmse_tt)
print(“R Square is (Variance)”, metrics.r2_score(y_test, y_pred_tt))
print(“Difference Between variance and bias = “,rmse_tt – rmse_tr)
# Plotting the data for output
plt.scatter(x=df[‘Level’],y=df[‘Salary’])
plt.plot(X,y_pred)
plt.title(“Polynomial Analysis degree =”+str(i))
plt.xlabel(“Level”)
plt.ylabel(“Salary”)
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
#link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/4_Position_Salaries.csv”
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/3_Startups.csv”
df = pd.read_csv(link)
print(df.describe())
X = df.iloc[:,0:4].values
y = df.iloc[:,4].values

”’
# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=’mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)
”’

# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,3] = lc.fit_transform(X[:,3])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[3])],remainder=‘passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
print(X)


”’
After doing Backward elemination method we realized that all the state columns
are not significantly impacting the analysis hence removing those 2 columns too.

X = X[:,2:] # after backward elemination
”’
”’
# EDA – Exploratory Data Analysis
plt.scatter(x=df[‘Level’],y=df[‘Salary’])
plt.show()
”’

# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=100)
print(X_train)

from sklearn.linear_model import LinearRegression
from sklearn import metrics
”’
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’
”’
#Since dataset is too small, lets take entire data for training
X_train, y_train = X,y
X_test, y_test = X,y
”’

## RUN THE MODEL – Support Vector Machine Regressor (SVR)
from sklearn.svm import SVR
#regressor = SVR(kernel=’linear’)
#regressor = SVR(kernel=’poly’,degree=2,C=10)
# Assignment – Best value for gamma: 0.01 to 1 (0.05)
regressor = SVR(kernel=“rbf”,gamma=0.1,C=10)
# fit – train the model
regressor.fit(X_train, y_train)


# y =
y_pred = regressor.predict(X_test)
result_df =pd.DataFrame({‘Actual’: y_test, ‘Predicted’: y_pred})
print(result_df)

# Analyze the output

mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)
print(“Root Mean Squared Error (Variance) = “,mse**0.5)
mae = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(“Mean Absolute Error = “,mae)
print(“R Square is (Variance)”,metrics.r2_score(y_test, y_pred))

## Bias is based on training data
y_pred_tr = regressor.predict(X_train)
mse = metrics.mean_squared_error(y_true=y_train, y_pred=y_pred_tr)
print(“Root Mean Squared Error (Bias) = “,mse**0.5)
print(“R Square is (Bias)”,metrics.r2_score(y_train, y_pred_tr))


# Plotting the data for output
plt.scatter(X_train[:,2],y_pred_tr)
#plt.plot(X_train[:,2],y_pred_tr)
plt.show()

#Decision Tree & Random Forest
import pandas as pd
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/3_Startups.csv”
link = “D:\\datasets\\3_Startups.csv”
df = pd.read_csv(link)
print(df)

#X = df.iloc[:,:4].values
X = df.iloc[:,:1].values
y = df.iloc[:,:-1].values
from sklearn.model_selection import train_test_split
X_train, X_test,y_train,y_test = train_test_split(X,y,test_size=0.25,random_state=100)

from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()
regressor.fit(X_train,y_train)
y_pred = regressor.predict(X_test)

# Baging, Boosting, Ensemble
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=10)
regressor.fit(X_train,y_train)
y_pred = regressor.predict(X_test)

## Assignment these algorithms and check the RMSE and R square values for both of these algorithms
# Ridge Lasso Elasticnet
import pandas as pd
link=“https://raw.githubusercontent.com/swapnilsaurav/Dataset/master/student_scores_multi.csv”
df = pd.read_csv(link)
print(df)
X = df.iloc[:,0:3].values
y = df.iloc[:,3].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.85, random_state=100)

from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
lr_ridge = Ridge(alpha=0.8)
lr_ridge.fit(X_train,y_train)
y_ridge_pred = lr_ridge.predict(X_test)

from sklearn.metrics import r2_score
r2_ridge_test = r2_score(y_test, y_ridge_pred)

y_ridge_pred_tr = lr_ridge.predict(X_train)
r2_ridge_train = r2_score(y_train, y_ridge_pred_tr)
print(f”Ridge Regression: Train R2 = {r2_ridge_train} and Test R2={r2_ridge_test})
# Classifications algorithm: supervised algo which predicts the class
”’
classifier: algorithm that we develop
model: training and predicting the outcome
features: the input data (columns)
target: class that we need to predict
classification: binary (2 class outcome) or multiclass (more than 2 classes)

Steps to run the model:
1. get the data
2. preprocess the data
3. eda
4. train the model
5. predict the model
6. evaluate the model

”’
#1. Logistic regression
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/5_Ads_Success.csv”
import pandas as pd
df = pd.read_csv(link)
X = df.iloc[:,1:4].values
y = df.iloc[:,4].values

from sklearn.preprocessing import LabelEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0] )

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25, random_state=100)

# Scaling as Age and Salary are in different range of values
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

## Build the model
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)

# visualize the outcome
X_train = X_train[:,1:]
X_test = X_test[:,1:]
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
x_set, y_set = X_train, y_train
X1,X2 = np.meshgrid(np.arange(start = x_set[:,0].min()-1, stop=x_set[:,0].max()+1, step=0.01),
np.arange(start = x_set[:,1].min()-1, stop=x_set[:,1].max()+1, step=0.01))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
cmap=ListedColormap((‘red’,‘green’)))
plt.show()
# Classifications algorithm: supervised algo which predicts the class
”’
classifier: algorithm that we develop
model: training and predicting the outcome
features: the input data (columns)
target: class that we need to predict
classification: binary (2 class outcome) or multiclass (more than 2 classes)

Steps to run the model:
1. get the data
2. preprocess the data
3. eda
4. train the model
5. predict the model
6. evaluate the model

”’
#1. Logistic regression
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/5_Ads_Success.csv”
link = “D:\\datasets\\5_Ads_Success.csv”
import pandas as pd
df = pd.read_csv(link)
X = df.iloc[:,1:4].values
y = df.iloc[:,4].values

from sklearn.preprocessing import LabelEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0] )

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25, random_state=100)

# Scaling as Age and Salary are in different range of values
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

## Build the model
”’
## LOGISTIC REGRESSION
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)
”’
from sklearn.svm import SVC
”’
## Support Vector Machine – Classifier
classifier = SVC(kernel=’linear’)

classifier = SVC(kernel=’rbf’,gamma=100, C=100)
”’
from sklearn.neighbors import KNeighborsClassifier
## Refer types of distances:
# https://designrr.page/?id=200944&token=2785938662&type=FP&h=7229

classifier = KNeighborsClassifier(n_neighbors=9, metric=‘minkowski’)
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)

# visualize the outcome
X_train = X_train[:,1:]
X_test = X_test[:,1:]
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
x_set, y_set = X_train, y_train
X1,X2 = np.meshgrid(np.arange(start = x_set[:,0].min()-1, stop=x_set[:,0].max()+1, step=0.01),
np.arange(start = x_set[:,1].min()-1, stop=x_set[:,1].max()+1, step=0.01))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
cmap=ListedColormap((‘red’,‘green’)))

#Now we will plot training data
for i, j in enumerate(np.unique(y_set)):
plt.scatter(x_set[y_set==j,0],
x_set[y_set==j,1], color=ListedColormap((“red”,“green”))(i),
label=j)
plt.show()

## Model Evaluation using Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(“Confusion Matrix: \n,cm)
cr = classification_report(y_test, y_pred)
accs = accuracy_score(y_test, y_pred)
print(“classification_report: \n,cr)
print(“accuracy_score: “,accs)
import sklearn.tree

link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/5_Ads_Success.csv”
link = “D:\\datasets\\5_Ads_Success.csv”
import pandas as pd
df = pd.read_csv(link)
X = df.iloc[:,1:4].values
y = df.iloc[:,4].values

from sklearn.preprocessing import LabelEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0] )

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25, random_state=100)

# Scaling as Age and Salary are in different range of values
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

## Build the model
”’
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion=”gini”)
”’
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=39, criterion=“gini”)
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)

# visualize the outcome
X_train = X_train[:,1:]
X_test = X_test[:,1:]
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
x_set, y_set = X_train, y_train
X1,X2 = np.meshgrid(np.arange(start = x_set[:,0].min()-1, stop=x_set[:,0].max()+1, step=0.01),
np.arange(start = x_set[:,1].min()-1, stop=x_set[:,1].max()+1, step=0.01))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
cmap=ListedColormap((‘red’,‘green’)))

#Now we will plot training data
for i, j in enumerate(np.unique(y_set)):
plt.scatter(x_set[y_set==j,0],
x_set[y_set==j,1], color=ListedColormap((“red”,“green”))(i),
label=j)
plt.show()

## Model Evaluation using Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(“Confusion Matrix: \n,cm)
cr = classification_report(y_test, y_pred)
accs = accuracy_score(y_test, y_pred)
print(“classification_report: \n,cr)
print(“accuracy_score: “,accs)

”’
# Show decision tree created

output = sklearn.tree.export_text(classifier)
print(output)
# visualize the tree
fig = plt.figure(figsize=(40,60))
tree_plot = sklearn.tree.plot_tree(classifier)
plt.show()
”’

”’
In Ensemble Algorithms – we run multiple algorithms to improve the performance
of a given business objective:
1. Boosting: When you run same algorithm – Input varies based on weights
2. Bagging: When you run same algorithm – average of all
3. Stacking: Over different algorithms – average of all
”’
import sklearn.tree

link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/5_Ads_Success.csv”
link = “D:\\datasets\\5_Ads_Success.csv”
import pandas as pd
df = pd.read_csv(link)
X = df.iloc[:,1:4].values
y = df.iloc[:,4].values

from sklearn.preprocessing import LabelEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0] )

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25, random_state=100)

# Scaling as Age and Salary are in different range of values
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

## Build the model
”’
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion=”gini”)

from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=39, criterion=”gini”)
”’
from sklearn.ensemble import AdaBoostClassifier
classifier = AdaBoostClassifier(n_estimators=7)
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)

# visualize the outcome
X_train = X_train[:,1:]
X_test = X_test[:,1:]
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np
x_set, y_set = X_train, y_train
X1,X2 = np.meshgrid(np.arange(start = x_set[:,0].min()-1, stop=x_set[:,0].max()+1, step=0.01),
np.arange(start = x_set[:,1].min()-1, stop=x_set[:,1].max()+1, step=0.01))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),
cmap=ListedColormap((‘red’,‘green’)))

#Now we will plot training data
for i, j in enumerate(np.unique(y_set)):
plt.scatter(x_set[y_set==j,0],
x_set[y_set==j,1], color=ListedColormap((“red”,“green”))(i),
label=j)
plt.show()

## Model Evaluation using Confusion Matrix
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(“Confusion Matrix: \n,cm)
cr = classification_report(y_test, y_pred)
accs = accuracy_score(y_test, y_pred)
print(“classification_report: \n,cr)
print(“accuracy_score: “,accs)

”’
# Show decision tree created

output = sklearn.tree.export_text(classifier)
print(output)
# visualize the tree
fig = plt.figure(figsize=(40,60))
tree_plot = sklearn.tree.plot_tree(classifier)
plt.show()
”’

”’
In Ensemble Algorithms – we run multiple algorithms to improve the performance
of a given business objective:
1. Boosting: When you run same algorithm – Input varies based on weights
2. Bagging: When you run same algorithm – average of all
3. Stacking: Over different algorithms – average of all
”’

from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

X,y = make_blobs(n_samples=300, n_features=3, centers=4)
plt.scatter(X[:,0], X[:,1])
plt.show()

from sklearn.cluster import KMeans
km = KMeans(n_clusters=5, init=“random”,max_iter=100)
y_cluster =km.fit_predict(X)

plt.scatter(X[y_cluster==0,0],X[y_cluster==0,1],c=“blue”,label=“Cluster A”)
plt.scatter(X[y_cluster==1,0],X[y_cluster==1,1],c=“red”,label=“Cluster B”)
plt.scatter(X[y_cluster==2,0],X[y_cluster==2,1],c=“green”,label=“Cluster C”)
plt.scatter(X[y_cluster==3,0],X[y_cluster==3,1],c=“black”,label=“Cluster D”)
plt.scatter(X[y_cluster==4,0],X[y_cluster==4,1],c=“orange”,label=“Cluster E”)
plt.show()

distortion = []
max_centers = 30
for i in range(1,max_centers):
km = KMeans(n_clusters=i, init=“random”, max_iter=100)
y_cluster = km.fit(X)
distortion.append(km.inertia_)

print(“Distortion:\n,distortion)
plt.plot(range(1,max_centers),distortion,marker=“o”)
plt.show()

import pandas as pd
import matplotlib.pyplot as plt
link = “D:\\Datasets\\USArrests.csv”
df = pd.read_csv(link)
#print(df)
X = df.iloc[:,1:]
from sklearn.preprocessing import normalize
data = normalize(X)
data = pd.DataFrame(data)
print(data)

## plotting dendogram
import scipy.cluster.hierarchy as sch
dendo = sch.dendrogram(sch.linkage(data, method=‘ward’))
plt.axhline(y=0.7,color=“red”)
plt.show()
link = “D:\\datasets\\Market_Basket_Optimisation.csv”
import pandas as pd
df = pd.read_csv(link)
print(df)
from apyori import apriori
transactions = []
for i in range(len(df)):
if i%100==0:
print(“I = “,i)
transactions.append([str(df.values[i,j]) for j in range(20)])

## remove nan from the list
print(“Transactions:\n,transactions)

association_algo = apriori(transactions, min_confidence=0.2, min_support=0.02, min_lift=2)
print(“Association = “,list(association_algo))
”’
Time Series Forecasting – ARIMA method

1. Read and visualize the data
2. Stationary series
3. Optimal parameters
4. Build the model
5. Prediction
”’
import pandas as pd
#Step 1: read the data
link = “D:\\datasets\\gitdataset\\AirPassengers.csv”
air_passengers = pd.read_csv(link)

”’
#Step 2: visualize the data
import plotly.express as pe
fig = pe.line(air_passengers,x=”Month”,y=”#Passengers”)
fig.show()
”’
# Cleaning the data
from datetime import datetime
air_passengers[‘Month’] = pd.to_datetime(air_passengers[‘Month’])
air_passengers.set_index(‘Month’,inplace=True)

#converting to time series data
import numpy as np
ts_log = np.log(air_passengers[‘#Passengers’])
#creating rolling period – 12 months
import matplotlib.pyplot as plt
”’
moving_avg = ts_log.rolling(12).mean
plt.plot(ts_log)
plt.plot(moving_avg)
plt.show()
”’
#Step 3: Decomposition into: trend, seasonality, error ( or residual or noise)
”’
Additive decomposition: linear combination of above 3 factors:
Y(t) =T(t) + S(t) + E(t)

Multiplicative decomposition: product of 3 factors:
Y(t) =T(t) * S(t) * E(t)
”’
from statsmodels.tsa.seasonal import seasonal_decompose
decomposed = seasonal_decompose(ts_log,model=“multiplicative”)
decomposed.plot()
plt.show()

# Step 4: Stationary test
”’
To make Time series analysis, the TS should be stationary.
A time series is said to be stationary if its statistical properties
(mean, variance, autocorrelation) doesnt change by a large value
over a period of time.
Types of tests:
1. Augmented Dickey Fuller test (ADH Test)
2. Kwiatkowski Phillips Schnidt Shin (KPSS) test
3. Phillips Perron (PP) Test

Null Hypothesis: The time series is not stationary
Alternate Hypothesis: Time series is stationary
If p >0.05 we reject Null Hypothesis
”’
from statsmodels.tsa.stattools import adfuller
result = adfuller(air_passengers[‘#Passengers’])
print(“ADF Stats: \n,result[0])
print(“p value = “,result[1])
”’
To reject Null hypothesis, result[0] less than 5% critical region value
and p > 0.05
”’

# Run the model
”’
ARIMA model: Auto-Regressive Integrative Moving Average
AR: p predicts the current value
I: d integrative by removing trend and seasonality component from previous period
MA: q represents Moving Average

AIC- Akaike’s Information Criterion (AIC) – helps to find optimal p,d,q values
BIC – Bayesian Information Criterion (BIC) – alternative to AIC
”’
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plot_acf(air_passengers[‘#Passengers’].diff().dropna())
plot_pacf(air_passengers[‘#Passengers’].diff().dropna())
plt.show()
”’
How to read above graph:
To find q (MA), we look at the Autocorrelation graph and see where there is a drastic change:
here, its at 1, so q = 1 (or 2 as at 2, it goes to -ve)

To find p (AR) – sharp drop in Partial Autocorrelation graph:
here, its at 1, so p = 1 (or 2 as at 2, it goes to -ve)

for d (I) – we need to try with multiple values
intially we will take as 1

”’
”’
Time Series Forecasting – ARIMA method

1. Read and visualize the data
2. Stationary series
3. Optimal parameters
4. Build the model
5. Prediction
”’
import pandas as pd
#Step 1: read the data
link = “D:\\datasets\\gitdataset\\AirPassengers.csv”
air_passengers = pd.read_csv(link)

”’
#Step 2: visualize the data
import plotly.express as pe
fig = pe.line(air_passengers,x=”Month”,y=”#Passengers”)
fig.show()
”’
# Cleaning the data
from datetime import datetime
air_passengers[‘Month’] = pd.to_datetime(air_passengers[‘Month’])
air_passengers.set_index(‘Month’,inplace=True)

#converting to time series data
import numpy as np
ts_log = np.log(air_passengers[‘#Passengers’])
#creating rolling period – 12 months
import matplotlib.pyplot as plt
”’
moving_avg = ts_log.rolling(12).mean
plt.plot(ts_log)
plt.plot(moving_avg)
plt.show()
”’
#Step 3: Decomposition into: trend, seasonality, error ( or residual or noise)
”’
Additive decomposition: linear combination of above 3 factors:
Y(t) =T(t) + S(t) + E(t)

Multiplicative decomposition: product of 3 factors:
Y(t) =T(t) * S(t) * E(t)
”’
from statsmodels.tsa.seasonal import seasonal_decompose
decomposed = seasonal_decompose(ts_log,model=“multiplicative”)
decomposed.plot()
plt.show()

# Step 4: Stationary test
”’
To make Time series analysis, the TS should be stationary.
A time series is said to be stationary if its statistical properties
(mean, variance, autocorrelation) doesnt change by a large value
over a period of time.
Types of tests:
1. Augmented Dickey Fuller test (ADH Test)
2. Kwiatkowski Phillips Schnidt Shin (KPSS) test
3. Phillips Perron (PP) Test

Null Hypothesis: The time series is not stationary
Alternate Hypothesis: Time series is stationary
If p >0.05 we reject Null Hypothesis
”’
from statsmodels.tsa.stattools import adfuller
result = adfuller(air_passengers[‘#Passengers’])
print(“ADF Stats: \n,result[0])
print(“p value = “,result[1])
”’
To reject Null hypothesis, result[0] less than 5% critical region value
and p > 0.05
”’

# Run the model
”’
ARIMA model: Auto-Regressive Integrative Moving Average
AR: p predicts the current value
I: d integrative by removing trend and seasonality component from previous period
MA: q represents Moving Average

AIC- Akaike’s Information Criterion (AIC) – helps to find optimal p,d,q values
BIC – Bayesian Information Criterion (BIC) – alternative to AIC
”’
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plot_acf(air_passengers[‘#Passengers’].diff().dropna())
plot_pacf(air_passengers[‘#Passengers’].diff().dropna())
plt.show()
”’
How to read above graph:
To find q (MA), we look at the Autocorrelation graph and see where there is a drastic change:
here, its at 1, so q = 1 (or 2 as at 2, it goes to -ve)

To find p (AR) – sharp drop in Partial Autocorrelation graph:
here, its at 1, so p = 1 (or 2 as at 2, it goes to -ve)

for d (I) – we need to try with multiple values
intially we will take as 1

”’
from statsmodels.tsa.arima.model import ARIMA
model = ARIMA(air_passengers[‘#Passengers’], order=(1,1,1))
result = model.fit()
plt.plot(air_passengers[‘#Passengers’])
plt.plot(result.fittedvalues)
plt.show()
print(“ARIMA Model Summary”)
print(result.summary())

model = ARIMA(air_passengers[‘#Passengers’], order=(4,1,4))
result = model.fit()
plt.plot(air_passengers[‘#Passengers’])
plt.plot(result.fittedvalues)
plt.show()
print(“ARIMA Model Summary”)
print(result.summary())

# Prediction using ARIMA model
air_passengers[‘Forecasted’] = result.predict(start=120,end=246)
air_passengers[[‘#Passengers’,‘Forecasted’]].plot()
plt.show()

# predict using SARIMAX Model
import statsmodels.api as sm
model = sm.tsa.statespace.SARIMAX(air_passengers[‘#Passengers’],order=(7,1,1), seasonal_order=(1,1,1,12))
result = model.fit()
air_passengers[‘Forecast_SARIMAX’] = result.predict(start=120,end=246)
air_passengers[[‘#Passengers’,‘Forecast_SARIMAX’]].plot()
plt.show()
”’
NLP – Natural Language Processing – analysing review comment to understand
reasons for positive and negative ratings.
concepts like: unigram, bigram, trigram

Steps we generally perform with NLP data:
1. Convert into lowercase
2. decompose (non unicode to unicode)
3. removing accent: encode the content to ascii values
4. tokenization: will break sentence to words
5. Stop words: not important words for analysis
6. Lemmetization (done only on English words): convert the words into dictionary words
7. N-grams: set of one word (unigram), two words (bigram), three words (trigrams)
8. Plot the graph based on the number of occurrences and Evaluate
”’
”’
cardboard mousepad. Going worth price! Not bad
”’

link=“https://raw.githubusercontent.com/swapnilsaurav/OnlineRetail/master/order_reviews.csv”
import pandas as pd
import unicodedata
import nltk
import matplotlib.pyplot as plt
df = pd.read_csv(link)
print(list(df.columns))
”’
[‘review_id’, ‘order_id’, ‘review_score’, ‘review_comment_title’,
‘review_comment_message’, ‘review_creation_date’, ‘review_answer_timestamp’]
”’
df[‘review_creation_date’] = pd.to_datetime(df[‘review_creation_date’])

df[‘review_answer_timestamp’] = pd.to_datetime(df[‘review_answer_timestamp’])

# data preprocessing – making data ready for analysis
reviews_df = df[df[‘review_comment_message’].notnull()].copy()
#print(reviews_df)
”’
Write a function to perform basic preprocessing steps
”’
def basic_preprocessing(text):
txt_pp = text.lower()
print(txt_pp)
#remove accent

# applying basic preprocessing:
reviews_df[‘review_comment_message’] = \
reviews_df[‘review_comment_message’].apply(basic_preprocessing)


”’
NLP – Natural Language Processing – analysing review comment to understand
reasons for positive and negative ratings.
concepts like: unigram, bigram, trigram

Steps we generally perform with NLP data:
1. Convert into lowercase
2. decompose (non unicode to unicode)
3. removing accent: encode the content to ascii values
4. tokenization: will break sentence to words
5. Stop words: not important words for analysis
6. Lemmetization (done only on English words): convert the words into dictionary words
7. N-grams: set of one word (unigram), two words (bigram), three words (trigrams)
8. Plot the graph based on the number of occurrences and Evaluate
”’
”’
cardboard mousepad. Going worth price! Not bad
”’

link=“D:/datasets/OnlineRetail/order_reviews.csv”
import pandas as pd
import unicodedata
import nltk
import matplotlib.pyplot as plt
df = pd.read_csv(link)
print(list(df.columns))
”’
[‘review_id’, ‘order_id’, ‘review_score’, ‘review_comment_title’,
‘review_comment_message’, ‘review_creation_date’, ‘review_answer_timestamp’]
”’
#df[‘review_creation_date’] = pd.to_datetime(df[‘review_creation_date’])
#df[‘review_answer_timestamp’] = pd.to_datetime(df[‘review_answer_timestamp’])

# data preprocessing – making data ready for analysis
reviews_df = df[df[‘review_comment_message’].notnull()].copy()
#print(reviews_df)

# remove accents
def remove_accent(text):
return unicodedata.normalize(‘NFKD’,text).encode(‘ascii’,errors=‘ignore’).decode(‘utf-8’)
#STOP WORDS LIST:
STOP_WORDS = set(remove_accent(w) for w in nltk.corpus.stopwords.words(‘portuguese’))

”’
Write a function to perform basic preprocessing steps
”’
def basic_preprocessing(text):
#converting to lower case
txt_pp = text.lower()
#print(txt_pp)

#remove the accent
#txt_pp = unicodedata.normalize(‘NFKD’,txt_pp).encode(‘ascii’,errors=’ignore’).decode(‘utf-8’)
txt_pp =remove_accent(txt_pp)
#print(txt_pp)
#tokenize
txt_token = nltk.tokenize.word_tokenize(txt_pp)
#print(txt_token)

# removing stop words
txt_token = (w for w in txt_token if w not in STOP_WORDS and w.isalpha())
return txt_token

# applying basic preprocessing:
reviews_df[‘review_comment_words’] = \
reviews_df[‘review_comment_message’].apply(basic_preprocessing)

#get positive reviews – all 5 ratings in review_score
reviews_5 = reviews_df[reviews_df[‘review_score’]==5]

#get negative reviews – all 1 ratings
reviews_1 = reviews_df[reviews_df[‘review_score’]==1]

## write a function to creaet unigram, bigram, trigram
def create_ngrams(words):
unigram,bigrams,trigram = [],[],[]
for comment in words:
unigram.extend(comment)
bigrams.extend(.join(bigram) for bigram in nltk.bigrams(comment))
trigram.extend(‘ ‘.join(trigram) for trigram in nltk.trigrams(comment))
return unigram,bigrams,trigram

#create ngrams for rating 5 and rating 1
uni_5, bi_5, tri_5 = create_ngrams(reviews_5[‘review_comment_words’])
print(uni_5)
print(‘””””””””””””””””””‘)
print(bi_5)
print(” =========================================”)
print(tri_5)

uni_1, bi_1, tri_1 = create_ngrams(reviews_1[‘review_comment_words’])
#print(uni_5)

# distribution plot
def plot_dist(words, color):
nltk.FreqDist(words).plot()
JULY Weekend Data Science
print(); print(3+5*25+ 100); print(‘3+5*2-5+100 ‘)
print(‘3+5*2-5+100=’, 3 + 5*25+100) #parameters = 2
print(‘3+5*2-5+100=’,3+5*25+100, “Hello”,5*2)
# Comments – this for humans not for computer
# indentation is mandatory in Python

#Process finished with exit code 0 = Exit code 0 means no error as expected

#variable
value1 = 50 #defining a varaible called value1
# lets tale value1 and assume its value is 50
print(value1)
print(“Value1 =”,value1)
value1 = 100
print(“Value1 =”,value1)

value1 = “HELLO”
print(“Value1 =”,value1)

#Basic types of data in Python
# type()
value1 = 50
print(type(value1)) # <class ‘int’>

# int -> integer: -9999999, -87866,0,1,1000,4534563463
#
value1 = 50.0
print(type(value1)) #<class ‘float’>
# e.g. of float: 0.0, -9,0, 89.8777777777777777
print(5+3)
print(5+3.0)
## indentation, ; variables, int, float, type()

# 3. complex
# square root of -1 called complex numbers, example:
num1 = 5j
print(type(num1) ) # <class ‘complex’>
print(num1)
print(num1 * num1)
print(“===========”)
#4. str – string (text)
num1 = “hell” \
“o”
print(num1)
print(type(num1) ) #<class ‘str’>

num1 = ‘5.6’
print(type(num1) )
num1 = “””hello”””
print(type(num1) )
num1 = ”’How
are
you?”’
print(type(num1) )
print(num1)

# 5. bool (boolean): True or False
val1 = True
print(type(val1))

# # # #
# variable names in Python – should always begin with alphabet
# it accepts, alphabets, digits and _
a=5
b=10
c=a+b
print(c)

awetrwetwetwe=5
bveryaDAGDBBB=10
cYTIUYOYUOJJDHJGN= awetrwetwetwe + bveryaDAGDBBB
print(cYTIUYOYUOJJDHJGN)

num1 = 10
num2 = 5
total = num1 + num2
print(total)

# # # # #
cost_of_each_pen = 59
total_pens = 16
total_cost = cost_of_each_pen * total_pens
print(“Total cost = “,total_cost)
print(“Cost of each pen is”,cost_of_each_pen,“so the cost of”,total_pens,“pens will be”,total_cost)
#above output using format string (f-string)
print(f”Cost of each pen is {cost_of_each_pen} so the cost of {total_pens} pens will be {total_cost})
print(“Cost of each pen is {cost_of_each_pen} so the cost of {total_pens} pens will be {total_cost}”)

total_pens = 3
total_cost = 100
cost_of_each_pen = total_cost / total_pens
print(“Total cost = “,total_cost)
print(“Cost of each pen is”,cost_of_each_pen,“so the cost of”,total_pens,“pens will be”,total_cost)
#above output using format string (f-string)
print(f”Cost of each pen is {cost_of_each_pen:.2f} so the cost of {total_pens} pens will be {total_cost})
print(“Cost of each pen is {cost_of_each_pen} so the cost of {total_pens} pens will be {total_cost}”)

name,country,position = “Virat”,“India”,“Opener”
print(f”Player {name:>15}, plays for the country {country:^20} at {position:<20})
name,country,position = “Manbagwama”,“New Zealand”,“Wicket-Keeper”
print(f”Player {name:>15}, plays for the country {country:^20} at {position:<20})

# operators: Arithematic operations
## + – * / // (integer division) ** (power) %(modulus – remainder)
num1 = 56
num2 = 3
print(num1 + num2)
print(num1 – num2)
print(num1 * num2)
print(num1 / num2)

print(num1 // num2)
print(num1 % num2)
print(num1 ** num2)

print(100 ** (1/2))

# Relational, Logical, Membership

## Assignments ##
# WAP to calculate area and perimeter of a rectangular field when length and breadth are given
# WAP to calculate area and circumference of a circular field when radius is given

# Comparison / Relational Operators
# check relationship: > < >= <= == !=
# output is bool (True / False)
var1, var2, var3 = 10,20,10 #lets assign value 10 to var1
print(var1 > var2) # False
print(var1 > var3) # False
print(var1 < var2) # True
print(var1 < var3) #False
print(var1 >= var2) #False
print(var1 >= var3) #True
print(“===============”)
print(var1 <= var2) # True
print(var1 <= var3) # True
print(var1 == var2) #False – is var1 equal to var2 ?
print(var1 == var3) #True
print(var1 != var2) # True
print(var1 != var3) # False

# Logical operator: and or not
# input and output are bool
# prediction: Rohit and Surya will open the batting – False
# actual: Ishan and Surya opened the batting
# in AND, even if one is False entire thing becomes False
# prediction: Rohit or Surya will open the batting – True
# actual: Ishan and Surya opened the batting
# in OR, even if one is True entire thing becomes True
print(True and True) #True
print(False and False) #False
print(False and True) #False
print(True and False) #False

print(True or True) #True
print(False or False) #False
print(False or True) #True
print(True or False) #True

##
print(“check”)
var1, var2, var3 = 10,20,10
print(var1 > var2 and var1 > var3 or var1 < var2 and var1 == var3 or var1 != var2 and var1 <= var3)
#(T or T)
print(not True)
print(not False)

# Membership operator: in , not in
print(“i” in “india”)

# looping – when you run a block of code multiple times
## keep looping till yot get the stopping condition – WHILE loop
## exatly how many times to run a code: FOR loop

# if is to check the conditoon
num=8
# condition – usng if
if num ==8:
print(“Value is “,num)
print(“if is executed now”)

# you can have only IF
# or when you have something to talk about when IF is false – ELSE
if num ==18:
print(“Value is “,num)
print(“if is executed now”)
else:
print(“I am in ELSE”)


#WAP to check if a number is positive or not
num = 5
if num >0:
print(“Number is positive”)
else:
print(“Number is not positive”)

# WAP to check if a number is odd or even
num = 11
if num%2!=0:
print(“Its odd number”)
else:
print(“Its even number”)

## input() – take input from the user
val1 = input(“Enter your marks: “)
print(“1. Data type of val1 is: “,type(val1))
val1 = int(val1) # int() float() bool() str() complex() – explicit conversion
print(“2. Data type of val1 is: “,type(val1))
print(“Value you entered is “,val1)



## WAP a program to input value from the user and check if it has 0 at the end
## Value = 57 – it doesnt have zero at the end
## value = 60 – it should say it has zero at the end.

value = int(input(“Enter the number:”))
if value %10==0:
print(“This has zero at the end”)
else:
print(“It doesnt have zero at the end”)

# if a number is positive or negative or zero
num = int(input(“Enter a number: “))
if num == 0:
print(“Number is zero”)
elif num < 0:
print(“Number is negative”)
else:
print(“Number is Positive”)

sum = 360
avg = sum/5

if avg >= 75:
print(“Grade is A”)
elif avg >=60:
print(“Grade is B”)
elif avg>=50:
print(“Grade is C”)
elif avg >=35:
print(“Grade is D”)
else:
print(“Grade is E”)



# if the score is more than 90, invite them for tea with director
avg = 98 #98: 2 88: 2 68: 3 & #98: 2 88: 1 68: 3

if avg >=80:
if avg >= 90:
print(“You are invited for tea @5pm auditorium”)
if avg >=95:
print(“You win President Award!”)
print(“A+ Grade”)
print(“Excellent result”)
print(“Great going!”)
elif avg >=60:
print(“A grade”)
elif avg >=40:
print(“B Grade”)
elif avg >=30:
print(“D Grade”)
else:
print(“Sorry you have failed”)
print(“Try again next time”)


# find the highest number of the given 3 numbers
Num1 = 99
Num2 = 78
Num3 = 123
if Num1 > Num2:
if Num1 > Num3:
print(f”For the assignment given by Swapnil Num1={Num1} value is highest”)
if Num2 > Num1:
if Num2 > Num3:
print(f”For the assignment given by Swapnil Num2={Num2} value is highest”)
if Num3 > Num1:
if Num3 > Num3:
print(f”For the assignment given by SwapnilNum3={Num3} value is highest”)
## ##
num1 = 198
num2 = 198
num3 = 168

if num1 > num2:
if num1 > num3:
print(“{num1} is greater”)
else:
print(“{num3} is greater”)
else:
if num2 >num3:
print(“{num2} is greater”)
else:
print(“{num3} is greater”)

###
num1 = 98
num2 = 128
num3 = 168
high = num1
if high < num2:
high = num2
if high <num3:
high = num3
print(f”{high} is the highest”)


# WAP to input 3 sides of a triangle and find if its a equilateral, isoceles or scalene
num1 = 128
num2 = 128
num3 = 128
s1=num1; s2=num2; s3=num3
if s1==s2:
if s2==s3:
print(“As all 3 sides of triangle is equal hence it is an equilateral triangle”)
else:
print(“As 2 sides of an triangle are equal hence it is an isosceles triangle”)
else:
if s2==s3 or s3==s1:
print(“As 2 sides of an triangle are equal hence it is an isosceles triangle”)
else:
print(“As no sides are equal it is an scalene triangle”)


num1 = 128
num2 = 108
num3 = 128

if num1 == num2:
if num1 == num3:
print (“equilateral triangle”)
else:
print(“isoscales triangle”)
else:
if num2 == num3 or num1==num3:
print (“isoscales triangle”)
else:
print(“scalene triangle”)

## below: bad example of the code
num1 = 128
num2 = 108
num3 = 128

if num1 == num2 and num1==num3:
print (“equilateral triangle”)
if (num1 == num2 and num2!=num3) or (num1==num3 and num3!=num2) or (num2==num3 and num3!=num1):
print (“isoscales triangle”)
if (num1 !=num3 and num1!=num2) or (num2 !=num1 and num2!=num3) or (num3 !=num1 and num2!=num3):
print(“scalene triangle”)

# for loop
# range(a,b,c): a=start, b< end (upto, not equal to), c = increment
#range(2,12,3)# generates range of values- 2,5,8
# range(a,b) default c value is ONE
#range(5) #default start = 0, increment =1: 0,1,2,3,4

for counter in range(10):
print(counter)

# while: loops until the given condition
z=0
while z<=10:
print(z)
z=z+ 1
# divisible by 100 & 400 – leap year, divisible by 100 but not 400 – not a leap year
# divisible by 100 but divisible by 4 – leapyear

year = 400
if year %100==0:
if year %400==0:
print(“Leap Year”)
else:
print(“Not a leap year”)
else:
if year %4==0:
print(“Leap Year”)
else:
print(“Not a leap Year”)

####
a,b,c,d = 40,10,20,5
small = a
if b <small:
small=b
if c <small:
small=c
if d <small:
small=d

num=str(1234567)
print(“Number of characters = “,len(num))
num = 158
length = 1
a = num//10
if a!=0:
length+=1 # length = length + 1
num = num//10

a = num//10
if a!=0:
length+=1 # length = length + 1
num = num//10

a,b,c=20,10,30

if a < b:
a,b=b,a
if b < c:
if a>c:
print(c, “is second highest”)
else:
print(a,“is second highest”)
else:
print(b,“is second highest”)

eng = 85
maths,science,ss = 90,90,80
if eng >=80:
if ss>=80:
if maths>=80 and science >= 80:
print(“Science Strean”)
else:
print(“Humanities Stream”)
elif maths >= 50 and science >= 50:
print(“Commerce”)

else:
print(“Admission not possible”)
# loops – repeat multiple times
# for: used when you know how many times to repeat
# while: in which conditions to repeat

#range(): which generates range of values
# range(=start, <end, increment)
# range(3,11,2): 3,5,7,9
# range(=start, <end) : increment is default 1
# range(5,9): 5,6,7,8
# range(<end): default start = 0 & increment is 1
# range(4): 0,1,2,3

for counter in range(3,11,2):
print(“Counter = “,counter)
print(“Hello”)

#print even numbers from 6 to 20
for i in range(6,21,2):
print(i,end=“, “)
print()

# find sum of numbers between 5 and 9 both including
sum=0
for i in range(5,10):
sum+=i
print(“Sum = “,sum)

# generate first 4 multiples of 5: 5 10 15 20
num = 15
for j in range(4):
print(f”{num} * {j+1} = {num*(j+1)})

print(“WHILE : condition based repeation”)
count = 1
while count <=8:
count += 2
print(count)

# print hello till user says yes

while True:
print(“HELLO”)
ch = input(“Enter y to print again or anyother key stop: “)
if ch!=‘y’:
break

”’
* * * * *
* * * * *
* * * * *
* * * * *
* * * * *
”’
print(“Pattern 1”)
for j in range(5):
for i in range(5):
print(“*”, end=” “)
print()

”’
*
* *
* * *
* * * *
* * * * *
”’
print(“Pattern 2”)
for j in range(5):
for i in range(j + 1):
print(“*”, end=” “)
print()

”’
* * * * *
* * * *
* * *
* *
*
”’
print(“Pattern 3”)
for j in range(5):
for i in range(5 – j):
print(“*”, end=” “)
print()

”’
Assignment 1:
* * * * *
* * * *
* * *
* *
*

Assignment 2:
*
* *
* * *
* * * *
* * * * *

Assignment 3: Multiplication Table:

1 * 1 = 1 2 * 1 = 2 … 10 * 1 = 10
1 * 2 = 2 2 * 2 = 4 10 * 2 = 20

1 * 10 = 10 2 * 10 = 20 10 * 10 = 100
”’

## Check if Prime or not
num = 53
isPrime = True
for i in range(2, num // 2 + 1):
if num % i == 0:
isPrime = False
break

if isPrime:
print(f”{num} is Prime”)
else:
print(f”{num} is not Prime”)

import time
curr = time.time()
line = 0
## Generate Prime numbers between 20000 and 30000
for num in range(20000,30001):
isPrime = True
for i in range(2,num//2+1):
if num % i==0:
isPrime = False
break

if isPrime:
if line >= 20:
print()
line = 0
print(num, end=“,”)
line+=1
end = time.time()
print(\n\nTotal time taken to generate prime numbers is “,end-curr,“seconds”)
print(“Time is: “,time.time())
”’
1*1 = 1 2 * 1 = 2 …
1 * 2..

1 * 10

”’
s=0
for i in range(1,11,1):
s += 1 # s=s+1
for j in range(1,11):
print(f”{j:<2}*{i:>2}={j*i: <3}, end=” “)

print()

print(“S = “, s)
## WHILE LOOP
num = 1
ch = “y”
while ch==“y”:
num+=1
ch=input(“Enter y to continue or anyother key to stop: “)

print(“num = “,num)
##
# Menu option
while True:
print(“Now, pick up the option:”)
print(“1. Addition”)
print(“2. Subtraction”)
print(“3. Multiplication”)
print(“4. Division”)
print(“5. Quit”)
choice = int(input(“Enter your option from the above menu:”))

if choice>=1 and choice <=4:
num1 = int(input(“Enter first number: “))
num2 = int(input(“Enter second number: “))

if choice==1:
print(“Addition of given numbers: “,num1+num2)
elif choice==2:
print(“Subtraction of given numbers: “,num1-num2)
elif choice==3:
print(“Multiplication of given numbers: “,num1*num2)
else:
print(“Division of given numbers: “,num1/num2)
elif choice==5:
break
else:
print(“Invalid option, try again!”)

##
”’
Guess the number game!
Computer v Human

Module: has collection of related functions
randint(1,100) – will return random integer number between 1 and 100 (both including)
”’
import random

num = random.randint(1, 100)
attempt = 0
while True:
guess = int(input(“Guess the number: “))
if guess < 1 or guess > 100:
print(“Invalid guess, try again”)
continue

attempt += 1
if num == guess:
print(f”Congratulations! You guessed it right in {attempt} attempts.”)
break
elif num < guess:
print(f”Your {guess} is higher.. try with a lower guess!”)
else:
print(f”Your {guess} is lower.. try with a higher guess!”)
”’
Guess the number game!
Computer v Computer

Module: has collection of related functions
randint(1,100) – will return random integer number between 1 and 100 (both including)
”’
import random
import time

start = time.time()
num = random.randint(1,100)
low,high =1,100
attempt = 0
while True:
guess = random.randint(low,high)
attempt+=1
if num==guess:
end = time.time()
print(f”Congratulations! You guessed it right in {attempt} attempts and in {end-start} seconds.”)
break
elif num < guess:
print(f”Your {guess} is higher.. try with a lower guess!”)
high = guess-1
else:
print(f”Your {guess} is lower.. try with a higher guess!”)
low = guess + 1

## ### ### ##
”’
WAP a Program to generate prime numbers from 1 till user continue to ask
2
you want more? y
3
you want more? y
5
you want more? y
7
you want more? y
11
you want more? n
Thank you

”’
num = 1
while True:
num+=1
isPrime = True
for i in range(2,num//2+1):
if num%i==0:
isPrime=False
break
if not isPrime:
continue
print(num)
ch=input(“you want more (y for yes)?”)
if ch!=“y”:
break
## ##
## STRINGS
str1 = ‘Hello’ # one line text
str2 = “Hi there” # one line text
str3 = ”’How are you?
Where are you?
How you doing?”’ # multiline text
str4 = “””I am fine
I am here
i am great””” # multiline text
print(type(str1),type(str2),type(str3),type(str4))
print(str1,\n,str2,\n,str3,\n,str4)

# \n is used for newline
# \ – escape character { this will add power / remove power if you have power}
# \\ – will make one \
print(\\n is used for newline \\t is used for Tab spaces \\c has not meaning”)

# \\n \\t \\c
print(\\\\n \\\\t \\\\c”)
## STRING
str1 = “hello 5”
str2 = “how are you”
# he asked,”how are you?”
print(‘he asked,”how are you?”‘)
print(“he asked,\”how are you?\”)
# he asked,”what’s your name?”
print(‘he asked,”what\’s your name?”‘)
print(str1 + ” “+ str2)
print((str1+” “)*5)

for i in str1:
print(i)

# len() – will tell you the length of the variable
print(f”Number of elements in {str1} is”,len(str1))

for i in range(len(str1)):
print(i, ” – “,str1[i])

# indexing: accessing the member from a string /data collection
str1 = “helloshnrtmd”
print(str1[0])
last_idx = len(str1) – 1
print(str1[-1])
# str1[0] = “H” – error

# string is immutable – you cant edit
a=5
a=6
str1 = “hello”
str1 = “Hello”
print(“ell”, str1[1:4], str1[-4:-1])
print(“Hell”,str1[0:4],str1[:4], str1[:-1])
print(“ello”,str1[1:5],str1[1:], str1[-4:])

# Functions (global) and Methods (specific to the class)
str1 = “HelLo how are you”
str2 = “hello”
anything =
print(“Lower: “,str1.lower())
print(“Uppercase: “,str1.upper())
print(“STR1 = “,str1)
print(“Title Case: “, str1.title())
print(str2.islower())
print(str2.upper().isupper())
print(str2.isalnum())
print(str2.isdigit())

num1 = input(“Enter length of the rectangle: “)
if num1.isdigit():
num1 = int(num1)
else:
print(“Invalid number”)

# modify below program to accept space also
name = input(“Enter your name: “)
if name.isalpha():
print(“Name accepted”)
else:
print(“Invalid name”)
# modify below program to accept space also
name = “Sachin Tendulkar”
if name.isalpha():
print(“Name accepted”)
else:
print(“Invalid name”)

output = name.split() #split by default will split on ‘ ‘
output_str = “”.join(output)
print(output_str)
if output_str.isalpha():
print(name,“Name accepted”)
else:
print(“Invalid name”)

print(“Strip —“)
txt = ” 2 4 6 8 “
print(txt.strip(), “length is”,len(txt.strip()))

# replace find count
print(“blank spaces are:”,txt.count(” “))
txt2 = “Twinkle Twinkle Twinkly Star”
print(“Count Twin: “,txt2.count(“Twin”))
print(“Find Twin: “,txt2.find(“Twin”))

# Print the position of all the occurrences
st=0
for i in range(txt2.count(“Twin”)):
cnt = txt2.find(“Twin”,st)
print(“Find Twin: “, cnt)
st = cnt+1

txt2 = “Twinkle Twinkle Twinkly Star”
txt3 = txt2.replace(“Twin”,“Quin”)
print(txt3)
txt3 = txt2.replace(“Twin”,“Quin”,2)
print(txt3)
print(“Check if txt2 starts with t: “,txt2.startswith(“t”))
print(“Check if txt2 starts with t: “,txt2.startswith(“T”))
print(“Check if txt2 starts with t: “,txt2.endswith(“r”))
print(“Check if txt2 starts with t: “,txt2.endswith(“R”))


# LIST: linear mutable ordered collection
l1 = []
print(type(l1))
l1 = [3,4,6,8,9]
print(“Getting last 3 values:”)
print(l1[2:])
l1[0]=0
print(l1)

l2 = [“Hello”, 45, 66.6, 5j , True, [5,10,15,20]]
print(l2)
print(type(l2))
print(type(l2[3]))

print([2,4,6]+[4,8,12])
print([2,4,6] * 5)

###
for i in l2:
print(i)

fare=[]
fare.append(50) # append is used to add members to a list at the end
fare.append(60)
fare.append(45)
fare.insert(1,77)
fare.insert(1,44)
print(“Fare = “,fare)

fare.pop(2)
print(“Fare = “,fare)
fare.remove(60)
print(“Fare = “,fare)
fare.clear()
print(“Fare = “,fare)

marks = []
tot = 0
for i in range(5):
m = int(input(“Enter marks = “))
marks.append(m)
tot+=m

print(“Marks = “,marks)
print(“Total marks is”,sum(marks))
print(“Total = “,sum(marks)/len(marks))

#LIST

”’
## Assignment: Implement Stack and Queue operations using List
Stack is called as – Last in First out structure (LIFO)

Queue: First In First Out (FIFO)

”’
l1 = [2,4,8,14,16]
num= 140
if l1.count(num) >0:
print(“Index: “,l1.index(num))
else:
print(f”{num} is not in the list”)

print(l1.index(14,2,6))
print(“0. L1 = “,l1)
l2 = l1.copy() # shallow copy – creates another copy
l3 = l1 # deep copy – another name for same data
print(“1. L1 = “,l1)
print(“1. L2 = “,l2)
print(“1. L3 = “,l3)
l1.append([18,20])
l1.append(20)
print(“2. L1 = “,l1)
print(“2. L2 = “,l2)
print(“2. L3 = “,l3)

l1.extend(l3) # l1 = l1 + l3
print(l1)

l4 = [5,9,1,4,2,3,6]
l4.reverse()
print(l4)
l4.sort()
print(l4)
l4.sort(reverse=True)
print(l4)

# [[4,6,8],[9,5,2],[5,9,1]] – find highest value at each position.
l1 = [[5, 6, 8], [5, 0, 0], [5, 9, 1]]
print(“the largest element is”, max(l1))

data = []
for i in range(3):
temp = []
for j in range(3):
val = int(input(“Enter value: “))
temp.append(val)
data.append(temp)

print(“Data = “,data)
# data = [[5, 4, 3], [6, 7, 5], [9, 8, 7]]
max_list = [-999,-999,-999]
for i in range(3):
for j in range(3):
if max_list[i] < data[j][i]:
max_list[i] = data[j][i]

print(max_list)

# Assignment 2: Find the highest element for each of the sub-list

# TUPLE: Linear immutable Ordered collection
t1 = (1,2,3)
print(type(t1))
print(t1.count(3))
print(t1.index(3))
# indexing is exactly same as Str/List
for i in t1:
print(i)
a,b,c = t1
print(a,b,c)
d = [(1,2,3,4,5,6),(1,2,3,4,5,6),(1,2,3,4,5,6),(1,2,3,4,5,6)]
t1 = list(t1)
t1 = tuple(t1)

## ##
# Dictionary: Linear mutable unordered collection
# dictionary is your key:value (key value pairs)
d1 = {“Name”:“Sachin”,“Runs”:21000,3:“Mumbai”, True:“Mumbai Indians”}
print(d1)
print(d1[“Name”])
d1[“Name”] = “Sachin Tendulkar”
print(d1)

d2 = d1 #deep copy
d3 = d1.copy() #shallow copy
print(“1. D1 = “,d1)
print(“1. D2 = “,d2)
print(“1. D3 = “,d3)
t = {“Country”:“India”}
d1.update(t)
print(“2. D1 = “,d1)
print(“2. D2 = “,d2)
print(“2. D3 = “,d3)

print(d1.keys())
print(d1.values())
print(d1.items())

for i in d1.keys():
print(i)
print(“================”)
for i in d1.values():
print(i)
print(“================”)
for i in d1.items():
print(i)
print(“================”)

for i,j in d1.items():
print(i,“:”,j)
print(“================”)

d1.pop(“Name”)
print(d1)
d1.popitem()
print(d1)
## 7th Oct 2023
list1=[“Maths”,“Physics”,“Chemistry”]
Students_marks = {}
for i in range(3):
key = input(“Enter the Roll No. of the Student:”)
marks_list = []
for j in range(3):
marks = int(input(f”Enter the marks in {list1[j]}: “))
marks_list.append(marks)
t = {key:marks_list}
Students_marks.update(t)

print(“Details of marks are:\n,Students_marks)
# {‘101’: [67, 78, 89], ‘105’: [69, 71, 90], ‘110’: [90, 45, 76]}
”’
Assignment: Extend the above program to find:
1. Total of each student
2. Highest marks obtained among the given students
3. Give roll no of topper of each subject
”’
# Dictionary : unordered mutable collection
d1 = {10:“ONE”, 20:“TWO”,30:“THREE”}
d1.pop(20)
print(d1)

d2 = d1
d3 = d1.copy()
print(“1. D1 = “,d1)
print(“1. D2 = “,d2)
print(“1. D3 = “,d3)
d1.update({40:“Four”})
d1.update({50:“Five”})
print(“2. D1 = “,d1)
print(“2. D2 = “,d2)
print(“2. D3 = “,d3)

# SET : unordered collection
set1 = {5,8, 8, 5, 5,12}
print(type(set1))
print(set1)
set1.update({1,2,3})
print(set1)
set1.pop()
print(“After pop: “,set1)

l1 = [5,10,10,15,15,15,20,20,20,20]
l1 = list(set(l1))
print(“List l1 = “,l1)

# SET OPERATIONS
set1 = {1,3,5,7,6}
set2 = {2,4,6,8,7}
print(set1)
print(set2)
print(“UNION”)
print(set1.union(set2))
print(set1 | set2)
print(“INTERSECTION”)
print(set1.intersection(set2))
print(set1 & set2)
print(“SET1 – SET2”)
print(set1.difference(set2))
print(set1 – set2)
print(“SET2 – SET1”)
print(set2.difference(set1))
print(set2 – set1)

print(“SYMM SET1 – SET2”)
print(set1.symmetric_difference(set2))
print(set1 ^ set2)
print(“SYMM SET2 – SET1”)
print(set2.symmetric_difference(set1))
print(set2 ^ set1)

print(set1.isdisjoint(set2))
set4 = {10,20,30}
print(set1.isdisjoint(set4))

d1 = {“Good”:{“Excellent”,“Positive”,“Nice”}}
print(d1)

# one line loops
words = ‘abcdefghijklmnopqrstuvwxyz’
# go through words list and save to another list
words2 =[]
for w in words:
words2.append(w)

print(“Words 2: “,words2)

words3 = [w*2 for w in words]
print(“Words 3: “,words3)

words = ‘abcdefghijklmnopqrstuvwxyz’
words3 = [w*2 for w in words]
print(“Words 3: “,words3)

# one line conditions
num1 = 50
#check if its odd or even
result = “even” if num1%2==0 else “odd”
print(f”{num1} is {result})

# Functions
”’
Inbuilt functions: print(), type(), int(),…
User defined functions: we create our own functions
One line function:
”’



#this is how you define a function by name three_sentences()
def three_sentences():
print(“Who are you?”)
print(“How are you?”)
print(“Where are you?”)

def three_sentencesv1(var_name):
print(f”Who are you, {var_name}?”)
print(f”How are you, {var_name}?”)
print(f”Where are you, {var_name}?”)

def addition(num1, num2):
add = num1 + num2
#print(“Addition is”,add)
return add

#Calling the functions
result = addition(10,20)
three_sentences()
three_sentencesv1(“Mr Sachin”)
three_sentencesv1(“Ms Sindhu”)
print(“Output is”,result)
if result >= 0:
print(f”I got {result} sweets”)


# Functions
# arguments:
# required – you have to provide the values for the arguments
# positional – the order of the arguments should match
# default (like non-required values)
# keyword (like non-positional)

country = “INDIA” #global variable
#function definition
def func1(num1, num2):
print(“A block of code”)
print(“Num1 =”,num1)
print(“Num2 = “,num2)
total = num1 + num2
return total
def func2(num1, num2=1): #default value for num2
print(“A block of code”)
print(“Num1 =”,num1)
print(“Num2 = “,num2)
total = num1 * num2
return total

def func3(num1=10, num2=5): #default value for num2
print(“A block of code”)
print(“Num1 =”,num1)
print(“Num2 = “,num2)
total = num1 * num2
return total

def func4(x,y,num1=10, num2=5,num3=25,num4=85): #default value for num2
print(“A block of code”)
print(“Num1 =”,num1)
print(“Num2 = “,num2)
total = num1 * num2
return total

def test():
global country #local – local will preceed global
print(“Country is”,country)
country=“india” #local – local will preceed global
print(“Country is”, country)

# variable length arguments: using * and **
# * read as a tuple
# ** read as a dictionary
def var_len_demo(num, *names, **favorite):
print(“Variable length arguments functon”)
print(“Num is”,num)
if len(names)>0:
print(“Names = “,names)
if len(favorite) > 0:
print(“Favorites = “,favorite)

result = func1(5,10)
print(“func1 Output is “,result)
result = func2(5)
print(“func2 Output is “,result)
result = func3(5,7)
print(“func2 Output is “,result)
result = func3()
print(“func2 result that we have is “,result)

result = func3(num2=15,num1=99)
print(“func2 Output is “,result)
result = func3(num2=15)
result *=10
print(“func2 Output is “,result)
print(“func4 Output is”,func4(5,6))

# wap to check if a number is prime or not
def checkprime(num):
isPrime = True
if num<2:
isPrime = False
elif num>2:
for i in range(2,num//2+1):
if num%i==0:
isPrime=False
break
else:
isPrime=True
return isPrime

for val1 in range(15,18):
result = checkprime(val1)
if result:
print(f”{val1} is a prime number”)
else:
print(f”{val1} is not a prime number”)

print(‘generate series of prime numbers between 50000 and 51000:’)
for i in range(50000,51001):
result = checkprime(i)
if result:
print(i,end=“, “)
print()
”’
Assignment:
All the programs that you have done so far – convert them
into functions – all your core logic should be in the function
and only the values to the arguments should be passed to the
function and the output displayed in the main function.
”’
def printpattern(value,num):
for i in range(num):
for j in range(num):
print(value,end=” “)
print()

printpattern(“#”,7)
test()

# calling variable length
var_len_demo(100)
var_len_demo(11, “Sachin”,“Virat”,“Rohit”,“MS”)
var_len_demo(11, “Sachin”,“Virat”,“Rohit”,“MS”,color=“blue”,game=“badminton”,drink=“water”)
var_len_demo(11, color=“blue”,game=“badminton”,drink=“water”)
# Class and Objects
## —————–
class Book:
total_books = 0 # class member/variable

def input_data(myname):
myname.title = “” #object variable
myname.author = “” #object variable
type = “book”
myname.mytype = type

def set_data(myname):
myname.title = “Python Programming” #object variable
myname.author = “Saurav” #object variable

b1= ‘hello’
print(“Type of b1 = “,type(b1))
print(b1.upper())
book1 = Book()
print(“Type of book1 = “,type(book1))

book1.total_books += 10
print(book1.total_books)
book1.input_data() #object function
print(“Author: “,book1.author)
book1.set_data()
print(“Author: “,book1.author)


# Class and Objects
# Program 2
## —————–
class Book:
total_books = 0 # class member/variable

def input_data(self):
self.title = “” #object variable
self.author = “” #object variable
Book.total_books +=1

def set_data(self, title,author=“Saurav”):
self.title = title #object variable
self.author = author #object variable


book2 = Book()
book2.input_data()
book2.set_data(“Python Programming”,“Sachin”)
book3 = Book()
book3.input_data()
book3.set_data(“Machine Learning”,“Rohit”)
book4 = Book()
book4.input_data()
book4.set_data(“SQL Programming”)
print(“Total books = “,book2.total_books)
print(“Total books = “,book3.total_books)
print(“Total books = “,book4.total_books)
print(“Total books = “,Book.total_books)

print(“Title = “,book2.title)
print(“Title = “,book3.title)
print(“Title = “,book4.title)
#print(“Title = “,Book.title)

# class and objects

class Books:
total_books = 0 #class variable

def getdata(self,title,author): #object method
self.title = title #object variable
self.writer = author
Books.total_books +=1
print(“Hello”)

def putdata(self):
print(“Title of the book: “,self.title)
print(“Author of the book: “, self.writer)

@classmethod
def CountBooks(cls):
print(“Total books in the library are:”,cls.total_books)

b1 = Books()
b2 = Books()
print(“Total books = “,b1.total_books)
print(“Total books = “,Books.total_books)
b1.getdata(“Python”,“Rohit”)
b2.getdata(“SQL”,“Virat”)
b2.putdata()
b1.putdata()
print(“Total books = “,Books.total_books)
print(“Total books = “,b1.total_books)
print(“Total books = “,b2.total_books)
b2.CountBooks()

### ### ####
# class and objects
class Library:
def libname(self):
print(“I am being called from Library”)
class Books(Library):
total_books = 0 #class variable

def __init__(self,title,author): #object method
self.title = title #object variable
self.writer = author
Books.total_books +=1
print(“Hello”)

def putdata(self):
print(“Title of the book: “,self.title)
print(“Author of the book: “, self.writer)

@classmethod
def CountBooks(cls):
print(“Total books in the library are:”,cls.total_books)

b1 = Books(“Python”,“Rohit”) #__init__()
b2 = Books(“SQL”,“Virat”)
print(“Total books = “,b1.total_books)
print(“Total books = “,Books.total_books)
#b1.getdata(“Python”,”Rohit”)
#b2.getdata(“SQL”,”Virat”)
b2.putdata()
b1.putdata()
print(“Total books = “,Books.total_books)
print(“Total books = “,b1.total_books)
print(“Total books = “,b2.total_books)
b2.CountBooks()

”’
Properties of class and objects:
1. Inheritance: inheriting properties from another class
2. Polymorphism: Multiple forms
3. Abstraction: information/implementation hiding
4. Encapsulation: data hiding
”’
b1.libname()

class text_display(Books):
def __init__(self, title, author):
print(“INIT in text_display”)
Books.__init__(self, title, author)

def display(self):
print(“Display in text_display”)

def putdata(self):
print(“Title of the book: “)
print(“Author of the book: “)

t1 = text_display(“Waste Management”,“Dhoni”)
t1.putdata()

# Encapsulation in Python
”’
Public members (variable or method)
Protected members (_membersname): logically these members can only
be called by the derived classes. concept is there but not enforeced
Private members (__membername): they are not accessible outside the class

”’
class Library:
def __lib_data(self):
print(“Lib data from Library”)
def get_data(self):
self.__lib_data()
class Books(Library):
def book_data(self):
print(“Book data from Books”)

def get_data(self):
Library.__lib_data(self)
print(“Get data from Books”)

class Test:
def test_data(self):
print(“Test data from Test”)

b1 = Books()
b1.get_data()
## #####


# work with external files – text files
”’
Modes of opening file:
r : read the content (default)
w : write the content by deleting previous content
a : add to the existing content (append)

r+ : read and write
w+ : write and read
a+ : append and read

Writing to a text file:
write()
writelines()

Reading the content from a text file:
read()
readline()
readlines()
”’
content = ”’Twinkle Twinkle little star
How I wonder what you are
Up above the world so high
like a diamond in the sky
”’

cont_lt = [‘Ba Ba Black Sheep\n,‘Do you have any wool?\n,
‘Yes sir yes sir\n,‘Three bags full\n]
fileptr = open(“TestNov\\abc.txt”,“a”)
if fileptr.writable():
fileptr.write(content)
fileptr.writelines(cont_lt)
else:
print(“File cant be written”)

fileptr.close()

## open the file for reading
fileptr = open(“TestNov\\abc.txt”,“r”)
if fileptr.readable():
# read the content
mycontent = fileptr.read()
else:
print(“Not readable”)

print(“===================”)
print(mycontent.replace(“Twinkle”,“TWINKLE”))
mycontent = mycontent.replace(“Twinkle”,“TWINKLE”)
print(mycontent)

print(“Second reading…………”)
# go to beginning
fileptr.seek(30) #go to the position 0
mycontent = fileptr.read()
print(mycontent)
fileptr.seek(0)
print(“============ Read line =================”)
content = fileptr.readline()
print(content)
content = fileptr.readline(200)
print(content)
content = fileptr.readline()
print(content)

content = fileptr.read(200)
print(content)

fileptr.seek(0)
content_lines = fileptr.readlines()
print(content_lines)
print(“Total number of lines = “,len(content_lines))
fileptr.seek(0)
allcontent = fileptr.read()
print(“Number of characters = “,len(allcontent))
txt1 = “welcome”
print(len(txt1))
fileptr.close()

## Working with CSV files
import csv
”’
r, r+, w, w+, a, a+ : different modes of files to read
”’
fileptr = open(“TestNov/abc.csv”,“w”,newline=“”)
csv_writer = csv.writer(fileptr)
content_list = [[“SNO”,“Name”,“City”,“Runs”],
[1,“Rohit”,“Mumbai”, 3500],
[2,“Virat”,“Delhi”,4210],
[3,“Laxman”,“Hyderabad”,1990]]

for i in content_list:
csv_writer.writerow(i)
fileptr.close()

fileptr = open(“TestNov/abc.csv”,“w”,newline=“”)
csv_writer = csv.writer(fileptr,delimiter=“,”)
content_list = [[“SNO”,“Name”,“City”,“Runs”],
[1,“Rohit”,“Mumbai”, 3500],
[2,“Virat”,“Delhi”,4210],
[4,“Laxman”,“Hyderabad”,1990]]

csv_writer.writerows(content_list)
fileptr.close()
print(“Reading the CSV file\n)
fileptr = open(“TestNov/abc.csv”) #default mode is read
csv_reader = csv.reader(fileptr, delimiter=“,”)
for i in csv_reader:
print(i)
fileptr.close()

fileptr = open(“TestNov/abc.csv”) #default mode is read
csv_reader = csv.reader(fileptr, delimiter=“,”)
count=0
for i in csv_reader:
if count==0:
count+=1
else:
print(i[1])
break
fileptr.close()

### working with json file
json1 = {
“Player1”:[
{
“Name”:“Sachin”,
“City”:“Mumabi”,
“Runs”: 13980
}
]
}

import json
”’
dump: writing to a file
dumps: writing to screen
load: reading from a file
loads: reading from a string
”’
# dump – writing to a file
fileptr = open(“TestNov/abc.json”,“w”)
json.dump(json1, fileptr,indent=4)
fileptr.close()
#load – reading json content from the file
fileptr = open(“TestNov/abc.json”,“r”)
content = json.load(fileptr)
print(“printing from the file:\n,content)
#writing in json format on the screen
print(json.dumps(content,indent=4))
fileptr.close()

json_str = “””{“Player1”: [{“Name”: “Sachin”, “City”: “Mumabi”, “Runs”: 15980}]}”””
#convert above string into json format
json_json =json.loads(json_str)
#above json_json content we will display on the screen
# and also save to the file
fileptr = open(“TestNov/abc.json”,“w”)
json.dump(json_json, fileptr,indent=4)
fileptr.close()

print(json.dumps(json_json,indent=4))

### ###################

### Assingment ###
### Read India and South Africa today’s match data as a csv
## write using Python (csv.writer)
### then use reader to answer following question
### 1. Who is the top scorer
### 2. Who is the highest wicket taker
### 3. Who has the best bowling average
### 4. Who has the best strike rate



### Example
### working with json file
json1 = {
“Players”:[
{
“Name”:“Sachin”,
“City”:“Mumabi”,
“Runs”: 13980
},
{
“Name”:“Laxman”,
“City”:“Hyderabad”,
“Runs”: 15980
},
{
“Name”:“Rahul”,
“City”:“Bangalore”,
“Runs”: 11980
}
]
}

import json
”’
dump: writing to a file
dumps: writing to screen
load: reading from a file
loads: reading from a string
”’
# dump – writing to a file
fileptr = open(“TestNov/abc.json”,“w”)
json.dump(json1, fileptr,indent=4)
fileptr.close()
#load – reading json content from the file
fileptr = open(“TestNov/abc.json”,“r”)
content = json.load(fileptr)
print(“printing from the file:\n,content)
print(content[‘Players’][2][‘Name’])
#writing in json format on the screen
fileptr.close()
#databases – store and make data manipulation easy
# databases – Oracle, SQL Server, DB2, Postgres, SQLITE, MYSQL,
# language – SQL language -Structured Query Language- to access
# relational databases you need to learn SQL
# Roles – DBA (database administrators- manage),
# Database programmers (users)
# Database architects – they are responsible for creating the
# tables and storing them

# Operations on RDBMS – Read (reading the existing content – 80%)
# Add, Edit, Delete
# DB knowledge – Physical data v logical data
# all RDBMS systems will have same logical view
# ANSI – sets the standards for relational databases

# Rules to be followed by the database to be called itself RDBMS
# – data is stored in table and in row and column format, cell
#cell should have one information

#components: Table, Views, Triggers

#creating table to store values: list of column and their dattypes, primary key

#Relational DBMS – relationship between various column values
## 1 to 1
## 1 to Many or Many to 1
## Many to Many

# download and install MYSQL
# https://dev.mysql.com/downloads/installer/
# follow the instructions: download all the additional things:
## like Workbench, Sample database also install
## While installing save the root user password
## – username – root
## password – learnSQL
## servername – localhost

# Tutorial: https://dev.mysql.com/doc/refman/8.0/en/numeric-types.html

# Constraints: Primary Key, Foreign Key, Unique, Not Null, Check

Create table myemployees.departments(

DID INTEGER PRIMARY KEY,

DEPT VARCHAR(10),

DEPTCODE INTEGER,

DHEAD VARCHAR(20)

);

 

INSERT INTO myemployees.departments values(1,’Sales’,109,’Sunil’);

 

INSERT INTO myemployees.employees VALUES(1,’Rohit Sharma’,’23456′,’rohit@rohit.com’,’1985-05-21′,21000.89,1);

 

INSERT INTO myemployees.employees (EID, ENAME, SALARY,DID) 

VALUES(2,’Virat Kohli’,20000.89,1);

 

select * from myemployees.employees;


”’
Database
Exception handling

”’
”’
https://www.geeksforgeeks.org/convert-python-script-to-exe-file/

”’
import csv
fname =
input(“Enter the filename: “)

fileobj =
open(fname)
csv_content = csv.reader(fileobj,
delimiter=“,”)
print(“Content on the screen: “)
for row in csv_content:
   
print(row)
fileobj.close()

#######################################

”’
Converting Python program to exe:
https://www.datacamp.com/tutorial/two-simple-methods-to-convert-a-python-file-to-an-exe-file

”’

# from the tkinter library
from tkinter import *
# import filedialog module
from tkinter import filedialog
from tkinter import messagebox
import csv
# to read filename
filename1 = “”

def ReadCSV():

   
if filename1==“”:
        messagebox.showerror(
“File Process”,“Filename has not be selected”)
   
else:
       
print(“I am in Else!”)
        fileobj =
open(filename1)
        csv_content = csv.reader(fileobj,
delimiter=“,”)
       
print(“Content on the screen: “)
       
for row in csv_content:
           
print(row)
        fileobj.close()

# Function for opening the
# file explorer window
def browseFiles():
    filename = filedialog.askopenfilename(
initialdir=“/”,
                                         
title=“Select a File”,
                                         
filetypes=((“Text files”,
                                                     
“*.txt*”),
                                                     (
“all files”,
                                                     
“*.*”)))
   
filename1 = filename
   
# Change label contents
   
label_file_explorer.configure(text=“File Opened: ” + filename)


# Create the root window
window = Tk()

# Set window title
window.title(‘Selecting the file’)

# Set window size
window.geometry(“500×500”)

# Set window background color
window.config(background=“white”)

# Create a File Explorer label
label_file_explorer = Label(window,
                           
text=“File Explorer”,
                           
width=100, height=4,
                           
fg=“blue”)

button_explore = Button(window,
                       
text=“Browse Files”,
                       
command=browseFiles)

button_process = Button(window,
                       
text=“Analyze the file”,
                       
command=ReadCSV)

# specifying rows and columns
label_file_explorer.grid(column=1, row=1)
button_explore.grid(
column=1, row=2)
button_process.grid(
column=2, row=2)


#fname = input(“Enter the filename: “)



# Let the window wait for any events
window.mainloop()

##########  CSV File ##########
import csv
#fname = input(“Enter the filename: “)
diff_list=[]

fname1=
“D:/MyApp/B1.csv”
fname2=“D:/MyApp/B2.csv”

fileobj1 = open(fname1)
fileobj2 =
open(fname2)
csv_content1 = csv.reader(fileobj1,
delimiter=“,”)
csv_content2 = csv.reader(fileobj2,
delimiter=“,”)
print(“Content on the screen: “)

list_content1=[]
for row1 in csv_content1:
    list_content1.append(row1)

list_content2=[]
for row2 in csv_content2:
    list_content2.append(row2)


for row1 in list_content1:
   
for row2 in list_content2:
       
#print(“ROW 1 and ROW 2: “,row1,row2)
       
if row1[0]== row2[0] and row2[0]!=‘id’:
            temp_list = []
            temp_list.append(row1[
0])
           
for i in range(1,5):
               
if row1[i].isnumeric() and row2[i].isnumeric():
                    temp_list.append(
int(row1[i])-int(row2[i]))
            diff_list.append(temp_list)

print(“Final list:\n,diff_list)
fileobj1.close()
fileobj2.close()

#writing to a csv file
fileobj = open(“D:/MyApp/B3.csv”,“w”,newline=“”)
csv_writer = csv.writer(fileobj,
delimiter=‘,’)
csv_writer.writerow([
“ID”,“A”,“B”,“C”,“D”])
csv_writer.writerows(diff_list)
fileobj.close()

######

####  Exception handling  #####
## 3 types of errors:
# syntax errors
# logical errors
# runtime error = exception
#print(“Hello)
print(“Sum of 2 and 3 is “,2*3)

a=
5
b=10
if b==0:
   
print(“Cant move ahead”)
else:
   
print(“Division is “,a/b)

#
a = “6t”

b = 4
try:
    a =
int(a)
    c = a/b

except ZeroDivisionError:
   
print(“Divide by zero is not allowed”)
except ValueError:
   
print(“Numerator is not a valid number”)
except Exception:
   
print(“Some exception has occurred , not sure what”)
else:
   
print(“Value of c is”,c)

finally:
   
print(“I am in finally”)



# ZeroDivisionError: division by zero

“””
Connecting to MYSQL Database

“””
import pymysql
conn = pymysql.connect(user=“root”,host=“localhost”,password=“learnSQL”,database=“employees”)
#cursor is database object to take commands from python to DB
curobj = conn.cursor()

table1 = ”’
Create table books(
ID Integer Primary Key,
TITLE Text,
PRICE Integer
)
”’
#curobj.execute(table1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(1,”Python Programming”, 390)”’
#curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(2,”SQL Programming”, 290)”’
#curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(3,”Java Programming”, 490)”’
#curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(4,”C++ Programming”, 395)”’
#curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(5,”C# Programming”, 492)”’
#curobj.execute(insert1)

up1 = ”’Update Books set title=”Java Language Programming” where ID=3”’
curobj.execute(up1)
del1 = ”’Delete from Books where ID=4”’
curobj.execute(del1)
conn.commit()
select1 = “Select * from books;”
curobj.execute(select1)
output = curobj.fetchall()
#print(“Total rows = “,output)

for rows in output:
print(rows)


curobj.close()
## Database – type is RDBMS – relational database management system
#table is where data is stored
# server can have multiple databases:
#SQLITE3 free database, which comes with Python installation
# doesnt have username password, cant be accessed from outside

import sqlite3
conn = sqlite3.connect(“DECDATA.SQLITE”)
# SQLITE3 DB data types tutorial:
# https://www.sqlite.org/datatype3.html
table1 = ”’
Create table books(
ID Integer,
TITLE Text,
PRICE Integer
)
”’

#cursor is database object to take commands from python to DB
curobj = conn.cursor()
#curobj.execute(table1) – already run
“””
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(1,”Python Programming”, 390)”’
curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(2,”SQL Programming”, 290)”’
curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(3,”Java Programming”, 490)”’
curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(4,”C++ Programming”, 395)”’
curobj.execute(insert1)
insert1 = ”’Insert into books(ID,TITLE,PRICE) values(5,”C# Programming”, 492)”’
curobj.execute(insert1)
conn.commit()
“””
“””
up1 = ”’Update Books set title=”Java Language Programming” where ID=3”’
curobj.execute(up1)
conn.commit()
“””
“””
del1 = ”’Delete from Books where ID=4”’
curobj.execute(del1)
conn.commit()
“””

select1 = “Select * from Books”
output = curobj.execute(select1)
for rows in output:
print(rows[1],rows[2])
curobj.close()
Data Structure using Python
### MAPS – based on Dictionaries
import collections
d1 = {“name”:“Sachin”,“city”:“Mumbai”,“age”:50}
d2 = {“sports”:“Hockey”,“team”:“India”,“Goals”:322}

output = collections.ChainMap(d1,d2)
print(output.maps)

print(“Keys = “,list(output.keys()))
print(“Values = “,list(output.values()))
print(“Items = “,list(output.items()))

# in operator
print(“Is city in the map? “,“city” in output)
print(“Value at city = “,output[“city”])
#update
output[“city”] = “Hyderabad”
print(output)

# LINKED LIST: sequence of data elements
## Properties: Traverse, Insert & Delete
## Insert: Begining, Middle, End
class Node:
def __init__(self,data=None):
self.value = data
self.nextnode = None

class ListPointer:
def __init__(self):
self.head = None

#function to traverse through the Linked List
def traverse(self):
header = self.head
val=[]
while header is not None:
#print(“Value from the Linked List:”,header.value)
val.append(header.value)
header = header.nextnode
return val
# Inserting a node to the beginning of the linked List
def Insert_Beginning(self,newdata):
NewNode = Node(newdata)
NewNode.nextnode = self.head
self.head = NewNode
#Inserting at the end
def Insert_End(self,newdata):
NewNode = Node(newdata) #nextnode is null

#check if there is existing node or not
if self.head is None:
self.head = NewNode
return
last = self.head
while last.nextnode is not None:
last = last.nextnode
last.nextnode = NewNode

#Inserting Between 2 Nodes
def Insert_Between(self, betweennode, newdata):
if betweennode is None:
print(“The between Node is not available!”)
return

NewNode = Node(newdata)
NewNode.nextnode = betweennode.nextnode
betweennode.nextnode = NewNode

#Remove a node
def Remove(self,value):
# check for 3 conditions – First, Inbetween and Last

return


if __name__==“__main__”:
list = ListPointer() # created header
list.head = Node(“January”) # first node with value January and pointing to null
n2 = Node(“February”) # second node with value February and pointing to null
list.head.nextnode = n2 #first node is now linked to second node
n3 = Node(“March”) #third node with value March and pointing to null
n2.nextnode = n3 #second node is now linked to third node

# traversing through the list
print(list.traverse())

# add at the beginning
list.Insert_Beginning(“December”)
# traversing through the list
print(list.traverse())

#Add April at the end
list.Insert_End(“April”)
print(list.traverse())

list.Insert_Between(n3,“November”)
print(list.traverse())

#remove




# LINKED LIST: sequence of data elements
## Properties: Traverse, Insert & Delete
## Insert: Begining, Middle, End
class Node:
def __init__(self,data=None):
self.value = data
self.nextnode = None

class ListPointer:
def __init__(self):
self.head = None

#function to traverse through the Linked List
def traverse(self):
header = self.head
val=[]
while header is not None:
#print(“Value from the Linked List:”,header.value)
val.append(header.value)
header = header.nextnode
return val
# Inserting a node to the beginning of the linked List
def Insert_Beginning(self,newdata):
NewNode = Node(newdata)
NewNode.nextnode = self.head
self.head = NewNode
#Inserting at the end
def Insert_End(self,newdata):
NewNode = Node(newdata) #nextnode is null

#check if there is existing node or not
if self.head is None:
self.head = NewNode
return
last = self.head
while last.nextnode is not None:
last = last.nextnode
last.nextnode = NewNode

#Inserting Between 2 Nodes
def Insert_Between(self, betweennode, newdata):
if betweennode is None:
print(“The between Node is not available!”)
return

NewNode = Node(newdata)
NewNode.nextnode = betweennode.nextnode
betweennode.nextnode = NewNode

#Remove a node
def Remove(self,value):
# check for 3 conditions – First, Inbetween and Last
header = self.head
if header is None:
# Do nothing – linked list is blank
return

if header is not None:
if header.value == value: #first node in the list to be removed
self.head = header.nextnode
header = None
while header is not None:
if header.value == value:
break
previous = header #
header = header.nextnode
if header ==None: #you have reached last node and still value not matched
#simply it means no value matched
print(“Value not found in the linked list”)
return

previous.nextnode = header.nextnode
header = None

return


if __name__==“__main__”:
list = ListPointer() # created header
list.head = Node(“January”) # first node with value January and pointing to null
n2 = Node(“February”) # second node with value February and pointing to null
list.head.nextnode = n2 #first node is now linked to second node
n3 = Node(“March”) #third node with value March and pointing to null
n2.nextnode = n3 #second node is now linked to third node

# traversing through the list
print(list.traverse())

# add at the beginning
list.Insert_Beginning(“December”)
# traversing through the list
print(list.traverse())

#Add April at the end
list.Insert_End(“April”)
print(list.traverse())

list.Insert_Between(n3,“November”)
print(list.traverse())

#remove
list.Remove(‘December’) # first value
print(list.traverse())
list.Remove(“March1”)
print(list.traverse())



# Building Stack
# Push (last) and Pop (last)

# assignment: create a parent class for both Stack and Queue
## and implement display function
class Stack:
def __init__(self):
self.stack = []

def push(self, value):
#do not accept duplicate values
self.stack.append(value)
return
def pop(self):
# what is there is no element to remove – handle
self.stack.pop(-1)
return
def display(self):
#print(self.stack)
# handle empty
if len(self.stack) !=0:
return self.stack

class Queue:
def __init__(self):
self.queue = []

def add(self,value):
if value in self.queue:
print(“This value is already in Queue”)
return
self.queue.append(value)
def remove(self):
if len(self.queue)>0:
self.queue.pop(0)
else:
print(“Queue is empty!”)
def display(self):
if len(self.queue) !=0:
return self.queue


class DoubleLinkedList:
def __init__(self,data):
self.data = data
self.next = None
self.prev = None


def LRTraverse(self):
pass
def RLTravserse(self):
pass

if __name__ ==“__main__”:
import june2023 as st

mystack = st.Stack()
mystack.push(55)
print(mystack.display())
mystack.push(75)
print(mystack.display())
mystack.pop()
print(mystack.display())

myq = st.Queue()
myq.add(55)
print(myq.display())
myq.add(75)
print(myq.display())
myq.remove()
print(myq.display())

## DEQUEUE – Double ended queue
import collections

names = [‘Sachin’, ‘Virat’, ‘Dhoni’]
dq = collections.deque(names)
dq.pop()
print(dq)
dq.popleft()
print(dq)
dq.append()
#SORTING: BUBBLE, LINEAR, INSERTION, SHELL, SELECTION

list1 = [10,30,70,20,80,40,60,50]

for i in range(len(list1)-1):
for idx in range(0,len(list1)-1-i):
if list1[idx] > list1[idx+1]:
list1[idx],list1[idx+1] =list1[idx+1], list1[idx]

print(“Bubble Sorted List = “,list1)

list1 = [10,30,70,20,80,40,60,50]
for i in range(len(list1)-1):
for idx in range(i+1,len(list1)):
if list1[i] > list1[idx]:
list1[i],list1[idx] =list1[idx], list1[i]

print(“Linear Sorted List = “,list1)

list1 = [10,30,70,20,80,40,60,50]

def merge(left,right):
result = []
while len(left)!=0 and len(right)!=0:
if left[0] > right[0]:
result.append(right[0])
right.remove(right[0])
else:
result.append(left[0])
left.remove(left[0])
if len(left) ==0:
result+=right
else:
result+=left
return result
def divide(list1):
if len(list1)<2:
#print(“Merge Sort: “,list1)
return list1
else:
center = len(list1) //2
left_list = list1[:center]
right_list = list1[center:]
left_list = divide(left_list)
right_list = divide(right_list)
return list(merge(left_list,right_list))

print(“List 1 before Merge Sort: “,list1)
list1 = divide(list1)
print(“List 1 after Merge Sort: “,list1)
list1 = [80,70,60,50,40,30,20,10]

#for k in range(len(list1)-1):
for i in range(1,len(list1)):
j = i-1 #represents sorted array
next = list1[i] # unsorted array
while list1[j] > next and j>=0:
list1[j+1] = list1[j]
j=j-1

list1[j+1] = next

print(“Insertion Sort: “,list1)

#Shell sort
list1 = [80,70,60,50,40,30,20,10]
gap = len(list1) // 2

while gap >0:
for i in range(gap,len(list1)):
value = list1[i]
j=i #tracker
while j>=gap and list1[j-gap] > value:
list1[j] = list1[j-gap]
j=j-gap
list1[j] = value
gap=gap//2
print(list1)

## Shell sort: o(n square) BEST O(nlongn) worst case
# Search
l1 = [10,50,30,70,40,20]
num = 10
idx = 0
found = False
for i in range(len(l1)):
if num==l1[i]:
found=True
idx = i
break

if found:
print(f”{num} is in the list at {idx})
else:
print(f”{num} is not in the list”)

#
num = 10
idx = 0
found = False
while idx<len(l1) and found is False:
if num==l1[idx]:
found=True
else:
idx+=1
if found:
print(f”{num} is in the list at {idx})
else:
print(f”{num} is not in the list”)


# Binary Search: works on sorted list
l1 = [10,50,30,70,40,20]
l1.sort() #sort the list before use
print(l1)
num = 10
idx = 0
found = False
low,high = 0, len(l1)
while low<high and found is False:
mid = (low+high)//2
if l1[mid] == num:
idx=mid
found=True
elif l1[mid] > num:
high = mid-1
else:
low = mid+1
if found:
print(f”Binary Search: {num} is in the list at {idx})
else:
print(f”Binary Search: {num} is not in the list”)

## BINARY TREE

class Node:
def __init__(self,key):
self.value = key
self.left = None
self.right = None
#Preorder traversal
def pretraverse(self):
print(self.value, end=“, “)
if self.left:
self.left.pretraverse()
if self.right:
self.right.pretraverse()
def intraverse(self):
if self.left:
self.left.intraverse()
print(self.value, end=“, “)
if self.right:
self.right.intraverse()


root = Node(1)
root.left = Node(2)
root.right = Node(3)
root.left.left = Node(4)
root.left.right = Node(5)
root.right.left = Node(6)
root.right.right = Node(7)
root.pretraverse()
print(\n In traverse:”)
root.intraverse()
# Quick sort: divide and conquer
## pick a pivot element:
# 1. first number is pivot
# 2. last number is pivot
# divide the list based on this pivot value:
# left list (numbers < pivot) and right list (numbers > pivot)
l1 = [90,80,70,60,50,40,30,20,10]
def QuickSortAlgo(l1, low,high):
if low < high:
pivot = l1[high]
##logic to partition the dataset into 2 parts
i=low-1
for j in range(low, high):
if l1[j] <= pivot:
i+=1
l1[i],l1[j] = l1[j],l1[i]
l1[i+1], l1[high] = l1[high], l1[i+1]
pivot = i+1
##

#Partioning the left sub-list
QuickSortAlgo(l1,low,pivot-1)

# Partioning the right sub-list
QuickSortAlgo(l1, pivot + 1, high)



size = len(l1)
QuickSortAlgo(l1, 0,size-1)
print(“Sorted list: \n,l1)

def sum_n(n):
if n==0:
return 0
else:
return n + sum_n(n-1)

ans = sum_n(10)
print(ans)

# 10 + 9 + 8 + .. 0

def fibo(n):
if n <=1:
return n
else:
return fibo(n-2) + fibo(n-1)

# fibo(4)= fibo(2) + fibo(3) = 0 + 1 + 1 + 0 + 1 = 3
for i in range(15):
print(fibo(i))

def toh_rec(n_disk, source,inter, target):
if n_disk ==1:
print(f”Move disk 1 from Tower {source} to Tower {target})
return
toh_rec(n_disk-1, source,target, inter)
print(f”Move disk {n_disk} from Tower {source} to Tower {target})
toh_rec(n_disk – 1, inter, source, target)

disks = 3
toh_rec(disks, “Source”,“Inter”,“Target”)

## Backtracking
# CIRCULAR DOUBLY LINKED LIST

class Node:
def __init__(self, data):
self.data = data
self.next = None
self.prev = None

class CircularDLL:
def __init__(self):
self.first = None
def getnode(self,index):
current = self.first
for i in range(index):
current = current.next
if current == self.first:
return None
return current
def insertafter(self, beforenode, newnode):
newnode.prev = beforenode
newnode.next = beforenode.next
newnode.next.prev = newnode
beforenode.next = newnode

def insertbefore(self,beforenode, node):
self.insertafter(beforenode.prev, node)

def insertbegin(self,node):
self.insertend(node)
self.first = node

def insertend(self,node):
if self.first is None:
self.first = node
node.next = node
node.prev = node
else:
self.insertafter(self.first.prev)

def remove(self):
pass

def display(self):
pass

#Main Menu
print(“Type the operation you want to perform: “)
print(“insert <data> after <index>”)
print(“insert <data> before <index>”)
print(“insert <data> at beginning”)
print(“insert <data> at end”)
print(“remove <index>”)
print(“display”)
print(“quit”)
cdll = CircularDLL()
while True:
print(“The values in the Circular Doubly Linked List are: “)
cdll.display()
ch=input(“What do you want to do?”)
ch = ch.lower().strip().split()
print(“CH = “,ch)
if ch[0]==“insert”:
value = int(ch[1])
newNode = Node(value)

if ch[2]==“after”:
data = cdll.getnode(int(ch[3]))
if data is None:
print(“Given index doesnt exist”)
else:
cdll.insertafter(data, newNode)

elif ch[2]==“before”:
cdll.insertbefore(newNode)
elif ch[2]==“at”:
if ch[3]==“beginning”:
cdll.insertbegin(newNode)

elif ch[3]==“end”:
cdll.insertend(newNode)
else:
print(“Error! Try again!”)
continue
else:
print(“Error! Try again!”)
continue
elif ch[0]==“remove”:
idx = int(ch[1])
idx_node = cdll.getnode(idx)
if idx_node is None:
print(“No such index, cant perform remove option.”)
continue
# if condition is false
cdll.remove()
elif ch[0]==“display”:
print(“The values in the Circular Doubly Linked List are: “)
cdll.display()

elif ch[0]==“quit”:
break
else:
print(“Invalid option, try again!”)
# CIRCULAR DOUBLY LINKED LIST

class Node:
def __init__(self, data):
self.data = data
self.next = None
self.prev = None

class CircularDLL:
def __init__(self):
self.first = None
def getnode(self,index):
current = self.first
for i in range(index):
current = current.next
if current == self.first:
return None
return current
def insertafter(self, beforenode, newnode):
newnode.prev = beforenode
newnode.next = beforenode.next
newnode.next.prev = newnode
beforenode.next = newnode

def insertbefore(self,beforenode, node):
self.insertafter(beforenode.prev, node)

def insertbegin(self,node):
self.insertend(node)
self.first = node

def insertend(self,node):
if self.first is None:
self.first = node
node.next = node
node.prev = node
else:
self.insertafter(self.first.prev,node)

def remove(self, node):
if self.first.next == self.first:
self.first = None
else:
node.prev.next = node.next
node.next.prev = node.prev
if self.first == node:
self.first = node.next # node.prev

def display(self):
if self.first is None:
print(“NO VALUES”)
return
current = self.first
print(\n)
while True:
print(current.data, end=‘, ‘)
current = current.next
if current == self.first:
break
print(\n)
#Main Menu
print(“Type the operation you want to perform: “)
print(“insert <data> after <index>”)
print(“insert <data> before <index>”)
print(“insert <data> at beginning”)
print(“insert <data> at end”)
print(“remove <index>”)
print(“display”)
print(“quit”)
cdll = CircularDLL()
while True:
print(“The values in the Circular Doubly Linked List are: “)
cdll.display()
ch=input(“What do you want to do?”)
ch = ch.lower().strip().split()
print(“CH = “,ch)
if ch[0]==“insert”:
value = int(ch[1])
newNode = Node(value)

if ch[2]==“after”:
data = cdll.getnode(int(ch[3]))
if data is None:
print(“Given index doesnt exist”)
else:
cdll.insertafter(data, newNode)

elif ch[2]==“before”:
ref_node = cdll.getnode(int(ch[3]))
cdll.insertbefore(ref_node,newNode)
elif ch[2]==“at”:
if ch[3]==“beginning”:
cdll.insertbegin(newNode)

elif ch[3]==“end”:
cdll.insertend(newNode)
else:
print(“Error! Try again!”)
continue
else:
print(“Error! Try again!”)
continue
elif ch[0]==“remove”:
idx = int(ch[1])
idx_node = cdll.getnode(idx)
if idx_node is None:
print(“No such index, cant perform remove option.”)
continue
# if condition is false
cdll.remove(idx_node)
elif ch[0]==“display”:
print(“The values in the Circular Doubly Linked List are: “)
cdll.display()

elif ch[0]==“quit”:
break
else:
print(“Invalid option, try again!”)

## Assignment: Modify above code to create CircularQueue

## ## PRIORITY QUEUE ## ##
#Priority Queue
from queue import PriorityQueue
apq = PriorityQueue()
apq.put(50)
apq.put(30)
apq.put(40)

print(“Printing elements of Priority Queue:\n)
while not apq.empty():
print(apq.get(), end=“, “)
print()
print(“Adding values with Priority:”)
apq = PriorityQueue()
apq.put((5, “Study”)) # combination of priority and values are added as tuple
apq.put((2, “Sleep”))
apq.put((9, “Eat”))
apq.put((4, “Play”))
print(“2. Printing elements of Priority Queue:\n)
while not apq.empty():
print(apq.get(), end=“, “)
print()

### ### ### ###

class BST:
def __init__(self, value):
self.value = value
self.left = None
self.right = None

def bst_insert(node,key):
if node is None:
return BST(key)
if node.value <key:
node.right = bst_insert(node.right, key)
if node.value >key:
node.left = bst_insert(node.left, key)
return node

def bst_search(node, key):
if node is None or node.value == key:
return root

if node.value < key:
return bst_search(node.right, key)
if node.value > key:
return bst_search(node.left, key)

# 50, 30, 10, 70, 90, 80, 15, 25, 35
if __name__ == “__main__”:
root = None
root = bst_insert(root,50)
bst_insert(root,30)
bst_insert(root, 10)
bst_insert(root, 70)
bst_insert(root, 90)
bst_insert(root, 80)
bst_insert(root, 15)
bst_insert(root, 25)
bst_insert(root, 35)

key = 70
isFound = bst_search(root,key)
if isFound is None:
print(“Value “,key,“not in the BST!”)
else:
print(“Value”,key,” is found in the BST!”)

key = 75
isFound = bst_search(root, key)
if isFound is None:
print(“Value”,key,” not in the BST!”)
else:
print(“Value”,key,“is found in the BST!”)

# insertion, deletion and displaying the tree
# left, node, right
JUNE 2023 DataScience Tutorial PART- II

use employees;

— create table, drop table, alter table

— to read data we use Select

Select * from departments;

 

insert into departments (DID, HOD, DNAME, DCODE) values (100,’Sachin’,’CSE’,’AA00′);

insert into departments  values (101,’MEC’,’Virat’,’AZ00′);

 

insert into departments  values (103,’ECE’,’Rohit’,’KZ00′);

insert into departments  values (105,’CIV’,’Dhoni’,’CZ00′);

insert into departments  values (106,’TCE’,’Sunil’,’BZ00′);

 

Select * from employees;

 

Select @@GLOBAL.secure_file_priv;

 

— Import data into CSV

LOAD DATA INFILE ‘C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/EmployeesData.csv’

INTO TABLE EMPLOYEES

FIELDS TERMINATED BY ‘,’

ENCLOSED BY ‘”‘

LINES TERMINATED BY ‘\n’

IGNORE 1 ROWS;

 

— DELETE to remove a row

DELETE FROM EMPLOYEES WHERE EMPID=120;

 

— Update to modify the existing values in a row

Update Employees

Set Bonus_PCT = 0.45, Salary = 160000

where empid=122;

 

Select * from employees where empid=122

 

—  SELECT

Select FNAME, EMAIL, PHONE, SALARY From Employees;

 

Select FNAME, EMAIL, PHONE, SALARY From Employees where salary >=100000;

 

Select FNAME, EMAIL, PHONE, SALARY From Employees where EMAIL =’ramakrishnavendra@wnbco.co’;

 

— Create Index to make query faster

Create Index idx_salary

on Employees (Salary)

 

—  Relational operators in MYSQL:  =   >   <   >=   <=   <>

Select FNAME, EMAIL, PHONE, SALARY From Employees where EMAIL <>’ramakrishnavendra@wnbco.co’;

 

— BETWEEN  LIKE  IN

 

use employees;

 

select * from employees;

 

select FNAME, LNAME, DOB, Salary from employees where salary > 75000;

— BETWEEN is used for numbers

select FNAME, LNAME, DOB, Salary from employees where salary BETWEEN 75000 AND  95000;

— below statement is same as above (BETWEEN)

select FNAME, LNAME, DOB, Salary from employees where salary >= 75000 AND  salary <= 95000;

 

select FNAME, LNAME, DOB, Salary from employees where salary >= 75000 AND  Bonus_pct <= 0.4; — 0 to 9 rows

 

select FNAME, LNAME, DOB, Salary from employees where salary >= 75000 OR  Bonus_pct <= 0.4; — 25 to 34 rows

 

select FNAME, LNAME, DOB, Salary from employees where salary >= 75000   — 25 rows

 

select FNAME, LNAME, DOB, Salary from employees where  Bonus_pct <= 0.4;   — 9 rows

 

— LIKE – for text data comparison

Select * from employees where FNAME like ‘A%’

 

— IN – checking for given set of values

Select * from employees where FNAME IN (‘O%’, ‘A%’,’B%’);

 

Select * from employees where FNAME IN (‘Anikeet Dey’,’Amol Jain’,’Suresh Narayan Singh Yadav’ );

 

Select * from employees where SALARY IN (36107,  110266, 107799, 198890);

—  Logical operator – AND OR NOT

select * from employees where not( deptid = 100 and salary >100000)

 

—  ORDER BY

select * from employees order by salary  DESC;

 

select * from employees order by salary  ASC;  — Ascending order is by default

 

select * from employees order by DEPTID  ASC, Salary DESC;

use employees;

 

select * from employees where bonus_pct is not null;

 

select * from employees order by FNAME;

select * from employees order by FNAME DESC;

 

UPDATE employees SET BONUS_PCT = NULL WHERE BONUS_PCT > 0.88;

 

select * from employees where bonus_pct is not null limit 3;

 

select * from employees where bonus_pct is not null order by salary DESC limit 5;

 

select avg(salary) as Max_Salary, deptid from employees group by deptid;

 

select avg(salary), count(salary), sum(salary) from employees;

 

select * from employees where salary > (select avg(salary) from employees);

 

— Find highest salary earner for each department

select deptid, MAX(salary) as max_salary FROM employees group by deptid;

 

select deptid, fname, salary from employees where (Deptid, salary)  in 

(select deptid, MAX(salary) as max_salary FROM employees group by deptid);

 

— Questions for Bala:

— 1. Top 5 run scorer

— 2. Top 5 wickets taker

— 3. Top 5 catch takers

— 4. How many matches each team has won and lost

— 5. How many teams won and toss but lost the matches

— 6. How many teams lost by batting first

— 7. How many matches were interrupted by rain

— 8. Top run scorer for each team

— 9. Top wicket takers for each team

— 10. Top catch takers for each team

 

— ——————– ——–

— Questions for Vinay:

— 1. How much is my daily sales?

— 2. Top 5 contributing product by sales

— 3. Top 5 contributing product by count

— 4. Breakup of Payment mode (card/cash/upi/others)

— 5. Top 5 highest priced products

— 6. Which day of the week do more people tend to partake in online grocery shopping

— 7. Top best selling categories (number of transactions per category)

— 8. Top best selling categories (amount of sales)

— 9. Top 20% categories contribution to % of sales

— 10. What is the average sales for each of the 7 days.

— ——————– ——–

 

use employees;

 

select deptid, avg(salary) as “Average Salary” from employees group by deptid;

— COUNT, AVG, SUM, MIN, MAX

 

— Like:  %a, a%,  %a% ,  %b__% (exactly 1 value for _),   ‘a%z’

select fname, email, salary from employees;

 

select fname, email, salary from employees where fname like ‘%ol%’;

 

— J followed by exactly three characters

select fname, email, salary from employees where fname like ‘%J___’;

 

— J followed by atleast four characters

select fname, email, salary from employees where fname like ‘%J____%’;

 

select * from employees where fname in (‘Pankaj Prajapati’,’Manju Mishra’ ,’Arijeet Dasgupta’);

 

select * from employees where fname in (select fname from employees where fname like ‘%J____%’);

select * from employees where (fname,salary) in (select fname,salary from employees where fname like ‘%J____%’);

 

select fname, email, salary from employees where fname like ‘%dey’;

 

 

 

— Questions for Karan

— How many books are currently issued

— Who has read the maximum number of books

— Is there a member who has not issued a book so far

— How many books have been issued for more than a month

— List of books with more than 2000 rupees cost

— List of Books older than 20 years

— The most read books of the library

— The most read genre of library

— Average books that are issued per day

— How many readers have not returned the book on time so far

 

— —————- —- 

— combining data from 2 tables

use employees;

select * from employees;

select employees.FNAME, DID, departments.DNAME from employees, departments

 

 

select employees.FNAME, DID, departments.DNAME from employees, departments where employees.deptid = departments.did;

 

insert into departments values (107, ‘BIOT’,’Kapil’,’BB09′)

 

insert into employees (EMPID, FNAME, LNAME, DOB, EMAIL,PHONE, DOJ,SALARY) 

values (133,’Sachin Tendulkar’,’T’,’1990-05-04′,’sachin@wnbco.co’,9999000009,’2023-05-15′,231456);

 

select * from departments;

 

select employees.FNAME, DID, departments.DNAME from employees, departments where employees.deptid = departments.did;

 

 

— JOINS

 

use employees;

 

select * from employees;

 

select * from departments;

 

select fname, hod from employees, departments where departments.did = employees.DEPTID;  — Inner

 

select fname, hod from employees, departments where employees.DEPTID = departments.did order by empid;

 

select fname, hod from employees INNER JOIN departments ON employees.DEPTID = departments.did order by empid;

 

SELECT T1.COL1, T2.COL2, T3.COL3

FROM (((T1 INNER JOIN T2 ON T1.K1 = T2.K1) INNER JOIN T3 ON T2.K2 = T3.K2) INNER JOIN T4 ON T3.K3 = T4.K3);

 

 

select fname, hod from employees LEFT JOIN departments ON employees.DEPTID = departments.did order by empid;

 

use employees;

 

select fname, hod from employees left join departments on employees.DEPTID = departments.did

UNION

select fname, hod from departments left join employees  on employees.DEPTID = departments.did;

 

 

select fname, hod from departments inner join employees  on employees.DEPTID = departments.did;

 

select * from employees;

 

select sum(EMPID) from employees;

 

select count(EMPID) from employees;

 

select count(EMPID) from employees where salary > 50000;

 

select count(EMPID) , DEPTID from employees

group by deptid

having count(EMPID) > 5;

 

select count(fname), hod from employees left join departments on employees.DEPTID = departments.did

group by deptid

having count(fname) > 3;

 

 

select * from employees;

— Salary_Grade:  <50K: E 50-100K: D    100-150K – C  150-200-B  >200K: A

 

select fname, salary, bonus_pct, 

case when bonus_pct is null then salary

else salary+salary*bonus_pct 

end as total_salary from employees;

 

select fname, salary, 

CASE

when salary < 50000 Then ‘Grade: E’

    when salary > 200000 Then ‘Grade: A’

    when salary > 150000 Then ‘Grade: B’

    when salary > 100000 Then ‘Grade: C’

    Else ‘Grade: D’

End As Salary_Grade

from employees;

use employees;

 

select * from employees;

 

select FNAME, DOB, DOJ, datediff(doj,dob) from employees;

 

 

select fname, NOW() from employees;

 

select now(); — current date and time (system)

 

select date(now());

select curdate();

— default date format in MYSQL is  YYYY-MM-DD

— DD-MM-YYYY

select date_format(curdate(), ‘%d-%m-%Y’) as Date;

 

select curdate() as todays_date, date_add(curdate(),interval 1 Day) as Tomorrow,

date_add(curdate(),interval 1 Week) as next_week,

date_add(curdate(),interval 1 Month) as Next_month,

date_add(curdate(),interval 1 Year) as Next_year;

 

— Add date: Interval, day, week or month or year;

select curdate() as todays_date, date_sub(curdate(),interval 1 Day) as Yesterday,

date_sub(curdate(),interval 1 Week) as last_week,

date_sub(curdate(),interval 1 Month) as last_month,

date_sub(curdate(),interval 1 Year) as last_year;

 

 

select day(curdate()) as day, month(curdate()) as Month, quarter(curdate()) as Quarter, Year(curdate()) as Year,

Weekday(curdate()) as Weekday, week(curdate()) as week, weekofyear(curdate()) as WeekofYear;

 

 

select * from employees where salary > (select salary from employees where empid = 104);

 

—  create a stored procedure

DELIMITER $$

 

Create Procedure GetEmpInfo()

Begin

select * from employees where salary > (select salary from employees where empid = 104);

End $$

DELIMITER ;

 

call GetEmpInfo();

 

drop Procedure GetEmpInfo;

 

— Features: exception handling, conditions and loops (While, Repeat), 

 

— Creating a View

create view EmpData

as

Select EMPID, FNAME, LNAME, DOB, EMAIL, PHONE, DOJ, DEPTID from Employees;

 

select * from empdata;

 

 

—  String functions:  LTRIM, RTRIM, Replace, substring, COncat

select concat(fname,’.’,lname,’@wnbco.co’) from employees;

 

select fname, substring(fname, 3), substring(fname, -3) from employees;

 
import pymysql
con_str = pymysql.connect(host=“localhost”, user=“root”, password=“learnSQL”,database=“employees”)
cursor = con_str.cursor()

q1 = ”’Create Table PY_EMP (
EMPID INT PRIMARY KEY,
NAME VARCHAR(30),
CITY VARCHAR(15))
”’
#cursor.execute(q1)

q2 = ”’INSERT INTO PY_EMP VALUES(1,’Laxman’,’Hyderabad’)”’
cursor.execute(q2)
q2 = ”’INSERT INTO PY_EMP VALUES(2,’Rahul’,’Bangalore’)”’
cursor.execute(q2)


q3 = ”’Select * from Py_Emp”’
cursor.execute(q3)
results = cursor.fetchall()
for row in results:
print(row)

con_str.commit()

con_str.close()
# NUMPY
# pip install numpy
import numpy as np
nums = range(16)
nums = np.reshape(nums,(8,2))
print(nums)
nums = np.reshape(nums,(4,4))
print(nums)
print(“Shape: Rows = “,nums.shape[0], “and columns = “,nums.shape[1])
# indexing
print(nums[1,2], nums[-3,-2])
print(nums[1]) # 2nd row
print(nums[:,1]) # : rows from 0th to (n-1)th
print(nums[-1], nums[:,-2], nums[-1,-2])

# to give your own set of values, you need to provide in terms of list
l1 = [[1,5,7],[2,4,9],[1,1,3],[3,3,2]]
# array is a function to convert list into numpy
mat1 = np.array(l1)
print(mat1)

print(np.zeros((3,3)))
print(np.ones((3,3)))
print(np.full((5,7),2.0))
print(np.full((5,7),9))

# eye – identity matrix: square matrix with 1 on its main diagonal
mat1 = np.eye(5)
print(mat1)

# NUMPY
import numpy as np
# to give your own set of values, you need to provide in terms of list
l1 = [[1,5,7],[2,4,9],[1,1,3],[3,3,2]]
# array is a function to convert list into numpy
mat1 = np.array(l1) # 4 * 3 – shape
print(mat1)
l2 = [[2,3,4],[2,1,2],[5,2,3],[3,2,2]]
# array is a function to convert list into numpy
mat2 = np.array(l2)
print(mat2)

# Matrices operations
print(mat1 + mat2)
print(np.add(mat1, mat2))

print(mat1 – mat2)
print(np.subtract(mat1, mat2))

print(mat1 * mat2)
print(np.multiply(mat1, mat2))

print(mat1 / mat2)
print(np.divide(mat1, mat2))

# actual matrix multiplication is done using matmul()
l3 = [[2,3,4],[2,1,2],[5,2,3]]
# array is a function to convert list into numpy
mat3 = np.array(l3)
print(mat3)
print(“Matrix Multiplication”)
print(np.matmul(mat1, mat3))
print(mat1 @ mat3)
## calculating determinant

l4 = [[1,3,5],[1,3,1],[2,3,4]]
mat5 = np.array(l4)
det_mat5 = np.linalg.det(mat5)
print(“Determinant of matrix 5 is”,det_mat5)
print(“Inverse of matrix 5 is: \n,np.linalg.inv(mat5))

”’
Linear Algebra Equation:
x1 + 5×2 = 7
-2×1 – 7×2 = -5

x1 = -8, x2= 3,
”’
coeff_mat = np.array([[1,5],[-2,-7]])
#var_mat = np.array([[x1],[x2]])
result_mat = np.array([[7],[-5]])
# equation here is coeff_mat * var_mat = result_mat [eg: 5 * x = 10]
# which is, var_mat = coeff_mat inv * result_mat
det_coeff_mat = np.linalg.det(coeff_mat)
if det_coeff_mat !=0:
var_mat = np.linalg.inv(coeff_mat) @ result_mat
print(“X1 = “,var_mat[0,0])
print(“X2 = “,var_mat[1,0])
else:
print(“Solution is not possible”)

# # scipy = scientific python
# pip install scipy
”’
#Inequality = OPTIMIZATION or MAXIMIZATION / MINIMIZATION PROBLEM
Computer Parts Assembly:
Laptops & Desktops
profit: 1000, 600
objective: either maximize profit or minimize cost

constraints:
1. Demand: 500, 600
2. Parts: Memory card: 5000 cards available
3. Manpower: 25000 minutes


”’
”’
Optimization using Scipy
let’s assume d = desktop, n = notebooks

Constraints:
1. d + n <= 10000
2. 2d + n <= 15000
3. 3d + 4n <= 25000

profit: 1000 d + 750 n => maximize
-1000d – 750 n =>minimize

”’
import numpy as np
from scipy.optimize import minimize, linprog
d = 1
n = 1
profit_d = 1000
profit_n = 750
profit = d * profit_d + n * profit_n
obj = [-profit_d, -profit_n]
lhs_con = [[1,1],[2,1],[3,4]]
rhs_con = [10000, 15000, 25000]

boundary = [(0, float(“inf”)), # boundary condition for # of desktops
(10, 200000)] # we just added some limit for notebooks
opt = linprog(c=obj, A_ub=lhs_con, b_ub=rhs_con, bounds=boundary, method=“revised simplex”)
print(opt)
if opt.success:
print(f”Number of desktops = {opt.x[0]} and number of laptops = {opt.x[1]})
print(“Maximum profit that can be generated = “,-1 * opt.fun)
else:
print(“Solution can not be generated”)

### ### ### PANDAS
# Pandas – dataframe which resembles Table structure
# pip install pandas
import pandas as pd
df1 = pd.DataFrame()
print(df1)
print(type(df1))

# fruit production
data = [[“Apple”, 15000, 11000,6000],
[“Banana”, 18000,22000,29000],
[“Mango”, 2, 900, 19000],
[“Guava”, 19000,11000,25000]]

fruit_production = pd.DataFrame(data)
print(fruit_production)
print(“Slicing 1:\n)
print(fruit_production.iloc[1:3,2:]) #based on index
print(“Slicing 2:\n)
print(fruit_production.loc[1:3,2:]) #based on title(names)

fruit_production = pd.DataFrame(data,
columns=[“Fruits”,“January”,“February”,“March”])
print(fruit_production)

fruit_production = pd.DataFrame(data,
columns=[“Fruits”,“January”,“February”,“March”],
index=[“Fruit 1”,“Fruit 2”,“Fruit 3”,“Fruit 4”])
print(fruit_production)

## dataframe.loc() dataframe.iloc()

print(“Slicing 1:\n)
print(fruit_production.iloc[1:3,2:]) #based on index
print(“Slicing 2:\n)
print(fruit_production.loc[[“Fruit 2”, “Fruit 3”],[“February”,“March”]]) #based on title(names)

### ###
# pandas
# pip install pandas
import pandas as pd
l1 = [10,20,30,40,50]
l1 = [[“Sachin”,101,20000,“BATSMAN”],[“Kapil”,501,12000,“BOWLER”],
[“Sunil”,12,21000,“BATSMAN”],[“Zaheer”,725,2000,“BOWLER”]]
df1 = pd.DataFrame(l1,columns=[“Player”,“Wickets”,“Runs”,“Type”],
index=[“Player 1”,“Player 2”,“Player 3”,“Player 4”])
print(df1)

d1 = {‘Apple’:[12000,11000,13000],
‘Banana’: [17000,18000,19000],
‘Mango’:[11000,13000,15000]}
df2 = pd.DataFrame(d1)
print(df2)

# creating dataframe from list of dictionary
data1 = [{“Guava”:9000, “Oranges”: 5000},
{“Guava”:8000, “Oranges”: 7000},
{“Guava”:10000, “Oranges”: 6000}]
df3 = pd.DataFrame(data1)
print(df3)

print(df3.iloc[0,:]) #first row and all column values
print(df3.iloc[:,0])

print(df2.iloc[:,0:2])
print(df2.iloc[[0,2],[0,2]])

#
print(df2.loc[[0,2],[“Apple”,“Mango”]])
print(df1.loc[[“Player 1”,“Player 4”],[“Player”,“Runs”]])

df2.iloc[2,0] = 14000
print(df2)
print(“========= DF1 =============”)
df1[‘Avg’] = df1[‘Runs’] / df1[“Wickets”]
print(df1)
print(“Reading data from DF1: “)
df4 = df1[df1.Player !=‘Sachin’] #filter where clause
print(\n\n New dataset without Sachin: \n, df4)
df1 = df1.drop(“Player”,axis=1) # axis default is 0
# unlike pop() and del – drop() returns a new dataframe
print(df1)


print(“Average Wickets of all the players = “,df1[‘Wickets’].mean())
print(“Average Wickets of players by type = \n\n,df1.groupby(‘Type’).mean())
# axis = 0 refers to rows
# axis = 1 refers to columns

print(\n\nDropping columns from DF1: “)
del df1[‘Wickets’] #dropping column Wickets using del
print(df1)

df1.pop(‘Runs’) #dropping column using pop
print(df1)
#
import pandas as pd

ud_df = pd.read_csv(“D:/datasets/gitdataset/user_device.csv”)
print(ud_df) # 272 rows x 6 columns
print(“Rows: “,ud_df.shape[0])
print(“Columns: “,ud_df.shape[1])

print(ud_df.tail(1))
print(ud_df.head(1))

use_df = pd.read_csv(“D:/datasets/gitdataset/user_usage.csv”)
print(use_df) # 240 rows x 4 columns

result_df = pd.merge(use_df[[‘use_id’,‘monthly_mb’,‘outgoing_sms_per_month’,
‘outgoing_mins_per_month’]], ud_df,
on=‘use_id’)
print(result_df) # [159 rows x 9 columns] = ud_df: 159 + 113, use_df = 159 + 81

result_df = pd.merge(use_df[[‘use_id’,‘monthly_mb’,‘outgoing_sms_per_month’,
‘outgoing_mins_per_month’]], ud_df,
on=‘use_id’, how=‘outer’)
print(result_df)

result_df = pd.merge(use_df[[‘use_id’,‘monthly_mb’,‘outgoing_sms_per_month’,
‘outgoing_mins_per_month’]], ud_df,
on=‘use_id’, how=‘left’)
print(result_df)

result_df = pd.merge(use_df[[‘use_id’,‘monthly_mb’,‘outgoing_sms_per_month’,
‘outgoing_mins_per_month’]], ud_df,
on=‘use_id’, how=‘right’)
print(result_df)

## Working with Pandas – Example ##
import pandas as pd
import numpy as np
df = pd.read_csv(“D:/datasets/gitdataset/hotel_bookings.csv”)
print(df.shape)
print(df.dtypes)
”’
numeric – int, float
categorical – 1) Nominal – there is no order 2) Ordinal – here order is imp
”’
df_numeric = df.select_dtypes(include=[np.number])
print(df_numeric)

df_object= df.select_dtypes(exclude=[np.number])
print(df_object) # categorical and date columns

print(df.columns)
for col in df.columns:
missing = np.mean(df[col].isnull())
if missing >0:
print(f”{col}{missing})
”’
Phases:
1. Business objective
2. Collect the relevant data
3. Preprocessing – making data ready for use
a. Handle missing values
b. Feature scaling – scale the values in the column to similar range
c. Outliers / data correction
d. handling categorical data:
i. Encode the data to convert text to number
East = 0, North = 1, South = 2, West = 3
ii. Column Transform into multple columns
iii. Delete any one column
4. EDA- Exploratory Data Analysis: to understand the data
5. MODEL BUILDING – Divide the train and test


”’
import pandas as pd
df = pd.read_csv(“https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/1_Data_PreProcessing.csv”)
print(df)
Phases:
1. Business objective
2. Collect the relevant data
3. Preprocessing – making data ready for use
a. Handle missing values
b. Feature scaling – scale the values in the column to similar range
c. Outliers / data correction
d. handling categorical data:
i. Encode the data to convert text to number
East = 0, North = 1, South = 2, West = 3
ii. Column Transform into multple columns
iii. Delete any one column
4. EDA- Exploratory Data Analysis: to understand the data
5. MODEL BUILDING –
a. Divide the train and test
b. Run the model
6. EVALUATE THE MODEL:
a. Measure the performance of each algorithm on the test data
b. Metric to compare: based on Regression (MSE, RMSE, R square) or
classification (confusion matrix -accuracy, sensitivity..)
c. select the best performing model
7. DEPLOY THE BEST PERFORMING MODEL

Hypothesis test:
1. Null Hypothesis (H0): starting statement (objective)
Alternate Hypethesis (H1): Alternate of H0

Z or T test:
Chi square test: both are categorical

e.g. North zone: 50 WIN 5 LOSS – p = 0.005

# simple (single value) v composite (specifies range)
# two tailed test v one tailed test [H0: mean = 0,
H1 Left Tailed: mean <0
H1 Right Tailed: mean >0
# level of significance:
alpha value: confidence interval – 95%
p value: p value <0.05 – we reject Null Hypothesis
import pandas as pd
df = pd.read_csv(“https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/1_Data_PreProcessing.csv”)
X = df.iloc[:,:3].values
y = df.iloc[:,3].values
#print(“X: \n”)
#print(X)
#print(“Y: \n”)
#print(y)

# scikit-learn package to perform ML
# install the package by: pip install scikit-learn
# but when you import, its sklearn

# Complete tutorial on sklearn:
# https://scikit-learn.org/stable/

# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=‘mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)

# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0])
print(X)
import pandas as pd
df = pd.read_csv(“https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/1_Data_PreProcessing.csv”)
X = df.iloc[:,:3].values
y = df.iloc[:,3].values
#print(“X: \n”)
#print(X)
#print(“Y: \n”)
#print(y)

# scikit-learn package to perform ML
# install the package by: pip install scikit-learn
# but when you import, its sklearn

# Complete tutorial on sklearn:
# https://scikit-learn.org/stable/

# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=‘mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)

# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[0])],remainder=‘passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
#print(X)

# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)
print(X_train)
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’
Regression: Output (Marks) is a continous variable
Algorithm: Simple (as it has only 1 X column) Linear (assuming that dataset is linear) Regression
X – independent variable(s)
Y – dependent variable
”’
import pandas as pd
import matplotlib.pyplot as plt
link = “https://raw.githubusercontent.com/swapnilsaurav/MachineLearning/master/2_Marks_Data.csv”
df = pd.read_csv(link)
X = df.iloc[:,:1].values
y = df.iloc[:,1].values

”’
# 1. Replace the missing values with mean value
from sklearn.impute import SimpleImputer
import numpy as np
imputer = SimpleImputer(missing_values=np.nan, strategy=’mean’)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])
#print(X)

# 2. Handling categorical values
# encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
lc = LabelEncoder()
X[:,0] = lc.fit_transform(X[:,0])

from sklearn.compose import ColumnTransformer
transform = ColumnTransformer([(‘one_hot_encoder’, OneHotEncoder(),[0])],remainder=’passthrough’)
X=transform.fit_transform(X)
X = X[:,1:] # dropped one column
#print(X)
”’

# EDA – Exploratory Data Analysis
plt.scatter(x=df[‘Hours’],y=df[‘Marks’])
plt.show()
”’
Scatter plots – shows relationship between X and Y variables. You can have:
1. Positive correlation:
2. Negative correlation:
3. No Correlation
4. Correlation: 0 to +/- 1
5. Correlation value: 0 to +/- 0.5 : no correlation
6. Strong correlation value will be closer to +/- 1
7. Equation: straight line => y = mx + c
”’
# 3. splitting it into train and test test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=100)
print(X_train)

”’
# 4. Scaling / Normalization
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
X_train = scale.fit_transform(X_train[:,3:])
X_test = scale.fit_transform(X_test[:,3:])
print(X_train)
”’

## RUN THE MODEL
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
# fit – train the model
regressor.fit(X_train, y_train)
print(f”M/Coefficient/Slope = {regressor.coef_} and the Constant = {regressor.intercept_})

# y = 7.5709072 X + 20.1999196152844
# M/Coefficient/Slope = [7.49202113] and the Constant = 21.593606679699406

y_pred = regressor.predict(X_test)
result_df =pd.DataFrame({‘Actual’: y_test, ‘Predicted’: y_pred})
print(result_df)

# Analyze the output
Mini Python Projects

BILLING PROJECT USING CSV FILE

file = “expenses.csv”
import csv
def add_details():
while True:
code = input(“Enter Item Code:”)
desc = input(“Enter Description:”)
amount = float(input(“Enter Amount: “))
fileobj = open(file,“a”,newline=“”)
csv_writer = csv.writer(fileobj, delimiter=“,”, quotechar=‘”‘,quoting=csv.QUOTE_MINIMAL)
csv_writer.writerow([code,desc,amount])
fileobj.close()
ch=input(“Enter y to continue adding more items:”)
if ch!=‘y’:
break
def view_details():
print(\n\n)
sno = “S.No.”
code = “Code”
desc = “Description”
amt = “Amount”
total = 0
print(“-“*55)
print(f”{sno:<6} {code: <6} {desc:<30} {amt:<6})
print(“-” * 55)
fileobj = open(file, “r”)
csv_reader = csv.reader(fileobj,delimiter=“,”)
line=0
for data in csv_reader:
line+=1
print(f”{line:<6},end=” “)
for i in data:
if data.index(i)==1:
print(f”{i:<30},end=” “)
else:
print(f”{i:<6}, end=” “)
if data.index(i)==2:
total+=float(i)
print()
print(“-” * 55)
print(” “*36,“Total: “,total)
print(“-” * 55)
fileobj.close()

print(\n)

def edit_info():
fileobj = open(file, “r”)
csv_reader1 = csv.reader(fileobj, delimiter=“,”)
#print(list(csv_reader1))
all_data = list(csv_reader1)
code = input(“Enter the Item Code to delete:”)
for row in all_data:
if code in row:
print(“Found!”)
idx = all_data.index(row)
all_data.pop(idx)
fileobj.close()
print(“Final data to push into csv: “,all_data)
fileobj = open(file, “w”, newline=“”)
csv_writer = csv.writer(fileobj, delimiter=“,”, quotechar=‘”‘, quoting=csv.QUOTE_MINIMAL)
for r in all_data:
csv_writer.writerow(r)
fileobj.close()


while True:
print(“MENU”)
print(“1. Add Details\n2. View Details\n3. Calculate Total\n4. Edit the data\n5. Exit”)
ch= input(“Choose your option:”)
if ch==“1”:
add_details()
elif ch==“2”:
view_details()
elif ch==“3”:
pass
elif ch==“4”:
edit_info()
elif ch==“5”:
break
else:
print(“Try again!”)
Mastering Data Science – June 2023

Mastering Data Science Course - JUNE 2023

# comments – ignored by the compiler/interpreter
# Python is case sensitive language
print(5 + 5) # inbuilt function – prints whatever you give to it on the screen
print(“5 + 5”) # “” – used for repeating by print
print(“5 + 5 =”,5+5)
print(‘3 + 5 =’,5+5)

# variables
x = 777
y = 75
print(x+y)
print(x,“+”,y)
print(x,“+”,y,“=”,x+y)
print(x,‘+’,y,‘=’,x+y)
# variables naming rule: it should start with an alphabet, it can have numbers and _
num = 10
num1 = 101
n1m2 = 56
first_number = 56

# write a program to add 2 numbers
num1 = 45
num2 = 66
print(“Sum of two numbers is”,num1+num2)

x = 45
y = 66
print(“Sum of two numbers is”,x+y)


sdfdsagtarebgdvdzdczvdv = 45
dfsgdfsgbsysbsdfhnjdjshff = 66
print(“Sum of two numbers is”,sdfdsagtarebgdvdzdczvdv+dfsgdfsgbsysbsdfhnjdjshff)

# write a program to calculate area and perimeter of a rectangle, when their sides are given
# input -> process -> output
# area = length * breadth
# perimeter = 2 * (length + breadth)
# input:
length = 52
breadth = 31
#process:
area = length * breadth;
perimeter = 2*(length + breadth);
#output
print(“Area of the rectangle is”,area);print(“The perimeter is”,perimeter)

########### Assignment ################
# WAP to calculate area and circunference of a circle when radius is given
# WAP to calculate perimeter of a triangle who 3 sides are given
##########################################

# Basic data types – 5 types
length = 30 # int=integer = numbers without decimal values
print(“type of data = “,type(length)) # <class ‘int’>
# type() – gives the type of the data that is in the variable

length = 29.8 # float = decimal values
print(“type of data = “,type(length)) # <class ‘float’>

number1 = 5+2j
print(“type of data = “,type(number1)) #<class ‘complex’>
# imaginary numbers are the root of -ve numbers = i
print(“Doing a calculation = “, number1 * (52j)) # (a+b)(a-b) = a square – b square
# 25 – (-4) = 29 + 0j

# bool (boolean)- True (1) & False (0)
val1 = True # False
print(“type of data = “,type(val1))

#text -str = string
name = “Sachin Tendulkar”
print(“type of data = “,type(name))

########### Assignment ################
# WAP to calculate area and circunference of a circle when radius is given
# WAP to calculate perimeter of a triangle who 3 sides are given
##########################################

# Basic data types – 5 types

cost_price = 45
selling_price = 67
quantity = 78
profit = (selling_price – cost_price) * quantity
print(“Total profit after selling”,quantity,“items bought at”,cost_price,
“and sold at”,selling_price,“is”,profit,end=\n)
print(f”Total profit after selling {quantity} items bought at {cost_price}
f”and sold at {selling_price} is {profit},end=\n) #f string
print(“Total profit after selling {} items bought at {} and sold at “
“{} is {}”.format(quantity, cost_price,selling_price,profit))
print(“Total profit after selling {0} items bought at {2} and “
“sold at {3} is {1}”.format(quantity, profit,cost_price,selling_price))

quantity = int(“14”)
# int(), str(), bool(), float(), complex()
total_cost = 500
cost_per_quantity = total_cost/quantity
# implicit and explicit conversion
print(f”Cost per items for quantity {quantity} and total cost {total_cost} is {cost_per_quantity:.2f})

player = “Kohli”
country = “India”
position =“Opener”
print(f”This is {player:<15} who plays for {country:>15} as {position:^20} in cricket.”)

player,country,position = “Mubwangwa”, “Zimbabwe”,“Wicket-Keeper”
print(f”This is {player:<15} who plays for {country:_>15} as {position:.^20} in cricket.”)

print(“I am fine \nhow are \nyou”) #escape character (works only inside quotes) \n, \t
# \n will create newline
print(\\n will create newline”)
# \\n will create newline
print(\\\\n will create newline”)

print(f”{player} \n{country} \n{position})
print(“First line content”,end=” – “)
print(“Second line content”,end=\n)

value = 50
print(“Decimal number = “,int(value))
print(“Hexa number = “,hex(value)) #0x
print(“Octal number = “,oct(value)) #0o
print(“Binary number = “,bin(value)) #0b

value = 0b110011001100
print(“Decimal number = “,int(value))
print(“Hexa number = “,hex(value)) #0x
print(“Octal number = “,oct(value)) #0o
print(“Binary number = “,bin(value)) #0b

#### Operations –
##Arithmematic operations
val1, val2 = 54, 5
print(val1 + val2) # 55
print(val1 – val2) #
print(val1 * val2) #
print(val1 / val2) #

print(val1 // val2) #integer division – will return only the integer part
print(val1 ** val2) # power (raise to)- 54 to the power of 5
print(54 ** (1/2)) #square root of 54
print(val1 % val2) # modulo – remainder

#Relational (comparison operator): Output is always a bool value
# == != < > <= >=
val1,val2 = 15, 15
print(val1 == val2) # is val1 equal to val2 ? – T
print(val1 != val2) # F
print(val1 < val2) # F
print(val1 > val2) # F
print(val1 <= val2) # T
print(val1 >= val2) # T

## Logical operations

#Arithematic: + – * / // ** %
#Comparison: > < >= <= == !=
#Logical: and or not: input and output both values are boolean
# Prediction 1: Sachin or Sehwag will open the batting
# Prediction 2: Sachin and Sourav will open the batting
# Actual: Sachin and Rahul opened the batting

print(True or False) # OR gives True even if one value is True
print(True and False) # AND gives False even if one value is False
print(5 > 6 and 8<10) # False and True
print(3 + 4 * 4)
val1,val2,val3 = 10,15,10
print(val1 > val2 and val2 ==val3 or val3==val1 and val1!=val3 and val2>=val3 or val3 !=val1)
print(3 + 5 * 2 6 * 3 + 5 3 * 4 / 3)

#BIT WISE Operators: and (&) or (|), left shift and right shift
print(50 & 30)
print(bin(50), bin(30))
print(int(0b10010))
print(50 | 30)
print(int(0b111110))

print(312 >> 3)
print(12 << 3)

########### Refer the document for assignment ########
# Conditions – if, elif else
# Pass or Fail
print(“PASS”)
print(“FAIL”)

avg = 5
if avg >=35:
print(“PASS”)
print(“Congratulations on great result”)
else:
print(“Result: FAIL”)
#if avg > 90 – A+, 80 -A, 70: B+, 60: B-, 50: C, >35: D, <35: E
avg = float(input(“Enter the average value of the student: “))
if avg>=90:
print(“Super Duper Result: A+”)
elif avg>=80:
print(“Grade: A-“)
elif avg>=70:
print(“Grade: B+”)
elif avg >=60:
print(“Grade: B-“)
elif avg >=50:
print(“Grade: C”)
elif avg >=35:
print(“Grade: D”)
else:
print(“Grade: E”)

### input()
val = int(input(“Enter a number: “)) #takes value from the user

print(val)
print(type(val))
# by default input() takes in value as string

#### Take marks of 5 subjects from the user and calculate sum and average and then assign the grade
# based on above discussion
”’
Write a program to input a value from the user and check if its positive, negative or zero.
if its positive, then check if its odd or even. For even numbers, check if they are multiple of 4
”’
#Nested if conditions
num = int(input(“Enter a number: “))
if num >0:
print(num,“is positive”,end=“”)
if num %2 ==0:
print(” and also even”)
if num%4==0:
print(num,“is divisible by 4.”)
else:
print(” and also odd”)
elif num<0:
print(num, “is negative”)
else:
print(num,“is neither positive not negative”)

#Assignment: Write a program to input 3 sides of a triangle anc check
#if they are: right angled, isoceles, scalene or equilateral

# short form of one line if-else condition
a,b = 10,5
if a>b:
print(a,” is greater”)
else:
print(b, ” is greater”)

#short
result = a if a>b else b

print(result,“is greater”)

## let’s take example of 3 values and check which number is highest
a,b,c = 500,100,500
if a>b:
if a>c:
print(a,“is highest”)
else:
print(c, “is highest”)
else: # b is greater than a
if b>c:
print(b,“is highest”)
else:
print(c, “is highest”)

result = a if a>b else b
result = result if result>c else c
print(result,“is highest – II”)

if a>=b and a>=c:
print(a,“is highest – III”)
elif b>=a and b>=c:
print(b,“is highest – III”)
else:
print(c,“is highest – III”)
# LOOPS:
#1. FOR – when you know exactly how many times to repeat
#2. WHILE – when you dont know exactly how many times but you know when to stop

# range() generates range of values from given =position to the <final position with given increment
range(1,49,6) #range(=start,<end,increment): 1,7,13,19,25,31,37,43
range(5,11) #range(start,end), increment is default = 1: 5,6,7,8,9,10
range(5) # range(end), start default = 0, increment=1: 0,1,2,3,4
# for loop using range
for variable in range(1,49,6):
print(“=================”)
print(“HELLO : “,variable )

for variable in range(5,11):
print(“xxxxxxxxxxxxxxxxx”)
print(“HELLO : “,variable )

for variable in range(5):
print(“………..”)
print(“HELLO : “,variable )

# for counter in range_of_values
# while true condition
final,start = 5,0
while start< final:
print(“I am in while now, start = “,start)
start +=1 #start = start+1
print(“Thats it for today”)
n=10
“””
* * * * *
* * * * *
* * * * *
* * * * *
* * * * *
“””
for j in range(n):
for i in range(n):
print(“*”,end=” “)
print()

”’
*
* *
* * *
* * * *
* * * * *
”’
#z=0
for j in range(n):
#z+=1 # z=z+1
for i in range(j+1):
print(“*”,end=” “)
print()

“””
* * * * *
* * * *
* * *
* *
*
“””
for j in range(n):
for i in range(n-j):
print(“*”,end=” “)
print()

”’
*
* *
* * *
* * * *
* * * * *
”’
for j in range(n):
for k in range(n-j-1):
print(” “,end=“”)
for i in range(j+1):
print(“*”,end=” “)
print()

##### Assignment ##########
#Print below pattern:
“””
* * * * *
* * * *
* * *
* *
*
“””
## assignment code
”’
A pattern

”’
n=15
for j in range(n):
for k in range(n-j-1):
print(” “,end=“”)
if j==0 or j==int(round(n/2)):
for i in range(j+1):
print(“* “,end=“”)
else:
print(“*”,end=“”)
print(” “*j,end=“”)
print(“*”,end=“”)
print()

#### Assignment
# Practice patterns A to Z

for num in range(1,11):
for i in range(1,11):
print(f”{i:>2} * {num:>2} = {num*i: >2},end=” “)
print()

user_says = “y”
while user_says ==“y”:
print(“Hello”)
user_says = input(“Enter y to print again or any other key to stop:”)

check = True
while check:
print(“Hello Again”)
user_says = input(“Enter y to print again or any other key to stop:”)
if user_says!=“y”:
check = False

while True:
print(“Hello Again Again”)
user_says = input(“Enter y to print again or any other key to stop:”)
if user_says!=“y”:
break

user_says = input(“Enter y to print again or any other key to stop:”)
while user_says ==“y”:
print(“Hello”)
user_says = input(“Enter y to print again or any other key to stop:”)

st,end,co=11,100,11
for i in range(st,end,co):
if end%co==0:
if i == (end // co-1) * st:
print(i)
else:
print(i, end=“, “)
else:
if i==end//co*st:
print(i)
else:
print(i, end=“, “)

st,end,co=11,100,11
for i in range(st,end,co):
if i+co>=end:
print(i)
else:
print(i, end=“, “)

”’
Find 10 multiples of even number
If the number is negative then do not execute
if you come across multiple value as a multiple of 12 then skip it
if you come across multiple value as a multiple of 20 then stop it
”’
while True:
num = int(input(“Enter a number: “))
if num>=0:
for i in range(1,11):
if num*i%12 ==0:
continue
if num*i%20 ==0:
break
print(f”{num} * {i} = {num*i})
ch=input(“Hit any key to stop: “)
if len(ch)!=0:
break
#ATM Transaction
while True:
print(“Enter your choice from below menu:”)
print(“1. Deposit \n2. Withdraw \n3. Account Transfer \n4. Statement \n5. Quit”)
ch = input(“Enter your option here: “)
if ch==“1”:
ch=input(“Enter the amount: “)
print(“… continue witht he logic”)
elif ch==“2”:
print(“Logic is not ready”)
elif ch==“3”:
print(“Logic is not ready”)
elif ch==“4”:
print(“Logic is not ready”)
elif ch==“5”:
print(“Thank you for Banking with us, Have a Good Day!”)
break
else:
print(“Invalid Option, please try again!”)
continue


# Check for Prime
is_prime = True
for i in range(2,num//2+1):
if num %i==0:
is_prime = False
break
if is_prime:
print(f”{num} is a Prime number”)
else:
print(f”{num} is not a Prime number”)

# generate prime numbers between 5000 and 10000

for num in range(5000,10001):
is_prime = True
for i in range(2,num//2+1):
if num %i==0:
is_prime = False
break
if is_prime:
print(f”{num} ,end=“, “)



# Assignments:
## 1. WAP to generate first 5 fibonacci numbers: 0,1, 1, 2,3,5,8,13,21…
# guess the number: Computer v Human
import random

num = random.randint(1,100)
#print(“Number = “,num)
#print(“===============”)
counter = 0
while True:
guess = int(input(“Guess the number (1-100): “))
if guess <1 or guess>100:
print(“Invalid number! Try again…”)
continue
counter+=1 #counter = counter + 1
if guess == num:
print(f”Congratulations! You have successfully guessed the number is {counter} steps.”)
break
elif guess < num:
print(“Sorry! Your guess is lower. Try again…”)
else:
print(“Sorry! Your guess is higher. Try again…”)

############
# guess the number: Computer v Computer
import random

num = random.randint(1,100)
print(“Number = “,num)
print(“===============”)
counter = 0
st,en = 1,100
while True:
#guess = int(input(“Guess the number (1-100): “))
guess = random.randint(st,en)
print(“Guess number = “,guess)
if guess <1 or guess>100:
print(“Invalid number! Try again…”)
continue
counter+=1 #counter = counter + 1
if guess == num:
print(f”Congratulations! You have successfully guessed the number is {counter} steps.”)
break
elif guess < num:
print(“Sorry! Your guess is lower. Try again…”)
st=guess+1
else:
print(“Sorry! Your guess is higher. Try again…”)
en=guess-1

################

# guess the number: Computer v Human (automation)
import random

num = random.randint(1,100)
print(“Number = “,num)
print(“===============”)
counter = 0
st,en = 1,100
while True:
guess = (st+en)//2
#guess = random.randint(st,en)
print(“Guess number = “,guess)
if guess <1 or guess>100:
print(“Invalid number! Try again…”)
continue
counter+=1 #counter = counter + 1
if guess == num:
print(f”Congratulations! You have successfully guessed the number is {counter} steps.”)
break
elif guess < num:
print(“Sorry! Your guess is lower. Try again…”)
st=guess+1
else:
print(“Sorry! Your guess is higher. Try again…”)
en=guess-1

###########################
# guess the number: Computer v Human (automation)
import random
import time
TOTAL = 0
max = 100000
start_time = time.time()
for i in range(max):
num = random.randint(1,100)
#print(“Number = “,num)
#print(“===============”)
counter = 0
st,en = 1,100
while True:
guess = (st+en)//2
#guess = random.randint(st,en)
print(“Guess number = “,guess)
if guess <1 or guess>100:
#print(“Invalid number! Try again…”)
continue
counter+=1 #counter = counter + 1
if guess == num:
print(f”Congratulations! You have successfully guessed the number is {counter} steps.”)
TOTAL +=counter
break
elif guess < num:
#print(“Sorry! Your guess is lower. Try again…”)
st=guess+1
else:
#print(“Sorry! Your guess is higher. Try again…”)
en=guess-1
end_time = time.time()
print(“Total steps = “,TOTAL)
print(“Average number of steps = “,TOTAL/max,” total time taken to run”,max,“steps is”,(end_time – start_time))
# Average number of steps = 5.80429 total time taken to run 100000 steps is 36.42937159538269

####################################

# guess the number: Computer v Computer (measure steps)
import random
import time
TOTAL = 0
max = 100000
start_time = time.time()
for i in range(max):
num = random.randint(1,100)
#print(“Number = “,num)
#print(“===============”)
counter = 0
st,en = 1,100
while True:
#guess = (st+en)//2
guess = random.randint(st,en)
print(“Guess number = “,guess)
if guess <1 or guess>100:
#print(“Invalid number! Try again…”)
continue
counter+=1 #counter = counter + 1
if guess == num:
print(f”Congratulations! You have successfully guessed the number is {counter} steps.”)
TOTAL +=counter
break
elif guess < num:
#print(“Sorry! Your guess is lower. Try again…”)
st=guess+1
else:
#print(“Sorry! Your guess is higher. Try again…”)
en=guess-1
end_time = time.time()
print(“Total steps = “,TOTAL)
print(“Average number of steps = “,TOTAL/max,” total time taken to run”,max,“steps is”,(end_time – start_time))
# Average number of steps = 7.4783 total time taken to run 100000 steps is 28.571797370910645
# STRINGS
txt1 = “HELLO”
txt2 = ‘HI’
print(txt1, txt2)
txt3 = ”’I am fine”’
txt4 = “””I am here”””
print(txt3, txt4)

txt5 = ”’I am fine here
Hope you are
doing well
too. So how are you”’
txt6 = “””I am here
Where are you?
How are you?”””
print(txt5)
print(txt6)

# I’m fine
print(“I’m fine”)
# He asked,”How are you?”
print(‘He asked,”How are you?”‘)
# \ – escape sequence
print(“I am \nfine”)
print(‘I\’m fine’)

txt7 = “HELLO”
print(len(txt7))
print(txt7[1])
print(txt7[1],txt7[4])
print(txt7[1:4]) #ELL
print(txt7[:4]) #HELL
print(txt7[2:]) #LLO
print(txt7[:]) #HELLO

txt8 = input(“Enter a text:”)
length = len(txt8)
print(txt8[-1], txt8[length-1]) #last character
print(txt8[0], txt8[-length]) #first character
print(txt8[1:4], txt8[-4:-1]) # ELL
print(txt8[:4], txt8[-5:-1], txt8[:-1]) # HELL
print(txt8[1:], txt8[1:5], txt8[-4:]) # ELLO

a = “HELLO”
b = “THERE”
print(a +” “+ b)
c = (a+” “) * 4
print(c)
print(c[:-1])
#a[0]=”h” #’str’ object does not support item assignment – IMMUTABLE
a = “h”+a[1:]
print(a)

for i in b:
print(i)

for j in range(len(b)):
print(j, b[j])

### ### String Methods #####
a= “heLLLLLLLLLlllo”
## Refer String Methods – https://docs.python.org/3.11/library/stdtypes.html#string-methods
print(a.islower())
print(a.lower())

ch = input(“Enter Yes to continue: “)
if ch.upper()==“YES”:
print(“YES”)

num1 = input(“Enter a number: “)
if num1.isdigit():
num1 = int(num1)
else:
print(“Exiting because of invalid number”)
# String methods
str1 = “Hello How Are You?”
print(“Capitalize the content: “,str1.capitalize())
print(str1.isalpha())
print(“”.join(str1.split(” “)))
str1_1 = “”.join(str1.split(” “))
print(“is alpha for joined text: “,str1_1.isalpha())
if str1_1.isalpha():
print(“Your name has been saved into the database”)
else:
print(“Invalid name, try again!”)

str2 = ” name “
print(str2.isspace())
print(“Strip txt:”,str2.strip())
print(“Strip txt:”+str2.strip())

str3 = “Hello How Are You are you doing you are?”
print(“count of are:”,str3.lower().count(“are”, 0, 21))
print(“Finding find:”, str3.find(“find”)) #returns the position of first occurrence
print(“Finding find:”, str3.find(“you”))
print(“Finding find:”, str3.find(“you”,23,35))

## Assignment – find the position of all the occurrences and print the output in below format:
# found 2 words and they are at positions 22 and 32

print(str3.startswith(“Hello How”))
print(str3.endswith(“?”))

# Replace –
print(str3.replace(“you”,“YOUR”))
print(str3.replace(“you”,“YOUR”, 1))
print(str3[:24]+str3[24:].replace(“you”,“YOUR”, 1))
print(str3) # original text will not change becasue of any method as strings are immutable
# List: linear ordered mutable collection
list1 = [20,30.0, True, “Hello”, [2,3,5]]
print(“Datatype: “, type(list1))
print(“Number of values = “,len(list1))
print(list1[0], type(list1[0]))
print(list1[-1], type(list1[-1]))

print([1,2,3]+[10,20,30])
print([1,2,3] *3)

for i in list1:
print(i)

for i in range(len(list1)):
print(i, “: “,list1[i])

for i in list1[-1]:
print(i)

list1[0] = “Twenty”
print(list1)

#Tuple: linear ordered immutable collection
t1 = (20,30.0, True, “Hello”, [2,3,5])
print(type(t1))
print(t1[0],t1[-1])
#t1[0]= 5 # ‘tuple’ object does not support item assignment

# you can convert list to tuple and tuple to list
l1 = [3,5,9]
print(type(l1))
l1 = tuple(l1)
print(type(l1))
l1 = list(l1)
print(type(l1))

t1 = (2,100,1000,10000,100)
t2 = (2,100,1000,10000,100000)
if t1 > t2:
print(“T1”)
elif t2 > t1:
print(“T2”)
else:
print(“They are equal”)

print(“Count = “,t1.count(100))
print(“Index = “,t1.index(100))

l1 = list(t1)
print(“Count = “,l1.count(100))
print(“Index = “,l1.index(100))

l1.append(99)
l1.append(199)
l1.append(299)
print(“L1 after append= “,l1)

#insert & append – both will add members to the list
l1.insert(2,555)
print(l1)
l1.insert(200,444)
print(l1)
# FIFO – Queue
total = 0
marks=[]
for i in range(5):
m1 = int(input())
marks.append(m1)
total += m1
print(“Marks obtained are:”,marks)
print(“Total = “,total)
#Marks obtained are: [50, 60, 70, 80, 90]
# LIFO – stack
total = 0
marks=[]
for i in range(5):
m1 = int(input())
marks.insert(0,m1)
total += m1
print(“Marks obtained are:”,marks)
print(“Total = “,total)
#Marks obtained are: [90, 80, 70, 60, 50]

# using sum() to do total


marks=[]
for i in range(5):
m1 = int(input())
marks.append(m1)
total = sum(marks)
print(“Marks obtained are:”,marks)
print(“Total = “,total)

l1 = [90, 80, 70, 60, 50]
idx =10
if len(l1) >idx:
l1.pop(10) #default is -1: pop works on index
print(l1)
if 90 in l1:
l1.remove(90) #remove works on value
print(l1)

if l1.count(90) > 0:
l1.remove(90)
l2 = [90, 80, 70, 60, 50]
print(sum(l2))

l2 = [80, 90, 60, 50, 70]
l2.reverse()
print(l2)
l2.sort()
l2.sort(reverse=True)
print(l2)

# extend
l1 = [2,4,6]
l2 = [4,5,9]
l3 = l1 + l2
print(l3)
l1.extend(l2) # l1 = l1 + l2
print(l1)

l3 = [2, 4, 6, 4, 5, 9]
l4 = l3 # deep copy [renaming= duplicate name]
l5 = l3.copy() #shallow copy [taking a photocopy]
print(“l3 = “,l3)
print(“l4 = “,l4)
print(“l5 = “,l5)
l3.append(11)
l4.append(22)
l5.append(33)
print(“After append:”)
print(“l3 = “,l3)
print(“l4 = “,l4)
print(“l5 = “,l5)

”’
Assignment: Write a program using List to read the marks of
5 subjects for 5 students and find the highest marks for:
i) Each Student
ii) Each Subject

e.g. [[44,77,66,55,88],[64,54,74,84,94],[99,66,44,77,55],[83,74,65,56,67],[91,81,71,61,51]]
Highest for Student 1: 88
2: 94
3: 99
4: 83
5: 91
Highest for Subject 1: 99
2: 81
3: 74
4: 84
5: 94
”’
all_marks = []
for i in range(5):
students=[]
print(“Marks for Student “,i+1,“: “)
for j in range(5):
m = int(input(“Marks obtained in Subject “+str(j+1)+“: “))
students.append(m)
all_marks.append(students)
print(“All marks = “,all_marks)
# [[44,77,66,55,88],[64,54,74,84,94],[99,66,44,77,55],[83,74,65,56,67],[91,81,71,61,51]]

#All marks = [[44, 77, 66, 55, 88], [64, 54, 74, 84, 94],
# [99, 66, 44, 77, 55], [83, 74, 65, 56, 67], [91, 81, 71, 61, 51]]


l3 = [2, 4, 6, 4, 5, 9]
l3.clear()
print(l3)

#Tuple
t1 = ()
print(type(t1))
t1 = (100,)
print(type(t1))
t1 = (100,200)
print(type(t1))

######### DICTIONARY ##########
# dictionary -> unordered mutable collection
# dict = {key:value}
dict1 = {}
print(type(dict1))
dict1 = {1:500,“Two”: “Sachin Tendulkar”,“some index”:999,“City”:“Mumbai”, 10:1000}
#Key shouldnt be duplicated
print(dict1[1])

#DICTIONARY
dict1 = {}
print(type(dict1))

all_info= {}
# all_info= {“”:[],””:[],””:[]}
for i in range(3):
rno = input(“Enter the Roll No. “)
marks=[]
for j in range(3):
m = float(input(“Enter the marks: “))
marks.append(m)
t_dict = {rno:marks}
all_info.update(t_dict)

print(“All Info: \n,all_info)

t1 = (3,6,9)
a,b,c = t1
#a,b= t1 ValueError: too many values to unpack (expected 2)
# a,b,c,d = t1 ValueError: not enough values to unpack (expected 4, got 3)


all_info = {‘101’: [3.0, 4.0, 5.0], ‘102’: [7.0, 8.0, 9.0], ‘103’: [4.0, 5.0, 6.0],
‘104’:[2.0, 4.0, 6.0], ‘105’:[3.0,6.0, 9.0]}
chk_rno = input(“Enter the roll number for information: “)

print(all_info.keys())
print(all_info.values())
print(all_info.items())

for k in all_info.items():
if k[0]==chk_rno:
print(k[1])

found = False
for k,v in all_info.items():
if k==chk_rno:
print(v)
found = True

if not found:
print(“Sorry, roll no is not in the list”)

chk_rno = input(“Enter the roll number to delete: “)
for k in all_info.items():
if k[0]==chk_rno:
all_info.pop(chk_rno)
break
print(“After removal: \n,all_info)
all_info.popitem() #removes last updated value
print(“After popitem: \n,all_info)

all_info = {‘101’: [3.0, 4.0, 5.0], ‘102’: [7.0, 8.0, 9.0], ‘103’: [4.0, 5.0, 6.0],
‘104’:[2.0, 4.0, 6.0], ‘105’:[3.0,6.0, 9.0]}
#copy
all_info1 = all_info #Deep copy
all_info2 = all_info.copy() # Shallow copy
print(“First:”)
print(all_info)
print(all_info1)
print(all_info2)
all_info.update({9:“Orange”})
all_info1.update({10:“Apple”})
all_info2.update({11:“Mango”})
print(“Second:”)
print(all_info)
print(all_info1)
print(all_info2)

### SETS : linear unordered collection

set1 = {3,5,6,8}
print(type(set1))

list1 = [‘Mango’,‘Mango’,‘Mango’,‘Mango’,‘Mango’]
set1 = {‘Mango’,‘Mango’,‘Mango’,‘Mango’,‘Mango’}
print(len(list1))
print(len(set1)) # set will have unique values
list1 = [“A”,“B”,“C”,“D”,“E”]
list2 = [“C”,“D”,“A”,“B”,“E”]
set2 = {“C”,“D”,“A”,“B”,“E”}
print(set2) # order doesnt matter in set
# SETs are used to represent values
set2 = {“C”,“D”,“A”,“B”,“E”}
set1 = {“C”,“D”,“A”,“F”,“G”}

#frozen_set is the immutable version of set
#sets are mutable
set1.add(“Z”)
set3 = {“P”,“Q”}
set2.update(set3)
print(“Set1: “,set1)
print(“Set2: “,set2)
set2.pop()
print(“Set2: “,set2)
set2.remove(“Q”)

#Operations on sets: Union, Intersection, Difference, difference_update
print(“Set1: “,set1)
print(“Set2: “,set2)
#union – combining all the elements from the sets
print(set1.union(set2))
print(set1 | set2)

#intersection: only the common elements
print(set1.intersection(set2))
print(set1 & set2)

#difference
print(set1 – set2)
print(set2 – set1)
print(set1.difference(set2))

#symmetric difference
print(set1 ^ set2)
print(set2 ^ set1)
print(set1.symmetric_difference(set2))

set1.union(set2) #union update – set1 will have new values
set1.intersection_update(set2) # set1 will be updated with output values
set1.difference_update(set2)
set1.symmetric_difference_update(set2)
set1.issubset(set2)

# FUNCTIONS –

# inbuilt functions: print(), input(), ….
# user defined functions

#just created a function called as myquestions()
# required positional argument
def myquestions(choice,a):
print(“Choice =”,choice,“and A =”,a)
if choice==1:
print(“Whats your name?”)
print(“What are you doing here?”)
print(“When will you be done?”)
else:
print(“Whats your Work here?”)
print(“What are your interests?”)
print(“Why dont you stand out?”)
return “Job Done”

myquestions(1,5) #passing while calling
#print(“My Out = “,myout)
print(“Another time let me repeat”)
myout = myquestions(0,4)
print(“My Out = “,myout)

str1 = “HELLO”
output = str1.lower() #returns a value
print(output)

list1 = [5,1,9,6,2,7,3]
output2 = list1.reverse() #doesnt return a value
print(output2)
print(list1)

out=print(“heloo”)
print(“OUT=”,out) #doesnt return
out = input() #returns
print(“OUT=”,out)


# non-required = default argument
# non-positional = keyword argument
def myquestions(choice,a=-99): #default value
print(“Choice =”,choice,“and A =”,a)
if choice==1:
print(“Whats your name?”)
print(“What are you doing here?”)
print(“When will you be done?”)
else:
print(“Whats your Work here?”)
print(“What are your interests?”)
print(“Why dont you stand out?”)
return “Job Done”



myquestions(1,5) #passing while calling
myquestions(1)
myquestions(a=1,choice=5) #keyword argument

def myfavoritefruits(f1,*fruits, **interests): #* and ** for variable number of arguments
# * – as tuple, ** will read as dictionary
if len(fruits)==0:
print(“My favorite fruit is”,f1)
else:
#tuple(list(fruits).append(f1))
print(“My favorite fruits are:”,tuple(set(fruits)))
if len(interests)!=0:
print(“My other interests are: “)
for i,j in interests.items():
print(“Favorite”,i,“is”,j)


myfavoritefruits(“Guava”,“Orange”)
myfavoritefruits(“Orange”)
myfavoritefruits(“mango”,“mango”,“mango”,“apple”, color=“red”,place=“hyderabad”,food=“Biryani”)
# required, default, keyword, variable length arguments

def myfun1(x,y):
print(f”x={x}, y={y} and sum is {x+y})


myfun1(10,20)
a,b = 5,6
myfun1(a,b)
x,y=2,8
myfun1(x=y,y=x)
myfun1(y,x)

#Scope of a variable
PI = 3.14

def myfun1(x,y):
print(f”x={x}, y={y} and sum is {x+y})
global g
print(“Print G =”,g)
h=70
g=20
print(“Local: “,g)

g=100 #Global variable
myfun1(10,20)
#print(“H =”,h)

def my_fun2(n):
all_values = []
for i in range(n):
all_values.append(input(“Enter values: “))
return all_values

val = my_fun2(10)
print(val)
# class and objects
# functions and variables – for class and specific for objects

class Library:
total_books = 0 #class level variable
var2 = –1
name = “ABC International School Library”
def __init__(self,title, author,copies=1): # auto called when object is created
print(“Adding books to the library”)
Library.total_books+=1
self.total_copies = copies #object level variable
self.title = title
self.author = author

def display_detail(self):
print(“================================”)
print(“Title \t\t Author \t\t\t Copies”)
print(“——————————–“)
print(f”{self.title} {self.author} {self.total_copies})
print(“================================”)

@classmethod
def print_total(cls):
print(“Total book count is: “,cls.total_books)

# variables – needs to be told if its object level – by default its class level
# methods – needs to be told if its class level – by default its object level

lib1 = Library(“Python Programming”,“Sachin Tendulkar”,10) # __init__ will be called
lib2 = Library(“Machine Learning”,“Virat Kohli”) # __init__ will be called
lib3 = Library(“SQL Programming”, “Dhoni”, 12) # __init__ will be called
print(Library.total_books) #calling using class name
print(lib1.total_books) #calling using object
print(lib2.total_books)

print(Library.total_books)
print(lib1.total_books)
print(lib2.total_books)

print(lib1.total_copies)
print(lib2.total_copies)
print(lib2.author)
print(lib3.author)
lib1.display_detail()
lib1.print_total()
lib3.print_total()
Library.print_total()


class MathsOps:
def __init__(self,a:int, b=1):
self.num1 = a
self.num2 = b # Protected members are defined as _ (single underscore)
self.__mul = –1 #we are making it as PRIVATE (__)
self.div = –1
self.square = self.num1 ** 2
self._power = self.num1 ** self.num2 #protected

def add(self):
return self.num1 + self.num2
def subtract(self):
return self.num1 – self.num2
def multiply(self):
self.__mul = self.num1 * self.num2
return self.__mul
def divide(self):
self.div = self.num1 / self.num2

m1 = MathsOps(100)
print(m1.add())
print(m1.subtract())
#print(m1.__mul)
print(“Multiply: “,m1.multiply())
division = m1.divide()
print(“Division = “,division)
print(m1._power) #protected concept is there Python but not implemented
print(m1.square)
class CountryInfo:
def __display1(self): #private
print(“Country = India”)
def _display2(self): #protected
print(“Country is India”)
class University (CountryInfo):
def __init__(self, name,year_estb):
self.uname = name
self.year_estb = year_estb
def display(self): #public
print(f”The university {self.uname} was established in the year {self.year_estb})

def generateInfo(self):
print(“Sorry this is not implemented”)

def makeitready(self):
print(“Sorry this is not implemented”)
class CityInfo:
def display(self):
print(“City = Hyderabad”)

class Professors(University, CityInfo):
def __init__(self,name,univ,salary, year_estb):
University.__init__(self,univ, year_estb)
self.name = name
self.salary = salary
def printinfo(self):
print(self.name,“: “,self.salary)
University.display(self)
CityInfo.display(self)

def generateInfo(self):
print(“generating payslip”)
class Students(CityInfo,University):
def __init__(self,name,univ,marks, year_estb):
University.__init__(self,univ,year_estb)
self.name = name
self.marks = marks

def printinfo(self):
print(self.name,“:”,self.marks)
University.display(self)
CityInfo.display(self)
def generateInfo(self):
print(“Marks are being generated”)

u1 = University(“ABC University”, 1968)
u2 = University(“XYZ Management University”, 1998)
u2.display()
p1 = Professors(“Sachin”,“ABC University”,167000,1968)
p1.printinfo()
s1 = Students(“Virat”,“XYZ Management University”, 75,1998)
s1.printinfo()

s1.display()
p1.display()
#p1.__display1() #private members are not accessible outside the class
p1._display2() #protected members are accessible by derived classes

ct1 = CityInfo()
ct1.display()

s1.generateInfo() # foreced the derived class to have implemeted the method
p1.generateInfo() # foreced the derived class to have implemeted the method

# Please do the program from:
# https://designrr.page/?id=206786&token=2174026775&type=FP&h=8743
# Page no. 154

#Android: STORY MANTRA:
# categories -> Python -> Python book
class CityInfo:
def cityinfoprint(self):
print(“City is Hyderabad”)
class University:
def __init__(self,univ):
self.univ = univ
def printInfo(self):
print(“University is “,self.univ)
def sampleM(self,num1,num2):
print(“Sample M 3”)
class Student(University):
def __init__(self,name,age,univ):
#University.__init__(self, univ)
super().__init__(univ)
self.name=name
self.age = age
def printInfo(self):
print(“Name, Age & University :”,self.name, self.age, self.univ)
super().printInfo()
CityInfo.cityinfoprint(self)
def sampleM(self):
”’

:return:
”’
print(“Sample M 1”)

s1 = Student(“Sachin”, 49,“Mumbai University”)
s1.printInfo()
s1.sampleM()

def myfunction1(a=10,b=20):
”’
This is my sample function to do nothing but to show off
:param a:int is for length
:param b:int for for taking input data as breadth
:return: nothing
”’
return a + b

num1 = 78
num2 = 88
print(num1 + num2) # operator overloading
print([2,46] + [5,9,8]) # operator overloading

print(input.__doc__)
print(print.__doc__)
print(len.__doc__)
print(myfunction1.__doc__)
from abc import ABCMeta, abstractmethod
class Shape:
__metaclass__ = ABCMeta

def __init__(self, shapeType):
self.shapeType = shapeType
@abstractmethod
def area(self):
pass

@abstractmethod
def perimeter(self):
pass

class Rectangle(Shape):
def __init__(self, length,breadth):
Shape.__init__(self,“Rectangle”)
self.length = length
self.breadth = breadth

def perimeter(self):
return 2*(self.length + self.breadth)

def area(self):
return self.length * self.breadth

class Circle(Shape):
def __init__(self, radius):
Shape.__init__(self,“Circle”)
self.rad = radius

def perimeter(self):
return 2* 3.14 * self.rad

def area(self):
return 3.14 * self.rad**2


r1 = Rectangle(10,5)
print(“Perimeter is”,r1.perimeter())
print(“Area is”,r1.area())

c1 = Circle(5)
print(“Perimeter is”,c1.perimeter())
print(“Area is”,c1.area())

class Books:
count = 0
def __init__(self,title):
self.title = title
Books.count+=1

@classmethod
def totalcount(cls):
print(“Total titles in the library = “,cls.count)

def __del__(self):
print(f”The object with title {self.title} is getting destroyed. You cant use it again!”)
b1 = Books(“Python Programming”)
b2 = Books(“SQL Programming”)
b3 = Books(“Machine Learning”)
b4 = Books(“Tableau book”)
print(b1.title)
b1.totalcount()
print(b3.title)
b3.totalcount()
b4.totalcount()
print(b4.title)

del b4
input()

b4.totalcount()

def myfun1(a,b):
print(a+b)
return a+b

def myfun2():
print(“I do nothing”)

class Sample:
def __init__(self):
print(“Object created”)
def printinfo(self):
print(“Some output here”)

if __name__ ==“__main__”:
myfun1(99,87)
myfun1(99,7)
s1 = Sample()


=====================


#import p11 as RocketScience
from p11 import myfun1

#RocketScience.myfun2()
print(myfun1(5,10))

import random
random.random()

########### FILE HANDLING
#mode of file handling: r (read), w (write-delete old content and write new), a (Append)
## r+ (read & write), w+ (write & read), a+ (append and read)

filename = “abc.txt”

fileobj = open(filename,“r+”)
content = ”’This is a sample content
story about a king
and a queen
who lived in a jungle
so I am talking about
Lion the kind of jungle”’

fileobj.write(content)
content2 = [‘THis is sample line 1\n,‘line 2 content \n,‘line 3 content \n]
fileobj.writelines(content2)
fileobj.seek(20)
output = fileobj.read()
print(“Content from the file:\n,output)
fileobj.seek(10)
output = fileobj.read()
fileobj.seek(10)
content3 = fileobj.read(15)
content4 = fileobj.readline()
print(“Content from the file:\n,output)

fileobj.seek(0)
content5 = fileobj.readlines()
print(“Content from the file:\n,content5)

fileobj.close()

## Exception handling
#SyntaxError : print(“Hello)
#logical error: you make error in the logic – very difficult to find
#runtime errors (exceptions):

num1 = int(input(“Enter a number: “))
# ValueError exception
# Exceptions
a=“k”
b=10
c=-1
try:
c = b/d

except ZeroDivisionError:
print(“Denominator is zero hence stopping the program from executing”)
except TypeError:
print(“Invalid numbers, hence exiting…”)
except NameError:
print(“One of the values has not been defined. Try again”)
except Exception:
print(“Not sure but some error has occurred, we need to stop”)
else:
print(“Answer is”,c)
finally:
print(“We have completed division process”)
#
class InvalidLength(Exception):
def __init__(self,value=0):
self.value = value

length, breadth = –1,-1
while True:
try:
length = int(input(“Enter length: “))
except ValueError:
print(“Invalid number, try again…”)
else:
#assert length > 0, “Rectangle with this diamension is not possible”
if length <=0:
try:
raise InvalidLength
except InvalidLength:
print(“Invalid value for Length hence resetting the value to 1”)
length=1
break
while True:
try:
breadth = int(input(“Enter breadth: “))
except ValueError:
print(“Invalid number, try again…”)
else:
assert breadth>0,“Rectangle with this diamension is not possible”
break

area = length * breadth
print(“Area of the rectangle is”,area)
## ### ##
# datetime, date, time

from datetime import datetime, timedelta
import time
from pytz import timezone

curr_time = datetime.now()
print(“Current time is”,curr_time)
print(“Current time is”,curr_time.strftime(“%d / %m /%Y”))
print(curr_time.year, curr_time.day, curr_time.date())
for i in range(5):
time.sleep(1) # sleep for 2 seconds
print(“Time left:”,5-i,“seconds”)
print(“Good Morning”)
print(“Current time is”,datetime.now())
print(“Date 2 days back was”,(curr_time-timedelta(days=2)).strftime(“%d/%m/%Y”))
print(“UTC Time is”,datetime.now(timezone(‘UTC’)))
print(“UTC Time is”,datetime.now(timezone(‘US/Eastern’)))
print(“UTC Time is”,datetime.now(timezone(‘Asia/Kolkata’)))

Download link:

 

https://dev.mysql.com/downloads/installer/

Create table employees.Employees( EMPID INT Primary Key auto_increment, FNAME VARCHAR(55) NOT NULL, LNAME VARCHAR(55), DOB DATE, EMAIL VARCHAR(35) unique, PHONE VARCHAR(11), DOJ DATE Default(‘2021-07-20’), — YYYY-MM-DD SALARY FLOAT(2), DEPTID INT, Foreign Key (DEPTID) References Departments(DID), Constraint U_UC_LN_DOB Unique(LNAME,DOB), CHECK(Salary 0.0) ) — Constraints: Primary Key, Foreign Key, Not Null, Unique, Check, Default

— Modifying a table – Table is already created and in use –  ALTER TABLE

— ADD or DELETE (DROP) or MODIFY or RENAME a Column

— DDL command to delete is DROP

— DDL command to modify is ALTER

use employees;

 

ALTER table employees ADD BONUS Float(3);

 

ALTER table employees ADD dummy Float(3);

 

ALTER TABLE Employees DROP COLUMN dummy;

 

ALTER TABLE EMPLOYEES MODIFY COLUMN BONUS float(4)

 

ALTER TABLE EMPLOYEES RENAME COLUMN BONUS to BONUS_PCT;

Python and Machine Learning Course May 2023

print(‘5 + 3 =’, 5+3)

pi = 3.1 # variable
g = 9.8
radius = 15
# area of a circle = pi * r square
# commentsm jhjkhjkhkjhjghg
print(“Area of a circle =”,3.1 * radius * radius)

# Write a program to find area and perimeter of a rectangle by taking length and breadth as input
length = 21
breadth = 38
area = length * breadth
perim = 2 * (length + breadth)
print(“Area of rectangle =”,area)
print(“Perimeter of rectangle =”,perim)

#Write a program to find area and perimeter of a square
side = 35
area = side * side
perim = 4 * side
print(“Area of square =”,area)
print(“Perimeter of square =”,perim)

#Find total and average of five numbers
num1 = 78
num2 = 82
num3 = 79
num4 = 91
num5 = 59
total = num1 + num2 + num3 + num4 + num5
avg = total / 5
print(“Total = “,total,”Average is”,avg)

#Find total and average of five numbers
num1, num2, num3,num4,num5 = 78,82,79,91,59
total = num1 + num2 + num3 + num4 + num5
avg = total / 5
print(“Total = “,total,“Average is”,avg)
# find the value of 2x square -10x +30, when x = 8
x = 8
y = 2 * x * x – (10 * x) + 30 # expression
print(“Value of y =”,y)

#different types
# basic data types – they can store only one value at a time
# integer (int) – numbers without decimal values
num1 = 5
num2 = 0
num3 = –99 #<class ‘int’>
print(“1. “,type(num1)) #type(num1) – this is first to be called and then print() is called

# float (float) – decimal values
num1 = 5.0
num2 = 0.9
num3 = –99.1 #<class ‘float’>
print(“2. “,type(num1))

# string (str)
var1 = “Hello”
var2 = ‘Good Evening’
var3 = ”’Hi there”’
var4 = “””Python learning”””
print(type(var1), type(var2),type(var3),type(var4))

# boolean (bool) – True & False
var1 = True
var2 = False
print(type(var1), type(var2))
# complex (complex): complex numbers are square root of negative numbers
# square root of -25 = 5i = 5j
var1 = 5j
print(type(var1))

## Operations

#Arithematics operations (maths op)

#Relational operations

#logical operations (and/or/not)

Programming with Hari
print(‘Hello’)
print()   #comment is for human being
#sdfsdfdsfsdfdsf
print(‘hello again hhjvjbhjhlbbuihuhuhuhu’)
print(‘5+3’)
print(5+3)
print(“4+6=”,4+6)

# y = x + 5, find y when x is 3

# 5 is constant – you cant change its value
# x, y are variables – their values can change
# y dependent variable because its value depends on x
# x is independent variable because you can assign any value to it

x = 3
y = x+ 5
print(“When X is “,x,“then Y is “,y)

# write a program to calculate area of a rectangle


l = 5
b = 8
area = l * b
print(” Length is”,l,“breadth is”,b,“and area is”,area)
# f-string format string
print(f“Length is {l} and breadth is {b} so the area is {area})

#calculate area and circumference of a circle and print the output as:
# Circle with a radius will have area of area and circumference of circumference
# arithematic
val1 , val2 = 25,20
print(val1 + val2)
print(val1 – val2)
print(val1 * val2)
print(val1 / val2)
print(val1 // val2) # integer division
print(val1 % val2) # mod – remainder
print(5 ** 3) #power

#relational operators: > < >= <= == !=
#output is always – True or False
val1, val2, val3 = 25,20,25
print(val1 > val2) #T
print(val1 > val3) #F
print(val1 >= val3) #T
print(val1 < val2) #F
print(val1 < val3) #F
print(val1 <= val3) #T
print(val1 == val2) #F
print(val1 != val3) #F
print(val1 == val3) #T

# Logical operators: input &output are bool: and , or, not
print(“Logical operators”)
print(True and True) #I did both the tasks
print(False and True)
print(True and False)
print(False and False)

print(True or True) #T
print(False or True) #T
print(True or False) #T
print(False or False) #F

print(not True)
print(not False)

r= –5
if r>0:
area = 22/7*(r**2)
print(“area of the circle is”,area)
else:
print(“Invalid data”)

num=4
if num>0:
print(“Positive”)
elif num<0:
print(“Negative”)
else:
print(“Neither positive not negative”)
# wap to check if a number is divisible by 3 or not
# wap to check if a number is odd or even
# for: when you know how many times you need to repeat something
# while:
# range(=start, < end, =increment) range(3,9,2): 3,5,7
for counter in range(3,9,2):
print(“HELLO, The counter value is”,counter)
##
for i in range(1,101): #when increment is not given, default value is 1
print(i)


#Generate even numbers from 2 to 100 (both inclusive)
n=12
for i in range(n): #when start number is not given, default value is zero
print(“* “,end=“”)
print()

”’
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
”’
for j in range(n):
for i in range(n):
print(“* “,end=“”)
print()

”’
*
* *
* * *
* * * *
”’
for j in range(n):
for i in range(j+1):
print(“* “,end=“”)
print()

”’
* * * *
* * *
* *
*
”’
for j in range(n):
for i in range(n-j):
print(“* “,end=“”)
print()

”’
* * * *
*
*
* * * *
”’
for j in range(n):
if j==0 or j==n-1:
for i in range(n):
print(“* “,end=“”)
print()
else:
for k in range(n-j-1):
print(” “,end=” “)
print(“*”)


#while
cont = “y”
while cont==“y”:
print(“Hello from While”)
cont = input(“Do you want to continue: hit y for yes anyother key for no:”)
num = 10
while num >0:
print(“hello”)
num=num-1

ch=‘y’
while ch==‘y’ or ch==“Y”:
print(“I am fine”)
ch=input(“Do you want to continue (y for yes):”)

while True:
print(“How are you?”)
ch=input(“Enter n to stop:”)
if ch==“n”:
break #that will throw you out of the loop

# guessing game
num = 45

while True:
guess = int(input(“Guess the number (1-100): “))
if guess<1 or guess>100:
continue
if num==guess:
print(“Congratulations… You have guessed it correctly!”)
break
elif num<guess:
print(“Incorrect! You have guessed higher number”)
else:
print(“Incorrect! You have guessed lower number”)

## Menu Options
print(“WELCOME TO MY LIBRARY APPLICATION”)
while True:
print(“1. Issue the Book”)
print(“2. Return the Book”)
print(“3. Add new Book to the library”)
print(“4. Remove a book from the library”)
print(“5. Exit”)
choice = input(“Enter you choice: “)
if choice==“1”:
print(“Given book has been issued!”)
elif choice==“2”:
print(“Book has been returned to the library!”)
elif choice==“3”:
print(“Added the book to the library!”)
elif choice==“4”:
print(“Removed the book from the library database!”)
elif choice==“5”:
print(“Have a good day!”)
break
else:
print(“Invalid option! Try again”)
continue
####
str1 = 'HELLO'
str2 = "I am fine"
str3 = '''Where are you going?
How long will you be here?
What are you going to do?'''
str4 = """I am here
I will be here for next 7 days
I am going to just relax and chill"""
print(type(str1),type(str2),type(str3),type(str4))
print(str1)
print(str2)
print(str3)
print(str4)

# What's you name?
str5 = "What's your name?"
print(str5)
#He asked,"Where are you?"
str6 = 'He asked,"Where are you?"'
print(str6)

#He asked,"What's your name?"
#escape sequence \
print('''He asked,"What's your name?"''')
print("He asked,\"What's your name?\"")

print('nnnnn\nnn\tnn')

print("\FOlder\\newfolder")
# \n is used to print newline in python
print("\\n is used to print newline in python")

# \\n will not print newline in python
print("\\\\n will not print newline in python")

str1 = "Hello You"
str2 = "There"
print(str1 + str2)
print(str1 *5)
for i in str1:
print("Hello")

#indexing
print(str1[2])
print("last element: ",str1[4])
print("last element: ",str1[-1])
print("second element: ",str1[-8])
print("ell: ",str1[1:4])
print("ell: ",str1[-8:-5])
print("First 3: ",str1[:3])
print("First 3: ",str1[:-6])
print("Last 3: ",str1[6:])
print("Last 3: ",str1[-3:])

#Methods - exactly same as your functions - only difference is they are linked to a class
import time
str1 = "HELLO"
print(str1.replace("L","X",1))

sub_str = "LL"
str2 = "HELLO HOW WELL ARE YOU LL"
cnt = str2.find(sub_str)
print("Count = ",cnt)

if cnt<0:
print("Sorry, no matching value hence removing")
else:
print("Value found, now replacing")
for i in range(5):
print(". ",end="")
time.sleep(0.5)
print("\n")
print(str2.replace(sub_str,"OOOO"))


out_res = str2.split("LL")
print("Output Result = ",out_res)

out_str = "LL".join(out_res)
print(out_str)

print(str2.title())
print(str2.lower())
print(str2.upper())

str3 = 'hello how well are you ll'
print(str3.islower())
print(str3.isupper())

num1 = input("Enter a number: ")
if num1.isdigit():
num1 = int(num1)
else:
print("Invaid input")

ename = input("Enter your first name: ")
if ename.isalpha():
print("Your name is being saved...")
else:
print("Invaid name")

#WAP to count of vowels in a sentence
para1 = "Work, family, and endless to-do lists can make it tough to find the time to catch up. But you'll never regret taking a break to chat with your friend, Frost reminds us. Everything else will still be there later."
sum=0
for l in para1:
if l=='a' or l=='A' or l=='e' or l=='E' or l=='i' or l=='I' or l=='o' or l=='O' or l=='u' or l=='3':
sum+=1
print("Total vowesl = ",sum)
sum=0
for l in para1.lower():
if l=='a' or l=='e' or l=='i' or l=='o' or l=='u':
sum+=1
print("Total vowesl = ",sum)

sum=0
for l in para1.lower():
if l in 'aeiou':
sum+=1
print("Total vowesl = ",sum)
#Strings – indexing
str1 = “Hello”
print(“=>”, str1[0]+str1[2]+str1[4])

#methods in Python
print(str1.isdigit())

num1 = input(“Enter a number: “)
if num1.isdigit():
num1 = int(num1)
print(num1)
else:
print(“Sorry you have entered a invalid number”)

username = “abcd3456”
if username.isalnum():
print(“Valid username”)
else:
print(“Username is not valid, enter again!”)
username=“abcd”
print(“alpha: “,username.isalpha())
print(“isalnum: “,username.isalnum())
print(“isdigit: “, username.isdigit())
txt1 = “Hello how are you are you are doing”
print(“lower: “,txt1.islower()) #isupper(), istitle()
print(“title: “,txt1.title())
print(“title: “,txt1.istitle())
# islower() -> lower(), isupper() -> upper(), istitle() -> title()
print(“=> “,txt1.split())
print(“=> “,txt1.split(“o”))
txt_list1 = [‘Hello’, ‘how’, ‘are’, ‘you’, ‘doing’]
txt_list2 = [‘Hell’, ‘ h’, ‘w are y’, ‘u d’, ‘ing’]
print(“Join = “, ” “.join(txt_list1))
print(“Join = “, “o”.join(txt_list2))
print(“”,txt1.count(“are”,10,25))
print(“”,txt1.count(“hello”))
print(” “,txt1.replace(“are”,“is”))

COMPLETE THE PYTHON COURSE FROM HERE:

https://www.netacad.com/

Direct course link: https://www.netacad.com/courses/programming/pcap-programming-essentials-python

# List: linear ordered mutable collection
list1 = [5,6.1,“Hello”,True,[2,4,9]]

# mutable means you can edit the list
list1[1] = 6.9
print(“List 1 = “,list1)

# non mutables (immutable) objects cant be edited eg. String
str1 = “Hello”
#str1[1]= “Z” #TypeError: ‘str’ object does not support item assignment
print(“You can iterate through the list:”)
for i in list1:
print(i)

list2 = [3,6,9,12]
list3 = [4,8,12,16]
print(“list2 + list3: “,list2 + list3)

print(“last member = “,list1[-1])
print(“First 4 values = “,list1[:4])
print(“Number of elements in List 1 = “,len(list1))

### Methods in List ###
list2 = [3,6,9,12]
list2.append(18)
print(“List 2 after append: “,list2)
list2.append(15)
print(“List 2 after append 2: “,list2)

# add the element at a specific position
list2.insert(1,24)
print(“List 2 after insert: “,list2)

## removing
list2.pop(2) #pop takes the index value
print(“List 2 after pop: “,list2)
list2.remove(18) #remove takes the value of the element to remove
print(“List 2 after remove: “,list2)

list4 = list2
list5 = list2.copy()
print(“1. List 2 = “,list2)
print(“1. List 4 = “,list4)
print(“1. List 5 = “,list5)
list2.append(30)
list2.append(40)
print(“2. List 2 = “,list2)
print(“2. List 4 = “,list4)
print(“2. List 5 = “,list5)

print(“10 in the list = “,list2.count(10))
list2.reverse()
print(“List after reverse = “,list2)
list2.sort() #sort in the ascending order
print(“Sorted list = “,list2)

list2.sort(reverse=True) #sort in the descending order
print(“Sorted list = “,list2)

list2.clear()
print(“List after clear = “,list2)
# Tuples – linear ordered immutable collection

t1 = (4,6,7,8)
print(type(t1))
for i in t1:
print(i)
t1 = list(t1)
t1.append(10)
t1 = tuple(t1)
# Dictionary : key:value pair

dict1 = {}
print(type(dict1))
dict1 = {3:“Sachin”,“Second”:2,True:False,“Flag”:True}
# 3 ways – keys, values, items
for k in dict1.keys(): #keys
print(“Key = “,k,” and value is”,dict1[k])
for values in dict1.values():
print(“Value is”,values)

# iterate through items:
for item in dict1.items():
print(item,“: key is”,item[0],“and value is”,item[1])

# only a dictionary can be added to a dictionary
temp = {“City”:“Mumbai”}
dict1.update(temp)
print(“Dictionary 1: “,dict1)

#dictionary – mutable [list is mutable, tuple and string – immutable]
dict1[3]=“Tendulkar”
print(“Dictionary 1: “,dict1)

#copy
dict2 = dict1
dict3 = dict1.copy()
print(“1. Dict1 = “,dict1)
print(“1. Dict2 = “,dict2)
print(“1. Dict3 = “,dict3)
dict1.update({“Team”:“India”})
dict3.update({“Runs”: 12908})
dict2.update({“Wickets”: 432})

print(“2. Dict1 = “,dict1)
print(“2. Dict2 = “,dict2)
print(“2. Dict3 = “,dict3)

dict1.clear()
print(dict1)
Django Learning with Jeswanth

All videos are available here:    https://www.youtube.com/playlist?list=PLF9fJE4vTtTLnXYzr_19r1NVRWuNV892t

Access all the videos here

BASIC INFORMATION

For Basic information like installation, software to use, history, refer this link:

#Server DB Server(Modal – SQL) Controller (Python) View (HTML, Javascript)

print(‘Hello There’);print(“How are you?”)

print(5+3);
print ( “5+3=”,5+3,“and 6*2 =”, 6*2 )
# y = x+5, find the value of y when x is 4
# 5 is constant – value will remain 5
# x,y – their value can change – variables
# x independent variable, we can put any value
# y is dependent variable (on x), it can take multiple values but that depends on x

#calculate area and perimeter of a rectangle
l=50
b=85
#input values
# input – process (formula) – output (display the result)
area = l * b
perimeter = 2*(l+b)
print(“Rectangle with length”,l,“and breadth”,b,“has an area of”,area,“and perimeter of”,perimeter)

# f-string – formatting the string
print(f”Rectangle with length {l} and breadth {b} has an area of {area} and perimeter of {perimeter})

#calculate area and circumference of a circle
#independent variable here is radius
r = 13
area = 3.14 * r * r #pi is constant value 3.14
c = 2*3.14*r
#area and c are dependent variables
print(“Circle with radius”,r,“will have area of”,area,“and circumference of”,c)
print(f”Circle with radius {r} will have area of {area:.1f} and circumference of {c:.4f})

name,team,pos=“Virat”,“India”,“Captain”
print(f”Player’s name is {name:<10} from the team {team:^10} and represents as {pos:>15} at the event.”)
name,team,pos=“Manbgwabe”,“Zimbabwe”,“Wicket-Keeper”
print(f”Player’s name is {name:<10} from the team {team:^10} and represents as {pos:>15} at the event.”)

#numeric data types – int, float n complex
var1 = 50 #int – integer, numbers without decimal point
print(type(var1)) #type() – will give the type of the data that is stored
var1 = 50.0 #float
print(type(var1))
var1 = 5j # complex
# square root of -1 : i (in python we say j)
print(type(var1))
print(5j * 5j)
#text type – string (str)
”’ This is a
multi line
comment in python”’
var1 =‘5’
print(type(var1)) #<class ‘str’>
var1 = True #bool – True or False
print(type(var1))
# 5 types: int, float, bool, str,complex
#arithematic
val1, val2 = 20,3
print(val1 + val2) #adding
print(val1 – val2) #subtracting
print(val1 * val2) #multiplying
print(val1 / val2) #dividing – gives a float value
print(val1 // val2) # integer division – gives integer value
print(val1 ** val2) #power
print(val1 % val2) #mod – remainder

# 20/3 = 6 2/3
#conditional / relational operators: > < >= <= == !=
# output: True or False
val1, val2,val3 = 20,3,20
print(val1 > val2) #True
print(val1 >= val3) #T
print(val1 > val3) # F
print(val1 < val3) #
print(val1 <= val3) #T
print(val1 == val3)
print(val1 != val3)

# logical operator: and or not
#input is boolean and output is also boolean
print(False and False)
print(False and True)
print(True and False)
print(True and True)
print(“===============”)

print(False or False)
print(False or True)
print(True or False)
print(True or True)
print(not True)
print(not False)
print(“========”)
val1, val2,val3 = 20,3,20
print(val1 >= val3 and val1 > val3 or val1 < val3 and val1 <= val3 or val1 == val3)
# T
num= 0
if num>0:
print(“Positive”)
elif num<0:
print(“Negative”)
else:
pass

print(“Thank you”)

####
avg =75
#avg>=85: Grade A, 70-85: Grade B, 60 to 70: Grade C, 50-60: D, <50: E
# if avg>=50: You have passed!
if avg>=50:
print(“You’ve passed!”)
if avg >= 70:
print(“Grade A”)
if avg>=90:
print(“You win special award”)
elif avg >= 85:
print(“Grade B”)
elif avg >= 60:
print(“Grade C”)
else:
print(“Grade D”)
else:
print(“You’ve not passed!”)
print(“Grade E”)
#LOOPS – repeating a block of code
## FOR Loop: used when you know how many times to execute a code
## WHILE Loop: beforehand you are not sure how many times but repeat based on some condition
# for: when you know how many times you need to repeat something
# while:
# range(=start, < end, =increment) range(3,9,2): 3,5,7
for counter in range(3,9,2):
print(“HELLO, The counter value is”,counter)
##
for i in range(1,101): #when increment is not given, default value is 1
print(i)


#Generate even numbers from 2 to 100 (both inclusive)
n=12
for i in range(n): #when start number is not given, default value is zero
print(“* “,end=“”)
print()

”’
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
* * * * * * *
”’
for j in range(n):
for i in range(n):
print(“* “,end=“”)
print()

”’
*
* *
* * *
* * * *
”’
for j in range(n):
for i in range(j+1):
print(“* “,end=“”)
print()

”’
* * * *
* * *
* *
*
”’
for j in range(n):
for i in range(n-j):
print(“* “,end=“”)
print()

”’
* * * *
*
*
* * * *
”’
for j in range(n):
if j==0 or j==n-1:
for i in range(n):
print(“* “,end=“”)
print()
else:
for k in range(n-j-1):
print(” “,end=” “)
print(“*”)


#while
cont = “y”
while cont==“y”:
print(“Hello from While”)
cont = input(“Do you want to continue: hit y for yes anyother key for no:”)