PROGRAM 1
In [3]:
import csv
with open('enjoysport.csv', 'r') as file:
    data = [row for row in csv.reader(file)]  # list comprehension

print("The total number of training instances are:", len(data)-1, '\n', data[1:])

num_attribute = len(data[0]) - 1

# Initial hypothesis
hypothesis = ['0'] * num_attribute

for i in range(0, len(data)):
    if data[i][num_attribute] == 'yes':
        for j in range(0, num_attribute):
            if hypothesis[j] == '0' or hypothesis[j] == data[i][j]:
                hypothesis[j] = data[i][j]
            else:
                hypothesis[j] = '?'
    print("\nThe hypothesis for the training instance {} is : \n".format(i), hypothesis)
print("\nThe Maximally specific hypothesis for the training instances is ", hypothesis)
The total number of training instances are: 4 
 [['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'], ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'], ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'], ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']]

The hypothesis for the training instance 0 is : 
 ['0', '0', '0', '0', '0', '0']

The hypothesis for the training instance 1 is : 
 ['sunny', 'warm', 'normal', 'strong', 'warm', 'same']

The hypothesis for the training instance 2 is : 
 ['sunny', 'warm', '?', 'strong', 'warm', 'same']

The hypothesis for the training instance 3 is : 
 ['sunny', 'warm', '?', 'strong', 'warm', 'same']

The hypothesis for the training instance 4 is : 
 ['sunny', 'warm', '?', 'strong', '?', '?']

The Maximally specific hypothesis for the training instances is  ['sunny', 'warm', '?', 'strong', '?', '?']
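Program 1 assumes an enjoysport.csv file in the working directory. The four training rows are exactly those printed above; the header names in the sketch below are an assumption (the script only uses the header row to count attributes, so any seven column names would do).

# Minimal sketch to create enjoysport.csv for Program 1.
# Data rows are taken from the output above; the header names are assumed.
import csv

rows = [
    ['sky', 'airtemp', 'humidity', 'wind', 'water', 'forecast', 'enjoysport'],  # assumed header
    ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'],
    ['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes'],
    ['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no'],
    ['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes'],
]
with open('enjoysport.csv', 'w', newline='') as f:
    csv.writer(f).writerows(rows)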
PROGRAM 2
In [4]:
import pandas as pd
data = pd.read_csv('enjoysport.csv')
concepts = data.iloc[:, :-1].values
target = data.iloc[:, -1].values
n = len(concepts[0])  # number of attributes (the target column is already excluded above)
specific_h = ['0'] * n
general_h = ['?'] * n
print("The initialization of the specific and general hypothesis ")
print(" S0:",specific_h,"\n G0:",general_h)
def learn(concepts, target):
    specific_h = concepts[0].copy()
    general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    for i, h in enumerate(concepts):
        if target[i] == "yes":
            print(f"\n the {i+1} training instance is Positive \n", concepts[i])
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        else:
            print(f"\nThe {i+1} training instance is Negative \n", concepts[i])
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print(f"S{i+1}:\n", specific_h)
        print(f"G{i+1}:\n", general_h)
    general_h = [h for h in general_h if h != ['?' for _ in range(len(specific_h))]]
    return specific_h, general_h
s_final, g_final = learn(concepts, target)
print("\nThe Final Specific Hypothesis:")
print(s_final)
print("\nThe Final General Hypothesis:")
print(g_final)
The initialization of the specific and general hypothesis 
 S0: ['0', '0', '0', '0', '0', '0'] 
 G0: ['?', '?', '?', '?', '?', '?']

 the 1 training instance is Positive 
 ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
S1:
 ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
G1:
 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

 the 2 training instance is Positive 
 ['sunny' 'warm' 'high' 'strong' 'warm' 'same']
S2:
 ['sunny' 'warm' '?' 'strong' 'warm' 'same']
G2:
 [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

The 3 training instance is Negative 
 ['rainy' 'cold' 'high' 'strong' 'warm' 'change']
S3:
 ['sunny' 'warm' '?' 'strong' 'warm' 'same']
G3:
 [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]

 the 4 training instance is Positive 
 ['sunny' 'warm' 'high' 'strong' 'cool' 'change']
S4:
 ['sunny' 'warm' '?' 'strong' '?' '?']
G4:
 [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]

The Final Specific Hypothesis:
['sunny' 'warm' '?' 'strong' '?' '?']

The Final General Hypothesis:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
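As an optional sanity check (not part of the original program), the final specific hypothesis can be applied back to the training data: it should cover every positive example and reject the negative one. A minimal sketch reusing concepts, target and s_final from the cell above; matches is a hypothetical helper added here for illustration.

def matches(hypothesis, example):
    # an example satisfies a hypothesis if every non-'?' attribute value agrees
    return all(h == '?' or h == e for h, e in zip(hypothesis, example))

for example, label in zip(concepts, target):
    print(example, label, '->', 'covered' if matches(s_final, example) else 'not covered')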
PROGRAM 3
In [7]:
import math
import csv
from collections import Counter
def load_csv(filename):
    with open(filename, 'r') as file:
        reader = csv.reader(file)
        headers = next(reader)
        dataset = list(reader)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete=False):
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    for val in attr:
        dic[val] = []
    for row in data:
        key = row[col]
        if delete:
            row = row[:col] + row[col+1:]
        dic[key].append(row)
    return attr, dic

def entropy(data):
    total = len(data)
    if total == 0:
        return 0
    counts = Counter(data)
    entropy = sum(
        (-count / total) * math.log(count / total, 2)
        for count in counts.values()
    )
    return entropy

def compute_gain(data, col):
    total_entropy = entropy([row[-1] for row in data])
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    weighted_entropy = sum(
        (len(subset) / total_size) * entropy([row[-1] for row in subset])
        for subset in dic.values()
    )
    gain = total_entropy - weighted_entropy
    return gain

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        node = Node("")
        node.answer = lastcol[0]
        return node
    gains = [compute_gain(data, col) for col in range(len(data[0]) - 1)]
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split+1:]
    attr, dic = subtables(data, split, delete=True)
    for key, subset in dic.items():
        child = build_tree(subset, fea)
        node.children.append((key, child))
    return node

def print_tree(node, level=0):
    if node.answer:
        print(" " * level, "Answer:", node.answer)
    else:
        print(" " * level, "Attribute:", node.attribute)
        for value, child in node.children:
            print(" " * (level + 1), "Value:", value)
            print_tree(child, level + 2)

def classify(node, x_test, features):
    if node.answer:
        return node.answer
    else:
        pos = features.index(node.attribute)
        for value, child in node.children:
            if x_test[pos] == value:
                return classify(child, x_test, features)
        return None

# Main program
dataset, features = load_csv("id3.csv")
node1 = build_tree(dataset, features)
print("The decision tree for the dataset using ID3 algorithm is:")
print_tree(node1)

testdata, test_features = load_csv("id3_test_1.csv")
for xtest in testdata:
    print("\nThe test instance:", xtest)
    print("The label for test instance:", classify(node1, xtest, features))
The decision tree for the dataset using ID3 algorithm is:
 Attribute: Outlook
  Value: sunny
   Attribute: Humidity
    Value: high
     Answer: no
    Value: normal
     Answer: yes
  Value: overcast
   Answer: yes
  Value: rain
   Attribute: Wind
    Value: weak
     Answer: yes
    Value: strong
     Answer: no

The test instance: ['sunny', 'mild', 'high', 'weak']
The label for test instance: no

The test instance: ['sunny', 'hot', 'high', 'strong']
The label for test instance: no

The test instance: ['overcast', 'hot', 'low', 'strong']
The label for test instance: yes

The test instance: ['rain', 'mild', 'high', 'weak']
The label for test instance: yes
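Program 3 expects two CSV files, each with a header row: id3.csv (training data, class label in the last column) and id3_test_1.csv (test instances with the same attribute columns and no label). The tree and test instances above imply attribute columns Outlook, Temperature, Humidity and Wind with a yes/no play label; the actual file contents are not shown, so the layout below is only an assumed illustration of the expected format, not the real data.

# id3.csv  (assumed layout; illustrative rows only)
Outlook,Temperature,Humidity,Wind,PlayTennis
sunny,hot,high,weak,no
overcast,hot,high,weak,yes
rain,mild,high,weak,yes

# id3_test_1.csv  (assumed layout; first test instance taken from the output above)
Outlook,Temperature,Humidity,Wind
sunny,mild,high,weak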
PROGRAM 4
In [8]:
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float) # two inputs [sleep,study]
y = np.array(([92], [86], [89]), dtype=float) # one output [Expected % in Exams]
X = X/np.amax(X,axis=0) # scale each input column by its column-wise maximum
y = y/100
#Sigmoid Function
def sigmoid(x):
    return 1/(1 + np.exp(-x))

#Derivative of Sigmoid Function (takes an activation a = sigmoid(z), since d(sigmoid)/dz = a*(1-a))
def derivatives_sigmoid(x):
    return x * (1 - x)
#Variable initialization
epoch=5000 #Setting training iterations
lr=0.1 #Setting learning rate
inputlayer_neurons = 2   #number of features in the data set
hiddenlayer_neurons = 3  #number of neurons in the hidden layer
output_neurons = 1       #number of neurons at the output layer
#weight and bias initialization
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons)) #weight of the link from input node to hidden node
bh=np.random.uniform(size=(1,hiddenlayer_neurons)) # bias of the link from input node to hidden node
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons)) #weight of the link from hidden node to output node
bout=np.random.uniform(size=(1,output_neurons)) #bias of the link from hidden node to output node
#draws a random range of numbers uniformly of dim x*y
for i in range(epoch):
    #Forward Propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)

    #Backpropagation
    EO = y - output                        # error at the output layer
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)              # how much hidden layer weights contributed to error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad
    # weight updates: dot product of next-layer error and current-layer output
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    # (the biases bh and bout are left at their initial random values in this version)
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,output)
Input: 
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output: 
[[0.92]
 [0.86]
 [0.89]]
Predicted Output: 
 [[0.89240533]
 [0.88403953]
 [0.89373022]]
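Note that derivatives_sigmoid is applied to layer activations (output and hlayer_act), not to the pre-activation sums; this works because for a = sigmoid(z) the derivative d sigmoid/dz equals a*(1-a). A quick standalone check, illustrative only:

# Verify numerically that a*(1-a) equals the derivative of sigmoid(z).
import numpy as np

z = np.array([-2.0, 0.0, 1.5])
a = 1 / (1 + np.exp(-z))                  # activations, as produced by sigmoid()
analytic = a * (1 - a)                    # what derivatives_sigmoid(a) computes
h = 1e-6
numeric = (1/(1 + np.exp(-(z + h))) - 1/(1 + np.exp(-(z - h)))) / (2 * h)
print(np.allclose(analytic, numeric))     # True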
PROGRAM 5
In [25]:
import csv
import random
import math
def loadCsv(filename):
    with open(filename, "r") as file:
        lines = csv.reader(file)
        dataset = list(lines)
    # Skip the header row
    dataset = dataset[1:]
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    # 67% training size
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        # generate indices for the dataset list randomly to pick elements for training data
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    separated = {}
    # creates a dictionary of classes 1 and 0 where the values are the instances belonging to each class
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        # summaries is a dictionary of tuples (mean, std) for each class value
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():  # class and attribute information as mean and sd
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]  # mean and sd of the i-th attribute for this class
            x = inputVector[i]               # i-th attribute of the test vector
            probabilities[classValue] *= calculateProbability(x, mean, stdev)  # use normal distribution
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():  # assigns the class which has the highest probability
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = 'pima_indian.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
Split 768 rows into train=514 and test=254 rows
Accuracy of the classifier is : 77.55905511811024%
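Because splitDataset draws the training rows with random.randrange, the reported accuracy varies from run to run. If a repeatable result is wanted, the standard-library RNG can be seeded before calling main(); this is an optional addition, not part of the original program.

# Optional: seed Python's random module so the 67/33 split (and hence the accuracy)
# is identical on every run. The seed value 42 is arbitrary.
import random
random.seed(42)
main()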
PROGRAM 6
In [8]:
import numpy as np
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianNetwork
from pgmpy.inference import VariableElimination
heartDisease = pd.read_csv("heart.csv")
heartDisease = heartDisease.replace('?',np.nan)
print('Sample instances from the dataset are given below')
print(heartDisease.head())
print('\n Attributes and datatypes')
print(heartDisease.dtypes)
model= BayesianNetwork([('age','heartdisease'),('sex','heartdisease'),('exang','heartdisease'),('cp','heartdisease'),('heartdisease','restecg'),('heartdisease','chol')])
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)
print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)
print('\n 1. Probability of HeartDisease given evidence= restecg')
q1=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'restecg':1})
print(q1)
print('\n 2. Probability of HeartDisease given evidence= cp ')
q2=HeartDiseasetest_infer.query(variables=['heartdisease'],evidence={'cp':2})
print(q2)
Sample instances from the dataset are given below
   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope ca thal  heartdisease
0   63    1   1       145   233    1        2      150      0      2.3      3  0    6             0
1   67    1   4       160   286    0        2      108      1      1.5      2  3    3             2
2   67    1   4       120   229    0        2      129      1      2.6      2  2    7             1
3   37    1   3       130   250    0        0      187      0      3.5      3  0    3             0
4   41    0   2       130   204    0        2      172      0      1.4      1  0    3             0

 Attributes and datatypes
age               int64
sex               int64
cp                int64
trestbps          int64
chol              int64
fbs               int64
restecg           int64
thalach           int64
exang             int64
oldpeak         float64
slope             int64
ca               object
thal             object
heartdisease      int64
dtype: object

Learning CPD using Maximum likelihood estimators

 Inferencing with Bayesian Network:

 1. Probability of HeartDisease given evidence= restecg
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
+=================+=====================+
| heartdisease(0) |              0.1016 |
+-----------------+---------------------+
| heartdisease(1) |              0.0000 |
+-----------------+---------------------+
| heartdisease(2) |              0.2361 |
+-----------------+---------------------+
| heartdisease(3) |              0.2017 |
+-----------------+---------------------+
| heartdisease(4) |              0.4605 |
+-----------------+---------------------+

 2. Probability of HeartDisease given evidence= cp 
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
+=================+=====================+
| heartdisease(0) |              0.3742 |
+-----------------+---------------------+
| heartdisease(1) |              0.2018 |
+-----------------+---------------------+
| heartdisease(2) |              0.1375 |
+-----------------+---------------------+
| heartdisease(3) |              0.1541 |
+-----------------+---------------------+
| heartdisease(4) |              0.1323 |
+-----------------+---------------------+
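VariableElimination.query also accepts several evidence variables at once, so the same inference object can be reused for more specific queries. An illustrative extra query (not part of the original program; its output is not shown here):

# Illustrative extra query combining both pieces of evidence used above.
q3 = HeartDiseasetest_infer.query(variables=['heartdisease'],
                                  evidence={'restecg': 1, 'cp': 2})
print(q3)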
PROGRAM 7
In [9]:
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np
# Load the Iris dataset
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
# KMeans Model
model = KMeans(n_clusters=3)
model.fit(X)
# Plot the results
plt.figure(figsize=(14, 7))
colormap = np.array(['red', 'lime', 'black'])
# Plot the Original Classifications
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
# Plot the Model's Classifications
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K Mean Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of K-Mean: ', sm.accuracy_score(y, model.labels_))
print('The Confusion matrix of K-Mean: ', sm.confusion_matrix(y, model.labels_))
# Standardize the features
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)
# Gaussian Mixture Model
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_gmm = gmm.predict(xs)
# Plot the GMM results
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_gmm], s=40)
plt.title('GMM Classification')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
print('The accuracy score of EM: ', sm.accuracy_score(y, y_gmm))
print('The Confusion matrix of EM: ', sm.confusion_matrix(y, y_gmm))
The accuracy score of K-Mean:  0.44666666666666666
The Confusion matrix of K-Mean:  [[50  0  0]
 [ 0  3 47]
 [ 0 36 14]]
The accuracy score of EM:  0.3333333333333333
The Confusion matrix of EM:  [[ 0 50  0]
 [45  0  5]
 [ 0  0 50]]
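The accuracy scores above compare raw cluster labels with the true classes, so they depend on the arbitrary numbering K-Means and the GMM happen to assign; the confusion matrices show that the clusters track the species far better than 0.45 and 0.33 suggest. One way to score fairly is to remap cluster labels to classes first. A minimal sketch, assuming SciPy is available (not part of the original program):

# Remap arbitrary cluster labels to the best-matching true classes
# (Hungarian assignment on the confusion matrix) before computing accuracy.
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.metrics import accuracy_score, confusion_matrix

def remapped_accuracy(y_true, y_cluster):
    cm = confusion_matrix(y_true, y_cluster)
    row_ind, col_ind = linear_sum_assignment(-cm)   # maximise matched counts
    mapping = dict(zip(col_ind, row_ind))           # cluster label -> class label
    y_mapped = np.array([mapping[c] for c in y_cluster])
    return accuracy_score(y_true, y_mapped)

print('Remapped K-Means accuracy:', remapped_accuracy(y.Targets.values, model.labels_))
print('Remapped EM accuracy     :', remapped_accuracy(y.Targets.values, y_gmm))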
PROGRAM 8
In [6]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import datasets
# Iris Plants Dataset
"""
Iris Plants Dataset, dataset contains 150 (50 in each of three classes)
Number of Attributes: 4 numeric, predictive attributes and the Class
"""
iris = datasets.load_iris()
# The x variable contains the first four columns of the dataset (i.e. attributes) while y contains the labels.
x = iris.data
y = iris.target
print('sepal-length', 'sepal-width', 'petal-length', 'petal-width')
print(x)
print('class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica')
print(y)
# Split the dataset into 70% train data and 30% test data.
# This means that out of total 150 records, the training set will contain 105 records and the test set contains 45 of those records.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
# Train the model using Nearest Neighbors with K=5
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)
# Print Confusion Matrix and Accuracy Metrics
print('Confusion Matrix')
print(confusion_matrix(y_test, y_pred))
print('Accuracy Metrics')
print(classification_report(y_test, y_pred))
sepal-length sepal-width petal-length petal-width
[ ... the full 150 x 4 Iris feature matrix is printed here ... ]
class: 0-Iris-Setosa, 1- Iris-Versicolour, 2- Iris-Virginica
[ ... the 150 class labels are printed here: 50 zeros, 50 ones, 50 twos ... ]
Confusion Matrix
[[17  0  0]
 [ 0  9  1]
 [ 0  2 16]]
Accuracy Metrics
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       0.82      0.90      0.86        10
           2       0.94      0.89      0.91        18

    accuracy                           0.93        45
   macro avg       0.92      0.93      0.92        45
weighted avg       0.94      0.93      0.93        45
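Since train_test_split is called without random_state, the 105/45 split, and therefore the confusion matrix and scores above, will differ between runs. For a repeatable, class-balanced split the call can be pinned down; an optional variation, not part of the original program:

# Optional: fixed, stratified split so every run evaluates the same 45 test rows
# with all three classes represented proportionally. random_state=42 is arbitrary.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42, stratify=y)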
PROGRAM 9
In [4]:
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.io import push_notebook
def local_regression(x0, X, Y, tau):
    # Add bias term to the query point
    x0 = np.r_[1, x0]
    # Add a bias column of ones to X
    X = np.c_[np.ones(len(X)), X]
    # fit model: normal equations with kernel
    xw = X.T * radial_kernel(x0, X, tau)  # X transpose * W
    beta = np.linalg.pinv(xw @ X) @ xw @ Y  # @ is matrix multiplication / dot product
    # predict value at x0
    return x0 @ beta

def radial_kernel(x0, X, tau):
    # Weight or Radial Kernel Bias Function
    return np.exp(np.sum((X - x0) ** 2, axis=1) / (-2 * tau * tau))

n = 1000
# generate dataset
X = np.linspace(-3, 3, num=n)
print("The Data Set (10 Samples) X :\n", X[1:10])
Y = np.log(np.abs(X ** 2 - 1) + .5)
print("The Fitting Curve Data Set (10 Samples) Y :\n", Y[1:10])
# jitter X
X += np.random.normal(scale=.1, size=n)
print("Normalised (10 Samples) X :\n", X[1:10])

domain = np.linspace(-3, 3, num=300)
print("Xo Domain Space(10 Samples) :\n", domain[1:10])

def plot_lwr(tau):
    # prediction through regression
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    plot = figure(width=400, height=400)
    plot.title.text = 'tau=%g' % tau
    plot.scatter(X, Y, alpha=.3)
    plot.line(domain, prediction, line_width=2, color='red')
    return plot
show(gridplot([[plot_lwr(10.), plot_lwr(1.)], [plot_lwr(0.1), plot_lwr(0.01)]]))
The Data Set (10 Samples) X :
 [-2.99399399 -2.98798799 -2.98198198 -2.97597598 -2.96996997 -2.96396396
 -2.95795796 -2.95195195 -2.94594595]
The Fitting Curve Data Set (10 Samples) Y :
 [2.13582188 2.13156806 2.12730467 2.12303166 2.11874898 2.11445659
 2.11015444 2.10584249 2.10152068]
Normalised (10 Samples) X :
 [-2.92548256 -3.06788731 -2.91643284 -2.78992995 -2.95614917 -2.84968539
 -2.95866768 -2.90056358 -2.85405618]
Xo Domain Space(10 Samples) :
 [-2.97993311 -2.95986622 -2.93979933 -2.91973244 -2.89966555 -2.87959866
 -2.85953177 -2.83946488 -2.81939799]
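For reference, local_regression implements the closed-form locally weighted least-squares fit at each query point x0. Writing the kernel weights as a diagonal matrix W (this is what multiplying X.T by radial_kernel(x0, X, tau) produces), the computation is, in LaTeX notation:

W_{ii} = \exp\!\left(-\frac{\lVert x_i - x_0 \rVert^{2}}{2\tau^{2}}\right), \qquad
\beta(x_0) = \left(X^{\top} W X\right)^{+} X^{\top} W\, y, \qquad
\hat{y}(x_0) = [\,1,\ x_0\,]\,\beta(x_0)

i.e. beta = np.linalg.pinv(xw @ X) @ xw @ Y followed by x0 @ beta, with the pseudo-inverse guarding against a singular weighted Gram matrix. Smaller tau values weight only nearby points, giving the wigglier fits in the tau=0.1 and tau=0.01 panels.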
PROGRAM 10
In [5]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
import seaborn as sns
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data[:, :2] # Use only the first two features for visualization purposes
y = iris.target
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train the SVM model
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)
# Make predictions
y_pred = svm_model.predict(X_test)
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
print(f'Accuracy: {accuracy:.2f}')
print(f'Confusion Matrix:\n{conf_matrix}')
print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
# Plot the decision boundaries
def plot_decision_boundaries(X, y, model, ax):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', marker='o')
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.set_title('SVM Decision Boundary')

fig, ax = plt.subplots()
plot_decision_boundaries(X_train, y_train, svm_model, ax)
plt.show()

# Plot confusion matrix
#plot_confusion_matrix(svm_model, X_test, y_test)
#plt.show()
def plot_conf_matrix(conf_matrix, ax):
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_xlabel('Predicted labels')
    ax.set_ylabel('True labels')
    ax.set_title('Confusion Matrix')

fig, ax = plt.subplots()
plot_conf_matrix(conf_matrix, ax)
plt.show()
Accuracy: 0.73
Confusion Matrix:
[[19  0  0]
 [ 0  7  6]
 [ 0  6  7]]
Precision: 0.73
Recall: 0.73
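The modest 0.73 accuracy is largely a consequence of training on only the first two features, which were kept so the 2-D decision boundary could be plotted. As a point of comparison, the same linear SVC can be trained on all four Iris features; a minimal sketch (not part of the original program, and its score is not reported here):

# Same pipeline as above, but using all four features (no 2-D boundary plot possible).
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X_tr, X_te, y_tr, y_te = train_test_split(iris.data, iris.target,
                                          test_size=0.3, random_state=42)
scaler = StandardScaler().fit(X_tr)
clf = SVC(kernel='linear', C=1.0, random_state=42).fit(scaler.transform(X_tr), y_tr)
print('Accuracy with all four features:',
      accuracy_score(y_te, clf.predict(scaler.transform(X_te))))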