hello-world-extension

A simple hello world extension
This is a simple VS Code extension created for learning purposes.

Practical 1: Water Jug Problem (Prolog)

% Initial state
start((0, 0)).

% Goal state
goal((2, 0)).

% Move rules
move((X, Y), (5, Y)) :- X < 5.         % Fill 5L jug
move((X, Y), (X, 4)) :- Y < 4.         % Fill 4L jug
move((X, Y), (0, Y)) :- X > 0.         % Empty 5L jug
move((X, Y), (X, 0)) :- Y > 0.         % Empty 4L jug

% Pour from 5L to 4L
move((X, Y), (NX, NY)) :-
    X > 0, Y < 4,
    T is min(X, 4 - Y),
    NX is X - T,
    NY is Y + T.

% Pour from 4L to 5L
move((X, Y), (NX, NY)) :-
    Y > 0, X < 5,
    T is min(Y, 5 - X),
    NY is Y - T,
    NX is X + T.

% Depth-first search driver so the rules above can actually be run;
% the visited list prevents revisiting states (and thus infinite loops).
path(State, State, _, [State]).
path(State, Goal, Visited, [State | Rest]) :-
    move(State, Next),
    \+ member(Next, Visited),
    path(Next, Goal, [Next | Visited], Rest).

% Query with ?- solve(Path). to obtain a sequence of jug states.
solve(Path) :-
    start(Start),
    goal(Goal),
    path(Start, Goal, [Start], Path).

Practical 2: Tic-Tac-Toe Game (Prolog)

display_board(Board) :-
    nl,
    display_row(Board,1),
    display_row(Board,2),
    display_row(Board,3),
    nl.

display_row(Board,Row) :-
    display_cell(Board,Row,1), write('|'),
    display_cell(Board,Row,2), write('|'),
    display_cell(Board,Row,3),
    nl,
    (Row < 3 -> write('--|--|--'), nl ; true).

display_cell(Board,Row,Col) :-
    member(cell(Row, Col, Player), Board),
    write(' '), write(Player), !.
display_cell(_,_,_) :-
    write('  ').    % two spaces keep empty cells aligned with the grid

win(Player,Board,Row,Col) :-
    ( member(cell(Row,1,Player),Board),
      member(cell(Row,2,Player),Board),
      member(cell(Row,3,Player),Board)
    ; member(cell(1,Col,Player),Board),
      member(cell(2,Col,Player),Board),
      member(cell(3,Col,Player),Board)
    ; member(cell(1,1,Player),Board),
      member(cell(2,2,Player),Board),
      member(cell(3,3,Player),Board)
    ; member(cell(1,3,Player),Board),
      member(cell(2,2,Player),Board),
      member(cell(3,1,Player),Board)
    ).

game_over(Board,Row,Col) :-
    ( win('X',Board,Row,Col)
    ; win('O',Board,Row,Col)
    ; length(Board,9)
    ).

make_move(Player,Row,Col,Board,NewBoard) :-
    \+ member(cell(Row,Col,_), Board),
    append(Board, [cell(Row,Col,Player)], NewBoard).

% Entry point: start the game with the query ?- play.
play :-
    play('X', []).

play(Player, Board) :-
    display_board(Board),
    ( game_over(Board,Row,Col) ->
        ( win('X',Board,Row,Col) -> write('X wins!\n')
        ; win('O',Board,Row,Col) -> write('O wins!\n')
        ; write('It\'s a draw!\n')
        )
    ; ( Player = 'X' -> write('Player X\'s turn\n') ; write('Player O\'s turn\n') ),
      write('Enter your move (row and column, each term ending with a period): '),
      read(Row), read(Col),
      ( (Row >= 1, Row =< 3, Col >= 1, Col =< 3) ->
          ( make_move(Player, Row, Col, Board, NewBoard) ->
              switch_player(Player, NextPlayer),
              play(NextPlayer, NewBoard)
          ; write('Invalid move. Try again\n'),
            play(Player, Board)
          )
      ; write('Invalid input. Row and Column must be between 1 and 3.\n'),
        play(Player, Board)
      )
    ).

switch_player('X','O').
switch_player('O','X').

Practical 3: Implementation of 8-puzzle problem using hill climbing (Prolog)

% Start and goal states
start(1/2/3/4/8/0/7/6/5).
goal(1/2/3/4/5/6/7/8/0).

% Move definitions
move(1/2/3/4/8/0/7/6/5, down, 1/2/3/4/8/5/7/6/0, 1).
move(1/2/3/4/8/5/7/6/0, left, 1/2/3/4/8/5/7/0/6, 1).
move(1/2/3/4/8/5/7/0/6, up, 1/2/3/4/0/5/7/8/6, 1).
move(1/2/3/4/0/5/7/8/6, right, 1/2/3/4/5/0/7/8/6, 1).
move(1/2/3/4/5/0/7/8/6, down, 1/2/3/4/5/6/7/8/0, 1).

% Perform a fixed sequence of moves and track the cumulative cost.
% Run with the query ?- solve.
solve :-
    start(S0),
    move(S0, M1, S1, C1),
    write('Move: '), write(M1), write(' -> '), write(S1), write(', Cost: '), write(C1), nl,
    move(S1, M2, S2, C2),
    C12 is C1 + C2,
    write('Move: '), write(M2), write(' -> '), write(S2), write(', Cost: '), write(C12), nl,
    move(S2, M3, S3, C3),
    C123 is C12 + C3,
    write('Move: '), write(M3), write(' -> '), write(S3), write(', Cost: '), write(C123), nl,
    move(S3, M4, S4, C4),
    C1234 is C123 + C4,
    write('Move: '), write(M4), write(' -> '), write(S4), write(', Cost: '), write(C1234), nl,
    move(S4, M5, S5, C5),
    TotalCost is C1234 + C5,
    write('Move: '), write(M5), write(' -> '), write(S5), write(', Cost: '), write(TotalCost), nl,
    goal(S5),
    write('Goal reached! Total Cost = '), write(TotalCost), nl.

Practical 4: Introduction to Python Programming

Part 1: DataTypes, If-else and Functions

# 1. What is 2 to the power of 10?
print("1:", 2**10)  # 1024

# 2. Declare n1=10, n2=20, n3=30 and display 'sum of 10 and 20 is 30' using format()
n1, n2, n3 = 10, 20, 30
print("2: sum of {} and {} is {}".format(n1, n2, n3))

# 3. Split the string into a list
str1 = "SIESCOMS Sector-5 Plot-1E Nerul 200706"
print("3:", str1.split())

# 4. Display 'Nerul' from the split string
print("4:", str1.split()[3])

# 5. Split string and create a list of colleges, display 'SIESCOMS'
str3 = "SIESCOMS&VESIT&MET&STERLING&BVIT"
colleges = str3.split('&')
print("5:", colleges[0])  # 'SIESCOMS' is the first element of the list

# 6. Format planet and diameter
planet = "Earth"
diameter = 12742
print("6: The diameter of {} is {} kilometers.".format(planet, diameter))

# 7. Extract the word "hello" from a nested dictionary
d = {'key1': [1, 2, 3, {'key2': ['this', 'is', ['a', 'tricky', 'hello']]}]}
print("7:", d['key1'][3]['key2'][2][2])

# 8. Grab the domain from email
def get_domain(email):
    return email.split('@')[-1]

print("8:", get_domain("xyz@sies.edu.in"))

# 9. Count number of times the word "dog" occurs in a string
import re

def count_dogs(text):
    # Word-boundary match so "dog." and "dog," are counted but "dogs" is not
    return len(re.findall(r'\bdog\b', text.lower()))

essay = """The dog is a pet animal. A dog has sharp teeth so that it can eat flesh very easily. A dog has four legs, two ears, two eyes, a tail, a mouth, and a nose. A dog is a very clever animal and is very useful in catching thieves. A dog runs very fast, barks loudly and attacks the strangers. A dog saves the life of the master from danger. Dog are a very faithful animal. Usually, the dog eats fish, meat, milk, rice, bread, etc. Dogs are sometimes called canines. The lifespan of a dog is very small however it can live around 12-15 years long which depend on their size such as smaller dogs lives a longer life. A female dog gives birth to a baby and feed milk that's why dogs under the mammal category. The dog baby is called a puppy or pup and dog home is called kennel."""
print("9: Number of times 'dog' occurs:", count_dogs(essay))

# 10. Speeding ticket function with birthday consideration
def speeding_ticket(speed, is_birthday):
    allowance = 5 if is_birthday else 0
    if speed <= 60 + allowance:
        return "No Ticket"
    elif speed <= 80 + allowance:
        return "Small Ticket"
    else:
        return "Big Ticket"

print("10:", speeding_ticket(70, False))  # Small Ticket
print("10:", speeding_ticket(81, True))   # Small Ticket

Part 2: NumPy

# Import NumPy
import numpy as np

# 1. Create an array of 10 zeros
zeros_array = np.zeros(10)
print("1:", zeros_array)

# 2. Create an array of 10 ones
ones_array = np.ones(10)
print("2:", ones_array)

# 3. Create an array of 10 fives
fives_array = np.full(10, 5)
print("3:", fives_array)

# 4. Create an array of integers from 10 to 50
arr_10_to_50 = np.arange(10, 51)
print("4:", arr_10_to_50)

# 5. Create an array of even integers from 10 to 50
even_arr = np.arange(10, 51, 2)
print("5:", even_arr)

# 6. Create a 3x3 matrix with values from 0 to 8
matrix_3x3 = np.arange(9).reshape(3, 3)
print("6:\n", matrix_3x3)

# 7. Create a 3x3 identity matrix
identity_matrix = np.eye(3)
print("7:\n", identity_matrix)

# 8. Generate a random number between 0 and 1
rand_num = np.random.rand()
print("8:", rand_num)

# 9. Generate an array of 25 random numbers from a standard normal distribution
rand_array_25 = np.random.randn(25)
print("9:", rand_array_25)

# 10. Create an array of 20 linearly spaced points between 0 and 1
linspace_20 = np.linspace(0, 1, 20)
print("10:", linspace_20)

# 11. Create the given 5x5 matrix
mat = np.arange(1, 26).reshape(5, 5)
print("11:\n", mat)

# 12. Get the sum of all values in mat
sum_mat = mat.sum()
print("12: Sum of all values:", sum_mat)

# 13. Get the standard deviation of the values in mat
std_mat = mat.std()
print("13: Standard deviation:", std_mat)

# 14. Get the sum of all the columns in mat
col_sum = mat.sum(axis=0)
print("14: Column-wise sum:", col_sum)

Part 3: Pandas

# Import required libraries
import pandas as pd
import numpy as np

# Basic DataFrame creation
df1 = pd.DataFrame({'Numbers': [1, 2, 3, 4, 5]})
print("Basic DataFrame:")
print(df1)

# DataFrame with custom index
df2 = pd.DataFrame({'Numbers': [1, 2, 3, 4, 5]}, 
                   index=['one', 'two', 'three', 'four', 'five'])
print("\nDataFrame with custom index:")
print(df2)

# Create DataFrame with multiple columns
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
    'Mobile': [1234, 5678, 9876, 5432]
}
df4 = pd.DataFrame(data)
print("\nDataFrame with multiple columns:")
print(df4)

# Display specific columns
print("\nDisplay Names:")
print(df4['Name'])

# Display specific row
print("\nDisplay Jack's data:")
print(df4.loc[df4['Name'] == 'Jack'])

# Display multiple columns
print("\nDisplay name and mobile:")
print(df4[['Name', 'Mobile']])

# Create DataFrame with custom index
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky', 'Greg'],
    'Age': [28, 34, 29, 42, 54],
    'Mobile': [1234, 5678, 9876, 5432, 5555]
}
df5 = pd.DataFrame(data, index=['A', 'B', 'C', 'D', 'E'])
print("\nDataFrame with custom index:")
print(df5)

# Add new columns
df5['m1'] = [55, 78, 90, 89, 78]
df5['m2'] = [85, 89, 79, 80, 89]
print("\nAfter adding marks columns:")
print(df5)

# Calculate total
df5['Total'] = df5['m1'] + df5['m2']
print("\nAfter adding total:")
print(df5)

# Add remarks column
df5['remarks'] = ['F', 'P', 'P', 'P', 'P']
print("\nAfter adding remarks:")
print(df5)

# Remove column
df5 = df5.drop('remarks', axis=1)
print("\nAfter removing remarks:")
print(df5)

# Remove row
df5 = df5.drop(index='D')
print("\nAfter removing row D:")
print(df5)

# Check DataFrame shape
print("\nDataFrame shape:", df5.shape)

Part 4: Data Visualization

# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Line Plot Example
height = [0, 100, 200, 300, 400, 500]
temperature = [30, 28, 25, 22, 20, 18]
plt.plot(height, temperature)
plt.xlabel("Height (m)")
plt.ylabel("Temperature (°C)")
plt.title("Temperature vs Height")
plt.show()

# Date-wise Temperature Plot
date = ["25/12", "26/12", "27/12"]
temp = [8.5, 10.5, 6.8]
plt.plot(date, temp)
plt.xlabel("Date")
plt.ylabel("Temperature (°C)")
plt.title("Date wise Temperature")
plt.grid(True)
plt.show()

# Weight vs Height Plot
height = [121.9, 124.5, 129.5, 134.6, 139.7, 147.3, 152.4, 157.5, 162.6]
weight = [19.7, 21.3, 23.5, 25.9, 28.5, 32.1, 35.7, 39.6, 43.2]
plt.plot(weight, height, marker='*', markersize=10, color='green', 
         linewidth=2, linestyle='dashed')
plt.xlabel("Weight (kg)")
plt.ylabel("Height (cm)")
plt.title("Average Weight vs Height")
plt.show()

# Pie Chart Example
df = pd.DataFrame({
    'Category': ['A', 'B', 'C', 'D'],
    'Values': [20, 30, 25, 25]
})
df.plot(kind='pie', y='Values', labels=df['Category'], 
        autopct='%1.2f%%', figsize=(6,6))
plt.title("Custom Pie Chart")
plt.show()

Note: For the data visualization examples that require CSV files (like salary_data.csv, Marks.csv, etc.), you'll need to have these files in your working directory. The paths in the code should be adjusted according to your file locations.
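
One defensive pattern (a sketch; the file name salary_data.csv is simply the example from the note above) is to check that the file exists before reading it:

import os
import pandas as pd

csv_path = "salary_data.csv"  # adjust to your file location
if os.path.exists(csv_path):
    df = pd.read_csv(csv_path)
    print(df.head())
else:
    print(f"Could not find {csv_path}; place it in the working directory.")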

Practical 5: Perceptron algorithm for OR Function

import numpy as np
 
class Perceptron:
    def __init__(self, learning_rate=0.01, n_iterations=100):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None
 
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
 
        y_ = np.array([1 if i > 0 else 0 for i in y])
 
        for _ in range(self.n_iterations):
            for idx, x_i in enumerate(X):
                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_function(linear_output)
 
                update = self.learning_rate * (y_[idx] - y_predicted)
                self.weights += update * x_i
                self.bias += update
 
    def activation_function(self, x):
        return np.where(x>=0, 1, 0)
 
    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self.activation_function(linear_output)
        return y_predicted
 
# OR gate inputs and outputs
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 1])
 
# Initialize and train the perceptron
perceptron = Perceptron(learning_rate=0.1, n_iterations=10)
perceptron.fit(X, y)
 
# Test the perceptron
predictions = perceptron.predict(X)
print("Predictions:", predictions)
# Expected output: [0 1 1 1]

Practical 6: Improve prediction accuracy using Stochastic Gradient Descent

import numpy as np

def compute_error_for_line_given_points(b, m, points):
    """Calculate mean squared error for a line defined by slope (m) and intercept (b)
    
    Args:
        b (float): y-intercept
        m (float): slope
        points (numpy.array): Array of [x,y] coordinates

    Returns:
        float: Mean squared error
    """
    totalError = 0
    for i in range(len(points)):
        x = points[i, 0]
        y = points[i, 1]
        totalError += (y - (m * x + b)) ** 2
    return totalError / float(len(points))

def step_gradient(b_current, m_current, points, learningRate):
    """Calculate one step of gradient descent
    
    Args:
        b_current (float): Current y-intercept
        m_current (float): Current slope
        points (numpy.array): Array of [x,y] coordinates
        learningRate (float): Step size for gradient descent

    Returns:
        tuple: Updated b and m values
    """
    b_gradient = 0
    m_gradient = 0
    N = float(len(points))

    for i in range(len(points)):
        x = points[i, 0]
        y = points[i, 1]
        b_gradient += -(2/N) * (y - ((m_current * x) + b_current))
        m_gradient += -(2/N) * x * (y - ((m_current * x) + b_current))

    new_b = b_current - (learningRate * b_gradient)
    new_m = m_current - (learningRate * m_gradient)
    return [new_b, new_m]

def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    """Run gradient descent algorithm
    
    Args:
        points (numpy.array): Array of [x,y] coordinates
        starting_b (float): Initial y-intercept
        starting_m (float): Initial slope
        learning_rate (float): Step size for gradient descent
        num_iterations (int): Number of iterations to run

    Returns:
        tuple: Final b and m values
    """
    b = starting_b
    m = starting_m

    # Print initial error
    print(f"Starting gradient descent at b = {b}, m = {m}, "
          f"error = {compute_error_for_line_given_points(b, m, points)}")

    for i in range(num_iterations):
        b, m = step_gradient(b, m, points, learning_rate)

    return [b, m]

def run():
    """Main function to run linear regression"""
    try:
        points = np.genfromtxt("data.csv", delimiter=",")
        learning_rate = 0.0001
        initial_b = 0  # initial y-intercept guess
        initial_m = 0  # initial slope guess
        num_iterations = 1000

        print("Running...")
        [b, m] = gradient_descent_runner(points, initial_b, initial_m, 
                                       learning_rate, num_iterations)
        
        print(f"After {num_iterations} iterations b = {b}, "
              f"m = {m}, error = {compute_error_for_line_given_points(b, m, points)}")
        
    except FileNotFoundError:
        print("Error: Could not find data.csv file.")
    except Exception as e:
        print(f"An error occurred: {str(e)}")

if __name__ == '__main__':
    run()
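
Note that gradient_descent_runner above updates b and m with the full-batch gradient on every iteration. A truly stochastic step, matching the practical's title, updates from one randomly sampled point at a time. The helper below is an illustrative sketch (sgd_step is not part of the original script) that could be swapped into the runner's loop in place of step_gradient:

import numpy as np

def sgd_step(b, m, points, learning_rate):
    # Pick one random sample and update b, m from its gradient alone
    i = np.random.randint(len(points))
    x, y = points[i, 0], points[i, 1]
    error = y - (m * x + b)
    # Gradients of the single-sample squared error (y - (m*x + b))**2
    new_b = b + learning_rate * 2 * error
    new_m = m + learning_rate * 2 * error * x
    return new_b, new_m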

Practical 7: Implement Adaline algorithm for AND operation

import numpy as np

class Adaline:
    def __init__(self, input_size, learning_rate=0.1, epochs=100):
        self.weights = np.zeros(input_size)
        self.bias = 0
        self.learning_rate = learning_rate
        self.epochs = epochs

    def activation(self, x):
        # Linear activation (identity function)
        return x

    def predict(self, X):
        # Compute the linear output
        return self.activation(np.dot(X, self.weights) + self.bias)

    def train(self, X, y):
        # Train the model using Adaline's learning rule (Least Mean Squares)
        for epoch in range(self.epochs):
            for i in range(len(X)):
                # Calculate the prediction
                prediction = self.predict(X[i])

                # Compute the error
                error = y[i] - prediction

                # Update the weights and bias
                self.weights += self.learning_rate * error * X[i]
                self.bias += self.learning_rate * error

    def evaluate(self, X):
        # Make predictions for the input X
        return np.where(self.predict(X) >= 0.5, 1, 0)  # Convert to binary output

# AND operation input and output
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input pairs
y = np.array([0, 0, 0, 1])  # AND outputs

# Initialize Adaline model with 2 input features (for A and B), learning rate, and epochs
adaline = Adaline(input_size=2, learning_rate=0.1, epochs=100)

# Train the Adaline model
adaline.train(X, y)

# Evaluate the trained model on the same inputs (X)
predictions = adaline.evaluate(X)
print("Predictions on the AND operation:")
for i, prediction in enumerate(predictions):
    print(f"Input: {X[i]} => Predicted: {prediction} => Actual: {y[i]}")

Practical 8: Implementation of Features Extraction and Selection, Normalization, Transformation, Principal Components Analysis

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif

# Sample dataset
data = {
    'Math': [85, 90, 88, 60, 76, 95],
    'Science': [80, 85, 89, 65, 70, 100],
    'English': [78, 85, 80, 70, 75, 90],
    'Computer': [92, 96, 94, 65, 78, 98],
    'Passed': [1, 1, 1, 0, 0, 1]
}

df = pd.DataFrame(data)
print("Original Data:\n", df)

# Feature extraction
X = df.drop('Passed', axis=1)
y = df['Passed']

# Feature selection
selector = SelectKBest(score_func=f_classif, k=2)
X_new = selector.fit_transform(X, y)
print("\nSelected Features (Top 2):\n", X_new)

# Normalization
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)
print("\nNormalized Data:\n", X_normalized)

# Standardization
standardizer = StandardScaler()
X_transformed = standardizer.fit_transform(X)
print("\nStandardized Data:\n", X_transformed)

# PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_transformed)
print("\nPCA Result:\n", X_pca)

Practical 9: Logistic Regression for Survival Prediction

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import classification_report

# Loading the data
train = pd.read_csv('titanic.csv')
print("First few rows of the dataset:")
print(train.head())

# Analyzing Missing Data
percent_missing = train.isnull().sum() * 100 / len(train)
result = pd.DataFrame({'cols': train.columns, 'percent_missing': percent_missing})
result.sort_values('percent_missing', inplace=True)
print("\nMissing Data Analysis:")
print(result)

# Analyzing Survival Counts
survival_counts = train['Survived'].value_counts().sort_index()
survival_counts.index = ['Not Survived', 'Survived']  # 0 -> not survived, 1 -> survived
print("\nSurvival Counts:")
print(survival_counts)

# Data Cleaning
train.dropna(inplace=True)

# Dropping Categorical (Text) Features
print("\nDataset Info:")
print(train.info())
train.drop(['Sex', 'Name', 'Ticket'], axis=1, inplace=True)
print("\nDataset after dropping categorical features:")
print(train.head())

# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(
    train.drop('Survived', axis=1),
    train['Survived'],
    test_size=0.30,
    random_state=101
)

# Training and Predicting
logmodel = LogisticRegression()
logmodel.fit(X_train, y_train)
predictions = logmodel.predict(X_test)
print("\nPredictions:")
print(predictions)

# Evaluation
confusion_matrix = metrics.confusion_matrix(y_test, predictions)
cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix,
    display_labels=[0, 1]
)
cm_display.plot()
plt.show()

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, predictions))

Practical 10: Bank Customers Retirement Predictions using SVM

Step 1: Import Libraries and Load Data

# Import libraries 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
bank_df = pd.read_csv('Bank_Customer_retirement.csv')

# Display basic information
print("Dataset Shape:", bank_df.shape)
print("\nFirst few rows:")
print(bank_df.head())

# Visualize the data
sns.pairplot(bank_df, hue='Retire', vars=['Age', 'Savings'])
plt.show()

Step 2: Data Preprocessing

# Drop Customer ID column
bank_df = bank_df.drop(['Customer ID'], axis=1)

# Prepare features and target
X = bank_df.drop(['Retire'], axis=1)
y = bank_df['Retire']

# Standardize the features
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_data_scaled = scaler.fit_transform(X)

# Split the data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_data_scaled, y, test_size=0.20, random_state=101
)

print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)

Step 3: Model Training and Evaluation with Different Kernels

from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Linear Kernel
print("Linear Kernel Results:")
svmmodel1 = SVC(kernel='linear')
svmmodel1.fit(X_train, y_train)
y_pred = svmmodel1.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True)
plt.show()
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Polynomial Kernel
print("\nPolynomial Kernel Results:")
svmmodel2 = SVC(kernel='poly')
svmmodel2.fit(X_train, y_train)
y_pred = svmmodel2.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True)
plt.show()
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# RBF Kernel
print("\nRBF Kernel Results:")
svmmodel3 = SVC(kernel='rbf')
svmmodel3.fit(X_train, y_train)
y_pred = svmmodel3.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True)
plt.show()
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
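
Since the three blocks above differ only in the kernel argument, the same comparison can be written as a loop. A compact sketch (equivalent behavior assumed; the heatmap plots are omitted for brevity):

for kernel in ['linear', 'poly', 'rbf']:
    model = SVC(kernel=kernel)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"{kernel} kernel accuracy:", accuracy_score(y_test, y_pred))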

Practical 11: Elbow K-means Clustering for Indian States/UTs

Step 1: Import Libraries and Load Data

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn.cluster import KMeans

# Load the dataset
data = pd.read_csv("India StatesUTs.csv")

# Display the dataset
print("Dataset:")
print(data)

# Display the shape of the dataset
print("\nDataset Shape:", data.shape)

Step 2: Prepare Data and Apply K-means

# Select features for clustering
x = data.iloc[:, 1:3]  # Selecting Longitude and Latitude columns

# Apply K-means clustering
kmeansmodel = KMeans(n_clusters=5)
kmeansmodel.fit(x)

# Get cluster assignments
identified_clusters = kmeansmodel.fit_predict(x)
print("\nCluster Assignments:")
print(identified_clusters)

# Add cluster information to the dataset
data_with_clusters = data.copy()
data_with_clusters['Cluster'] = identified_clusters
print("\nData with Clusters:")
print(data_with_clusters)

Step 3: Visualize Clusters

# Plot the clusters
plt.figure(figsize=(10, 6))
plt.scatter(data_with_clusters['Longitude'],
           data_with_clusters['Latitude'],
           c=data_with_clusters['Cluster'],
           cmap='brg',
           s=200)
plt.xlim(50, 100)
plt.ylim(0, 50)
plt.title('Clusters of Indian States/UTs')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.show()

Step 4: Elbow Method for Optimal Clusters

# Calculate WCSS for different numbers of clusters
wcss = []
for i in range(1, 7):
    kmeans = KMeans(n_clusters=i)
    kmeans.fit(x)
    wcss_iter = kmeans.inertia_
    wcss.append(wcss_iter)

print("\nWithin-Cluster Sum of Squares (WCSS):")
print(wcss)

# Plot the elbow curve
plt.figure(figsize=(10, 6))
number_clusters = range(1, 7)
plt.plot(number_clusters, wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of Clusters')
plt.ylabel('Within-cluster Sum of Squares')
plt.show()

Practical 12: Random Forest and Bagging Implementation

Part 1: Random Forest Implementation

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load and explore the dataset
df = pd.read_csv("playgolf.csv")
print("Dataset Preview:")
print(df.head(15))
print("\nDataset Info:")
print(df.info())
print("\nDataset Description:")
print(df.describe())

# Analyze categorical columns
categorical_col = []
for column in df.columns:    
    categorical_col.append(column)
    print(f"{column} : {df[column].unique()}")
    print("====================================")

# Check target variable distribution
print("\nPlayGolf Distribution:")
print(df.PlayGolf.value_counts())

# Prepare categorical columns for encoding
categorical_col.remove('PlayGolf')

# Encode categorical variables
label = LabelEncoder()
for column in categorical_col:
    df[column] = label.fit_transform(df[column])

print("\nEncoded Dataset:")
print(df)

# Split the data
X = df.drop('PlayGolf', axis=1)
y = df.PlayGolf
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model
RandomForestmodel = RandomForestClassifier(n_estimators=10)
RandomForestmodel.fit(X_train, y_train)

# Apply Bagging
n_estimators = 10
bagging_classifier = BaggingClassifier(estimator=RandomForestmodel,  # 'base_estimator' in scikit-learn < 1.2
                                       n_estimators=n_estimators)

# Train and evaluate
bagging_classifier.fit(X_train, y_train)
y_pred = bagging_classifier.predict(X_test)

print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print(f"\nAccuracy Score: {accuracy_score(y_test, y_pred) * 100:.2f}%")

Part 2: Ensemble Bagging with Voting

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression

# Create sample dataset
def CreateDataFrame(N):
    columns = ['a', 'b', 'c', 'y']
    df = pd.DataFrame(columns=columns)
    for i in range(N):
        a = np.random.randint(10)
        b = np.random.randint(20)
        c = np.random.randint(5)  
        y = "normal"
        if((a+b+c) > 25):
            y = "high"
        elif((a+b+c) < 12):
            y = "low"
        df.loc[i] = [a, b, c, y]
    return df

# Generate and prepare data
df = CreateDataFrame(200)
print("Generated Dataset Preview:")
print(df.head())

X = df[["a", "b", "c"]].astype(int)  # row-wise loc assignment leaves object dtype; cast for sklearn
Y = df["y"]                          # 1-D series, as LabelEncoder expects

# Encode target variable
le = LabelEncoder()
y = le.fit_transform(Y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Initialize base models
dtcmodel = DecisionTreeClassifier(criterion="entropy")
lrmodel = LogisticRegression()
bnbmodel = BernoulliNB()
gnbmodel = GaussianNB()
svcmodel = SVC()

# Train and evaluate base models with bagging
base_methods = [dtcmodel, lrmodel, bnbmodel, gnbmodel, svcmodel]
for bm in base_methods:
    print(f"\nMethod: {bm}")
    bag_model = BaggingClassifier(estimator=bm, n_estimators=100, bootstrap=True)  # 'base_estimator' in scikit-learn < 1.2
    bag_model.fit(X_train, y_train)
    ytest_pred = bag_model.predict(X_test)
    print(f"Accuracy: {bag_model.score(X_test, y_test)}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, ytest_pred))

# Create and evaluate voting classifier
voting_clf = VotingClassifier(estimators=[
    ('DecisionTree', dtcmodel),
    ('Logistic', lrmodel),
    ('Bernoulli', bnbmodel),
    ('Gaussian', gnbmodel),
    ('SVC', svcmodel)
])

# Train and evaluate voting classifier
voting_clf.fit(X_train, y_train)
predictions = voting_clf.predict(X_test)

print("\nVoting Classifier Results:")
print("Confusion Matrix:")
print(confusion_matrix(y_test, predictions))
print("\nClassification Report:")
print(classification_report(y_test, predictions))

Practical 13: AdaBoost, Stochastic Gradient Boosting, and Voting Ensemble

Part 1: AdaBoost Classification

import pandas as pd
from sklearn import model_selection
from sklearn.ensemble import AdaBoostClassifier

# Load the dataset
df = pd.read_csv("pimaindiansdiabetes.csv")
print("Dataset Preview:")
print(df)
print("\nDataset Shape:", df.shape)

# Prepare features and target
X = df.iloc[:, 0:8]
y = df.iloc[:, 8]

# Implement AdaBoost
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=42)  # random_state requires shuffle=True
num_trees = 30
model = AdaBoostClassifier(n_estimators=num_trees, random_state=42)
results = model_selection.cross_val_score(model, X, y, cv=kfold)
print("\nAdaBoost Mean Accuracy:", results.mean())

Part 2: Stochastic Gradient Boosting

import pandas as pd
from sklearn import model_selection
from sklearn.ensemble import GradientBoostingClassifier

# Load the dataset
df = pd.read_csv("pimaindiansdiabetes.csv")
print("Dataset Preview:")
print(df)

# Prepare features and target
X = df.iloc[:, 0:8]
y = df.iloc[:, 8]

# Implement Stochastic Gradient Boosting
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=42)  # random_state requires shuffle=True
num_trees = 30
model = GradientBoostingClassifier(n_estimators=num_trees, random_state=42)
results = model_selection.cross_val_score(model, X, y, cv=kfold)
print("\nStochastic Gradient Boosting Mean Accuracy:", results.mean())

Part 3: Voting Ensemble

import pandas as pd
import warnings
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Suppress warnings
warnings.filterwarnings("ignore")

# Load the dataset
df = pd.read_csv("pimaindiansdiabetes.csv")
print("Dataset Preview:")
print(df)

# Prepare features and target
X = df.iloc[:, 0:8]
y = df.iloc[:, 8]

# Create base models
estimators = []
logmodel = LogisticRegression()
estimators.append(('logistic', logmodel))

DTmodel = DecisionTreeClassifier()
estimators.append(('cart', DTmodel))

SVCmodel = SVC()
estimators.append(('svm', SVCmodel))

# Create and evaluate voting ensemble
ensemble = VotingClassifier(estimators)
print("\nVoting Ensemble Model:", ensemble)

kfold = model_selection.KFold(n_splits=10)
results = model_selection.cross_val_score(ensemble, X, y, cv=kfold)
print("\nVoting Ensemble Mean Accuracy:", results.mean())

Practical 14: Feature Selection, Scaling, and PCA

Part 1: Feature Selection using Chi-Square Test

import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, chi2

# Load the dataset
data = pd.read_csv('Mobile_Data.csv')
print("Dataset Preview:")
print(data.head(5))

# Prepare features and target
X = data.iloc[:, 0:20]  # All columns except price range
y = data.iloc[:, -1]    # Only price range column

# Apply Chi-Square Test to select top 10 features
bestfeatures = SelectKBest(score_func=chi2, k=10)
model = bestfeatures.fit(X, y)

# Create DataFrame with feature scores
dfscores = pd.DataFrame(model.scores_)
dfcolumns = pd.DataFrame(X.columns)
feature_scores = pd.concat([dfcolumns, dfscores], axis=1)
feature_scores.columns = ['Feature', 'Score']
print("\nFeature Scores:")
print(feature_scores.sort_values(by='Score', ascending=False))

Part 2: Feature Scaling - Normalization and Standardization

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Load the dataset
cols = ['loan_amount', 'interest_rate', 'installment']
data = pd.read_csv('Loan_Data.csv', usecols=cols)
print("Dataset Preview:")
print(data.head())

# Standardization
scaler = StandardScaler()
std_data_scaled = scaler.fit_transform(data)
print("\nStandardized Data:")
print(std_data_scaled)
print("\nStandardized Data Statistics:")
print("Mean:", std_data_scaled.mean(axis=0))
print("Standard Deviation:", std_data_scaled.std(axis=0))

# Normalization
scaler = MinMaxScaler()
norm_data_scaled = scaler.fit_transform(data)
print("\nNormalized Data:")
print(norm_data_scaled)
print("\nNormalized Data Statistics:")
print("Mean:", norm_data_scaled.mean(axis=0))
print("Standard Deviation:", norm_data_scaled.std(axis=0))

Part 3: Linear Discriminant Analysis (LDA)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset
dataset = pd.read_csv('Wine.csv')
X = dataset.iloc[:, 0:13].values
y = dataset.iloc[:, 13].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Apply LDA
lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)

# Train Logistic Regression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = classifier.predict(X_test)
print("\nLDA Results:")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

Part 4: Principal Component Analysis (PCA)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset
dataset = pd.read_csv('Wine.csv')
X = dataset.iloc[:, 0:13].values
y = dataset.iloc[:, 13].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Apply PCA
pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Train Logistic Regression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = classifier.predict(X_test)
print("\nPCA Results:")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))