A10

import findspark findspark.init() import pyspark from pyspark.sql import SparkSession import seaborn as sns # Create a SparkSession spark = SparkSession.builder .appName(“TitanicAnalysis”) .getOrCreate() # Load the Titanic dataset titanic_df = spark.createDataFrame(sns.load_dataset(“titanic”)) titanic_df = titanic_df.fillna({‘Age’: 0}) # Filter data for male passengers who died and remove null values from Age column male_deceased = titanic_df.filter((titanic_df[“Sex”] == “male”) & …

A10 Read More »

A9

import findspark findspark.init() import pyspark from pyspark.sql import SparkSession # Create a SparkSession spark = SparkSession.builder .appName(“StudentGrades”) .getOrCreate() # Sample student scores scores = [ (“Alice”, {“Math”: 85, “Science”: 90, “English”: 80}), (“Bob”, {“Math”: 70, “Science”: 75, “English”: 85}), (“Charlie”, {“Math”: 60, “Science”: 65, “English”: 70}), (“David”, {“Math”: 90, “Science”: 95, “English”: 85}), (“Eve”, {“Math”: …

A9 Read More »

A8

import findspark findspark.init() import pyspark from pyspark.sql import SparkSession # Create a SparkSession spark = SparkSession.builder .appName(“MatrixMultiplication”) .getOrCreate() # Sample matrices matrix1 = [ (0, 0, 2), (0, 1, 3), (1, 0, 4), (1, 1, 5) ] matrix2 = [ (0, 0, 6), (0, 1, 7), (1, 0, 8), (1, 1, 9) ] # Create …

A8 Read More »

A7

# Word-count example: read input.txt and count word frequencies using PySpark RDDs.
# Fix: the scraped snippet used Unicode curly quotes (” “), which are a Python
# syntax error — all string literals restored to straight ASCII quotes.
import findspark

findspark.init()  # make the local Spark installation importable before pyspark

import pyspark
from pyspark.sql import SparkSession

# Create (or reuse) a SparkSession and grab its SparkContext for RDD work.
spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Read the text file as an RDD of lines.
text_file = sc.textFile("input.txt")

# Split each line on single spaces, map each word to (word, 1),
# then sum the counts per word.
counts = (
    text_file.flatMap(lambda line: line.split(" "))
    .map(lambda word: (word, 1))
    .reduceByKey(lambda x, y: x + y)
)

# collect() pulls all (word, count) pairs to the driver — fine for small inputs.
output = counts.collect()
for word, count in output:
    print("%s: %i" % (word, count))

# Release cluster resources.
sc.stop()
spark.stop()

A6

import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, classification_report from sklearn.cluster import KMeans data = pd.read_csv(“Iris.csv”) X = data.iloc[:, 1:5].values # Selecting the features scaler = StandardScaler() X_std = scaler.fit_transform(X) #elbow method wcss = [] # Within-cluster sum of …

A6 Read More »

A5

import pandas as pd from sklearn import preprocessing from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.metrics import precision_score , confusion_matrix, accuracy_score, recall_score, classification_report df = pd.read_csv(“Iris.csv”) df.head() encoder = preprocessing.LabelEncoder() df[“Species”]=encoder.fit_transform(df[“Species”]) df x = df.iloc[:, [1, 4]].values y = df.iloc[:, 5].values xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25, random_state = …

A5 Read More »

error: