[Kaggle Visualization] Python basic code

Data Science Series
[Kaggle Visualization] Python basic code

heave_17 2021. 4. 28. 22:05
import pandas as pd 
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plot
%matplotlib inline
import seaborn as sns 

# Location of the flight-delay CSV (relative to the current working directory)
flight_filepath = "../input/flight_delays.csv"

# Load the CSV into flight_data, using its "Month" column as the row index
flight_data = pd.read_csv(
    filepath_or_buffer=flight_filepath,
    index_col="Month",
)

# Set the width and height of the figure
plt.figure(figsize=(16, 6))

# Add title describing the FIFA data that is actually plotted below
plt.title("FIFA Rankings Over Time")

# Line chart showing how FIFA rankings evolved over time
# NOTE(review): fifa_data is not loaded in the visible code -- presumably it is
# read elsewhere; confirm before running this cell.
sns.lineplot(data=fifa_data)

# Plot a subset of the data: only selected song columns of spotify_data

# Figure size (width, height)
plt.figure(figsize=(14,6))

# Chart title
plt.title("Daily Global Streams of Popular Songs in 2017-2018")

# One line per song; the column name doubles as the legend label
for song in ("Shape of You", "Despacito"):
    sns.lineplot(data=spotify_data[song], label=song)

# Label the horizontal axis
plt.xlabel("Date")

# Bar Charts

# Bar chart of the 'NK' column (average arrival delay for Spirit Airlines
# flights) for each month.
# "Month" was made the index when the CSV was read, so it is reached through
# flight_data.index rather than flight_data['Month'].
ax = sns.barplot(x=flight_data.index, y=flight_data['NK'])

# Label the vertical axis of the chart just drawn
ax.set_ylabel("Arrival delay (in minutes)")

# Heatmap

# Heatmap of average arrival delay for each airline by month.
# annot=True writes each cell's value onto the chart.
ax = sns.heatmap(data=flight_data, annot=True)

# Label the horizontal axis of the heatmap
ax.set_xlabel("Airline")

# Scatter Plots

# Plain scatter plot of charges against BMI, then the same relationship with a
# fitted regression line
sns.scatterplot(data=insurance_data, x="bmi", y="charges")
sns.regplot(data=insurance_data, x="bmi", y="charges")

# Color-coded plots: points (and regression lines) are split by the 'smoker' column
sns.scatterplot(data=insurance_data, x="bmi", y="charges", hue="smoker")
sns.lmplot(data=insurance_data, x="bmi", y="charges", hue="smoker")

# Scatter plot for a categorical variable: charges for each 'smoker' category
sns.swarmplot(data=insurance_data, x="smoker", y="charges")

# Distributions
# NOTE: sns.histplot and the `fill` keyword require seaborn >= 0.11.

# Histogram. histplot replaces the deprecated distplot(..., kde=False); it
# draws no KDE curve unless kde=True is passed.
sns.histplot(x=iris_data['Petal Length (cm)'])

# KDE (Kernel Density Estimate) plot; fill=True colors the area under the
# curve (`shade` is the deprecated spelling of this option).
sns.kdeplot(data=iris_data['Petal Length (cm)'], fill=True)

# 2D KDE plot
sns.jointplot(x=iris_data['Petal Length (cm)'], y=iris_data['Sepal Width (cm)'], kind="kde")

# Color-coded plots: one histogram per species.
# histplot replaces the deprecated distplot(..., kde=False); requires seaborn >= 0.11.
sns.histplot(x=iris_set_data['Petal Length (cm)'], label="Iris-setosa")
sns.histplot(x=iris_ver_data['Petal Length (cm)'], label="Iris-versicolor")
sns.histplot(x=iris_vir_data['Petal Length (cm)'], label="Iris-virginica")

# Add title
plt.title("Histogram of Petal Lengths, by Species")

# Show the legend built from the `label` given to each histogram
plt.legend()