Data Science Series
[Kaggle Visualization] Python basic code
heave_17
2021. 4. 28. 22:05
import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plot
%matplotlib inline
import seaborn as sns
# Path of the file to read
flight_filepath = "../input/flight_delays.csv"
# Read the file into a variable flight_data
flight_data = pd.read_csv(flight_filepath, index_col="Month")
# Set the width and height of the figure
plt.figure(figsize=(16, 6))
# Add title. It now names the FIFA data actually drawn below; the earlier
# text was a copy of the Spotify chart's title and mislabelled this figure.
plt.title("FIFA Rankings Over Time")
# Line chart showing how FIFA rankings evolved over time
# NOTE(review): fifa_data is not loaded anywhere in this excerpt -- confirm
# it is read in an earlier cell.
sns.lineplot(data=fifa_data)
# Plot a subset of the data: only the songs named below, on one shared figure.
selected_songs = ("Shape of You", "Despacito")

# Figure size is (width, height)
plt.figure(figsize=(14, 6))
plt.title("Daily Global Streams of Popular Songs in 2017-2018")

# One line per selected song; the column name doubles as the legend label.
for song in selected_songs:
    sns.lineplot(data=spotify_data[song], label=song)

# Label the horizontal axis
plt.xlabel("Date")
# Bar Charts
# Average arrival delay for Spirit Airlines flights (column 'NK'), one bar per month.
# Important: "Month" is the index of flight_data (see index_col in read_csv),
# so it is reached through flight_data.index rather than by column name.
months = flight_data.index
spirit_delays = flight_data['NK']
sns.barplot(x=months, y=spirit_delays)
# Label the vertical axis
plt.ylabel("Arrival delay (in minutes)")
# Heatmap
# Average arrival delay for each airline by month, drawn as a colour-coded grid.
# annot=True makes the value of every cell appear on the chart.
delay_axes = sns.heatmap(data=flight_data, annot=True)
# Label the horizontal axis of the axes the heatmap was drawn on
delay_axes.set_xlabel("Airline")
# Scatter Plots
# Pull the columns used by the charts below out of insurance_data once.
bmi = insurance_data['bmi']
charges = insurance_data['charges']
smoker = insurance_data['smoker']

# Plain scatter plot of charges against BMI
sns.scatterplot(x=bmi, y=charges)
# Same relationship with a fitted regression line
sns.regplot(x=bmi, y=charges)

# Color-coded plots: points (and regression fits) are split by smoker status
sns.scatterplot(x=bmi, y=charges, hue=smoker)
# lmplot takes column names plus the frame instead of the columns themselves
sns.lmplot(x="bmi", y="charges", hue="smoker", data=insurance_data)

# Scatter plot for a categorical variable: charges grouped by smoker status
sns.swarmplot(x=smoker, y=charges)
# Distributions
# NOTE(review): newer seaborn releases deprecate distplot (histplot is the
# replacement) and the shade= argument (fill=) -- confirm the installed
# version before migrating; the calls are kept as-is here.
petal_length = iris_data['Petal Length (cm)']

# Histogram of petal lengths (kde=False leaves out the density curve)
sns.distplot(a=petal_length, kde=False)

# KDE (Kernel Density Estimate) plot, with the area under the curve shaded
sns.kdeplot(data=petal_length, shade=True)

# 2D KDE plot of petal length against sepal width
sepal_width = iris_data['Sepal Width (cm)']
sns.jointplot(x=petal_length, y=sepal_width, kind="kde")
# Color-coded plots: one petal-length histogram per species on shared axes.
# NOTE(review): the three per-species frames are not created in this excerpt
# -- confirm they are loaded in an earlier cell.
species_frames = (
    ("Iris-setosa", iris_set_data),
    ("Iris-versicolor", iris_ver_data),
    ("Iris-virginica", iris_vir_data),
)
for species_label, species_frame in species_frames:
    sns.distplot(a=species_frame['Petal Length (cm)'], label=species_label, kde=False)

plt.title("Histogram of Petal Lengths, by Species")
# Show the legend built from the label= given to each histogram
plt.legend()