Plotting Tutorial¶

In [2]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# with %matplotlib inline you turn on the immediate display.
%matplotlib inline

In [3]:
# Toy dataset: ten animals described by mass and height.
# The first five rows are labelled 'cat', the last five 'dog'.
data = pd.DataFrame(
    dict(
        mass=[4, 5, 5, 5, 6, 7, 11, 11, 13, 13],
        height=[2, 1, 2, 3, 2, 4, 8, 10, 8, 10],
        label=['cat'] * 5 + ['dog'] * 5,
    )
)
data  # last expression of the cell, so the frame is rendered as a table

Out[3]:
mass height label
0 4 2 cat
1 5 1 cat
2 5 2 cat
3 5 3 cat
4 6 2 cat
5 7 4 dog
6 11 8 dog
7 11 10 dog
8 13 8 dog
9 13 10 dog

Method 1 - pandas.DataFrame.plot()¶

In [4]:
# DataFrame.plot draws with matplotlib under the hood.
# The default kind is a line plot, which is why both numeric columns
# ('mass' and 'height') appear as separate lines.
data.plot.line(title="Data", color='green', linestyle='dashed', marker='o')
plt.show()

In [5]:
# Scatter plot through pandas: unlike the line plot, a scatter plot needs
# the x and y columns to be named explicitly.
data.plot.scatter(x='height', y='mass')
plt.show()

In [6]:
# .plot also works directly on a Series: here, the mean height per mass value.
mean_height_by_mass = data.groupby(['mass'])['height'].mean()

# figsize is width x height, in inches
mean_height_by_mass.plot.bar(title='Average height', figsize=[3, 3])
plt.ylabel('mean height')
plt.show()

In [20]:
# Mean height for every distinct mass value; the index holds the mass values.
mean_height = data.groupby(['mass'])['height'].mean()

# pandas' plot returns a matplotlib Axes object (conventionally called `ax`),
# so the figure can be customised further through its methods.
my_plot = mean_height.plot(kind='bar', title='Average height', figsize=[5, 5])
my_plot.set_ylabel('mean height')

# Rename the x ticks. Bars sit at positions 0..n-1; the labels are derived from
# the group keys so they cannot drift out of sync with the data.
my_plot.set_xticks(range(len(mean_height)))
my_plot.set_xticklabels([f'Mass {mass}' for mass in mean_height.index])
plt.show()

matplotlib.pyplot.plot() and pandas.DataFrame.plot() are different functions: the pandas method uses matplotlib to draw, but it has its own parameters.

Method 2 - matplotlib.pyplot.plot()¶

A¶

In [8]:
# rcParams holds matplotlib's global defaults, e.g. font family, font size,
# line colour, line width. These settings are applied automatically at startup
# ("rc" = run + configure).

# Changing the figure size here also affects every figure created later on.
plt.rcParams['figure.figsize'] = [6, 6]  # width x height (inch)
plt.rcParams['figure.figsize'] = 6, 6    # same setting, written as a tuple

# Font size for the first figure
plt.rcParams['font.size'] = 26

# Figure 1: draw the (x, y) points only (empty linestyle = no connecting line).
plt.plot(data.height, data.mass, linestyle='', color='black', marker='+')
# plt.show() displays the current figure, so the next plt.plot() starts a new one.
# Without plt.show(), the next plot call would draw onto this same figure.
plt.show()

# Smaller font for the remaining figures
plt.rcParams['font.size'] = 14

# Figure 2: the same points, connected with a dashed line
plt.plot(data.height, data.mass, linestyle='--', color='black', marker='+')
plt.show()

# Figure 3: 'k+' is the format-string shorthand for color='black', marker='+', linestyle=''.
# See https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
plt.plot(data.height, data.mass, 'k+')
plt.title('plot 3')
plt.xlabel('x label')
plt.ylabel('y label')

plt.show()


B¶

In [9]:
# A Figure can contain multiple plots, and its parameters (here the size,
# width x height in inches) can be set when it is created.
fig = plt.figure(figsize=[16, 6])

# Multiple plots on one figure - add_subplot(nrows, ncols, index)
# returns the Axes placed at position `index` of an nrows x ncols grid.
left_ax = fig.add_subplot(1, 2, 1)
left_ax.plot(data['height'], data['mass'], 'go--')

right_ax = fig.add_subplot(1, 2, 2)
right_ax.plot(data['height'], data['mass'], 'go--')

plt.show()


C¶

In [10]:
plt.rcParams['figure.figsize'] = [16, 6]

# Multiple plots on one figure - plt.subplot(nrows, ncols, index).
# plt.subplot(1, 2, k) is the long form of plt.subplot(12k): a grid of one row
# and two columns, with position k made the current plot.
for position in (1, 2):
    plt.subplot(1, 2, position)
    plt.plot(data['height'], data['mass'], 'go--')

plt.show()


D¶

In [22]:
plt.rcParams['figure.figsize'] = [16, 6]

# plt.subplot returns a matplotlib Axes object, conventionally named `ax`.
# The Axes contains most of the figure elements (Axis, Tick, Line2D, Text,
# Polygon, etc.) and sets the coordinate system.

# Left plot
ax = plt.subplot(1, 2, 1)
ax.plot(data['height'], data['mass'], 'go--')
# Equal aspect ratio: one unit on x is drawn as long as one unit on y.
# Labels and different screen sizes may otherwise distort the plot.
ax.set_aspect('equal')
ax.set_xlabel('x')

# Right plot: a second, independent Axes object
ax2 = plt.subplot(1, 2, 2)
ax2.plot(data['height'], data['mass'], 'go--')
ax2.set_aspect('equal')
ax2.set_title('demo')

plt.show()


Method 3 - seaborn¶

In [12]:
# Seaborn is a plotting module for Python.
# It works well with pandas DataFrames.
import seaborn as sns

plt.rcParams['figure.figsize'] = [6, 6]

# Plot style. matplotlib 3.6 renamed its bundled seaborn styles to
# 'seaborn-v0_8-*' and later releases dropped the old names, so use whichever
# name this installation provides.
whitegrid_style = 'seaborn-v0_8-whitegrid'
if whitegrid_style not in plt.style.available:
    whitegrid_style = 'seaborn-whitegrid'
plt.style.use(whitegrid_style)

# Column names are looked up in the DataFrame passed as `data`.
ax = sns.scatterplot(x='height', y='mass', data=data)

In [13]:
# 'tips' is one of seaborn's example datasets. load_dataset downloads it on
# first use (network access required) and returns a pandas DataFrame.
tips = sns.load_dataset('tips')

# hue   - grouping variable that will produce points with different colors
# style - grouping variable that will produce points with different markers
ax = sns.scatterplot(x="total_bill", y="tip", hue="day", style="time", data=tips)


Seaborn Example gallery with code https://seaborn.pydata.org/examples/index.html

Adding a point, a line and a legend¶

In [14]:
plt.rcParams['figure.figsize'] = [6, 6]
plt.plot(data.height, data.mass, 'ro', label='data')

# Add a single point by passing its x and y coordinates ...
plt.plot(4, 10, marker='+', markersize=15, color='black', label='outlier')
# ... or by unpacking them from a sequence: *[4, 11] -> 4, 11
plt.plot(*[4, 11], marker='+', markersize=15, color='black', label='unknown')

# Add a line: plt.plot([x1, x2], [y1, y2]) joins (x1, y1) and (x2, y2),
# here (8, 6) and (0, 0).
plt.plot([8, 0], [6, 0], 'g--')
plt.plot((8, 0), (6, 0), 'g--')  # tuples work too; this draws the same line again

# Add a line from points given as [x, y] pairs: [1, 2] and [10, 11].
# zip([1, 2], [10, 11]) -> (1, 10), (2, 11), i.e. all x values, then all y values
# * unpacks the two tuples into separate arguments: plt.plot((1, 10), (2, 11), 'k-')
plt.plot(*zip([1, 2], [10, 11]), 'k-')

# The legend lists every plot call that was given a label.
plt.legend(loc='upper left', frameon=True, markerscale=1)
plt.show()


In [15]:
plt.plot(data['height'], data['mass'], 'ro', label='data')

# Write each row's label next to its point. Iterating over the column values
# directly (instead of looking rows up by a running counter) keeps this correct
# even when the DataFrame index is not 0..n-1, e.g. after filtering.
for height, mass, label in zip(data['height'], data['mass'], data['label']):
    x = height + 0.1  # move the label slightly to the right of the point
    y = mass
    plt.annotate(label, (x, y))
plt.show()


Color points by label¶

In [16]:
# One plot call per label so that each group gets its own colour and legend entry.
# For each group: x = mass of the matching rows, y = height of the matching rows.
for animal, fmt in (('cat', 'bo'), ('dog', 'ro')):
    rows = data.label == animal
    plt.plot(data.loc[rows, 'mass'], data.loc[rows, 'height'], fmt, label=animal + 's')
plt.legend(loc='lower right', frameon=True, markerscale=1)
plt.show()