Gantt Charts Version 2 - Individual Workstreams
While some users like showing Gantt charts that allow overlapping dates, many would prefer a visual that clearly shows individual work streams, especially in organizations where members are focused on one task at a given time.


The input to the Python editor? A table with 4 columns:
- Project Name
- Person - the individual working on the project
- start_date - the day the individual begins working on the project
- end_date - the day the individual stops working on the project
Now let's say we accidentally double booked one of our team members. This chart will give us a warning so we can go back and correct the team workflows!

# SQL output is imported as a pandas dataframe variable called "df"
# primary reference: https://sukhbinder.wordpress.com/2016/05/10/quick-gantt-chart-with-matplotlib/
# reference http://www.clowersresearch.com/main/gantt-charts-in-matplotlib/
#######################################################################
### LIBRARIES ###
#######################################################################
import pandas as pd
import datetime as dt
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from matplotlib.dates import MONTHLY, WEEKLY, DateFormatter, rrulewrapper, RRuleLocator
from collections import OrderedDict
import numpy as np
import datetime as dt
#######################################################################
### HELPER FUNCTIONS ###
#######################################################################
# Function: Return distinct projects/phases
# Input: dataframe with a column indicating project names
# Output: dataframe of all distinct phases
def all_phases(df):
phases=df['project'].drop_duplicates().values.tolist()
return phases
# Function: prepare dataframe for gantt visualization
# Input: dataframe with a column for project, team, start_date, and end_date
# Output: dataframe with all columns required for main function
def df_gantt(df):
df['project']=df['project'].astype('category')
df['person']=df['person'].astype('category')
df['end_date2']=dates.date2num(df['end_date'])
df['start_date2']=dates.date2num(df['start_date'])
df['duration']=df['end_date2']-df['start_date2']
#assign colors for each series
colors=['paleturquoise','lightskyblue','plum','mediumorchid','salmon','lightpink','sandybrown','gold']
allprojects=df['project'].drop_duplicates().sort_values().reset_index(drop=True)
# # allgrps=df['person'].drop_duplicates().sort_values().reset_index(drop=True)
se=pd.DataFrame(colors[0:len(allprojects)])
se.columns=['col']
colormatrix=pd.concat([allprojects,se],axis=1)
df = df.merge(colormatrix,on='project',how='inner').sort_values('person').reset_index(drop=True)
return df
##########################################################################
### MAIN PLOT
##########################################################################
# Function: Create Gantt chart
# Input: dataframe with a column for project, team, start_date, and end_date, optional parameter showtoday (boolean) that shows a line marking the current date
# Output: matplotlib object of Gantt chart
def gantt_graph(df, showtoday):
df=df_gantt(df)
fig, ax = plt.subplots()
#initialize variables
j=''
change=0
ylocs=[]
#plot chart
for i in range(len(df['start_date2'])):
if (j!=df['person'][i]):
change=change+1
j=df['person'][i]
ylocs.append(0.5*change+0.5)
ax.barh(0.5*change+0.5, df['duration'][i], left=df['start_date2'][i],height=0.4, align='center', color=df['col'][i], alpha = 1.0, label=df['person'][i])
ax.annotate(df['project'][i],xy=(df['start_date2'][i]+0.5*df['duration'][i],0.5*change+0.5),horizontalalignment='center')
#format x axis
rule = rrulewrapper(WEEKLY, interval=1)
ax.xaxis.set_major_locator(RRuleLocator(rule))
ax.xaxis.set_major_formatter(DateFormatter("%b %d '%y"))
plt.tick_params(
axis='x', # changes apply to the x-axis
which='both', # both major and minor ticks are affected
top='off', # ticks along the bottom edge are off
bottom='on',
rotation=0) # ticks along the top edge are off
#format y axis
ax.invert_yaxis()
ax.set_yticks(ylocs)
ppl=df['person'].drop_duplicates().values.tolist()
ax.set_yticklabels(ppl)
plt.tick_params(
axis='y', # changes apply to the x-axis
which='both', # both major and minor ticks are affected
left='off', # ticks along the bottom edge are off
right='off') # ticks along the top edge are off
#format border
ax.spines['bottom'].set_linewidth(0.2)
ax.spines['left'].set_linewidth(0.2)
ax.spines['top'].set_linewidth(0.2)
ax.spines['right'].set_linewidth(0.2)
#plot a line showing current date
if showtoday == True:
plt.axvline(x=dt.date.today(),color='gray')
return fig
# Function: provide error message handling for double-booked team memebers
# Input: 1. dataframe with a column for project, team, start_date, and end_date; 2. optional boolean parameter to show a line for the current date
# Output: matplotlib object of gantt chart (if no double booking cases) or error message (if any cases of double booking exist)
def workflow_errors(df, show_today=True):
df_updated=df_gantt(df)
errors=0
for person in df_updated['person'].drop_duplicates().values.tolist():
df2=df_updated[df_updated['person'] == person]
df3=df2
for i in range(len(df2['start_date2'])):
for j in range(len(df3['start_date2'])):
if (df2['end_date2'].iloc[i]>df3['start_date2'].iloc[j] and df2['end_date2'].iloc[i]<df3['end_date2'].iloc[j]):
stringtime=df2['end_date'].iloc[i].strftime('%B %d, %Y')
txt = person + ' has multiple workflows on/around ' + stringtime
errors+=1
if (errors==0):
fig = gantt_graph(df, showtoday=show_today)
else:
fig = plt.figure()
plt.axis('off')
plt.text(.5, .5, txt, fontsize=15, color='black', ha='center')
return fig
# # Use Sisense for Cloud Data Teams to visualize a dataframe or an image by passing data to periscope.output()
periscope.image(workflow_errors(df, show_today=True))
Updated 02-13-2024
intapiuser
Admin
Joined December 15, 2022