cancel
Showing results for 
Search instead for 
Did you mean: 
intapiuser
Community Team Member
Community Team Member
While some users like showing Gantt charts that allow overlapping dates, many would prefer a visual that clearly shows individual work streams, especially in organizations where members are focused on one task at a given time.
 
The input to the Python editor? A table with 4 columns:
  • Project Name
  • Person - the individual working on the project
  • start_date - the day the individual begins working on the project
  • end_date - the day the individual stops working on the project
 
Now let's say we accidentally double booked one of our team members. This chart will give us a warning so we can go back and correct the team workflows!
 
# SQL output is imported as a pandas dataframe variable called "df"
# primary reference: https://sukhbinder.wordpress.com/2016/05/10/quick-gantt-chart-with-matplotlib/
# reference http://www.clowersresearch.com/main/gantt-charts-in-matplotlib/

#######################################################################
### LIBRARIES ###
#######################################################################

import pandas as pd
import datetime as dt
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from matplotlib.dates import MONTHLY, WEEKLY, DateFormatter, rrulewrapper, RRuleLocator
from collections import OrderedDict
import numpy as np
import datetime as dt

#######################################################################
### HELPER FUNCTIONS ###
#######################################################################

# Function: Return distinct projects/phases
# Input: dataframe with a column indicating project names
# Output: dataframe of all distinct phases

def all_phases(df):
  phases=df['project'].drop_duplicates().values.tolist()
  return phases

# Function: prepare dataframe for gantt visualization
# Input: dataframe with a column for project, team, start_date, and end_date
# Output: dataframe with all columns required for main function

def df_gantt(df):

    df['project']=df['project'].astype('category')
    df['person']=df['person'].astype('category')
    df['end_date2']=dates.date2num(df['end_date'])
    df['start_date2']=dates.date2num(df['start_date'])
    df['duration']=df['end_date2']-df['start_date2']

    #assign colors for each series
    colors=['paleturquoise','lightskyblue','plum','mediumorchid','salmon','lightpink','sandybrown','gold']
    allprojects=df['project'].drop_duplicates().sort_values().reset_index(drop=True)
# #     allgrps=df['person'].drop_duplicates().sort_values().reset_index(drop=True)
    se=pd.DataFrame(colors[0:len(allprojects)])
    se.columns=['col']
    colormatrix=pd.concat([allprojects,se],axis=1)
    df = df.merge(colormatrix,on='project',how='inner').sort_values('person').reset_index(drop=True)
    return df

##########################################################################
### MAIN PLOT
##########################################################################

# Function: Create Gantt chart
# Input: dataframe with a column for project, team, start_date, and end_date, optional parameter showtoday (boolean) that shows a line marking the current date
# Output: matplotlib object of Gantt chart

def gantt_graph(df, showtoday):

  df=df_gantt(df)
  fig, ax = plt.subplots()

  #initialize variables
  j=''
  change=0
  ylocs=[]

  #plot chart
  for i in range(len(df['start_date2'])):
    if (j!=df['person'][i]):
      change=change+1
      j=df['person'][i]
      ylocs.append(0.5*change+0.5)
    ax.barh(0.5*change+0.5, df['duration'][i], left=df['start_date2'][i],height=0.4, align='center', color=df['col'][i], alpha = 1.0, label=df['person'][i])
    ax.annotate(df['project'][i],xy=(df['start_date2'][i]+0.5*df['duration'][i],0.5*change+0.5),horizontalalignment='center')

  #format x axis
  rule = rrulewrapper(WEEKLY, interval=1)
  ax.xaxis.set_major_locator(RRuleLocator(rule))
  ax.xaxis.set_major_formatter(DateFormatter("%b %d '%y"))
  plt.tick_params(
    axis='x',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    top='off',      # ticks along the bottom edge are off
    bottom='on',
    rotation=0)         # ticks along the top edge are off

    #format y axis
  ax.invert_yaxis()
  ax.set_yticks(ylocs)
  ppl=df['person'].drop_duplicates().values.tolist()
  ax.set_yticklabels(ppl)
  plt.tick_params(
    axis='y',          # changes apply to the x-axis
    which='both',      # both major and minor ticks are affected
    left='off',      # ticks along the bottom edge are off
    right='off')        # ticks along the top edge are off


  #format border
  ax.spines['bottom'].set_linewidth(0.2)
  ax.spines['left'].set_linewidth(0.2)
  ax.spines['top'].set_linewidth(0.2)
  ax.spines['right'].set_linewidth(0.2)

  #plot a line showing current date
  if showtoday == True:
    plt.axvline(x=dt.date.today(),color='gray')

  return fig

# Function: provide error message handling for double-booked team memebers
# Input: 1. dataframe with a column for project, team, start_date, and end_date; 2. optional boolean parameter to show a line for the current date
# Output: matplotlib object of gantt chart (if no double booking cases) or error message (if any cases of double booking exist)

def workflow_errors(df, show_today=True):
  df_updated=df_gantt(df)
  errors=0
  for person in df_updated['person'].drop_duplicates().values.tolist():
    df2=df_updated[df_updated['person'] == person]
    df3=df2
    for i in range(len(df2['start_date2'])):
      for j in range(len(df3['start_date2'])):
        if (df2['end_date2'].iloc[i]>df3['start_date2'].iloc[j] and df2['end_date2'].iloc[i]<df3['end_date2'].iloc[j]):
          stringtime=df2['end_date'].iloc[i].strftime('%B %d, %Y')
          txt = person + ' has multiple workflows on/around ' + stringtime
          errors+=1
  if (errors==0):
    fig = gantt_graph(df, showtoday=show_today)
  else:
    fig = plt.figure()
    plt.axis('off')
    plt.text(.5, .5, txt, fontsize=15, color='black', ha='center')
  return fig

# # Use Sisense for Cloud Data Teams to visualize a dataframe or an image by passing data to periscope.output()
periscope.image(workflow_errors(df, show_today=True))
Rate this article:
Version history
Last update:
‎02-13-2024 11:27 AM
Updated by: