Plotting data from two different dataframes to a single Dash graph

Question

I'm trying to generate a Dash app which displays historical and forecasted housing prices. I've got the forecasted data stored in a different dataframe from the historical prices, and I'd like to plot them both on the same graph in Dash, and have the graph get updated via callback when the user picks a different city from a dropdown menu. I would like both traces of the graph to update when a value is selected in the dropdown. I've tried various things but can only get one trace from one dataframe plotted for the graph in my callback:

# --- import libraries ---
import dash
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dash.dependencies import Output, Input

# --- load data --- 
df_h = pd.read_csv('df_h.csv')
df_arima = pd.read_csv('df_arima.csv')

# Every city column of df_h becomes one dropdown option. 'Date' is the shared
# x-axis column (the callback selects df_h[['Date', value]]), so it is skipped:
# offering it as a choice would plot Date against itself.
options = [
    {'label': str(column), 'value': column}
    for column in df_h.columns
    if column != 'Date'
]

# External CSS, loaded from CodePen.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# --- initialize the app ---
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# --- layout the dashboard ---
# One wrapper Div holding: a label, the city dropdown, and an empty container
# ('forecast-container') that the dropdown callback fills with the graph.
app.layout = html.Div(children=[
    html.Div(children=[
        html.Label('Select a feature from drop-down to plot'),
        dcc.Dropdown(
            id='city-dropdown',
            options=options,
            value='Denver, CO',
            multi=False,
            clearable=True,
            searchable=True,
            placeholder='Choose a City...',
        ),
        html.Div(id='forecast-container', style={'padding': '50px'}),
    ]),
])

# --- dropdown callback ---
@app.callback(
    Output('forecast-container', 'children'),
    Input('city-dropdown', 'value'))
def forecast_graph(value):
    """Build the price graph for the city picked in the dropdown.

    ``value`` is the selected dropdown entry. It names a column of ``df_h``
    and is matched against the ``City`` column of ``df_arima``.

    Returns a one-element list containing the ``dcc.Graph`` that becomes the
    children of 'forecast-container'.
    """
    # Historical prices: the date column plus the selected city's column.
    dff = df_h[['Date', value]]
    # Forecast rows for the same city. They are selected here but the figure
    # below does not use them, so only the historical trace is drawn.
    dfa = df_arima[df_arima['City'] == value]

    title = {'text': 'Median Home Price vs. Year for {}'.format(value),
             'font': {'size': 24}, 'x': 0.5, 'xanchor': 'center'}
    layout = dict(
        showlegend=False,
        template='xgridoff',
        yaxis={'title': 'Median Home Price ($USD)'},
        xaxis={'title': 'Year'},
        title=title,
    )

    return [
        dcc.Graph(
            id='forecast-graph',
            figure=px.line(data_frame=dff, x='Date', y=value).update_layout(**layout),
        ),
    ]

I was able to accomplish this in Plotly but can't figure out how to do it in Dash. This is what I want in Dash:

Plotly graph I am trying to reproduce in callback, that is linked to a dropdown menu

This is all I can get to work in Dash (only one dataframe is plotted):

This is the code that works in plotly graph objects:

from statsmodels.tsa.arima_model import ARIMA

# City whose price series is modelled; it is compared against the
# 'RegionName' column of df_ml below.
city = 'Denver, CO'

df_ml = pd.read_csv('df_ml.csv')
# --- data cleaning ---
# Keep only the selected city's row, then flip it so each original column
# becomes a row of (column label, value).
df_pred = df_ml[df_ml['RegionName'] == city]
# drop([0]) removes the first transposed row -- presumably the 'RegionName'
# entry itself; confirm against the CSV layout.
df_pred = df_pred.transpose().reset_index().drop([0])
df_pred.columns = ['Date', 'MedianHomePrice_USD']
df_pred['MedianHomePrice_USD'] = df_pred['MedianHomePrice_USD'].astype('int')
# Normalise the dates to 'YYYY-MM' strings.
df_pred['Date'] = pd.to_datetime(df_pred['Date'])
df_pred['Date'] = df_pred['Date'].dt.strftime('%Y-%m')
# Price series indexed by the 'YYYY-MM' date strings; this is the model input.
df_model = df_pred.set_index('Date')
model_data = df_model['MedianHomePrice_USD']

def house_price_forecast(model_data, forecast_steps, city):
    """Fit an ARIMA(2, 1, 2) model to a price series and plot its forecast.

    Parameters
    ----------
    model_data : pandas.Series
        Historical prices. The index labels are sliced with ``[:-3]`` for the
        title, so they are expected to be 'YYYY-MM' strings.
    forecast_steps : int
        Number of monthly steps to forecast.
    city : str
        City name shown in the figure title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    #--- ARIMA Model (autoregressive integrated moving average) ---
    model = ARIMA(model_data, order = (2,1,2), freq = 'MS')
    res = model.fit()

    # forecast() yields (mean, standard error, confidence interval); the
    # standard error at index 1 is not needed for the plot.
    forecast = res.forecast(forecast_steps)
    forecast_mean = forecast[0]
    forecast_ci = forecast[2]

    df_forecast = pd.DataFrame()
    df_forecast['Mean'] = forecast_mean.T
    # Transposing the interval array lets it unpack into lower/upper columns.
    df_forecast['Lower_ci'], df_forecast['Upper_ci'] = forecast_ci.T
    # NOTE: the forecast start month is hard-coded rather than derived from
    # the last date in model_data.
    df_forecast['Date'] = pd.date_range(start = '2021-02', periods = forecast_steps, freq = 'MS')
    df_forecast['Date'] = df_forecast['Date'].dt.strftime('%Y-%m')
    df_forecast.index = df_forecast['Date']

    fig = go.Figure()
    # Plot the series that was actually modelled (the function's own argument)
    # instead of reaching for a module-level dataframe.
    fig.add_trace(go.Scatter(x = model_data.index, y = model_data,
                            mode = 'lines', name = 'Median Home Price ($USD)',
                            line_color = 'rgba(49, 131, 189, 0.75)', line_width = 2))

    fig.add_trace(go.Scatter(x = df_forecast.index, y = df_forecast['Mean'],
                    mode = 'lines', line_color = '#e6550d',
                    name = 'Forecast mean'))
    # 'tonexty' fills from each trace to the trace added just before it.
    fig.add_trace(go.Scatter(x = df_forecast.index, y = df_forecast['Upper_ci'],
                    mode = 'lines', line_color = '#e0e0e0', fill = 'tonexty',
                    fillcolor = 'rgba(225,225,225, 0.3)',
                    name = 'Upper 95% confidence interval'))
    fig.add_trace(go.Scatter(x = df_forecast.index, y = df_forecast['Lower_ci'],
                    mode = 'lines', line_color = '#e0e0e0', fill = 'tonexty',
                    fillcolor = 'rgba(225,225,225, 0.3)',
                    name = 'Lower 95% confidence interval'))
    # Title years: the date label of the lowest historical price and of the
    # highest forecast mean, each trimmed from 'YYYY-MM' to 'YYYY'.
    fig.update_layout(title = 'Median Home Price in {}, {} - {} (Predicted)'.format(
                  city, model_data.idxmin()[:-3], df_forecast['Mean'].idxmax()[:-3]),
                  xaxis_title = 'Year', yaxis_title = 'Median Home Price ($USD)',
                  template = 'xgridoff')

    fig.show()

# Fit and plot the forecast for the cleaned series, labelled as Denver, CO.
house_price_forecast(model_data, 24, 'Denver, CO') #24 month prediction

Perhaps a more succinct way of asking this question: How do I add a trace to an existing Dash graph, with data from a different dataframe, and both traces get updated when the user selects a value from a single dropdown?

SpinelLherzolite SpinelLherzolite · Accepted Answer · 2021-03-18T21:25:46

Figured it out...

Don't use the syntax I used above in your callback. Instead, assign the result of the px.line call to a variable (fig, in this case), and then use fig.add_scatter to add data from a different dataframe to the graph. Both parts of the graph will update from the callback.

Also, fig.add_scatter doesn't have a dataframe argument, so pass the columns directly as df.column or df[column] (e.g. 'dfa.Date' below).

# --- import libraries ---
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import plotly.express as px
from dash.dependencies import Output, Input

# --- load data ---    
# Historical prices: 'Date' is parsed into datetimes.
df_h = pd.read_csv('df_h.csv')
df_h = df_h.assign(Date = pd.to_datetime(df_h['Date']))

# Forecast data: 'Date' is parsed and then rendered back as 'YYYY-MM' strings.
df_arima = pd.read_csv('df_arima.csv')
df_arima = df_arima.assign(
    Date = pd.to_datetime(df_arima['Date']).dt.strftime('%Y-%m'))

# External CSS, loaded from CodePen.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# --- initialize the app ---
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# --- layout the dashboard ---
# The figure callback takes its input from a component with id
# 'city-dropdown', so that component must be part of the layout; without it
# Dash rejects the callback because its Input refers to a nonexistent id.
app.layout = html.Div([
    html.Label('Select a city from the drop-down to plot'),
    dcc.Dropdown(
        id = 'city-dropdown',
        # One option per city column of df_h; 'Date' is the x-axis column.
        options = [{'label': str(column), 'value': column}
                   for column in df_h.columns if column != 'Date'],
        value = 'Denver, CO',
        multi = False,
        # Not clearable, so the callback always receives a city name.
        clearable = False,
        searchable = True),
    dcc.Graph(id = 'forecast-container')
        ]
    )

# --- dropdown callback ---
@app.callback(
    Output('forecast-container', 'figure'),
    Input('city-dropdown', 'value'))
def update_figure(selected_city):
    """Redraw the price history and forecast for the selected city.

    Parameters
    ----------
    selected_city : str or None
        Current value of the 'city-dropdown' component. It names a column of
        ``df_h`` and is matched against the ``City`` column of ``df_arima``.

    Returns
    -------
    The Plotly figure assigned to the 'forecast-container' graph.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When nothing is selected, so the current figure is left untouched
        instead of failing on a lookup of a missing column.
    """
    if not selected_city:
        raise dash.exceptions.PreventUpdate

    dff = df_h[['Date', selected_city]]
    dfa = df_arima[df_arima['City'] == selected_city]

    # The historical trace comes from px.line; keeping the figure in a
    # variable lets the forecast traces be appended to it afterwards.
    fig = px.line(dff, x = 'Date', y = selected_city,
                  hover_data = {selected_city: ':$,f'})

    # add_scatter has no data_frame argument, so columns are passed directly.
    fig.add_scatter(x = dfa.Date, y = dfa.Mean,
                    line_color = 'orange', name = 'Forecast Mean')

    # 'tonexty' fills from each trace to the trace added just before it.
    fig.add_scatter(x = dfa.Date, y = dfa.Lower_ci,
                    fill = 'tonexty', fillcolor = 'rgba(225,225,225, 0.3)',
                    marker = {'color': 'rgba(225,225,225, 0.9)'},
                    name = 'Lower 95% Confidence Interval')

    fig.add_scatter(x = dfa.Date, y = dfa.Upper_ci,
                    fill = 'tonexty', fillcolor = 'rgba(225,225,225, 0.3)',
                    marker = {'color': 'rgba(225,225,225, 0.9)'},
                    name = 'Upper 95% Confidence Interval')

    fig.update_layout(template = 'xgridoff',
                      yaxis = {'title': 'Median Home Price ($USD)'},
                      xaxis = {'title': 'Year'},
                      title = {'text': 'Median Home Price vs. Year for {}'.format(selected_city),
                               'font': {'size': 24}, 'x': 0.5, 'xanchor': 'center'})

    return fig
    
# Start the Dash server only when this file is run as a script, with debug
# mode switched on.
if __name__ == '__main__':
    app.run_server(debug = True)

Plotting data from two different dataframes to a single Dash graph

1 Answer