Data Visualization for Construction

Overview

Based on the DDC methodology (Chapter 4.1), this skill provides comprehensive data visualization techniques for construction analytics. Visual insights drive better decisions — from cost breakdowns to schedule analysis.

Book Reference: "Аналитика данных и принятие решений" / "Data Analytics and Decision Making"

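"Визуализация данных превращает сложные наборы данных в понятные графики, которые могут использоваться для принятия решений на всех уровнях проекта." ("Data visualization turns complex datasets into clear charts that can be used for decision-making at every level of a project.") — DDC Book, Chapter 4.1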

Quick Start

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the project export into a DataFrame
df = pd.read_excel("project_data.xlsx")

# Summed volume per category, drawn as a bar chart and written to disk
fig, ax = plt.subplots(figsize=(10, 6))
df.groupby('Category')['Volume_m3'].sum().plot.bar(ax=ax)
ax.set(title='Volume by Category', ylabel='Volume (m³)')
fig.tight_layout()
fig.savefig('volume_by_category.png', dpi=150)
plt.show()

Matplotlib Fundamentals

Basic Charts for Construction

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def create_cost_breakdown_pie(df, cost_col='Cost', category_col='Category'):
    """Create a ring-style pie chart of total cost per category.

    Costs are summed per category and drawn as wedges with percentage
    labels; the grand total is printed in the open center of the ring.

    Args:
        df: DataFrame holding the cost rows.
        cost_col: Name of the numeric column that is summed.
        category_col: Name of the column the rows are grouped by.

    Returns:
        The matplotlib Figure containing the chart.
    """
    costs = df.groupby(category_col)[cost_col].sum()

    fig, ax = plt.subplots(figsize=(10, 8))

    # Draw a ring (wedge width < radius) so the center stays empty; on a
    # solid pie the total would sit on top of the point where all wedges meet.
    ax.pie(
        costs.values,
        labels=costs.index,
        autopct='%1.1f%%',
        pctdistance=0.75,  # middle of the 0.5-1.0 ring
        startangle=90,
        colors=plt.cm.Set3.colors,
        wedgeprops={'width': 0.5},
    )

    ax.set_title('Cost Breakdown by Category', fontsize=14, fontweight='bold')

    # Grand total in the hole of the ring
    ax.text(0, 0, f'Total:\n${costs.sum():,.0f}',
            ha='center', va='center', fontsize=12)

    fig.tight_layout()
    return fig

def create_volume_bar_chart(df, volume_col='Volume_m3', category_col='Category'):
    """Draw summed volume per category as horizontal bars with value labels.

    Categories are sorted ascending by total, and each bar is annotated with
    its total just past the bar end.
    """
    totals = df.groupby(category_col)[volume_col].sum().sort_values()

    fig, ax = plt.subplots(figsize=(10, 6))
    rects = ax.barh(totals.index, totals.values, color='steelblue')

    # Labels are offset by 1% of the largest total; the x-range gets 15%
    # headroom so the label text fits inside the axes.
    peak = totals.max()
    label_gap = peak * 0.01
    for rect, total in zip(rects, totals.values):
        y_mid = rect.get_y() + rect.get_height()/2
        ax.text(total + label_gap, y_mid,
                f'{total:,.0f} m³', va='center', fontsize=10)

    ax.set_xlim(0, peak * 1.15)
    ax.set_xlabel('Volume (m³)')
    ax.set_title('Material Volumes by Category', fontsize=14, fontweight='bold')

    fig.tight_layout()
    return fig

def create_level_comparison(df, value_col='Volume_m3', level_col='Level',
                            category_col='Category'):
    """Create a grouped bar chart comparing categories across levels.

    Args:
        df: DataFrame with the rows to aggregate.
        value_col: Numeric column summed for each level/category pair.
        level_col: Column whose values become the x-axis groups.
        category_col: Column whose values become one bar series each
            (defaults to 'Category', the previously hard-coded column).

    Returns:
        The matplotlib Figure containing the chart.
    """
    pivot = df.pivot_table(
        values=value_col,
        index=level_col,
        columns=category_col,
        aggfunc='sum',
        fill_value=0,
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    # rot=45 rotates the tick labels on this Axes directly rather than
    # relying on pyplot's notion of the "current" axes.
    pivot.plot(kind='bar', ax=ax, width=0.8, rot=45)

    ax.set_xlabel('Building Level')
    ax.set_ylabel('Volume (m³)')
    ax.set_title('Volume Distribution by Level and Category', fontsize=14, fontweight='bold')
    # Legend is anchored outside the plot area so it does not cover bars
    ax.legend(title=category_col, bbox_to_anchor=(1.02, 1), loc='upper left')

    fig.tight_layout()
    return fig

Time Series Visualization

def create_progress_chart(df, date_col='Date', value_col='Cumulative_Progress'):
    """Plot progress over time as a line with a shaded area beneath it.

    Rows are ordered by date first. A dashed planned-progress line is added
    when the frame has a 'Planned_Progress' column.
    """
    ordered = df.sort_values(date_col)

    fig, ax = plt.subplots(figsize=(12, 6))
    dates = ordered[date_col]
    actual = ordered[value_col]

    # Solid blue line: actual progress
    ax.plot(dates, actual, 'b-', linewidth=2, label='Actual Progress')

    # Dashed green line: plan, only when that column exists
    has_plan = 'Planned_Progress' in ordered.columns
    if has_plan:
        ax.plot(dates, ordered['Planned_Progress'],
                'g--', linewidth=2, label='Planned Progress')

    # Shade from zero up to the actual curve
    ax.fill_between(dates, 0, actual, alpha=0.3)

    ax.set_xlabel('Date')
    ax.set_ylabel('Progress (%)')
    ax.set_title('Project S-Curve', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Slant the date tick labels so they do not overlap
    fig.autofmt_xdate()
    fig.tight_layout()
    return fig

def create_gantt_chart(df, start_col='Start', end_col='End', task_col='Task'):
    """Create a simple Gantt chart with one horizontal bar per task.

    Start and end values are parsed with ``pd.to_datetime`` before sorting,
    so tasks are ordered chronologically even when the columns hold date
    strings. Bar lengths use fractional days, so tasks shorter than one day
    remain visible.

    Args:
        df: DataFrame with one row per task.
        start_col: Column holding each task's start date/time.
        end_col: Column holding each task's end date/time.
        task_col: Column holding the label shown on the y-axis.

    Returns:
        The matplotlib Figure containing the chart.
    """
    # Local import: only this chart needs explicit date-number conversion
    import matplotlib.dates as mdates

    tasks = df.assign(**{
        start_col: pd.to_datetime(df[start_col]),
        end_col: pd.to_datetime(df[end_col]),
    }).sort_values(start_col)

    fig, ax = plt.subplots(figsize=(14, len(tasks) * 0.5 + 2))
    positions = list(range(len(tasks)))

    if positions:
        # Matplotlib date numbers are measured in days, so bar widths are
        # expressed in (fractional) days and starts are converted up front.
        duration_days = (tasks[end_col] - tasks[start_col]) / pd.Timedelta(days=1)
        left = mdates.date2num(tasks[start_col].to_numpy())
        ax.barh(positions, duration_days.to_numpy(), left=left, height=0.6,
                align='center', color='steelblue', alpha=0.8)

    # Make the x-axis render its numeric positions as dates
    ax.xaxis_date()

    ax.set_yticks(positions)
    ax.set_yticklabels(tasks[task_col])
    ax.set_xlabel('Date')
    ax.set_title('Project Schedule - Gantt Chart', fontsize=14, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    fig.autofmt_xdate()
    fig.tight_layout()
    return fig

Seaborn for Statistical Visualization

Distribution Analysis

import seaborn as sns

def create_distribution_analysis(df, value_col='Volume_m3', category_col='Category'):
    """Create a 2x2 grid of distribution plots for one numeric column.

    Panels: histogram with KDE of all values, then box, violin and strip
    plots of the same values split by category.

    Args:
        df: DataFrame with the rows to plot.
        value_col: Numeric column whose distribution is shown.
        category_col: Column used to split the three per-category panels.

    Returns:
        The matplotlib Figure containing the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # 1. Histogram with KDE over all rows
    sns.histplot(data=df, x=value_col, kde=True, ax=axes[0, 0])
    axes[0, 0].set_title('Volume Distribution')

    # 2-4. Box, violin and jittered strip plots share the same x/y mapping
    per_category_panels = (
        (sns.boxplot, axes[0, 1], 'Volume by Category', {}),
        (sns.violinplot, axes[1, 0], 'Volume Distribution by Category', {}),
        (sns.stripplot, axes[1, 1], 'Individual Elements',
         {'alpha': 0.5, 'jitter': True}),
    )
    for draw, ax, title, extra in per_category_panels:
        draw(data=df, x=category_col, y=value_col, ax=ax, **extra)
        # tick_params rotates labels without re-assigning label text, which
        # matplotlib discourages doing via set_xticklabels(get_xticklabels()).
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_title(title)

    fig.tight_layout()
    return fig

def create_correlation_heatmap(df, numeric_cols=None):
    """Draw an annotated heatmap of pairwise correlations.

    When ``numeric_cols`` is None, every numeric-dtype column of ``df`` is
    used; otherwise only the listed columns are correlated.
    """
    columns = (
        df.select_dtypes(include=[np.number]).columns.tolist()
        if numeric_cols is None
        else numeric_cols
    )
    correlations = df[columns].corr()

    fig, ax = plt.subplots(figsize=(10, 8))

    # Diverging palette centered on zero, square cells, two-decimal labels
    heatmap_options = {
        'annot': True,
        'cmap': 'RdYlBu_r',
        'center': 0,
        'fmt': '.2f',
        'square': True,
    }
    sns.heatmap(correlations, ax=ax, **heatmap_options)

    ax.set_title('Correlation Matrix', fontsize=14, fontweight='bold')

    fig.tight_layout()
    return fig

Category Analysis

def create_category_summary(df, category_col='Category',
                            value_col='Volume_m3', cost_col='Cost'):
    """Create a 2x2 category summary: counts, volumes, average cost, scatter.

    The two bottom panels need ``cost_col``; when that column is absent they
    are switched off instead of being left as empty frames.

    Args:
        df: DataFrame with the rows to summarize.
        category_col: Column used for grouping and for the scatter hue.
        value_col: Numeric column summed per category (scatter x-axis).
        cost_col: Numeric cost column; optional in ``df``.

    Returns:
        The matplotlib Figure containing the panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    (count_ax, volume_ax), (cost_ax, scatter_ax) = axes

    # 1. Row count per category
    category_counts = df[category_col].value_counts()
    sns.barplot(x=category_counts.index, y=category_counts.values, ax=count_ax)
    count_ax.set_title('Element Count by Category')

    # 2. Summed value per category, largest first
    volumes = df.groupby(category_col)[value_col].sum().sort_values(ascending=False)
    sns.barplot(x=volumes.index, y=volumes.values, ax=volume_ax)
    volume_ax.set_title('Total Volume by Category')

    bar_axes = [count_ax, volume_ax]

    if cost_col in df.columns:
        # 3. Mean cost per category, largest first
        avg_cost = df.groupby(category_col)[cost_col].mean().sort_values(ascending=False)
        sns.barplot(x=avg_cost.index, y=avg_cost.values, ax=cost_ax)
        cost_ax.set_title('Average Cost by Category')
        bar_axes.append(cost_ax)

        # 4. Value vs cost, colored by category
        sns.scatterplot(data=df, x=value_col, y=cost_col,
                        hue=category_col, alpha=0.7, ax=scatter_ax)
        scatter_ax.set_title('Volume vs Cost')
        scatter_ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
    else:
        # No cost data: hide the panels rather than show blank axes
        cost_ax.set_axis_off()
        scatter_ax.set_axis_off()

    # Rotate category labels without re-assigning the label text
    for ax in bar_axes:
        ax.tick_params(axis='x', labelrotation=45)

    fig.tight_layout()
    return fig

Plotly for Interactive Dashboards

Interactive Charts

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def create_interactive_cost_breakdown(df, category_col='Category', cost_col='Cost',
                                      material_col='Material'):
    """Create an interactive sunburst of cost by category, then material.

    Args:
        df: DataFrame with the cost rows.
        category_col: Column forming the inner ring of the sunburst.
        cost_col: Numeric column summed for each category/material pair.
        material_col: Column forming the outer ring (defaults to
            'Material', the previously hard-coded column).

    Returns:
        The Plotly figure.
    """
    # Sum cost for every (category, material) pair
    agg_df = df.groupby([category_col, material_col])[cost_col].sum().reset_index()

    fig = px.sunburst(
        agg_df,
        path=[category_col, material_col],
        values=cost_col,
        title='Cost Breakdown by Category and Material'
    )

    fig.update_layout(height=600)
    return fig

def create_interactive_3d_scatter(df, x_col='Volume_m3', y_col='Cost',
                                   z_col='Weight_kg', color_col='Category'):
    """Create a 3D scatter plot for multi-dimensional analysis.

    Args:
        df: DataFrame with one row per plotted point.
        x_col: Column mapped to the x-axis.
        y_col: Column mapped to the y-axis.
        z_col: Column mapped to the z-axis.
        color_col: Column used to color the points.

    Returns:
        The Plotly figure.
    """
    # 'ElementId' is only extra hover information; request it only when the
    # frame has that column so its absence does not make the chart fail.
    hover_cols = ['ElementId'] if 'ElementId' in df.columns else None

    fig = px.scatter_3d(
        df,
        x=x_col,
        y=y_col,
        z=z_col,
        color=color_col,
        hover_data=hover_cols,
        title='3D Analysis: Volume vs Cost vs Weight'
    )

    fig.update_layout(height=700)
    return fig

def create_interactive_timeline(df, date_col='Date', value_col='Progress',
                                 category_col='Phase'):
    """Create an interactive line timeline with range buttons and a slider.

    Args:
        df: DataFrame with one row per observation.
        date_col: Column holding the x-axis dates.
        value_col: Column holding the plotted values.
        category_col: Column that splits the data into one line per value.

    Returns:
        The Plotly figure.
    """
    # Lines connect points in row order, so order rows chronologically to
    # avoid back-and-forth segments. The stable sort keeps the original
    # order of rows that share a date; the caller's frame is not modified.
    ordered = df.sort_values(date_col, kind='mergesort')

    fig = px.line(
        ordered,
        x=date_col,
        y=value_col,
        color=category_col,
        title='Project Progress Timeline'
    )

    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                # Quick zoom presets counted back from the latest date
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=3, label="3m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(step="all", label="All"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date"
        ),
        height=500
    )

    return fig

Dashboard Layout

def create_project_dashboard(df):
    """Assemble a 2x2 Plotly dashboard from the project frame.

    Panels: cost pie by category, horizontal volume bars by category,
    row counts per level, and a progress line that is added only when both
    'Date' and 'Progress' columns exist.
    """
    panel_titles = (
        'Cost by Category',
        'Volume Distribution',
        'Elements by Level',
        'Progress Over Time'
    )
    panel_specs = [
        [{"type": "pie"}, {"type": "bar"}],
        [{"type": "bar"}, {"type": "scatter"}]
    ]
    fig = make_subplots(rows=2, cols=2,
                        subplot_titles=panel_titles, specs=panel_specs)

    # Top-left: share of summed cost per category
    cost_by_category = df.groupby('Category')['Cost'].sum()
    cost_pie = go.Pie(labels=cost_by_category.index,
                      values=cost_by_category.values, name='Cost')
    fig.add_trace(cost_pie, row=1, col=1)

    # Top-right: summed volume per category, ascending
    volume_by_category = df.groupby('Category')['Volume_m3'].sum().sort_values()
    volume_bars = go.Bar(x=volume_by_category.values, y=volume_by_category.index,
                         orientation='h', name='Volume')
    fig.add_trace(volume_bars, row=1, col=2)

    # Bottom-left: number of rows per level
    rows_per_level = df.groupby('Level').size()
    level_bars = go.Bar(x=rows_per_level.index, y=rows_per_level.values,
                        name='Count')
    fig.add_trace(level_bars, row=2, col=1)

    # Bottom-right: progress trace, only when both columns are present
    has_progress = 'Date' in df.columns and 'Progress' in df.columns
    if has_progress:
        progress_line = go.Scatter(x=df['Date'], y=df['Progress'],
                                   mode='lines+markers', name='Progress')
        fig.add_trace(progress_line, row=2, col=2)

    fig.update_layout(height=800,
                      title_text='Project Analytics Dashboard',
                      showlegend=False)

    return fig

Construction-Specific Visualizations

Heatmaps for Level Analysis

def create_level_heatmap(df, level_col='Level', category_col='Category',
                          value_col='Volume_m3'):
    """Render summed values per level (rows) and category (columns) as a heatmap.

    Missing level/category combinations are filled with 0 and every cell is
    annotated with its value.
    """
    level_by_category = df.pivot_table(
        index=level_col,
        columns=category_col,
        values=value_col,
        aggfunc='sum',
        fill_value=0,
    )

    fig, ax = plt.subplots(figsize=(12, 8))

    # Thousands-separated integer annotations on a yellow-to-red scale
    heatmap_options = {
        'annot': True,
        'fmt': ',.0f',
        'cmap': 'YlOrRd',
        'cbar_kws': {'label': 'Volume (m³)'},
    }
    sns.heatmap(level_by_category, ax=ax, **heatmap_options)

    ax.set_title('Volume Distribution: Level × Category', fontsize=14, fontweight='bold')

    fig.tight_layout()
    return fig

def create_material_treemap(df, category_col='Category', material_col='Material',
                             value_col='Volume_m3'):
    """Build a Plotly treemap nested by category, then material.

    Tile size and tile color both come from the summed ``value_col``.
    """
    hierarchy = [category_col, material_col]
    summed = df.groupby(hierarchy)[value_col].sum()
    tiles = summed.reset_index()

    fig = px.treemap(
        tiles,
        path=hierarchy,
        values=value_col,
        color=value_col,
        color_continuous_scale='Blues',
        title='Material Distribution Treemap',
    )
    fig.update_layout(height=600)
    return fig

Cost Analysis Charts

def create_cost_analysis_dashboard(df):
    """Create a 2x3 grid of cost analysis charts.

    Reads the 'Cost', 'Category', 'Volume_m3' and 'ElementId' columns.
    The input frame is not modified: derived values such as cost per m³ are
    kept in local variables instead of being written back as new columns.

    Args:
        df: DataFrame with the rows to analyze.

    Returns:
        The matplotlib Figure containing the six panels.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    cost = df['Cost']

    # 1. Cost histogram with mean and median markers
    sns.histplot(data=df, x='Cost', bins=30, ax=axes[0, 0])
    axes[0, 0].set_title('Cost Distribution')
    axes[0, 0].axvline(cost.mean(), color='r', linestyle='--', label='Mean')
    axes[0, 0].axvline(cost.median(), color='g', linestyle='--', label='Median')
    axes[0, 0].legend()

    # 2. Cost spread per category
    sns.boxplot(data=df, x='Category', y='Cost', ax=axes[0, 1])
    # Rotate labels without re-assigning the label text
    axes[0, 1].tick_params(axis='x', labelrotation=45)
    axes[0, 1].set_title('Cost Range by Category')

    # 3. Cumulative share of total cost, most expensive rows first
    ranked = cost.sort_values(ascending=False)
    cumulative_pct = ranked.cumsum() / ranked.sum() * 100
    axes[0, 2].plot(range(len(ranked)), cumulative_pct.to_numpy())
    axes[0, 2].axhline(80, color='r', linestyle='--', alpha=0.5)
    axes[0, 2].set_xlabel('Number of Elements')
    axes[0, 2].set_ylabel('Cumulative Cost %')
    axes[0, 2].set_title('Pareto Analysis (80/20)')

    # 4. Mean cost per unit volume by category. Zero volumes become NaN so
    # the division yields NaN (skipped by mean) rather than infinity.
    cost_per_m3 = cost / df['Volume_m3'].replace(0, np.nan)
    by_cat = cost_per_m3.groupby(df['Category']).mean().sort_values(ascending=True)
    axes[1, 0].barh(by_cat.index, by_cat.values)
    axes[1, 0].set_title('Average Cost per m³ by Category')

    # 5. Ten most expensive rows. Bars are placed at explicit positions and
    # labeled with the id as text, so numeric ids are not used as
    # coordinates and duplicate ids do not collapse into one bar.
    top10 = df.nlargest(10, 'Cost')
    positions = list(range(len(top10)))
    axes[1, 1].barh(positions, top10['Cost'].to_numpy())
    axes[1, 1].set_yticks(positions)
    axes[1, 1].set_yticklabels(top10['ElementId'].astype(str))
    axes[1, 1].set_title('Top 10 Elements by Cost')

    # 6. Cost against volume with a fitted regression line
    sns.regplot(data=df, x='Volume_m3', y='Cost', ax=axes[1, 2],
                scatter_kws={'alpha': 0.5})
    axes[1, 2].set_title('Cost vs Volume (with Trend)')

    fig.tight_layout()
    return fig

Export and Reporting

Save Visualizations

def save_all_visualizations(df, output_dir='reports/charts'):
    """Generate the standard charts and save each one as a PNG.

    Charts are built one at a time and each figure is closed as soon as it
    has been written (or has failed to write), so a failure part-way through
    does not leave earlier figures open.

    Args:
        df: DataFrame passed unchanged to every chart builder.
        output_dir: Directory for the PNG files; created if missing.

    Returns:
        List of written file paths, in generation order.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)

    # Output file stem -> function that builds the figure from df
    builders = {
        'cost_breakdown': create_cost_breakdown_pie,
        'volume_bars': create_volume_bar_chart,
        'distribution': create_distribution_analysis,
        'level_heatmap': create_level_heatmap,
    }

    saved_files = []
    for name, build in builders.items():
        filepath = f"{output_dir}/{name}.png"
        fig = build(df)
        try:
            fig.savefig(filepath, dpi=150, bbox_inches='tight')
        finally:
            # Release the figure even when saving raises
            plt.close(fig)
        saved_files.append(filepath)

    return saved_files

def create_pdf_report(df, output_path='project_report.pdf'):
    """Write a four-page PDF, one chart per page, and return its path.

    Page order: cost breakdown, volume bars, distribution grid, level heatmap.
    """
    from matplotlib.backends.backend_pdf import PdfPages

    with PdfPages(output_path) as pdf:
        # One builder per page, in page order
        page_builders = (
            create_cost_breakdown_pie,
            create_volume_bar_chart,
            create_distribution_analysis,
            create_level_heatmap,
        )
        for build_page in page_builders:
            page = build_page(df)
            pdf.savefig(page)
            # Close each figure once it is written to the PDF
            plt.close(page)

    return output_path

Quick Reference

| Chart Type | Best For | Library |
|------------|----------|---------|
| Bar Chart | Category comparisons | Matplotlib/Seaborn |
| Pie Chart | Cost breakdown | Matplotlib |
| Heatmap | Level × Category matrix | Seaborn |
| Box Plot | Distribution by group | Seaborn |
| Scatter | Relationship analysis | Matplotlib/Plotly |
| Treemap | Hierarchical data | Plotly |
| Sunburst | Multi-level breakdown | Plotly |
| Gantt | Schedule visualization | Matplotlib |
| S-Curve | Progress tracking | Matplotlib |

Color Palettes for Construction

# Shared hex color palettes for construction charts
CONSTRUCTION_COLORS = {
    # Ordered general-purpose sequence of five colors
    'primary': [
        '#2C3E50',
        '#3498DB',
        '#1ABC9C',
        '#F39C12',
        '#E74C3C',
    ],
    # One fixed color per material name
    'materials': {
        'Concrete': '#95A5A6',
        'Steel': '#34495E',
        'Timber': '#D35400',
        'Brick': '#C0392B',
        'Glass': '#3498DB',
    },
    # One fixed color per element category name
    'categories': {
        'Structural': '#2C3E50',
        'Architectural': '#3498DB',
        'MEP': '#27AE60',
        'Finishes': '#F39C12',
    },
}

Resources

Book: "Data-Driven Construction" by Artem Boiko, Chapter 4.1
Website: https://datadrivenconstruction.io
Matplotlib: https://matplotlib.org
Seaborn: https://seaborn.pydata.org
Plotly: https://plotly.com/python

Next Steps

See pandas-construction-analysis for data preparation
See cost-prediction for predictive analytics
See qto-report for quantity extraction

data-visualization