From 2bebc60c695c018caa88ed6578f3af9522f0eb92 Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Mon, 9 Feb 2026 16:47:17 -0500 Subject: [PATCH 01/17] Instrument `GlslProgram::build` --- source/MaterialXRenderGlsl/GlslProgram.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/MaterialXRenderGlsl/GlslProgram.cpp b/source/MaterialXRenderGlsl/GlslProgram.cpp index 8626a43cd6..b6bc4c32f3 100644 --- a/source/MaterialXRenderGlsl/GlslProgram.cpp +++ b/source/MaterialXRenderGlsl/GlslProgram.cpp @@ -12,6 +12,7 @@ #include #include +#include #include @@ -82,6 +83,8 @@ const string& GlslProgram::getStageSourceCode(const string& stage) const void GlslProgram::build() { + MX_TRACE_FUNCTION(Tracing::Category::Render); + clearBuiltData(); GLint glStatus = GL_FALSE; From 6952584c63c11eb41b0177d5f0282e798179fd91 Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Mon, 9 Feb 2026 17:30:44 -0500 Subject: [PATCH 02/17] Instrument `DumpGeneratedCode` --- source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp index 197c43c93e..248efc2b62 100644 --- a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp +++ b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp @@ -232,6 +232,7 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, if (testOptions.dumpGeneratedCode) { + MX_TRACE_SCOPE(Cat::Render, "DumpGeneratedCode"); mx::ScopedTimer dumpTimer(&profileTimes.languageTimes.ioTime); std::ofstream file; file.open(shaderPath + "_vs.glsl"); @@ -291,6 +292,7 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, if (testOptions.dumpUniformsAndAttributes) { + MX_TRACE_SCOPE(Cat::Render, "DumpUniformsAndAttributes"); mx::ScopedTimer printTimer(&profileTimes.languageTimes.ioTime); log << "* Uniform:" << std::endl; program->printUniforms(log); @@ -360,6 +362,7 @@ bool 
GlslShaderRenderTester::runRenderer(const std::string& shaderName, } { + MX_TRACE_SCOPE(Cat::Render, "CaptureAndSaveImage"); mx::ScopedTimer ioTimer(&profileTimes.languageTimes.imageSaveTime); std::string fileName = shaderPath + "_glsl.png"; mx::ImagePtr image = _renderer->captureImage(); From 246891c175093c3e8a803dc114da7aa19bf49a1a Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 6 Feb 2026 13:24:50 -0500 Subject: [PATCH 03/17] Expose envSampleCount option in MaterialXTest for IBL sampling control Add configurable environment radiance sample count to test suite options, allowing game-representative performance testing (1-16 samples) vs reference quality (1024+ samples). Lower sample counts make shader complexity a larger fraction of GPU time, enabling meaningful runtime performance comparisons. --- .../MaterialXGenShader/GenShaderUtil.cpp | 14 ++++++++++++++ .../MaterialXGenShader/GenShaderUtil.h | 9 +++++++++ .../MaterialXRenderGlsl/RenderGlsl.cpp | 2 +- .../MaterialXTest/MaterialXRenderMsl/RenderMsl.mm | 2 +- .../MaterialXRenderSlang/RenderSlang.cpp | 2 +- 5 files changed, 26 insertions(+), 3 deletions(-) diff --git a/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.cpp b/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.cpp index e2a197e1cc..54af17d6bf 100644 --- a/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.cpp +++ b/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.cpp @@ -1006,6 +1006,8 @@ void TestSuiteOptions::print(std::ostream& output) const output << "\tEnable Reference Quality: " << enableReferenceQuality << std::endl; output << "\tOutput Directory: " << (outputDirectory.isEmpty() ? 
"(default)" : outputDirectory.asString()) << std::endl; output << "\tEnable Tracing: " << enableTracing << std::endl; + output << "\tFrames Per Material: " << framesPerMaterial << std::endl; + output << "\tenvSampleCount: " << envSampleCount << std::endl; } bool TestSuiteOptions::readOptions(const std::string& optionFile) @@ -1033,6 +1035,8 @@ bool TestSuiteOptions::readOptions(const std::string& optionFile) const std::string ENABLE_REFERENCE_QUALITY("enableReferenceQuality"); const std::string OUTPUT_DIRECTORY_STRING("outputDirectory"); const std::string ENABLE_TRACING_STRING("enableTracing"); + const std::string FRAMES_PER_MATERIAL_STRING("framesPerMaterial"); + const std::string ENV_SAMPLE_COUNT_STRING("envSampleCount"); overrideFiles.clear(); dumpGeneratedCode = false; @@ -1148,6 +1152,16 @@ bool TestSuiteOptions::readOptions(const std::string& optionFile) { enableTracing = val->asA(); } + else if (name == FRAMES_PER_MATERIAL_STRING) + { + int frames = val->asA(); + framesPerMaterial = (frames >= 1) ? static_cast(frames) : 1u; + } + else if (name == ENV_SAMPLE_COUNT_STRING) + { + int count = val->asA(); + envSampleCount = (count >= 1) ? count : 1024; + } } } } diff --git a/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.h b/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.h index e6ae23f19f..d8e58c4e6a 100644 --- a/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.h +++ b/source/MaterialXTest/MaterialXGenShader/GenShaderUtil.h @@ -126,6 +126,15 @@ class TestSuiteOptions // Default is false to avoid overhead when not profiling. bool enableTracing = false; + // Number of frames to render per material for GPU timing. + // Default is 1. Set higher (e.g., 5-10) for statistical validity. + // First frame often includes driver shader compilation overhead. + unsigned int framesPerMaterial = 1; + + // Number of environment radiance samples for IBL lighting. + // Default is 1024. Lower values (1-16) are more representative of real-time rendering. 
+ int envSampleCount = 1024; + // Helper to resolve output path for an artifact. // If outputDirectory is set, returns outputDirectory/filename. // Otherwise returns the original path unchanged. diff --git a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp index 248efc2b62..9575ebbbac 100644 --- a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp +++ b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp @@ -99,7 +99,7 @@ void GlslShaderRenderTester::registerLights(mx::DocumentPtr document, // Apply light settings for render tests. _lightHandler->setEnvRadianceMap(envRadiance); _lightHandler->setEnvIrradianceMap(envIrradiance); - _lightHandler->setEnvSampleCount(options.enableReferenceQuality ? 4096 : 1024); + _lightHandler->setEnvSampleCount(options.envSampleCount); _lightHandler->setRefractionTwoSided(true); } diff --git a/source/MaterialXTest/MaterialXRenderMsl/RenderMsl.mm b/source/MaterialXTest/MaterialXRenderMsl/RenderMsl.mm index c6340101da..8757393c08 100644 --- a/source/MaterialXTest/MaterialXRenderMsl/RenderMsl.mm +++ b/source/MaterialXTest/MaterialXRenderMsl/RenderMsl.mm @@ -103,7 +103,7 @@ bool runRenderer(const std::string& shaderName, // Apply light settings for render tests. _lightHandler->setEnvRadianceMap(envRadiance); _lightHandler->setEnvIrradianceMap(envIrradiance); - _lightHandler->setEnvSampleCount(options.enableReferenceQuality ? 4096 : 1024); + _lightHandler->setEnvSampleCount(options.envSampleCount); _lightHandler->setRefractionTwoSided(true); } diff --git a/source/MaterialXTest/MaterialXRenderSlang/RenderSlang.cpp b/source/MaterialXTest/MaterialXRenderSlang/RenderSlang.cpp index 7a96b7af78..2bd4961e24 100644 --- a/source/MaterialXTest/MaterialXRenderSlang/RenderSlang.cpp +++ b/source/MaterialXTest/MaterialXRenderSlang/RenderSlang.cpp @@ -98,7 +98,7 @@ void SlangShaderRenderTester::registerLights(mx::DocumentPtr document, // Apply light settings for render tests. 
_lightHandler->setEnvRadianceMap(envRadiance); _lightHandler->setEnvIrradianceMap(envIrradiance); - _lightHandler->setEnvSampleCount(options.enableReferenceQuality ? 4096 : 1024); + _lightHandler->setEnvSampleCount(options.envSampleCount); _lightHandler->setRefractionTwoSided(true); } From 8db81187a900dd21529a61518c7979b199f9844c Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 11:06:41 -0500 Subject: [PATCH 04/17] Add diff_test_runs package for comparing MaterialX test outputs Python package (MaterialXTest.diff_test_runs) with three comparison scripts and shared reporting utilities: diff_images.py -- perceptual image comparison via NVIDIA FLIP, with HTML side-by-side reports diff_traces.py -- Perfetto trace comparison with per-material CPU slice and GPU render time analysis, multiple --slice filters, --warmup-frames burn-in, inline SVG charts diff_shaders.py -- offline shader analysis (LOC, SPIR-V size, compile time, spirv-opt time); auto-discovers Vulkan SDK tools in PATH _report.py -- shared comparison tables, SVG chart generation, and HTML report builder --- .../MaterialXTest/diff_test_runs/__init__.py | 0 .../MaterialXTest/diff_test_runs/_report.py | 387 ++++++++++++++ .../diff_test_runs/diff_images.py | 472 ++++++++++++++++++ .../diff_test_runs/diff_shaders.py | 436 ++++++++++++++++ .../diff_test_runs/diff_traces.py | 452 +++++++++++++++++ 5 files changed, 1747 insertions(+) create mode 100644 python/MaterialXTest/diff_test_runs/__init__.py create mode 100644 python/MaterialXTest/diff_test_runs/_report.py create mode 100644 python/MaterialXTest/diff_test_runs/diff_images.py create mode 100644 python/MaterialXTest/diff_test_runs/diff_shaders.py create mode 100644 python/MaterialXTest/diff_test_runs/diff_traces.py diff --git a/python/MaterialXTest/diff_test_runs/__init__.py b/python/MaterialXTest/diff_test_runs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/MaterialXTest/diff_test_runs/_report.py 
b/python/MaterialXTest/diff_test_runs/_report.py new file mode 100644 index 0000000000..8f3f4889b3 --- /dev/null +++ b/python/MaterialXTest/diff_test_runs/_report.py @@ -0,0 +1,387 @@ +''' +Shared reporting utilities for MaterialX diff tools. + +Provides comparison table formatting, chart generation (SVG via matplotlib), +and HTML report building for comparing per-material metrics between +baseline and optimized test runs. + +Data convention: + All comparison functions expect a pandas DataFrame with columns: + name -- item name (material, shader file, etc.) + baseline -- baseline metric value + optimized -- optimized metric value + delta -- optimized - baseline + change_pct -- (delta / baseline) * 100 +''' + +import logging +from pathlib import Path + +import pandas as pd + +logger = logging.getLogger(__name__) + + +# ----------------------------------------------------------------------------- +# Optional: matplotlib (for chart generation) +# ----------------------------------------------------------------------------- + +_have_matplotlib = False +try: + import matplotlib + matplotlib.rcParams['svg.fonttype'] = 'none' # Keep text as , not paths + import matplotlib.pyplot as plt + from matplotlib.patches import Patch + _have_matplotlib = True +except ImportError: + pass + + +# ============================================================================= +# DATA HELPERS +# ============================================================================= + +def isna(val): + '''Check if value is None or NaN.''' + return val is None or pd.isna(val) + + +def mergeComparison(baselineValues, optimizedValues, minDelta=0.0): + ''' + Build a comparison DataFrame from two {name: value} dicts. 
+ + Args: + baselineValues: dict mapping name -> numeric value + optimizedValues: dict mapping name -> numeric value + minDelta: Minimum absolute delta to include (0 = include all) + + Returns: + DataFrame with columns [name, baseline, optimized, delta, change_pct], + sorted by delta ascending (biggest improvements first). + ''' + baselineDf = pd.DataFrame( + list(baselineValues.items()), columns=['name', 'baseline']) + optimizedDf = pd.DataFrame( + list(optimizedValues.items()), columns=['name', 'optimized']) + + merged = pd.merge(baselineDf, optimizedDf, on='name', how='outer') + merged['delta'] = merged['optimized'] - merged['baseline'] + merged['change_pct'] = (merged['delta'] / merged['baseline']) * 100 + + if minDelta > 0: + merged = merged[merged['delta'].abs() >= minDelta] + + return merged.sort_values('delta', ascending=True).reset_index(drop=True) + + +def mergeComparisonDf(baselineAgg, optimizedAgg, minDelta=0.0): + ''' + Build a comparison DataFrame by merging two aggregated DataFrames. + + Each input must have columns [name, value]. This is useful when + values come from pandas groupby rather than plain dicts. + + Returns: + DataFrame with columns [name, baseline, optimized, delta, change_pct], + sorted by delta ascending. 
+ ''' + merged = pd.merge( + baselineAgg[['name', 'value']], + optimizedAgg[['name', 'value']], + on='name', suffixes=('_baseline', '_optimized'), how='outer' + ) + merged.rename(columns={ + 'value_baseline': 'baseline', + 'value_optimized': 'optimized', + }, inplace=True) + merged['delta'] = merged['optimized'] - merged['baseline'] + merged['change_pct'] = (merged['delta'] / merged['baseline']) * 100 + + if minDelta > 0: + merged = merged[merged['delta'].abs() >= minDelta] + + return merged.sort_values('delta', ascending=True).reset_index(drop=True) + + +# ============================================================================= +# TABLE OUTPUT +# ============================================================================= + +def printComparisonTable(data, title, baselineLabel='Baseline', + optimizedLabel='Optimized', unit='ms', + valueFormat='.2f', highlightNames=None): + ''' + Print a formatted comparison table to stdout. + + Args: + data: Comparison DataFrame (name, baseline, optimized, delta, change_pct) + title: Section title printed above the table + baselineLabel: Display name for the baseline column + optimizedLabel: Display name for the optimized column + unit: Unit suffix for values (e.g., "ms", " lines", " bytes") + valueFormat: Format spec for values (e.g., ".2f", ".0f", ",d") + highlightNames: Optional set of names to mark with * + ''' + if data is None or data.empty: + return + + if highlightNames is None: + highlightNames = set() + + bCol = baselineLabel[:10] + oCol = optimizedLabel[:10] + + print(f'\n{"=" * 85}') + print(f' {title}') + print(f'{"=" * 85}') + marker = ' *' if highlightNames else '' + print(f"{'Name':<40} {bCol:>10} {oCol:>10} {'Delta':>10} {'Change':>8}{marker}") + print('-' * 85) + + for _, row in data.iterrows(): + fullName = str(row['name']) + name = fullName[:38] + baseVal = row['baseline'] + optVal = row['optimized'] + deltaVal = row['delta'] + changePct = row['change_pct'] + + affected = fullName in highlightNames + mark 
= ' *' if affected else ' ' + + baseStr = f'{baseVal:{valueFormat}}{unit}' if not isna(baseVal) else 'N/A' + optStr = f'{optVal:{valueFormat}}{unit}' if not isna(optVal) else 'N/A' + deltaStr = f'{deltaVal:+{valueFormat}}{unit}' if not isna(deltaVal) else 'N/A' + changeStr = f'{changePct:+.1f}%' if not isna(changePct) else 'N/A' + print(f'{name:<40} {baseStr:>10} {optStr:>10} {deltaStr:>10} {changeStr:>8}{mark}') + + print('-' * 85) + + improved = data[data['change_pct'] < 0] + regressed = data[data['change_pct'] > 0] + unchanged = data[data['change_pct'] == 0] + validChanges = data.dropna(subset=['change_pct'])['change_pct'] + + print(f'\nSummary: {len(improved)} improved, {len(regressed)} regressed, ' + f'{len(unchanged)} unchanged, {len(data)} total') + + if len(improved) > 0: + best = improved.iloc[0] + print(f"Best improvement: {best['name']} ({best['change_pct']:.1f}%)") + + if len(regressed) > 0: + worst = regressed.iloc[-1] + print(f"Worst regression: {worst['name']} ({worst['change_pct']:+.1f}%)") + + if len(validChanges) > 0: + print(f'Overall: mean {validChanges.mean():+.1f}%, ' + f'median {validChanges.median():+.1f}%') + + if highlightNames: + print(f'\n* = highlighted ({len(highlightNames)} items)') + + +# ============================================================================= +# CHART OUTPUT +# ============================================================================= + +def createComparisonChart(data, outputPath, title, + baselineLabel='Baseline', optimizedLabel='Optimized', + unit='ms', highlightNames=None, highlightLabel=None, + subtitle=None): + ''' + Create a paired before/after horizontal bar chart sorted by delta. + + Saves as SVG with searchable text. 
+ + Args: + data: Comparison DataFrame (name, baseline, optimized, delta, change_pct) + outputPath: Path to save the chart (SVG) + title: Chart title + baselineLabel: Display name for the baseline series + optimizedLabel: Display name for the optimized series + unit: Unit suffix for value annotations + highlightNames: Optional set of names to emphasise + highlightLabel: Legend label for highlighted items + subtitle: Optional subtitle line (e.g., filter parameters) + ''' + if data is None: + return + + if highlightNames is None: + highlightNames = set() + if not _have_matplotlib: + logger.warning('Cannot create chart: matplotlib not installed.') + return + + chartDf = data.dropna(subset=['baseline', 'optimized']).copy() + if chartDf.empty: + logger.warning('No data to chart') + return + + # Reverse so largest improvements at TOP + chartDf = chartDf.iloc[::-1].reset_index(drop=True) + chartDf['is_highlighted'] = chartDf['name'].isin(highlightNames) + + def _makeLabel(row): + name = row['name'][:28] + '...' 
if len(row['name']) > 28 else row['name'] + delta = row['delta'] + pct = row['change_pct'] + prefix = '* ' if row['is_highlighted'] else '' + if pd.notna(delta) and pd.notna(pct): + return f'{prefix}{name} ({delta:+.1f}{unit}, {pct:+.1f}%)' + return f'{prefix}{name}' + + chartDf['display_name'] = chartDf.apply(_makeLabel, axis=1) + + figHeight = max(10, len(chartDf) * 0.5) + fig, ax = plt.subplots(figsize=(14, figHeight)) + + yPos = range(len(chartDf)) + barHeight = 0.35 + + ax.barh([y + barHeight / 2 for y in yPos], chartDf['baseline'], + barHeight, label=baselineLabel, color='#3498db', alpha=0.8) + + colors = ['#2ecc71' if d <= 0 else '#e74c3c' for d in chartDf['delta']] + ax.barh([y - barHeight / 2 for y in yPos], chartDf['optimized'], + barHeight, label=optimizedLabel, color=colors, alpha=0.8) + + for i, (b, o, delta) in enumerate(zip(chartDf['baseline'], + chartDf['optimized'], + chartDf['delta'])): + ax.text(b + 1, i + barHeight / 2, f'{b:.1f}{unit}', va='center', + fontsize=7, color='#2980b9') + ax.text(o + 1, i - barHeight / 2, f'{o:.1f}{unit}', va='center', + fontsize=7, color='#27ae60' if delta < 0 else '#c0392b') + + ax.set_yticks(yPos) + ax.set_yticklabels(chartDf['display_name']) + ax.set_xlabel(f'Value ({unit})' if unit else 'Value') + + if highlightNames: + for i, (label, isHl) in enumerate( + zip(ax.get_yticklabels(), chartDf['is_highlighted'])): + if isHl: + label.set_fontweight('bold') + label.set_color('#8e44ad') + + titleLines = [title] + if highlightLabel and highlightNames: + titleLines.append(f'* = {highlightLabel}') + if subtitle: + titleLines.append(subtitle) + ax.set_title('\n'.join(titleLines), fontsize=11) + + legendElements = [ + Patch(facecolor='#3498db', label=baselineLabel), + Patch(facecolor='#2ecc71', label=f'{optimizedLabel} (improved)'), + Patch(facecolor='#e74c3c', label=f'{optimizedLabel} (regressed)') + ] + ax.legend(handles=legendElements, loc='lower right') + + plt.tight_layout() + plt.savefig(outputPath, format='svg', 
bbox_inches='tight') + plt.close(fig) + logger.info(f'Chart saved to: {outputPath}') + + +# ============================================================================= +# HTML REPORT +# ============================================================================= + +def generateHtmlReport(reportPath, sections, pageTitle='Comparison Report', + subtitle=None, footerText='Generated by MaterialX diff tools'): + ''' + Generate an HTML report with inline SVG charts (searchable text). + + Args: + reportPath: Path to output HTML file + sections: List of (title, chartPath) tuples. SVG chart files are read + and inlined so that text is searchable via Ctrl+F. + pageTitle: Title for the HTML page header + subtitle: Optional subtitle shown under the page title + footerText: Footer attribution text + ''' + reportPath = Path(reportPath) + reportDir = reportPath.parent + reportDir.mkdir(parents=True, exist_ok=True) + + html = [] + html.append(f''' + + + + + {pageTitle} + + + +
+

{pageTitle}

+''') + + if subtitle: + html.append(f'

{subtitle}

\n') + + for title, chartFilePath in sections: + chartFile = Path(chartFilePath) if chartFilePath else None + if chartFile and chartFile.exists(): + svgContent = chartFile.read_text(encoding='utf-8') + # Strip XML declaration if present (not needed when inlined) + if svgContent.startswith('') + 2:].lstrip() + html.append(f''' +
+

{title}

+ {svgContent} +
+''') + + html.append(f''' +
+ {footerText} +
+
+ + +''') + + with open(reportPath, 'w', encoding='utf-8') as f: + f.write(''.join(html)) + + logger.info(f'HTML report saved to: {reportPath}') + + +# ============================================================================= +# PATH & BROWSER HELPERS +# ============================================================================= + +def chartPath(basePath, suffix): + '''Derive a chart output path by inserting a suffix before the extension.''' + basePath = Path(basePath) + return basePath.parent / f'{basePath.stem}_{suffix}{basePath.suffix}' + + +def openReport(reportPath): + '''Print the report path prominently and open it in the default browser.''' + import webbrowser + + absPath = Path(reportPath).resolve() + print(f'\n{"=" * 85}') + print(f' Report: {absPath}') + print(f'{"=" * 85}') + webbrowser.open(absPath.as_uri()) diff --git a/python/MaterialXTest/diff_test_runs/diff_images.py b/python/MaterialXTest/diff_test_runs/diff_images.py new file mode 100644 index 0000000000..0ac513988f --- /dev/null +++ b/python/MaterialXTest/diff_test_runs/diff_images.py @@ -0,0 +1,472 @@ +#!/usr/bin/env python +''' +Compare rendered images between baseline and optimized MaterialX test runs +using NVIDIA FLIP perceptual image comparison. + +FLIP (A Difference Evaluator for Alternating Images) approximates human +perception of differences when flipping between images. A FLIP score of 0 +means identical, 1 means maximally different. + +Usage: + python diff_images.py + python diff_images.py --threshold 0.05 + python diff_images.py --report report.html + +Dependencies: + pip install flip-evaluator Pillow numpy +''' + +import argparse +import logging +import sys +from pathlib import Path + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('diff_images') + +# Optional: FLIP (for perceptual image comparison) +_have_flip = False +try: + import flip_evaluator as flip + _have_flip = True +except ImportError: + logger.debug('flip-evaluator not found. 
Install with: pip install flip-evaluator') + + +# ============================================================================= +# IMAGE COMPARISON +# ============================================================================= + +def findImages(directory, pattern='**/*.png'): + '''Find all PNG images in a directory recursively.''' + directory = Path(directory) + if not directory.exists(): + raise FileNotFoundError(f'Directory not found: {directory}') + return list(directory.glob(pattern)) + + +def computeImageDiff(img1Path, img2Path, ppd=70.0, heatmapPath=None): + ''' + Compute FLIP perceptual difference metrics between two images. + + FLIP (A Difference Evaluator for Alternating Images) is a perceptual + image comparison metric from NVIDIA that approximates human perception + of differences when flipping between images. + + Args: + img1Path: Path to reference (baseline) image + img2Path: Path to test (optimized) image + ppd: Pixels per degree (viewing distance). Default 70 assumes + a 0.7m viewing distance for a 1080p 24" monitor. 
+ heatmapPath: Optional path to save FLIP heatmap image (magma colormap) + + Returns: + dict with keys: mean_flip, max_flip, pct_diff_pixels, identical, heatmap_path + ''' + import numpy as np + + try: + flipMap, meanFlip, _ = flip.evaluate( + str(img1Path), + str(img2Path), + "LDR", + inputsRGB=True, + applyMagma=False, + computeMeanError=True, + parameters={"ppd": ppd} + ) + except Exception as e: + return { + 'error': str(e), + 'identical': False + } + + flipMap = np.array(flipMap) + maxFlip = float(flipMap.max()) + + # Percentage of pixels with perceptible difference (FLIP > 0.01) + diffPixels = flipMap > 0.01 + pctDiffPixels = 100.0 * diffPixels.sum() / diffPixels.size + + result = { + 'mean_flip': float(meanFlip), + 'max_flip': maxFlip, + 'pct_diff_pixels': pctDiffPixels, + 'identical': meanFlip < 1e-6, + 'heatmap_path': None + } + + # Save heatmap if requested + if heatmapPath: + try: + heatmapImg, _, _ = flip.evaluate( + str(img1Path), + str(img2Path), + "LDR", + inputsRGB=True, + applyMagma=True, + computeMeanError=False, + parameters={"ppd": ppd} + ) + from PIL import Image + heatmapArr = np.array(heatmapImg) + if heatmapArr.max() <= 1.0: + heatmapArr = (heatmapArr * 255).astype(np.uint8) + Image.fromarray(heatmapArr).save(heatmapPath) + result['heatmap_path'] = str(heatmapPath) + except Exception as e: + logger.warning(f'Failed to save heatmap: {e}') + + return result + + +def compareImages(baselineDir, optimizedDir, threshold=0.05, reportDir=None): + ''' + Compare all matching images between two directories using FLIP. 
+ + Args: + baselineDir: Path to baseline images + optimizedDir: Path to optimized images + threshold: FLIP threshold above which to report differences (default: 0.05) + reportDir: Optional directory to save FLIP heatmaps for HTML report + + Returns: + List of comparison results with paths for report generation + ''' + if not _have_flip: + logger.error('Cannot compare images: flip-evaluator not installed.') + logger.error('Install with: pip install flip-evaluator') + return None + + baselineDir = Path(baselineDir) + optimizedDir = Path(optimizedDir) + + # Create heatmap directory if generating report + heatmapDir = None + if reportDir: + heatmapDir = Path(reportDir) / 'heatmaps' + heatmapDir.mkdir(parents=True, exist_ok=True) + + baselineImages = findImages(baselineDir) + logger.info(f'Found {len(baselineImages)} images in baseline') + + results = [] + matched = 0 + missing = 0 + + for baselineImg in baselineImages: + relPath = baselineImg.relative_to(baselineDir) + optimizedImg = optimizedDir / relPath + + if not optimizedImg.exists(): + logger.warning(f'Missing in optimized: {relPath}') + missing += 1 + continue + + matched += 1 + + heatmapPath = None + if heatmapDir: + heatmapPath = heatmapDir / f'{relPath.stem}_flip.png' + + metrics = computeImageDiff(baselineImg, optimizedImg, heatmapPath=heatmapPath) + metrics['name'] = relPath.stem + metrics['path'] = str(relPath) + metrics['baseline_path'] = str(baselineImg.absolute()) + metrics['optimized_path'] = str(optimizedImg.absolute()) + results.append(metrics) + + logger.info(f'Compared {matched} image pairs, {missing} missing') + return results + + +def printImageTable(results, threshold=0.05): + '''Print a formatted FLIP image comparison table to stdout.''' + if results is None: + return False + + print('\n' + '=' * 85) + print(f"{'Image':<40} {'Mean FLIP':>10} {'Max FLIP':>10} {'% Diff':>10} {'Status':>8}") + print('=' * 85) + + identical = 0 + different = 0 + errors = 0 + + sortedResults = sorted(results, 
key=lambda x: x.get('mean_flip', 0), reverse=True) + + for r in sortedResults: + name = r['name'][:38] + + if 'error' in r: + print(f"{name:<40} {'ERROR':>10} {r['error']}") + errors += 1 + continue + + meanFlip = r['mean_flip'] + maxFlip = r['max_flip'] + pctDiff = r['pct_diff_pixels'] + + if r['identical'] or meanFlip < threshold: + status = 'OK' + identical += 1 + else: + status = 'DIFF' + different += 1 + + print(f"{name:<40} {meanFlip:>10.6f} {maxFlip:>10.4f} {pctDiff:>9.2f}% {status:>8}") + + print('=' * 85) + print(f'\nImage Summary (FLIP): {identical} identical, {different} different, {errors} errors') + print(f'Threshold: mean FLIP < {threshold}') + + if different > 0: + print(f'\n*** WARNING: {different} images differ above threshold! ***') + return False + else: + print('\nAll images match within threshold.') + return True + + +# ============================================================================= +# HTML REPORT +# ============================================================================= + +def generateHtmlReport(reportPath, imageResults, threshold=0.05): + ''' + Generate an HTML report with side-by-side image comparisons and FLIP heatmaps. + + Args: + reportPath: Path to output HTML file + imageResults: Image comparison results from compareImages() + threshold: FLIP threshold used for pass/fail + ''' + reportPath = Path(reportPath) + reportDir = reportPath.parent + reportDir.mkdir(parents=True, exist_ok=True) + + def relPath(absPath): + if absPath is None: + return None + try: + return str(Path(absPath).relative_to(reportDir)) + except ValueError: + return 'file:///' + str(Path(absPath)).replace('\\', '/') + + imgPassed = 0 + imgFailed = 0 + imgErrors = 0 + if imageResults: + for r in imageResults: + if 'error' in r: + imgErrors += 1 + elif r['identical'] or r['mean_flip'] < threshold: + imgPassed += 1 + else: + imgFailed += 1 + + html = [] + html.append(''' + + + + + MaterialX Image Comparison Report + + + +
+

MaterialX Image Comparison Report

+''') + + # Summary cards + html.append(f''' +
+
+

Images Passed

+
{imgPassed}
+
+
+

Images Failed

+
{imgFailed}
+
+
+

Errors

+
{imgErrors}
+
+
+

FLIP Threshold

+
{threshold}
+
+
+''') + + # Image comparisons + if imageResults: + html.append(f''' +

Image Comparisons (FLIP)

+

FLIP score: 0 = identical, 1 = maximally different. Threshold: {threshold}

+
+''') + sortedImages = sorted(imageResults, key=lambda x: x.get('mean_flip', 0), reverse=True) + + for r in sortedImages: + if 'error' in r: + continue + + passed = r['identical'] or r['mean_flip'] < threshold + statusClass = 'passed' if passed else 'failed' + statusBadge = 'pass' if passed else 'fail' + statusText = 'PASS' if passed else 'FAIL' + + baselineRel = relPath(r.get('baseline_path')) + optimizedRel = relPath(r.get('optimized_path')) + heatmapRel = relPath(r.get('heatmap_path')) + + html.append(f''' +
+
+

{r['name']}

+
+ Mean FLIP: {r['mean_flip']:.4f} | Max: {r['max_flip']:.4f} | {r['pct_diff_pixels']:.1f}% pixels differ + {statusText} +
+
+
+ Baseline +
Baseline
+
+
+ Optimized +
Optimized
+
+
+''') + if heatmapRel: + html.append(f''' FLIP Heatmap +
FLIP Heatmap
+''') + else: + html.append('''
No heatmap
+''') + html.append('''
+
+''') + + html.append('
\n') + + # Footer + html.append(''' +
+ Generated by diff_images.py | NVIDIA FLIP for perceptual image comparison +
+
+ + +''') + + with open(reportPath, 'w', encoding='utf-8') as f: + f.write(''.join(html)) + + logger.info(f'HTML report saved to: {reportPath}') + + +# ============================================================================= +# MAIN +# ============================================================================= + +def main(): + parser = argparse.ArgumentParser( + description='Compare rendered images between baseline and optimized MaterialX test runs using NVIDIA FLIP.', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + %(prog)s ./baseline/ ./optimized/ + %(prog)s ./baseline/ ./optimized/ --threshold 0.01 + %(prog)s ./baseline/ ./optimized/ --report comparison.html +''') + + parser.add_argument('baseline', type=Path, + help='Baseline directory containing rendered images') + parser.add_argument('optimized', type=Path, + help='Optimized directory containing rendered images') + parser.add_argument('--threshold', type=float, default=0.05, + help='FLIP threshold for pass/fail (default: 0.05)') + parser.add_argument('--report', type=Path, default=None, + help='Path for HTML report with side-by-side images and FLIP heatmaps') + + args = parser.parse_args() + + if not _have_flip: + logger.error('flip-evaluator is required. 
Install with: pip install flip-evaluator') + sys.exit(1) + + # Determine report directory for heatmaps + reportDir = None + if args.report: + reportDir = args.report.parent + + try: + results = compareImages(args.baseline, args.optimized, + args.threshold, reportDir=reportDir) + allPassed = printImageTable(results, args.threshold) + + if args.report and results: + generateHtmlReport(args.report, results, args.threshold) + + sys.exit(0 if allPassed else 1) + + except FileNotFoundError as e: + logger.error(f'{e}') + sys.exit(1) + except Exception as e: + logger.error(f'Error: {e}') + raise + + +if __name__ == '__main__': + main() diff --git a/python/MaterialXTest/diff_test_runs/diff_shaders.py b/python/MaterialXTest/diff_test_runs/diff_shaders.py new file mode 100644 index 0000000000..d5515acf56 --- /dev/null +++ b/python/MaterialXTest/diff_test_runs/diff_shaders.py @@ -0,0 +1,436 @@ +#!/usr/bin/env python +''' +Compare dumped shader files between baseline and optimized MaterialX test runs. + +Computes per-material metrics from generated GLSL shader source files and, +optionally, from external shader analysis tools found in PATH. 
+ +Built-in metrics (always available): + LOC Lines of code (non-blank lines in the pixel shader) + +Tool-based metrics (when the tool is in PATH): + glslangValidator SPIR-V size (bytes) + compilation time (ms) + spirv-opt Optimised SPIR-V size + optimisation time (ms) + +Usage: + python diff_shaders.py + python diff_shaders.py -o shader_diff.html +''' + +import argparse +import logging +import shutil +import subprocess +import sys +import tempfile +import time +from pathlib import Path + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('diff_shaders') + +# Import shared reporting utilities (works both as package and standalone script) +try: + from ._report import (mergeComparison, printComparisonTable, + createComparisonChart, generateHtmlReport, + chartPath, openReport) +except ImportError: + from _report import (mergeComparison, printComparisonTable, + createComparisonChart, generateHtmlReport, + chartPath, openReport) + + +# ============================================================================= +# SHADER FILE DISCOVERY +# ============================================================================= + +def findShaderPairs(baselineDir, optimizedDir, pattern='**/*_ps.glsl'): + ''' + Find matching pixel shader files between baseline and optimized directories. + + Args: + baselineDir: Path to baseline shader directory + optimizedDir: Path to optimized shader directory + pattern: Glob pattern for shader files (default: pixel shaders) + + Returns: + List of (materialName, baselinePath, optimizedPath) tuples, + sorted by material name. 
+ ''' + baselineDir = Path(baselineDir) + optimizedDir = Path(optimizedDir) + + pairs = [] + for baselineFile in sorted(baselineDir.glob(pattern)): + relPath = baselineFile.relative_to(baselineDir) + optimizedFile = optimizedDir / relPath + + if not optimizedFile.exists(): + logger.warning(f'Missing in optimized: {relPath}') + continue + + # Derive material name from filename (strip _ps.glsl suffix) + stem = baselineFile.stem + if stem.endswith('_ps'): + materialName = stem[:-3] + elif stem.endswith('_vs'): + materialName = stem[:-3] + else: + materialName = stem + + pairs.append((materialName, baselineFile, optimizedFile)) + + logger.info(f'Found {len(pairs)} matching shader pairs') + return pairs + + +# ============================================================================= +# METRICS: LOC (always available) +# ============================================================================= + +def countLoc(shaderPath): + '''Count non-blank lines in a shader file.''' + text = Path(shaderPath).read_text(encoding='utf-8', errors='replace') + return sum(1 for line in text.splitlines() if line.strip()) + + +def computeLocMetrics(pairs): + ''' + Compute LOC (lines of code) for all shader pairs. + + Returns: + (baselineDict, optimizedDict) -- {materialName: loc} for each side. + ''' + baseline = {} + optimized = {} + for materialName, baselinePath, optimizedPath in pairs: + baseline[materialName] = countLoc(baselinePath) + optimized[materialName] = countLoc(optimizedPath) + return baseline, optimized + + +# ============================================================================= +# SPIR-V COMPILATION PIPELINE (glslangValidator, compile once / reuse) +# ============================================================================= + +def _compileToSpirvTimed(glslPath, outputPath): + ''' + Compile a GLSL shader to SPIR-V using glslangValidator. 
+ + Uses OpenGL semantics (-G) since MaterialX generates OpenGL GLSL, + and --auto-map-locations to assign layout locations automatically + (MaterialX shaders don't have explicit layout qualifiers). + + Returns (success: bool, elapsedMs: float). + ''' + try: + start = time.perf_counter() + result = subprocess.run( + ['glslangValidator', '-G', '--auto-map-locations', + '-S', 'frag', '-o', str(outputPath), str(glslPath)], + capture_output=True, text=True, timeout=30 + ) + elapsedMs = (time.perf_counter() - start) * 1000.0 + if result.returncode != 0: + logger.debug(f'glslangValidator stderr: {result.stderr.strip()}') + return result.returncode == 0, elapsedMs + except (subprocess.TimeoutExpired, FileNotFoundError): + return False, 0.0 + + +def compileSpirvPairs(pairs, tmpDir): + ''' + Compile all shader pairs to SPIR-V once, caching results in tmpDir. + + Returns: + dict of materialName -> { + 'baseline_spv': Path, 'optimized_spv': Path, + 'baseline_compile_ms': float, 'optimized_compile_ms': float, + } + ''' + cache = {} + for materialName, baselinePath, optimizedPath in pairs: + bSpv = tmpDir / f'{materialName}_baseline.spv' + oSpv = tmpDir / f'{materialName}_optimized.spv' + + bOk, bMs = _compileToSpirvTimed(baselinePath, bSpv) + oOk, oMs = _compileToSpirvTimed(optimizedPath, oSpv) + + if bOk and oOk: + cache[materialName] = { + 'baseline_spv': bSpv, + 'optimized_spv': oSpv, + 'baseline_compile_ms': bMs, + 'optimized_compile_ms': oMs, + } + else: + logger.warning(f'SPIR-V compilation failed for {materialName}') + + logger.info(f'Compiled {len(cache)}/{len(pairs)} shader pairs to SPIR-V') + return cache + + +# ============================================================================= +# SPIR-V OPTIMISATION PIPELINE (spirv-opt, reuses compiled SPIR-V) +# ============================================================================= + +def _optimizeSpirvTimed(inputPath, outputPath): + '''Run spirv-opt -O on a SPIR-V binary. 
Returns (success, elapsedMs).''' + try: + start = time.perf_counter() + result = subprocess.run( + ['spirv-opt', '-O', '-o', str(outputPath), str(inputPath)], + capture_output=True, text=True, timeout=60 + ) + elapsedMs = (time.perf_counter() - start) * 1000.0 + return result.returncode == 0, elapsedMs + except (subprocess.TimeoutExpired, FileNotFoundError): + return False, 0.0 + + +def optimizeSpirvPairs(spirvCache, tmpDir): + ''' + Run spirv-opt on every cached SPIR-V pair. + + Returns: + dict of materialName -> { + 'baseline_opt_spv': Path, 'optimized_opt_spv': Path, + 'baseline_opt_ms': float, 'optimized_opt_ms': float, + } + ''' + optCache = {} + for name, info in spirvCache.items(): + bOpt = tmpDir / f'{name}_baseline_opt.spv' + oOpt = tmpDir / f'{name}_optimized_opt.spv' + + bOk, bMs = _optimizeSpirvTimed(info['baseline_spv'], bOpt) + oOk, oMs = _optimizeSpirvTimed(info['optimized_spv'], oOpt) + + if bOk and oOk: + optCache[name] = { + 'baseline_opt_spv': bOpt, + 'optimized_opt_spv': oOpt, + 'baseline_opt_ms': bMs, + 'optimized_opt_ms': oMs, + } + else: + logger.warning(f'spirv-opt failed for {name}') + + logger.info(f'Optimised {len(optCache)}/{len(spirvCache)} SPIR-V pairs') + return optCache + + +# ============================================================================= +# METRICS EXTRACTORS (pull numbers out of the caches) +# ============================================================================= + +def extractSpirvSizeMetrics(spirvCache): + '''SPIR-V binary size in bytes.''' + b, o = {}, {} + for name, info in spirvCache.items(): + b[name] = info['baseline_spv'].stat().st_size + o[name] = info['optimized_spv'].stat().st_size + return b, o + + +def extractCompileTimeMetrics(spirvCache): + '''glslangValidator compilation time in ms.''' + b, o = {}, {} + for name, info in spirvCache.items(): + b[name] = info['baseline_compile_ms'] + o[name] = info['optimized_compile_ms'] + return b, o + + +def extractOptSpirvSizeMetrics(optCache): + '''Optimised 
SPIR-V binary size in bytes.''' + b, o = {}, {} + for name, info in optCache.items(): + b[name] = info['baseline_opt_spv'].stat().st_size + o[name] = info['optimized_opt_spv'].stat().st_size + return b, o + + +def extractOptTimeMetrics(optCache): + '''spirv-opt optimisation time in ms.''' + b, o = {}, {} + for name, info in optCache.items(): + b[name] = info['baseline_opt_ms'] + o[name] = info['optimized_opt_ms'] + return b, o + + +# ============================================================================= +# REPORT HELPERS +# ============================================================================= + +def _addMetric(reportSections, data, metricTag, title, baselineName, + optimizedName, chartBase, unit='', valueFormat='.0f'): + ''' + Print a comparison table, generate a chart, and append to reportSections. + ''' + if data is None or data.empty: + return + + printComparisonTable(data, title, + baselineLabel=baselineName, + optimizedLabel=optimizedName, + unit=unit, valueFormat=valueFormat) + + svgPath = chartPath(chartBase, metricTag) + createComparisonChart(data, svgPath, title=title, + baselineLabel=baselineName, + optimizedLabel=optimizedName, + unit=unit) + reportSections.append((title, svgPath)) + + +# ============================================================================= +# MAIN +# ============================================================================= + +def main(): + parser = argparse.ArgumentParser( + description='Compare dumped shader files between baseline and ' + 'optimized MaterialX test runs.', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + %(prog)s ./baseline/ ./optimized/ + %(prog)s ./baseline/ ./optimized/ -o shader_diff.html + %(prog)s ./baseline/ ./optimized/ --pattern "**/*_vs.glsl" + +Available metrics depend on tools found in PATH: + LOC Always available (non-blank line count) + SPIR-V size Requires glslangValidator + Compile time Requires glslangValidator + Optimised SPIR-V Requires 
glslangValidator + spirv-opt + spirv-opt time Requires glslangValidator + spirv-opt +''') + + parser.add_argument('baseline', type=Path, + help='Baseline directory containing dumped shaders') + parser.add_argument('optimized', type=Path, + help='Optimized directory containing dumped shaders') + parser.add_argument('-o', '--outputfile', type=str, default=None, + help='Output HTML report file name ' + '(default: _vs__shaders.html)') + parser.add_argument('--pattern', type=str, default='**/*_ps.glsl', + help='Glob pattern for shader files (default: **/*_ps.glsl)') + + args = parser.parse_args() + + # Discover shader pairs ------------------------------------------------ + try: + pairs = findShaderPairs(args.baseline, args.optimized, args.pattern) + except FileNotFoundError as e: + logger.error(f'{e}') + sys.exit(1) + + if not pairs: + logger.error('No matching shader pairs found.') + sys.exit(1) + + # Directory leaf names for display + baselineName = Path(args.baseline).name + optimizedName = Path(args.optimized).name + + # Derive default report name + if args.outputfile is None: + args.outputfile = f'{baselineName}_vs_{optimizedName}_shaders.html' + + reportPath = Path(args.outputfile) + reportDir = reportPath.parent + reportDir.mkdir(parents=True, exist_ok=True) + chartBase = reportDir / (reportPath.stem + '.svg') + + reportSections = [] + + # Discover available tools --------------------------------------------- + tools = { + 'glslangValidator': shutil.which('glslangValidator'), + 'spirv-opt': shutil.which('spirv-opt'), + } + foundTools = [name for name, path in tools.items() if path] + if foundTools: + logger.info(f'Found tools in PATH: {", ".join(foundTools)}') + else: + logger.info('No optional shader tools found in PATH; ' + 'only LOC will be computed') + + # ---- Metric: LOC (always) -------------------------------------------- + logger.info('Computing LOC metrics...') + bLoc, oLoc = computeLocMetrics(pairs) + locData = mergeComparison(bLoc, oLoc) + 
_addMetric(reportSections, locData, 'LOC', + f'Lines of Code (non-blank): {baselineName} vs {optimizedName}', + baselineName, optimizedName, chartBase, unit=' lines') + + # ---- Tool-based metrics (inside a temp directory) -------------------- + with tempfile.TemporaryDirectory(prefix='mtlx_spirv_') as tmpDir: + tmpPath = Path(tmpDir) + spirvCache = {} + optCache = {} + + # -- Compile GLSL -> SPIR-V (once) --------------------------------- + if tools['glslangValidator']: + spirvCache = compileSpirvPairs(pairs, tmpPath) + + if spirvCache: + # SPIR-V Size + bSize, oSize = extractSpirvSizeMetrics(spirvCache) + _addMetric( + reportSections, mergeComparison(bSize, oSize), 'SPIRV', + f'SPIR-V Size (bytes): {baselineName} vs {optimizedName}', + baselineName, optimizedName, chartBase, unit=' B') + + # Compilation Time + bTime, oTime = extractCompileTimeMetrics(spirvCache) + _addMetric( + reportSections, mergeComparison(bTime, oTime), 'compile_time', + f'glslangValidator Compile Time (ms): ' + f'{baselineName} vs {optimizedName}', + baselineName, optimizedName, chartBase, + unit=' ms', valueFormat='.1f') + + # -- spirv-opt on cached SPIR-V ------------------------------------ + if tools['spirv-opt'] and spirvCache: + optCache = optimizeSpirvPairs(spirvCache, tmpPath) + + if optCache: + # Optimised SPIR-V Size + bOptSize, oOptSize = extractOptSpirvSizeMetrics(optCache) + _addMetric( + reportSections, mergeComparison(bOptSize, oOptSize), + 'SPIRV_opt', + f'Optimised SPIR-V Size (bytes): ' + f'{baselineName} vs {optimizedName}', + baselineName, optimizedName, chartBase, unit=' B') + + # spirv-opt Time + bOptTime, oOptTime = extractOptTimeMetrics(optCache) + _addMetric( + reportSections, mergeComparison(bOptTime, oOptTime), + 'spirvopt_time', + f'spirv-opt Time (ms): {baselineName} vs {optimizedName}', + baselineName, optimizedName, chartBase, + unit=' ms', valueFormat='.1f') + + # ---- HTML Report ----------------------------------------------------- + pageTitle = f'Shader 
Comparison: {baselineName} vs {optimizedName}' + toolInfo = ', '.join(foundTools) if foundTools else 'none' + footerText = (f'Generated by diff_shaders.py — ' + f'tools used: {toolInfo}') + + if reportSections: + generateHtmlReport(reportPath, reportSections, pageTitle=pageTitle, + footerText=footerText) + openReport(reportPath) + else: + print(f'\n{"=" * 85}') + print(' No data to report.') + print(f'{"=" * 85}') + + +if __name__ == '__main__': + main() diff --git a/python/MaterialXTest/diff_test_runs/diff_traces.py b/python/MaterialXTest/diff_test_runs/diff_traces.py new file mode 100644 index 0000000000..4c64b2e90a --- /dev/null +++ b/python/MaterialXTest/diff_test_runs/diff_traces.py @@ -0,0 +1,452 @@ +#!/usr/bin/env python +''' +Compare performance traces between baseline and optimized MaterialX test runs. + +Reads Perfetto .perfetto-trace files and compares slice durations, generating +tables, charts, and an HTML report. + +Two modes: + --gpu Compare GPU render durations per material (from GPU async track) + --slice NAME Compare CPU slice durations per material (child slices under + material parent slices). Multiple names produce multiple charts. + +For image comparison, see diff_images.py in the same directory. + +Usage: + python diff_traces.py --gpu + python diff_traces.py --slice GenerateShader + python diff_traces.py --slice GenerateShader CompileShader + python diff_traces.py --gpu --slice GenerateShader + python diff_traces.py --gpu -o custom_name.html +''' + +import argparse +import logging +import sys +from pathlib import Path + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger('diff_traces') + + +# ----------------------------------------------------------------------------- +# Dependencies +# ----------------------------------------------------------------------------- + +try: + from perfetto.trace_processor import TraceProcessor +except ImportError: + sys.exit('ERROR: perfetto is required. 
Install with: pip install perfetto') + +try: + import pandas as pd +except ImportError: + sys.exit('ERROR: pandas is required. Install with: pip install pandas') + +# Import shared reporting utilities (works both as package and standalone script) +try: + from ._report import (mergeComparisonDf, printComparisonTable, + createComparisonChart, generateHtmlReport, + chartPath, openReport) +except ImportError: + from _report import (mergeComparisonDf, printComparisonTable, + createComparisonChart, generateHtmlReport, + chartPath, openReport) + + +# ============================================================================= +# TRACE LOADING +# ============================================================================= + +def findTraceFile(path): + ''' + Find a Perfetto trace file from a path. + + If path is a file, return it directly. + If path is a directory, search for *.perfetto-trace files. + Returns the first trace file found, or raises FileNotFoundError. + ''' + path = Path(path) + + if path.is_file(): + return path + + if path.is_dir(): + traces = list(path.glob('*.perfetto-trace')) + if not traces: + traces = list(path.glob('**/*.perfetto-trace')) + if traces: + if len(traces) > 1: + print(f'Warning: Multiple traces found in {path}, using: {traces[0].name}') + return traces[0] + raise FileNotFoundError(f'No .perfetto-trace files found in: {path}') + + raise FileNotFoundError(f'Path not found: {path}') + + +def loadSliceDurations(traceProcessor, trackName=None): + ''' + Load slice durations from a Perfetto trace, optionally filtered by track. + + Results are ordered by timestamp to preserve frame ordering within each + material (important for warmup frame discarding). + + Args: + traceProcessor: TraceProcessor instance + trackName: Optional track name filter (e.g., "GPU") + + Returns: + DataFrame with columns [name, dur_ms]. 
+ ''' + + if trackName: + query = f''' + SELECT slice.name, slice.dur / 1000000.0 as dur_ms + FROM slice + JOIN track ON slice.track_id = track.id + WHERE track.name = '{trackName}' + ORDER BY slice.ts + ''' + else: + query = ''' + SELECT slice.name, slice.dur / 1000000.0 as dur_ms + FROM slice + JOIN track ON slice.track_id = track.id + ORDER BY slice.ts + ''' + + df = traceProcessor.query(query).as_pandas_dataframe() + if df.empty: + trackMsg = f' on track "{trackName}"' if trackName else '' + logger.warning(f'No slices found{trackMsg}') + return pd.DataFrame(columns=['name', 'dur_ms']) + return df + + +def loadChildSliceDurations(traceProcessor, sliceName): + ''' + Load durations of a named child slice, keyed by parent (material) name. + + Queries the trace for slices matching sliceName that are direct children + of a parent slice (typically the material name). + + Args: + traceProcessor: TraceProcessor instance + sliceName: Name of the child slice (e.g., "GenerateShader", "CompileShader") + + Returns: + DataFrame with columns [name, dur_ms]. 'name' is the parent (material) name. + ''' + + query = f''' + SELECT parent.name as name, child.dur / 1000000.0 as dur_ms + FROM slice child + JOIN slice parent ON child.parent_id = parent.id + WHERE child.name = '{sliceName}' + ORDER BY parent.name + ''' + + df = traceProcessor.query(query).as_pandas_dataframe() + if df.empty: + logger.warning(f'No "{sliceName}" slices found') + return pd.DataFrame(columns=['name', 'dur_ms']) + return df + + +def loadOptimizationEvents(traceProcessor, optimizationName=None): + ''' + Load optimization events from a Perfetto trace. + + Optimization events are nested inside ShaderGen slices with hierarchy: + MaterialName -> GenerateShader -> OptimizationPass + + Args: + traceProcessor: TraceProcessor instance + optimizationName: Filter by optimization pass name + + Returns: + Set of material names that had the optimization applied. 
+ ''' + + if optimizationName: + query = f''' + SELECT DISTINCT grandparent.name as material_name + FROM slice opt + JOIN slice parent ON opt.parent_id = parent.id + JOIN slice grandparent ON parent.parent_id = grandparent.id + WHERE opt.name = "{optimizationName}" + ''' + else: + query = ''' + SELECT DISTINCT opt.name as opt_name, grandparent.name as material_name + FROM slice opt + JOIN slice parent ON opt.parent_id = parent.id + JOIN slice grandparent ON parent.parent_id = grandparent.id + ''' + + result = traceProcessor.query(query) + + optimizedMaterials = set() + for row in result: + if row.material_name: + optimizedMaterials.add(row.material_name) + + return optimizedMaterials + + +# ============================================================================= +# COMPARISON +# ============================================================================= + +def _aggregateByName(df, warmupFrames=0): + ''' + Aggregate durations by name (averaging across multiple samples). + + Args: + df: DataFrame with columns [name, dur_ms], ordered by timestamp. + warmupFrames: Number of initial frames per material to discard + before averaging (burn-in period). + ''' + if df.empty: + return pd.DataFrame(columns=['name', 'value']) + + if warmupFrames > 0: + # Within each material group, drop the first N rows (preserving + # chronological order from the ORDER BY slice.ts query). + df = (df.groupby('name', sort=False) + .apply(lambda g: g.iloc[warmupFrames:] if len(g) > warmupFrames + else g.iloc[0:0], include_groups=False) + .reset_index(level=0)) + if df.empty: + logger.warning(f'All samples discarded by warmup ({warmupFrames} frames)') + return pd.DataFrame(columns=['name', 'value']) + + agg = df.groupby('name')['dur_ms'].mean().reset_index() + agg.columns = ['name', 'value'] + return agg + + +def compareGpuTraces(baselineTraceProcessor, optimizedTraceProcessor, + minDeltaMs=0.0, warmupFrames=0): + ''' + Compare GPU render durations between baseline and optimized traces. 
+ + Reads the GPU async track and averages per material across frames, + optionally discarding initial warmup frames. + + Returns: + (merged_df, totalFrames, usedFrames) -- the comparison DataFrame, + the total number of frames per material, and how many were used + after warmup discarding. + ''' + logger.info('Comparing GPU traces...') + baselineData = loadSliceDurations(baselineTraceProcessor, trackName='GPU') + optimizedData = loadSliceDurations(optimizedTraceProcessor, trackName='GPU') + + # Determine typical samples per material (frames rendered) + if not baselineData.empty: + totalFrames = int(baselineData.groupby('name').size().median()) + else: + totalFrames = 0 + + usedFrames = max(0, totalFrames - warmupFrames) + + baselineAgg = _aggregateByName(baselineData, warmupFrames=warmupFrames) + optimizedAgg = _aggregateByName(optimizedData, warmupFrames=warmupFrames) + + return (mergeComparisonDf(baselineAgg, optimizedAgg, minDelta=minDeltaMs), + totalFrames, usedFrames) + + +def compareChildSlices(baselineTraceProcessor, optimizedTraceProcessor, sliceName, minDeltaMs=0.0): + ''' + Compare durations of a named child slice (e.g., GenerateShader) per material. + + Queries child slices under material parent slices in both traces, + then merges by material name and computes delta/percentage. 
+ ''' + logger.info(f'Comparing "{sliceName}" slices...') + baselineData = loadChildSliceDurations(baselineTraceProcessor, sliceName) + baselineAgg = _aggregateByName(baselineData) + + optimizedData = loadChildSliceDurations(optimizedTraceProcessor, sliceName) + optimizedAgg = _aggregateByName(optimizedData) + + return mergeComparisonDf(baselineAgg, optimizedAgg, minDelta=minDeltaMs) + + +# ============================================================================= +# MAIN +# ============================================================================= + +def main(): + parser = argparse.ArgumentParser( + description='Compare performance traces between baseline and optimized MaterialX test runs.', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + %(prog)s ./baseline/ ./optimized/ --gpu + %(prog)s ./baseline/ ./optimized/ --slice GenerateShader + %(prog)s ./baseline/ ./optimized/ --slice GenerateShader CompileShader RenderMaterial + %(prog)s ./baseline/ ./optimized/ --gpu --slice GenerateShader + %(prog)s ./baseline/ ./optimized/ --gpu -o custom_name.html + +For image comparison, see diff_images.py in the same directory. 
+''') + + parser.add_argument('baseline', type=Path, + help='Baseline directory containing Perfetto traces') + parser.add_argument('optimized', type=Path, + help='Optimized directory containing Perfetto traces') + + modeGroup = parser.add_argument_group('comparison modes (at least one required)') + modeGroup.add_argument('--gpu', action='store_true', + help='Compare GPU render durations per material') + modeGroup.add_argument('--slice', nargs='+', metavar='NAME', + help='Compare named child-slice durations per material ' + '(e.g., GenerateShader, CompileShader, RenderMaterial)') + + optGroup = parser.add_argument_group('options') + optGroup.add_argument('--min-delta-ms', type=float, default=0.0, + help='Minimum absolute time difference in ms to include') + optGroup.add_argument('-o', '--outputfile', dest='outputfile', type=str, + default=None, + help='Output HTML report file name (default: _vs_.html)') + optGroup.add_argument('--warmup-frames', type=int, default=0, + help='Number of initial GPU frames per material to discard ' + 'as burn-in before averaging (default: 0)') + optGroup.add_argument('--show-opt', type=str, metavar='OPT_NAME', + help='Highlight materials affected by optimization pass') + + args = parser.parse_args() + + if not args.gpu and not args.slice: + parser.print_help() + sys.exit(0) + + # Load trace files once + try: + baselineTracePath = findTraceFile(args.baseline) + optimizedTracePath = findTraceFile(args.optimized) + except FileNotFoundError as e: + logger.error(f'{e}') + sys.exit(1) + + logger.info(f'Loading baseline trace: {baselineTracePath}') + baselineTraceProcessor = TraceProcessor(trace=str(baselineTracePath)) + + logger.info(f'Loading optimized trace: {optimizedTracePath}') + optimizedTraceProcessor = TraceProcessor(trace=str(optimizedTracePath)) + + # Load optimization events if requested + optimizedMaterials = set() + if args.show_opt: + baselineMaterials = loadOptimizationEvents(baselineTraceProcessor, args.show_opt) + if 
baselineMaterials: + logger.error(f'ERROR: Baseline has {len(baselineMaterials)} materials ' + f'with {args.show_opt}!') + sys.exit(1) + + optimizedMaterials = loadOptimizationEvents(optimizedTraceProcessor, args.show_opt) + logger.info(f'Found {len(optimizedMaterials)} materials affected by {args.show_opt}') + + # Directory leaf names for display + baselineName = Path(args.baseline).name + optimizedName = Path(args.optimized).name + + # Derive default report name from directory names + if args.outputfile is None: + args.outputfile = f'{baselineName}_vs_{optimizedName}.html' + + # Build the list of comparisons to run + comparisons = [] + + if args.gpu: + comparisons.append('GPU') + + if args.slice: + for sliceName in args.slice: + comparisons.append(sliceName) + + # Build filter subtitle from active options + filterParts = [] + if args.min_delta_ms > 0: + filterParts.append(f'min delta: {args.min_delta_ms:.1f} ms') + if args.warmup_frames > 0: + filterParts.append(f'warmup: {args.warmup_frames} frames discarded') + if args.show_opt: + filterParts.append(f'highlighting: {args.show_opt}') + subtitle = 'Filters: ' + ', '.join(filterParts) if filterParts else None + + # Derive chart paths from the report file name + reportPath = Path(args.outputfile) + reportDir = reportPath.parent + reportDir.mkdir(parents=True, exist_ok=True) + chartBase = reportDir / (reportPath.stem + '.svg') + + reportSections = [] + + try: + for label in comparisons: + # Run comparison and build title + if label == 'GPU': + traceData, totalFrames, usedFrames = compareGpuTraces( + baselineTraceProcessor, optimizedTraceProcessor, + args.min_delta_ms, args.warmup_frames) + if usedFrames > 1: + if args.warmup_frames > 0: + avgNote = (f' (averaged over {usedFrames} of {totalFrames} frames, ' + f'{args.warmup_frames} warmup discarded)') + else: + avgNote = f' (averaged over {totalFrames} frames)' + else: + avgNote = '' + title = f'GPU Render Duration per Material{avgNote}: {baselineName} vs 
{optimizedName}' + else: + traceData = compareChildSlices( + baselineTraceProcessor, optimizedTraceProcessor, label, args.min_delta_ms) + title = f'{label} Duration per Material: {baselineName} vs {optimizedName}' + + # Print table + printComparisonTable(traceData, title, + baselineLabel=baselineName, + optimizedLabel=optimizedName, + unit='ms', + highlightNames=optimizedMaterials) + + # Generate chart + if traceData is not None and not traceData.empty: + svgPath = chartPath(chartBase, label) + createComparisonChart( + traceData, svgPath, title=title, + baselineLabel=baselineName, optimizedLabel=optimizedName, + unit='ms', + highlightNames=optimizedMaterials, + highlightLabel=f'affected by {args.show_opt}' if args.show_opt else None, + subtitle=subtitle) + reportSections.append((title, svgPath)) + + # HTML Report (always generated) + pageTitle = f'Trace Comparison: {baselineName} vs {optimizedName}' + if reportSections: + generateHtmlReport(reportPath, reportSections, pageTitle=pageTitle, + subtitle=subtitle, + footerText='Generated by diff_traces.py') + openReport(reportPath) + else: + print(f'\n{"=" * 85}') + print(' No data to report.') + print(f'{"=" * 85}') + + sys.exit(0) + + except FileNotFoundError as e: + logger.error(f'{e}') + sys.exit(1) + except Exception as e: + logger.error(f'Error: {e}') + raise + + +if __name__ == '__main__': + main() From d90ca04dd60d78546ec3efa2ffa2c4016d218cbb Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 11:07:26 -0500 Subject: [PATCH 05/17] Add framesPerMaterial and envSampleCount to _options.mtlx Declare the new test suite options so the C++ test runner can read them from the options file. Also reset enableTracing default to false (opt-in for profiling runs). 
--- resources/Materials/TestSuite/_options.mtlx | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/resources/Materials/TestSuite/_options.mtlx b/resources/Materials/TestSuite/_options.mtlx index 49b0b7cc39..834aaf99a1 100644 --- a/resources/Materials/TestSuite/_options.mtlx +++ b/resources/Materials/TestSuite/_options.mtlx @@ -83,6 +83,20 @@ When enabled, generates .perfetto-trace files in outputDirectory. Default is false to avoid overhead when not profiling. --> - + + + + + + + From f50206f15e624314f403ecadcfcbb9f6d2191c41 Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Wed, 28 Jan 2026 16:44:00 -0500 Subject: [PATCH 06/17] Additional tracing markers in shader codegen --- source/MaterialXGenGlsl/GlslShaderGenerator.cpp | 5 +++++ source/MaterialXGenHw/HwShaderGenerator.cpp | 5 +++++ source/MaterialXGenShader/ShaderNode.cpp | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/source/MaterialXGenGlsl/GlslShaderGenerator.cpp b/source/MaterialXGenGlsl/GlslShaderGenerator.cpp index eb115fe97c..896e7809fb 100644 --- a/source/MaterialXGenGlsl/GlslShaderGenerator.cpp +++ b/source/MaterialXGenGlsl/GlslShaderGenerator.cpp @@ -31,6 +31,8 @@ #include #include +#include + MATERIALX_NAMESPACE_BEGIN const string GlslShaderGenerator::TARGET = "genglsl"; @@ -128,6 +130,9 @@ GlslShaderGenerator::GlslShaderGenerator(TypeSystemPtr typeSystem) : ShaderPtr GlslShaderGenerator::generate(const string& name, ElementPtr element, GenContext& context) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, name.c_str()); + ShaderPtr shader = createShader(name, element, context); // Request fixed floating-point notation for consistency across targets. 
diff --git a/source/MaterialXGenHw/HwShaderGenerator.cpp b/source/MaterialXGenHw/HwShaderGenerator.cpp index 68872ce5d2..d5e102c361 100644 --- a/source/MaterialXGenHw/HwShaderGenerator.cpp +++ b/source/MaterialXGenHw/HwShaderGenerator.cpp @@ -15,6 +15,8 @@ #include #include +#include + MATERIALX_NAMESPACE_BEGIN // @@ -90,6 +92,9 @@ HwShaderGenerator::HwShaderGenerator(TypeSystemPtr typeSystem, SyntaxPtr syntax) ShaderPtr HwShaderGenerator::createShader(const string& name, ElementPtr element, GenContext& context) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, name.c_str()); + // Create the root shader graph ShaderGraphPtr graph = ShaderGraph::create(nullptr, name, element, context); ShaderPtr shader = std::make_shared(name, graph); diff --git a/source/MaterialXGenShader/ShaderNode.cpp b/source/MaterialXGenShader/ShaderNode.cpp index 4d54d5a6d6..1940673846 100644 --- a/source/MaterialXGenShader/ShaderNode.cpp +++ b/source/MaterialXGenShader/ShaderNode.cpp @@ -9,6 +9,8 @@ #include #include +#include + MATERIALX_NAMESPACE_BEGIN const string ShaderMetadataRegistry::USER_DATA_NAME = "ShaderMetadataRegistry"; @@ -172,6 +174,9 @@ ShaderNode::ShaderNode(const ShaderGraph* parent, const string& name) : ShaderNodePtr ShaderNode::create(const ShaderGraph* parent, const string& name, const NodeDef& nodeDef, GenContext& context) { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, name.c_str()); + ShaderNodePtr newNode = std::make_shared(parent, name); const ShaderGenerator& shadergen = context.getShaderGenerator(); From 07778a50449d90106d35c5f8ed8a07213f3ee932 Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 30 Jan 2026 15:29:56 -0500 Subject: [PATCH 07/17] Add trace markers for emit and graph traversal functions Added MX_TRACE markers to key codegen functions for profiling: - ShaderGraph: createConnectedNodes, addUpstreamDependencies, createNode - ShaderGenerator: 
getImplementation, emitFunctionDefinitions, emitFunctionCalls - CompoundNode: initialize, emitFunctionDefinition, emitFunctionCall --- source/MaterialXGenShader/Nodes/CompoundNode.cpp | 11 +++++++++++ source/MaterialXGenShader/ShaderGenerator.cpp | 11 +++++++++++ source/MaterialXGenShader/ShaderGraph.cpp | 10 ++++++++++ 3 files changed, 32 insertions(+) diff --git a/source/MaterialXGenShader/Nodes/CompoundNode.cpp b/source/MaterialXGenShader/Nodes/CompoundNode.cpp index 78713d821f..f25dbbdf7b 100644 --- a/source/MaterialXGenShader/Nodes/CompoundNode.cpp +++ b/source/MaterialXGenShader/Nodes/CompoundNode.cpp @@ -12,6 +12,8 @@ #include #include +#include + MATERIALX_NAMESPACE_BEGIN ShaderNodeImplPtr CompoundNode::create() @@ -27,6 +29,9 @@ void CompoundNode::addClassification(ShaderNode& node) const void CompoundNode::initialize(const InterfaceElement& element, GenContext& context) { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, element.getName().c_str()); + ShaderNodeImpl::initialize(element, context); if (!element.isA()) @@ -62,6 +67,9 @@ void CompoundNode::createVariables(const ShaderNode&, GenContext& context, Shade void CompoundNode::emitFunctionDefinition(const ShaderNode& node, GenContext& context, ShaderStage& stage) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, _functionName.c_str()); + DEFINE_SHADER_STAGE(stage, Stage::PIXEL) { const ShaderGenerator& shadergen = context.getShaderGenerator(); @@ -148,6 +156,9 @@ void CompoundNode::emitFunctionDefinition(const ShaderNode& node, GenContext& co void CompoundNode::emitFunctionCall(const ShaderNode& node, GenContext& context, ShaderStage& stage) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, _functionName.c_str()); + const ShaderGenerator& shadergen = context.getShaderGenerator(); DEFINE_SHADER_STAGE(stage, Stage::VERTEX) diff --git 
a/source/MaterialXGenShader/ShaderGenerator.cpp b/source/MaterialXGenShader/ShaderGenerator.cpp index 9dc2d5f212..182647e10d 100644 --- a/source/MaterialXGenShader/ShaderGenerator.cpp +++ b/source/MaterialXGenShader/ShaderGenerator.cpp @@ -17,6 +17,8 @@ #include #include +#include + #include MATERIALX_NAMESPACE_BEGIN @@ -106,6 +108,9 @@ void ShaderGenerator::emitFunctionDefinitionParameter(const ShaderPort* shaderPo void ShaderGenerator::emitFunctionDefinitions(const ShaderGraph& graph, GenContext& context, ShaderStage& stage) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, graph.getName().c_str()); + // Emit function definitions for all nodes in the graph. for (ShaderNode* node : graph.getNodes()) { @@ -130,6 +135,9 @@ void ShaderGenerator::emitFunctionCall(const ShaderNode& node, GenContext& conte void ShaderGenerator::emitFunctionCalls(const ShaderGraph& graph, GenContext& context, ShaderStage& stage, uint32_t classification) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, graph.getName().c_str()); + for (ShaderNode* node : graph.getNodes()) { if (!classification || node->hasClassification(classification)) @@ -297,6 +305,9 @@ ShaderNodeImplPtr ShaderGenerator::createShaderNodeImplForImplementation(const I ShaderNodeImplPtr ShaderGenerator::getImplementation(const NodeDef& nodedef, GenContext& context) const { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, nodedef.getName().c_str()); + InterfaceElementPtr implElement = nodedef.getImplementation(getTarget()); if (!implElement) { diff --git a/source/MaterialXGenShader/ShaderGraph.cpp b/source/MaterialXGenShader/ShaderGraph.cpp index bfb15e9281..3c40fa7888 100644 --- a/source/MaterialXGenShader/ShaderGraph.cpp +++ b/source/MaterialXGenShader/ShaderGraph.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include @@ -76,6 +78,8 @@ void 
ShaderGraph::createConnectedNodes(const ElementPtr& downstreamElement, ElementPtr connectingElement, GenContext& context) { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + // Create the node if it doesn't exist. NodePtr upstreamNode = upstreamElement->asA(); if (!upstreamNode) @@ -167,6 +171,9 @@ void ShaderGraph::createConnectedNodes(const ElementPtr& downstreamElement, void ShaderGraph::addUpstreamDependencies(const Element& root, GenContext& context) { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, root.getName().c_str()); + std::set processedOutputs; for (Edge edge : root.traverseGraph()) @@ -695,6 +702,9 @@ void ShaderGraph::applyInputTransforms(ConstNodePtr node, ShaderNode* shaderNode ShaderNode* ShaderGraph::createNode(const string& name, const string& uniqueId, ConstNodeDefPtr nodeDef, GenContext& context) { + MX_TRACE_FUNCTION(Tracing::Category::ShaderGen); + MX_TRACE_SCOPE(Tracing::Category::ShaderGen, name.c_str()); + if (!nodeDef) { throw ExceptionShaderGenError("Could not find a nodedef for node '" + name + "'"); From a835eb6a5fc51cca33e76772a6a9e981bec94547 Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 14:42:19 -0500 Subject: [PATCH 08/17] Add GPU timing infrastructure and fix trace aliases in RenderGlsl.cpp - Add GpuTimerQuery helper class and getCurrentTimeNs() for GPU timing via GL_TIME_ELAPSED queries (guarded by MATERIALX_BUILD_TRACING) - Add multi-frame render loop using framesPerMaterial for statistical validity, emitting MX_TRACE_ASYNC events on the GPU track - Replace stale Cat:: alias with mx::Tracing::Category:: (the alias was removed in PR #2742) --- .../MaterialXRenderGlsl/RenderGlsl.cpp | 88 ++++++++++++++++++- 1 file changed, 84 insertions(+), 4 deletions(-) diff --git a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp index 9575ebbbac..e902a72e5d 100644 --- 
a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp +++ b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp @@ -19,8 +19,63 @@ #include +#ifdef MATERIALX_BUILD_TRACING +#include +#include +#endif + namespace mx = MaterialX; +#ifdef MATERIALX_BUILD_TRACING +// GPU timing utilities +namespace { + +// Get current steady_clock time in nanoseconds (used as the start timestamp of async trace events) +uint64_t getCurrentTimeNs() +{ + using namespace std::chrono; + return duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count(); +} + +// GPU timer query helper using GL_TIME_ELAPSED; owns one GL query object for its lifetime +class GpuTimerQuery +{ + public: + GpuTimerQuery() + { + glGenQueries(1, &_query); + } + + ~GpuTimerQuery() + { + glDeleteQueries(1, &_query); + } + + void begin() + { + glBeginQuery(GL_TIME_ELAPSED, _query); + } + + void end() + { + glEndQuery(GL_TIME_ELAPSED); + } + + // Returns duration in nanoseconds, blocks until result is available; stays 0 if GL writes no result + uint64_t getDurationNs() + { + GLuint64 elapsedTime = 0; + glGetQueryObjectui64v(_query, GL_QUERY_RESULT, &elapsedTime); + return elapsedTime; + } + + private: + GLuint _query = 0; +}; + +} // anonymous namespace +#endif // MATERIALX_BUILD_TRACING + // // Render validation tester for the GLSL shading language // @@ -232,7 +287,7 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, if (testOptions.dumpGeneratedCode) { - MX_TRACE_SCOPE(Cat::Render, "DumpGeneratedCode"); + MX_TRACE_SCOPE(mx::Tracing::Category::Render, "DumpGeneratedCode"); mx::ScopedTimer dumpTimer(&profileTimes.languageTimes.ioTime); std::ofstream file; file.open(shaderPath + "_vs.glsl"); @@ -292,7 +347,7 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, if (testOptions.dumpUniformsAndAttributes) { - MX_TRACE_SCOPE(Cat::Render, "DumpUniformsAndAttributes"); + MX_TRACE_SCOPE(mx::Tracing::Category::Render, "DumpUniformsAndAttributes"); mx::ScopedTimer printTimer(&profileTimes.languageTimes.ioTime); log << "* Uniform:" << std::endl; program->printUniforms(log); @@ -358,11
+413,36 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, unsigned int width = (unsigned int) testOptions.renderSize[0] * supersampleFactor; unsigned int height = (unsigned int) testOptions.renderSize[1] * supersampleFactor; _renderer->setSize(width, height); - _renderer->render(); + + // Render multiple frames for statistical validity (configurable via framesPerMaterial) + // Frame 0 often includes driver shader compilation; analyze in Python to discard warmup + for (unsigned int frameIdx = 0; frameIdx < testOptions.framesPerMaterial; ++frameIdx) + { +#ifdef MATERIALX_BUILD_TRACING + // GPU timing with timer queries + uint64_t cpuStartNs = getCurrentTimeNs(); + GpuTimerQuery gpuTimer; + gpuTimer.begin(); +#endif + _renderer->render(); + +#ifdef MATERIALX_BUILD_TRACING + gpuTimer.end(); + + // glFinish ensures GPU is done, making CPU trace scope accurate + glFinish(); + + // Get GPU duration (query result blocks until available) + uint64_t gpuDurationNs = gpuTimer.getDurationNs(); + + // Emit async event on GPU track showing actual GPU work duration + MX_TRACE_ASYNC(mx::Tracing::AsyncTrack::GPU, mx::Tracing::Category::Render, shaderName.c_str(), cpuStartNs, gpuDurationNs); +#endif + } } { - MX_TRACE_SCOPE(Cat::Render, "CaptureAndSaveImage"); + MX_TRACE_SCOPE(mx::Tracing::Category::Render, "CaptureAndSaveImage"); mx::ScopedTimer ioTimer(&profileTimes.languageTimes.imageSaveTime); std::string fileName = shaderPath + "_glsl.png"; mx::ImagePtr image = _renderer->captureImage(); From fda5a18c75a93b7e5b2ca972bd50270b4c86a12a Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 14:52:17 -0500 Subject: [PATCH 09/17] Link MaterialXTrace to MaterialXGenShader when tracing is enabled Shader codegen source files (ShaderGraph.cpp, ShaderGenerator.cpp, etc.) include MaterialXTrace/Tracing.h and use MX_TRACE macros. When Perfetto tracing is enabled, MaterialXGenShader needs to link against MaterialXTrace for the trace event symbols. 
--- source/MaterialXGenShader/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/MaterialXGenShader/CMakeLists.txt b/source/MaterialXGenShader/CMakeLists.txt index 07adb8a732..8a98ccdbb4 100644 --- a/source/MaterialXGenShader/CMakeLists.txt +++ b/source/MaterialXGenShader/CMakeLists.txt @@ -1,14 +1,18 @@ file(GLOB_RECURSE materialx_source "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp") file(GLOB_RECURSE materialx_headers "${CMAKE_CURRENT_SOURCE_DIR}/*.h*") +set(GENSHADER_MTLX_MODULES MaterialXFormat MaterialXCore) +if(MATERIALX_BUILD_PERFETTO_TRACING) + list(APPEND GENSHADER_MTLX_MODULES MaterialXTrace) +endif() + mx_add_library(MaterialXGenShader SOURCE_FILES ${materialx_source} HEADER_FILES ${materialx_headers} MTLX_MODULES - MaterialXFormat - MaterialXCore + ${GENSHADER_MTLX_MODULES} EXPORT_DEFINE MATERIALX_GENSHADER_EXPORTS) From 0d253c205a8579999e0821d028f0181e6742478d Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 14:52:30 -0500 Subject: [PATCH 10/17] Use MATERIALX_BUILD_PERFETTO_TRACING in RenderGlsl.cpp The CMake flag was renamed from MATERIALX_BUILD_TRACING to MATERIALX_BUILD_PERFETTO_TRACING in PR #2742. Update all #ifdef guards in RenderGlsl.cpp to match. 
--- .../MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp index e902a72e5d..6a1cef3575 100644 --- a/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp +++ b/source/MaterialXTest/MaterialXRenderGlsl/RenderGlsl.cpp @@ -19,14 +19,14 @@ #include -#ifdef MATERIALX_BUILD_TRACING +#ifdef MATERIALX_BUILD_PERFETTO_TRACING #include #include #endif namespace mx = MaterialX; -#ifdef MATERIALX_BUILD_TRACING +#ifdef MATERIALX_BUILD_PERFETTO_TRACING // GPU timing utilities namespace { @@ -74,7 +74,7 @@ class GpuTimerQuery }; } // anonymous namespace -#endif // MATERIALX_BUILD_TRACING +#endif // MATERIALX_BUILD_PERFETTO_TRACING // // Render validation tester for the GLSL shading language @@ -418,7 +418,7 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, // Frame 0 often includes driver shader compilation; analyze in Python to discard warmup for (unsigned int frameIdx = 0; frameIdx < testOptions.framesPerMaterial; ++frameIdx) { -#ifdef MATERIALX_BUILD_TRACING +#ifdef MATERIALX_BUILD_PERFETTO_TRACING // GPU timing with timer queries uint64_t cpuStartNs = getCurrentTimeNs(); GpuTimerQuery gpuTimer; @@ -426,7 +426,7 @@ bool GlslShaderRenderTester::runRenderer(const std::string& shaderName, #endif _renderer->render(); -#ifdef MATERIALX_BUILD_TRACING +#ifdef MATERIALX_BUILD_PERFETTO_TRACING gpuTimer.end(); // glFinish ensures GPU is done, making CPU trace scope accurate From 9e3023251e292650ad582a767a0163cb8c15749b Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 15:01:34 -0500 Subject: [PATCH 11/17] Fix `MATERIALX_BUILD_PERFETTO_TRACING` in an XML comment --- resources/Materials/TestSuite/_options.mtlx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/Materials/TestSuite/_options.mtlx 
b/resources/Materials/TestSuite/_options.mtlx index 834aaf99a1..1a9d56bce8 100644 --- a/resources/Materials/TestSuite/_options.mtlx +++ b/resources/Materials/TestSuite/_options.mtlx @@ -79,7 +79,7 @@ --> - From 611abfbd34fa77a793bd735f9135f97168bc3faa Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 16:25:54 -0500 Subject: [PATCH 12/17] Add async event support for GPU timing in MaterialXTrace Port AsyncTrack enum, Sink::asyncEvent(), and MX_TRACE_ASYNC macro from the pre-merge proto2 branch. This enables GPU timer query results to be emitted as events on a dedicated "GPU" track in Perfetto traces. Use std::numeric_limits::max() for the GPU track ID to guarantee no collision with OS thread IDs. --- source/MaterialXTrace/PerfettoSink.cpp | 42 ++++++++++++++++++++++++++ source/MaterialXTrace/PerfettoSink.h | 2 ++ source/MaterialXTrace/Tracing.h | 34 +++++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/source/MaterialXTrace/PerfettoSink.cpp b/source/MaterialXTrace/PerfettoSink.cpp index 691c73c08f..d7ffc6667b 100644 --- a/source/MaterialXTrace/PerfettoSink.cpp +++ b/source/MaterialXTrace/PerfettoSink.cpp @@ -7,7 +7,9 @@ #ifdef MATERIALX_BUILD_PERFETTO_TRACING +#include #include +#include #include // Define Perfetto trace categories for MaterialX @@ -31,6 +33,10 @@ MATERIALX_NAMESPACE_BEGIN namespace Tracing { +// Stable Perfetto track IDs for async tracks (must not collide with thread IDs). +// Use max uint64_t minus small offsets -- no OS will assign these as thread IDs. 
+constexpr uint64_t GPU_TRACK_ID = std::numeric_limits<uint64_t>::max(); + PerfettoSink::PerfettoSink(std::string outputPath, size_t bufferSizeKb) : _outputPath(std::move(outputPath)) { @@ -41,6 +47,14 @@ PerfettoSink::PerfettoSink(std::string outputPath, size_t bufferSizeKb) args.backends |= perfetto::kInProcessBackend; perfetto::Tracing::Initialize(args); perfetto::TrackEvent::Register(); + + // Initialize async track descriptors with stable IDs and names + { + perfetto::Track gpuTrack(GPU_TRACK_ID); + auto desc = gpuTrack.Serialize(); + desc.set_name("GPU"); + perfetto::TrackEvent::SetTrackDescriptor(gpuTrack, desc); + } }); // Create and start a tracing session @@ -155,6 +169,34 @@ void PerfettoSink::counter(Category category, const char* name, double value) } } +void PerfettoSink::asyncEvent(AsyncTrack track, Category category, + const char* eventName, uint64_t startNs, uint64_t durationNs) +{ + (void) track; // Only the GPU track is supported; this reference keeps builds with assert compiled out (NDEBUG) free of unused-parameter warnings + assert(track == AsyncTrack::GPU && "Only AsyncTrack::GPU is currently supported"); + perfetto::Track perfTrack(GPU_TRACK_ID); + + // Emit begin and end events with explicit timestamps; categories without a dedicated case fall back to "mx.render" + switch (category) + { + case Category::Render: + TRACE_EVENT_BEGIN("mx.render", nullptr, perfTrack, startNs, + [&](perfetto::EventContext ctx) { ctx.event()->set_name(eventName); }); + TRACE_EVENT_END("mx.render", perfTrack, startNs + durationNs); + break; + case Category::ShaderGen: + TRACE_EVENT_BEGIN("mx.shadergen", nullptr, perfTrack, startNs, + [&](perfetto::EventContext ctx) { ctx.event()->set_name(eventName); }); + TRACE_EVENT_END("mx.shadergen", perfTrack, startNs + durationNs); + break; + default: + TRACE_EVENT_BEGIN("mx.render", nullptr, perfTrack, startNs, + [&](perfetto::EventContext ctx) { ctx.event()->set_name(eventName); }); + TRACE_EVENT_END("mx.render", perfTrack, startNs + durationNs); + break; + } +} + void PerfettoSink::setThreadName(const char* name) { // Set thread name for trace visualization diff --git
a/source/MaterialXTrace/PerfettoSink.h b/source/MaterialXTrace/PerfettoSink.h index b0012e86ec..87d5a0d7e7 100644 --- a/source/MaterialXTrace/PerfettoSink.h +++ b/source/MaterialXTrace/PerfettoSink.h @@ -70,6 +70,8 @@ class PerfettoSink : public Sink void beginEvent(Category category, const char* name) override; void endEvent(Category category) override; void counter(Category category, const char* name, double value) override; + void asyncEvent(AsyncTrack track, Category category, + const char* eventName, uint64_t startNs, uint64_t durationNs) override; void setThreadName(const char* name) override; private: diff --git a/source/MaterialXTrace/Tracing.h b/source/MaterialXTrace/Tracing.h index 283fd33dca..c89dedb061 100644 --- a/source/MaterialXTrace/Tracing.h +++ b/source/MaterialXTrace/Tracing.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -55,6 +56,15 @@ enum class Category Count }; +/// @enum AsyncTrack +/// Async track identifiers for operations with explicit timing (e.g., GPU work). +enum class AsyncTrack +{ + /// GPU render operations (measured via GL timer queries) + GPU = 0 + // Add more tracks here as needed (e.g., Compile, Transfer) +}; + /// @class Sink /// Abstract tracing sink interface. /// @@ -74,6 +84,17 @@ class MX_TRACE_API Sink /// Record a counter value (e.g., GPU time, memory usage). virtual void counter(Category category, const char* name, double value) = 0; + /// Record an async event with explicit timing (e.g., GPU operations). + /// This creates a slice on a separate track, useful for GPU work that + /// runs asynchronously from CPU traces. 
+ /// @param track The async track to record on (e.g., AsyncTrack::GPU) + /// @param category The trace category for filtering + /// @param eventName Name of the event (e.g., material name) + /// @param startNs Start timestamp in nanoseconds (can be approximate) + /// @param durationNs Duration in nanoseconds (should be accurate) + virtual void asyncEvent(AsyncTrack track, Category category, + const char* eventName, uint64_t startNs, uint64_t durationNs) = 0; + /// Set the current thread's name for trace visualization. virtual void setThreadName(const char* name) = 0; }; @@ -142,6 +163,14 @@ class MX_TRACE_API Dispatcher _sink->counter(category, name, value); } + /// Record an async event with explicit timing. + void asyncEvent(AsyncTrack track, Category category, + const char* eventName, uint64_t startNs, uint64_t durationNs) + { + if (_sink) + _sink->asyncEvent(track, category, eventName, startNs, durationNs); + } + private: Dispatcher() = default; Dispatcher(const Dispatcher&) = delete; @@ -234,6 +263,10 @@ MATERIALX_NAMESPACE_END #define MX_TRACE_COUNTER(category, name, value) \ MaterialX::Tracing::Dispatcher::getInstance().counter(category, name, value) +/// Record an async event with explicit timing (e.g., GPU operations). +#define MX_TRACE_ASYNC(track, category, eventName, startNs, durationNs) \ + MaterialX::Tracing::Dispatcher::getInstance().asyncEvent(track, category, eventName, startNs, durationNs) + /// Begin a trace event (must be paired with MX_TRACE_END). 
#define MX_TRACE_BEGIN(category, name) \ MaterialX::Tracing::Dispatcher::getInstance().beginEvent(category, name) @@ -247,6 +280,7 @@ MATERIALX_NAMESPACE_END #define MX_TRACE_SCOPE(category, name) #define MX_TRACE_FUNCTION(category) #define MX_TRACE_COUNTER(category, name, value) +#define MX_TRACE_ASYNC(track, category, eventName, startNs, durationNs) #define MX_TRACE_BEGIN(category, name) #define MX_TRACE_END(category) From 80209aafb4c76f40e21e3730dc8a4fa71659cdc9 Mon Sep 17 00:00:00 2001 From: Pavlo Penenko Date: Fri, 13 Feb 2026 17:09:33 -0500 Subject: [PATCH 13/17] Fix enableTracing default to true for CI trace generation The previous commit accidentally set enableTracing to false, which would prevent CI from producing Perfetto traces. Restore to true to match the behavior established in PR #2742. --- resources/Materials/TestSuite/_options.mtlx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/Materials/TestSuite/_options.mtlx b/resources/Materials/TestSuite/_options.mtlx index 1a9d56bce8..30c6e2a986 100644 --- a/resources/Materials/TestSuite/_options.mtlx +++ b/resources/Materials/TestSuite/_options.mtlx @@ -83,7 +83,7 @@ When enabled, generates .perfetto-trace files in outputDirectory. Default is false to avoid overhead when not profiling. --> - +