From fab0ca1d7992f4e6432c7388836bec8e2209d0ad Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Dec 2025 22:15:05 +0000 Subject: [PATCH 1/7] Initial plan From 2b188b969ffd60580217f191b0b9c735929e057d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Dec 2025 22:22:21 +0000 Subject: [PATCH 2/7] Add basic Wireshark Lua dissector generator structure Co-authored-by: AaronWebster <3766083+AaronWebster@users.noreply.github.com> --- build_defs.bzl | 33 ++ compiler/back_end/lua/BUILD | 45 +++ compiler/back_end/lua/__init__.py | 15 + compiler/back_end/lua/build_defs.bzl | 119 +++++++ compiler/back_end/lua/dissector_generator.py | 334 +++++++++++++++++++ compiler/back_end/lua/emboss_codegen_lua.py | 109 ++++++ testdata/wireshark_test.emb | 46 +++ 7 files changed, 701 insertions(+) create mode 100644 compiler/back_end/lua/BUILD create mode 100644 compiler/back_end/lua/__init__.py create mode 100644 compiler/back_end/lua/build_defs.bzl create mode 100644 compiler/back_end/lua/dissector_generator.py create mode 100644 compiler/back_end/lua/emboss_codegen_lua.py create mode 100644 testdata/wireshark_test.emb diff --git a/build_defs.bzl b/build_defs.bzl index 781f99e..572f7fe 100644 --- a/build_defs.bzl +++ b/build_defs.bzl @@ -22,9 +22,12 @@ cc_emboss_library, which creates a header file and can be used as a dep in a There is also a convenience macro, `emboss_cc_library()`, which creates an `emboss_library` and a `cc_emboss_library` based on it. + +For Wireshark Lua dissector generation, use `emboss_lua_library()`. """ load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") +load("//compiler/back_end/lua:build_defs.bzl", _lua_emboss_library = "lua_emboss_library") def emboss_cc_library(name, srcs, deps = [], import_dirs = [], enable_enum_traits = True, **kwargs): """Constructs a C++ library from an .emb file.""" @@ -257,3 +260,33 @@ cc_emboss_library = rule( }, provides = [CcInfo, EmbossInfo], ) + +def emboss_lua_library(name, srcs, deps = [], import_dirs = [], **kwargs): + """Constructs a Wireshark Lua dissector from an .emb file. + + Args: + name: The name of the library. + srcs: List of .emb source files (must be exactly one). + deps: List of emboss_library dependencies. + import_dirs: List of import directories. + **kwargs: Additional arguments. + """ + if len(srcs) != 1: + fail( + "Must specify exactly one Emboss source file for emboss_lua_library.", + "srcs", + ) + + emboss_library( + name = name + "_ir", + srcs = srcs, + deps = [dep + "_ir" for dep in deps], + import_dirs = import_dirs, + **kwargs + ) + + _lua_emboss_library( + name = name, + deps = [":" + name + "_ir"], + **kwargs + ) diff --git a/compiler/back_end/lua/BUILD b/compiler/back_end/lua/BUILD new file mode 100644 index 0000000..bf211bd --- /dev/null +++ b/compiler/back_end/lua/BUILD @@ -0,0 +1,45 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Emboss Wireshark Lua dissector code generator. + +load("@rules_python//python:py_binary.bzl", "py_binary") +load("@rules_python//python:py_library.bzl", "py_library") + +package( + default_visibility = [ + "//visibility:private", + ], +) + +py_binary( + name = "emboss_codegen_lua", + srcs = ["emboss_codegen_lua.py"], + python_version = "PY3", + visibility = ["//visibility:public"], + deps = [ + ":dissector_generator", + "//compiler/util:ir_data", + ], +) + +py_library( + name = "dissector_generator", + srcs = ["dissector_generator.py"], + deps = [ + "//compiler/util:attribute_util", + "//compiler/util:ir_data", + "//compiler/util:ir_util", + ], +) diff --git a/compiler/back_end/lua/__init__.py b/compiler/back_end/lua/__init__.py new file mode 100644 index 0000000..4ac04db --- /dev/null +++ b/compiler/back_end/lua/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Emboss Wireshark Lua dissector code generator.""" diff --git a/compiler/back_end/lua/build_defs.bzl b/compiler/back_end/lua/build_defs.bzl new file mode 100644 index 0000000..3bbb25e --- /dev/null +++ b/compiler/back_end/lua/build_defs.bzl @@ -0,0 +1,119 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- mode: python; -*- +# vim:set ft=blazebuild: +"""Build defs for Emboss Wireshark Lua dissector generation.""" + +load("//:build_defs.bzl", "EmbossInfo") + +EmbossLuaDissectorInfo = provider( + fields = { + "dissectors": "(list[File]) The `.lua` dissector files from this rule.", + "transitive_dissectors": "(list[File]) The `.lua` dissector files from this rule and all dependencies.", + }, + doc = "Provide Lua dissector files.", +) + +def _lua_emboss_aspect_impl(target, ctx): + emboss_lua_compiler = ctx.executable._emboss_lua_compiler + emboss_info = target[EmbossInfo] + src = target[EmbossInfo].direct_source + dissectors = [ctx.actions.declare_file(src.basename + ".lua", sibling = src)] + args = ctx.actions.args() + args.add("--input-file") + args.add_all(emboss_info.direct_ir) + args.add("--output-file") + args.add_all(dissectors) + ctx.actions.run( + executable = emboss_lua_compiler, + arguments = [args], + inputs = emboss_info.direct_ir, + outputs = dissectors, + ) + transitive_dissectors = depset( + direct = dissectors, + transitive = [ + dep[EmbossLuaDissectorInfo].transitive_dissectors + for dep in ctx.rule.attr.deps + ], + ) + return [ + EmbossLuaDissectorInfo( + dissectors = depset(dissectors), + transitive_dissectors = transitive_dissectors, + ), + ] + +_lua_emboss_aspect = aspect( + implementation = _lua_emboss_aspect_impl, + attr_aspects = ["deps"], + required_providers = [EmbossInfo], + attrs = { + "_emboss_lua_compiler": attr.label( + executable = True, + cfg = "exec", + default = "@com_google_emboss//compiler/back_end/lua:emboss_codegen_lua", + ), + }, +) + +def _lua_emboss_library_impl(ctx): + if len(ctx.attr.deps) != 1: + fail("`deps` attribute must contain exactly one label.", attr = "deps") + dep = ctx.attr.deps[0] + return [ + dep[EmbossInfo], + DefaultInfo(files = dep[EmbossLuaDissectorInfo].dissectors), + ] + +lua_emboss_library = rule( + implementation = _lua_emboss_library_impl, + attrs = { + "deps": attr.label_list( + aspects = [_lua_emboss_aspect], + allow_rules = ["emboss_library"], + allow_files = False, + ), + }, + provides = [EmbossInfo], +) + +def emboss_lua_library(name, srcs, deps = [], import_dirs = [], **kwargs): + """Constructs a Lua dissector library from an .emb file. + + Args: + name: The name of the library. + srcs: List of .emb source files (must be exactly one). + deps: List of emboss_library dependencies. + import_dirs: List of import directories. + **kwargs: Additional arguments passed to the rules. + """ + if len(srcs) != 1: + fail( + "Must specify exactly one Emboss source file for emboss_lua_library.", + "srcs", + ) + + native.alias( + name = name + "_ir_alias", + actual = srcs[0].replace(".emb", "_ir") if ".emb" in srcs[0] else srcs[0] + "_ir", + **kwargs + ) + + lua_emboss_library( + name = name, + deps = [":" + name + "_ir_alias"], + **kwargs + ) diff --git a/compiler/back_end/lua/dissector_generator.py b/compiler/back_end/lua/dissector_generator.py new file mode 100644 index 0000000..eccc7c3 --- /dev/null +++ b/compiler/back_end/lua/dissector_generator.py @@ -0,0 +1,334 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Wireshark Lua dissector code generator. + +Generates Lua dissector code for Wireshark from Emboss IR. +""" + +from typing import List, Optional, Tuple +from compiler.util import error +from compiler.util import ir_data +from compiler.util import ir_data_utils +from compiler.util import ir_util +from compiler.util import attribute_util + + +def _get_documentation_text(documentation: List[ir_data.Documentation]) -> str: + """Extract documentation text from documentation list. + + Only processes double-dash comments, not hash comments. + """ + if not documentation: + return "" + + doc_parts = [] + for doc in documentation: + if doc.text: + # Documentation text is already extracted from double-dash comments + # by the parser + doc_parts.append(doc.text.strip()) + + return " ".join(doc_parts) + + +def _get_filter_attribute(attributes: List[ir_data.Attribute]) -> Optional[str]: + """Get the wireshark_filter attribute value if present.""" + for attr in attributes: + if attr.name and attr.name.text == "wireshark_filter": + if attr.value and attr.value.string_constant: + return attr.value.string_constant.text + return None + + +def _sanitize_lua_identifier(name: str) -> str: + """Sanitize a name to be a valid Lua identifier.""" + # Replace invalid characters with underscores + sanitized = "" + for char in name: + if char.isalnum() or char == "_": + sanitized += char + else: + sanitized += "_" + + # Ensure it doesn't start with a number + if sanitized and sanitized[0].isdigit(): + sanitized = "_" + sanitized + + return sanitized + + +def _get_lua_type_for_field(field: ir_data.Field) -> Optional[str]: + """Get the Wireshark Lua type for a field.""" + if not field.type: + return None + + if field.type.atomic_type: + atomic = field.type.atomic_type + if atomic.reference: + ref_name = ir_util.get_reference_name(atomic.reference) + # Check if it's a built-in type + if ref_name in ["UInt", "Int"]: + return "uint" if ref_name == "UInt" else "int" + # Otherwise it's likely an enum or custom type + return None + + return None + + +def _generate_enum_value_string(enum: ir_data.Enum, type_name: str) -> str: + """Generate Lua code for enum value strings.""" + lines = [] + lines.append(f"local {type_name}_values = {{") + + for value in enum.value: + if value.name and value.value: + value_name = value.name.canonical_name.text + # Get numeric value + if value.value.constant and hasattr(value.value.constant, 'value'): + numeric_value = value.value.constant.value + lines.append(f" [{numeric_value}] = \"{value_name}\",") + + lines.append("}") + return "\n".join(lines) + + +def _generate_field_dissector(field: ir_data.Field, + parent_filter: str, + offset_expr: str = "offset", + indent: str = " ") -> Tuple[List[str], List[str]]: + """Generate Lua code to dissect a field. + + Returns: + Tuple of (field_declarations, dissector_code_lines) + """ + if not field.name: + return ([], []) + + field_name = field.name.canonical_name.text + sanitized_name = _sanitize_lua_identifier(field_name) + + # Get filter name from attribute or construct from parent + filter_name = _get_filter_attribute(field.attribute) + if not filter_name: + filter_name = f"{parent_filter}.{sanitized_name}" + + field_declarations = [] + dissector_code = [] + + # Get field documentation + doc = _get_documentation_text(field.documentation) + + # Check if this is a structure field + is_struct = False + if field.type and field.type.atomic_type and field.type.atomic_type.reference: + # This might be a struct reference - we'll handle it differently + is_struct = True + + # Get size + size_expr = None + if field.location and field.location.size: + # Try to get constant size + if hasattr(field.location.size, 'constant'): + const = field.location.size.constant + if hasattr(const, 'value'): + size_expr = const.value + + if not is_struct and size_expr: + # Generate ProtoField declaration + lua_type = _get_lua_type_for_field(field) + if lua_type: + size_bits = int(size_expr) if size_expr else 0 + size_bytes = (size_bits + 7) // 8 + + # Determine appropriate Wireshark field type based on size + if lua_type == "uint": + if size_bytes <= 1: + ws_type = "uint8" + elif size_bytes <= 2: + ws_type = "uint16" + elif size_bytes <= 4: + ws_type = "uint32" + else: + ws_type = "uint64" + elif lua_type == "int": + if size_bytes <= 1: + ws_type = "int8" + elif size_bytes <= 2: + ws_type = "int16" + elif size_bytes <= 4: + ws_type = "int32" + else: + ws_type = "int64" + else: + ws_type = "bytes" + + desc = f'"{field_name}"' + if doc: + desc = f'"{field_name} - {doc}"' + + field_declarations.append( + f'ProtoField.{ws_type}("{filter_name}", {desc})' + ) + + # Generate dissector code + dissector_code.append(f"{indent}-- {field_name}") + if doc: + dissector_code.append(f"{indent}-- {doc}") + dissector_code.append( + f"{indent}subtree:add(fields.{sanitized_name}, buffer({offset_expr}, {size_bytes}))" + ) + dissector_code.append(f"{indent}{offset_expr} = {offset_expr} + {size_bytes}") + + return (field_declarations, dissector_code) + + +def _generate_struct_dissector(struct: ir_data.Structure, + type_def: ir_data.TypeDefinition, + protocol_name: str) -> Tuple[List[str], List[str]]: + """Generate Lua code to dissect a structure. + + Returns: + Tuple of (field_declarations, dissector_function_lines) + """ + struct_name = type_def.name.canonical_name.text if type_def.name else "Unknown" + + # Get filter prefix from attribute or use protocol name + filter_prefix = _get_filter_attribute(type_def.attribute) + if not filter_prefix: + filter_prefix = protocol_name.lower() + + field_declarations = [] + dissector_lines = [] + + # Generate fields + for field in struct.field: + field_decls, field_code = _generate_field_dissector( + field, filter_prefix, "offset", " " + ) + field_declarations.extend(field_decls) + dissector_lines.extend(field_code) + + return (field_declarations, dissector_lines) + + +def generate_dissector(ir: ir_data.EmbossIr, protocol_name: Optional[str] = None) -> Tuple[str, List]: + """Generate a Wireshark Lua dissector from Emboss IR. + + Args: + ir: The Emboss IR to generate code from + protocol_name: Optional protocol name override + + Returns: + Tuple of (generated_code, errors) + """ + errors = [] + + if not ir.module: + errors.append([error.error("", ir.source_location, "No modules in IR")]) + return ("", errors) + + # Get the main module (first one) + main_module = ir.module[0] + + # Determine protocol name + if not protocol_name: + if main_module.source_file_name: + # Use source file name without extension + protocol_name = main_module.source_file_name.replace(".emb", "") + # Remove path components + if "/" in protocol_name: + protocol_name = protocol_name.split("/")[-1] + else: + protocol_name = "emboss" + + protocol_name_sanitized = _sanitize_lua_identifier(protocol_name) + + lines = [] + + # Header + lines.append("-- Wireshark Lua dissector generated by Emboss") + lines.append("-- DO NOT EDIT") + lines.append("") + + # Module documentation + if main_module.documentation: + doc = _get_documentation_text(main_module.documentation) + if doc: + lines.append(f"-- {doc}") + lines.append("") + + # Create protocol + lines.append(f'local {protocol_name_sanitized}_proto = Proto("{protocol_name}", "{protocol_name} Protocol")') + lines.append("") + + # Collect all field declarations + all_field_declarations = [] + all_dissector_code = [] + + # Process enums first + enum_value_strings = [] + for type_def in main_module.type: + if type_def.enumeration: + type_name = type_def.name.canonical_name.text if type_def.name else "Unknown" + enum_value_strings.append(_generate_enum_value_string(type_def.enumeration, type_name)) + + if enum_value_strings: + lines.extend(enum_value_strings) + lines.append("") + + # Process structures + for type_def in main_module.type: + if type_def.structure: + field_decls, dissector_code = _generate_struct_dissector( + type_def.structure, type_def, protocol_name + ) + all_field_declarations.extend(field_decls) + all_dissector_code.extend(dissector_code) + + # Define fields + if all_field_declarations: + lines.append("-- Protocol fields") + lines.append("local fields = {") + for i, decl in enumerate(all_field_declarations): + field_name = decl.split('("')[1].split('"')[0].split('.')[-1] if '("' in decl else f"field_{i}" + lines.append(f" {field_name} = {decl},") + lines.append("}") + lines.append(f"{protocol_name_sanitized}_proto.fields = {{") + for i, decl in enumerate(all_field_declarations): + field_name = decl.split('("')[1].split('"')[0].split('.')[-1] if '("' in decl else f"field_{i}" + lines.append(f" fields.{field_name},") + lines.append("}") + lines.append("") + + # Dissector function + lines.append(f"function {protocol_name_sanitized}_proto.dissector(buffer, pinfo, tree)") + lines.append(" pinfo.cols.protocol = \"" + protocol_name.upper() + "\"") + lines.append(f' local subtree = tree:add({protocol_name_sanitized}_proto, buffer(), "{protocol_name} Protocol")') + lines.append(" local offset = 0") + lines.append("") + + if all_dissector_code: + lines.extend(all_dissector_code) + + lines.append("end") + lines.append("") + + # Register dissector (user will need to customize this) + lines.append("-- Register the dissector") + lines.append("-- Uncomment and customize the following line to register on a specific port:") + lines.append(f"-- local udp_table = DissectorTable.get(\"udp.port\")") + lines.append(f"-- udp_table:add(12345, {protocol_name_sanitized}_proto)") + + return ("\n".join(lines), errors) diff --git a/compiler/back_end/lua/emboss_codegen_lua.py b/compiler/back_end/lua/emboss_codegen_lua.py new file mode 100644 index 0000000..ea51383 --- /dev/null +++ b/compiler/back_end/lua/emboss_codegen_lua.py @@ -0,0 +1,109 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Emboss Wireshark Lua dissector code generator. + +This is a driver program that reads IR, feeds it to dissector_generator, and prints +the result. +""" + +from __future__ import print_function + +import argparse +import os +import sys + +from compiler.back_end.lua import dissector_generator +from compiler.util import error +from compiler.util import ir_data +from compiler.util import ir_data_utils + + +def _parse_command_line(argv): + """Parses the given command-line arguments.""" + parser = argparse.ArgumentParser( + description="Emboss compiler Wireshark Lua back end.", prog=argv[0] + ) + parser.add_argument("--input-file", type=str, help=".emb.ir file to compile.") + parser.add_argument( + "--output-file", + type=str, + help="Write Lua dissector to file. If not specified, write to stdout.", + ) + parser.add_argument( + "--protocol-name", + type=str, + help="Protocol name for the dissector. If not specified, derived from input file.", + ) + parser.add_argument( + "--color-output", + default="if_tty", + choices=["always", "never", "if_tty", "auto"], + help="Print error messages using color. 'auto' is a synonym for 'if_tty'.", + ) + return parser.parse_args(argv[1:]) + + +def _show_errors(errors, ir, color_output): + """Prints errors with source code snippets.""" + source_codes = {} + for module in ir.module: + source_codes[module.source_file_name] = module.source_text + use_color = color_output == "always" or ( + color_output in ("auto", "if_tty") and os.isatty(sys.stderr.fileno()) + ) + print(error.format_errors(errors, source_codes, use_color), file=sys.stderr) + + +def generate_dissector_and_log_errors(ir, protocol_name, color_output): + """Generates a Lua dissector and logs any errors. + + Arguments: + ir: EmbossIr of the module. + protocol_name: Optional protocol name override. + color_output: "always", "never", "if_tty", "auto" + + Returns: + A tuple of (dissector_code, errors) + """ + dissector, errors = dissector_generator.generate_dissector(ir, protocol_name) + if errors: + _show_errors(errors, ir, color_output) + return (dissector, errors) + + +def main(flags): + if flags.input_file: + with open(flags.input_file) as f: + ir = ir_data_utils.IrDataSerializer.from_json(ir_data.EmbossIr, f.read()) + else: + ir = ir_data_utils.IrDataSerializer.from_json( + ir_data.EmbossIr, sys.stdin.read() + ) + + dissector, errors = generate_dissector_and_log_errors( + ir, flags.protocol_name, flags.color_output + ) + if errors: + return 1 + if flags.output_file: + with open(flags.output_file, "w") as f: + f.write(dissector) + else: + print(dissector) + return 0 + + +if __name__ == "__main__": + sys.exit(main(_parse_command_line(sys.argv))) diff --git a/testdata/wireshark_test.emb b/testdata/wireshark_test.emb new file mode 100644 index 0000000..6f206b1 --- /dev/null +++ b/testdata/wireshark_test.emb @@ -0,0 +1,46 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +-- Test protocol for Wireshark Lua dissector generation + +[$default byte_order: "LittleEndian"] +[(wireshark_filter): "testproto"] + +-- Message type enumeration +enum MessageType: + -- Request message + REQUEST = 0 + -- Response message + RESPONSE = 1 + -- Error message + ERROR = 2 + +-- Status code enumeration +enum StatusCode: + -- Operation successful + OK = 0 + -- Operation failed + FAILURE = 1 + +-- Main protocol header structure +struct ProtocolHeader: + [(wireshark_filter): "testproto.header"] + -- Message type field + 0 [+1] MessageType msg_type + -- Status code field + 1 [+1] StatusCode status + -- Message length in bytes + 2 [+2] UInt length + -- Sequence number for tracking messages + 4 [+4] UInt sequence_num From f3c0bc87cdb53dac1dac12f368ec990e41383ebf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Dec 2025 22:27:33 +0000 Subject: [PATCH 3/7] Improve Lua dissector generator with proper IR handling Co-authored-by: AaronWebster <3766083+AaronWebster@users.noreply.github.com> --- compiler/back_end/lua/dissector_generator.py | 92 ++++++++++++-------- 1 file changed, 56 insertions(+), 36 deletions(-) diff --git a/compiler/back_end/lua/dissector_generator.py b/compiler/back_end/lua/dissector_generator.py index eccc7c3..55e488d 100644 --- a/compiler/back_end/lua/dissector_generator.py +++ b/compiler/back_end/lua/dissector_generator.py @@ -22,7 +22,6 @@ from compiler.util import ir_data from compiler.util import ir_data_utils from compiler.util import ir_util -from compiler.util import attribute_util def _get_documentation_text(documentation: List[ir_data.Documentation]) -> str: @@ -45,10 +44,9 @@ def _get_documentation_text(documentation: List[ir_data.Documentation]) -> str: def _get_filter_attribute(attributes: List[ir_data.Attribute]) -> Optional[str]: """Get the wireshark_filter attribute value if present.""" - for attr in attributes: - if attr.name and attr.name.text == "wireshark_filter": - if attr.value and attr.value.string_constant: - return attr.value.string_constant.text + attr_value = ir_util.get_attribute(attributes, "wireshark_filter") + if attr_value and attr_value.string_constant: + return attr_value.string_constant.text return None @@ -69,7 +67,7 @@ def _sanitize_lua_identifier(name: str) -> str: return sanitized -def _get_lua_type_for_field(field: ir_data.Field) -> Optional[str]: +def _get_lua_type_for_field(field: ir_data.Field, ir: ir_data.EmbossIr) -> Optional[str]: """Get the Wireshark Lua type for a field.""" if not field.type: return None @@ -77,12 +75,17 @@ def _get_lua_type_for_field(field: ir_data.Field) -> Optional[str]: if field.type.atomic_type: atomic = field.type.atomic_type if atomic.reference: - ref_name = ir_util.get_reference_name(atomic.reference) - # Check if it's a built-in type - if ref_name in ["UInt", "Int"]: - return "uint" if ref_name == "UInt" else "int" - # Otherwise it's likely an enum or custom type - return None + # Find the referenced type + referenced_type = ir_util.find_object(atomic.reference, ir) + if referenced_type: + type_name = referenced_type.name.canonical_name.object_path[-1] + # Check if it's a built-in type + if type_name in ["UInt"]: + return "uint" + elif type_name in ["Int"]: + return "int" + # Otherwise it's likely an enum or custom type + return None return None @@ -94,10 +97,10 @@ def _generate_enum_value_string(enum: ir_data.Enum, type_name: str) -> str: for value in enum.value: if value.name and value.value: - value_name = value.name.canonical_name.text - # Get numeric value - if value.value.constant and hasattr(value.value.constant, 'value'): - numeric_value = value.value.constant.value + value_name = value.name.canonical_name.object_path[-1] + # Get numeric value using ir_util + numeric_value = ir_util.constant_value(value.value) + if numeric_value is not None: lines.append(f" [{numeric_value}] = \"{value_name}\",") lines.append("}") @@ -106,6 +109,7 @@ def _generate_enum_value_string(enum: ir_data.Enum, type_name: str) -> str: def _generate_field_dissector(field: ir_data.Field, parent_filter: str, + ir: ir_data.EmbossIr, offset_expr: str = "offset", indent: str = " ") -> Tuple[List[str], List[str]]: """Generate Lua code to dissect a field. @@ -116,7 +120,7 @@ def _generate_field_dissector(field: ir_data.Field, if not field.name: return ([], []) - field_name = field.name.canonical_name.text + field_name = field.name.canonical_name.object_path[-1] sanitized_name = _sanitize_lua_identifier(field_name) # Get filter name from attribute or construct from parent @@ -130,30 +134,39 @@ def _generate_field_dissector(field: ir_data.Field, # Get field documentation doc = _get_documentation_text(field.documentation) - # Check if this is a structure field + # Check if this is a structure or enum field is_struct = False + is_enum = False + enum_value_table = None + if field.type and field.type.atomic_type and field.type.atomic_type.reference: - # This might be a struct reference - we'll handle it differently - is_struct = True + # Find the referenced type + referenced_type = ir_util.find_object(field.type.atomic_type.reference, ir) + if referenced_type: + if referenced_type.enumeration: + is_enum = True + type_name = referenced_type.name.canonical_name.object_path[-1] + enum_value_table = f"{type_name}_values" + elif referenced_type.structure: + is_struct = True # Get size size_expr = None if field.location and field.location.size: - # Try to get constant size - if hasattr(field.location.size, 'constant'): - const = field.location.size.constant - if hasattr(const, 'value'): - size_expr = const.value + # Try to get constant size using ir_util + size_value = ir_util.constant_value(field.location.size) + if size_value is not None: + size_expr = str(size_value) if not is_struct and size_expr: # Generate ProtoField declaration - lua_type = _get_lua_type_for_field(field) - if lua_type: + lua_type = _get_lua_type_for_field(field, ir) + if lua_type or is_enum: size_bits = int(size_expr) if size_expr else 0 size_bytes = (size_bits + 7) // 8 # Determine appropriate Wireshark field type based on size - if lua_type == "uint": + if is_enum or lua_type == "uint": if size_bytes <= 1: ws_type = "uint8" elif size_bytes <= 2: @@ -178,9 +191,15 @@ def _generate_field_dissector(field: ir_data.Field, if doc: desc = f'"{field_name} - {doc}"' - field_declarations.append( - f'ProtoField.{ws_type}("{filter_name}", {desc})' - ) + # Build field declaration with enum value table if applicable + if is_enum and enum_value_table: + field_declarations.append( + f'ProtoField.{ws_type}("{filter_name}", {desc}, base.DEC, {enum_value_table})' + ) + else: + field_declarations.append( + f'ProtoField.{ws_type}("{filter_name}", {desc})' + ) # Generate dissector code dissector_code.append(f"{indent}-- {field_name}") @@ -196,13 +215,14 @@ def _generate_field_dissector(field: ir_data.Field, def _generate_struct_dissector(struct: ir_data.Structure, type_def: ir_data.TypeDefinition, - protocol_name: str) -> Tuple[List[str], List[str]]: + protocol_name: str, + ir: ir_data.EmbossIr) -> Tuple[List[str], List[str]]: """Generate Lua code to dissect a structure. Returns: Tuple of (field_declarations, dissector_function_lines) """ - struct_name = type_def.name.canonical_name.text if type_def.name else "Unknown" + struct_name = type_def.name.canonical_name.object_path[-1] if type_def.name else "Unknown" # Get filter prefix from attribute or use protocol name filter_prefix = _get_filter_attribute(type_def.attribute) @@ -215,7 +235,7 @@ def _generate_struct_dissector(struct: ir_data.Structure, # Generate fields for field in struct.field: field_decls, field_code = _generate_field_dissector( - field, filter_prefix, "offset", " " + field, filter_prefix, ir, "offset", " " ) field_declarations.extend(field_decls) dissector_lines.extend(field_code) @@ -281,7 +301,7 @@ def generate_dissector(ir: ir_data.EmbossIr, protocol_name: Optional[str] = None enum_value_strings = [] for type_def in main_module.type: if type_def.enumeration: - type_name = type_def.name.canonical_name.text if type_def.name else "Unknown" + type_name = type_def.name.canonical_name.object_path[-1] if type_def.name else "Unknown" enum_value_strings.append(_generate_enum_value_string(type_def.enumeration, type_name)) if enum_value_strings: @@ -292,7 +312,7 @@ def generate_dissector(ir: ir_data.EmbossIr, protocol_name: Optional[str] = None for type_def in main_module.type: if type_def.structure: field_decls, dissector_code = _generate_struct_dissector( - type_def.structure, type_def, protocol_name + type_def.structure, type_def, protocol_name, ir ) all_field_declarations.extend(field_decls) all_dissector_code.extend(dissector_code) From 3ee60fb72361e540c375d0aaaad8ce28eaa80953 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Dec 2025 22:29:46 +0000 Subject: [PATCH 4/7] Add comprehensive documentation and tests for Lua dissector generator Co-authored-by: AaronWebster <3766083+AaronWebster@users.noreply.github.com> --- compiler/back_end/lua/BUILD | 11 +- compiler/back_end/lua/README.md | 159 ++++++++++++++++++ .../back_end/lua/dissector_generator_test.py | 94 +++++++++++ testdata/example_protocol.emb | 102 +++++++++++ 4 files changed, 365 insertions(+), 1 deletion(-) create mode 100644 compiler/back_end/lua/README.md create mode 100644 compiler/back_end/lua/dissector_generator_test.py create mode 100644 testdata/example_protocol.emb diff --git a/compiler/back_end/lua/BUILD b/compiler/back_end/lua/BUILD index bf211bd..ac3e402 100644 --- a/compiler/back_end/lua/BUILD +++ b/compiler/back_end/lua/BUILD @@ -16,6 +16,7 @@ load("@rules_python//python:py_binary.bzl", "py_binary") load("@rules_python//python:py_library.bzl", "py_library") +load("@rules_python//python:py_test.bzl", "py_test") package( default_visibility = [ @@ -38,8 +39,16 @@ py_library( name = "dissector_generator", srcs = ["dissector_generator.py"], deps = [ - "//compiler/util:attribute_util", "//compiler/util:ir_data", "//compiler/util:ir_util", ], ) + +py_test( + name = "dissector_generator_test", + srcs = ["dissector_generator_test.py"], + deps = [ + ":dissector_generator", + "//compiler/util:ir_data", + ], +) diff --git a/compiler/back_end/lua/README.md b/compiler/back_end/lua/README.md new file mode 100644 index 0000000..146b1d8 --- /dev/null +++ b/compiler/back_end/lua/README.md @@ -0,0 +1,159 @@ +# Emboss Wireshark Lua Dissector Generator + +This is an Emboss backend that generates Wireshark Lua dissectors from `.emb` files. + +## Overview + +The Lua dissector generator creates Wireshark protocol dissectors that can parse and display binary protocol data based on Emboss structure definitions. This allows you to: + +- Automatically generate Wireshark dissectors from your Emboss protocol specifications +- Display protocol fields with proper names and descriptions +- Show enum values as human-readable text +- Maintain hierarchical relationships between nested structures +- Add custom filter names for Wireshark display filters + +## Usage + +### Using Bazel + +Add a `emboss_lua_library` target to your BUILD file: + +```python +load("//:build_defs.bzl", "emboss_lua_library") + +emboss_lua_library( + name = "my_protocol_dissector", + srcs = ["my_protocol.emb"], +) +``` + +This will generate a `my_protocol.emb.lua` file that can be loaded into Wireshark. + +### Using the Command Line + +You can also generate dissectors directly from the command line: + +```bash +# First, generate the IR file +bazel run //compiler/front_end:emboss_front_end -- \ + /path/to/protocol.emb \ + --output-file=/tmp/protocol.emb.ir + +# Then, generate the Lua dissector +bazel run //compiler/back_end/lua:emboss_codegen_lua -- \ + --input-file=/tmp/protocol.emb.ir \ + --output-file=/tmp/protocol.lua \ + --protocol-name=myproto +``` + +### Loading into Wireshark + +1. Copy the generated `.lua` file to your Wireshark plugins directory: + - Linux: `~/.local/lib/wireshark/plugins/` + - macOS: `~/.wireshark/plugins/` or `/Applications/Wireshark.app/Contents/PlugIns/wireshark/` + - Windows: `%APPDATA%\Wireshark\plugins\` + +2. Edit the dissector file to uncomment and configure the registration code at the bottom: + ```lua + local udp_table = DissectorTable.get("udp.port") + udp_table:add(12345, my_protocol_proto) -- Replace 12345 with your port + ``` + +3. Restart Wireshark or reload Lua plugins (Ctrl+Shift+L) + +## Emboss Annotations + +### Wireshark Filter Names + +You can customize the Wireshark filter names using the `wireshark_filter` attribute: + +```emboss +-- At module level +[(wireshark_filter): "myproto"] + +-- At struct level +struct MyHeader: + [(wireshark_filter): "myproto.header"] + 0 [+2] UInt field1 + +-- At field level +struct MyData: + 0 [+1] UInt status + [(wireshark_filter): "myproto.custom_status"] +``` + +Without this annotation, filter names are automatically generated from the structure hierarchy. + +### Documentation Comments + +Use double-dash (`--`) comments to add descriptions that will appear in Wireshark: + +```emboss +-- This is a protocol header +struct ProtocolHeader: + -- Message type identifier + 0 [+1] MessageType msg_type + -- Current protocol version + 1 [+1] UInt version +``` + +Note: Hash (`#`) comments are for Emboss license headers and are not included in the dissector. + +## Example + +Here's a complete example: + +```emboss +# Copyright notice here + +-- Network protocol definition +[(wireshark_filter): "netproto"] + +-- Message types +enum MessageType: + -- Data packet + DATA = 0x01 + -- Acknowledgment packet + ACK = 0x02 + -- Error packet + ERROR = 0x03 + +-- Protocol header +struct Header: + -- Message type + 0 [+1] MessageType type + -- Sequence number + 1 [+2] UInt seq_num + -- Payload length in bytes + 3 [+2] UInt length +``` + +This generates a dissector that: +- Shows `MessageType` values as "DATA", "ACK", or "ERROR" instead of hex values +- Displays field descriptions in Wireshark +- Uses filter names like `netproto.type`, `netproto.seq_num`, etc. + +## Features + +- **Automatic Type Mapping**: Emboss UInt/Int types are mapped to appropriate Wireshark field types (uint8, uint16, uint32, uint64, int8, etc.) based on field size +- **Enum Support**: Enum fields display symbolic names instead of numeric values using Wireshark value strings +- **Documentation**: Double-dash comments from your `.emb` file appear as field descriptions in Wireshark +- **Hierarchical Filters**: Filter names preserve the structure hierarchy for easy filtering +- **Byte Order Support**: Respects the byte order specified in your Emboss definitions + +## Limitations + +- Currently supports only fixed-size fields (dynamic-size fields are not yet supported) +- Arrays and complex nested structures need manual adjustments +- Conditional fields (if statements in Emboss) are not yet fully supported +- The generated dissector provides a starting point that may need customization for complex protocols + +## Future Enhancements + +Planned improvements include: +- Support for nested structures and arrays +- Conditional field handling +- Dynamic size calculation +- Automatic port registration based on attributes +- Better handling of bit fields +- Support for multiple packet types in one dissector diff --git a/compiler/back_end/lua/dissector_generator_test.py b/compiler/back_end/lua/dissector_generator_test.py new file mode 100644 index 0000000..5255145 --- /dev/null +++ b/compiler/back_end/lua/dissector_generator_test.py @@ -0,0 +1,94 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for dissector_generator module.""" + +import unittest +from compiler.back_end.lua import dissector_generator +from compiler.util import ir_data + + +class DissectorGeneratorTest(unittest.TestCase): + """Tests for the Lua dissector generator.""" + + def test_basic_enum_generation(self): + """Test that we can generate a simple enum.""" + ir = ir_data.EmbossIr() + module = ir_data.Module() + module.source_file_name = "test.emb" + module.source_text = "-- Test" + + # Create an enum + enum_type = ir_data.TypeDefinition() + enum_type.name = ir_data.NameDefinition() + enum_type.name.canonical_name = ir_data.CanonicalName() + enum_type.name.canonical_name.object_path = ["TestEnum"] + + enum_def = ir_data.Enum() + + # Add enum value + enum_val = ir_data.EnumValue() + enum_val.name = ir_data.NameDefinition() + enum_val.name.canonical_name = ir_data.CanonicalName() + enum_val.name.canonical_name.object_path = ["VALUE_ONE"] + enum_val.value = ir_data.Expression() + enum_val.value.constant = ir_data.NumericConstant() + enum_val.value.constant.value = "1" + enum_def.value.append(enum_val) + + enum_type.enumeration = enum_def + enum_type.addressable_unit = ir_data.AddressableUnit.BIT + + module.type.append(enum_type) + ir.module.append(module) + + # Generate dissector + code, errors = dissector_generator.generate_dissector(ir, "test") + + self.assertEqual(len(errors), 0) + self.assertIn("TestEnum_values", code) + self.assertIn("VALUE_ONE", code) + self.assertIn("test_proto", code) + + def test_documentation_extraction(self): + """Test that documentation is properly extracted.""" + doc = ir_data.Documentation() + doc.text = "This is a test comment" + + result = dissector_generator._get_documentation_text([doc]) + self.assertEqual(result, "This is a test comment") + + def test_sanitize_lua_identifier(self): + """Test Lua identifier sanitization.""" + # Test normal identifier + self.assertEqual( + dissector_generator._sanitize_lua_identifier("normal_name"), + "normal_name" + ) + + # Test identifier starting with number + self.assertEqual( + dissector_generator._sanitize_lua_identifier("123name"), + "_123name" + ) + + # Test identifier with invalid characters + self.assertEqual( + dissector_generator._sanitize_lua_identifier("name-with-dashes"), + "name_with_dashes" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/testdata/example_protocol.emb b/testdata/example_protocol.emb new file mode 100644 index 0000000..6a6c439 --- /dev/null +++ b/testdata/example_protocol.emb @@ -0,0 +1,102 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +-- Example network protocol for Wireshark Lua dissector generation +-- This demonstrates various features of the dissector generator + +[$default byte_order: "LittleEndian"] +[(wireshark_filter): "exampleproto"] + +-- Packet type enumeration +enum PacketType: + -- Initialization request + INIT_REQUEST = 0x01 + -- Initialization response + INIT_RESPONSE = 0x02 + -- Data transfer packet + DATA = 0x10 + -- Acknowledgment + ACK = 0x11 + -- Error notification + ERROR = 0xFF + +-- Error code enumeration +enum ErrorCode: + -- No error + SUCCESS = 0 + -- Invalid packet format + INVALID_FORMAT = 1 + -- Checksum mismatch + CHECKSUM_ERROR = 2 + -- Unsupported version + VERSION_MISMATCH = 3 + -- Unknown error + UNKNOWN = 255 + +-- Protocol version information +struct VersionInfo: + -- Major version number + 0 [+1] UInt major + -- Minor version number + 1 [+1] UInt minor + -- Patch version number + 2 [+1] UInt patch + +-- Common packet header +struct PacketHeader: + [(wireshark_filter): "exampleproto.header"] + -- Packet type identifier + 0 [+1] PacketType packet_type + -- Protocol version + 1 [+3] VersionInfo version + -- Sequence number for packet ordering + 4 [+4] UInt sequence_number + -- Total packet length including header + 8 [+2] UInt total_length + +-- Initialization packet +struct InitPacket: + -- Common header + 0 [+10] PacketHeader header + -- Client identifier + 10 [+4] UInt client_id + -- Requested features bitmask + 14 [+2] UInt features + +-- Data packet +struct DataPacket: + -- Common header + 0 [+10] PacketHeader header + -- Data payload size in bytes + 10 [+2] UInt payload_size + -- Data checksum (CRC16) + 12 [+2] UInt checksum + +-- Acknowledgment packet +struct AckPacket: + -- Common header + 0 [+10] PacketHeader header + -- Acknowledged sequence number + 10 [+4] UInt ack_sequence + -- Window size for flow control + 14 [+2] UInt window_size + +-- Error packet +struct ErrorPacket: + -- Common header + 0 [+10] PacketHeader header + -- Error code + 10 [+1] ErrorCode error_code + -- Length of error message + 11 [+1] UInt message_length From 1428c779fcbf072f890c7089010b8e25dc3a3778 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Dec 2025 22:32:51 +0000 Subject: [PATCH 5/7] Fix type resolution and improve enum handling in Lua dissector generator Co-authored-by: AaronWebster <3766083+AaronWebster@users.noreply.github.com> --- compiler/back_end/lua/dissector_generator.py | 29 +++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/compiler/back_end/lua/dissector_generator.py b/compiler/back_end/lua/dissector_generator.py index 55e488d..2e0c700 100644 --- a/compiler/back_end/lua/dissector_generator.py +++ b/compiler/back_end/lua/dissector_generator.py @@ -75,17 +75,20 @@ def _get_lua_type_for_field(field: ir_data.Field, ir: ir_data.EmbossIr) -> Optio if field.type.atomic_type: atomic = field.type.atomic_type if atomic.reference: - # Find the referenced type - referenced_type = ir_util.find_object(atomic.reference, ir) - if referenced_type: - type_name = referenced_type.name.canonical_name.object_path[-1] - # Check if it's a built-in type - if type_name in ["UInt"]: - return "uint" - elif type_name in ["Int"]: - return "int" - # Otherwise it's likely an enum or custom type - return None + # Get the type name from the reference + type_name = atomic.reference.canonical_name.object_path[-1] + + # Check if it's a built-in type first (before trying to resolve) + if type_name in ["UInt"]: + return "uint" + elif type_name in ["Int"]: + return "int" + + # Try to find the referenced type to check if it's an enum + referenced_type = ir_util.find_object_or_none(atomic.reference, ir) + if referenced_type and referenced_type.enumeration: + # It's an enum, we'll handle it specially + return "enum" return None @@ -141,7 +144,7 @@ def _generate_field_dissector(field: ir_data.Field, if field.type and field.type.atomic_type and field.type.atomic_type.reference: # Find the referenced type - referenced_type = ir_util.find_object(field.type.atomic_type.reference, ir) + referenced_type = ir_util.find_object_or_none(field.type.atomic_type.reference, ir) if referenced_type: if referenced_type.enumeration: is_enum = True @@ -166,7 +169,7 @@ def _generate_field_dissector(field: ir_data.Field, size_bytes = (size_bits + 7) // 8 # Determine appropriate Wireshark field type based on size - if is_enum or lua_type == "uint": + if is_enum or lua_type in ("uint", "enum"): if size_bytes <= 1: ws_type = "uint8" elif size_bytes <= 2: From 67800f768f49983d0dc20202fa28adb6a904d558 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Dec 2025 22:33:46 +0000 Subject: [PATCH 6/7] Add quick start guide for Wireshark Lua dissector generator Co-authored-by: AaronWebster <3766083+AaronWebster@users.noreply.github.com> --- compiler/back_end/lua/QUICKSTART.md | 146 ++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 compiler/back_end/lua/QUICKSTART.md diff --git a/compiler/back_end/lua/QUICKSTART.md b/compiler/back_end/lua/QUICKSTART.md new file mode 100644 index 0000000..54c547e --- /dev/null +++ b/compiler/back_end/lua/QUICKSTART.md @@ -0,0 +1,146 @@ +# Quick Start Guide: Wireshark Lua Dissector Generator + +This guide will help you quickly generate and use Wireshark dissectors from your Emboss protocol definitions. + +## Step 1: Define Your Protocol in Emboss + +Create a `.emb` file with your protocol definition: + +```emboss +# myprotocol.emb + +-- My custom network protocol +[(wireshark_filter): "myproto"] + +[$default byte_order: "LittleEndian"] + +-- Packet types +enum PacketType: + -- Data packet + DATA = 0x01 + -- Acknowledgment + ACK = 0x02 + +-- Protocol header +struct Header: + -- Packet type identifier + 0 [+1] PacketType type + -- Packet sequence number + 1 [+4] UInt seq_num + -- Payload length + 5 [+2] UInt length +``` + +## Step 2: Add to Your BUILD File + +```python +load("//:build_defs.bzl", "emboss_lua_library") + +emboss_lua_library( + name = "myprotocol_dissector", + srcs = ["myprotocol.emb"], +) +``` + +## Step 3: Build the Dissector + +```bash +bazel build :myprotocol_dissector +``` + +The generated `.lua` file will be in `bazel-bin/myprotocol.emb.lua` + +## Step 4: Install in Wireshark + +Copy the generated `.lua` file to your Wireshark plugins directory: + +**Linux:** +```bash +cp bazel-bin/myprotocol.emb.lua ~/.local/lib/wireshark/plugins/ +``` + +**macOS:** +```bash +cp bazel-bin/myprotocol.emb.lua ~/.wireshark/plugins/ +``` + +**Windows:** +```powershell +copy bazel-bin\myprotocol.emb.lua %APPDATA%\Wireshark\plugins\ +``` + +## Step 5: Configure Port Registration + +Edit the generated `.lua` file and uncomment the registration code at the bottom: + +```lua +-- Register the dissector +local udp_table = DissectorTable.get("udp.port") +udp_table:add(12345, myproto_proto) -- Replace 12345 with your port +``` + +For TCP: +```lua +local tcp_table = DissectorTable.get("tcp.port") +tcp_table:add(12345, myproto_proto) +``` + +## Step 6: Load in Wireshark + +1. Open Wireshark +2. Reload Lua plugins: Analyze → Reload Lua Plugins (or Ctrl+Shift+L) +3. Your dissector is now active! + +## Testing Your Dissector + +1. Capture some traffic on your configured port +2. Wireshark should automatically use your dissector +3. You should see your fields displayed with their names and descriptions +4. Enum values will show as text (e.g., "DATA" instead of "0x01") + +## Troubleshooting + +**Dissector not loading?** +- Check Wireshark's Lua console (Tools → Lua → Console) for errors +- Verify the .lua file is in the correct plugins directory +- Make sure you reloaded Lua plugins + +**Fields not displaying?** +- Verify your port registration matches your traffic +- Check that byte order matches your data +- Ensure field sizes are correct in your .emb file + +**Want to see all available filters?** +- In Wireshark, go to Edit → Preferences → Protocols +- Find your protocol in the list +- Or use the filter expression builder (Analyze → Display Filter Expression) + +## Advanced Features + +### Custom Filter Names + +```emboss +struct MyStruct: + [(wireshark_filter): "myproto.custom"] + 0 [+1] UInt field1 + [(wireshark_filter): "myproto.special_field"] +``` + +### Nested Structures + +```emboss +struct Inner: + 0 [+2] UInt value + +struct Outer: + 0 [+2] Inner inner_data + 2 [+4] UInt other_field +``` + +The dissector will maintain the hierarchy. + +## Next Steps + +- See the full README.md for all features +- Check out example_protocol.emb for a comprehensive example +- Read about Emboss language features in the main Emboss documentation From 2188668c76db878952e038bca6bb3136ffc6aec3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 00:57:30 +0000 Subject: [PATCH 7/7] Add comprehensive network protocol dissector test suite - Created network_headers.emb with Ethernet, IPv4, and UDP structs - Added generate_and_test.py to demonstrate complete workflow - Added test_network_dissector.py to create test PCAP and compare dissectors - Includes README documentation for the test suite - Demonstrates enum value strings for EtherType and IpProtocol - Shows comparison between tshark built-in and custom Emboss dissectors Co-authored-by: AaronWebster <3766083+AaronWebster@users.noreply.github.com> --- testdata/NETWORK_TEST_README.md | 140 +++++++++++++++++ testdata/generate_and_test.py | 114 ++++++++++++++ testdata/network_headers.emb | 60 ++++++++ testdata/test_network_dissector.py | 236 +++++++++++++++++++++++++++++ 4 files changed, 550 insertions(+) create mode 100644 testdata/NETWORK_TEST_README.md create mode 100755 testdata/generate_and_test.py create mode 100644 testdata/network_headers.emb create mode 100755 testdata/test_network_dissector.py diff --git a/testdata/NETWORK_TEST_README.md b/testdata/NETWORK_TEST_README.md new file mode 100644 index 0000000..edfef22 --- /dev/null +++ b/testdata/NETWORK_TEST_README.md @@ -0,0 +1,140 @@ +# Network Protocol Dissector Test + +This directory contains a comprehensive test demonstrating the Emboss Wireshark Lua dissector generator with real network protocol headers. + +## Files + +- **`network_headers.emb`** - Emboss definitions for Ethernet, IPv4, and UDP headers +- **`generate_and_test.py`** - Script to generate Lua dissector and run comparison test +- **`test_network_dissector.py`** - Creates test PCAP file and compares built-in vs custom dissectors + +## Network Protocols Defined + +### Ethernet Header (14 bytes) +- Destination MAC address (6 bytes) +- Source MAC address (6 bytes) +- EtherType (2 bytes) - enum with IPV4, ARP, IPV6 values + +### IPv4 Header (20 bytes, no options) +- Version and IHL (4 bits each) +- Type of Service +- Total Length +- Identification +- Flags and Fragment Offset +- TTL +- Protocol (1 byte) - enum with ICMP, TCP, UDP values +- Header Checksum +- Source IP Address +- Destination IP Address + +### UDP Header (8 bytes) +- Source Port +- Destination Port +- Length +- Checksum + +## Usage + +### Generate and Test + +Run the complete workflow: + +```bash +cd /home/runner/work/emboss/emboss +python3 testdata/generate_and_test.py +``` + +This will: +1. Generate IR from `network_headers.emb` +2. Generate Lua dissector from the IR +3. Create a test PCAP file with sample packet data +4. Show the generated Lua dissector code +5. (If tshark is installed) Compare built-in vs custom dissector output + +### Manual Steps + +You can also run the steps manually: + +```bash +# Step 1: Generate IR +python3 compiler/front_end/emboss_front_end.py \ + testdata/network_headers.emb \ + --output-file=/tmp/network_headers.emb.ir + +# Step 2: Generate Lua dissector +python3 compiler/back_end/lua/emboss_codegen_lua.py \ + --input-file=/tmp/network_headers.emb.ir \ + --output-file=/tmp/network_headers.lua + +# Step 3: View the generated dissector +cat /tmp/network_headers.lua + +# Step 4: Test with tshark (requires Wireshark/tshark installed) +python3 testdata/test_network_dissector.py +``` + +## Test Packet Structure + +The test creates a packet with the following structure: + +``` ++----------------+ +| Ethernet (14B) | +| Dst: 00:11:.. | +| Src: AA:BB:.. | +| Type: 0x0800 | ++----------------+ +| IPv4 (20B) | +| Src: 192...1 | +| Dst: 192..255 | +| Proto: UDP(17)| ++----------------+ +| UDP (8B) | +| Src: 12345 | +| Dst: 54321 | ++----------------+ +| Payload (53B) | +| "Hello, ..." | ++----------------+ +``` + +## Expected Output + +### Generated Lua Dissector Features + +The generated dissector includes: +- **Enum value tables**: EtherType and IpProtocol enums show text names instead of numbers +- **Protocol fields**: All header fields are defined as ProtoFields +- **Dissector function**: Parses packet and displays fields in Wireshark tree +- **Registration placeholder**: Template code to register on specific port + +### Comparison with Built-in Dissectors + +When tshark is available, the test shows: +- **Built-in dissectors**: Use standard filters `eth.*`, `ip.*`, `udp.*` +- **Custom dissector**: Uses `network.*` filter prefix +- **Both should show** similar protocol hierarchy and field values + +## Using the Dissector in Wireshark + +1. Copy the generated `.lua` file to Wireshark plugins directory: + - Linux: `~/.local/lib/wireshark/plugins/` + - macOS: `~/.wireshark/plugins/` + - Windows: `%APPDATA%\Wireshark\plugins\` + +2. Edit the Lua file to register on Ethernet type or specific port + +3. Reload Lua plugins in Wireshark (Ctrl+Shift+L) + +## Notes + +- The test demonstrates the complete workflow from `.emb` to working dissector +- Enum values (EtherType, IpProtocol) display as readable text in Wireshark +- The dissector handles proper byte ordering (BigEndian for network protocols) +- Bit fields in IPv4 header show how to handle sub-byte fields + +## Requirements + +- Python 3 +- Emboss compiler (front end and Lua backend) +- Optional: Wireshark/tshark for live testing diff --git a/testdata/generate_and_test.py b/testdata/generate_and_test.py new file mode 100755 index 0000000..9235e73 --- /dev/null +++ b/testdata/generate_and_test.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Generate Lua dissector from network_headers.emb and run comparison test. +This script demonstrates the complete workflow. +""" + +import sys +import os +import subprocess + +# Add project root to path +sys.path.insert(0, '/home/runner/work/emboss/emboss') + +from compiler.back_end.lua import dissector_generator +from compiler.util import ir_data +from compiler.util import ir_data_utils + + +def generate_ir_from_emb(emb_file, ir_file): + """Generate IR file from .emb file using embossc.""" + try: + # Use Python to run the front end + cmd = [ + sys.executable, + '/home/runner/work/emboss/emboss/compiler/front_end/emboss_front_end.py', + emb_file, + f'--output-file={ir_file}' + ] + + # Set PYTHONPATH + env = os.environ.copy() + env['PYTHONPATH'] = '/home/runner/work/emboss/emboss' + + result = subprocess.run(cmd, capture_output=True, text=True, env=env) + + if result.returncode != 0: + print(f"Error generating IR: {result.stderr}") + return False + + return True + except Exception as e: + print(f"Exception generating IR: {e}") + return False + + +def generate_lua_from_ir(ir_file, lua_file): + """Generate Lua dissector from IR file.""" + try: + with open(ir_file, 'r') as f: + ir = ir_data_utils.IrDataSerializer.from_json(ir_data.EmbossIr, f.read()) + + code, errors = dissector_generator.generate_dissector(ir, "network") + + if errors: + print(f"Errors generating dissector: {errors}") + return False + + with open(lua_file, 'w') as f: + f.write(code) + + return True + except Exception as e: + print(f"Exception generating Lua: {e}") + import traceback + traceback.print_exc() + return False + + +def main(): + print("Generating Lua dissector from network_headers.emb...") + print() + + emb_file = '/home/runner/work/emboss/emboss/testdata/network_headers.emb' + ir_file = '/tmp/network_headers.emb.ir' + lua_file = '/tmp/network_headers.lua' + + # Step 1: Generate IR + print(f"Step 1: Generating IR from {emb_file}...") + if not generate_ir_from_emb(emb_file, ir_file): + print("Failed to generate IR") + return 1 + print(f" Created: {ir_file}") + print() + + # Step 2: Generate Lua dissector + print(f"Step 2: Generating Lua dissector from IR...") + if not generate_lua_from_ir(ir_file, lua_file): + print("Failed to generate Lua dissector") + return 1 + print(f" Created: {lua_file}") + print() + + # Show the generated dissector + print("=" * 80) + print("GENERATED LUA DISSECTOR") + print("=" * 80) + with open(lua_file, 'r') as f: + print(f.read()) + print() + + # Step 3: Run the comparison test + print("=" * 80) + print("Step 3: Running dissector comparison test...") + print("=" * 80) + print() + + test_script = '/home/runner/work/emboss/emboss/testdata/test_network_dissector.py' + result = subprocess.run([sys.executable, test_script]) + + return result.returncode + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/testdata/network_headers.emb b/testdata/network_headers.emb new file mode 100644 index 0000000..65de470 --- /dev/null +++ b/testdata/network_headers.emb @@ -0,0 +1,60 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +-- Network protocol headers for Wireshark dissector testing +-- This file defines Ethernet, IPv4, and UDP headers matching standard specifications + +[$default byte_order: "BigEndian"] + + +enum EtherType: + IPV4 = 0x0800 + ARP = 0x0806 + IPV6 = 0x86DD + + +enum IpProtocol: + ICMP = 1 + TCP = 6 + UDP = 17 + + +struct EthernetHeader: + 0 [+6] UInt dst_mac + 6 [+6] UInt src_mac + 12 [+2] EtherType ethertype + + +struct IPv4Header: + 0 [+4] bits: + 0 [+4] UInt version + 4 [+4] UInt ihl + 1 [+1] UInt tos + 2 [+2] UInt total_length + 4 [+2] UInt identification + 6 [+2] bits: + 0 [+3] UInt flags + 3 [+13] UInt fragment_offset + 8 [+1] UInt ttl + 9 [+1] IpProtocol ip_protocol + 10 [+2] UInt header_checksum + 12 [+4] UInt src_ip + 16 [+4] UInt dst_ip + + +struct UdpHeader: + 0 [+2] UInt src_port + 2 [+2] UInt dst_port + 4 [+2] UInt length + 6 [+2] UInt checksum diff --git a/testdata/test_network_dissector.py b/testdata/test_network_dissector.py new file mode 100755 index 0000000..9a93139 --- /dev/null +++ b/testdata/test_network_dissector.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test script that creates a network packet and compares dissection results +between tshark's built-in dissectors and our custom Emboss Lua dissector. +""" + +import struct +import subprocess +import sys +import os +import tempfile + + +def create_test_packet(): + """ + Create a test Ethernet/IP/UDP packet with payload. + + Returns bytes representing: + - Ethernet header (14 bytes) + - IPv4 header (20 bytes) + - UDP header (8 bytes) + - Payload data (variable) + """ + + # Ethernet header (14 bytes) + dst_mac = bytes([0x00, 0x11, 0x22, 0x33, 0x44, 0x55]) # Destination MAC + src_mac = bytes([0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF]) # Source MAC + ethertype = struct.pack('>H', 0x0800) # IPv4 + + ethernet_header = dst_mac + src_mac + ethertype + + # Payload data (simple test message) + payload = b"Hello, Wireshark! This is a test payload from Emboss." + + # UDP header (8 bytes) + src_port = 12345 + dst_port = 54321 + udp_length = 8 + len(payload) # UDP header + payload + udp_checksum = 0 # We'll use 0 for simplicity (optional in IPv4) + + udp_header = struct.pack('>HHHH', src_port, dst_port, udp_length, udp_checksum) + + # IPv4 header (20 bytes, no options) + version_ihl = (4 << 4) | 5 # Version 4, IHL 5 (20 bytes) + tos = 0 + total_length = 20 + len(udp_header) + len(payload) + identification = 12345 + flags_fragment = 0x4000 # Don't fragment flag set + ttl = 64 + protocol = 17 # UDP + header_checksum = 0 # Will calculate below + src_ip = struct.pack('>I', 0xC0A80001) # 192.168.0.1 + dst_ip = struct.pack('>I', 0xC0A800FF) # 192.168.0.255 + + # Build IP header without checksum + ip_header_no_checksum = struct.pack( + '>BBHHHBBH', + version_ihl, tos, total_length, identification, + flags_fragment, ttl, protocol, 0 # checksum placeholder + ) + src_ip + dst_ip + + # Calculate IP header checksum + checksum = 0 + for i in range(0, len(ip_header_no_checksum), 2): + word = (ip_header_no_checksum[i] << 8) + ip_header_no_checksum[i + 1] + checksum += word + checksum = (checksum & 0xFFFF) + (checksum >> 16) + checksum = ~checksum & 0xFFFF + + # Rebuild IP header with correct checksum + ip_header = struct.pack( + '>BBHHHBBH', + version_ihl, tos, total_length, identification, + flags_fragment, ttl, protocol, checksum + ) + src_ip + dst_ip + + # Combine all parts + packet = ethernet_header + ip_header + udp_header + payload + + return packet + + +def write_pcap_file(packet, filename): + """Write packet to a PCAP file.""" + + # PCAP global header + magic_number = 0xa1b2c3d4 + version_major = 2 + version_minor = 4 + thiszone = 0 + sigfigs = 0 + snaplen = 65535 + network = 1 # Ethernet + + pcap_global_header = struct.pack( + '