Add function-like macro support to simplecpreprocessor.

Macro arguments are pre-expanded by a per-argument TokenExpander that
inherits the set of macro names currently being expanded, so a definition
such as "#define G F(G)" terminates instead of recursing without bound.
The index lines below are carried over unchanged and are not recomputed.

diff --git a/.gitignore b/.gitignore
index 1ef5456..f623493 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ htmlcov
 
 # Visual Studio Code settings
 .vscode/
+*.py,cover
diff --git a/README.md b/README.md
index c573cfe..d3dfb50 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,15 @@ Gotchas
 Supported macros: ifdef, ifndef, if, elif, define, undef, include, else,
 pragma (only "once")
 
+The #define directive supports both object-like and function-like macros:
+ * Object-like macros: `#define NAME value`
+ * Function-like macros: `#define NAME(params) body`
+   - Function-like macros must have '(' immediately after the macro name
+   - Supports zero or more parameters
+   - Arguments are expanded before substitution
+   - Nested macro calls are supported
+   - A macro name without '()' is not expanded (treated as identifier)
+
 The #if and #elif directives support constant expression evaluation including:
  * Integer constants
  * Arithmetic operators: +, -, *, /, %
@@ -32,4 +41,6 @@ Limitations:
  * Multiline continuations supported but whitespace handling may not be 1:1
    with real preprocessors.
    Trailing whitespace is removed if before comment, indentation from first line is removed
- * Semi-colon handling may not be identical to real preprocessors
\ No newline at end of file
+ * Semi-colon handling may not be identical to real preprocessors
+ * Function-like macros do not support stringification (#) or
+   token pasting (##) operators
\ No newline at end of file
diff --git a/simplecpreprocessor/core.py b/simplecpreprocessor/core.py
index fb2ffe9..42ed8f9 100644
--- a/simplecpreprocessor/core.py
+++ b/simplecpreprocessor/core.py
@@ -23,6 +23,14 @@ def constants_to_token_constants(constants):
 TOKEN_CONSTANTS = constants_to_token_constants(platform.PLATFORM_CONSTANTS)
 
 
+class FunctionLikeMacro:
+    """Represents a function-like macro with parameters."""
+
+    def __init__(self, params, body):
+        self.params = params
+        self.body = body
+
+
 class Defines:
     def __init__(self, base):
         self.defines = base.copy()
@@ -90,6 +98,62 @@ def process_define(self, **kwargs):
         else:  # pragma: no cover
             # Defensive: should never happen as tokenizer ensures non-ws tokens
             return
+
+        # Check if this is a function-like macro
+        # Function-like macros have '(' immediately after name (no whitespace)
+        if i+1 < len(chunk) and chunk[i+1].value == "(":
+            # Parse parameters
+            params = []
+            j = i + 2  # Start after '('
+            param_start = j
+            paren_depth = 0
+
+            while j < len(chunk):
+                token = chunk[j]
+                if token.value == "(" and not token.whitespace:
+                    paren_depth += 1
+                elif token.value == ")" and not token.whitespace:
+                    if paren_depth == 0:
+                        # End of parameter list
+                        # Add last parameter if any
+                        if param_start < j:
+                            param_tokens = chunk[param_start:j]
+                            param_name = None
+                            for pt in param_tokens:
+                                if not pt.whitespace:
+                                    param_name = pt.value
+                                    break
+                            if param_name:
+                                params.append(param_name)
+                        # Body starts after ')' and any whitespace
+                        body_start = j + 1
+                        while (body_start < len(chunk) and
+                               chunk[body_start].whitespace):
+                            body_start += 1
+                        body = chunk[body_start:-1]  # Exclude newline
+                        self.defines[define_name] = FunctionLikeMacro(
+                            params, body
+                        )
+                        return
+                    else:
+                        paren_depth -= 1
+                elif token.value == "," and paren_depth == 0:
+                    # Parameter separator
+                    param_tokens = chunk[param_start:j]
+                    param_name = None
+                    for pt in param_tokens:
+                        if not pt.whitespace:
+                            param_name = pt.value
+                            break
+                    if param_name:
+                        params.append(param_name)
+                    param_start = j + 1
+                j += 1
+
+            # If we get here, something went wrong
+            # Fall through to object-like macro handling
+
+        # Object-like macro
         self.defines[define_name] = chunk[i+2:-1]
 
     def process_endif(self, **kwargs):
diff --git a/simplecpreprocessor/tests/test_function_macros.py b/simplecpreprocessor/tests/test_function_macros.py
new file mode 100644
index 0000000..7d3d13f
--- /dev/null
+++ b/simplecpreprocessor/tests/test_function_macros.py
@@ -0,0 +1,260 @@
+from __future__ import absolute_import
+from simplecpreprocessor import preprocess
+from simplecpreprocessor.filesystem import FakeFile
+
+
+def run_case(input_list, expected):
+    ret = preprocess(input_list)
+    output = "".join(ret)
+    assert output == expected
+
+
+def test_function_macro_simple():
+    """Test basic function-like macro with one parameter."""
+    f_obj = FakeFile("header.h", [
+        "#define SQUARE(x) ((x) * (x))\n",
+        "SQUARE(5)\n"])
+    expected = "((5) * (5))\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_two_params():
+    """Test function-like macro with two parameters."""
+    f_obj = FakeFile("header.h", [
+        "#define MAX(a, b) ((a) > (b) ? (a) : (b))\n",
+        "MAX(1, 2)\n"])
+    expected = "((1) > (2) ? (1) : (2))\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_three_params():
+    """Test function-like macro with three parameters."""
+    f_obj = FakeFile("header.h", [
+        "#define ADD3(a, b, c) ((a) + (b) + (c))\n",
+        "ADD3(1, 2, 3)\n"])
+    expected = "((1) + (2) + (3))\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_no_params():
+    """Test function-like macro with no parameters."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC() 42\n",
+        "FUNC()\n"])
+    expected = "42\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_with_expression():
+    """Test function-like macro with expression arguments."""
+    f_obj = FakeFile("header.h", [
+        "#define DOUBLE(x) ((x) * 2)\n",
+        "DOUBLE(3 + 4)\n"])
+    expected = "((3 + 4) * 2)\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_not_called():
+    """Test that function-like macro name without () is not expanded."""
+    f_obj = FakeFile("header.h", [
+        "#define SQUARE(x) ((x) * (x))\n",
+        "SQUARE\n"])
+    expected = "SQUARE\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_whitespace_before_paren():
+    """Test function-like macro with whitespace before opening paren."""
+    f_obj = FakeFile("header.h", [
+        "#define SQUARE(x) ((x) * (x))\n",
+        "SQUARE (5)\n"])
+    # With whitespace before (, it should still be treated as a call
+    expected = "((5) * (5))\n"
+    run_case(f_obj, expected)
+
+
+def test_object_like_macro_with_parens_in_body():
+    """Test object-like macro with parentheses in body."""
+    f_obj = FakeFile("header.h", [
+        "#define FOO (x)\n",
+        "FOO\n"])
+    expected = "(x)\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_nested_calls():
+    """Test nested function-like macro calls."""
+    f_obj = FakeFile("header.h", [
+        "#define DOUBLE(x) ((x) * 2)\n",
+        "DOUBLE(DOUBLE(3))\n"])
+    expected = "((((3) * 2)) * 2)\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_multiple_on_line():
+    """Test multiple function-like macro calls on one line."""
+    f_obj = FakeFile("header.h", [
+        "#define ADD(a, b) ((a) + (b))\n",
+        "ADD(1, 2) ADD(3, 4)\n"])
+    expected = "((1) + (2)) ((3) + (4))\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_empty_arg():
+    """Test function-like macro with empty argument."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x, y) x y\n",
+        "FUNC(a, )\n"])
+    # The space between x and y in the body is preserved
+    expected = "a \n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_redefine():
+    """Test redefining a function-like macro."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x) (x)\n",
+        "FUNC(1)\n",
+        "#undef FUNC\n",
+        "#define FUNC(x) ((x) * 2)\n",
+        "FUNC(2)\n"])
+    expected = "(1)\n((2) * 2)\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_nested_parens_in_params():
+    """Test function-like macro with nested parentheses in parameter."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x) x\n",
+        "FUNC((a, b))\n"])
+    expected = "(a, b)\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_missing_args():
+    """Test function-like macro with fewer arguments than parameters."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x, y, z) x y z\n",
+        "FUNC(a)\n"])
+    # Missing arguments are treated as empty
+    expected = "a  \n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_arg_with_trailing_whitespace():
+    """Test function-like macro with whitespace in arguments."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x) [x]\n",
+        "FUNC( a )\n"])
+    expected = "[a]\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_unclosed_paren():
+    """Test function-like macro with unclosed parenthesis.
+
+    When a macro call has no closing paren, it's not expanded.
+    """
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x) [x]\n",
+        "FUNC(a\n"])
+    # Not expanded - treated as regular tokens
+    expected = "FUNC(a\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_malformed_definition():
+    """Test malformed function-like macro definition.
+
+    When a macro definition has no closing paren in the parameter list,
+    it falls back to object-like macro behavior.
+    """
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x\n",
+        "FUNC\n"])
+    # Falls back to object-like macro: FUNC is defined as "x"
+    expected = "x\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_whitespace_only_param():
+    """Test function-like macro with whitespace-only parameter."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC( ) body\n",
+        "FUNC()\n"])
+    # Whitespace-only param is ignored, treated as zero params
+    expected = "body\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_trailing_comma_whitespace():
+    """Test function-like macro with trailing comma and whitespace."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(a, ) a\n",
+        "FUNC(1, 2)\n"])
+    # Second param is empty (whitespace only)
+    expected = "1\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_multiple_empty_params():
+    """Test function-like macro with empty parameter in the middle."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(a, , c) a c\n",
+        "FUNC(1, 2, 3)\n"])
+    # Second param is empty (whitespace only) so skipped
+    # Macro has params [a, c], invoked with args [1, 2, 3]
+    expected = "1 2\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_nested_parens_in_definition():
+    """Test function-like macro with nested parens in parameter list.
+
+    This is invalid C. The parser extracts '(' as the parameter name
+    due to the way it finds the first non-whitespace token.
+    """
+    f_obj = FakeFile("header.h", [
+        "#define FUNC((x)) x\n",
+        "FUNC((5))\n"])
+    # Parameter is parsed as '(', body is 'x'
+    # When called, '(' is not found in the arguments, so body 'x' is output
+    expected = "x\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_deeply_nested_parens_in_definition():
+    """Test function-like macro with deeply nested parens in definition.
+
+    This exercises the paren_depth tracking in parameter parsing.
+    """
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(((a))) body\n",
+        "FUNC()\n"])
+    # Parens are tracked, parameter extracted correctly
+    expected = "body\n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_trailing_comma_no_whitespace():
+    """Test function-like macro with trailing comma and no whitespace."""
+    f_obj = FakeFile("header.h", [
+        "#define FUNC(x, y) x y\n",
+        "FUNC(a,)\n"])
+    # Second arg is completely empty (no whitespace)
+    expected = "a \n"
+    run_case(f_obj, expected)
+
+
+def test_function_macro_enclosing_macro_in_argument():
+    """Test that an enclosing macro is not re-expanded inside an argument.
+
+    G expands to F(G); the G inside the argument must be left as-is
+    instead of being expanded again without bound.
+    """
+    f_obj = FakeFile("header.h", [
+        "#define F(x) x\n",
+        "#define G F(G)\n",
+        "G\n"])
+    expected = "G\n"
+    run_case(f_obj, expected)
diff --git a/simplecpreprocessor/tokens.py b/simplecpreprocessor/tokens.py
index 9277120..45058f7 100644
--- a/simplecpreprocessor/tokens.py
+++ b/simplecpreprocessor/tokens.py
@@ -56,17 +56,147 @@ def __init__(self, defines):
         self.seen = set()
 
     def expand_tokens(self, tokens):
-        for token in tokens:
+        # Imported here rather than at module level to avoid a circular
+        # import with core; done once per call instead of per macro hit
+        from .core import FunctionLikeMacro
+
+        # Convert to list to allow lookahead
+        token_list = list(tokens)
+        i = 0
+        while i < len(token_list):
+            token = token_list[i]
             if token.value in self.seen:
                 yield token
+                i += 1
             else:
                 resolved = self.defines.get(token.value, token)
                 if resolved is token:
                     yield token
+                    i += 1
                 else:
-                    self.seen.add(token.value)
-                    yield from self.expand_tokens(resolved)
-                    self.seen.remove(token.value)
+                    if isinstance(resolved, FunctionLikeMacro):
+                        # Look ahead for '('
+                        j = i + 1
+                        # Skip whitespace
+                        while j < len(token_list):
+                            if not token_list[j].whitespace:
+                                break
+                            j += 1
+
+                        if j < len(token_list) and token_list[j].value == "(":
+                            # Extract arguments
+                            args, end_pos = self._extract_args(
+                                token_list, j + 1
+                            )
+                            if args is not None:
+                                # Substitute before guarding the name:
+                                # arguments may call this same macro
+                                expanded = self._expand_function_macro(
+                                    resolved, args
+                                )
+                                self.seen.add(token.value)
+                                yield from self.expand_tokens(expanded)
+                                self.seen.remove(token.value)
+                                i = end_pos + 1
+                                continue
+                        # No '(' found, don't expand
+                        yield token
+                        i += 1
+                    else:
+                        # Object-like macro
+                        self.seen.add(token.value)
+                        yield from self.expand_tokens(resolved)
+                        self.seen.remove(token.value)
+                        i += 1
+
+    def _extract_args(self, tokens, start):
+        """Extract arguments from a function-like macro call.
+
+        Returns (args, end_pos) where args is a list of token lists,
+        or (None, None) if parsing fails.
+        """
+        args = []
+        current_arg = []
+        paren_depth = 0
+        i = start
+
+        while i < len(tokens):
+            token = tokens[i]
+            if token.value == "(":
+                paren_depth += 1
+                current_arg.append(token)
+            elif token.value == ")":
+                if paren_depth == 0:
+                    # End of argument list
+                    # Add last argument (even if empty)
+                    if current_arg or not args:
+                        args.append(current_arg)
+                    return args, i
+                else:
+                    paren_depth -= 1
+                    current_arg.append(token)
+            elif token.value == "," and paren_depth == 0:
+                # Argument separator
+                args.append(current_arg)
+                current_arg = []
+            else:
+                current_arg.append(token)
+            i += 1
+
+        # No closing ')' found
+        return None, None
+
+    def _expand_function_macro(self, macro, args):
+        """Expand a function-like macro with given arguments.
+
+        Arguments are stripped of surrounding whitespace and fully
+        expanded before being substituted for the parameters in the
+        macro body. Missing arguments are treated as empty.
+
+        Returns a list of tokens.
+        """
+        # Strip leading/trailing whitespace from each arg
+        clean_args = []
+        for arg in args:
+            # Remove leading whitespace
+            start = 0
+            while start < len(arg) and arg[start].whitespace:
+                start += 1
+            # Remove trailing whitespace
+            end = len(arg)
+            while end > start and arg[end-1].whitespace:
+                end -= 1
+            clean_args.append(arg[start:end])
+
+        # Expand arguments first (recursive expansion)
+        # Each argument gets its own expander, seeded with the names that
+        # are currently being expanded, so an enclosing macro appearing in
+        # an argument is left alone instead of recursing without bound
+        expanded_args = []
+        for arg in clean_args:
+            expander = TokenExpander(self.defines)
+            expander.seen = set(self.seen)
+            expanded_arg = list(expander.expand_tokens(arg))
+            expanded_args.append(expanded_arg)
+
+        # Build parameter -> argument mapping
+        param_map = {}
+        for i, param in enumerate(macro.params):
+            if i < len(expanded_args):
+                param_map[param] = expanded_args[i]
+            else:
+                param_map[param] = []  # Missing argument
+
+        # Substitute parameters in body
+        result = []
+        for token in macro.body:
+            if token.value in param_map:
+                # Replace with argument
+                result.extend(param_map[token.value])
+            else:
+                result.append(token)
+
+        return result
 
 
 class Tokenizer: