17 changes: 17 additions & 0 deletions refactor_notes.md
@@ -0,0 +1,17 @@
# Evil Refactor Plan

## Two parsers

We will have two parsers: one for the preprocessor language, since that's mostly separate from the rest of what we have to care about, and one for C++ that might contain macro calls and such. This should reduce complexity in both parsers.
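
A sketch of the split (the function names here are hypothetical stand-ins, not the real entry points):

```python
def parse_directive(chunk: str) -> None:
    """Stand-in for the preprocessor-language parser."""
    print("directive:", chunk.strip())


def parse_cpp_chunk(chunk: str) -> None:
    """Stand-in for the C++-side parser; macro calls may appear in here."""
    print("c++ chunk:", chunk.strip())


def dispatch(chunk: str) -> None:
    # Directive lines start with '#'; everything else is C++ text.
    if chunk.lstrip().startswith('#'):
        parse_directive(chunk)
    else:
        parse_cpp_chunk(chunk)
```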

## Handling lines

Since C++ statements can span multiple lines, we're just going to chunk files at their preprocessor directives. Everything between directives gets ingested at once, which should let us handle multiline statements without too much trouble.
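
Roughly like this (a sketch of the idea, not the actual implementation):

```python
from typing import Iterator, List


def chunk_source(lines: List[str]) -> Iterator[List[str]]:
    """Split a file at its preprocessor directives.

    Each directive line is its own chunk; everything between directives is
    yielded as one block, so multiline C++ statements stay together.
    """
    block: List[str] = []
    for line in lines:
        if line.lstrip().startswith('#'):
            if block:
                yield block
                block = []
            yield [line]  # the directive itself
        else:
            block.append(line)
    if block:
        yield block
```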

## List structures

We'll consume brace, bracket, and paren structures, since they can be used to group macro arguments. We need to make sure we can write them out essentially unchanged when they aren't relevant to macros.
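
Something like this, assuming string and char literals have already been lexed away (real code has to skip those so a paren inside a string doesn't confuse the count):

```python
PAIRS = {'(': ')', '[': ']', '{': '}'}


def consume_group(text: str, start: int) -> int:
    """Return the index just past the group that opens at text[start].

    The text itself is never modified, so a group that turns out not to be
    a macro argument list can be written back out exactly as it came in.
    """
    stack = [PAIRS[text[start]]]
    i = start + 1
    while i < len(text) and stack:
        ch = text[i]
        if ch in PAIRS:
            stack.append(PAIRS[ch])  # a nested group opens
        elif ch == stack[-1]:
            stack.pop()  # the current group closes
        i += 1
    if stack:
        raise ValueError("unbalanced delimiters")
    return i
```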

## Whitespace

Whitespace is binary: there or not there. It will still complicate the parse structures a little, sadly.
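
In ply terms that can be a single rule that folds a run of blanks into one token (a sketch of the approach; the new lexer below does the same):

```python
def t_WHITESPACE(t):
    r"[\t ]+"  # any run of spaces/tabs is one token: present or absent, never "how much"
    return t
```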
2 changes: 2 additions & 0 deletions src/pepper/__init__.py
@@ -8,6 +8,8 @@
 from . import abstract_symbol_tree  # NOQA
 from . import parser  # NOQA
+
+from . import preprocessor_language_lexer  # NOQA

 from ._version import get_versions
 __version__: str = get_versions()['version']  # type: ignore
 del get_versions
6 changes: 4 additions & 2 deletions src/pepper/lexer.py
@@ -103,7 +103,7 @@ def t_PREPROCESSING_KEYWORD_ENDIF(t: lex.LexToken) -> lex.LexToken:


 def t_PREPROCESSING_KEYWORD_IF(t: lex.LexToken) -> lex.LexToken:
-    r'\#if\b'
+    r'\#\s?if\b'
     return t


@@ -153,7 +153,9 @@ def t_IDENTIFIER(t: lex.LexToken) -> lex.LexToken:


 def t_INT_LITERAL(t: lex.LexToken) -> lex.LexToken:
-    r'[0-9]+'
+    r'[0-9]+L?'
+    if t.value[-1] == 'L':
+        t.value = t.value[:-1]
     return t


204 changes: 204 additions & 0 deletions src/pepper/preprocessor_language_lexer.py
@@ -0,0 +1,204 @@
#! /usr/bin/env python3

# This file is a part of the Pepper project, https://github.com/devosoft/Pepper
# (C) Michigan State University, under the MIT License
# See LICENSE.txt for more information

"""
This module handles the lexing of the C/C++ preprocessing language
"""

import sys
import ply.lex as lex
import argparse
import pepper.symbol_table as symtable
from typing import List, Optional


# Single-character tokens that pass straight through the lexer as themselves.
DEFAULT_LITERALS = ['+', '-', '*', '/', '|', '&', '(',
                    ')', '=', ',', '{', '}', '[', ']',
                    '.', ';', '!', '<', '>', ':', '~',
                    '^', '@', '#', "'", '%', '?', '\\']

literals = DEFAULT_LITERALS

PREPROCESSING_KEYWORDS = [
    'include',
    'define',
    'ifdef',
    'ifndef',
    'endif',
    'else',
    'if',
    'py',
    'error',
    'warning',
    'pragma'
]


tokens = [
    'BOOL_AND',
    'BOOL_OR',
    'CHAR_LITERAL',
    'COMP_EQU',
    'COMP_GTE',
    'COMP_LTE',
    'COMP_NEQU',
    'DEFINED',
    'IDENTIFIER',
    'INT_LITERAL',
    'L_SHIFT',
    'LONG_COMMENT',
    'NEWLINE',
    'OTHER',
    'PREPROCESSING_NUMBER',
    'PUNCTUATOR',
    'R_SHIFT',
    'STRING_LITERAL',
    'SYSTEM_INCLUDE_LITERAL',
    'WHITESPACE',
] + [f"PREPROCESSING_KEYWORD_{keyword.upper()}" for keyword in PREPROCESSING_KEYWORDS]


def t_IDENTIFIER(t: lex.LexToken) -> lex.LexToken:
    r'([_a-zA-Z][_a-zA-Z0-9]*(\.\.\.)?)|(\.\.\.)'
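    # The '...' alternatives let an ellipsis, bare or glued to a name, lex as
    # an identifier; that is how variadic macro parameter lists come through.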

    if t.value in PREPROCESSING_KEYWORDS:
        t.type = f"PREPROCESSING_KEYWORD_{t.value.upper()}"

    return t


def t_INT_LITERAL(t: lex.LexToken) -> lex.LexToken:
    r'[0-9]+L?'
    # Strip a trailing 'L' long suffix so downstream code sees a bare integer.
    if t.value[-1] == 'L':
        t.value = t.value[:-1]
    return t


def t_PREPROCESSING_NUMBER(t: lex.LexToken) -> lex.LexToken:
    r'\.?[0-9]([0-9]|(e\+)|(e\-)|(E\+)|(E\-)|(p\+)|(p\-)|(P\+)|(P\-)|[a-zA-Z])*'
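    # A C-style "preprocessing number": a digit (optionally after a dot)
    # followed by digits, letters, or signed e/E/p/P exponents, e.g. 1e+5 or 0x1p-3.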
    return t


def t_SYSTEM_INCLUDE_LITERAL(t: lex.LexToken) -> lex.LexToken:
    r"""<[^\'\"<>]*?>"""
    return t


def t_COMP_LTE(t: lex.LexToken) -> lex.LexToken:
    r"<="
    return t


def t_COMP_GTE(t: lex.LexToken) -> lex.LexToken:
    r">="
    return t


def t_COMP_EQU(t: lex.LexToken) -> lex.LexToken:
    r"=="
    return t


def t_COMP_NEQU(t: lex.LexToken) -> lex.LexToken:
    r"!="
    return t


def t_BOOL_AND(t: lex.LexToken) -> lex.LexToken:
    r"&&"
    return t


def t_BOOL_OR(t: lex.LexToken) -> lex.LexToken:
    r"\|\|"
    return t


def t_L_SHIFT(t: lex.LexToken) -> lex.LexToken:
    r"<<"
    return t


def t_R_SHIFT(t: lex.LexToken) -> lex.LexToken:
    r">>"
    return t


def t_STRING_LITERAL(t: lex.LexToken) -> lex.LexToken:
    r"""('((\\['tn])|[^'\\])*')|("((\\["tn])|[^"\\])*")"""
    return t


# TODO: maybe convert this to a t_ignore() rule for improved lexing performance
def t_NEWLINE(t: lex.LexToken) -> lex.LexToken:
    r"\n"
    t.type = 'NEWLINE'
    t.lexer.lineno += 1  # the lexer doesn't know what constitutes a 'line' unless we tell it
    symtable.LINE_COUNT += 1
    return t


def t_WHITESPACE(t: lex.LexToken) -> lex.LexToken:
    r"[\t ]+"  # fold a run of blanks into one token; the grammar only needs "space was here"
    return t


def t_error(t: lex.LexToken) -> lex.LexToken:
    raise symtable.PepperSyntaxError(f"Unknown token on line {t.lexer.lineno}: {t.value[0]}")


lexer = lex.lex()


def lex_input(lines: str, debug_mode: bool = False) -> None:
    """Takes a single string, containing newlines, that's the entire input."""
    lexer.input(lines)

    arcade: List[lex.LexToken] = []
    while True:
        tok: Optional[lex.LexToken] = lexer.token()
        if not tok:
            break  # end of file reached
        arcade.append(tok)

    ignore: List[str] = []  # token types to suppress unless we're debugging
    for token in arcade:
        try:
            if token.type in ignore:
                if debug_mode:
                    print(f"(IGNORED:) {token.type}: {token.value}")
                else:
                    continue
            elif token.type in literals:
                print(f"ASCII_LITERAL: {token.value}")
            elif token.type != 'UNKNOWN':
                print(f"{token.type}: {token.value}")
            else:
                print(f"Unknown token in input: {token.value}")
                sys.exit(1)
        except AttributeError:
            print(f'Blew up trying to access type of {token}')


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file',
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="The file to lex")
    parser.add_argument('--debug_mode', action='store_true')
    return parser.parse_args()


def main() -> None:
    args = get_args()

    lex_input(args.input_file.read(), args.debug_mode)


if __name__ == '__main__':
    main()
145 changes: 145 additions & 0 deletions src/pepper/preprocessor_language_parser.py
@@ -0,0 +1,145 @@
#! /usr/bin/env python3

# This file is a part of the Pepper project, https://github.com/devosoft/Pepper
# (C) Michigan State University, under the MIT License
# See LICENSE.txt for more information

"""
This is the Parser module for Pepper

This module implements the grammar for the preprocessor language, comprised of tokens from the Lexer module.
This module implements a main function, but this is only for debugging and will be removed on release.
"""

# flake8: noqa: E501
import pepper.symbol_table as symtable
import pepper.abstract_symbol_tree as ast
import sys
import argparse
import ply.yacc as yacc
from pepper.preprocessor_language_lexer import lexer
from pepper.preprocessor_language_lexer import tokens  # NOQA
from pepper.symbol_table import Node


def p_statement(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    program : '#' preprocessing_statement NEWLINE
    """
    p[0] = p[2]  # pass the parsed directive up so parse() can return it


def p_preprocessing_statement_to_all_statement_types(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    preprocessing_statement : define_statement
                            | include_statement
    """
    p[0] = p[1]


def p_define_expression_no_args(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    define_statement : PREPROCESSING_KEYWORD_DEFINE maybe_space IDENTIFIER maybe_space expressions
    """
    # p[0] = symtable.MacroExpansion(p[3], p[5])
    pass


# def p_define_expression_some_args(p: yacc.YaccProduction) -> yacc.YaccProduction:
# """
# define_expression : PREPROCESSING_KEYWORD_DEFINE maybe_space IDENTIFIER '(' identifier_list ')' maybe_space expressions
# """
# # print(f"Macro expansion for ident {p[3]} with args {p[5]}")
# # p[0] = symtable.MacroExpansion(p[3], p[8], args=p[5])
# pass


def p_include_expression_disambiguation(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    include_statement : include_statement_file
                      | include_statement_system
    """
    p[0] = p[1]


def p_include_expression_file(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    include_statement_file : PREPROCESSING_KEYWORD_INCLUDE maybe_space STRING_LITERAL
    """
    p[0] = ast.PreprocessorIncludeNode([p[3]], False)


def p_include_expression_system(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    include_statement_system : PREPROCESSING_KEYWORD_INCLUDE maybe_space SYSTEM_INCLUDE_LITERAL
    """
    p[0] = ast.PreprocessorIncludeNode([p[3]], True)


def p_maybe_space_empty(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    maybe_space :
    """
    pass


def p_maybe_space_nonempty(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    maybe_space : WHITESPACE
    """
    pass


def p_expressions(p: yacc.YaccProduction) -> yacc.YaccProduction:
    """
    expressions : IDENTIFIER
    """
    pass


def p_error(p: yacc.YaccProduction) -> yacc.YaccProduction:
    if p is None:
        raise symtable.PepperSyntaxError("Unexpected end of input")
    print(f"ERROR(line {p.lineno}): syntax error")
    print(p)
    raise symtable.PepperSyntaxError()


def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file',
                        type=argparse.FileType('r'),
                        default=sys.stdin,
                        help="The file to parse")
    parser.add_argument('--debug_mode', action='store_true')
    return parser.parse_args()


def parse(source: str, debug_mode: bool = False) -> Node:
    if debug_mode:
        parser = yacc.yacc(debug=True)
    else:
        parser = yacc.yacc(debug=False, errorlog=yacc.NullLogger())
    parse_tree: Node = parser.parse(source, lexer=lexer)

    return parse_tree
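

# Hypothetical usage, assuming the include rules above (the define rules are
# still stubs):
#   parse('#include "stdio.h"\n')  # -> PreprocessorIncludeNode, quoted form
#   parse('#include <vector>\n')   # -> PreprocessorIncludeNode, system form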


def main() -> None:
    args = get_args()

    parse_tree = parse(args.input_file.read(), args.debug_mode)
    print(parse_tree)


if __name__ == "__main__":
    main()