From d92855802d981492918d376dd6ca9eb26af07052 Mon Sep 17 00:00:00 2001 From: Jonathan Marler Date: Mon, 11 Mar 2019 11:15:39 -0600 Subject: [PATCH] Added interpolated strings --- src/dmd/cli.d | 2 + src/dmd/globals.d | 2 +- src/dmd/globals.h | 1 + src/dmd/lexer.d | 48 ++++- src/dmd/parse.d | 311 +++++++++++++++++++++++++++++++ test/Makefile | 4 +- test/fail_compilation/istring1.d | 33 ++++ test/fail_compilation/istring2.d | 11 ++ test/run.d | 2 + test/runnable/istring.d | 72 +++++++ test/tools/d_do_test.d | 33 ++-- 11 files changed, 498 insertions(+), 21 deletions(-) create mode 100644 test/fail_compilation/istring1.d create mode 100644 test/fail_compilation/istring2.d create mode 100644 test/runnable/istring.d diff --git a/src/dmd/cli.d b/src/dmd/cli.d index 773bbc6e1db2..ce2c422224a2 100644 --- a/src/dmd/cli.d +++ b/src/dmd/cli.d @@ -684,6 +684,8 @@ dmd -cov -unittest myprog.d "list all variables going into thread local storage"), Feature("vmarkdown", "vmarkdown", "list instances of Markdown replacements in Ddoc"), + Feature("interpolate", "interpolateStrings", + "enable interpolated string support via the 'i' prefix on string literals"), ]; /// Returns all available reverts diff --git a/src/dmd/globals.d b/src/dmd/globals.d index f2853bbd5756..ec4d450ac2c4 100644 --- a/src/dmd/globals.d +++ b/src/dmd/globals.d @@ -173,7 +173,7 @@ struct Param bool markdown; // enable Markdown replacements in Ddoc bool vmarkdown; // list instances of Markdown replacements in Ddoc - + bool interpolateStrings;// Enable interpolated string support via the 'i' prefix on string literals bool showGaggedErrors; // print gagged errors anyway bool printErrorContext; // print errors with the error context (the error line in the source file) bool manual; // open browser on compiler manual diff --git a/src/dmd/globals.h b/src/dmd/globals.h index 3ce581db453c..11f2278adec5 100644 --- a/src/dmd/globals.h +++ b/src/dmd/globals.h @@ -141,6 +141,7 @@ struct Param CppStdRevision cplusplus; // 
version of C++ name mangling to support bool markdown; // enable Markdown replacements in Ddoc bool vmarkdown; // list instances of Markdown replacements in Ddoc + bool interpolateStrings;// Enable interpolated string support via the 'i' prefix on string literals bool showGaggedErrors; // print gagged errors anyway bool printErrorContext; // print errors with the error context (the error line in the source file) bool manual; // open browser on compiler manual diff --git a/src/dmd/lexer.d b/src/dmd/lexer.d index 1ac15e45b50c..4b8184cc73ed 100644 --- a/src/dmd/lexer.d +++ b/src/dmd/lexer.d @@ -420,7 +420,7 @@ class Lexer bool commentToken; // comments are TOK.comment's int lastDocLine; // last line of previous doc comment - private DiagnosticReporter diagnosticReporter; + protected DiagnosticReporter diagnosticReporter; private Token* tokenFreelist; @@ -646,6 +646,44 @@ class Lexer case '"': escapeStringConstant(t); return; + case 'i': + if (global.params.interpolateStrings) + { + if (p[1] == 'r') + { + if (p[2] == '"') + { + p += 2; + goto case '`'; + } + } + else if (p[1] == '`') + { + p++; + goto case '`'; + } + else if (p[1] == '"') + { + p++; + goto case '"'; + } + else if (p[1] == 'q') + { + if (p[2] == '"') + { + p += 2; + delimitedStringConstant(t); + return; + } + else if (p[2] == '{') + { + p += 2; + tokenStringConstant(t); + return; + } + } + } + goto case_ident; case 'a': case 'b': case 'c': @@ -654,7 +692,6 @@ class Lexer case 'f': case 'g': case 'h': - case 'i': case 'j': case 'k': case 'l': @@ -2491,6 +2528,13 @@ class Lexer diagnosticReporter.error(loc, format, args); va_end(args); } + // temporary hack to handle r-value references to Loc structures + pragma(inline) + final void error(T...)(const Loc loc, const(char)* format, T args) + { + error(loc, format, args); + } + final void errorSupplemental(const ref Loc loc, const(char)* format, ...) 
{ diff --git a/src/dmd/parse.d b/src/dmd/parse.d index d9b3ed127c47..a0b6f8f944b4 100644 --- a/src/dmd/parse.d +++ b/src/dmd/parse.d @@ -24,6 +24,7 @@ import dmd.root.outbuffer; import dmd.root.rmem; import dmd.root.rootobject; import dmd.tokens; +import dmd.utf; // How multiple declarations are parsed. // If 1, treat as C. @@ -7745,6 +7746,13 @@ final class Parser(AST) : Lexer break; case TOK.string_: + if (global.params.interpolateStrings && token.ptr[0] == 'i') + { + e = parseInterpolatedString(token); + nextToken(); + break; + } + goto case TOK.hexadecimalString; case TOK.hexadecimalString: { // cat adjacent strings @@ -7763,6 +7771,10 @@ final class Parser(AST) : Lexer error("mismatched string literal postfixes `'%c'` and `'%c'`", postfix, token.postfix); postfix = token.postfix; } + if (token.ptr[0] == 'i') + { + error("cannot implicitly concatenate interpolated strings with non-interpolated strings"); + } error("Implicit string concatenation is deprecated, use %s ~ %s instead", prev.toChars(), token.toChars()); @@ -8996,6 +9008,305 @@ final class Parser(AST) : Lexer token.lineComment = null; } } + + /** + Parse the given interpolated string into a tuple of expressions. + + Params: + token = the interpolated string token + + Returns: + A tuple of expressions from the interpolated string. 
+ */ + AST.TupleExp parseInterpolatedString(Token token) + in { assert(token.value == TOK.string_ && token.ptr[0] == 'i'); } body + { + //fprintf(stderr, "parseInterpolatedString `%.*s`\n", token.len, token.ustring); + + auto parts = new AST.Expressions(); + + // Used to map string contents back to original source location + auto sourcePos = StringSourcePos(token.ptr); + + auto str = token.ustring; + auto len = token.len; + size_t mark = 0; + auto markSourcePos = sourcePos; + size_t next = 0; + size_t doubleDollar = size_t.max; + + KmainLoop: + while(true) + { + auto endOfRawCharacters = next; + if (next < len) + { + auto nextChar = sourcePos.scan(str, next); + if (nextChar != '$') + continue; + + if (next >= len) + { + error(sourcePos.loc.from(token.loc), "unfinished interpolated string expression '$'"); + break; + } + if (str[next] == '$') // handle $$ + { + if (doubleDollar == size_t.max) + doubleDollar = next - 1; + sourcePos.scan(str, next); + continue; + } + } + + // Add next string expression + if (endOfRawCharacters > mark) + { + auto markLoc = markSourcePos.loc.from(token.loc); + if (doubleDollar == size_t.max) + { + parts.push(new AST.StringExp(markLoc, cast(char*)str + mark, endOfRawCharacters - mark, token.postfix)); + } + else + { + auto buffer = cast(char*)mem.xmalloc(endOfRawCharacters - 1 - mark); + size_t offset; + { + auto length = doubleDollar + 1 - mark; + buffer[0 .. length] = str[mark .. 
doubleDollar + 1]; + offset = length; + } + for (size_t i = doubleDollar + 2; i < endOfRawCharacters; i++) + { + auto c = str[i]; + if (c == '$') + { + i++; + assert(i < endOfRawCharacters && str[i] == '$'); + } + buffer[offset++] = c; + } + parts.push(new AST.StringExp(markLoc, buffer, offset, token.postfix)); + doubleDollar = size_t.max; + } + } + + if (next >= len) + break; + + // Process the '$' expression + if (str[next] == '(') + { + sourcePos.scan(str, next); + mark = next; auto exprLoc = sourcePos.loc.from(token.loc); // source location of the expression's first character, captured before scanning to the closing ')' + for(uint depth = 1;;) + { + if (next >= len) + { + error(sourcePos.loc.from(token.loc), "unfinished interpolated string expression '$(...)'"); + break KmainLoop; + } + auto nextChar = sourcePos.scan(str, next); + if (nextChar == ')') + { + depth--; + if (depth == 0) + break; + } + else if (nextChar == '(') + { + depth++; + } + } + { + auto writeableStr = cast(char*)str; + // Need to null-terminate so the parser does not scan past the end of + // the expression. A case has been found where the parser will scan + // past the expression without this, namely, i"$(var)'" + writeableStr[next - 1] = '\0'; + scope(exit) writeableStr[next - 1] = ')'; + + auto expr = str[mark .. next - 1]; + //printf("parsing the expression '%s'\n", expr.ptr); + scope tempParser = new Parser!AST(mod, expr, false, diagnosticReporter); + tempParser.scanloc = exprLoc; // start at the expression's first line; sourcePos is already past the ')' here + tempParser.nextToken(); + if (tempParser.token.value != TOK.endOfFile) + { + auto result = tempParser.parseExpression(); + if (tempParser.token.value != TOK.endOfFile) + { + error(sourcePos.loc.from(token.loc), "invalid expression '%s' inside interpolated string", expr.ptr); + break; + } + parts.push(result); + } + } + mark = next; + markSourcePos = sourcePos; + } + else + { + // TODO: if we want to support `$` expressions without parentheses, this is + // where we would add support for it. Maybe a good grammar node for this + // would be DotIdentifier. 
+ error(sourcePos.loc.from(token.loc), "missing parentheses in interpolated string expression '$(...)'"); + break; + } + } + + return new AST.TupleExp(token.loc, parts); + } + + /** + Represents a location offset from a `Loc`. + */ + static struct LocOffset + { + uint offset; /// byte offset from base loc + uint line; /// line offset from base loc + uint lastLineOffset; /// byte offset of the start of the last line + + /** + Indicates 'offset' has been moved to the next line. + */ + void atNextLine() + { + line++; + lastLineOffset = offset; + } + + /** + Get location relative to `baseLoc`. + Params: + baseLoc = relative base location + Returns: + location relative to `baseLoc` + */ + Loc from(ref const Loc baseLoc) const + { + return Loc(baseLoc.filename, + baseLoc.linnum + line, + (line == 0) ? baseLoc.charnum + offset : offset - lastLineOffset + 1); // +1: columns are 1-based, offset - lastLineOffset is 0-based + } + } + + /** + Used to map offsets in a processed string back to the source location + */ + private static struct StringSourcePos + { + private const(char)* sourcePtr; + private bool wysiwyg; + LocOffset loc; + + this(const char* sourcePtr) + { + this.sourcePtr = sourcePtr; + loc.offset = 2; + if (sourcePtr[1] == '"') + wysiwyg = false; + else + { + wysiwyg = true; + if (sourcePtr[1] == 'r') // ir" + loc.offset++; + else if (sourcePtr[1] == '`') // i` + { } + else + { + assert(sourcePtr[1] == 'q', "code bug"); + loc.offset = 3; + if (sourcePtr[2] != '{') + { + assert(sourcePtr[2] == '"', "code bug"); + char c = sourcePtr[3]; + bool isheredoc; + if (c >= 0x80) + { + import dmd.utf : utf_decodeChar, isUniAlpha; + size_t tempOffset = 3; + dchar fullChar; + const decodeError = utf_decodeChar(sourcePtr, size_t.max, tempOffset, fullChar); assert(!decodeError, "code bug"); // decode outside the assert: assert expressions are not evaluated in -release builds + isheredoc = isUniAlpha(fullChar); + } + else + { + import core.stdc.ctype : isalpha; + isheredoc = (isalpha(c) || c == '_'); + } + if (isheredoc) + { loc.offset = 3 + cast(uint)(strchr(sourcePtr + 3, '\n') - (sourcePtr + 3)) + 1; loc.atNextLine(); } // heredoc content starts on the line after the delimiter, so count that newline + else + loc.offset = 4; + } + } + } + } + + /** + Read and 
move past the next character both in the source string and + in the processed string. + Params: + str = the processed string + ridx = in/out index into the processed string + Returns: + the next character + */ + dchar scan(const(char)* str, ref size_t ridx) + { + dchar sourceChar = sourcePtr[loc.offset++]; + dchar strChar; + if (sourceChar == '\r') + { + if (sourcePtr[loc.offset] == '\n') + { + loc.offset++; + loc.atNextLine(); + sourceChar = '\n'; + } + strChar = str[ridx++]; + } + else if (wysiwyg || sourceChar != '\\') + { + strChar = str[ridx++]; + if (sourceChar == '\n') + loc.atNextLine(); + } + else + { + scope ignore = new IgnoreDiagnosticReporter(); + auto escapeStart = sourcePtr + loc.offset; + if (escapeStart[0] == 'u' || escapeStart[0] == 'U' || escapeStart[0] == '&') + { const decodeError = utf_decodeChar(str, size_t.max, ridx, strChar); assert(!decodeError, "code bug"); } // decode outside the assert so ridx/strChar are still updated in -release builds + else + strChar = str[ridx++]; + + auto escapeEnd = escapeStart; + auto tempLoc = Loc(); // TODO: get the current location correctly + sourceChar = Lexer.escapeSequence(tempLoc, ignore, escapeEnd); + loc.offset += (escapeEnd - escapeStart); + } + assert(strChar == sourceChar/*, "strChar `" ~ strChar ~ "' != sourceChar '" ~ sourceChar ~ "'"*/); + return strChar; + } + } + private static class IgnoreDiagnosticReporter : DiagnosticReporter + { + import core.stdc.stdarg : va_list; + private int _errorCount; + private int _warningCount; + private int _deprecationCount; + override int errorCount() { return _errorCount; } + override int warningCount() { return _warningCount; } + override int deprecationCount() { return _deprecationCount; } + override void error(const ref Loc loc, const(char)* format, va_list args) { _errorCount++; } + override void errorSupplemental(const ref Loc loc, const(char)* format, va_list) { _errorCount++; } + override void warning(const ref Loc loc, const(char)* format, va_list args) { _warningCount++; } + override void warningSupplemental(const ref Loc loc, const(char)* format, va_list) { 
_warningCount++; } + override void deprecation(const ref Loc loc, const(char)* format, va_list args) { _deprecationCount++; } + override void deprecationSupplemental(const ref Loc loc, const(char)* format, va_list) { _deprecationCount++; } + } } enum PREC : int diff --git a/test/Makefile b/test/Makefile index de0e63fe7e8e..02946c131177 100644 --- a/test/Makefile +++ b/test/Makefile @@ -202,8 +202,8 @@ $(RESULTS_DIR)/d_do_test$(EXE): tools/d_do_test.d $(RESULTS_DIR)/.created @echo "OS: '$(OS)'" @echo "MODEL: '$(MODEL)'" @echo "PIC: '$(PIC_FLAG)'" - $(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -unittest -run $< - $(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -od$(RESULTS_DIR) -of$(RESULTS_DIR)$(DSEP)d_do_test$(EXE) $< + $(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -transition=interpolate -unittest -run $< + $(DMD) -conf= $(MODEL_FLAG) $(DEBUG_FLAGS) -transition=interpolate -od$(RESULTS_DIR) -of$(RESULTS_DIR)$(DSEP)d_do_test$(EXE) $< $(RESULTS_DIR)/sanitize_json$(EXE): tools/sanitize_json.d $(RESULTS_DIR)/.created @echo "Building sanitize_json tool" diff --git a/test/fail_compilation/istring1.d b/test/fail_compilation/istring1.d new file mode 100644 index 000000000000..fd47d8cf0287 --- /dev/null +++ b/test/fail_compilation/istring1.d @@ -0,0 +1,33 @@ +/* +PERMUTE_ARGS: +REQUIRED_ARGS: -transition=interpolate +TEST_OUTPUT: +--- +fail_compilation/istring1.d(16): Error: missing parentheses in interpolated string expression '$(...)' +fail_compilation/istring1.d(23): Error: unfinished interpolated string expression '$(...)' +fail_compilation/istring1.d(26): Error: unfinished interpolated string expression '$' +fail_compilation/istring1.d(29): Error: invalid expression '1 + 2;' inside interpolated string +fail_compilation/istring1.d(32): Error: undefined escape sequence \c +fail_compilation/istring1.d(33): Error: unterminated named entity &quot"; +--- +*/ +enum s1 = i` + + $! 
+ +`; +enum s2 = i` + + $( + +`; +enum s3 = i` + + $`; +enum s4 = i` + + $(1 + 2;)`; + +// Test that bad escape sequences are handled sanely +enum s5 = i"\c"; +enum s6 = i"\&quot"; diff --git a/test/fail_compilation/istring2.d b/test/fail_compilation/istring2.d new file mode 100644 index 000000000000..6ad61c8580bd --- /dev/null +++ b/test/fail_compilation/istring2.d @@ -0,0 +1,11 @@ +/* +PERMUTE_ARGS: +REQUIRED_ARGS: -transition=interpolate +TEST_OUTPUT: +--- +fail_compilation/istring2.d(11): Error: undefined identifier `a` +--- +*/ +enum s1 = i` + + $(a)`; diff --git a/test/run.d b/test/run.d index b18b15c02605..4484b07d9ec0 100755 --- a/test/run.d +++ b/test/run.d @@ -88,6 +88,7 @@ Options: // bootstrap all needed environment variables auto env = getEnvironment; + hostDMD = buildPath("..", "generated", env["OS"], env["BUILD"], env["MODEL"], "dmd" ~ env["EXE"]); if (runUnitTests) { @@ -162,6 +163,7 @@ void ensureToolsExists(const TestTool[] tools ...) const command = [ hostDMD, "-of"~targetBin, + "-transition=interpolate", sourceFile ] ~ tool.extraArgs; diff --git a/test/runnable/istring.d b/test/runnable/istring.d new file mode 100644 index 000000000000..3aa592fb62bf --- /dev/null +++ b/test/runnable/istring.d @@ -0,0 +1,72 @@ +/* +PERMUTE_ARGS: +REQUIRED_ARGS: -transition=interpolate +*/ +import std.conv : text; + +static assert(i"$()".length == 0); +static assert(i"$(/* a comment!*/)".length == 0); +static assert(i"$(// another comment)".length == 0); +static assert(i"$(/+ yet another comment+/)".length == 0); + +void main() +{ + int a = 42; + assert("a is 42" == text(i"a is $(a)")); + assert("a + 23 is 65" == text(i"a + 23 is $(a + 23)")); + + // test each type of string literal + int b = 93; + assert("42 + 93 = 135" == text( i"$(a) + $(b) = $(a + b)")); // double-quote + assert("42 + 93 = 135" == text( ir"$(a) + $(b) = $(a + b)")); // wysiwyg + assert("42 + 93 = 135" == text( i`$(a) + $(b) = $(a + b)`)); // wysiwyg (alt) + assert("42 + 93 = 135" == text( iq{$(a) 
+ $(b) = $(a + b)})); // token + assert("42 + 93 = 135" == text(iq"!$(a) + $(b) = $(a + b)!")); // delimited (char) + assert("42 + 93 = 135\n" == text(iq"ABC +$(a) + $(b) = $(a + b) +ABC")); // delimited (heredoc) + + // Escaping double dollar + assert("$" == i"$$"[0]); + assert(" $ " == i" $$ "[0]); + assert(" $(just raw string) " == i" $$(just raw string) "[0]); + assert("Double dollar $$ becomes $" == text( i"Double dollar $$$$ becomes $$")); // double-quote + assert("Double dollar $$ becomes $" == text( ir"Double dollar $$$$ becomes $$")); // wysiwyg + assert("Double dollar $$ becomes $" == text( i`Double dollar $$$$ becomes $$`)); // wysiwyg (alt) + assert("Double dollar $$ becomes $" == text( iq{Double dollar $$$$ becomes $$})); // token + assert("Double dollar $$ becomes $" == text(iq"!Double dollar $$$$ becomes $$!")); // delimited + + assert(928 == add(900, 28)); +} + +string funcCode(string attributes, string returnType, string name, string args, string body) +{ + return text(iq{ + $(attributes) $(returnType) $(name)($(args)) + { + $(body) + } + }); +} +mixin(funcCode("pragma(inline)", "int", "add", "int a, int b", "return a + b;")); + +// Test interpolated strings with escape sequences +static assert(i" foo \n bar".length == 1); +static assert(i"foo \x0a bar".length == 1); +static assert(i"foo \xC2\xA2 bar".length == 1); +static assert(i"foo \u042f bar".length == 1); +static assert(i"foo \U00010f063 bar".length == 1); +static assert(i"foo \0 bar".length == 1); +static assert(i"foo \1 bar".length == 1); +static assert(i"foo \7 bar".length == 1); +static assert(i"foo \01 bar".length == 1); +static assert(i"foo \001 bar".length == 1); +static assert(i"foo \377 bar".length == 1); +static assert(i"foo \&quot; bar".length == 1); + +// Test string literals with odd newlines +static assert(i" +".length == 1); +// test carriage return +static assert(i" +".length == 1); diff --git a/test/tools/d_do_test.d b/test/tools/d_do_test.d index 2152c5b6900e..1e2dba680035 100755 
--- a/test/tools/d_do_test.d +++ b/test/tools/d_do_test.d @@ -519,8 +519,7 @@ string envGetRequired(in char[] name) auto value = environment.get(name); if(value is null) { - writefln("Error: missing environment variable '%s', was this called this through the Makefile?", - name); + writeln(i"Error: missing environment variable '$(name)', was this called this through the Makefile?"); throw new SilentQuit(); } return value; @@ -733,13 +732,13 @@ int tryMain(string[] args) string objfile = output_dir ~ envData.sep ~ test_name ~ "_" ~ to!string(permuteIndex) ~ envData.obj; toCleanup ~= objfile; - command = format("%s -conf= -m%s -I%s %s %s -od%s -of%s %s %s%s %s", envData.dmd, envData.model, input_dir, - reqArgs, permutedArgs, output_dir, - (testArgs.mode == TestMode.RUN || testArgs.link ? test_app_dmd : objfile), - argSet, - (testArgs.mode == TestMode.RUN || testArgs.link ? "" : "-c "), - join(testArgs.sources, " "), - (autoCompileImports ? "-i" : join(testArgs.compiledImports, " "))); + command = text( + i"$(envData.dmd) -conf= -m$(envData.model) -I$(input_dir) $(reqArgs) ", + i"$(permutedArgs) -od$(output_dir) -of", + (testArgs.mode == TestMode.RUN || testArgs.link) ? test_app_dmd : objfile, + i` $(argSet) $(testArgs.mode == TestMode.RUN || testArgs.link ? "" : "-c ") `, + join(testArgs.sources, " "), " ", + (autoCompileImports ? 
"-i" : join(testArgs.compiledImports, " "))); version(Windows) command ~= " -map nul.map"; compile_output = execute(fThisRun, command, testArgs.mode != TestMode.FAIL_COMPILE, result_path); @@ -751,18 +750,21 @@ int tryMain(string[] args) string newo= result_path ~ replace(replace(filename, ".d", envData.obj), envData.sep~"imports"~envData.sep, envData.sep); toCleanup ~= newo; - command = format("%s -conf= -m%s -I%s %s %s -od%s -c %s %s", envData.dmd, envData.model, input_dir, - reqArgs, permutedArgs, output_dir, argSet, filename); + command = text( + i"$(envData.dmd) -conf= -m$(envData.model) -I$(input_dir) $(reqArgs) ", + i"$(permutedArgs) -od$(output_dir) -c $(argSet) $(filename)"); compile_output ~= execute(fThisRun, command, testArgs.mode != TestMode.FAIL_COMPILE, result_path); } if (testArgs.mode == TestMode.RUN || testArgs.link) { // link .o's into an executable - command = format("%s -conf= -m%s%s%s %s %s -od%s -of%s %s", envData.dmd, envData.model, + command = text( + i"$(envData.dmd) -conf= -m$(envData.model)", autoCompileImports ? " -i" : "", autoCompileImports ? "extraSourceIncludePaths" : "", - envData.required_args, testArgs.requiredArgsForLink, output_dir, test_app_dmd, join(toCleanup, " ")); + i" $(envData.required_args) $(testArgs.requiredArgsForLink) -od$(output_dir)", + i" -of$(test_app_dmd) $(join(toCleanup, ` `))"); version(Windows) command ~= " -map nul.map"; execute(fThisRun, command, true, result_path); @@ -870,11 +872,10 @@ int tryMain(string[] args) } f.writeln(); f.writeln("=============================="); - f.writef("Test %s failed: ", input_file); - f.writeln(e.msg); + f.writeln(i"Test $(input_file) failed: $(e.msg)"); f.close(); - writefln("\nTest %s failed. The logged output:", input_file); + writeln(i"\nTest $(input_file) failed. The logged output:"); auto outputText = output_file.readText; writeln(outputText); output_file.remove();