Enable arbitrary comments in HLO in both /*...*/ and // forms.

Allow '/*...*/' and '//' comments to appear anywhere in HLO text, including multi-line comments. Previously, only '/*...*/' comments were allowed, and only in certain locations in a serialized Literal.

PiperOrigin-RevId: 208519204
This commit is contained in:
Mark Heffernan 2018-08-13 11:58:25 -07:00 committed by TensorFlower Gardener
parent 55b327916a
commit 881f58d20c
5 changed files with 116 additions and 17 deletions

View File

@ -143,8 +143,47 @@ TokKind HloLexer::LexToken() {
return TokKind::kLparen;
case ')':
return TokKind::kRparen;
case '/':
return LexComment();
case '/': {
if (PeekCurrentChar() == '*') {
// This is the start of a /*...*/ delimited comment. Save the current
// location in case the comment is unterminated so the error message
// will point to the beginning of the comment.
const char* comment_start = current_ptr_;
current_ptr_++;
// Advance until '*/' is found.
while (true) {
int current = GetNextChar();
if (current == '*' && PeekCurrentChar() == '/') {
// End of comment.
current_ptr_++;
break;
}
if (current == kEOF) {
// Unterminated comment.
current_ptr_ = comment_start;
return TokKind::kError;
}
}
// Return no token for the comment. Keep lexing.
continue;
} else if (PeekCurrentChar() == '/') {
// This is the start of a '//' delimited comment. Throw away
// everything until end of line or file. The end-of-line character(s)
// are left unlexed in the buffer which is harmless because these are
// skipped later by the lexer. This approach enables support for
// different end-of-line encodings.
while (true) {
int current = PeekCurrentChar();
if (current == kEOF || current == '\n' || current == '\r') {
break;
}
current_ptr_++;
}
continue;
}
// A lone '/' is an error.
return TokKind::kError;
}
case '"':
return LexString();
}
@ -357,16 +396,6 @@ tensorflow::StringPiece HloLexer::GetLine(LocTy loc) const {
return StringPieceFromPointers(start, end);
}
// Lexes a '/*...*/' comment that begins at token_start_. On a match,
// current_ptr_ is moved to the end of the consumed text and kComment is
// returned; otherwise kError is returned and current_ptr_ is left untouched.
TokKind HloLexer::LexComment() {
auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
// Non-greedy '.*?' so the match ends at the first "*/" after the opener.
// NOTE(review): '.' does not match a newline in RE2 by default, so this
// pattern presumably only recognizes comments that fit on one line -- confirm.
static LazyRE2 comment_pattern = {R"(\/\*.*?\*\/)"};
if (RE2::Consume(&consumable, *comment_pattern)) {
// Consume() advanced `consumable` past the match; resume lexing from there.
current_ptr_ = consumable.begin();
return TokKind::kComment;
}
return TokKind::kError;
}
// Lexes quoted string with escaping characters. If matched, the quoted string
// will be unescaped and stored to str_val_.
TokKind HloLexer::LexString() {
@ -412,8 +441,6 @@ string TokKindToString(TokKind kind) {
return "kRparen";
case TokKind::kArrow:
return "kArrow";
case TokKind::kComment:
return "kComment";
case TokKind::kw_HloModule:
return "kw_HloModule";
case TokKind::kw_ENTRY:

View File

@ -105,7 +105,6 @@ class HloLexer {
TokKind LexShape();
TokKind LexConstant();
TokKind LexNumberOrPattern();
TokKind LexComment();
TokKind LexString();
const tensorflow::StringPiece buf_;

View File

@ -1824,7 +1824,6 @@ bool HloParser::ParseDenseLiteral(std::unique_ptr<Literal>* literal,
break;
}
case TokKind::kComma:
case TokKind::kComment:
// Skip.
lexer_.Lex();
break;

View File

@ -1560,6 +1560,81 @@ ENTRY consts {
"last");
}
// Checks that HLO text containing '/*...*/' comments parses successfully when
// the comments appear before the module header, between tokens, inside a
// literal, and after the entry computation.
TEST_F(HloParserTest, Comments) {
const string original = R"(/* module description. */
HloModule comments:
ENTRY /*comment*/ c1 {
/* blah */
ROOT const1 = /*foo*/f32[1]{0} constant({12345 /*bar*/})
/* comment */
}
/* something else */
)";
auto module = ParseHloString(original);
// Only the parse status is checked; the resulting module is not inspected.
TF_ASSERT_OK(module.status());
}
// Checks that '/*...*/' comments spanning several lines parse successfully,
// including one that wraps an entire ROOT instruction line.
TEST_F(HloParserTest, MultilineComments) {
const string original = R"(HloModule multiline_comment:
ENTRY c1 {
/*
ROOT foo = f32[1]{0} constant({12345})
*/
ROOT const1 = f32[1]{0} constant({12345})
/*
a
b
c
d
*/
})";
auto module = ParseHloString(original);
// Only the parse status is checked; the resulting module is not inspected.
TF_ASSERT_OK(module.status());
}
// Checks the error reported for a '/*' comment that is never closed.
TEST_F(HloParserTest, UnterminatedComment) {
const string original = R"(HloModule unterminated_comment:
ENTRY c1 {
/* unterminated
ROOT const1 = f32[1]{0} constant({12345})
})";
// Verify that the error message points to the beginning of the unterminated
// comment.
// The expected substring is the offending source line followed by a '^'
// marker at the start of the next line.
ExpectHasSubstr(ParseHloString(original).status().error_message(),
"/* unterminated\n^");
}
// Checks that '//' comments parse successfully both on lines of their own and
// trailing an instruction on the same line.
TEST_F(HloParserTest, SlashSlashComments) {
const string original = R"(HloModule slash_slash_comment:
// Garbage
ENTRY c1 {
// Foo bar
ROOT const1 = f32[1]{0} constant({12345}) // Something else
})";
auto module = ParseHloString(original);
// Only the parse status is checked; the resulting module is not inspected.
TF_ASSERT_OK(module.status());
}
// Checks that '//' comments parse successfully when every line of the input
// ends with "\r\n".
TEST_F(HloParserTest, SlashSlashCommentMsDosEolFormat) {
// Built from ordinary (non-raw) string literals so the "\r\n" escapes are
// spelled out explicitly.
const string original =
"HloModule slash_slash_comment:\r\n// Garbage\r\nENTRY c1 {\r\n// Foo "
"bar\r\nROOT const1 = f32[1]{0} constant({12345}) // Something else\r\n}";
auto module = ParseHloString(original);
// Only the parse status is checked; the resulting module is not inspected.
TF_ASSERT_OK(module.status());
}
// Checks that '//' comments parse successfully when lines are separated by a
// lone "\r" with no "\n".
TEST_F(HloParserTest, SlashSlashCommentMacEolFormat) {
// Built from ordinary (non-raw) string literals so the "\r" escapes are
// spelled out explicitly.
const string original =
"HloModule slash_slash_comment:\r// Garbage\rENTRY c1 {\r// Foo "
"bar\rROOT const1 = f32[1]{0} constant({12345}) // Something else\r}";
auto module = ParseHloString(original);
// Only the parse status is checked; the resulting module is not inspected.
TF_ASSERT_OK(module.status());
}
TEST_F(HloParserTest, MultipleEntries) {
const string original = R"(HloModule multiple_entries:
ENTRY c1 {

View File

@ -44,7 +44,6 @@ enum class TokKind {
kRparen, // ( )
kArrow, // ->
kComment, // /*xxx*/
// Keywords
kw_HloModule,