Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions clang/include/clang/Lex/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ class Lexer : public PreprocessorLexer {
/// LexTokenInternal - Internal interface to lex a preprocessing token. Called
/// by Lex.
///
bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
bool LexTokenInternal(Token &Result);

bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);

Expand Down Expand Up @@ -762,12 +762,9 @@ class Lexer : public PreprocessorLexer {
bool LexCharConstant (Token &Result, const char *CurPtr,
tok::TokenKind Kind);
bool LexEndOfFile (Token &Result, const char *CurPtr);
bool SkipWhitespace (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SkipLineComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SkipBlockComment (Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine);
bool SkipWhitespace(Token &Result, const char *CurPtr);
bool SkipLineComment(Token &Result, const char *CurPtr);
bool SkipBlockComment(Token &Result, const char *CurPtr);
bool SaveLineComment (Token &Result, const char *CurPtr);

bool IsStartOfConflictMarker(const char *CurPtr);
Expand Down
29 changes: 2 additions & 27 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,30 +137,6 @@ struct CXXStandardLibraryVersionInfo {
std::uint64_t Version;
};

/// Record the previous 'export' keyword info.
///
/// Since P1857R3, the standard introduced several rules to determine whether
/// the 'module', 'export module', 'import', 'export import' is a valid
/// directive introducer. This class is used to record the previous 'export'
/// keyword token, and then handle 'export module' and 'export import'.
class ExportContextualKeywordInfo {
// The most recently lexed 'export' keyword token, if any has been recorded.
Token ExportTok;
// Whether that 'export' token appeared at the start of a physical source
// line (needed because a directive introducer must begin a line).
bool AtPhysicalStartOfLine = false;

public:
ExportContextualKeywordInfo() = default;
ExportContextualKeywordInfo(const Token &Tok, bool AtPhysicalStartOfLine)
: ExportTok(Tok), AtPhysicalStartOfLine(AtPhysicalStartOfLine) {}

/// True iff a recorded token is present and is the 'export' keyword.
bool isValid() const { return ExportTok.is(tok::kw_export); }
/// Whether the recorded 'export' token began a physical source line.
bool isAtPhysicalStartOfLine() const { return AtPhysicalStartOfLine; }
/// The recorded 'export' token, returned by value.
Token getExportTok() const { return ExportTok; }
/// Clear the recorded state. Reinitializes the stored token via
/// Token::startToken() — presumably leaving it non-kw_export so isValid()
/// returns false afterwards; confirm against Token::startToken semantics.
void reset() {
ExportTok.startToken();
AtPhysicalStartOfLine = false;
}
};

class ModuleNameLoc final
: llvm::TrailingObjects<ModuleNameLoc, IdentifierLoc> {
friend TrailingObjects;
Expand Down Expand Up @@ -415,7 +391,7 @@ class Preprocessor {
bool ImportingCXXNamedModules = false;

/// Whether the last token we lexed was an 'export' keyword.
ExportContextualKeywordInfo LastTokenWasExportKeyword;
Token LastExportKeyword;

/// First pp-token source location in current translation unit.
SourceLocation FirstPPTokenLoc;
Expand Down Expand Up @@ -1869,8 +1845,7 @@ class Preprocessor {
/// This consumes the import/module directive, modifies the
/// lexer/preprocessor state, and advances the lexer(s) so that the next token
/// read is the correct one.
bool HandleModuleContextualKeyword(Token &Result,
bool TokAtPhysicalStartOfLine);
bool HandleModuleContextualKeyword(Token &Result);

/// Get the start location of the first pp-token in main file.
SourceLocation getMainFileFirstPPTokenLoc() const {
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Lex/Token.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ class Token {
HasSeenNoTrivialPPDirective =
0x1000, // Whether we've seen any 'no-trivial' pp-directives before
// current position.
PhysicalStartOfLine =
0x2000, // This token is at the start of a physical line.
};

tok::TokenKind getKind() const { return Kind; }
Expand Down Expand Up @@ -283,6 +285,10 @@ class Token {
///
bool isAtStartOfLine() const { return getFlag(StartOfLine); }

/// isAtPhysicalStartOfLine - Return true if this token is at the start of a
/// physical line.
bool isAtPhysicalStartOfLine() const { return getFlag(PhysicalStartOfLine); }

/// Return true if this token has whitespace before it.
///
bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
Expand Down
84 changes: 33 additions & 51 deletions clang/lib/Lex/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2034,7 +2034,8 @@ bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {

// Finally, now that we know we have an identifier, pass this off to the
// preprocessor, which may macro expand it or something.
if (II->isHandleIdentifierCase())
if (II->isHandleIdentifierCase() || II->isModuleKeyword() ||
II->isImportKeyword() || II->getTokenID() == tok::kw_export)
return PP->HandleIdentifier(Result);

return true;
Expand Down Expand Up @@ -2515,8 +2516,7 @@ bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
/// Update BufferPtr to point to the next non-whitespace character and return.
///
/// This method forms a token and returns true if KeepWhitespaceMode is enabled.
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine) {
bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
// Whitespace - Skip it, then return the token after the whitespace.
bool SawNewline = isVerticalWhitespace(CurPtr[-1]);

Expand Down Expand Up @@ -2572,7 +2572,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
if (SawNewline) {
Result.setFlag(Token::StartOfLine);
TokAtPhysicalStartOfLine = true;
Result.setFlag(Token::PhysicalStartOfLine);

if (NewLinePtr && lastNewLine && NewLinePtr != lastNewLine && PP) {
if (auto *Handler = PP->getEmptylineHandler())
Expand All @@ -2591,8 +2591,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine) {
bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
// If Line comments aren't explicitly enabled for this language, emit an
// extension warning.
if (!LineComment) {
Expand Down Expand Up @@ -2748,7 +2747,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,

// The next returned token is at the start of the line.
Result.setFlag(Token::StartOfLine);
TokAtPhysicalStartOfLine = true;
Result.setFlag(Token::PhysicalStartOfLine);
// No leading whitespace seen so far.
Result.clearFlag(Token::LeadingSpace);
BufferPtr = CurPtr;
Expand Down Expand Up @@ -2873,8 +2872,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L,
///
/// If we're in KeepCommentMode or any CommentHandler has inserted
/// some tokens, this will store the first token and return true.
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
bool &TokAtPhysicalStartOfLine) {
bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
// Scan one character past where we should, looking for a '/' character. Once
// we find it, check to see if it was preceded by a *. This common
// optimization helps people who like to put a lot of * characters in their
Expand Down Expand Up @@ -3077,7 +3075,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
// efficiently now. This is safe even in KeepWhitespaceMode because we would
// have already returned above with the comment as a token.
if (isHorizontalWhitespace(*CurPtr)) {
SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
SkipWhitespace(Result, CurPtr + 1);
return false;
}

Expand Down Expand Up @@ -3722,6 +3720,11 @@ bool Lexer::Lex(Token &Result) {
IsAtStartOfLine = false;
}

if (IsAtPhysicalStartOfLine) {
Result.setFlag(Token::PhysicalStartOfLine);
IsAtPhysicalStartOfLine = false;
}

if (HasLeadingSpace) {
Result.setFlag(Token::LeadingSpace);
HasLeadingSpace = false;
Expand All @@ -3732,11 +3735,9 @@ bool Lexer::Lex(Token &Result) {
HasLeadingEmptyMacro = false;
}

bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
IsAtPhysicalStartOfLine = false;
bool isRawLex = isLexingRawMode();
(void) isRawLex;
bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
bool returnedToken = LexTokenInternal(Result);
// (After the LexTokenInternal call, the lexer might be destroyed.)
assert((returnedToken || !isRawLex) && "Raw lex must succeed");
return returnedToken;
Expand All @@ -3747,7 +3748,7 @@ bool Lexer::Lex(Token &Result) {
/// has a null character at the end of the file. This returns a preprocessing
/// token, not a normal token, as such, it is an internal interface. It assumes
/// that the Flags of result have been cleared before calling this.
bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
bool Lexer::LexTokenInternal(Token &Result) {
LexStart:
assert(!Result.needsCleaning() && "Result needs cleaning");
assert(!Result.hasPtrData() && "Result has not been reset");
Expand Down Expand Up @@ -3800,7 +3801,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
if (!isLexingRawMode())
Diag(CurPtr-1, diag::null_in_file);
Result.setFlag(Token::LeadingSpace);
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
if (SkipWhitespace(Result, CurPtr))
return true; // KeepWhitespaceMode

// We know the lexer hasn't changed, so just try again with this lexer.
Expand Down Expand Up @@ -3846,7 +3847,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// No leading whitespace seen so far.
Result.clearFlag(Token::LeadingSpace);

if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
if (SkipWhitespace(Result, CurPtr))
return true; // KeepWhitespaceMode

// We only saw whitespace, so just try again with this lexer.
Expand All @@ -3858,7 +3859,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
case '\v':
SkipHorizontalWhitespace:
Result.setFlag(Token::LeadingSpace);
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
if (SkipWhitespace(Result, CurPtr))
return true; // KeepWhitespaceMode

SkipIgnoredUnits:
Expand All @@ -3868,11 +3869,11 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// too (without going through the big switch stmt).
if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
LineComment && (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
if (SkipLineComment(Result, CurPtr + 2))
return true; // There is a token to return.
goto SkipIgnoredUnits;
} else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
if (SkipBlockComment(Result, CurPtr + 2))
return true; // There is a token to return.
goto SkipIgnoredUnits;
} else if (isHorizontalWhitespace(*CurPtr)) {
Expand Down Expand Up @@ -4030,23 +4031,10 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
case 'o': case 'p': case 'q': case 'r': case 's': case 't': /*'u'*/
case 'v': case 'w': case 'x': case 'y': case 'z':
case '_': {
case '_':
// Notify MIOpt that we read a non-whitespace/non-comment token.
MIOpt.ReadToken();

// LexIdentifierContinue may trigger HandleEndOfFile which would
// normally destroy this Lexer. However, the Preprocessor now defers
// lexer destruction until the stack of Lexer unwinds (LexLevel == 0),
// so it's safe to access member variables after this call returns.
bool returnedToken = LexIdentifierContinue(Result, CurPtr);

if (returnedToken && !LexingRawMode && !Is_PragmaLexer &&
!ParsingPreprocessorDirective && LangOpts.CPlusPlusModules &&
Result.isModuleContextualKeyword() &&
PP->HandleModuleContextualKeyword(Result, TokAtPhysicalStartOfLine))
goto HandleDirective;
return returnedToken;
}
return LexIdentifierContinue(Result, CurPtr);
case '$': // $ in identifiers.
if (LangOpts.DollarIdents) {
if (!isLexingRawMode())
Expand Down Expand Up @@ -4196,8 +4184,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';

if (TreatAsComment) {
if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
TokAtPhysicalStartOfLine))
if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
return true; // There is a token to return.

// It is common for the tokens immediately after a // comment to be
Expand All @@ -4208,8 +4195,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
}

if (Char == '*') { // /**/ comment.
if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
TokAtPhysicalStartOfLine))
if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
return true; // There is a token to return.

// We only saw whitespace, so just try again with this lexer.
Expand Down Expand Up @@ -4249,12 +4235,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// it's actually the start of a preprocessing directive. Callback to
// the preprocessor to handle it.
// TODO: -fpreprocessed mode??
if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) {
// We parsed a # character and it's the start of a preprocessing
// directive.
FormTokenWithChars(Result, CurPtr, tok::hash);
if (Result.isAtPhysicalStartOfLine() && !LexingRawMode &&
!Is_PragmaLexer)
goto HandleDirective;
}

Kind = tok::hash;
}
Expand Down Expand Up @@ -4444,12 +4427,8 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
// it's actually the start of a preprocessing directive. Callback to
// the preprocessor to handle it.
// TODO: -fpreprocessed mode??
if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer) {
// We parsed a # character and it's the start of a preprocessing
// directive.
FormTokenWithChars(Result, CurPtr, tok::hash);
if (Result.isAtPhysicalStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
goto HandleDirective;
}

Kind = tok::hash;
}
Expand All @@ -4468,7 +4447,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
if (!LangOpts.AsmPreprocessor) {
if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
if (SkipWhitespace(Result, CurPtr))
return true; // KeepWhitespaceMode

// We only saw whitespace, so just try again with this lexer.
Expand Down Expand Up @@ -4501,7 +4480,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
llvm::strictConversion);
if (Status == llvm::conversionOK) {
if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
if (SkipWhitespace(Result, CurPtr))
return true; // KeepWhitespaceMode

// We only saw whitespace, so just try again with this lexer.
Expand Down Expand Up @@ -4539,6 +4518,9 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
return true;

HandleDirective:

// We parsed a # character and it's the start of a preprocessing directive.
FormTokenWithChars(Result, CurPtr, tok::hash);
PP->HandleDirective(Result);

if (PP->hadModuleLoaderFatalFailure())
Expand Down Expand Up @@ -4623,7 +4605,7 @@ bool Lexer::LexDependencyDirectiveToken(Token &Result) {
if (!isLexingRawMode()) {
const IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
if (LangOpts.CPlusPlusModules && Result.isModuleContextualKeyword() &&
PP->HandleModuleContextualKeyword(Result, Result.isAtStartOfLine())) {
PP->HandleModuleContextualKeyword(Result)) {
PP->HandleDirective(Result);
return false;
}
Expand Down
19 changes: 9 additions & 10 deletions clang/lib/Lex/PPDirectives.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,14 +641,13 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
Tok.is(tok::raw_identifier) &&
(Tok.getRawIdentifier() == "export" ||
Tok.getRawIdentifier() == "module")) {
llvm::SaveAndRestore ModuleDirectiveSkipping(
LastTokenWasExportKeyword);
LastTokenWasExportKeyword.reset();
llvm::SaveAndRestore ModuleDirectiveSkipping(LastExportKeyword);
LastExportKeyword.startToken();
LookUpIdentifierInfo(Tok);
IdentifierInfo *II = Tok.getIdentifierInfo();

if (II->getName()[0] == 'e') { // export
HandleModuleContextualKeyword(Tok, Tok.isAtStartOfLine());
HandleModuleContextualKeyword(Tok);
CurLexer->Lex(Tok);
if (Tok.is(tok::raw_identifier)) {
LookUpIdentifierInfo(Tok);
Expand All @@ -661,7 +660,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
// to save RawLexingMode
llvm::SaveAndRestore RestoreLexingRawMode(CurPPLexer->LexingRawMode,
false);
if (HandleModuleContextualKeyword(Tok, Tok.isAtStartOfLine())) {
if (HandleModuleContextualKeyword(Tok)) {
// We just parsed a # character at the start of a line, so we're
// in directive mode. Tell the lexer this so any newlines we see
// will be converted into an EOD token (this terminates the
Expand Down Expand Up @@ -4193,8 +4192,8 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) {
llvm::SaveAndRestore<bool> SaveImportingCXXModules(
this->ImportingCXXNamedModules, true);

if (LastTokenWasExportKeyword.isValid())
LastTokenWasExportKeyword.reset();
if (LastExportKeyword.is(tok::kw_export))
LastExportKeyword.startToken();

Token Tok;
if (LexHeaderName(Tok)) {
Expand Down Expand Up @@ -4352,9 +4351,9 @@ void Preprocessor::HandleCXXImportDirective(Token ImportTok) {
void Preprocessor::HandleCXXModuleDirective(Token ModuleTok) {
assert(getLangOpts().CPlusPlusModules && ModuleTok.is(tok::kw_module));
Token Introducer = ModuleTok;
if (LastTokenWasExportKeyword.isValid()) {
Introducer = LastTokenWasExportKeyword.getExportTok();
LastTokenWasExportKeyword.reset();
if (LastExportKeyword.is(tok::kw_export)) {
Introducer = LastExportKeyword;
LastExportKeyword.startToken();
}

SourceLocation StartLoc = Introducer.getLocation();
Expand Down
Loading