#include "LexFoundation.h"

#include <tuple>

using namespace Lex;

/**
 * Creates a lexer from an ordered list of lexical rules.
 *
 * @param seqence       Rule list; each entry is read through its
 *                      `RegExpression` and `TokenType` fields. When two rules
 *                      match at the same position, the one listed first wins.
 * @param UnknownToken  Token name reported for text that no rule matches.
 *
 * NOTE(review): the rule list's element type is declared in LexFoundation.h
 * and is not visible in this file, so the parameter type is spelled through
 * the member it initialises — confirm it matches the header's declaration.
 */
LexFoundation::LexFoundation(decltype(lexical_seq) seqence, const QString UnknownToken)
    : unknown_token(UnknownToken), lexical_seq(seqence)
{
    // Characters that end the pending run; push() never accumulates them.
    empty_seq << '\t' << '\b' << ' ' << '\r' << EOF;
}

using lexunit_index = int; // position of a rule inside lexical_seq
using match_start = int;   // offset at which a rule first matches

/**
 * Feeds one source character, tagged with its position, into the lexer.
 *
 * Characters are accumulated until a separator (a member of `empty_seq`) or a
 * '\n' arrives. A '\n' is itself appended to the run before the run is
 * tokenised; separators are dropped.
 *
 * @param row  Row of `w` in the source text.
 * @param col  Column of `w` in the source text.
 * @param w    The character being pushed.
 * @return The tokens of the completed run with start/end row and column
 *         filled in, or an empty list while the run is still growing (or when
 *         a separator arrives with nothing pending).
 */
QList<LexResult> LexFoundation::push(int row, int col, const QChar w)
{
    if (!empty_seq.contains(w)) {
        code_acc << XChar(w, row, col);
        if (w != '\n')
            return QList<LexResult>(); // run not finished yet
    } else if (code_acc.isEmpty()) {
        return QList<LexResult>();     // separator with nothing pending
    }

    // Flatten the accumulated characters into the text to be tokenised.
    QString pending;
    for (const auto &ch : code_acc)
        pending += ch.value();

    QList<LexResult> tokens = lexical_parse(pending);

    // Translate each token's offset range back into source coordinates.
    for (auto &token : tokens) {
        const int first_at = token.index_at_segment;
        const int last_at = first_at + token.Text.length() - 1;

        const XChar &first = code_acc[first_at];
        token.StartRow = first.row();
        token.StartCol = first.col();

        const XChar &last = code_acc[last_at];
        token.EndRow = last.row();
        token.EndCol = last.col();
    }

    code_acc.clear();
    return tokens;
}

/**
 * Splits `segment` into tokens using the configured rules.
 *
 * At each step every rule is matched against the not-yet-consumed remainder.
 * The rule matching at the smallest offset wins, ties going to the rule that
 * appears first in the list. Text in front of the winning match, and any tail
 * that no rule matches, is reported with the unknown token.
 *
 * Rules whose first match is empty are ignored: an empty match consumes
 * nothing, so accepting it would never advance through the input and would
 * produce zero-length tokens.
 *
 * @param segment  Text to tokenise.
 * @return Tokens in source order; `index_at_segment` is each token's offset
 *         within `segment`. An empty segment yields an empty list.
 */
QList<LexResult> LexFoundation::lexical_parse(const QString &segment)
{
    QList<LexResult> result;

    const auto append_result = [&result](int offset, const QString &token, const QString &text) {
        LexResult rst;
        rst.index_at_segment = offset;
        rst.Token = token;
        rst.Text = text;
        result << rst;
    };

    int consumed = 0;
    while (consumed < segment.length()) {
        // Match against the remainder only, so every pattern sees it as a
        // fresh string that starts at offset 0.
        const QString rest = segment.mid(consumed);

        // Pick the best candidate: earliest match, then earliest rule.
        match_start best_start = -1;
        int best_len = 0;
        lexunit_index best_rule = -1;
        for (lexunit_index rule = 0; rule < lexical_seq.size(); ++rule) {
            QRegExp exp(lexical_seq.at(rule).RegExpression);
            const match_start start = exp.indexIn(rest);
            const int len = exp.matchedLength();
            if (start == -1 || len <= 0)
                continue; // no match, or an empty match that consumes nothing

            if (best_rule == -1 || std::tie(start, rule) < std::tie(best_start, best_rule)) {
                best_start = start;
                best_len = len;
                best_rule = rule;
            }
        }

        // Nothing matches: the whole remainder is unknown.
        if (best_rule == -1) {
            append_result(consumed, this->unknown_token, rest);
            break;
        }

        // Text skipped in front of the match is unknown.
        if (best_start > 0)
            append_result(consumed, this->unknown_token, rest.mid(0, best_start));

        // The matched token itself.
        append_result(consumed + best_start,
                      lexical_seq.at(best_rule).TokenType,
                      rest.mid(best_start, best_len));

        // Continue with whatever follows the match.
        consumed += best_start + best_len;
    }

    return result;
}

/**
 * Bundles a character with the row and column it was read from.
 */
XChar::XChar(QChar c, int row, int col)
    : value_store(c), row_index(row), col_index(col)
{
}

/** @return The stored character. */
QChar XChar::value() const
{
    return value_store;
}

/** @return The row given at construction. */
int XChar::row() const
{
    return row_index;
}

/** @return The column given at construction. */
int XChar::col() const
{
    return col_index;
}