135 lines
3.1 KiB
C++
135 lines
3.1 KiB
C++
#include "LexFoundation.h"
|
|
#include <tuple>
|
|
|
|
using namespace Lex;
|
|
|
|
/**
 * Builds a lexer from a list of token definitions.
 *
 * @param seqence      Token definitions, stored in `lexical_seq`. When several
 *                     definitions match at the same position, lexical_parse()
 *                     prefers the one that appears earlier in this list.
 * @param UnknownToken Token name stored in `unknown_token`; lexical_parse()
 *                     assigns it to text that no definition matches.
 */
LexFoundation::LexFoundation(QList<LexDef> seqence, const QString UnknownToken)
    : unknown_token(UnknownToken),
      lexical_seq(seqence)
{
    // Characters that push() treats as segment separators: they are never
    // stored, they only flush the characters accumulated so far.
    // Note that '\n' is not in this set; push() handles it separately.
    empty_seq << '\t';
    empty_seq << '\b';
    empty_seq << ' ';
    empty_seq << '\r';
    empty_seq << EOF;
}
|
|
|
|
// Position of a token definition inside `lexical_seq`.
using lexunit_index = int;

// Offset within a segment at which a definition's pattern first matches.
using match_start = int;
|
|
|
|
/**
 * Feeds one source character to the lexer.
 *
 * Characters that are not in `empty_seq` are appended to `code_acc` together
 * with their position. The accumulated characters are tokenized when either
 *   - a '\n' arrives (it is stored first, so it is part of the parsed text), or
 *   - a character from `empty_seq` arrives while `code_acc` is non-empty
 *     (the separator itself is not stored).
 * After tokenizing, `code_acc` is cleared.
 *
 * @param row Row of `w` in the source text.
 * @param col Column of `w` in the source text.
 * @param w   The character being fed.
 * @return The tokens of the flushed segment with StartRow/StartCol/EndRow/EndCol
 *         filled in from the stored character positions, or an empty list when
 *         this character did not complete a segment.
 */
QList<LexResult> LexFoundation::push(int row, int col, const QChar w)
{
    if (!empty_seq.contains(w)) {
        code_acc << XChar(w, row, col);

        // Ordinary characters only accumulate; a line break also flushes.
        if (w != '\n')
            return QList<LexResult>();
    }
    else if (!code_acc.size()) {
        // Separator with nothing accumulated: nothing to tokenize.
        return QList<LexResult>();
    }

    // Rebuild the plain text of the segment; character i of this string
    // corresponds to code_acc[i].
    QString segment_text;
    for (const auto &stored : code_acc)
        segment_text += stored.value();

    auto tokens = lexical_parse(segment_text);

    // code_acc is guaranteed non-empty here, so last_index >= 0.
    const int last_index = static_cast<int>(code_acc.size()) - 1;
    for (auto &token : tokens) {
        // Clamp both indices into the buffer. A token with empty text would
        // otherwise produce an end index one before its start (possibly -1),
        // and a token reported at the very end of the segment would index
        // past the last stored character.
        int first = static_cast<int>(token.index_at_segment);
        if (first < 0)
            first = 0;
        if (first > last_index)
            first = last_index;

        int final_index = static_cast<int>(token.index_at_segment + token.Text.length()) - 1;
        if (final_index < first)
            final_index = first;
        if (final_index > last_index)
            final_index = last_index;

        const auto &char_start = code_acc[first];
        token.StartRow = char_start.row();
        token.StartCol = char_start.col();

        const auto &char_end = code_acc[final_index];
        token.EndRow = char_end.row();
        token.EndCol = char_end.col();
    }

    code_acc.clear();
    return tokens;
}
|
|
|
|
/**
 * Splits `segment` into tokens using the patterns in `lexical_seq`.
 *
 * One token is chosen per call: the definition whose pattern matches earliest
 * in `segment` wins, and when several match at the same offset the one that
 * appears first in `lexical_seq` wins. Text in front of the winning match is
 * emitted as an `unknown_token` result, and the text after the match is parsed
 * by a recursive call.
 *
 * Zero-length matches are ignored: they consume no input, so accepting one
 * would make the recursion on the remaining text never terminate.
 *
 * @param segment Text to tokenize.
 * @return Tokens in the order they occur. `index_at_segment` of every token is
 *         the offset of its first character within `segment`. Only Token, Text
 *         and index_at_segment are set here.
 */
QList<LexResult> LexFoundation::lexical_parse(const QString & segment)
{
    QList<LexResult> result;

    // Find the best candidate: smallest match offset, ties broken by the
    // definition's position in lexical_seq (lower index = higher priority).
    // Iterating in index order with a strict '<' gives exactly that tie-break.
    match_start best_start = -1;
    int best_length = 0;
    lexunit_index best_lex = -1;
    for (lexunit_index i = 0; i < lexical_seq.size(); ++i) {
        QRegExp exp(lexical_seq.at(i).RegExpression);
        const match_start start = exp.indexIn(segment);
        if (start == -1 || exp.matchedLength() <= 0)
            continue;

        if (best_lex == -1 || start < best_start) {
            best_start = start;
            best_length = exp.matchedLength();
            best_lex = i;
        }
    }

    // No definition matched: the whole segment is a single unknown token.
    if (best_lex == -1) {
        LexResult unknown;
        unknown.index_at_segment = 0;
        unknown.Token = this->unknown_token;
        unknown.Text = segment;
        result << unknown;
        return result;
    }

    // The match does not start at the beginning: the text in front of it
    // matched nothing, so report it as unknown.
    if (best_start != 0) {
        LexResult unknown_prefix;
        unknown_prefix.index_at_segment = 0;
        unknown_prefix.Token = this->unknown_token;
        unknown_prefix.Text = segment.mid(0, best_start);
        result << unknown_prefix;
    }

    // Emit the matched token itself.
    const auto &lex_unit = lexical_seq.at(best_lex);
    LexResult matched;
    matched.Token = lex_unit.TokenType;
    matched.Text = segment.mid(best_start, best_length);
    matched.index_at_segment = best_start;
    result << matched;

    // Parse whatever follows the match. The recursive call reports offsets
    // relative to `rest`, which begins at best_start + best_length in
    // `segment`, so shift by that amount (not just by best_start).
    const int consumed = best_start + best_length;
    const QString rest = segment.mid(consumed);
    if (!rest.isEmpty()) {
        QList<LexResult> tail = lexical_parse(rest);
        for (auto &t : tail)
            t.index_at_segment += consumed;
        result.append(tail);
    }

    return result;
}
|
|
|
|
/**
 * Creates a character tagged with a source position.
 *
 * @param c   The character value.
 * @param row Row stored for this character.
 * @param col Column stored for this character.
 */
XChar::XChar(QChar c, int row, int col)
    : value_store(c),
      row_index(row),
      col_index(col)
{
}
|
|
|
|
/** @return The character this XChar was constructed with. */
QChar XChar::value() const
{
    return this->value_store;
}
|
|
|
|
int XChar::row() const
|
|
{
|
|
return row_index;
|
|
}
|
|
|
|
int XChar::col() const
|
|
{
|
|
return col_index;
|
|
}
|