QtNovelUI/DesParser/LexFoundation.cpp

135 lines
3.1 KiB
C++

#include "LexFoundation.h"
#include <tuple>
using namespace Lex;
/**
 * Builds a lexer from an ordered list of lexical rules.
 *
 * @param seqence       rule list, copied into lexical_seq.
 * @param UnknownToken  token name stored in unknown_token and attached to
 *                      text that no rule matches.
 */
LexFoundation::LexFoundation(QList<LexDef> seqence, const QString UnknownToken)
    : unknown_token(UnknownToken), lexical_seq(seqence)
{
    // Register the characters that push() treats as segment delimiters:
    // tab, backspace, space, carriage return and the EOF marker.
    empty_seq << '\t';
    empty_seq << '\b';
    empty_seq << ' ';
    empty_seq << '\r';
    empty_seq << EOF;
}
// Index of a lexical rule within a rule sequence.
using lexunit_index = int;
// Offset at which a regular-expression match begins.
using match_start = int;
/**
 * Feeds one source character into the lexer.
 *
 * Characters that are not listed in empty_seq are buffered in code_acc together
 * with their position. The buffered segment is tokenized and the buffer is
 * cleared when either a newline is buffered or a character from empty_seq
 * arrives while the buffer is non-empty. The character from empty_seq itself
 * is never buffered.
 *
 * @param row  row of the character in the source text.
 * @param col  column of the character in the source text.
 * @param w    the character being fed.
 * @return tokens of the flushed segment with start/end positions filled in,
 *         or an empty list when nothing was flushed.
 */
QList<LexResult> LexFoundation::push(int row, int col, const QChar w)
{
    const bool is_blank = empty_seq.contains(w);
    if (is_blank) {
        // A delimiter with nothing buffered has nothing to flush.
        if (code_acc.size() == 0)
            return QList<LexResult>();
    }
    else {
        code_acc << XChar(w, row, col);
        // Keep accumulating until the line ends.
        if (w != '\n')
            return QList<LexResult>();
    }

    // Rebuild the plain text of the buffered segment.
    QString segment_text = "";
    for (int i = 0; i < code_acc.size(); ++i)
        segment_text += code_acc[i].value();

    auto tokens = lexical_parse(segment_text);

    // Translate segment-relative offsets back into source rows and columns.
    for (int i = 0; i < tokens.size(); ++i) {
        LexResult &tok = tokens[i];

        const XChar first = code_acc[tok.index_at_segment];
        tok.StartRow = first.row();
        tok.StartCol = first.col();

        const int last_offset = tok.index_at_segment + tok.Text.length() - 1;
        const XChar last = code_acc[last_offset];
        tok.EndRow = last.row();
        tok.EndCol = last.col();
    }

    code_acc.clear();
    return tokens;
}
/**
 * Splits one text segment into lexical tokens.
 *
 * Every rule in lexical_seq is matched against the segment. The rule whose
 * match starts earliest wins; when several rules match at the same offset the
 * one listed first in lexical_seq wins. Text in front of the winning match is
 * reported with the unknown token, and the text behind it is tokenized by a
 * recursive call on the remaining substring.
 *
 * @param segment  text to tokenize.
 * @return tokens in source order. index_at_segment of every token is the
 *         offset of its first character inside `segment`. When no rule
 *         matches, a single unknown token covering the whole segment is
 *         returned.
 */
QList<LexResult> LexFoundation::lexical_parse(const QString & segment)
{
    QList<LexResult> result;

    // Pick the best rule: smallest match offset first, rule priority
    // (position in lexical_seq) second. Rules are visited in priority order,
    // so a strict "<" on the offset keeps the earlier rule on ties.
    int best_rule = -1;
    int best_start = -1;
    int best_length = 0;
    int rule_index = -1;
    for (const auto &lex : lexical_seq) {
        ++rule_index;
        QRegExp exp(lex.RegExpression);
        const int start = exp.indexIn(segment);
        const int length = exp.matchedLength();
        // Ignore failed matches and zero-length matches: an empty match
        // consumes no text, so accepting it would make the recursion below
        // run forever on the same remainder.
        if (start == -1 || length <= 0)
            continue;
        if (best_rule == -1 || start < best_start) {
            best_rule = rule_index;
            best_start = start;
            best_length = length;
        }
    }

    // Nothing matched: the whole segment is unknown.
    if (best_rule == -1) {
        LexResult unknown;
        unknown.index_at_segment = 0;
        unknown.Token = this->unknown_token;
        unknown.Text = segment;
        result << unknown;
        return result;
    }

    // Text in front of the first match belongs to no rule: mark it unknown.
    if (best_start != 0) {
        LexResult unknown;
        unknown.index_at_segment = 0;
        unknown.Token = this->unknown_token;
        unknown.Text = segment.mid(0, best_start);
        result << unknown;
    }

    // Emit the matched token itself.
    LexResult token;
    token.Token = lexical_seq.at(best_rule).TokenType;
    token.Text = segment.mid(best_start, best_length);
    token.index_at_segment = best_start;
    result << token;

    // Tokenize whatever follows the match. The recursive call reports offsets
    // relative to `rest`, which begins at best_start + best_length inside
    // `segment`, so the results are shifted by that full amount.
    const int consumed = best_start + best_length;
    const QString rest = segment.mid(consumed);
    if (rest.length()) {
        QList<LexResult> tail = lexical_parse(rest);
        for (auto &t : tail)
            t.index_at_segment += consumed;
        result.append(tail);
    }

    return result;
}
/**
 * Creates a character record that remembers where the character came from.
 *
 * @param c    the character.
 * @param row  row associated with the character.
 * @param col  column associated with the character.
 */
XChar::XChar(QChar c, int row, int col)
    : value_store(c),
      row_index(row),
      col_index(col)
{
}
/** @return the character held by this record. */
QChar XChar::value() const
{
    return this->value_store;
}
int XChar::row() const
{
return row_index;
}
int XChar::col() const
{
return col_index;
}