WsParser_VS/libSyntax/libsyntax.cpp

441 lines
13 KiB
C++
Raw Normal View History

2024-03-17 07:58:28 +00:00
#include "libsyntax.h"
#include "ast_basic.h"
2025-02-02 12:54:32 +00:00
#include <tuple>
#include <QDebug>
2024-03-17 07:58:28 +00:00
using namespace lib_syntax;
using namespace std;
using namespace lib_token;
2025-02-02 12:54:32 +00:00
using namespace lib_words;
2024-03-17 07:58:28 +00:00
using namespace ast_basic;
2025-02-17 05:59:25 +00:00
// Debug helper: walks the token chain backwards from `token` through
// prevToken() and concatenates content() of every token whose defines() is
// set. Each piece is followed by one space; pieces end up in chain order
// (oldest first) because they are prepended while walking backwards.
auto content_extractm = [](std::shared_ptr<const lib_token::IActionToken> token) {
    QString content;
    for (auto node = token; node; node = node->prevToken()) {
        if (!node->defines())
            continue;
        content.prepend(node->content() + " ");
    }
    return content;
};
// Creates an alternation rule over `mbrs`; the list is stored as given.
__anyone_impl::__anyone_impl(const QList<std::shared_ptr<const IBasicRule>> mbrs)
    : mbrs_store(mbrs)
{
}
2024-03-17 07:58:28 +00:00
2025-02-14 01:26:42 +00:00
// Returns the alternative rules this node chooses between.
QList<std::shared_ptr<const IBasicRule>> __anyone_impl::children() const
{
    return this->mbrs_store;
}
2024-03-17 07:58:28 +00:00
2025-02-15 04:57:53 +00:00
// Runs every alternative against `cursor` and returns the preferred group of
// resulting cursors.
//
// A cursor that is already failed or complete is returned unchanged as the
// only element. Otherwise the first non-empty group below wins:
//   1. clean results    - same totalErrorCount() as `cursor` and ordered after
//                         it by MatchCursor::operator>, or parseComplete();
//                         only the first one found is returned;
//   2. repaired results - not failed, with more recorded errors than `cursor`;
//   3. failed results   - everything flagged parseFailure() (may be empty).
QList<std::shared_ptr<const MatchCursor>> __anyone_impl::parse(std::shared_ptr<const MatchCursor> cursor) const {
    using CursorPtr = std::shared_ptr<const MatchCursor>;
    using CursorList = QList<CursorPtr>;

    if (cursor->parseFailure() || cursor->parseComplete())
        return CursorList() << cursor;

    // Collect the outcome of every alternative, in member order.
    CursorList result_list;
    const auto members = this->children();
    for (const auto& rule : members)
        result_list.append(rule->parse(cursor));

    const auto base_errors = cursor->totalErrorCount();

    // Fully matched branches: must have made progress (or finished the input).
    CursorList completely_list;
    std::copy_if(result_list.begin(), result_list.end(), std::back_inserter(completely_list),
        [&](const CursorPtr& ins) {
            return (base_errors == ins->totalErrorCount() && *ins > *cursor)
                || ins->parseComplete();
        });
    if (!completely_list.isEmpty())
        return completely_list.mid(0, 1);

    // Repaired branches: must have recorded additional errors.
    CursorList modify_list;
    std::copy_if(result_list.begin(), result_list.end(), std::back_inserter(modify_list),
        [&](const CursorPtr& ins) {
            return !ins->parseFailure() && ins->totalErrorCount() > base_errors;
        });
    if (!modify_list.isEmpty())
        return modify_list;

    // Nothing usable: report the failed branches.
    CursorList errors_list;
    std::copy_if(result_list.begin(), result_list.end(), std::back_inserter(errors_list),
        [](const CursorPtr& ins) { return ins->parseFailure(); });
    return errors_list;
}
2025-02-14 01:26:42 +00:00
// Renders the alternation as its members' textual forms separated by '|'.
QString __anyone_impl::present() const {
    QStringList parts;
    for (const auto& member : children())
        parts << member->present();
    return parts.join("|");
}
2025-02-17 05:59:25 +00:00
// Creates a sequence rule that applies `mbrs` one after another, in list order.
__sequence_impl::__sequence_impl(const QList<std::shared_ptr<const IBasicRule>> mbrs)
    : mbrs_store(mbrs)
{
}
2024-03-17 07:58:28 +00:00
2025-02-14 01:26:42 +00:00
// Returns the member rules of the sequence in application order.
QList<std::shared_ptr<const IBasicRule>> __sequence_impl::children() const
{
    return this->mbrs_store;
}
2024-03-17 07:58:28 +00:00
2025-02-15 04:57:53 +00:00
// Applies the member rules one after another, starting from `cursor`.
//
// A cursor that is already failed or complete is returned unchanged as the
// only element. After each member rule the surviving branches are narrowed:
//   - if any branch still has the same totalErrorCount() as the ORIGINAL
//     `cursor`, or is parseComplete(), only the first such branch continues;
//   - otherwise every non-failed (repaired) branch continues;
//   - otherwise the sequence stops with the raw results of that member.
// The final result is the branches that are ordered after `cursor`
// (operator>) or complete; if there are none, the failed branches.
QList<std::shared_ptr<const MatchCursor>> __sequence_impl::parse(std::shared_ptr<const MatchCursor> cursor) const {
    using CursorList = QList<std::shared_ptr<const MatchCursor>>;

    if (cursor->parseFailure() || cursor->parseComplete()) {
        CursorList untouched;
        untouched << cursor;
        return untouched;
    }

    CursorList frontier;
    frontier << cursor;

    for (const auto& step : this->children()) {
        // Advance every live branch by one member rule.
        CursorList advanced;
        for (const auto& branch : frontier) {
            if (branch->parseFailure())
                continue;
            advanced.append(step->parse(branch));
        }

        // Fully matched branches.
        CursorList clean;
        for (const auto& candidate : advanced) {
            if (cursor->totalErrorCount() == candidate->totalErrorCount() || candidate->parseComplete())
                clean.append(candidate);
        }
        if (!clean.isEmpty()) {
            frontier = clean.mid(0, 1);
            continue;
        }

        // Repaired branches.
        CursorList repaired;
        for (const auto& candidate : advanced) {
            if (!candidate->parseFailure())
                repaired.append(candidate);
        }
        if (!repaired.isEmpty()) {
            frontier = repaired;
            continue;
        }

        // Every branch failed on this member: stop here.
        frontier = advanced;
        break;
    }

    // Matching made progress, or finished successfully.
    CursorList outcome;
    for (const auto& branch : frontier) {
        if (*branch > *cursor || branch->parseComplete())
            outcome.append(branch);
    }
    if (!outcome.isEmpty())
        return outcome;

    for (const auto& branch : frontier) {
        if (branch->parseFailure())
            outcome.append(branch);
    }
    return outcome;
}
2025-02-14 01:26:42 +00:00
// Renders the sequence as its members' textual forms separated by one space.
QString __sequence_impl::present() const {
    QStringList parts;
    for (const auto& member : children())
        parts << member->present();
    return parts.join(" ");
}
2025-02-14 01:26:42 +00:00
// Creates a repetition of `rule` that must match at least `min` times and is
// attempted at most `max` times.
__repeat_impl::__repeat_impl(std::shared_ptr<const IBasicRule> rule, int min, int max)
    : rule_peer(rule),
      min_match(min),
      max_match(max)
{
}
2024-03-17 07:58:28 +00:00
2025-02-14 01:26:42 +00:00
// Returns a one-element list holding the repeated rule.
QList<std::shared_ptr<const IBasicRule>> __repeat_impl::children() const {
    QList<std::shared_ptr<const IBasicRule>> single;
    single << rule_peer;
    return single;
}
2024-03-17 07:58:28 +00:00
2025-02-14 01:26:42 +00:00
#include <algorithm>
2025-02-15 04:57:53 +00:00
// Matches the wrapped rule between min_match and max_match times.
//
// A cursor that is already failed or complete is returned unchanged as the
// only element.
//
// Phase 1 (mandatory part): when min_match is non-zero, a temporary sequence
// of min_match copies of the rule is parsed. If no non-failed branch survives,
// the sequence's result is returned as-is.
//
// Phase 2 (optional part): the rule is applied repeatedly to the surviving
// branches, up to max_match total repetitions. After each round:
//   - if some branch has the same totalErrorCount() as `cursor` and is ordered
//     after it (operator>), or is parseComplete(), only the first such branch
//     continues;
//   - otherwise all non-failed branches ordered after `cursor` continue;
//   - otherwise the loop stops and the previous round's branches are kept.
//
// Phase 3 (selection): the kept branches are sorted descending by operator>,
// then the clean/complete branches are merged with the repaired branches that
// share the leading repaired branch's token position.
//
// NOTE(review): progress in phase 2 is measured against the ORIGINAL `cursor`,
// not against the branch each result came from. If the wrapped rule can
// succeed without advancing, the loop may spin until max_match - confirm
// that such rules cannot be wrapped in an unbounded repeat.
QList<std::shared_ptr<const MatchCursor>> __repeat_impl::parse(std::shared_ptr<const MatchCursor> cursor) const {
    using CursorPtr = std::shared_ptr<const MatchCursor>;
    using CursorList = QList<CursorPtr>;

    if (cursor->parseFailure() || cursor->parseComplete())
        return CursorList() << cursor;

    // Phase 1: satisfy the minimum repetition count with a temporary sequence.
    CursorList max_match_begin = { cursor };
    if (min_match) {
        QList<std::shared_ptr<const IBasicRule>> temp_rules;
        for (auto idx = 0; idx < min_match; ++idx)
            temp_rules << this->rule_peer;

        auto seqs_rule = std::make_shared<__sequence_impl>(temp_rules);
        max_match_begin = seqs_rule->parse(cursor);
    }

    // Failed branches of the mandatory part are useless from here on; if
    // nothing else is left, the minimum was not met and the raw result is
    // handed back to the caller.
    CursorList bridge_list;
    std::copy_if(max_match_begin.begin(), max_match_begin.end(), std::back_inserter(bridge_list),
        [](const CursorPtr& ins) { return !ins->parseFailure(); });
    if (bridge_list.isEmpty())
        return max_match_begin;

    // Phase 2: keep repeating up to the maximum repetition count.
    for (auto idx = min_match; idx < max_match; ++idx) {
        // One more repetition for every live branch.
        CursorList current_list;
        for (const auto& ins : bridge_list)
            current_list.append(this->rule_peer->parse(ins));

        // Fully matched branches.
        CursorList temprary_branchs;
        std::copy_if(current_list.begin(), current_list.end(), std::back_inserter(temprary_branchs),
            [&](const CursorPtr& ins) {
                return (cursor->totalErrorCount() == ins->totalErrorCount() && *ins > *cursor)
                    || ins->parseComplete();
            });
        if (!temprary_branchs.isEmpty()) {
            bridge_list = temprary_branchs.mid(0, 1);
            continue;
        }

        // Syntax-repaired branches (the list above is empty at this point).
        std::copy_if(current_list.begin(), current_list.end(), std::back_inserter(temprary_branchs),
            [&](const CursorPtr& ins) {
                return !ins->parseFailure() && *ins > *cursor;
            });
        if (!temprary_branchs.isEmpty()) {
            bridge_list = temprary_branchs;
            continue;
        }

        // No further repetition is possible; keep the previous round.
        break;
    }

    // Phase 3: order the kept branches, furthest first.
    CursorList results = bridge_list;
    std::sort(results.begin(), results.end(),
        [](const CursorPtr& a, const CursorPtr& b) { return *a > *b; });

    // Fully matched branches: complete ones seen before the first branch
    // without new errors, plus that branch itself.
    CursorList rets_completely;
    for (const auto& ins : results) {
        if (ins->totalErrorCount() == cursor->totalErrorCount() && rets_completely.isEmpty()) {
            rets_completely.append(ins);
            break;
        }
        else if (ins->parseComplete()) {
            rets_completely.append(ins);
        }
    }

    // Repaired branches: the leading non-failed branch plus every later
    // non-failed branch that shares the token position of the last one kept.
    CursorList rets_modified;
    for (const auto& ins : results) {
        if (ins->parseFailure())
            continue;
        if (rets_modified.isEmpty()
            || rets_modified.last()->token()->position() == ins->token()->position())
            rets_modified.append(ins);
    }

    // Union of both groups: the set of branches allowed to continue.
    for (const auto& rst : rets_modified) {
        if (!rets_completely.contains(rst))
            rets_completely.append(rst);
    }

    if (!rets_completely.isEmpty())
        return rets_completely;
    return results;
}
2025-02-14 01:26:42 +00:00
// Renders the repetition in regex-like notation:
//   {0, INT_MAX} -> "(rule)*", {1, INT_MAX} -> "(rule)+", {0, 1} -> "(rule)?",
//   anything else -> "(rule){min, max}".
QString __repeat_impl::present() const {
    const QString opened = "(" + this->rule_peer->present();
    const bool unbounded = (max_match == INT_MAX);

    if (min_match == 0 && unbounded)
        return opened + QString(")*");
    if (min_match == 1 && unbounded)
        return opened + QString(")+");
    if (min_match == 0 && max_match == 1)
        return opened + QString(")?");

    return opened + QString("){%1, %2}").arg(min_match).arg(max_match);
}
2025-02-04 14:26:34 +00:00
// Creates a syntax exception carrying `message` as its description.
SyntaxException::SyntaxException(const QString& message)
{
    msg_store = message;
}
2025-02-04 14:26:34 +00:00
// Returns the description supplied at construction.
QString SyntaxException::message() const
{
    return this->msg_store;
}
2025-02-07 15:26:20 +00:00
// Creates a named expression rule tagged with the numeric mark `expr_mark`.
ExprRule::ExprRule(const QString& rule_name, int expr_mark)
    : name_store(rule_name),
      mark_store(expr_mark)
{
}
2024-03-17 07:58:28 +00:00
2025-02-04 14:26:34 +00:00
// Returns the rule name supplied at construction.
QString ExprRule::name() const
{
    return this->name_store;
}
2024-03-17 07:58:28 +00:00
2025-02-04 14:26:34 +00:00
int ExprRule::typeMark() const {
return this->mark_store;
}
2024-03-17 07:58:28 +00:00
2024-07-12 09:35:35 +00:00
#include <ast_novel.h>
2025-02-17 05:59:25 +00:00
// Creates a root cursor for the file at `path`; only the file path is set
// here, every other member keeps its default state.
MatchCursor::MatchCursor(const QString& path)
    : _file_path(path)
{
}
2025-02-04 14:26:34 +00:00
2025-02-08 05:40:36 +00:00
// Creates a successor of `other_ptr`: links back to it as the previous
// cursor, copies its file path, accumulated errors, current token and
// remaining words, and deep-copies each expression-level error pack so the
// new cursor can record errors without touching its predecessor's packs.
//
// The failure/complete flags are not copied by this constructor.
// `other_ptr` is dereferenced unconditionally and must not be null.
MatchCursor::MatchCursor(std::shared_ptr<const MatchCursor> other_ptr)
    : _prev_cursor(other_ptr),
      _file_path(other_ptr->_file_path),
      _total_errors(other_ptr->_total_errors),
      _current_token(other_ptr->_current_token),
      _remains_word(other_ptr->_remains_word)
{
    for (const auto& pack : other_ptr->_exprs_errors)
        _exprs_errors << std::make_shared<ErrsPack>(*pack);
}
2025-02-04 14:26:34 +00:00
2025-02-17 05:59:25 +00:00
// Orders cursors by how far they have advanced: a later current-token
// position wins; at equal positions the cursor with more entries in its
// error table wins.
//
// Both cursors' current tokens are dereferenced unconditionally.
bool lib_syntax::MatchCursor::operator>(const MatchCursor& other) const {
    const auto mine = _current_token->position();
    const auto theirs = other._current_token->position();
    if (mine != theirs)
        return mine > theirs;
    return _total_errors.size() > other._total_errors.size();
}
2025-02-08 05:40:36 +00:00
std::shared_ptr<const MatchCursor> MatchCursor::previous() const {
2025-02-07 15:26:20 +00:00
return _prev_cursor;
2025-02-04 14:26:34 +00:00
}
2025-02-08 05:40:36 +00:00
// Returns the path of the file this cursor chain belongs to.
QString MatchCursor::filePath() const
{
    return this->_file_path;
}
2025-02-14 17:48:14 +00:00
// Builds a textual trace of the tokens matched along this cursor chain.
//
// Every cursor that has a predecessor contributes " " followed by:
//   "<B>" for ElementBegin, "<E>" for ElementEnd, and the bound definition's
//   regex() for TokenBind (any other token type contributes nothing after the
//   space). Contributions appear oldest-first. A root cursor (no previous())
//   yields an empty string.
//
// Implemented as a loop rather than recursion so that the call depth does not
// grow with the length of the cursor chain.
QString lib_syntax::MatchCursor::parseSyntax() const {
    QString trace;
    // Predecessors stay alive through the _prev_cursor links owned by the
    // chain, so holding a raw pointer while walking is safe.
    for (const MatchCursor* node = this; node->previous(); node = node->previous().get()) {
        QString token_splitx;
        switch (node->token()->tokenType()) {
        case lib_token::IActionToken::Type::ElementBegin:
            token_splitx = "<B>";
            break;
        case lib_token::IActionToken::Type::TokenBind:
            token_splitx = node->token()->defines()->regex();
            break;
        case lib_token::IActionToken::Type::ElementEnd:
            token_splitx = "<E>";
            break;
        }
        // Walking newest -> oldest, so prepend to keep oldest-first order.
        trace.prepend(" " + token_splitx);
    }
    return trace;
}
2025-02-07 15:26:20 +00:00
void MatchCursor::enterExprs() {
auto new_expr = std::make_shared<ErrsPack>();
this->_exprs_errors.push_back(new_expr);
2025-02-04 14:26:34 +00:00
}
2025-02-24 07:02:23 +00:00
// Records `msg` against word `t` in two places: the cursor-wide error table
// (keyed by the word's position) and the innermost expression error pack.
// Afterwards the failure flag is set to whether the innermost expression has
// collected more than one error.
//
// NOTE(review): the innermost pack is accessed via last() without an
// emptiness check - presumably enterExprs() is always called first; confirm.
void lib_syntax::MatchCursor::logExprsError(std::shared_ptr<const lib_words::IPrimitiveWord> t, const QString& msg) {
    const auto where = t->position();
    // operator[] creates an empty entry for a position seen for the first time.
    _total_errors[where].append(msg);
    _exprs_errors.last()->addError(t, msg);

    // General-purpose failure criterion.
    setFailure(exprsErrorCount() > 1);
}
2025-02-07 15:26:20 +00:00
void MatchCursor::quitExprs() {
this->_exprs_errors.pop_back();
2025-02-04 14:26:34 +00:00
}
2025-02-15 04:57:53 +00:00
bool lib_syntax::MatchCursor::parseFailure() const {
return this->_parse_stop_with_errors;
}
void lib_syntax::MatchCursor::setFailure(bool mark) {
this->_parse_stop_with_errors = mark;
}
bool lib_syntax::MatchCursor::parseComplete() const {
return this->_parse_complete;
2025-02-14 17:48:14 +00:00
}
2025-02-15 04:57:53 +00:00
void lib_syntax::MatchCursor::setComplete(bool mark) {
this->_parse_complete = mark;
2025-02-04 14:26:34 +00:00
}
2025-02-07 15:26:20 +00:00
int MatchCursor::exprsErrorCount() const {
2025-02-08 05:40:36 +00:00
if (this->_exprs_errors.size())
return this->_exprs_errors.last()->errorCount();
return 0;
2025-02-04 14:26:34 +00:00
}
2025-02-07 15:26:20 +00:00
int MatchCursor::totalErrorCount() const {
return this->_total_errors.size();
2025-02-04 14:26:34 +00:00
}
2025-02-07 15:26:20 +00:00
// Returns every recorded error message, flattened into one list in the
// iteration order of the error table.
QList<QString> MatchCursor::totalErrors() const {
    QStringList flattened;
    for (auto it = _total_errors.constBegin(); it != _total_errors.constEnd(); ++it)
        flattened.append(it.value());
    return flattened;
}
void lib_syntax::MatchCursor::mergeWith(const MatchCursor& other) {
for(auto key : other._total_errors.keys()){
if (!this->_total_errors.contains(key))
this->_total_errors[key] = QStringList();
auto values = other._total_errors[key];
auto this_values = this->_total_errors[key];
this_values.append(values);
this->_total_errors[key] = this_values;
}
for (auto key : this->_total_errors.keys()) {
auto values = this->_total_errors[key];
values = values.toSet().toList();
this->_total_errors[key] = values;
}
2025-02-04 14:26:34 +00:00
}
2025-02-07 15:26:20 +00:00
// Updates the cursor position: `t` becomes the current token and `remains`
// the words still to be consumed.
void MatchCursor::setCurrent(std::shared_ptr<const IActionToken> t, std::shared_ptr<const IPrimitiveWord> remains)
{
    _remains_word = remains;
    _current_token = t;
}
2025-02-15 04:57:53 +00:00
std::shared_ptr<const IActionToken> MatchCursor::token() const {
2025-02-04 14:26:34 +00:00
return this->_current_token;
}
2025-02-15 04:57:53 +00:00
std::shared_ptr<const IPrimitiveWord> MatchCursor::words() const {
2025-02-04 14:26:34 +00:00
return this->_remains_word;
2024-06-18 17:09:45 +00:00
}
2025-02-07 15:26:20 +00:00
2025-02-17 05:59:25 +00:00
// Creates an empty error pack.
lib_syntax::MatchCursor::ErrsPack::ErrsPack() = default;
2025-02-14 07:33:23 +00:00
// Copies the error collection of `other`.
lib_syntax::MatchCursor::ErrsPack::ErrsPack(const ErrsPack& other)
    : _error_collection(other._error_collection)
{
}
2025-02-14 07:33:23 +00:00
2025-02-24 07:02:23 +00:00
// Appends the (word, message) pair to this pack's error collection.
void lib_syntax::MatchCursor::ErrsPack::addError(std::shared_ptr<const lib_words::IPrimitiveWord> t, const QString& msg)
{
    _error_collection << std::make_pair(t, msg);
}
2025-02-24 07:02:23 +00:00
// Returns a copy of the recorded (word, message) pairs in insertion order.
QList<std::pair<std::shared_ptr<const lib_words::IPrimitiveWord>, QString>> MatchCursor::ErrsPack::errors() const
{
    return this->_error_collection;
}
uint64_t MatchCursor::ErrsPack::errorCount() const {
return _error_collection.size();
}