diff --git a/libSyntax/libsyntax.cpp b/libSyntax/libsyntax.cpp index dcf1ac8..a47b5d9 100644 --- a/libSyntax/libsyntax.cpp +++ b/libSyntax/libsyntax.cpp @@ -9,13 +9,13 @@ using namespace lib_token; using namespace lib_words; using namespace ast_basic; -Any::Any(const QList> mbrs) : mbrs_store(mbrs) { } +__anyone_impl::__anyone_impl(const QList> mbrs) : mbrs_store(mbrs) { } -QList> Any::children() const { +QList> __anyone_impl::children() const { return mbrs_store; } -QList> Any::parse(std::shared_ptr cursor) const { +QList> __anyone_impl::parse(std::shared_ptr cursor) const { if (cursor->mustStop()) return QList>() << cursor; @@ -23,10 +23,24 @@ QList> Any::parse(std::shared_ptrchildren()) result_list.append(rx->parse(cursor)); + // 完全匹配分支 + decltype(result_list) completely_list; + std::copy_if(result_list.begin(), result_list.end(), std::back_inserter(completely_list), + [&](std::shared_ptr ins) { return cursor->totalErrorCount() == ins->totalErrorCount(); }); + if (completely_list.size()) + return completely_list; + + // 经过修正的分支 + decltype(result_list) modify_list; + std::copy_if(result_list.begin(), result_list.end(), std::back_inserter(modify_list), + [&](std::shared_ptr ins) { return !ins->mustStop(); }); + if (modify_list.size()) + return modify_list; + return result_list; } -QString Any::present() const { +QString __anyone_impl::present() const { QString members_content; for (auto& it : children()) { members_content += it->present() + "|"; @@ -34,13 +48,13 @@ QString Any::present() const { return members_content.mid(0, members_content.size() - 1); } -Seqs::Seqs(const QList> mbrs) : mbrs_store(mbrs) { } +__sequence_impl::__sequence_impl(const QList> mbrs) : mbrs_store(mbrs) { } -QList> Seqs::children() const { +QList> __sequence_impl::children() const { return mbrs_store; } -QList> Seqs::parse(std::shared_ptr cursor) const { +QList> __sequence_impl::parse(std::shared_ptr cursor) const { if (cursor->mustStop()) return QList>() << cursor; @@ -50,105 +64,144 @@ QList> Seqs::parse(std::shared_ptr> current_result; std::for_each(bridge_list.begin(), bridge_list.end(), [&](std::shared_ptr vcurs) { - if (vcurs->mustStop()) - results.push_back(vcurs); - else { - current_result.append(rule->parse(vcurs)); - } - }); + if (!vcurs->mustStop()) { + current_result.append(rule->parse(vcurs)); + } + else { + results.push_back(vcurs); + } + }); bridge_list = current_result; } + // 完全匹配分支集合 + decltype(bridge_list) completely_list; + std::copy_if(bridge_list.begin(), bridge_list.end(), std::back_inserter(completely_list), + [&](std::shared_ptr ins) { return ins->totalErrorCount() == cursor->totalErrorCount(); }); + if (completely_list.size()) + return completely_list; + + // 经过修正的分支 + decltype(bridge_list) modify_list; + std::copy_if(bridge_list.begin(), bridge_list.end(), std::back_inserter(modify_list), + [](std::shared_ptr ins) { return !ins->mustStop(); }); + if (modify_list.size()) + return modify_list; + results.append(bridge_list); return results; } -QString Seqs::present() const { +QString __sequence_impl::present() const { QString content; for (auto& it : children()) content += it->present() + " "; return content.mid(0, content.size() - 1); } -Rept::Rept(std::shared_ptr rule, int min, int max) +__repeat_impl::__repeat_impl(std::shared_ptr rule, int min, int max) : rule_peer(rule), min_match(min), max_match(max) { } -QList> Rept::children() const { +QList> __repeat_impl::children() const { return QList>() << rule_peer; } -QList> Rept::parse(std::shared_ptr cursor) const { +#include +QList> __repeat_impl::parse(std::shared_ptr cursor) const { if (cursor->mustStop()) return QList>() << cursor; - QList> results; - QList> bridge_list{ cursor }; - // 最小重复次数匹配 - for (auto idx = 0; idx < min_match; ++idx) { - QList> current_list; + QList> max_match_begin = { cursor }; + if (min_match) { + QList> temp_rules; + for (auto idx = 0; idx < min_match; ++idx) + temp_rules << this->rule_peer; - // 迭代每一次可能匹配 - std::for_each(bridge_list.begin(), bridge_list.end(), - [&](std::shared_ptr curs) { - if (curs->mustStop()) - results.push_back(curs); - else { - current_list.append(this->rule_peer->parse(curs)); - } - }); - - bridge_list = current_list; + auto seqs_rule = std::make_shared<__sequence_impl>(temp_rules); + max_match_begin = seqs_rule->parse(cursor); } - // 归并失败分支 - std::copy_if(bridge_list.begin(), bridge_list.end(), std::back_inserter(results), - [&](std::shared_ptr ins) { return ins->mustStop(); }); - - // 清除匹配失败分支 - for (auto idx = 0; idx < bridge_list.size(); ++idx) - if (bridge_list.at(idx)->mustStop()) - bridge_list.removeAt(idx--); - - // 不满足最小匹配 - if (!bridge_list.size()) - return results; + // 如果不满足最小重复匹配次数要求,则返回 + int continue_count = std::count_if(max_match_begin.begin(), max_match_begin.end(), + [](std::shared_ptr ins) { return !ins->mustStop(); }); + if (!continue_count) + return max_match_begin; + // 最小匹配次数中所有错误分支都是无用的、需要舍弃 + for (auto idx = 0; idx < max_match_begin.size(); ++idx) { + auto current_cursor = max_match_begin.at(idx); + if (current_cursor->mustStop()) + max_match_begin.removeAt(idx--); + } + QList> results = max_match_begin; + decltype(results) bridge_list = max_match_begin; // 尝试重复匹配最大次数 - for (auto idx = min_match; idx < max_match; ++idx) { + for (auto idx = min_match; idx < max_match && bridge_list.size(); ++idx) { QList> current_list; // 匹配迭代一次 std::for_each(bridge_list.begin(), bridge_list.end(), [&](std::shared_ptr ins) { - current_list.append(this->rule_peer->parse(ins)); - }); + current_list.append(this->rule_peer->parse(ins)); + }); - // 移除失败分支 - for (auto idx = 0; idx < current_list.size(); ++idx) { - auto rst_branch = current_list.at(idx); - if (rst_branch->mustStop() && rst_branch->currentWords()) { - results.append(rst_branch->previous()); - current_list.removeAt(idx--); - } - if (rst_branch->mustStop() && !rst_branch->currentWords()) { + for (auto vdx = 0; vdx < current_list.size(); ++vdx) { + auto rst_branch = current_list.at(vdx); + if (rst_branch->mustStop()) { results.append(rst_branch); - current_list.removeAt(idx--); + current_list.removeAt(vdx--); } } - if (!current_list.size()) - break; - bridge_list = current_list; } - results.append(bridge_list); + + + std::sort(results.begin(), results.end(), + [](std::shared_ptr a, std::shared_ptr b) { + return a->currentToken()->position() > b->currentToken()->position(); + }); + + // 提取完全匹配的分支 + decltype(results) rets_completely; + std::for_each(results.begin(), results.end(), + [&](std::shared_ptr ins) { + if (ins->totalErrorCount() == cursor->totalErrorCount()) { + if (!rets_completely.size()) { + rets_completely.append(ins); + } + else if (rets_completely.last()->currentToken()->position() == ins->currentToken()->position()) { + rets_completely.append(ins); + } + } + }); + + // 提取经过修正的分支 + decltype(results) rets_modified; + std::for_each(results.begin(), results.end(), + [&](std::shared_ptr ins) { + if (!ins->mustStop()) { + if (!rets_modified.size()) { + rets_modified.append(ins); + } + else if (rets_modified.last()->currentToken()->position() == ins->currentToken()->position()) { + rets_modified.append(ins); + } + } + }); + + // 允许持续的集合 + rets_completely.append(rets_modified); + if (rets_completely.size()) + return rets_completely; + return results; } -QString Rept::present() const { +QString __repeat_impl::present() const { if (min_match == 0 && max_match == INT_MAX) return "(" + this->rule_peer->present() + QString(")*"); else if (min_match == 1 && max_match == INT_MAX) @@ -212,7 +265,7 @@ void MatchCursor::quitExprs() { } bool MatchCursor::mustStop() const { - return exprsErrorCount() >= 2 || parse_stop(); + return exprsErrorCount() >= 2; } int MatchCursor::exprsErrorCount() const { diff --git a/libSyntax/libsyntax.h b/libSyntax/libsyntax.h index 8316db6..a1f520b 100644 --- a/libSyntax/libsyntax.h +++ b/libSyntax/libsyntax.h @@ -5,6 +5,7 @@ #include "tokens_impl.h" #include #include +#include #include #include #include "ast_basic.h" @@ -106,12 +107,12 @@ namespace lib_syntax { /** * @brief 语法规则或匹配 */ - class LIBSYNTAX_EXPORT Any : public IBasicRule, public std::enable_shared_from_this { + class LIBSYNTAX_EXPORT __anyone_impl : public IBasicRule, public std::enable_shared_from_this<__anyone_impl> { private: QList> mbrs_store; public: - Any(const QList> mbrs); + __anyone_impl(const QList> mbrs); // IBasicRule interface public: @@ -123,12 +124,12 @@ namespace lib_syntax { /** * @brief 语法规则序列匹配 */ - class LIBSYNTAX_EXPORT Seqs : public IBasicRule, public std::enable_shared_from_this { + class LIBSYNTAX_EXPORT __sequence_impl : public IBasicRule, public std::enable_shared_from_this<__sequence_impl> { private: QList> mbrs_store; public: - Seqs(const QList> mbrs); + __sequence_impl(const QList> mbrs); // IBasicRule interface public: @@ -140,13 +141,13 @@ namespace lib_syntax { /** * @brief 语法规则重复匹配 */ - class LIBSYNTAX_EXPORT Rept : public IBasicRule, public std::enable_shared_from_this { + class LIBSYNTAX_EXPORT __repeat_impl : public IBasicRule, public std::enable_shared_from_this<__repeat_impl> { private: std::shared_ptr rule_peer; int min_match, max_match; public: - Rept(std::shared_ptr rule, int min, int max); + __repeat_impl(std::shared_ptr rule, int min, int max); // IBasicRule interface public: @@ -180,12 +181,12 @@ namespace lib_syntax { */ template XProc = nullptr> requires std::derived_from - class TokenMatch : public IBasicRule, public std::enable_shared_from_this> { + class __token_match_impl : public IBasicRule, public std::enable_shared_from_this<__token_match_impl> { private: std::shared_ptr _define_peers; public: - TokenMatch(std::shared_ptr define) : _define_peers(define) { } + __token_match_impl(std::shared_ptr define) : _define_peers(define) { } // IBasicRule interface public: @@ -202,7 +203,9 @@ namespace lib_syntax { clone_ins->logExprsError(QString("Syntax[0x00001]输入流提前结束,<%1>").arg(current->filePath())); return QList>() << clone_ins; } - + if (w_this->content() == "故事介绍段落}") { + int c = w_this->column(); + } auto t_this = current->currentToken(); auto match_result = _define_peers->analysis(w_this); @@ -300,25 +303,45 @@ namespace lib_syntax { // 表达式语法解析 auto nbranch = this->expr_rule_parse(ncursor); + + // 语法完全匹配的分支 decltype(nbranch) branch_procs; - std::for_each(nbranch.begin(), nbranch.end(), [&](std::shared_ptr curs) { - if (curs->mustStop()) { - branch_procs.append(curs); - } - else { - auto t_end = curs->currentToken(); - auto w_end = curs->currentWords(); - - auto ecursor = std::make_shared(curs); - ecursor->quitExprs(); - - auto split_end = std::make_shared>(split_begin, t_end); - ecursor->setCurrent(split_end, w_end); - branch_procs.append(ecursor); - } + std::copy_if(nbranch.begin(), nbranch.end(), + std::back_inserter(branch_procs), + [&](std::shared_ptr ins) { + return ins->totalErrorCount() == cursor->totalErrorCount(); }); - return branch_procs; + // 语法修正后能匹配的分支 + if (!branch_procs.size()) { + std::copy_if(nbranch.begin(), nbranch.end(), + std::back_inserter(branch_procs), + [](std::shared_ptr ins) { return !ins->mustStop(); }); + } + + if (!branch_procs.size()) + branch_procs = nbranch; + + + decltype(nbranch) results_fnl; + std::for_each(branch_procs.begin(), branch_procs.end(), + [&](std::shared_ptr curs) { + if (curs->mustStop()) + results_fnl.append(curs); + else { + auto t_end = curs->currentToken(); + auto w_end = curs->currentWords(); + + auto ecursor = std::make_shared(curs); + ecursor->quitExprs(); + + auto split_end = std::make_shared>(split_begin, t_end); + ecursor->setCurrent(split_end, w_end); + results_fnl.append(ecursor); + } + }); + + return results_fnl; } protected: diff --git a/libSyntax/syntax_novel.cpp b/libSyntax/syntax_novel.cpp index 1a1d1a3..64696a8 100644 --- a/libSyntax/syntax_novel.cpp +++ b/libSyntax/syntax_novel.cpp @@ -49,22 +49,35 @@ void rank_set(std::shared_ptr inst, std::shared_ptrsetRank(token->content().toInt()); } -using TextDeclsSyntaxDef = lib_composit::Multi, TMatch, TMatch, TMatch>>; +auto content_extract = [](std::shared_ptr token) { + QString content; + while (token) { + if (token->defines()) + content.prepend(token->content() + " "); + token = token->prevToken(); + } + return content; + }; + +using TextDeclsSyntaxDef = Any, Match, Match, Match>; class DeclSyntax : public ElementRule { public: DeclSyntax() : ElementRule("decl_section") { } // 通过 ElementRule 继承 QList> expr_rule_parse(std::shared_ptr cursor) const override { - return _children_store->parse(cursor); + auto syntax_text = this->present(); + auto current_rst = content_extract(cursor->currentToken()); + auto rst = _children_store->parse(cursor); + return rst; } }; -using PointSyntaxDef = lib_composit::SeqsR, TMatch, TAct, - Opt, - TMatch>; +using PointSyntaxDef = lib_composit::Seqs, Match, Action, + OptMulti, + Match>; class PointSyntax : public ElementRule { public: PointSyntax() : ElementRule("point_define") { } @@ -77,9 +90,9 @@ public: -using ReferSyntaxDef = lib_composit::SeqsR, TMatch, TMatch, TAct, TMatch, TAct, TMatch, TAct, - Opt, - TMatch>; +using ReferSyntaxDef = lib_composit::Seqs, Match, Match, Action, Match, Action, Match, Action, + OptMulti, + Match>; class ReferSyntax : public ElementRule { public: ReferSyntax() : ElementRule < PointRefers, (int) NovelNode::PointRefers, ReferSyntaxDef>("point_refer") { } @@ -92,9 +105,9 @@ public: -using SliceSyntaxDef = lib_composit::SeqsR, TMatch, TAct, - lib_composit::OptMulti>, - TMatch>; +using SliceSyntaxDef = lib_composit::Seqs, Match, Action, + lib_composit::OptMulti>, + Match>; class SliceSyntax : public ElementRule { public: SliceSyntax() : ElementRule("slice_define") { } @@ -107,9 +120,9 @@ public: -using StorySyntaxDef = lib_composit::SeqsR, TMatch, TAct, - lib_composit::OptMulti>, - TMatch>; +using StorySyntaxDef = lib_composit::Seqs, Match, Action, + lib_composit::OptMulti>, + Match>; class StorySyntax : public ElementRule { public: StorySyntax() : ElementRule("story_define") { } @@ -122,9 +135,9 @@ public: -using ArticleSyntaxDef = lib_composit::SeqsR, TMatch, TAct, - lib_composit::OptMulti>, - TMatch>; +using ArticleSyntaxDef = lib_composit::Seqs, Match, Action, + lib_composit::OptMulti>, + Match>; class ArticleSyntax : public ElementRule { public: ArticleSyntax() : ElementRule("article_define") { } @@ -137,9 +150,9 @@ public: -using VolumeSyntaxDef = lib_composit::SeqsR, TMatch, TAct, - lib_composit::OptMulti>, - TMatch>; +using VolumeSyntaxDef = lib_composit::Seqs, Match, Action, + lib_composit::OptMulti>, + Match>; class VolumeSyntax : public ElementRule { public: VolumeSyntax() : ElementRule("volume_define") { } @@ -152,7 +165,7 @@ public: -using RankSyntaxDef = lib_composit::SeqsR, TMatch, TAct>; +using RankSyntaxDef = lib_composit::Seqs, Match, Action>; class RankSyntax : public ElementRule { public: RankSyntax() : ElementRule("rank_define") { } @@ -165,7 +178,7 @@ public: -using DocSyntaxDef = lib_composit::SeqsR, lib_composit::OptMulti>>; +using DocSyntaxDef = lib_composit::Seqs, lib_composit::OptMulti>>; class DocumentSyntax : public ElementRule { public: DocumentSyntax() : ElementRule("decls-doc") { } diff --git a/libSyntax/syntax_templets.h b/libSyntax/syntax_templets.h index 13d2a71..8fed3d2 100644 --- a/libSyntax/syntax_templets.h +++ b/libSyntax/syntax_templets.h @@ -25,32 +25,32 @@ namespace lib_composit { }; template - class AnyR : public lib_syntax::Any, public __types_list { + class Any : public lib_syntax::__anyone_impl, public __types_list { public: - AnyR() : Any(__types_list::getRules()) { } + Any() : __anyone_impl(__types_list::getRules()) { } }; template - class SeqsR : public lib_syntax::Seqs, public __types_list { + class Seqs : public lib_syntax::__sequence_impl, public __types_list { public: - SeqsR() : Seqs(__types_list::getRules()) { } + Seqs() : __sequence_impl(__types_list::getRules()) { } }; template requires std::derived_from - class ReptR : public lib_syntax::Rept { + class Rept : public lib_syntax::__repeat_impl { public: - ReptR() : Rept(std::make_shared(), min, max) { } + Rept() : __repeat_impl(std::make_shared(), min, max) { } }; - template class OptMulti : public ReptR { }; - template class Multi : public ReptR { }; - template class Opt : public ReptR { }; + template class OptMulti : public Rept { }; + template class Multi : public Rept { }; + template class Opt : public Rept { }; template p = nullptr> requires std::derived_from&& std::derived_from - class TAct : public lib_syntax::TokenMatch { + class Action : public lib_syntax::__token_match_impl { public: - TAct() : TokenMatch(std::make_shared()) { } + Action() : __token_match_impl(std::make_shared()) { } }; template requires std::derived_from @@ -59,5 +59,5 @@ namespace lib_composit { } template requires std::derived_from - class TMatch : public TAct> { }; + class Match : public Action> { }; } diff --git a/libSyntax/tokens_impl.h b/libSyntax/tokens_impl.h index d6f504d..89a035c 100644 --- a/libSyntax/tokens_impl.h +++ b/libSyntax/tokens_impl.h @@ -114,6 +114,8 @@ namespace lib_token { } uint64_t position() const override { + if (!this->prevToken()) + return 0; return this->prevToken()->position(); } @@ -122,10 +124,14 @@ namespace lib_token { } int row() const override { + if (!this->prevToken()) + return 0; return this->prevToken()->row(); } int column() const override { + if (!this->prevToken()) + return 0; return this->prevToken()->column(); }