改进token解析接口,添加doc_offset

This commit is contained in:
codeboss 2024-06-27 12:55:07 +08:00
parent 44a1743db9
commit 449f898257
11 changed files with 124 additions and 46 deletions

View File

@ -5,7 +5,10 @@ VisualStudioVersion = 17.8.34322.80
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WsNovelParser", "WsNovelParser\WsNovelParser.vcxproj", "{1EF577E8-D92D-4926-9207-1567137BB672}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WsNovelParser", "WsNovelParser\WsNovelParser.vcxproj", "{1EF577E8-D92D-4926-9207-1567137BB672}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{1FF80476-26C9-42FB-BFF6-D587C4941964} = {1FF80476-26C9-42FB-BFF6-D587C4941964}
{C3AADEB5-3695-4DF4-B8E1-D37F928F3B2F} = {C3AADEB5-3695-4DF4-B8E1-D37F928F3B2F} {C3AADEB5-3695-4DF4-B8E1-D37F928F3B2F} = {C3AADEB5-3695-4DF4-B8E1-D37F928F3B2F}
{DAB406C7-174A-47C3-893C-343079396350} = {DAB406C7-174A-47C3-893C-343079396350}
{EF557F71-99AA-4F2B-A5F5-1A4518A11C19} = {EF557F71-99AA-4F2B-A5F5-1A4518A11C19}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libToken", "libToken\libToken.vcxproj", "{DAB406C7-174A-47C3-893C-343079396350}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libToken", "libToken\libToken.vcxproj", "{DAB406C7-174A-47C3-893C-343079396350}"

View File

@ -3,7 +3,7 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LocalDebuggerWorkingDirectory>$(SolutionDir)$(Platform)\$(Configuration)\</LocalDebuggerWorkingDirectory> <LocalDebuggerWorkingDirectory>$(SolutionDir)$(Platform)\$(Configuration)\</LocalDebuggerWorkingDirectory>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor> <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerCommandArguments>--path "D:\手作小说\科学+修仙+创造世界" --dest E:\</LocalDebuggerCommandArguments> <LocalDebuggerCommandArguments>--path "D:\CustomNovels\科学+修仙+创造世界" --dest E:\</LocalDebuggerCommandArguments>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LocalDebuggerCommandArguments>--path "D:\手作小说\科学+修仙+创造世界"</LocalDebuggerCommandArguments> <LocalDebuggerCommandArguments>--path "D:\手作小说\科学+修仙+创造世界"</LocalDebuggerCommandArguments>

View File

@ -586,6 +586,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_story); element_stack.append(dom_story);
dom_story.setAttribute(u8"name", story_node->name()); dom_story.setAttribute(u8"name", story_node->name());
dom_story.setAttribute(u8"address", (qulonglong)story_node.get());
dom_story.setAttribute(u8"file-path", story_node->filePath()); dom_story.setAttribute(u8"file-path", story_node->filePath());
dom_story.setAttribute(u8"sort", story_node->sort()); dom_story.setAttribute(u8"sort", story_node->sort());
@ -603,6 +604,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_fragment); element_stack.append(dom_fragment);
dom_fragment.setAttribute(u8"name", fragment_node->name()); dom_fragment.setAttribute(u8"name", fragment_node->name());
dom_fragment.setAttribute(u8"address", (qulonglong)fragment_node.get());
dom_fragment.setAttribute(u8"file-path", fragment_node->filePath()); dom_fragment.setAttribute(u8"file-path", fragment_node->filePath());
append_tokens(dom_fragment, fragment_node); append_tokens(dom_fragment, fragment_node);
@ -647,6 +649,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_volume); element_stack.append(dom_volume);
dom_volume.setAttribute(u8"name", volume_node->name()); dom_volume.setAttribute(u8"name", volume_node->name());
dom_volume.setAttribute(u8"address", (qulonglong)volume_node.get());
dom_volume.setAttribute(u8"file-path", volume_node->filePath()); dom_volume.setAttribute(u8"file-path", volume_node->filePath());
append_tokens(dom_volume, volume_node); append_tokens(dom_volume, volume_node);
@ -663,6 +666,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_article); element_stack.append(dom_article);
dom_article.setAttribute(u8"name", article_node->name()); dom_article.setAttribute(u8"name", article_node->name());
dom_article.setAttribute(u8"address", (qulonglong)article_node.get());
dom_article.setAttribute(u8"file-path", article_node->filePath()); dom_article.setAttribute(u8"file-path", article_node->filePath());
append_tokens(dom_article, article_node); append_tokens(dom_article, article_node);

View File

@ -12,7 +12,7 @@ std::shared_ptr<const ExpressionRule> ExpressionElement::definedRule() const {
} }
QString ExpressionElement::filePath() const { QString ExpressionElement::filePath() const {
if(!tokens_bind.size()) if (!tokens_bind.size())
throw new SyntaxException(u8"InternalError[0x0002]Ò»¸ö¿ÕµÄ·Ç·¨ÎÞЧ½Úµã"); throw new SyntaxException(u8"InternalError[0x0002]Ò»¸ö¿ÕµÄ·Ç·¨ÎÞЧ½Úµã");
return tokens_bind.first()->file(); return tokens_bind.first()->file();
@ -46,7 +46,7 @@ QString ExpressionContext::currentFile() const { return this->current_file_path;
std::shared_ptr<Expression> ExpressionContext::currentInst() const std::shared_ptr<Expression> ExpressionContext::currentInst() const
{ {
if(expression_stack.size()) if (expression_stack.size())
return expression_stack.last(); return expression_stack.last();
return nullptr; return nullptr;
@ -54,7 +54,7 @@ std::shared_ptr<Expression> ExpressionContext::currentInst() const
void ExpressionContext::pushInst(std::shared_ptr<Expression> current_inst) void ExpressionContext::pushInst(std::shared_ptr<Expression> current_inst)
{ {
if(!expression_stack.size() || expression_stack.last() != current_inst) if (!expression_stack.size() || expression_stack.last() != current_inst)
expression_stack.append(current_inst); expression_stack.append(current_inst);
} }
@ -65,13 +65,13 @@ std::shared_ptr<Expression> ExpressionContext::popInst()
} }
std::shared_ptr<const BaseRule> ExpressionContext::currentExpressionRule() const { std::shared_ptr<const BaseRule> ExpressionContext::currentExpressionRule() const {
if(rule_stack.size()) if (rule_stack.size())
return rule_stack.last(); return rule_stack.last();
return nullptr; return nullptr;
} }
void ExpressionContext::pushExpressionRule(std::shared_ptr<const BaseRule> inst) { void ExpressionContext::pushExpressionRule(std::shared_ptr<const BaseRule> inst) {
if(!rule_stack.size() || rule_stack.last() != inst) if (!rule_stack.size() || rule_stack.last() != inst)
rule_stack.append(inst); rule_stack.append(inst);
} }
@ -79,3 +79,27 @@ std::shared_ptr<const BaseRule> ExpressionContext::popExpressionRule()
{ {
return rule_stack.takeLast(); return rule_stack.takeLast();
} }
void ExpressionContext::appendParseErrors(int start, const QString& e) {
this->errors_storage.append(std::make_tuple(start, e));
}
/**
 * \brief Collect the text of every recorded parse error.
 * \return The error messages, in the order they were recorded.
 */
QStringList ExpressionContext::errors() const {
	QStringList messages;
	for (const auto& entry : errors_storage)
		messages << std::get<1>(entry);
	return messages;
}
/**
 * \brief Discard recorded parse errors.
 *
 * Removes every stored entry whose recorded start value is greater than or
 * equal to `start`; entries with a smaller start value are kept in order.
 *
 * \param start Threshold compared against each entry's recorded start value.
 */
void ExpressionContext::clearErrors(int start) {
	// Walk from the tail so that removing an entry never shifts the
	// indices that are still waiting to be visited.
	for (int pos = errors_storage.size() - 1; pos >= 0; --pos) {
		if (std::get<0>(errors_storage.at(pos)) >= start)
			errors_storage.removeAt(pos);
	}
}
/**
 * \brief Expose the rules currently held on the rule stack.
 * \return A copy of the rule stack; the last element is the most recently
 *         pushed rule.
 */
QList<std::shared_ptr<const BaseRule>> ExpressionContext::currentExpressionRuleStack() const
{
	return this->rule_stack;
}

View File

@ -89,6 +89,7 @@ namespace ast_basic {
QList<std::shared_ptr<const lib_syntax::BaseRule>> rule_stack; QList<std::shared_ptr<const lib_syntax::BaseRule>> rule_stack;
QList<std::shared_ptr<Expression>> expression_stack; QList<std::shared_ptr<Expression>> expression_stack;
QString current_file_path; QString current_file_path;
QList<std::tuple<int, QString>> errors_storage;
public: public:
ExpressionContext(); ExpressionContext();
@ -104,6 +105,10 @@ namespace ast_basic {
std::shared_ptr<const lib_syntax::BaseRule> currentExpressionRule() const override; std::shared_ptr<const lib_syntax::BaseRule> currentExpressionRule() const override;
void pushExpressionRule(std::shared_ptr<const lib_syntax::BaseRule> inst) override; void pushExpressionRule(std::shared_ptr<const lib_syntax::BaseRule> inst) override;
std::shared_ptr<const lib_syntax::BaseRule> popExpressionRule() override; std::shared_ptr<const lib_syntax::BaseRule> popExpressionRule() override;
/**
 * \brief Rules currently pushed on this context.
 *
 * Implements the pure virtual declared on the lib_syntax context interface,
 * so it is marked `override` like the sibling declarations; the compiler
 * will then reject any accidental signature drift.
 *
 * \return A copy of the rule stack; the last element is the most recently
 *         pushed rule.
 */
QList<std::shared_ptr<const lib_syntax::BaseRule>> currentExpressionRuleStack() const override;
/**
 * \brief Record a parse error message together with its `start` value.
 * \param start Value stored alongside the message and later compared by
 *              clearErrors() (presumably a position — confirm with callers).
 * \param error_msg Error message text.
 */
void appendParseErrors(int start, const QString& error_msg) override;
/**
 * \brief Messages of all recorded parse errors.
 * \return The stored messages, in the order they were recorded.
 */
QStringList errors() const override;
/**
 * \brief Remove recorded parse errors whose stored start value is
 *        greater than or equal to `start`.
 * \param start Threshold compared against each entry's stored start value.
 */
void clearErrors(int start) override;
}; };
} }

View File

@ -1,5 +1,6 @@
#include "libsyntax.h" #include "libsyntax.h"
#include "ast_basic.h" #include "ast_basic.h"
#include <QDebug>
using namespace lib_syntax; using namespace lib_syntax;
using namespace std; using namespace std;
@ -14,6 +15,10 @@ std::tuple<std::shared_ptr<const Expression>, std::shared_ptr<const IWordBase>>
if (!head) if (!head)
throw new InputTerminal(rt_inst->currentFile()); throw new InputTerminal(rt_inst->currentFile());
if (head->content() == u8"初遇江枫的") {
qDebug() << u8"初遇江枫的";
}
auto match_result = define_peer->analysis(head); auto match_result = define_peer->analysis(head);
if (std::get<0>(match_result)) { if (std::get<0>(match_result)) {
rt_inst->currentInst()->addToken(std::get<0>(match_result)); rt_inst->currentInst()->addToken(std::get<0>(match_result));

View File

@ -38,6 +38,10 @@ namespace lib_syntax {
virtual void setCurrentFile(const QString &path) = 0; virtual void setCurrentFile(const QString &path) = 0;
virtual QString currentFile() const = 0; virtual QString currentFile() const = 0;
/**
 * \brief Record a parse error message.
 * \param start Value associated with the error (presumably the position
 *              where the failed parse began — TODO confirm with callers).
 * \param error_msg Error message text.
 */
virtual void appendParseErrors(int start, const QString &error_msg) = 0;
/**
 * \brief Messages of the parse errors recorded so far.
 * \return The recorded error messages.
 */
virtual QStringList errors() const = 0;
/**
 * \brief Discard recorded parse errors selected by `start`.
 *
 * NOTE(review): the ExpressionContext implementation removes every entry
 * whose recorded start value is >= `start`; confirm this is the intended
 * contract for all implementations.
 *
 * \param start Threshold used to select the errors to discard.
 */
virtual void clearErrors(int start) = 0;
/** /**
* \brief µ±Ç°±í´ïÊ½ÔªËØ. * \brief µ±Ç°±í´ïÊ½ÔªËØ.
* *
@ -50,6 +54,7 @@ namespace lib_syntax {
virtual std::shared_ptr<const BaseRule> currentExpressionRule() const = 0; virtual std::shared_ptr<const BaseRule> currentExpressionRule() const = 0;
virtual void pushExpressionRule(std::shared_ptr<const BaseRule> inst) = 0; virtual void pushExpressionRule(std::shared_ptr<const BaseRule> inst) = 0;
virtual std::shared_ptr<const BaseRule> popExpressionRule() = 0; virtual std::shared_ptr<const BaseRule> popExpressionRule() = 0;
/**
 * \brief The expression rules currently held by this context.
 * \return The list of rules; in the ExpressionContext implementation this
 *         is a copy of its rule stack, most recently pushed rule last.
 */
virtual QList<std::shared_ptr<const BaseRule>> currentExpressionRuleStack() const = 0;
}; };
/** /**

View File

@ -30,15 +30,16 @@ QList<std::shared_ptr<const IWordBase>> WordReader::extract_from(const QString&
QList<std::shared_ptr<const IWordBase>> ret_list; QList<std::shared_ptr<const IWordBase>> ret_list;
int line_number = 1; int line_number = 1;
while (!tin.atEnd()) { while (!tin.atEnd()) {
auto doc_pos = tin.pos();
auto line = tin.readLine() + "\n"; auto line = tin.readLine() + "\n";
ret_list.append(this->parse_line(line_number++, line, path)); ret_list.append(this->parse_line(doc_pos, line_number++, line, path));
} }
return ret_list; return ret_list;
} }
#include <QRegExp> #include <QRegExp>
QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(int row, const QString& line_text, const QString& path) const { QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(int start_pos, int row, const QString& line_text, const QString& path) const {
QRegExp split_char(u8"\\s"); QRegExp split_char(u8"\\s");
auto words = line_text.split(split_char, QString::SplitBehavior::SkipEmptyParts); auto words = line_text.split(split_char, QString::SplitBehavior::SkipEmptyParts);
@ -46,7 +47,7 @@ QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(int row, const QS
int columns_offset = 0; int columns_offset = 0;
for (auto& w : words) { for (auto& w : words) {
auto column_start = line_text.indexOf(w, columns_offset); auto column_start = line_text.indexOf(w, columns_offset);
auto token = std::make_shared<WordContent>(row, column_start + 1, w, path); auto token = std::make_shared<WordContent>(row, column_start + 1, start_pos+column_start, w, path);
primary_words << token; primary_words << token;
columns_offset = column_start + w.length(); columns_offset = column_start + w.length();
@ -59,10 +60,15 @@ TokenException::TokenException(const QString& message) : msg_store(message) {}
QString TokenException::message() const { return msg_store; } QString TokenException::message() const { return msg_store; }
WordContent::WordContent(int r, int c, const QString& t, const QString& p) : row_n(r), col_n(c), text_n(t), path_p(p) {} WordContent::WordContent(int r, int c, int pos, const QString& t, const QString& p)
: row_n(r), col_n(c), doc_offset(pos), text_n(t), path_p(p) {}
QString WordContent::file() const { return path_p; } QString WordContent::file() const { return path_p; }
int lib_token::WordContent::position() const {
return doc_offset;
}
QString WordContent::content() const { return text_n; } QString WordContent::content() const { return text_n; }
int WordContent::row() const { return row_n; } int WordContent::row() const { return row_n; }
@ -81,6 +87,10 @@ QString WordImpl::file() const
return content_ptr->file(); return content_ptr->file();
} }
int lib_token::WordImpl::position() const {
return content_ptr->position();
}
QString WordImpl::content() const QString WordImpl::content() const
{ {
return content_ptr->content(); return content_ptr->content();

View File

@ -16,6 +16,11 @@ namespace lib_token {
* @return * @return
*/ */
virtual QString file() const = 0; virtual QString file() const = 0;
/**
 * @brief Get the position of this word.
 * @return The start position. The implementations in this library return
 *         the offset value they were constructed with; WordReader computes
 *         it as the stream position at the start of the line plus the
 *         word's index within that line.
 */
virtual int position() const = 0;
/** /**
* @brief ´ÊÓïÄÚÈÝ * @brief ´ÊÓïÄÚÈÝ
* @return * @return
@ -44,15 +49,16 @@ namespace lib_token {
*/ */
class WordContent : public IWordBase { class WordContent : public IWordBase {
private: private:
int row_n, col_n; int row_n, col_n, doc_offset;
QString text_n, path_p; QString text_n, path_p;
public: public:
WordContent(int r, int c, const QString& t, const QString& p); WordContent(int r, int c, int pos, const QString& t, const QString& p);
// WordBase interface // WordBase interface
public: public:
virtual QString file() const override; virtual QString file() const override;
virtual int position() const override;
virtual QString content() const override; virtual QString content() const override;
virtual int row() const override; virtual int row() const override;
virtual int column() const override; virtual int column() const override;
@ -69,6 +75,7 @@ namespace lib_token {
// ͨ¹ý IWordBase ¼Ì³Ð // ͨ¹ý IWordBase ¼Ì³Ð
QString file() const override; QString file() const override;
virtual int position() const override;
QString content() const override; QString content() const override;
int row() const override; int row() const override;
int column() const override; int column() const override;
@ -136,7 +143,7 @@ namespace lib_token {
*/ */
class LIBTOKEN_EXPORT WordReader { class LIBTOKEN_EXPORT WordReader {
private: private:
QList<std::shared_ptr<const IWordBase>> parse_line(int row, const QString& line_text, const QString& path) const; QList<std::shared_ptr<const IWordBase>> parse_line(int start_pos, int row, const QString& line_text, const QString& path) const;
QList<std::shared_ptr<const IWordBase>> extract_from(const QString& path) const; QList<std::shared_ptr<const IWordBase>> extract_from(const QString& path) const;
public: public:

View File

@ -4,11 +4,15 @@ using namespace example_novel;
using namespace lib_token; using namespace lib_token;
TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType) TokenContent::TokenContent(int r, int c, int pos, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(paramType) {} : row_n(r), col_n(c), doc_offset(pos), text_n(t), path_p(p), type_def(paramType) {}
QString TokenContent::file() const { return path_p; } QString TokenContent::file() const { return path_p; }
int lib_token::TokenContent::position() const {
return doc_offset;
}
QString TokenContent::content() const { return text_n; } QString TokenContent::content() const { return text_n; }
int TokenContent::row() const { return row_n; } int TokenContent::row() const { return row_n; }
@ -37,11 +41,13 @@ LeftBracket::analysis(std::shared_ptr<const IWordBase> content) const {
if (!text.startsWith(regex())) if (!text.startsWith(regex()))
return std::make_tuple(nullptr, content); return std::make_tuple(nullptr, content);
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content().mid(0, regex().length()), auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
content->file(), shared_from_this()); content->content().mid(0, regex().length()), content->file(), shared_from_this());
auto t_remains = content->content().mid(regex().length()); auto t_remains = content->content().mid(regex().length());
if (t_remains.length() > 0) { if (t_remains.length() > 0) {
auto remains = std::make_shared<WordContent>(content->row(), content->column() + regex().length(), t_remains, content->file()); auto remains = std::make_shared<WordContent>(content->row(), content->column() + regex().length(),
content->position() + regex().length(), t_remains, content->file());
return std::make_tuple(token_inst, remains); return std::make_tuple(token_inst, remains);
} }
return std::make_tuple(token_inst, nullptr); return std::make_tuple(token_inst, nullptr);
@ -82,7 +88,8 @@ Keywords::analysis(std::shared_ptr<const IWordBase> content) const {
return std::make_tuple(nullptr, content); return std::make_tuple(nullptr, content);
} }
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content(), content->file(), shared_from_this()); auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
content->content(), content->file(), shared_from_this());
return std::make_tuple(token_inst, nullptr); return std::make_tuple(token_inst, nullptr);
} }
@ -111,7 +118,8 @@ Numbers::analysis(std::shared_ptr<const IWordBase> content) const {
if (regx.indexIn(text) == -1) if (regx.indexIn(text) == -1)
return std::make_tuple(nullptr, content); return std::make_tuple(nullptr, content);
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->content(), content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
content->content(), content->file(), shared_from_this());
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);
} }
@ -135,9 +143,11 @@ VTextSection::analysis(std::shared_ptr<const IWordBase> content) const {
auto match = regx.cap(1); auto match = regx.cap(1);
auto remains = content->content().mid(match.length()); auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
match, content->file(), shared_from_this());
if (remains.length()) { if (remains.length()) {
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(), remains, content->file()); auto t_remains = std::make_shared<WordContent>(content->row(), content->column(),
content->position() + match.length(), remains, content->file());
return std::make_tuple(tinst, t_remains); return std::make_tuple(tinst, t_remains);
} }
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);
@ -172,9 +182,11 @@ std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase> > Nam
auto match = regx.cap(1); auto match = regx.cap(1);
auto remains = content->content().mid(match.length()); auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
match, content->file(), shared_from_this());
if (remains.length()) { if (remains.length()) {
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(), remains, content->file()); auto t_remains = std::make_shared<WordContent>(content->row(), content->column(),
content->position() + match.length(), remains, content->file());
return std::make_tuple(tinst, t_remains); return std::make_tuple(tinst, t_remains);
} }
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);
@ -204,9 +216,11 @@ std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>> Decl
auto remains = content->content().mid(regex().size()); auto remains = content->content().mid(regex().size());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), u8"#", content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
u8"#", content->file(), shared_from_this());
if (remains.length()) { if (remains.length()) {
auto t_remains = std::make_shared<WordContent>(content->row(), content->column()+1, remains, content->file()); auto t_remains = std::make_shared<WordContent>(content->row(), content->column()+1,
content->position() + regex().length(), remains, content->file());
return std::make_tuple(tinst, t_remains); return std::make_tuple(tinst, t_remains);
} }
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);

View File

@ -13,16 +13,17 @@ namespace lib_token {
*/ */
class TokenContent : public IToken { class TokenContent : public IToken {
private: private:
int row_n, col_n; int row_n, col_n, doc_offset;
QString text_n, path_p; QString text_n, path_p;
std::shared_ptr<const ITokenDefine> type_def; std::shared_ptr<const ITokenDefine> type_def;
public: public:
TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType); TokenContent(int r, int c, int pos, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType);
// WordBase interface // WordBase interface
public: public:
virtual QString file() const override; virtual QString file() const override;
virtual int position() const override;
virtual QString content() const override; virtual QString content() const override;
virtual int row() const override; virtual int row() const override;
virtual int column() const override; virtual int column() const override;