改进token解析接口,添加doc_offset

This commit is contained in:
codeboss 2024-06-27 12:55:07 +08:00
parent 44a1743db9
commit 449f898257
11 changed files with 124 additions and 46 deletions

View File

@ -5,7 +5,10 @@ VisualStudioVersion = 17.8.34322.80
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WsNovelParser", "WsNovelParser\WsNovelParser.vcxproj", "{1EF577E8-D92D-4926-9207-1567137BB672}"
ProjectSection(ProjectDependencies) = postProject
{1FF80476-26C9-42FB-BFF6-D587C4941964} = {1FF80476-26C9-42FB-BFF6-D587C4941964}
{C3AADEB5-3695-4DF4-B8E1-D37F928F3B2F} = {C3AADEB5-3695-4DF4-B8E1-D37F928F3B2F}
{DAB406C7-174A-47C3-893C-343079396350} = {DAB406C7-174A-47C3-893C-343079396350}
{EF557F71-99AA-4F2B-A5F5-1A4518A11C19} = {EF557F71-99AA-4F2B-A5F5-1A4518A11C19}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libToken", "libToken\libToken.vcxproj", "{DAB406C7-174A-47C3-893C-343079396350}"

View File

@ -3,7 +3,7 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LocalDebuggerWorkingDirectory>$(SolutionDir)$(Platform)\$(Configuration)\</LocalDebuggerWorkingDirectory>
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
<LocalDebuggerCommandArguments>--path "D:\手作小说\科学+修仙+创造世界" --dest E:\</LocalDebuggerCommandArguments>
<LocalDebuggerCommandArguments>--path "D:\CustomNovels\科学+修仙+创造世界" --dest E:\</LocalDebuggerCommandArguments>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LocalDebuggerCommandArguments>--path "D:\手作小说\科学+修仙+创造世界"</LocalDebuggerCommandArguments>

View File

@ -586,6 +586,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_story);
dom_story.setAttribute(u8"name", story_node->name());
dom_story.setAttribute(u8"address", (qulonglong)story_node.get());
dom_story.setAttribute(u8"file-path", story_node->filePath());
dom_story.setAttribute(u8"sort", story_node->sort());
@ -603,6 +604,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_fragment);
dom_fragment.setAttribute(u8"name", fragment_node->name());
dom_fragment.setAttribute(u8"address", (qulonglong)fragment_node.get());
dom_fragment.setAttribute(u8"file-path", fragment_node->filePath());
append_tokens(dom_fragment, fragment_node);
@ -647,6 +649,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_volume);
dom_volume.setAttribute(u8"name", volume_node->name());
dom_volume.setAttribute(u8"address", (qulonglong)volume_node.get());
dom_volume.setAttribute(u8"file-path", volume_node->filePath());
append_tokens(dom_volume, volume_node);
@ -663,6 +666,7 @@ bool printer::AstGenerate::visit(std::shared_ptr<const ast_gen::ElementAccess> s
element_stack.append(dom_article);
dom_article.setAttribute(u8"name", article_node->name());
dom_article.setAttribute(u8"address", (qulonglong)article_node.get());
dom_article.setAttribute(u8"file-path", article_node->filePath());
append_tokens(dom_article, article_node);

View File

@ -8,34 +8,34 @@ using namespace lib_syntax;
ExpressionElement::ExpressionElement(std::shared_ptr<const ExpressionRule> bind) : _expr_rule(bind) {}
std::shared_ptr<const ExpressionRule> ExpressionElement::definedRule() const {
return _expr_rule;
return _expr_rule;
}
QString ExpressionElement::filePath() const {
if(!tokens_bind.size())
throw new SyntaxException(u8"InternalError[0x0002]Ò»¸ö¿ÕµÄ·Ç·¨ÎÞЧ½Úµã");
if (!tokens_bind.size())
throw new SyntaxException(u8"InternalError[0x0002]Ò»¸ö¿ÕµÄ·Ç·¨ÎÞЧ½Úµã");
return tokens_bind.first()->file();
return tokens_bind.first()->file();
}
void ExpressionElement::tokensReset(const QList<std::shared_ptr<const IToken>>& list) {
this->tokens_bind = list;
this->tokens_bind = list;
}
void ExpressionElement::addToken(std::shared_ptr<const IToken> token_inst) {
this->tokens_bind.append(token_inst);
this->tokens_bind.append(token_inst);
}
QList<std::shared_ptr<const Expression>> ExpressionElement::children() const {
return this->children_store;
return this->children_store;
}
void ExpressionElement::addChild(std::shared_ptr<const Expression> inst) {
this->children_store.append(inst);
this->children_store.append(inst);
}
QList<std::shared_ptr<const IToken>> ExpressionElement::tokens() const {
return this->tokens_bind;
return this->tokens_bind;
}
ExpressionContext::ExpressionContext() {}
@ -46,36 +46,60 @@ QString ExpressionContext::currentFile() const { return this->current_file_path;
std::shared_ptr<Expression> ExpressionContext::currentInst() const
{
if(expression_stack.size())
return expression_stack.last();
if (expression_stack.size())
return expression_stack.last();
return nullptr;
return nullptr;
}
void ExpressionContext::pushInst(std::shared_ptr<Expression> current_inst)
{
if(!expression_stack.size() || expression_stack.last() != current_inst)
expression_stack.append(current_inst);
if (!expression_stack.size() || expression_stack.last() != current_inst)
expression_stack.append(current_inst);
}
std::shared_ptr<Expression> ExpressionContext::popInst()
{
auto lastx = expression_stack.takeLast();
return lastx;
auto lastx = expression_stack.takeLast();
return lastx;
}
std::shared_ptr<const BaseRule> ExpressionContext::currentExpressionRule() const {
if(rule_stack.size())
return rule_stack.last();
return nullptr;
if (rule_stack.size())
return rule_stack.last();
return nullptr;
}
void ExpressionContext::pushExpressionRule(std::shared_ptr<const BaseRule> inst) {
if(!rule_stack.size() || rule_stack.last() != inst)
rule_stack.append(inst);
if (!rule_stack.size() || rule_stack.last() != inst)
rule_stack.append(inst);
}
std::shared_ptr<const BaseRule> ExpressionContext::popExpressionRule()
{
return rule_stack.takeLast();
return rule_stack.takeLast();
}
void ExpressionContext::appendParseErrors(int start, const QString& e) {
this->errors_storage.append(std::make_tuple(start, e));
}
/**
 * @brief Collect the message text of every stored parse error.
 * @return The second tuple element of each entry in errors_storage, in storage order.
 */
QStringList ExpressionContext::errors() const {
	QStringList messages;
	for (int idx = 0; idx < errors_storage.size(); ++idx) {
		const auto& entry = errors_storage.at(idx);
		messages << std::get<1>(entry);
	}
	return messages;
}
/**
 * @brief Remove every stored parse error whose recorded start is at or after @p start.
 * @param start Threshold; entries with a recorded start value >= start are dropped,
 *              the remaining entries keep their relative order.
 */
void ExpressionContext::clearErrors(int start) {
	// Walk from the back so that removing an entry never shifts an index still to be visited.
	for (int idx = errors_storage.size() - 1; idx >= 0; --idx) {
		const int recorded_start = std::get<0>(errors_storage.at(idx));
		if (recorded_start >= start)
			errors_storage.removeAt(idx);
	}
}
/**
 * @brief Expose the rule stack as it currently stands.
 * @return A by-value copy of rule_stack; rules are appended on push, so the most
 *         recently pushed rule is the last element.
 */
QList<std::shared_ptr<const BaseRule>> ExpressionContext::currentExpressionRuleStack() const {
	return this->rule_stack;
}

View File

@ -89,6 +89,7 @@ namespace ast_basic {
QList<std::shared_ptr<const lib_syntax::BaseRule>> rule_stack;
QList<std::shared_ptr<Expression>> expression_stack;
QString current_file_path;
QList<std::tuple<int, QString>> errors_storage;
public:
ExpressionContext();
@ -104,6 +105,10 @@ namespace ast_basic {
std::shared_ptr<const lib_syntax::BaseRule> currentExpressionRule() const override;
void pushExpressionRule(std::shared_ptr<const lib_syntax::BaseRule> inst) override;
std::shared_ptr<const lib_syntax::BaseRule> popExpressionRule() override;
virtual QList<std::shared_ptr<const lib_syntax::BaseRule>> currentExpressionRuleStack() const;
void appendParseErrors(int start, const QString& error_msg) override;
QStringList errors() const override;
void clearErrors(int start) override;
};
}

View File

@ -1,5 +1,6 @@
#include "libsyntax.h"
#include "ast_basic.h"
#include <QDebug>
using namespace lib_syntax;
using namespace std;
@ -14,6 +15,10 @@ std::tuple<std::shared_ptr<const Expression>, std::shared_ptr<const IWordBase>>
if (!head)
throw new InputTerminal(rt_inst->currentFile());
if (head->content() == u8"初遇江枫的") {
qDebug() << u8"初遇江枫的";
}
auto match_result = define_peer->analysis(head);
if (std::get<0>(match_result)) {
rt_inst->currentInst()->addToken(std::get<0>(match_result));

View File

@ -38,6 +38,10 @@ namespace lib_syntax {
virtual void setCurrentFile(const QString &path) = 0;
virtual QString currentFile() const = 0;
virtual void appendParseErrors(int start, const QString &error_msg) = 0;
virtual QStringList errors() const = 0;
virtual void clearErrors(int start) = 0;
/**
* \brief Current expression element.
*
@ -50,6 +54,7 @@ namespace lib_syntax {
virtual std::shared_ptr<const BaseRule> currentExpressionRule() const = 0;
virtual void pushExpressionRule(std::shared_ptr<const BaseRule> inst) = 0;
virtual std::shared_ptr<const BaseRule> popExpressionRule() = 0;
virtual QList<std::shared_ptr<const BaseRule>> currentExpressionRuleStack() const = 0;
};
/**

View File

@ -30,15 +30,16 @@ QList<std::shared_ptr<const IWordBase>> WordReader::extract_from(const QString&
QList<std::shared_ptr<const IWordBase>> ret_list;
int line_number = 1;
while (!tin.atEnd()) {
auto doc_pos = tin.pos();
auto line = tin.readLine() + "\n";
ret_list.append(this->parse_line(line_number++, line, path));
ret_list.append(this->parse_line(doc_pos, line_number++, line, path));
}
return ret_list;
}
#include <QRegExp>
QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(int row, const QString& line_text, const QString& path) const {
QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(int start_pos, int row, const QString& line_text, const QString& path) const {
QRegExp split_char(u8"\\s");
auto words = line_text.split(split_char, QString::SplitBehavior::SkipEmptyParts);
@ -46,7 +47,7 @@ QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(int row, const QS
int columns_offset = 0;
for (auto& w : words) {
auto column_start = line_text.indexOf(w, columns_offset);
auto token = std::make_shared<WordContent>(row, column_start + 1, w, path);
auto token = std::make_shared<WordContent>(row, column_start + 1, start_pos+column_start, w, path);
primary_words << token;
columns_offset = column_start + w.length();
@ -59,10 +60,15 @@ TokenException::TokenException(const QString& message) : msg_store(message) {}
QString TokenException::message() const { return msg_store; }
WordContent::WordContent(int r, int c, const QString& t, const QString& p) : row_n(r), col_n(c), text_n(t), path_p(p) {}
/**
 * @brief Build a word from its location data, text and source file.
 * @param r   Row number, stored in row_n (returned by row()).
 * @param c   Column number, stored in col_n.
 * @param pos Offset stored in doc_offset (returned by position()).
 * @param t   Word text, stored in text_n (returned by content()).
 * @param p   File path, stored in path_p (returned by file()).
 */
WordContent::WordContent(int r, int c, int pos, const QString& t, const QString& p)
	: row_n(r),
	  col_n(c),
	  doc_offset(pos),
	  text_n(t),
	  path_p(p) {
}
QString WordContent::file() const { return path_p; }
int lib_token::WordContent::position() const {
return doc_offset;
}
QString WordContent::content() const { return text_n; }
int WordContent::row() const { return row_n; }
@ -81,6 +87,10 @@ QString WordImpl::file() const
return content_ptr->file();
}
/**
 * @brief Position of the wrapped word.
 * @return Whatever content_ptr->position() reports; the call is forwarded unchanged.
 */
int lib_token::WordImpl::position() const {
	const auto offset = content_ptr->position();
	return offset;
}
QString WordImpl::content() const
{
return content_ptr->content();

View File

@ -16,6 +16,11 @@ namespace lib_token {
* @return
*/
virtual QString file() const = 0;
/**
* @brief Get the position
* @return The start position
*/
virtual int position() const = 0;
/**
* @brief Word content
* @return
@ -44,15 +49,16 @@ namespace lib_token {
*/
class WordContent : public IWordBase {
private:
int row_n, col_n;
int row_n, col_n, doc_offset;
QString text_n, path_p;
public:
WordContent(int r, int c, const QString& t, const QString& p);
WordContent(int r, int c, int pos, const QString& t, const QString& p);
// WordBase interface
public:
virtual QString file() const override;
virtual int position() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
@ -69,6 +75,7 @@ namespace lib_token {
// Inherited via IWordBase
QString file() const override;
virtual int position() const override;
QString content() const override;
int row() const override;
int column() const override;
@ -136,7 +143,7 @@ namespace lib_token {
*/
class LIBTOKEN_EXPORT WordReader {
private:
QList<std::shared_ptr<const IWordBase>> parse_line(int row, const QString& line_text, const QString& path) const;
QList<std::shared_ptr<const IWordBase>> parse_line(int start_pos, int row, const QString& line_text, const QString& path) const;
QList<std::shared_ptr<const IWordBase>> extract_from(const QString& path) const;
public:

View File

@ -4,11 +4,15 @@ using namespace example_novel;
using namespace lib_token;
TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(paramType) {}
/**
 * @brief Build a token from its location data, text, source file and token definition.
 * @param r         Row number, stored in row_n (returned by row()).
 * @param c         Column number, stored in col_n.
 * @param pos       Offset stored in doc_offset (returned by position()).
 * @param t         Token text, stored in text_n (returned by content()).
 * @param p         File path, stored in path_p (returned by file()).
 * @param paramType Token definition, stored in type_def.
 */
TokenContent::TokenContent(int r, int c, int pos, const QString& t, const QString& p,
                           std::shared_ptr<const ITokenDefine> paramType)
	: row_n(r),
	  col_n(c),
	  doc_offset(pos),
	  text_n(t),
	  path_p(p),
	  type_def(paramType) {
}
QString TokenContent::file() const { return path_p; }
int lib_token::TokenContent::position() const {
return doc_offset;
}
QString TokenContent::content() const { return text_n; }
int TokenContent::row() const { return row_n; }
@ -37,11 +41,13 @@ LeftBracket::analysis(std::shared_ptr<const IWordBase> content) const {
if (!text.startsWith(regex()))
return std::make_tuple(nullptr, content);
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content().mid(0, regex().length()),
content->file(), shared_from_this());
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
content->content().mid(0, regex().length()), content->file(), shared_from_this());
auto t_remains = content->content().mid(regex().length());
if (t_remains.length() > 0) {
auto remains = std::make_shared<WordContent>(content->row(), content->column() + regex().length(), t_remains, content->file());
auto remains = std::make_shared<WordContent>(content->row(), content->column() + regex().length(),
content->position() + regex().length(), t_remains, content->file());
return std::make_tuple(token_inst, remains);
}
return std::make_tuple(token_inst, nullptr);
@ -82,7 +88,8 @@ Keywords::analysis(std::shared_ptr<const IWordBase> content) const {
return std::make_tuple(nullptr, content);
}
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content(), content->file(), shared_from_this());
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
content->content(), content->file(), shared_from_this());
return std::make_tuple(token_inst, nullptr);
}
@ -111,7 +118,8 @@ Numbers::analysis(std::shared_ptr<const IWordBase> content) const {
if (regx.indexIn(text) == -1)
return std::make_tuple(nullptr, content);
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->content(), content->file(), shared_from_this());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
content->content(), content->file(), shared_from_this());
return std::make_tuple(tinst, nullptr);
}
@ -135,9 +143,11 @@ VTextSection::analysis(std::shared_ptr<const IWordBase> content) const {
auto match = regx.cap(1);
auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
match, content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(), remains, content->file());
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(),
content->position() + match.length(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);
@ -172,9 +182,11 @@ std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase> > Nam
auto match = regx.cap(1);
auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
match, content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(), remains, content->file());
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(),
content->position() + match.length(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);
@ -204,9 +216,11 @@ std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>> Decl
auto remains = content->content().mid(regex().size());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), u8"#", content->file(), shared_from_this());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->position(),
u8"#", content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<WordContent>(content->row(), content->column()+1, remains, content->file());
auto t_remains = std::make_shared<WordContent>(content->row(), content->column()+1,
content->position() + regex().length(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);

View File

@ -13,16 +13,17 @@ namespace lib_token {
*/
class TokenContent : public IToken {
private:
int row_n, col_n;
int row_n, col_n, doc_offset;
QString text_n, path_p;
std::shared_ptr<const ITokenDefine> type_def;
public:
TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType);
TokenContent(int r, int c, int pos, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> paramType);
// WordBase interface
public:
virtual QString file() const override;
virtual int position() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;