From 856a81c2c916efcacd86552042d79b61bf2e7a07 Mon Sep 17 00:00:00 2001 From: codeboss <2422523675@qq.com> Date: Tue, 18 Jun 2024 21:17:06 +0800 Subject: [PATCH] =?UTF-8?q?token=E6=B5=81=E7=B2=BE=E7=AE=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- libToken/libtoken.cpp | 146 +++++++++++++++------- libToken/libtoken.h | 255 ++++++++++++++++++++------------------ libToken/tokens_novel.cpp | 35 +++--- 3 files changed, 254 insertions(+), 182 deletions(-) diff --git a/libToken/libtoken.cpp b/libToken/libtoken.cpp index ac25342..c3e55a5 100644 --- a/libToken/libtoken.cpp +++ b/libToken/libtoken.cpp @@ -7,60 +7,74 @@ using namespace lib_token; TokenReader::TokenReader(const QList> rulers) : rules_store(rulers) {} -QList> lib_token::TokenReader::extractFrom(const QString& path) const { - QFile file(path); - if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { - throw new TokenException(u8"Lex[0x0000]指定文件无法打开:" + path); - } - QTextStream tin(&file); - tin.setCodec("UTF-8"); +std::shared_ptr lib_token::TokenReader::tokensWithin(const QString& path) const { + auto content_list = extract_from(path); + if (!content_list.size()) + return nullptr; - QList> ret_list; - int line_number = 1; - while (!tin.atEnd()) { - auto line = tin.readLine() + "\n"; - ret_list.append(this->parse_line(line_number++, line, path)); - } + std::shared_ptr prev_ptr = std::make_shared(content_list.last(), nullptr); + for (auto idx = content_list.size() - 2; idx >=0; --idx) { + auto content_ptr = content_list[idx]; + prev_ptr = std::make_shared(content_ptr, prev_ptr); + } - return ret_list; + return prev_ptr; +} + +QList> TokenReader::extract_from(const QString& path) const { + QFile file(path); + if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { + throw new TokenException(u8"Lex[0x0000]指定文件无法打开:" + path); + } + QTextStream tin(&file); + tin.setCodec("UTF-8"); + + QList> ret_list; + int line_number = 1; + while (!tin.atEnd()) { + auto line = tin.readLine() + "\n"; + ret_list.append(this->parse_line(line_number++, line, path)); + } + + return ret_list; } QList> TokenReader::parse_line(int row, const QString& line_text, const QString& path) const { - auto words = line_text.split(" ", QString::SplitBehavior::SkipEmptyParts); + auto words = line_text.split(" ", QString::SplitBehavior::SkipEmptyParts); - QList> primary_words; - int columns_offset = 0; - for (auto& w : words) { - auto column_start = line_text.indexOf(w, columns_offset); - auto token = std::make_shared(row, column_start + 1, w, path); - primary_words << token; + QList> primary_words; + int columns_offset = 0; + for (auto& w : words) { + auto column_start = line_text.indexOf(w, columns_offset); + auto token = std::make_shared(row, column_start + 1, w, path); + primary_words << token; - columns_offset = column_start + w.length(); - } + columns_offset = column_start + w.length(); + } - QList> rets_tokens; - for (auto idx = 0; idx < primary_words.size(); ++idx) { - auto word = primary_words[idx]; - auto result = parse_token(word); - rets_tokens.append(std::get<0>(result)); + QList> rets_tokens; + for (auto idx = 0; idx < primary_words.size(); ++idx) { + auto word = primary_words[idx]; + auto result = parse_token(word); + rets_tokens.append(std::get<0>(result)); - auto remains = std::get<1>(result); - if (remains) - primary_words.insert(idx + 1, remains); - } + auto remains = std::get<1>(result); + if (remains) + primary_words.insert(idx + 1, remains); + } - return rets_tokens; + return rets_tokens; } std::tuple, std::shared_ptr> TokenReader::parse_token(std::shared_ptr word) const { - for (auto& it : this->rules_store) { - auto result = it->analysis(word); - if (std::get<0>(result)) { - return result; - } - } + for (auto& it : this->rules_store) { + auto result = it->analysis(word); + if (std::get<0>(result)) { + return result; + } + } - throw new TokenException(QString(u8"Lex[0x0001]指定词语无法解析:%1,<%2,%3>").arg(word->content()).arg(word->row()).arg(word->column())); + throw new TokenException(QString(u8"Lex[0x0001]指定词语无法解析:%1,<%2,%3>").arg(word->content()).arg(word->row()).arg(word->column())); } TokenException::TokenException(const QString& message) : msg_store(message) {} @@ -77,15 +91,53 @@ int WordPeaks::row() const { return row_n; } int WordPeaks::column() const { return col_n; } -TokenInst::TokenInst(int r, int c, const QString& t, const QString& p, std::shared_ptr type) - : row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {} +TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr type) + : row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {} -QString TokenInst::file() const { return path_p; } +QString TokenContent::file() const { return path_p; } -QString TokenInst::content() const { return text_n; } +QString TokenContent::content() const { return text_n; } -int TokenInst::row() const { return row_n; } +int TokenContent::row() const { return row_n; } -int TokenInst::column() const { return col_n; } +int TokenContent::column() const { return col_n; } -std::shared_ptr TokenInst::define() const { return this->type_def; } +std::shared_ptr TokenContent::define() const { return this->type_def; } + +std::shared_ptr TokenContent::nextToken() const +{ + return nullptr; +} + +TokenImpl::TokenImpl(std::shared_ptr content, std::shared_ptr next) + : content_ptr(content), next_element(next) {} + +QString TokenImpl::file() const +{ + return content_ptr->file(); +} + +QString TokenImpl::content() const +{ + return content_ptr->content(); +} + +int TokenImpl::row() const +{ + return content_ptr->row(); +} + +int TokenImpl::column() const +{ + return content_ptr->column(); +} + +std::shared_ptr TokenImpl::define() const +{ + return content_ptr->define(); +} + +std::shared_ptr TokenImpl::nextToken() const +{ + return next_element; +} diff --git a/libToken/libtoken.h b/libToken/libtoken.h index c7c6ee8..15d281c 100644 --- a/libToken/libtoken.h +++ b/libToken/libtoken.h @@ -6,140 +6,159 @@ #include namespace lib_token { - class TokenDefine; + class TokenDefine; + /** + * @brief 解析异常 + */ + class LIBTOKEN_EXPORT TokenException { + private: + QString msg_store; - /** - * @brief 源码词语实例 - */ - class WordBase { - public: - /** - * @brief 源文件路径 - * @return - */ - virtual QString file() const = 0; - /** - * @brief 词语内容 - * @return - */ - virtual QString content() const = 0; - /** - * @brief 源码行号 - * @return - */ - virtual int row() const = 0; - /** - * @brief 源码列号 - * @return - */ - virtual int column() const = 0; - }; + public: + TokenException(const QString& message); + virtual QString message() const; + }; - /** - * @brief token解析结果 - */ - class Token : public WordBase { - public: - /** - * @brief token解析机制关联 - * @return - */ - virtual std::shared_ptr define() const = 0; - }; + /** + * @brief 源码词语实例 + */ + class WordBase { + public: + /** + * @brief 源文件路径 + * @return + */ + virtual QString file() const = 0; + /** + * @brief 词语内容 + * @return + */ + virtual QString content() const = 0; + /** + * @brief 源码行号 + * @return + */ + virtual int row() const = 0; + /** + * @brief 源码列号 + * @return + */ + virtual int column() const = 0; + }; - /** - * @brief token解析机制定义 - */ - class TokenDefine { - public: - /** - * @brief 解析机制名称 - * @return - */ - virtual QString name() const = 0; - /** - * @brief 解析机制关键定义 - * @return - */ - virtual QString regex() const = 0; + /** + * @brief token解析结果 + */ + class Token : public WordBase { + public: + /** + * @brief token解析机制关联 + * @return + */ + virtual std::shared_ptr define() const = 0; + virtual std::shared_ptr nextToken() const = 0; + }; - /** - * @brief 解析词语处理过程 - * @param content 词语 - * @return tuple 解析结果 - */ - virtual std::tuple, std::shared_ptr> analysis(std::shared_ptr content) const = 0; - }; + /** + * @brief token解析机制定义 + */ + class TokenDefine { + public: + /** + * @brief 解析机制名称 + * @return + */ + virtual QString name() const = 0; + /** + * @brief 解析机制关键定义 + * @return + */ + virtual QString regex() const = 0; - /** - * @brief 解析异常 - */ - class LIBTOKEN_EXPORT TokenException { - private: - QString msg_store; + /** + * @brief 解析词语处理过程 + * @param content 词语 + * @return tuple 解析结果 + */ + virtual std::tuple, std::shared_ptr> analysis(std::shared_ptr content) const = 0; + }; - public: - TokenException(const QString& message); - virtual QString message() const; - }; - /** - * @brief 未解析原始词语 - */ - class LIBTOKEN_EXPORT WordPeaks : public WordBase { - private: - int row_n, col_n; - QString text_n, path_p; + /** + * @brief 未解析原始词语 + */ + class WordPeaks : public WordBase { + private: + int row_n, col_n; + QString text_n, path_p; - public: - WordPeaks(int r, int c, const QString& t, const QString& p); + public: + WordPeaks(int r, int c, const QString& t, const QString& p); - // WordBase interface - public: - virtual QString file() const override; - virtual QString content() const override; - virtual int row() const override; - virtual int column() const override; - }; + // WordBase interface + public: + virtual QString file() const override; + virtual QString content() const override; + virtual int row() const override; + virtual int column() const override; + }; - /** - * @brief 词法解析成果 - */ - class LIBTOKEN_EXPORT TokenInst : public Token { - private: - int row_n, col_n; - QString text_n, path_p; - std::shared_ptr type_def; + /** + * @brief 词法解析成果 + */ + class TokenContent : public Token { + private: + int row_n, col_n; + QString text_n, path_p; + std::shared_ptr type_def; - public: - TokenInst(int r, int c, const QString& t, const QString& p, std::shared_ptr type); + public: + TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr type); - // WordBase interface - public: - virtual QString file() const override; - virtual QString content() const override; - virtual int row() const override; - virtual int column() const override; + // WordBase interface + public: + virtual QString file() const override; + virtual QString content() const override; + virtual int row() const override; + virtual int column() const override; - // Token interface - public: - virtual std::shared_ptr define() const override; - }; + // Token interface + public: + virtual std::shared_ptr define() const override; + virtual std::shared_ptr nextToken() const override; + }; - /** - * @brief 词法获取器 - */ - class LIBTOKEN_EXPORT TokenReader { - private: - QList> rules_store; + class TokenImpl : public Token { + private: + std::shared_ptr content_ptr; + std::shared_ptr next_element; - QList> parse_line(int row, const QString& line_text, const QString& path) const; - std::tuple, std::shared_ptr> parse_token(std::shared_ptr word) const; + public: + TokenImpl(std::shared_ptr content, std::shared_ptr next); - public: - TokenReader(const QList> rulers); + // 通过 Token 继承 + QString file() const override; + QString content() const override; + int row() const override; + int column() const override; + std::shared_ptr define() const override; + std::shared_ptr nextToken() const override; + }; - QList> extractFrom(const QString& path) const; - }; + /** + * @brief 词法获取器 + */ + class LIBTOKEN_EXPORT TokenReader { + private: + QList> rules_store; + + QList> parse_line(int row, const QString& line_text, const QString& path) const; + std::tuple, std::shared_ptr> parse_token(std::shared_ptr word) const; + QList> extract_from(const QString& path) const; + + public: + TokenReader(const QList> rulers); + std::shared_ptr tokensWithin(const QString &path) const; + }; } // namespace lib_token \ No newline at end of file diff --git a/libToken/tokens_novel.cpp b/libToken/tokens_novel.cpp index 5302048..a2eac41 100644 --- a/libToken/tokens_novel.cpp +++ b/libToken/tokens_novel.cpp @@ -1,22 +1,23 @@ #include "tokens_novel.h" using namespace example_novel; +using namespace lib_token; QString LeftBracket::name() const { return u8"left-bracket"; } QString LeftBracket::regex() const { return u8"{"; } -std::tuple, std::shared_ptr> -LeftBracket::analysis(std::shared_ptr content) const { +std::tuple, std::shared_ptr> +LeftBracket::analysis(std::shared_ptr content) const { auto text = content->content(); if (!text.startsWith(regex())) return std::make_tuple(nullptr, content); - auto token_inst = std::make_shared(content->row(), content->column(), content->content().mid(0, regex().length()), + auto token_inst = std::make_shared(content->row(), content->column(), content->content().mid(0, regex().length()), content->file(), shared_from_this()); auto t_remains = content->content().mid(regex().length()); if (t_remains.length() > 0) { - auto remains = std::make_shared(content->row(), content->column() + regex().length(), t_remains, content->file()); + auto remains = std::make_shared(content->row(), content->column() + regex().length(), t_remains, content->file()); return std::make_tuple(token_inst, remains); } return std::make_tuple(token_inst, nullptr); @@ -36,13 +37,13 @@ QString Keywords::name() const { return name_store; } QString Keywords::regex() const { return value_store; } -std::tuple, std::shared_ptr> -Keywords::analysis(std::shared_ptr content) const { +std::tuple, std::shared_ptr> +Keywords::analysis(std::shared_ptr content) const { if (content->content() != regex()) { return std::make_tuple(nullptr, content); } - auto token_inst = std::make_shared(content->row(), content->column(), content->content(), content->file(), shared_from_this()); + auto token_inst = std::make_shared(content->row(), content->column(), content->content(), content->file(), shared_from_this()); return std::make_tuple(token_inst, nullptr); } @@ -54,14 +55,14 @@ QString Numbers::name() const { return u8"numbers"; } QString Numbers::regex() const { return u8"^([0-9]+)$"; } -std::tuple, std::shared_ptr> -Numbers::analysis(std::shared_ptr content) const { +std::tuple, std::shared_ptr> +Numbers::analysis(std::shared_ptr content) const { auto text = content->content(); QRegExp regx(regex()); if (regx.indexIn(text) == -1) return std::make_tuple(nullptr, content); - auto tinst = std::make_shared(content->row(), content->column(), content->content(), content->file(), shared_from_this()); + auto tinst = std::make_shared(content->row(), content->column(), content->content(), content->file(), shared_from_this()); return std::make_tuple(tinst, nullptr); } @@ -69,8 +70,8 @@ QString VTextSection::name() const { return u8"text-section"; } QString VTextSection::regex() const { return u8"^([^\\{\\}\\n@&]+)"; } -std::tuple, std::shared_ptr> -VTextSection::analysis(std::shared_ptr content) const { +std::tuple, std::shared_ptr> +VTextSection::analysis(std::shared_ptr content) const { auto text = content->content(); QRegExp regx(regex()); if (regx.indexIn(text) == -1) { @@ -80,9 +81,9 @@ VTextSection::analysis(std::shared_ptr content) const auto match = regx.cap(1); auto remains = content->content().mid(match.length()); - auto tinst = std::make_shared(content->row(), content->column(), match, content->file(), shared_from_this()); + auto tinst = std::make_shared(content->row(), content->column(), match, content->file(), shared_from_this()); if (remains.length()) { - auto t_remains = std::make_shared(content->row(), content->column(), remains, content->file()); + auto t_remains = std::make_shared(content->row(), content->column(), remains, content->file()); return std::make_tuple(tinst, t_remains); } return std::make_tuple(tinst, nullptr); @@ -96,7 +97,7 @@ QString NameSection::name() const { return u8"name-section"; } QString NameSection::regex() const { return u8"^([^:\\{\\}\\n@&][^\\{\\}\\n@&]*)"; } -std::tuple, std::shared_ptr > NameSection::analysis(std::shared_ptr content) const +std::tuple, std::shared_ptr > NameSection::analysis(std::shared_ptr content) const { auto text = content->content(); QRegExp regx(regex()); @@ -107,9 +108,9 @@ std::tuple, std::shared_ptrcontent().mid(match.length()); - auto tinst = std::make_shared(content->row(), content->column(), match, content->file(), shared_from_this()); + auto tinst = std::make_shared(content->row(), content->column(), match, content->file(), shared_from_this()); if (remains.length()) { - auto t_remains = std::make_shared(content->row(), content->column(), remains, content->file()); + auto t_remains = std::make_shared(content->row(), content->column(), remains, content->file()); return std::make_tuple(tinst, t_remains); } return std::make_tuple(tinst, nullptr);