token流精简

This commit is contained in:
codeboss 2024-06-18 21:17:06 +08:00
parent ac99860469
commit 856a81c2c9
3 changed files with 254 additions and 182 deletions

View File

@ -7,60 +7,74 @@ using namespace lib_token;
// Constructs a reader that keeps a copy of the given rule list in rules_store;
// parse_token() later walks that list in order.
TokenReader::TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers) : rules_store(rulers) {} TokenReader::TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers) : rules_store(rulers) {}
QList<std::shared_ptr<const lib_token::Token>> lib_token::TokenReader::extractFrom(const QString& path) const { std::shared_ptr<const Token> lib_token::TokenReader::tokensWithin(const QString& path) const {
QFile file(path); auto content_list = extract_from(path);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) { if (!content_list.size())
throw new TokenException(u8"Lex[0x0000]指定文件无法打开:" + path); return nullptr;
}
QTextStream tin(&file);
tin.setCodec("UTF-8");
QList<std::shared_ptr<const Token>> ret_list; std::shared_ptr<const Token> prev_ptr = std::make_shared<const TokenImpl>(content_list.last(), nullptr);
int line_number = 1; for (auto idx = content_list.size() - 2; idx >=0; --idx) {
while (!tin.atEnd()) { auto content_ptr = content_list[idx];
auto line = tin.readLine() + "\n"; prev_ptr = std::make_shared<const TokenImpl>(content_ptr, prev_ptr);
ret_list.append(this->parse_line(line_number++, line, path)); }
}
return ret_list; return prev_ptr;
}
QList<std::shared_ptr<const Token>> TokenReader::extract_from(const QString& path) const {
QFile file(path);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
throw new TokenException(u8"Lex[0x0000]指定文件无法打开:" + path);
}
QTextStream tin(&file);
tin.setCodec("UTF-8");
QList<std::shared_ptr<const Token>> ret_list;
int line_number = 1;
while (!tin.atEnd()) {
auto line = tin.readLine() + "\n";
ret_list.append(this->parse_line(line_number++, line, path));
}
return ret_list;
} }
/**
 * @brief Tokenize a single line of text.
 *
 * The line is split on single spaces with empty parts dropped. Each piece is wrapped
 * in a WordPeaks that records the row and a column of indexOf() + 1, and the pieces
 * are then fed to parse_token() one by one. When parse_token() returns a non-null
 * remainder, it is inserted directly after the current piece, so the same loop picks
 * it up on the next iteration.
 *
 * @param row Line number stored on every word created here.
 * @param line_text Text of the line to tokenize.
 * @param path File path stored on every word created here.
 * @return Tokens in the order they were produced.
 * @note Whatever parse_token() throws propagates out of this function.
 */
QList<std::shared_ptr<const Token>> TokenReader::parse_line(int row, const QString& line_text, const QString& path) const { QList<std::shared_ptr<const Token>> TokenReader::parse_line(int row, const QString& line_text, const QString& path) const {
auto words = line_text.split(" ", QString::SplitBehavior::SkipEmptyParts); auto words = line_text.split(" ", QString::SplitBehavior::SkipEmptyParts);
QList<std::shared_ptr<const WordBase>> primary_words; QList<std::shared_ptr<const WordBase>> primary_words;
// Search offset: each lookup resumes after the previous word, so a repeated word
// is located at its own position instead of at its first occurrence.
int columns_offset = 0; int columns_offset = 0;
for (auto& w : words) { for (auto& w : words) {
auto column_start = line_text.indexOf(w, columns_offset); auto column_start = line_text.indexOf(w, columns_offset);
auto token = std::make_shared<WordPeaks>(row, column_start + 1, w, path); auto token = std::make_shared<WordPeaks>(row, column_start + 1, w, path);
primary_words << token; primary_words << token;
columns_offset = column_start + w.length(); columns_offset = column_start + w.length();
} }
QList<std::shared_ptr<const Token>> rets_tokens; QList<std::shared_ptr<const Token>> rets_tokens;
// primary_words may grow while this loop runs (see the insert below), which is why
// the bound is re-read from size() on every iteration.
for (auto idx = 0; idx < primary_words.size(); ++idx) { for (auto idx = 0; idx < primary_words.size(); ++idx) {
auto word = primary_words[idx]; auto word = primary_words[idx];
auto result = parse_token(word); auto result = parse_token(word);
rets_tokens.append(std::get<0>(result)); rets_tokens.append(std::get<0>(result));
auto remains = std::get<1>(result); auto remains = std::get<1>(result);
if (remains) if (remains)
primary_words.insert(idx + 1, remains); primary_words.insert(idx + 1, remains);
} }
return rets_tokens; return rets_tokens;
} }
/**
 * @brief Run one word through the rule list and return the first successful match.
 *
 * Rules are tried in rules_store order. A rule counts as matching when the first
 * element of the tuple returned by its analysis() is non-null.
 *
 * @param word Word to analyse.
 * @return The matching rule's tuple: (token, remainder).
 * @throws A heap-allocated TokenException (raised with `throw new`, so it travels
 *         as a pointer) carrying the word's content, row and column when no rule matches.
 */
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> TokenReader::parse_token(std::shared_ptr<const WordBase> word) const { std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> TokenReader::parse_token(std::shared_ptr<const WordBase> word) const {
for (auto& it : this->rules_store) { for (auto& it : this->rules_store) {
auto result = it->analysis(word); auto result = it->analysis(word);
if (std::get<0>(result)) { if (std::get<0>(result)) {
return result; return result;
} }
} }
throw new TokenException(QString(u8"Lex[0x0001]指定词语无法解析:%1,<%2,%3>").arg(word->content()).arg(word->row()).arg(word->column())); throw new TokenException(QString(u8"Lex[0x0001]指定词语无法解析:%1,<%2,%3>").arg(word->content()).arg(word->row()).arg(word->column()));
} }
// Constructs the exception and stores a copy of the message text in msg_store.
TokenException::TokenException(const QString& message) : msg_store(message) {} TokenException::TokenException(const QString& message) : msg_store(message) {}
@ -77,15 +91,53 @@ int WordPeaks::row() const { return row_n; }
// Returns the column number this word was constructed with (col_n).
int WordPeaks::column() const { return col_n; } int WordPeaks::column() const { return col_n; }
TokenInst::TokenInst(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type) TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {} : row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {}
QString TokenInst::file() const { return path_p; } QString TokenContent::file() const { return path_p; }
QString TokenInst::content() const { return text_n; } QString TokenContent::content() const { return text_n; }
int TokenInst::row() const { return row_n; } int TokenContent::row() const { return row_n; }
int TokenInst::column() const { return col_n; } int TokenContent::column() const { return col_n; }
std::shared_ptr<const TokenDefine> TokenInst::define() const { return this->type_def; } std::shared_ptr<const TokenDefine> TokenContent::define() const { return this->type_def; }
/**
 * @brief Successor lookup for a standalone token record.
 *
 * A TokenContent holds no link to another token, so the answer is always empty.
 *
 * @return Always a null shared_ptr.
 */
std::shared_ptr<const Token> TokenContent::nextToken() const {
    return std::shared_ptr<const Token>();
}
TokenImpl::TokenImpl(std::shared_ptr<const Token> content, std::shared_ptr<const Token> next)
: content_ptr(content), next_element(next) {}
/**
 * @brief File path of the wrapped token.
 * @return The value reported by the wrapped token's file().
 */
QString TokenImpl::file() const {
    const Token& wrapped = *content_ptr;
    return wrapped.file();
}
/**
 * @brief Text of the wrapped token.
 * @return The value reported by the wrapped token's content().
 */
QString TokenImpl::content() const {
    const Token& wrapped = *content_ptr;
    return wrapped.content();
}
int TokenImpl::row() const
{
return content_ptr->row();
}
int TokenImpl::column() const
{
return content_ptr->column();
}
std::shared_ptr<const TokenDefine> TokenImpl::define() const
{
return content_ptr->define();
}
std::shared_ptr<const Token> TokenImpl::nextToken() const
{
return next_element;
}

View File

@ -6,140 +6,159 @@
#include <memory> #include <memory>
namespace lib_token { namespace lib_token {
class TokenDefine; class TokenDefine;
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenException {
private:
QString msg_store;
/** public:
* @brief TokenException(const QString& message);
*/ virtual QString message() const;
class WordBase { };
public:
/**
* @brief
* @return
*/
virtual QString file() const = 0;
/**
* @brief
* @return
*/
virtual QString content() const = 0;
/**
* @brief
* @return
*/
virtual int row() const = 0;
/**
* @brief
* @return
*/
virtual int column() const = 0;
};
/** /**
* @brief token解析结果 * @brief
*/ */
class Token : public WordBase { class WordBase {
public: public:
/** /**
* @brief token解析机制关联 * @brief
* @return * @return
*/ */
virtual std::shared_ptr<const TokenDefine> define() const = 0; virtual QString file() const = 0;
}; /**
* @brief
* @return
*/
virtual QString content() const = 0;
/**
* @brief
* @return
*/
virtual int row() const = 0;
/**
* @brief
* @return
*/
virtual int column() const = 0;
};
/** /**
* @brief token解析机制定义 * @brief token解析结果
*/ */
class TokenDefine { class Token : public WordBase {
public: public:
/** /**
* @brief * @brief token解析机制关联
* @return * @return
*/ */
virtual QString name() const = 0; virtual std::shared_ptr<const TokenDefine> define() const = 0;
/** virtual std::shared_ptr<const Token> nextToken() const = 0;
* @brief };
* @return
*/
virtual QString regex() const = 0;
/** /**
* @brief * @brief token解析机制定义
* @param content */
* @return tuple<token/null,remains> class TokenDefine {
*/ public:
virtual std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> analysis(std::shared_ptr<const WordBase> content) const = 0; /**
}; * @brief
* @return
*/
virtual QString name() const = 0;
/**
* @brief
* @return
*/
virtual QString regex() const = 0;
/** /**
* @brief * @brief
*/ * @param content
class LIBTOKEN_EXPORT TokenException { * @return tuple<token/null,remains>
private: */
QString msg_store; virtual std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> analysis(std::shared_ptr<const WordBase> content) const = 0;
};
public:
TokenException(const QString& message);
virtual QString message() const;
};
/** /**
* @brief * @brief
*/ */
class LIBTOKEN_EXPORT WordPeaks : public WordBase { class WordPeaks : public WordBase {
private: private:
int row_n, col_n; int row_n, col_n;
QString text_n, path_p; QString text_n, path_p;
public: public:
WordPeaks(int r, int c, const QString& t, const QString& p); WordPeaks(int r, int c, const QString& t, const QString& p);
// WordBase interface // WordBase interface
public: public:
virtual QString file() const override; virtual QString file() const override;
virtual QString content() const override; virtual QString content() const override;
virtual int row() const override; virtual int row() const override;
virtual int column() const override; virtual int column() const override;
}; };
/** /**
* @brief * @brief
*/ */
class LIBTOKEN_EXPORT TokenInst : public Token { class TokenContent : public Token {
private: private:
int row_n, col_n; int row_n, col_n;
QString text_n, path_p; QString text_n, path_p;
std::shared_ptr<const TokenDefine> type_def; std::shared_ptr<const TokenDefine> type_def;
public: public:
TokenInst(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type); TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type);
// WordBase interface // WordBase interface
public: public:
virtual QString file() const override; virtual QString file() const override;
virtual QString content() const override; virtual QString content() const override;
virtual int row() const override; virtual int row() const override;
virtual int column() const override; virtual int column() const override;
// Token interface // Token interface
public: public:
virtual std::shared_ptr<const TokenDefine> define() const override; virtual std::shared_ptr<const TokenDefine> define() const override;
}; virtual std::shared_ptr<const Token> nextToken() const override;
};
/** class TokenImpl : public Token {
* @brief private:
*/ std::shared_ptr<const Token> content_ptr;
class LIBTOKEN_EXPORT TokenReader { std::shared_ptr<const Token> next_element;
private:
QList<std::shared_ptr<const TokenDefine>> rules_store;
QList<std::shared_ptr<const Token>> parse_line(int row, const QString& line_text, const QString& path) const; public:
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> parse_token(std::shared_ptr<const WordBase> word) const; TokenImpl(std::shared_ptr<const Token> content, std::shared_ptr<const Token> next);
public: // 通过 Token 继承
TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers); QString file() const override;
QString content() const override;
int row() const override;
int column() const override;
std::shared_ptr<const TokenDefine> define() const override;
std::shared_ptr<const Token> nextToken() const override;
};
QList<std::shared_ptr<const Token>> extractFrom(const QString& path) const; /**
}; * @brief
*/
class LIBTOKEN_EXPORT TokenReader {
private:
QList<std::shared_ptr<const TokenDefine>> rules_store;
QList<std::shared_ptr<const Token>> parse_line(int row, const QString& line_text, const QString& path) const;
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> parse_token(std::shared_ptr<const WordBase> word) const;
QList<std::shared_ptr<const Token>> extract_from(const QString& path) const;
public:
TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers);
std::shared_ptr<const Token> tokensWithin(const QString &path) const;
};
} // namespace lib_token } // namespace lib_token

View File

@ -1,22 +1,23 @@
#include "tokens_novel.h" #include "tokens_novel.h"
using namespace example_novel; using namespace example_novel;
using namespace lib_token;
// Returns the fixed name of this token definition: "left-bracket".
QString LeftBracket::name() const { return u8"left-bracket"; } QString LeftBracket::name() const { return u8"left-bracket"; }
// Returns the text this definition matches: a single "{".
// LeftBracket::analysis() uses it as a plain prefix via startsWith(), not as a compiled regex.
QString LeftBracket::regex() const { return u8"{"; } QString LeftBracket::regex() const { return u8"{"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>> std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
LeftBracket::analysis(std::shared_ptr<const lib_token::WordBase> content) const { LeftBracket::analysis(std::shared_ptr<const WordBase> content) const {
auto text = content->content(); auto text = content->content();
if (!text.startsWith(regex())) if (!text.startsWith(regex()))
return std::make_tuple(nullptr, content); return std::make_tuple(nullptr, content);
auto token_inst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), content->content().mid(0, regex().length()), auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content().mid(0, regex().length()),
content->file(), shared_from_this()); content->file(), shared_from_this());
auto t_remains = content->content().mid(regex().length()); auto t_remains = content->content().mid(regex().length());
if (t_remains.length() > 0) { if (t_remains.length() > 0) {
auto remains = std::make_shared<lib_token::WordPeaks>(content->row(), content->column() + regex().length(), t_remains, content->file()); auto remains = std::make_shared<WordPeaks>(content->row(), content->column() + regex().length(), t_remains, content->file());
return std::make_tuple(token_inst, remains); return std::make_tuple(token_inst, remains);
} }
return std::make_tuple(token_inst, nullptr); return std::make_tuple(token_inst, nullptr);
@ -36,13 +37,13 @@ QString Keywords::name() const { return name_store; }
// Returns the stored keyword text (value_store).
// Keywords::analysis() compares a word's whole content against this value for equality.
QString Keywords::regex() const { return value_store; } QString Keywords::regex() const { return value_store; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>> std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
Keywords::analysis(std::shared_ptr<const lib_token::WordBase> content) const { Keywords::analysis(std::shared_ptr<const WordBase> content) const {
if (content->content() != regex()) { if (content->content() != regex()) {
return std::make_tuple(nullptr, content); return std::make_tuple(nullptr, content);
} }
auto token_inst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), content->content(), content->file(), shared_from_this()); auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content(), content->file(), shared_from_this());
return std::make_tuple(token_inst, nullptr); return std::make_tuple(token_inst, nullptr);
} }
@ -54,14 +55,14 @@ QString Numbers::name() const { return u8"numbers"; }
// Returns the pattern for this definition: one or more ASCII digits spanning the whole word.
// Numbers::analysis() builds a QRegExp from it.
QString Numbers::regex() const { return u8"^([0-9]+)$"; } QString Numbers::regex() const { return u8"^([0-9]+)$"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>> std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
Numbers::analysis(std::shared_ptr<const lib_token::WordBase> content) const { Numbers::analysis(std::shared_ptr<const WordBase> content) const {
auto text = content->content(); auto text = content->content();
QRegExp regx(regex()); QRegExp regx(regex());
if (regx.indexIn(text) == -1) if (regx.indexIn(text) == -1)
return std::make_tuple(nullptr, content); return std::make_tuple(nullptr, content);
auto tinst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), content->content(), content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), content->content(), content->file(), shared_from_this());
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);
} }
@ -69,8 +70,8 @@ QString VTextSection::name() const { return u8"text-section"; }
// Returns the pattern for this definition: a leading run of characters that excludes
// '{', '}', newline, '@' and '&'. VTextSection::analysis() builds a QRegExp from it.
QString VTextSection::regex() const { return u8"^([^\\{\\}\\n@&]+)"; } QString VTextSection::regex() const { return u8"^([^\\{\\}\\n@&]+)"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>> std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
VTextSection::analysis(std::shared_ptr<const lib_token::WordBase> content) const { VTextSection::analysis(std::shared_ptr<const WordBase> content) const {
auto text = content->content(); auto text = content->content();
QRegExp regx(regex()); QRegExp regx(regex());
if (regx.indexIn(text) == -1) { if (regx.indexIn(text) == -1) {
@ -80,9 +81,9 @@ VTextSection::analysis(std::shared_ptr<const lib_token::WordBase> content) const
auto match = regx.cap(1); auto match = regx.cap(1);
auto remains = content->content().mid(match.length()); auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), match, content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
if (remains.length()) { if (remains.length()) {
auto t_remains = std::make_shared<lib_token::WordPeaks>(content->row(), content->column(), remains, content->file()); auto t_remains = std::make_shared<WordPeaks>(content->row(), content->column(), remains, content->file());
return std::make_tuple(tinst, t_remains); return std::make_tuple(tinst, t_remains);
} }
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);
@ -96,7 +97,7 @@ QString NameSection::name() const { return u8"name-section"; }
// Returns the pattern for this definition: a leading run of characters that excludes
// '{', '}', newline, '@' and '&', whose first character additionally may not be ':'.
// NameSection::analysis() builds a QRegExp from it.
QString NameSection::regex() const { return u8"^([^:\\{\\}\\n@&][^\\{\\}\\n@&]*)"; } QString NameSection::regex() const { return u8"^([^:\\{\\}\\n@&][^\\{\\}\\n@&]*)"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase> > NameSection::analysis(std::shared_ptr<const lib_token::WordBase> content) const std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase> > NameSection::analysis(std::shared_ptr<const WordBase> content) const
{ {
auto text = content->content(); auto text = content->content();
QRegExp regx(regex()); QRegExp regx(regex());
@ -107,9 +108,9 @@ std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_to
auto match = regx.cap(1); auto match = regx.cap(1);
auto remains = content->content().mid(match.length()); auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), match, content->file(), shared_from_this()); auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
if (remains.length()) { if (remains.length()) {
auto t_remains = std::make_shared<lib_token::WordPeaks>(content->row(), content->column(), remains, content->file()); auto t_remains = std::make_shared<WordPeaks>(content->row(), content->column(), remains, content->file());
return std::make_tuple(tinst, t_remains); return std::make_tuple(tinst, t_remains);
} }
return std::make_tuple(tinst, nullptr); return std::make_tuple(tinst, nullptr);