token流精简

This commit is contained in:
codeboss 2024-06-18 21:17:06 +08:00
parent ac99860469
commit 856a81c2c9
3 changed files with 254 additions and 182 deletions

View File

@ -7,60 +7,74 @@ using namespace lib_token;
// Builds a reader around the given rule list; the list is copied into rules_store,
// which parse_token later walks front to back, returning the first rule that yields a token.
TokenReader::TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers) : rules_store(rulers) {}
QList<std::shared_ptr<const lib_token::Token>> lib_token::TokenReader::extractFrom(const QString& path) const {
QFile file(path);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
throw new TokenException(u8"Lex[0x0000]指定文件无法打开:" + path);
}
QTextStream tin(&file);
tin.setCodec("UTF-8");
std::shared_ptr<const Token> lib_token::TokenReader::tokensWithin(const QString& path) const {
auto content_list = extract_from(path);
if (!content_list.size())
return nullptr;
QList<std::shared_ptr<const Token>> ret_list;
int line_number = 1;
while (!tin.atEnd()) {
auto line = tin.readLine() + "\n";
ret_list.append(this->parse_line(line_number++, line, path));
}
std::shared_ptr<const Token> prev_ptr = std::make_shared<const TokenImpl>(content_list.last(), nullptr);
for (auto idx = content_list.size() - 2; idx >=0; --idx) {
auto content_ptr = content_list[idx];
prev_ptr = std::make_shared<const TokenImpl>(content_ptr, prev_ptr);
}
return ret_list;
return prev_ptr;
}
/**
 * @brief Opens `path` as UTF-8 text and tokenizes it one line at a time.
 *
 * Each line is handed to parse_line with a trailing "\n" re-attached (readLine
 * strips it) and a row number that starts at 1.
 *
 * @param path file to read
 * @return all tokens of the file, in reading order
 * @throws TokenException* (thrown by pointer) when the file cannot be opened;
 *         parse_line may throw the same type for unrecognised words
 */
QList<std::shared_ptr<const Token>> TokenReader::extract_from(const QString& path) const {
    QFile source(path);
    const bool opened = source.open(QIODevice::ReadOnly | QIODevice::Text);
    if (!opened) {
        throw new TokenException(u8"Lex[0x0000]指定文件无法打开:" + path);
    }

    QTextStream reader(&source);
    reader.setCodec("UTF-8");

    QList<std::shared_ptr<const Token>> collected;
    for (int current_row = 1; !reader.atEnd(); ++current_row) {
        const QString text = reader.readLine() + "\n";
        collected.append(parse_line(current_row, text, path));
    }
    return collected;
}
/**
 * @brief Splits one line of text into words and turns every word into one or more tokens.
 *
 * @param row       line number recorded on every produced word/token
 * @param line_text text of the line
 * @param path      file the line came from, recorded on every produced word/token
 * @return tokens of this line, in left-to-right order
 * @throws TokenException* (thrown by pointer), propagated from parse_token when a
 *         word matches none of the rules
 */
QList<std::shared_ptr<const Token>> TokenReader::parse_line(int row, const QString& line_text, const QString& path) const {
    const auto words = line_text.split(" ", QString::SplitBehavior::SkipEmptyParts);

    // Locate every word in the original line so it can carry a 1-based column.
    // The search resumes after the previous word so repeated words get distinct columns.
    QList<std::shared_ptr<const WordBase>> primary_words;
    int columns_offset = 0;
    for (const auto& w : words) {
        const auto column_start = line_text.indexOf(w, columns_offset);
        auto token = std::make_shared<WordPeaks>(row, column_start + 1, w, path);
        primary_words << token;
        columns_offset = column_start + w.length();
    }

    // Index-based loop on purpose: a rule may consume only the front of a word, and the
    // unconsumed remainder is queued right behind the current position, growing the list.
    QList<std::shared_ptr<const Token>> rets_tokens;
    for (auto idx = 0; idx < primary_words.size(); ++idx) {
        auto word = primary_words[idx];
        auto result = parse_token(word);
        rets_tokens.append(std::get<0>(result));

        auto remains = std::get<1>(result);
        if (remains)
            primary_words.insert(idx + 1, remains);
    }
    return rets_tokens;
}
/**
 * @brief Runs the word through the rules in rules_store and returns the first successful result.
 *
 * @param word word to analyse
 * @return tuple of (token, remainder); the remainder is whatever the matching rule
 *         reported as unconsumed, and may be null
 * @throws TokenException* (thrown by pointer) when no rule produces a token
 */
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> TokenReader::parse_token(std::shared_ptr<const WordBase> word) const {
    for (const auto& rule : this->rules_store) {
        auto result = rule->analysis(word);
        // A non-null first element means the rule recognised a token.
        if (std::get<0>(result)) {
            return result;
        }
    }

    throw new TokenException(QString(u8"Lex[0x0001]指定词语无法解析:%1,<%2,%3>").arg(word->content()).arg(word->row()).arg(word->column()));
}
// Stores a copy of the given message text in msg_store.
TokenException::TokenException(const QString& message) : msg_store(message) {}
@ -77,15 +91,53 @@ int WordPeaks::row() const { return row_n; }
// Returns the stored column. TokenReader::parse_line constructs words with
// indexOf(...) + 1, i.e. a 1-based column.
int WordPeaks::column() const { return col_n; }
// Constructors: store row, column, text, file path and the defining rule verbatim.
TokenInst::TokenInst(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {}
TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {}
// file(): path of the file the token was read from.
QString TokenInst::file() const { return path_p; }
QString TokenContent::file() const { return path_p; }
// content(): text of the token.
QString TokenInst::content() const { return text_n; }
QString TokenContent::content() const { return text_n; }
// row(): row passed at construction.
int TokenInst::row() const { return row_n; }
int TokenContent::row() const { return row_n; }
// column(): column passed at construction.
int TokenInst::column() const { return col_n; }
int TokenContent::column() const { return col_n; }
// define(): the rule object passed at construction (the token's type).
std::shared_ptr<const TokenDefine> TokenInst::define() const { return this->type_def; }
std::shared_ptr<const TokenDefine> TokenContent::define() const { return this->type_def; }
/**
 * @brief A bare TokenContent carries no successor.
 * @return always a null pointer
 */
std::shared_ptr<const Token> TokenContent::nextToken() const {
    return std::shared_ptr<const Token>{};
}
TokenImpl::TokenImpl(std::shared_ptr<const Token> content, std::shared_ptr<const Token> next)
: content_ptr(content), next_element(next) {}
/** @brief Forwards to file() of the wrapped token. */
QString TokenImpl::file() const {
    const Token& wrapped = *content_ptr;
    return wrapped.file();
}
/** @brief Forwards to content() of the wrapped token. */
QString TokenImpl::content() const {
    const Token& wrapped = *content_ptr;
    return wrapped.content();
}
/** @brief Forwards to row() of the wrapped token. */
int TokenImpl::row() const {
    const int wrapped_row = content_ptr->row();
    return wrapped_row;
}
/** @brief Forwards to column() of the wrapped token. */
int TokenImpl::column() const {
    const int wrapped_column = content_ptr->column();
    return wrapped_column;
}
std::shared_ptr<const TokenDefine> TokenImpl::define() const
{
return content_ptr->define();
}
std::shared_ptr<const Token> TokenImpl::nextToken() const
{
return next_element;
}

View File

@ -6,140 +6,159 @@
#include <memory>
namespace lib_token {
class TokenDefine;
class TokenDefine;
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenException {
private:
QString msg_store;
/**
 * @brief Read-only view of a piece of source text together with its origin.
 */
class WordBase {
public:
    /**
     * @brief Virtual destructor so that implementations can be destroyed
     *        safely through a WordBase pointer or reference.
     */
    virtual ~WordBase() = default;
    /**
     * @brief Path of the file the text was read from.
     * @return file path
     */
    virtual QString file() const = 0;
    /**
     * @brief The text itself.
     * @return text content
     */
    virtual QString content() const = 0;
    /**
     * @brief Row (line number) of the text; TokenReader numbers lines from 1.
     * @return row number
     */
    virtual int row() const = 0;
    /**
     * @brief Column at which the text starts; TokenReader::parse_line assigns 1-based columns.
     * @return column number
     */
    virtual int column() const = 0;
};
public:
TokenException(const QString& message);
virtual QString message() const;
};
/**
* @brief token解析结果
*/
class Token : public WordBase {
public:
/**
* @brief token解析机制关联
* @return
*/
virtual std::shared_ptr<const TokenDefine> define() const = 0;
};
/**
* @brief
*/
class WordBase {
public:
/**
* @brief
* @return
*/
virtual QString file() const = 0;
/**
* @brief
* @return
*/
virtual QString content() const = 0;
/**
* @brief
* @return
*/
virtual int row() const = 0;
/**
* @brief
* @return
*/
virtual int column() const = 0;
};
/**
* @brief token解析机制定义
*/
class TokenDefine {
public:
/**
* @brief
* @return
*/
virtual QString name() const = 0;
/**
* @brief
* @return
*/
virtual QString regex() const = 0;
/**
* @brief Result of token parsing: a word plus the rule that recognised it.
*/
class Token : public WordBase {
public:
/**
* @brief Rule (token definition) associated with this token.
* @return the TokenDefine this token was created with
*/
virtual std::shared_ptr<const TokenDefine> define() const = 0;
/**
* @brief Token that follows this one in a chain.
* @return the next token; null when there is none (TokenContent always returns null,
*         TokenImpl returns the successor it was constructed with)
*/
virtual std::shared_ptr<const Token> nextToken() const = 0;
};
/**
* @brief
* @param content
* @return tuple<token/null,remains>
*/
virtual std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> analysis(std::shared_ptr<const WordBase> content) const = 0;
};
/**
* @brief token解析机制定义
*/
class TokenDefine {
public:
/**
* @brief
* @return
*/
virtual QString name() const = 0;
/**
* @brief
* @return
*/
virtual QString regex() const = 0;
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenException {
private:
QString msg_store;
/**
* @brief
* @param content
* @return tuple<token/null,remains>
*/
virtual std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> analysis(std::shared_ptr<const WordBase> content) const = 0;
};
public:
TokenException(const QString& message);
virtual QString message() const;
};
/**
* @brief
*/
class LIBTOKEN_EXPORT WordPeaks : public WordBase {
private:
int row_n, col_n;
QString text_n, path_p;
/**
* @brief
*/
class WordPeaks : public WordBase {
private:
int row_n, col_n;
QString text_n, path_p;
public:
WordPeaks(int r, int c, const QString& t, const QString& p);
public:
WordPeaks(int r, int c, const QString& t, const QString& p);
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
};
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
};
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenInst : public Token {
private:
int row_n, col_n;
QString text_n, path_p;
std::shared_ptr<const TokenDefine> type_def;
/**
* @brief
*/
class TokenContent : public Token {
private:
int row_n, col_n;
QString text_n, path_p;
std::shared_ptr<const TokenDefine> type_def;
public:
TokenInst(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type);
public:
TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type);
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
// Token interface
public:
virtual std::shared_ptr<const TokenDefine> define() const override;
};
// Token interface
public:
virtual std::shared_ptr<const TokenDefine> define() const override;
virtual std::shared_ptr<const Token> nextToken() const override;
};
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenReader {
private:
QList<std::shared_ptr<const TokenDefine>> rules_store;
class TokenImpl : public Token {
private:
std::shared_ptr<const Token> content_ptr;
std::shared_ptr<const Token> next_element;
QList<std::shared_ptr<const Token>> parse_line(int row, const QString& line_text, const QString& path) const;
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> parse_token(std::shared_ptr<const WordBase> word) const;
public:
TokenImpl(std::shared_ptr<const Token> content, std::shared_ptr<const Token> next);
public:
TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers);
// 通过 Token 继承
QString file() const override;
QString content() const override;
int row() const override;
int column() const override;
std::shared_ptr<const TokenDefine> define() const override;
std::shared_ptr<const Token> nextToken() const override;
};
QList<std::shared_ptr<const Token>> extractFrom(const QString& path) const;
};
/**
* @brief Tokenizer front end: reads a text file and converts it into tokens
*        using an ordered list of rules.
*/
class LIBTOKEN_EXPORT TokenReader {
private:
// Rules tried in order by parse_token; the first one that yields a token wins.
QList<std::shared_ptr<const TokenDefine>> rules_store;
// Splits one line into words and converts them to tokens.
QList<std::shared_ptr<const Token>> parse_line(int row, const QString& line_text, const QString& path) const;
// Returns (token, unconsumed remainder) for one word; throws TokenException* when no rule matches.
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> parse_token(std::shared_ptr<const WordBase> word) const;
// Reads the whole file and returns its tokens as a flat list.
QList<std::shared_ptr<const Token>> extract_from(const QString& path) const;
public:
/**
* @brief Creates a reader that uses the given rules.
* @param rulers rule list, copied into the reader
*/
TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers);
/**
* @brief Tokenizes a file and returns its tokens as a linked chain.
* @param path file to tokenize
* @return first token of the chain (follow nextToken()), or null when the file yields no tokens
*/
std::shared_ptr<const Token> tokensWithin(const QString &path) const;
};
} // namespace lib_token

View File

@ -1,22 +1,23 @@
#include "tokens_novel.h"
using namespace example_novel;
using namespace lib_token;
// Rule name reported for left-bracket tokens.
QString LeftBracket::name() const { return u8"left-bracket"; }
// Despite the method name this is used as a literal prefix, not a regular expression:
// LeftBracket::analysis tests the word with startsWith(regex()).
QString LeftBracket::regex() const { return u8"{"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
LeftBracket::analysis(std::shared_ptr<const lib_token::WordBase> content) const {
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
LeftBracket::analysis(std::shared_ptr<const WordBase> content) const {
auto text = content->content();
if (!text.startsWith(regex()))
return std::make_tuple(nullptr, content);
auto token_inst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), content->content().mid(0, regex().length()),
auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), content->content().mid(0, regex().length()),
content->file(), shared_from_this());
auto t_remains = content->content().mid(regex().length());
if (t_remains.length() > 0) {
auto remains = std::make_shared<lib_token::WordPeaks>(content->row(), content->column() + regex().length(), t_remains, content->file());
auto remains = std::make_shared<WordPeaks>(content->row(), content->column() + regex().length(), t_remains, content->file());
return std::make_tuple(token_inst, remains);
}
return std::make_tuple(token_inst, nullptr);
@ -36,13 +37,13 @@ QString Keywords::name() const { return name_store; }
// Returns the keyword text; Keywords::analysis compares the whole word against it for equality.
QString Keywords::regex() const { return value_store; }
/**
 * @brief Recognises a word that is exactly equal to this keyword.
 * @param content word to analyse
 * @return (nullptr, content) when the word differs from the keyword;
 *         (token, nullptr) when it matches - a keyword never leaves a remainder
 */
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
Keywords::analysis(std::shared_ptr<const WordBase> content) const {
    const QString text = content->content();
    if (text != regex()) {
        return std::make_tuple(nullptr, content);
    }

    auto token_inst = std::make_shared<TokenContent>(content->row(), content->column(), text, content->file(), shared_from_this());
    return std::make_tuple(token_inst, nullptr);
}
@ -54,14 +55,14 @@ QString Numbers::name() const { return u8"numbers"; }
// Anchored pattern: the whole word must consist of one or more digits 0-9.
QString Numbers::regex() const { return u8"^([0-9]+)$"; }
/**
 * @brief Recognises a word that matches the pattern returned by regex().
 * @param content word to analyse
 * @return (nullptr, content) when the pattern does not match;
 *         (token, nullptr) when it does - the token takes the whole word
 */
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
Numbers::analysis(std::shared_ptr<const WordBase> content) const {
    const QString text = content->content();
    QRegExp regx(regex());
    if (regx.indexIn(text) == -1)
        return std::make_tuple(nullptr, content);

    auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), text, content->file(), shared_from_this());
    return std::make_tuple(tinst, nullptr);
}
@ -69,8 +70,8 @@ QString VTextSection::name() const { return u8"text-section"; }
// Pattern anchored at the start of the word: captures a leading run of one or more
// characters that are none of '{', '}', newline, '@' or '&'.
QString VTextSection::regex() const { return u8"^([^\\{\\}\\n@&]+)"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
VTextSection::analysis(std::shared_ptr<const lib_token::WordBase> content) const {
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
VTextSection::analysis(std::shared_ptr<const WordBase> content) const {
auto text = content->content();
QRegExp regx(regex());
if (regx.indexIn(text) == -1) {
@ -80,9 +81,9 @@ VTextSection::analysis(std::shared_ptr<const lib_token::WordBase> content) const
auto match = regx.cap(1);
auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), match, content->file(), shared_from_this());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<lib_token::WordPeaks>(content->row(), content->column(), remains, content->file());
auto t_remains = std::make_shared<WordPeaks>(content->row(), content->column(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);
@ -96,7 +97,7 @@ QString NameSection::name() const { return u8"name-section"; }
// Pattern anchored at the start of the word: like a text section, except that the
// first character must additionally not be ':'; later characters may be ':'.
QString NameSection::regex() const { return u8"^([^:\\{\\}\\n@&][^\\{\\}\\n@&]*)"; }
std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase> > NameSection::analysis(std::shared_ptr<const lib_token::WordBase> content) const
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase> > NameSection::analysis(std::shared_ptr<const WordBase> content) const
{
auto text = content->content();
QRegExp regx(regex());
@ -107,9 +108,9 @@ std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_to
auto match = regx.cap(1);
auto remains = content->content().mid(match.length());
auto tinst = std::make_shared<lib_token::TokenInst>(content->row(), content->column(), match, content->file(), shared_from_this());
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<lib_token::WordPeaks>(content->row(), content->column(), remains, content->file());
auto t_remains = std::make_shared<WordPeaks>(content->row(), content->column(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);