libToken放弃词法解析

This commit is contained in:
codeboss 2024-06-20 12:49:26 +08:00
parent 3d414678ea
commit 3c320da2cd
4 changed files with 204 additions and 180 deletions

View File

@ -5,23 +5,21 @@
using namespace lib_token;
TokenReader::TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers) : rules_store(rulers) {}
std::shared_ptr<const Token> lib_token::TokenReader::tokensWithin(const QString& path) const {
std::shared_ptr<const IWordBase> TokenReader::wordsFrom(const QString& path) const {
auto content_list = extract_from(path);
if (!content_list.size())
return nullptr;
std::shared_ptr<const Token> prev_ptr = std::make_shared<const TokenImpl>(content_list.last(), nullptr);
std::shared_ptr<const IWordBase> prev_ptr = std::make_shared<const WordImpl>(content_list.last(), nullptr);
for (auto idx = content_list.size() - 2; idx >=0; --idx) {
auto content_ptr = content_list[idx];
prev_ptr = std::make_shared<const TokenImpl>(content_ptr, prev_ptr);
prev_ptr = std::make_shared<const WordImpl>(content_ptr, prev_ptr);
}
return prev_ptr;
}
QList<std::shared_ptr<const Token>> TokenReader::extract_from(const QString& path) const {
QList<std::shared_ptr<const IWordBase>> TokenReader::extract_from(const QString& path) const {
QFile file(path);
if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
throw new TokenException(u8"Lex[0x0000]Ö¸¶¨ÎļþÎÞ·¨´ò¿ª£º" + path);
@ -29,7 +27,7 @@ QList<std::shared_ptr<const Token>> TokenReader::extract_from(const QString& pat
QTextStream tin(&file);
tin.setCodec("UTF-8");
QList<std::shared_ptr<const Token>> ret_list;
QList<std::shared_ptr<const IWordBase>> ret_list;
int line_number = 1;
while (!tin.atEnd()) {
auto line = tin.readLine() + "\n";
@ -39,105 +37,66 @@ QList<std::shared_ptr<const Token>> TokenReader::extract_from(const QString& pat
return ret_list;
}
QList<std::shared_ptr<const Token>> TokenReader::parse_line(int row, const QString& line_text, const QString& path) const {
auto words = line_text.split(" ", QString::SplitBehavior::SkipEmptyParts);
#include <QRegExp>
QList<std::shared_ptr<const IWordBase>> TokenReader::parse_line(int row, const QString& line_text, const QString& path) const {
QRegExp split_char(u8"\\s");
auto words = line_text.split(split_char, QString::SplitBehavior::SkipEmptyParts);
QList<std::shared_ptr<const WordBase>> primary_words;
QList<std::shared_ptr<const IWordBase>> primary_words;
int columns_offset = 0;
for (auto& w : words) {
auto column_start = line_text.indexOf(w, columns_offset);
auto token = std::make_shared<WordPeaks>(row, column_start + 1, w, path);
auto token = std::make_shared<WordContent>(row, column_start + 1, w, path);
primary_words << token;
columns_offset = column_start + w.length();
}
QList<std::shared_ptr<const Token>> rets_tokens;
for (auto idx = 0; idx < primary_words.size(); ++idx) {
auto word = primary_words[idx];
auto result = parse_token(word);
rets_tokens.append(std::get<0>(result));
auto remains = std::get<1>(result);
if (remains)
primary_words.insert(idx + 1, remains);
}
return rets_tokens;
}
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> TokenReader::parse_token(std::shared_ptr<const WordBase> word) const {
for (auto& it : this->rules_store) {
auto result = it->analysis(word);
if (std::get<0>(result)) {
return result;
}
}
throw new TokenException(QString(u8"Lex[0x0001]Ö¸¶¨´ÊÓïÎÞ·¨½âÎö£º%1,<%2,%3>").arg(word->content()).arg(word->row()).arg(word->column()));
return primary_words;
}
/**
 * @brief Create an exception object carrying a descriptive message.
 * @param message text later returned by message()
 */
TokenException::TokenException(const QString& message)
    : msg_store(message)
{
}

/**
 * @brief Access the message stored at construction time.
 * @return copy of the stored message
 */
QString TokenException::message() const
{
    return msg_store;
}
WordPeaks::WordPeaks(int r, int c, const QString& t, const QString& p) : row_n(r), col_n(c), text_n(t), path_p(p) {}
WordContent::WordContent(int r, int c, const QString& t, const QString& p) : row_n(r), col_n(c), text_n(t), path_p(p) {}
QString WordPeaks::file() const { return path_p; }
QString WordContent::file() const { return path_p; }
QString WordPeaks::content() const { return text_n; }
QString WordContent::content() const { return text_n; }
int WordPeaks::row() const { return row_n; }
int WordContent::row() const { return row_n; }
int WordPeaks::column() const { return col_n; }
int WordContent::column() const { return col_n; }
TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {}
QString TokenContent::file() const { return path_p; }
QString TokenContent::content() const { return text_n; }
int TokenContent::row() const { return row_n; }
int TokenContent::column() const { return col_n; }
std::shared_ptr<const TokenDefine> TokenContent::define() const { return this->type_def; }
std::shared_ptr<const Token> TokenContent::nextToken() const
{
std::shared_ptr<const IWordBase> WordContent::nextWord() const {
return nullptr;
}
TokenImpl::TokenImpl(std::shared_ptr<const Token> content, std::shared_ptr<const Token> next)
: content_ptr(content), next_element(next) {}
WordImpl::WordImpl(std::shared_ptr<const IWordBase> content, std::shared_ptr<const IWordBase> next)
: content_ptr(content), next_ptr(next) {}
QString TokenImpl::file() const
QString WordImpl::file() const
{
return content_ptr->file();
}
QString TokenImpl::content() const
QString WordImpl::content() const
{
return content_ptr->content();
}
int TokenImpl::row() const
int WordImpl::row() const
{
return content_ptr->row();
}
int TokenImpl::column() const
int WordImpl::column() const
{
return content_ptr->column();
}
std::shared_ptr<const TokenDefine> TokenImpl::define() const
std::shared_ptr<const IWordBase> WordImpl::nextWord() const
{
return content_ptr->define();
}
std::shared_ptr<const Token> TokenImpl::nextToken() const
{
return next_element;
return next_ptr;
}

View File

@ -6,23 +6,10 @@
#include <memory>
namespace lib_token {
class TokenDefine;
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenException {
private:
QString msg_store;
public:
TokenException(const QString& message);
virtual QString message() const;
};
/**
* @brief
*/
class WordBase {
class IWordBase {
public:
/**
* @brief
@ -44,25 +31,78 @@ namespace lib_token {
* @return
*/
virtual int column() const = 0;
/**
* @brief .
*
* \return
*/
virtual std::shared_ptr<const IWordBase> nextWord() const = 0;
};
/**
* @brief
*/
class WordContent : public IWordBase {
private:
int row_n, col_n;
QString text_n, path_p;
public:
WordContent(int r, int c, const QString& t, const QString& p);
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
virtual std::shared_ptr<const IWordBase> nextWord() const override;
};
/**
 * @brief Chain node over words: holds one word whose data it exposes and a
 *        pointer to the word that comes after it.
 */
class WordImpl : public IWordBase {
public:
    /**
     * @param content word whose file/content/row/column this node reports
     * @param next    following word in the chain, may be null
     */
    WordImpl(std::shared_ptr<const IWordBase> content, std::shared_ptr<const IWordBase> next);

    // Inherited via IWordBase
    QString file() const override;
    QString content() const override;
    int row() const override;
    int column() const override;
    std::shared_ptr<const IWordBase> nextWord() const override;

private:
    std::shared_ptr<const IWordBase> content_ptr;
    std::shared_ptr<const IWordBase> next_ptr;
};
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenException {
private:
QString msg_store;
public:
TokenException(const QString& message);
virtual QString message() const;
};
class ITokenDefine;
/**
* @brief token解析结果
*/
class Token : public WordBase {
class IToken : public IWordBase {
public:
/**
* @brief token解析机制关联
* @return
*/
virtual std::shared_ptr<const TokenDefine> define() const = 0;
virtual std::shared_ptr<const Token> nextToken() const = 0;
virtual std::shared_ptr<const ITokenDefine> define() const = 0;
};
/**
* @brief token解析机制定义
*/
class TokenDefine {
class ITokenDefine {
public:
/**
* @brief
@ -86,85 +126,21 @@ namespace lib_token {
* @param content
* @return tuple<token/null,remains>
*/
virtual std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> analysis(std::shared_ptr<const WordBase> content) const = 0;
virtual std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>> analysis(std::shared_ptr<const IWordBase> content) const = 0;
};
/**
* @brief
*/
class WordPeaks : public WordBase {
private:
int row_n, col_n;
QString text_n, path_p;
public:
WordPeaks(int r, int c, const QString& t, const QString& p);
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
};
/**
* @brief
*/
class TokenContent : public Token {
private:
int row_n, col_n;
QString text_n, path_p;
std::shared_ptr<const TokenDefine> type_def;
public:
TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const TokenDefine> type);
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
// Token interface
public:
virtual std::shared_ptr<const TokenDefine> define() const override;
virtual std::shared_ptr<const Token> nextToken() const override;
};
class TokenImpl : public Token {
private:
std::shared_ptr<const Token> content_ptr;
std::shared_ptr<const Token> next_element;
public:
TokenImpl(std::shared_ptr<const Token> content, std::shared_ptr<const Token> next);
// 通过 Token 继承
QString file() const override;
QString content() const override;
int row() const override;
int column() const override;
std::shared_ptr<const TokenDefine> define() const override;
std::shared_ptr<const Token> nextToken() const override;
};
/**
* @brief
*/
class LIBTOKEN_EXPORT TokenReader {
private:
QList<std::shared_ptr<const TokenDefine>> rules_store;
QList<std::shared_ptr<const Token>> parse_line(int row, const QString& line_text, const QString& path) const;
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>> parse_token(std::shared_ptr<const WordBase> word) const;
QList<std::shared_ptr<const Token>> extract_from(const QString& path) const;
QList<std::shared_ptr<const IWordBase>> parse_line(int row, const QString& line_text, const QString& path) const;
QList<std::shared_ptr<const IWordBase>> extract_from(const QString& path) const;
public:
TokenReader(const QList<std::shared_ptr<const TokenDefine>> rulers);
std::shared_ptr<const Token> tokensWithin(const QString &path) const;
std::shared_ptr<const IWordBase> wordsFrom(const QString &path) const;
};
} // namespace lib_token

View File

@ -3,6 +3,25 @@
using namespace example_novel;
using namespace lib_token;
TokenContent::TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> type)
: row_n(r), col_n(c), text_n(t), path_p(p), type_def(type) {}
QString TokenContent::file() const { return path_p; }
QString TokenContent::content() const { return text_n; }
int TokenContent::row() const { return row_n; }
int TokenContent::column() const { return col_n; }
std::shared_ptr<const IWordBase> lib_token::TokenContent::nextWord() const
{
return nullptr;
}
std::shared_ptr<const ITokenDefine> TokenContent::define() const { return this->type_def; }
QString LeftBracket::typeName() const { return u8"left-bracket"; }
int example_novel::LeftBracket::typeMark() const
@ -12,8 +31,8 @@ int example_novel::LeftBracket::typeMark() const
QString LeftBracket::regex() const { return u8"{"; }
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
LeftBracket::analysis(std::shared_ptr<const WordBase> content) const {
std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>>
LeftBracket::analysis(std::shared_ptr<const IWordBase> content) const {
auto text = content->content();
if (!text.startsWith(regex()))
return std::make_tuple(nullptr, content);
@ -22,7 +41,7 @@ LeftBracket::analysis(std::shared_ptr<const WordBase> content) const {
content->file(), shared_from_this());
auto t_remains = content->content().mid(regex().length());
if (t_remains.length() > 0) {
auto remains = std::make_shared<WordPeaks>(content->row(), content->column() + regex().length(), t_remains, content->file());
auto remains = std::make_shared<WordContent>(content->row(), content->column() + regex().length(), t_remains, content->file());
return std::make_tuple(token_inst, remains);
}
return std::make_tuple(token_inst, nullptr);
@ -57,8 +76,8 @@ int example_novel::Keywords::typeMark() const
QString Keywords::regex() const { return value_store; }
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
Keywords::analysis(std::shared_ptr<const WordBase> content) const {
std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>>
Keywords::analysis(std::shared_ptr<const IWordBase> content) const {
if (content->content() != regex()) {
return std::make_tuple(nullptr, content);
}
@ -85,8 +104,8 @@ int example_novel::Numbers::typeMark() const
QString Numbers::regex() const { return u8"^([0-9]+)$"; }
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
Numbers::analysis(std::shared_ptr<const WordBase> content) const {
std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>>
Numbers::analysis(std::shared_ptr<const IWordBase> content) const {
auto text = content->content();
QRegExp regx(regex());
if (regx.indexIn(text) == -1)
@ -105,8 +124,8 @@ int example_novel::VTextSection::typeMark() const
QString VTextSection::regex() const { return u8"^([^\\{\\}\\n@&]+)"; }
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase>>
VTextSection::analysis(std::shared_ptr<const WordBase> content) const {
std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>>
VTextSection::analysis(std::shared_ptr<const IWordBase> content) const {
auto text = content->content();
QRegExp regx(regex());
if (regx.indexIn(text) == -1) {
@ -118,7 +137,7 @@ VTextSection::analysis(std::shared_ptr<const WordBase> content) const {
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<WordPeaks>(content->row(), content->column(), remains, content->file());
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);
@ -142,7 +161,7 @@ int example_novel::NameSection::typeMark() const
QString NameSection::regex() const { return u8"^([^:\\{\\}\\n@&][^\\{\\}\\n@&]*)"; }
std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase> > NameSection::analysis(std::shared_ptr<const WordBase> content) const
std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase> > NameSection::analysis(std::shared_ptr<const IWordBase> content) const
{
auto text = content->content();
QRegExp regx(regex());
@ -155,7 +174,39 @@ std::tuple<std::shared_ptr<const Token>, std::shared_ptr<const WordBase> > NameS
auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), match, content->file(), shared_from_this());
if (remains.length()) {
auto t_remains = std::make_shared<WordPeaks>(content->row(), content->column(), remains, content->file());
auto t_remains = std::make_shared<WordContent>(content->row(), content->column(), remains, content->file());
return std::make_tuple(tinst, t_remains);
}
return std::make_tuple(tinst, nullptr);
}
/**
 * @brief Name of this token type.
 * NOTE(review): the literal looks mis-decoded in this view (GBK bytes shown
 * as Latin-1?) — confirm the file encoding before touching it.
 */
QString example_novel::DeclareSymbo::typeName() const { return u8"ÉùÃ÷·û"; }

/** @brief Numeric mark identifying this token type. */
int example_novel::DeclareSymbo::typeMark() const { return 0x0A000000; }

/** @brief Text that analysis() looks for at the start of a word. */
QString example_novel::DeclareSymbo::regex() const { return u8"#"; }
/**
 * @brief Try to split a leading declaration symbol off the given word.
 *
 * A match requires BOTH that the word sits in column 1 and that its text
 * begins with regex().
 *
 * @param content word to analyse
 * @return tuple<token/null, remains>: (nullptr, content) when there is no
 *         match; otherwise the "#" token plus a word holding the text after
 *         the symbol, or nullptr when nothing is left
 */
std::tuple<std::shared_ptr<const IToken>, std::shared_ptr<const IWordBase>> example_novel::DeclareSymbo::analysis(std::shared_ptr<const IWordBase> content) const
{
    const auto text = content->content();
    const auto symbol = regex();
    // Reject when EITHER condition fails. The previous `&&` only rejected
    // words that failed both, so any word in column 1 was turned into a "#"
    // token and lost its first character.
    if (content->column() != 1 || !text.startsWith(symbol)) {
        return std::make_tuple(nullptr, content);
    }

    auto remains = text.mid(symbol.size());
    auto tinst = std::make_shared<TokenContent>(content->row(), content->column(), u8"#", content->file(), shared_from_this());
    if (remains.length()) {
        // The remainder starts right after the symbol, so shift its column by the symbol length.
        auto t_remains = std::make_shared<WordContent>(content->row(), content->column() + symbol.length(), remains, content->file());
        return std::make_tuple(tinst, t_remains);
    }
    return std::make_tuple(tinst, nullptr);

View File

@ -7,15 +7,42 @@
#include <memory>
#include <tuple>
namespace lib_token {
/**
* @brief ´Ê·¨½âÎö³É¹û
*/
class TokenContent : public IToken {
private:
int row_n, col_n;
QString text_n, path_p;
std::shared_ptr<const ITokenDefine> type_def;
public:
TokenContent(int r, int c, const QString& t, const QString& p, std::shared_ptr<const ITokenDefine> type);
// WordBase interface
public:
virtual QString file() const override;
virtual QString content() const override;
virtual int row() const override;
virtual int column() const override;
virtual std::shared_ptr<const IWordBase> nextWord() const override;
// Token interface
public:
virtual std::shared_ptr<const ITokenDefine> define() const override;
};
}
namespace example_novel {
class LIBTOKEN_EXPORT LeftBracket : public lib_token::TokenDefine, public std::enable_shared_from_this<LeftBracket> {
class LIBTOKEN_EXPORT LeftBracket : public lib_token::ITokenDefine, public std::enable_shared_from_this<LeftBracket> {
// TokenDefine interface
public:
virtual QString typeName() const override;
virtual int typeMark() const override;
virtual QString regex() const override;
virtual std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
analysis(std::shared_ptr<const lib_token::WordBase> content) const override;
virtual std::tuple<std::shared_ptr<const lib_token::IToken>, std::shared_ptr<const lib_token::IWordBase>>
analysis(std::shared_ptr<const lib_token::IWordBase> content) const override;
};
class LIBTOKEN_EXPORT RightBracket : public LeftBracket {
@ -46,7 +73,7 @@ namespace example_novel {
virtual QString regex() const override;
};
class LIBTOKEN_EXPORT Keywords : public lib_token::TokenDefine, public std::enable_shared_from_this<Keywords> {
class LIBTOKEN_EXPORT Keywords : public lib_token::ITokenDefine, public std::enable_shared_from_this<Keywords> {
private:
QString value_store, name_store;
int type_code;
@ -59,38 +86,49 @@ namespace example_novel {
virtual QString typeName() const override;
virtual int typeMark() const override;
virtual QString regex() const override;
virtual std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
analysis(std::shared_ptr<const lib_token::WordBase> content) const override;
virtual std::tuple<std::shared_ptr<const lib_token::IToken>, std::shared_ptr<const lib_token::IWordBase>>
analysis(std::shared_ptr<const lib_token::IWordBase> content) const override;
};
class LIBTOKEN_EXPORT Numbers : public lib_token::TokenDefine, public std::enable_shared_from_this<Numbers> {
class LIBTOKEN_EXPORT Numbers : public lib_token::ITokenDefine, public std::enable_shared_from_this<Numbers> {
// TokenDefine interface
public:
virtual QString typeName() const override;
virtual int typeMark() const override;
virtual QString regex() const override;
virtual std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
analysis(std::shared_ptr<const lib_token::WordBase> content) const override;
virtual std::tuple<std::shared_ptr<const lib_token::IToken>, std::shared_ptr<const lib_token::IWordBase>>
analysis(std::shared_ptr<const lib_token::IWordBase> content) const override;
};
class LIBTOKEN_EXPORT NameSection : public lib_token::TokenDefine, public std::enable_shared_from_this<NameSection> {
class LIBTOKEN_EXPORT NameSection : public lib_token::ITokenDefine, public std::enable_shared_from_this<NameSection> {
// TokenDefine interface
public:
virtual QString typeName() const override;
virtual int typeMark() const override;
virtual QString regex() const override;
virtual std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
analysis(std::shared_ptr<const lib_token::WordBase> content) const override;
virtual std::tuple<std::shared_ptr<const lib_token::IToken>, std::shared_ptr<const lib_token::IWordBase>>
analysis(std::shared_ptr<const lib_token::IWordBase> content) const override;
};
class LIBTOKEN_EXPORT VTextSection : public lib_token::TokenDefine, public std::enable_shared_from_this<VTextSection> {
class LIBTOKEN_EXPORT VTextSection : public lib_token::ITokenDefine, public std::enable_shared_from_this<VTextSection> {
// TokenDefine interface
public:
virtual QString typeName() const override;
virtual int typeMark() const override;
virtual QString regex() const override;
virtual std::tuple<std::shared_ptr<const lib_token::Token>, std::shared_ptr<const lib_token::WordBase>>
analysis(std::shared_ptr<const lib_token::WordBase> content) const override;
virtual std::tuple<std::shared_ptr<const lib_token::IToken>, std::shared_ptr<const lib_token::IWordBase>>
analysis(std::shared_ptr<const lib_token::IWordBase> content) const override;
};
/**
 * @brief Token definition for the declaration symbol whose regex() is "#".
 */
class LIBTOKEN_EXPORT DeclareSymbo : public lib_token::ITokenDefine, public std::enable_shared_from_this<DeclareSymbo> {
public:
    // Inherited via ITokenDefine
    QString typeName() const override;
    int typeMark() const override;
    QString regex() const override;
    std::tuple<std::shared_ptr<const lib_token::IToken>, std::shared_ptr<const lib_token::IWordBase>>
    analysis(std::shared_ptr<const lib_token::IWordBase> content) const override;
};
} // namespace example_novel