94 lines
2.7 KiB
C++
94 lines
2.7 KiB
C++
|
|
#include "lex_foundation.h"
|
||
|
|
#include "tokeniimpl.h"
|
||
|
|
#include <tuple>
|
||
|
|
|
||
|
|
using namespace Lex;
|
||
|
|
|
||
|
|
/// Builds a reader that analyses words against the given token definitions.
/// Definitions are tried in list order by get_token().
TokensReader::TokensReader(QList<TokenDef *> sequence) : analysis_sequences(sequence) {}
|
||
|
|
|
||
|
|
/// Tokenises an on-disk document line by line.
///
/// Opens the file in text mode, decodes it as UTF-8 and feeds every line to
/// get_tokens_of_line, accumulating the tokens of the whole document.
/// Throws a heap-allocated LexException (callers catch LexException*) when
/// the file cannot be opened. The QFile is closed by its destructor (RAII).
QList<Token *> TokensReader::getTokensOfDocument(const QFileInfo &file) {
    QFile input(file.canonicalFilePath());
    if (!input.open(QIODevice::Text | QIODevice::ReadOnly))
        throw new LexException("指定文件无法打开:" + file.canonicalFilePath());

    QTextStream reader(&input);
    reader.setCodec("UTF-8");

    // One row counter per line read; row numbers are zero-based.
    QList<Token *> tokens;
    for (auto row = 0; !reader.atEnd(); ++row)
        tokens.append(get_tokens_of_line(file, reader.readLine(), row));

    return tokens;
}
|
||
|
|
|
||
|
|
/// Tokenises an in-memory buffer line by line.
///
/// Decodes the buffer as UTF-8 and feeds every line to get_tokens_of_line;
/// `_file` is only forwarded so the produced words carry a source path.
QList<Token *> TokensReader::getTokensOfContents(const QByteArray &buff, const QFileInfo &_file) {
    QTextStream reader(buff, QIODevice::ReadOnly);
    reader.setCodec("UTF-8");

    // One row counter per line read; row numbers are zero-based.
    QList<Token *> tokens;
    for (auto row = 0; !reader.atEnd(); ++row)
        tokens.append(get_tokens_of_line(_file, reader.readLine(), row));

    return tokens;
}
|
||
|
|
|
||
|
|
/// Tokenises a single line of text.
///
/// @param associate file the line came from (its canonical path is recorded
///                  on every produced word)
/// @param line      the raw line content
/// @param row       zero-based line number
/// @return tokens recognised on this line, in source order
/// @throws LexException* (from get_token) when a word matches no definition
QList<Token *> TokensReader::get_tokens_of_line(const QFileInfo &associate, const QString &line, int row) {
    auto split_seqs = line.split(" ", QString::SplitBehavior::SkipEmptyParts);
    auto batch_column = 0;

    // Convert the line content into a list of positioned word objects.
    // indexOf() resumes from the end of the previous word so that repeated
    // words on the same line get their own correct column.
    QList<WordBase *> source_sequences;
    for (auto &it : split_seqs) {
        auto inst = new TokenWord(associate.canonicalFilePath());
        source_sequences.append(inst);

        auto start_index = line.indexOf(it, batch_column);
        inst->reset(it, row, start_index);
        batch_column = start_index + it.length();
    }

    // Analyse every word of the line. A token may report an unconsumed
    // remainder, which is queued immediately after the current position so
    // it is analysed next. Indexed iteration is required because the list
    // grows while being walked.
    QList<Token *> results;
    for (auto idx = 0; idx < source_sequences.size(); ++idx) {
        auto inst = source_sequences[idx];

        Token *retv = nullptr;
        try {
            retv = get_token(*inst);
        } catch (...) {
            // Fix: the original leaked every not-yet-processed word when
            // get_token threw. Release them before propagating the error.
            for (; idx < source_sequences.size(); ++idx)
                delete source_sequences[idx];
            throw;
        }
        results.append(retv);

        // Fix: call remains() once instead of twice (assumed to be a plain
        // getter — NOTE(review): confirm it has no consuming side effect).
        if (auto rest = retv->remains())
            source_sequences.insert(idx + 1, rest);

        // The word has been fully consumed by the parse above.
        delete inst;
    }

    return results;
}
|
||
|
|
|
||
|
|
/// Resolves a single word into a token.
///
/// Tries every registered token definition in registration order; the first
/// definition that recognises the word wins.
///
/// @throws LexException* (heap-allocated; callers catch the pointer) when no
///         definition matches, with the word's content and position embedded.
Token *TokensReader::get_token(const WordBase &word) {
    for (auto def : analysis_sequences) {
        if (auto token = def->analysis(word))
            return token;
    }

    throw new LexException(QString("指定的词语无法解析:%1 <row:%2,col:%3>").arg(word.content()).arg(word.row()).arg(word.column()));
}
|
||
|
|
|
||
|
|
/// Wraps a human-readable description of the lexing failure.
LexException::LexException(const QString &msg) : msg_store(msg) {}
|
||
|
|
|
||
|
|
/// Returns the stored error message as a QString.
QString LexException::message() { return this->msg_store; }
|
||
|
|
|
||
|
|
const char *LexException::what() const { return msg_store.toLocal8Bit(); }
|