WsParser_VS/libToken/libtoken.cpp

114 lines
3.0 KiB
C++
Raw Normal View History

2024-03-17 07:58:28 +00:00
#include "libtoken.h"
#include <QFile>
#include <QTextStream>
using namespace lib_token;
2024-06-20 11:18:41 +00:00
std::shared_ptr<const IWordBase> WordReader::wordsFrom(const QString& path) const {
2024-06-18 13:17:06 +00:00
auto content_list = extract_from(path);
if (!content_list.size())
return nullptr;
2024-06-20 04:49:26 +00:00
std::shared_ptr<const IWordBase> prev_ptr = std::make_shared<const WordImpl>(content_list.last(), nullptr);
2024-06-18 13:17:06 +00:00
for (auto idx = content_list.size() - 2; idx >=0; --idx) {
auto content_ptr = content_list[idx];
2024-06-20 04:49:26 +00:00
prev_ptr = std::make_shared<const WordImpl>(content_ptr, prev_ptr);
2024-06-18 13:17:06 +00:00
}
return prev_ptr;
}
2024-06-20 11:18:41 +00:00
/// Reads the text file at `path` line by line and collects the words of every line.
///
/// The file is decoded as UTF-8. Each line is passed to parse_line() together
/// with its 1-based line number and a 64-bit base position whose upper 32 bits
/// hold that line number.
///
/// @param path  File to open in read-only text mode.
/// @return All words of the file, in reading order (empty if the file has no words).
/// @throws TokenException*  when the file cannot be opened. The exception is
///         thrown as a heap-allocated pointer, so handlers must catch
///         `TokenException*`; this is kept as-is so existing handlers keep working.
QList<std::shared_ptr<const IWordBase>> WordReader::extract_from(const QString& path) const {
	QFile file(path);
	if (!file.open(QIODevice::ReadOnly | QIODevice::Text)) {
		// Message text: "the specified file cannot be opened:" followed by the path.
		throw new TokenException(u8"Lex[0x0000]指定文件无法打开：" + path);
	}

	QTextStream tin(&file);
	tin.setCodec("UTF-8");

	QList<std::shared_ptr<const IWordBase>> ret_list;
	int line_number = 1;
	while (!tin.atEnd()) {
		// Base position of the line: line number in the upper 32 bits.
		uint64_t relative_offset = line_number;
		relative_offset = relative_offset << 32;

		// readLine() strips the line terminator; put a newline back before parsing.
		auto line = tin.readLine() + "\n";
		ret_list.append(this->parse_line(relative_offset, line_number++, line, path));
	}
	return ret_list;
}
2024-06-20 04:49:26 +00:00
#include <QRegExp>
/// Splits one line of text on whitespace and builds a WordContent for each piece.
///
/// @param start_pos  Base position of the line; each word's position is
///                   `start_pos` plus the word's 0-based index within the line.
/// @param row        Line number stored in every word.
/// @param line_text  Text of the line to split.
/// @param path       File path stored in every word.
/// @return The words of the line in left-to-right order; columns are 1-based.
QList<std::shared_ptr<const IWordBase>> WordReader::parse_line(uint64_t start_pos, int row, const QString& line_text, const QString& path) const {
	const QRegExp whitespace(u8"\\s");
	const auto pieces = line_text.split(whitespace, QString::SplitBehavior::SkipEmptyParts);

	QList<std::shared_ptr<const IWordBase>> tokens;
	// Resume each search after the previous word so repeated words map to
	// successive occurrences rather than the first one.
	int search_from = 0;
	for (const auto& piece : pieces) {
		const auto found_at = line_text.indexOf(piece, search_from);
		tokens.append(std::make_shared<WordContent>(row, found_at + 1, start_pos + found_at, piece, path));
		search_from = found_at + piece.length();
	}
	return tokens;
}
/// Creates an exception that carries `message` as its text.
TokenException::TokenException(const QString& message)
	: msg_store(message)
{
}
/// Returns the message text given at construction.
QString TokenException::message() const
{
	return msg_store;
}
/// Stores the location and text of a single word.
///
/// @param r    Row stored in the word.
/// @param c    Column stored in the word.
/// @param pos  Position value returned by position().
/// @param t    Text of the word.
/// @param p    Path of the file the word belongs to.
WordContent::WordContent(int r, int c, uint64_t pos, const QString& t, const QString& p)
	: row_n(r),
	  col_n(c),
	  doc_offset(pos),
	  text_n(t),
	  path_p(p)
{
}
2024-03-17 07:58:28 +00:00
2024-06-20 04:49:26 +00:00
/// Returns the file path stored for this word.
QString WordContent::file() const
{
	return path_p;
}
2024-03-17 07:58:28 +00:00
/// Returns the position value stored for this word.
uint64_t lib_token::WordContent::position() const
{
	return doc_offset;
}
2024-06-20 04:49:26 +00:00
/// Returns the text of this word.
QString WordContent::content() const
{
	return text_n;
}
2024-03-17 07:58:28 +00:00
2024-06-20 04:49:26 +00:00
/// Returns the row stored for this word.
int WordContent::row() const
{
	return row_n;
}
2024-03-17 07:58:28 +00:00
2024-06-20 04:49:26 +00:00
/// Returns the column stored for this word.
int WordContent::column() const
{
	return col_n;
}
2024-06-18 13:17:06 +00:00
2024-06-20 04:49:26 +00:00
std::shared_ptr<const IWordBase> WordContent::nextWord() const {
2024-06-18 13:17:06 +00:00
return nullptr;
}
2024-03-17 07:58:28 +00:00
2024-06-20 04:49:26 +00:00
/// Creates a chain node that wraps a word and points at its successor.
///
/// @param content  Word whose file, position, text, row and column this node reports.
/// @param next     Node returned by nextWord(); may be nullptr.
WordImpl::WordImpl(std::shared_ptr<const IWordBase> content, std::shared_ptr<const IWordBase> next)
	: content_ptr(content),
	  next_ptr(next)
{
}
2024-06-18 13:17:06 +00:00
2024-06-20 04:49:26 +00:00
/// Returns the file path reported by the wrapped word.
QString WordImpl::file() const {
	return content_ptr->file();
}
/// Returns the position reported by the wrapped word.
uint64_t lib_token::WordImpl::position() const
{
	return content_ptr->position();
}
2024-06-20 04:49:26 +00:00
/// Returns the text reported by the wrapped word.
QString WordImpl::content() const {
	return content_ptr->content();
}
2024-06-20 04:49:26 +00:00
int WordImpl::row() const
2024-06-18 13:17:06 +00:00
{
return content_ptr->row();
}
2024-06-20 04:49:26 +00:00
int WordImpl::column() const
2024-06-18 13:17:06 +00:00
{
return content_ptr->column();
}
2024-06-20 04:49:26 +00:00
std::shared_ptr<const IWordBase> WordImpl::nextWord() const
2024-06-18 13:17:06 +00:00
{
2024-06-20 04:49:26 +00:00
return next_ptr;
2024-06-18 13:17:06 +00:00
}