2017-05-12 14:17:57 +02:00
|
|
|
|
#include <tokenize/Stream.h>
|
2016-08-02 19:58:54 +02:00
|
|
|
|
|
2017-05-09 15:05:59 +02:00
|
|
|
|
#include <algorithm>
|
2016-08-02 19:58:54 +02:00
|
|
|
|
#include <fstream>
|
|
|
|
|
|
2017-05-12 14:17:57 +02:00
|
|
|
|
#include <tokenize/TokenizerException.h>
|
2016-11-29 06:03:05 +01:00
|
|
|
|
|
2017-05-12 14:17:57 +02:00
|
|
|
|
namespace tokenize
|
2016-08-02 19:58:54 +02:00
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
//
|
|
|
|
|
// Stream
|
|
|
|
|
//
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
// Creates a stream without any content.
// Switches the numeric category of the C locale to "C" (std::setlocale affects the whole program) and
// configures the internal stream to throw when badbit gets set.
Stream::Stream()
{
	// Use the "C" locale for the numeric category
	std::setlocale(LC_NUMERIC, "C");

	// Report badbit through exceptions; failbit and eofbit remain plain state flags
	m_stream.exceptions(std::ios_base::badbit);
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
// Creates a stream whose first section is the complete content of istream, labelled streamName.
// Delegates to the default constructor so that this construction path applies the same setup
// (numeric "C" locale, badbit exception mask) before any data is read.
Stream::Stream(std::string streamName, std::istream &istream)
:	Stream()
{
	read(streamName, istream);
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
void Stream::read(std::string streamName, std::istream &istream)
|
|
|
|
|
{
|
|
|
|
|
// Store position of new section
|
|
|
|
|
const auto position = m_stream.tellp();
|
|
|
|
|
|
|
|
|
|
m_delimiters.push_back({position, streamName});
|
|
|
|
|
|
|
|
|
|
m_stream << istream.rdbuf();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2017-05-09 15:05:59 +02:00
|
|
|
|
void Stream::read(const std::experimental::filesystem::path &path)
|
2016-08-02 19:58:54 +02:00
|
|
|
|
{
|
2017-05-09 15:05:59 +02:00
|
|
|
|
if (!std::experimental::filesystem::is_regular_file(path))
|
2016-08-02 19:58:54 +02:00
|
|
|
|
throw std::runtime_error("File does not exist: “" + path.string() + "”");
|
|
|
|
|
|
|
|
|
|
std::ifstream fileStream(path.string(), std::ios::in);
|
|
|
|
|
|
|
|
|
|
read(path.string(), fileStream);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
void Stream::reset()
|
|
|
|
|
{
|
|
|
|
|
m_stream.clear();
|
|
|
|
|
seek(0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
// Moves the read position to the given position.
void Stream::seek(Position position)
{
	// Reset all state flags first so that repositioning also works after a failure or end of input
	m_stream.clear(std::ios_base::goodbit);

	m_stream.seekg(position);
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
typename Stream::Position Stream::position() const
|
|
|
|
|
{
|
|
|
|
|
return m_stream.tellg();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
2016-11-29 06:03:05 +01:00
|
|
|
|
// Computes the location (section name, row, and column) that corresponds to the current read position.
// The section is the last entry of m_delimiters that starts at or before the current position. Row and
// column are 1-based and obtained by rescanning the section from its beginning up to the current
// position; a newline starts a new row, and only blank or printable characters advance the column.
// The same section name, row, and column are passed twice to the returned Location (presumably as
// start and end of the range; confirm against Location's definition).
// Note: this dereferences an entry of m_delimiters, so at least one section must have been read before.
Location Stream::location() const
{
	const auto currentPosition = position();

	// Find current section
	auto currentFile = std::find_if(m_delimiters.crbegin(), m_delimiters.crend(),
		[&](const auto &fileDelimiter)
		{
			return currentPosition >= fileDelimiter.position;
		});

	// If the tokenizer is at the end of the stream, still count from the beginning of the last section
	if (currentFile == m_delimiters.crend())
		currentFile = m_delimiters.crbegin();

	// Go back to beginning of section
	m_stream.clear();
	m_stream.seekg(currentFile->position);

	size_t row = 1;
	size_t column = 1;

	// Compute the location character by character
	while (true)
	{
		// Without a valid target position (-1), scan until the end of the stream; otherwise, stop as
		// soon as the target position is reached
		if (currentPosition == -1 && atEnd())
			break;
		else if (currentPosition >= 0 && position() >= currentPosition)
			break;

		const auto character = currentCharacter();

		// The <cctype> classification functions require a value representable as unsigned char (or EOF);
		// passing a negative char, as produced by non-ASCII bytes, would be undefined behavior
		const auto classifiable = static_cast<unsigned char>(character);

		if (character == '\n')
		{
			row++;
			column = 1;
		}
		else if (std::isblank(classifiable) || std::isprint(classifiable))
			column++;

		m_stream.ignore(1);
	}

	return {currentFile->sectionName.c_str(), currentFile->sectionName.c_str(), row, row, column, column};
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
char Stream::currentCharacter() const
|
|
|
|
|
{
|
|
|
|
|
return m_stream.peek();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
bool Stream::atEnd() const
|
|
|
|
|
{
|
|
|
|
|
return position() == -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
void Stream::check() const
|
|
|
|
|
{
|
|
|
|
|
if (atEnd())
|
2017-05-12 14:17:57 +02:00
|
|
|
|
throw TokenizerException(location(), "reading past end of file");
|
2016-08-02 19:58:54 +02:00
|
|
|
|
|
|
|
|
|
if (m_stream.fail())
|
2017-05-12 14:17:57 +02:00
|
|
|
|
throw TokenizerException(location());
|
2016-08-02 19:58:54 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
void Stream::advance()
|
|
|
|
|
{
|
|
|
|
|
check();
|
|
|
|
|
m_stream.ignore(1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
}
|