Refactored tokenizer to lazily evaluate file locations.

This commit is contained in:
2017-06-18 18:15:04 +02:00
parent 04dffdb09e
commit 4c0583c91f
30 changed files with 420 additions and 260 deletions

View File

@@ -0,0 +1,150 @@
#include <tokenize/Location.h>
#include <algorithm>
#include <cctype>
#include <tokenize/Stream.h>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Location
//
////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates a location that refers to the position the given stream is currently at.
// Only the stream reference and the position are stored here.
Location::Location(Stream &stream)
:   Location(stream, stream.position())
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates a location that refers to an explicitly given position within the given stream.
// Only the stream reference and the position are stored here.
Location::Location(Stream &stream, StreamPosition position)
:   m_stream{stream}, m_position{position}
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the cached section-start name, running the lazy initialization first if it has not
// happened yet.
const char *Location::sectionStart() const
{
    if (!m_isInitialized)
    {
        initializeLazily();
    }

    return m_sectionStart;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the cached section-end name, running the lazy initialization first if it has not
// happened yet.
const char *Location::sectionEnd() const
{
    if (!m_isInitialized)
    {
        initializeLazily();
    }

    return m_sectionEnd;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the cached start row, running the lazy initialization first if it has not happened yet.
StreamPosition Location::rowStart() const
{
    if (!m_isInitialized)
    {
        initializeLazily();
    }

    return m_rowStart;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the cached end row, running the lazy initialization first if it has not happened yet.
StreamPosition Location::rowEnd() const
{
    if (!m_isInitialized)
    {
        initializeLazily();
    }

    return m_rowEnd;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the cached start column, running the lazy initialization first if it has not happened
// yet.
StreamPosition Location::columnStart() const
{
    if (!m_isInitialized)
    {
        initializeLazily();
    }

    return m_columnStart;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the cached end column, running the lazy initialization first if it has not happened
// yet.
StreamPosition Location::columnEnd() const
{
    if (!m_isInitialized)
    {
        initializeLazily();
    }

    return m_columnEnd;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Location::initializeLazily() const
{
const auto previousPosition = m_stream.position();
const auto &delimiters = m_stream.delimiters();
// Find current section
auto currentFile = std::find_if(delimiters.crbegin(), delimiters.crend(),
[&](const auto &fileDelimiter)
{
return m_position >= fileDelimiter.position;
});
// If the tokenizer is at the end of the stream, still count from the beginning of the last section
if (currentFile == delimiters.crend())
currentFile = delimiters.crbegin();
// Go back to beginning of section
m_stream.seek(currentFile->position);
StreamPosition row{1};
StreamPosition column{1};
// Compute the location character by character
while (true)
{
if (m_stream.atEnd())
break;
else if (m_stream.position() >= m_position)
break;
const auto character = m_stream.currentCharacter();
if (character == '\n')
{
row++;
column = 1;
}
else if (std::isblank(character) || std::isprint(character))
column++;
m_stream.advance();
}
m_sectionStart = currentFile->sectionName.c_str();
m_sectionEnd = currentFile->sectionName.c_str();
m_rowStart = row;
m_rowEnd = row;
m_columnStart = column;
m_columnEnd = column;
m_isInitialized = true;
m_stream.seek(previousPosition);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}

View File

@@ -1,6 +1,5 @@
#include <tokenize/Stream.h>
#include <algorithm>
#include <fstream>
namespace tokenize
@@ -12,12 +11,7 @@ namespace tokenize
//
////////////////////////////////////////////////////////////////////////////////////////////////////
const Stream::Position Stream::InvalidPosition{std::numeric_limits<Position>::max()};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Creates a stream whose read position is 0.
Stream::Stream()
:   m_position{0}
{
    // Switch the numeric category of the C locale to "C"
    // NOTE(review): presumably so that number handling does not depend on the user's locale — confirm
    std::setlocale(LC_NUMERIC, "C");
}
@@ -34,18 +28,16 @@ Stream::Stream(std::string streamName, std::istream &istream)
void Stream::read(std::string streamName, std::istream &istream)
{
// Store position of new section
const auto position = m_stream.size();
m_delimiters.push_back({m_stream.size(), streamName});
m_delimiters.push_back({position, streamName});
istream.seekg(0, std::ios::end);
/*istream.seekg(0, std::ios::end);
const auto streamSize = istream.tellg();
istream.seekg(0, std::ios::beg);
const auto startPosition = m_stream.size();
m_stream.resize(m_stream.size() + streamSize);
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), m_stream.begin() + startPosition);
m_stream.resize(m_stream.size() + streamSize);*/
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), std::back_inserter(m_stream));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -69,65 +61,18 @@ void Stream::reset()
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::seek(Position position)
void Stream::seek(StreamPosition position)
{
m_position = position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
typename Stream::Position Stream::position() const
StreamPosition Stream::position() const
{
return m_position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Computes the section name, row, and column of the stream's current position.
//
// The read position is temporarily moved to the beginning of the section containing the current
// position, and the stream is scanned character by character from there. The read position is
// set back to its value on entry before returning.
// Start and end fields of the returned location receive identical values.
Location Stream::location() const
{
    const auto currentPosition = position();

    // Find current section
    auto currentFile = std::find_if(m_delimiters.crbegin(), m_delimiters.crend(),
        [&](const auto &fileDelimiter)
        {
            return currentPosition >= fileDelimiter.position;
        });

    // If the tokenizer is at the end of the stream, still count from the beginning of the last section
    if (currentFile == m_delimiters.crend())
        currentFile = m_delimiters.crbegin();

    // Go back to beginning of section
    m_position = currentFile->position;

    size_t row = 1;
    size_t column = 1;

    // Compute the location character by character
    while (true)
    {
        if (currentPosition >= m_stream.size() && atEnd())
            break;
        else if (currentPosition < m_stream.size() && position() >= currentPosition)
            break;

        const auto character = currentCharacter();

        // The <cctype> classification functions require a value representable as unsigned char;
        // passing a negative char is undefined behavior
        const auto characterCode = static_cast<unsigned char>(character);

        if (character == '\n')
        {
            row++;
            column = 1;
        }
        else if (std::isblank(characterCode) || std::isprint(characterCode))
            column++;

        m_position++;
    }

    // The scan may stop at the end of the stream rather than at the original position; restore
    // it explicitly so that this query leaves the read position unchanged
    m_position = currentPosition;

    return {currentFile->sectionName.c_str(), currentFile->sectionName.c_str(), row, row, column, column};
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}