Refactored tokenizer to lazily evaluate file locations.

2017-06-18 18:15:04 +02:00
parent 04dffdb09e
commit 4c0583c91f
30 changed files with 420 additions and 260 deletions

View File

@@ -1,7 +1,7 @@
#ifndef __TOKENIZE__LOCATION_H
#define __TOKENIZE__LOCATION_H
#include <cstdlib>
#include <tokenize/StreamPosition.h>
namespace tokenize
{
@@ -12,16 +12,41 @@ namespace tokenize
//
////////////////////////////////////////////////////////////////////////////////////////////////////
struct Location
class Stream;
////////////////////////////////////////////////////////////////////////////////////////////////////
class Location
{
const char *sectionStart = nullptr;
const char *sectionEnd = nullptr;
public:
Location(Stream &stream);
Location(Stream &stream, StreamPosition position);
std::size_t rowStart = -1;
std::size_t rowEnd = -1;
const char *sectionStart() const;
const char *sectionEnd() const;
std::size_t columnStart = -1;
std::size_t columnEnd = -1;
StreamPosition rowStart() const;
StreamPosition rowEnd() const;
StreamPosition columnStart() const;
StreamPosition columnEnd() const;
private:
void initializeLazily() const;
Stream &m_stream;
const StreamPosition m_position;
mutable bool m_isInitialized{false};
mutable const char *m_sectionStart{nullptr};
mutable const char *m_sectionEnd{nullptr};
mutable StreamPosition m_rowStart{InvalidStreamPosition};
mutable StreamPosition m_rowEnd{InvalidStreamPosition};
mutable StreamPosition m_columnStart{InvalidStreamPosition};
mutable StreamPosition m_columnEnd{InvalidStreamPosition};
};
////////////////////////////////////////////////////////////////////////////////////////////////////
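
A note for readers of this diff: the matching Location.cpp is part of the commit but not shown in this excerpt. The header above uses the usual lazy-initialization pattern — const accessors fill the mutable members on first use. A small self-contained sketch of that pattern (assumed names, with a plain std::string standing in for tokenize::Stream, not the commit's actual implementation):

// Standalone sketch of lazy location computation (illustrative only).
#include <cstddef>
#include <iostream>
#include <string>

using StreamPosition = std::size_t;

class LazyLocation
{
	public:
		LazyLocation(const std::string &content, StreamPosition position)
		:	m_content{content},
			m_position{position}
		{
		}

		StreamPosition row() const
		{
			initializeLazily();
			return m_row;
		}

		StreamPosition column() const
		{
			initializeLazily();
			return m_column;
		}

	private:
		void initializeLazily() const
		{
			if (m_isInitialized)
				return;

			// Rows and columns are counted only when first requested,
			// not when the location object is created.
			m_row = 1;
			m_column = 1;

			for (StreamPosition i = 0; i < m_position && i < m_content.size(); i++)
			{
				if (m_content[i] == '\n')
				{
					m_row++;
					m_column = 1;
				}
				else
					m_column++;
			}

			m_isInitialized = true;
		}

		const std::string &m_content;
		const StreamPosition m_position;

		mutable bool m_isInitialized{false};
		mutable StreamPosition m_row{0};
		mutable StreamPosition m_column{0};
};

int main()
{
	const std::string content = "first line\nsecond line\n";
	const LazyLocation location(content, 13);

	// Nothing is computed until row()/column() are first called.
	std::cout << location.row() << ":" << location.column() << std::endl; // prints 2:3
}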

View File

@@ -9,6 +9,7 @@
#include <vector>
#include <tokenize/Location.h>
#include <tokenize/StreamPosition.h>
#include <tokenize/TokenizerException.h>
namespace tokenize
@@ -23,12 +24,9 @@ namespace tokenize
class Stream
{
public:
using Position = size_t;
static const Position InvalidPosition;
struct Delimiter
{
Position position;
StreamPosition position;
std::string sectionName;
};
@@ -46,15 +44,17 @@ class Stream
void read(const std::experimental::filesystem::path &path);
void reset();
void seek(Position position);
Position position() const;
Location location() const;
void seek(StreamPosition position);
StreamPosition position() const;
char currentCharacter() const
const std::vector<Delimiter> &delimiters() const
{
assert(m_position < m_stream.size());
return m_delimiters;
}
// TODO: check if this should be secured by check()
char currentCharacter()
{
check();
return m_stream[m_position];
}
@@ -69,15 +69,15 @@ class Stream
return m_position >= m_stream.size();
}
void check() const
void check()
{
if (atEnd())
throw TokenizerException(location(), "reading past end of file");
throw TokenizerException(*this, "reading past end of file");
}
protected:
std::string m_stream;
mutable Position m_position;
mutable StreamPosition m_position{0};
std::vector<Delimiter> m_delimiters;
};
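
With Stream::location() apparently dropped in favor of the two Location constructors above, call sites presumably build the location themselves from a stream and, optionally, a saved position. An assumed usage sketch (the real call sites are elsewhere in this commit; the file name and default construction of Stream are illustrative):

#include <tokenize/Location.h>
#include <tokenize/Stream.h>

int main()
{
	tokenize::Stream stream;
	stream.read("input.txt"); // hypothetical input file

	const auto position = stream.position();

	// Creating the Location is now cheap: rows and columns are only
	// computed if its accessors are actually called, for instance when
	// reporting an error.
	tokenize::Location location(stream, position);
}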

View File

@@ -0,0 +1,23 @@
#ifndef __TOKENIZE__STREAM_POSITION_H
#define __TOKENIZE__STREAM_POSITION_H
#include <cstddef>
#include <limits>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// StreamPosition
//
////////////////////////////////////////////////////////////////////////////////////////////////////
using StreamPosition = size_t;
static const StreamPosition InvalidStreamPosition{std::numeric_limits<StreamPosition>::max()};
////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif
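
StreamPosition is a plain size_t alias, so the sentinel is simply the maximum value and works with ordinary comparisons; a trivial (assumed) usage:

#include <tokenize/StreamPosition.h>

bool hasKnownRow(tokenize::StreamPosition row)
{
	// InvalidStreamPosition marks “not yet computed” values, as in the
	// mutable Location members above.
	return row != tokenize::InvalidStreamPosition;
}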

View File

@@ -189,7 +189,7 @@ void Tokenizer<TokenizerPolicy>::expect(const Type &expectedValue)
std::stringstream message;
message << "unexpected value, expected “" << expectedValue << "”";
throw TokenizerException(location(), message.str());
throw TokenizerException(*this, message.str());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -208,7 +208,7 @@ std::string Tokenizer<TokenizerPolicy>::getIdentifier()
if (!TokenizerPolicy::isIdentifierCharacter(character))
{
if (value.empty())
throw TokenizerException(location(), "could not parse identifier");
throw TokenizerException(*this, "could not parse identifier");
return value;
}
@@ -406,7 +406,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
check();
if (!std::isdigit(currentCharacter()))
throw TokenizerException(location(), "could not read integer value");
throw TokenizerException(*this, "could not read integer value");
uint64_t value = 0;
@@ -448,7 +448,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getImpl(Tag<uint64_t>)
skipWhiteSpace();
if (currentCharacter() == '-')
throw TokenizerException(location(), "expected unsigned integer, got signed one");
throw TokenizerException(*this, "expected unsigned integer, got signed one");
return getIntegerBody();
}
@@ -482,7 +482,7 @@ bool Tokenizer<TokenizerPolicy>::getImpl(Tag<bool>)
if (testAndSkip<char>('1'))
return true;
throw TokenizerException(location(), "could not read Boolean value");
throw TokenizerException(*this, "could not read Boolean value");
}
////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@@ -30,10 +30,7 @@ class TokenizerException: public std::exception
explicit TokenizerException(const Location &location, const std::string &message)
: m_location{location},
m_message{message},
// TODO: refactor
m_plainMessage{std::string(m_location.sectionStart) + ":" + std::to_string(m_location.rowStart)
+ ":" + std::to_string(m_location.columnStart) + " " + m_message}
m_message{message}
{
}
@@ -41,7 +38,7 @@ class TokenizerException: public std::exception
const char *what() const noexcept
{
return m_plainMessage.c_str();
return m_message.c_str();
}
const Location &location() const
@@ -57,7 +54,6 @@ class TokenizerException: public std::exception
private:
Location m_location;
std::string m_message;
std::string m_plainMessage;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
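
The TokenizerException(*this, …) calls in Stream.h and Tokenizer.h above imply a new constructor overload taking the stream directly, which this excerpt does not show. With the eager m_plainMessage gone (it forced rows and columns to be computed at throw time), the section/row/column formatting presumably happens only where the error is actually printed, via the Location accessors. The added overload probably just captures a lazy Location, roughly along these lines (assumed fragment, not the commit's exact code):

// Assumed shape of the added overload. Capturing the Location is now cheap,
// since no rows or columns are computed until the exception is reported.
explicit TokenizerException(Stream &stream, const std::string &message)
	: m_location{stream},
	  m_message{message}
{
}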