Refactored tokenizer to lazily evaluate file locations.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#ifndef __TOKENIZE__LOCATION_H
|
||||
#define __TOKENIZE__LOCATION_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <tokenize/StreamPosition.h>
|
||||
|
||||
namespace tokenize
|
||||
{
|
||||
@@ -12,16 +12,41 @@ namespace tokenize
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct Location
|
||||
class Stream;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Describes a location inside a Stream. Section, row and column information is
// exposed through const accessors; the values behind them are held in mutable
// members next to an m_isInitialized flag, so const accessors are able to fill
// them in on demand.
// NOTE(review): this listing is a rendered diff, so lines from before and after
// the refactoring appear side by side. A data member and a member function
// cannot share a name within one class, so the plain fields named sectionStart,
// sectionEnd, rowStart, rowEnd, columnStart and columnEnd look like the removed
// side of the diff — confirm against the actual header.
class Location
|
||||
{
|
||||
// NOTE(review): same name as the sectionStart()/sectionEnd() accessors below;
// presumably the pre-refactoring fields.
const char *sectionStart = nullptr;
|
||||
const char *sectionEnd = nullptr;
|
||||
public:
|
||||
// Constructors take the stream by non-const reference; the second overload
// additionally takes an explicit position. Their definitions are not visible
// here — presumably they bind m_stream and m_position; verify in the .cpp.
Location(Stream &stream);
|
||||
Location(Stream &stream, StreamPosition position);
|
||||
|
||||
// NOTE(review): same names as the rowStart()/rowEnd() accessors below;
// presumably the pre-refactoring fields. Initializing std::size_t with -1
// wraps to the largest representable value.
std::size_t rowStart = -1;
|
||||
std::size_t rowEnd = -1;
|
||||
// Section accessors; both return C strings.
const char *sectionStart() const;
|
||||
const char *sectionEnd() const;
|
||||
|
||||
// NOTE(review): same names as the columnStart()/columnEnd() accessors below;
// presumably the pre-refactoring fields.
std::size_t columnStart = -1;
|
||||
std::size_t columnEnd = -1;
|
||||
// Row accessors.
StreamPosition rowStart() const;
|
||||
StreamPosition rowEnd() const;
|
||||
|
||||
// Column accessors.
StreamPosition columnStart() const;
|
||||
StreamPosition columnEnd() const;
|
||||
|
||||
private:
|
||||
// Const so that it can be called from the const accessors; it can only modify
// the mutable cache members below. Presumably computes the cached values on
// first use and sets m_isInitialized — definition not visible here.
void initializeLazily() const;
|
||||
|
||||
// Held by reference: a Location must not outlive the Stream it refers to.
// Together with the const member below, this also means the implicitly
// declared copy assignment operator is deleted (copy construction still works).
Stream &m_stream;
|
||||
const StreamPosition m_position;
|
||||
|
||||
// Starts out false; mutable so a const member function can flip it.
mutable bool m_isInitialized{false};
|
||||
|
||||
// Cached section names, null until filled in.
mutable const char *m_sectionStart{nullptr};
|
||||
mutable const char *m_sectionEnd{nullptr};
|
||||
|
||||
// Cached rows, InvalidStreamPosition until filled in.
mutable StreamPosition m_rowStart{InvalidStreamPosition};
|
||||
mutable StreamPosition m_rowEnd{InvalidStreamPosition};
|
||||
|
||||
// Cached columns, InvalidStreamPosition until filled in.
mutable StreamPosition m_columnStart{InvalidStreamPosition};
|
||||
mutable StreamPosition m_columnEnd{InvalidStreamPosition};
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -9,6 +9,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <tokenize/Location.h>
|
||||
#include <tokenize/StreamPosition.h>
|
||||
#include <tokenize/TokenizerException.h>
|
||||
|
||||
namespace tokenize
|
||||
@@ -23,12 +24,9 @@ namespace tokenize
|
||||
class Stream
|
||||
{
|
||||
public:
|
||||
using Position = size_t;
|
||||
static const Position InvalidPosition;
|
||||
|
||||
struct Delimiter
|
||||
{
|
||||
Position position;
|
||||
StreamPosition position;
|
||||
std::string sectionName;
|
||||
};
|
||||
|
||||
@@ -46,15 +44,17 @@ class Stream
|
||||
void read(const std::experimental::filesystem::path &path);
|
||||
|
||||
void reset();
|
||||
void seek(Position position);
|
||||
Position position() const;
|
||||
Location location() const;
|
||||
void seek(StreamPosition position);
|
||||
StreamPosition position() const;
|
||||
|
||||
char currentCharacter() const
|
||||
const std::vector<Delimiter> &delimiters() const
|
||||
{
|
||||
assert(m_position < m_stream.size());
|
||||
return m_delimiters;
|
||||
}
|
||||
|
||||
// TODO: check if this should be secured by check()
|
||||
char currentCharacter()
|
||||
{
|
||||
check();
|
||||
return m_stream[m_position];
|
||||
}
|
||||
|
||||
@@ -69,15 +69,15 @@ class Stream
|
||||
return m_position >= m_stream.size();
|
||||
}
|
||||
|
||||
void check() const
|
||||
void check()
|
||||
{
|
||||
if (atEnd())
|
||||
throw TokenizerException(location(), "reading past end of file");
|
||||
throw TokenizerException(*this, "reading past end of file");
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string m_stream;
|
||||
mutable Position m_position;
|
||||
mutable StreamPosition m_position{0};
|
||||
|
||||
std::vector<Delimiter> m_delimiters;
|
||||
};
|
||||
|
23
lib/tokenize/include/tokenize/StreamPosition.h
Normal file
23
lib/tokenize/include/tokenize/StreamPosition.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef __TOKENIZE__STREAM_POSITION_H
|
||||
#define __TOKENIZE__STREAM_POSITION_H
|
||||
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
|
||||
namespace tokenize
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// StreamPosition
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Unsigned position type used by the tokenizer's streams. <cstddef> is only
// guaranteed to declare std::size_t (the unqualified global name is optional),
// so the alias is spelled with the std:: qualifier.
using StreamPosition = std::size_t;
// Sentinel meaning "no valid position": the largest value a StreamPosition can
// hold. constexpr guarantees compile-time initialization, and a namespace-scope
// constexpr variable has internal linkage, so including this header from
// several translation units is safe.
constexpr StreamPosition InvalidStreamPosition{std::numeric_limits<StreamPosition>::max()};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@@ -189,7 +189,7 @@ void Tokenizer<TokenizerPolicy>::expect(const Type &expectedValue)
|
||||
std::stringstream message;
|
||||
message << "unexpected value, expected “" << expectedValue << "”";
|
||||
|
||||
throw TokenizerException(location(), message.str());
|
||||
throw TokenizerException(*this, message.str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -208,7 +208,7 @@ std::string Tokenizer<TokenizerPolicy>::getIdentifier()
|
||||
if (!TokenizerPolicy::isIdentifierCharacter(character))
|
||||
{
|
||||
if (value.empty())
|
||||
throw TokenizerException(location(), "could not parse identifier");
|
||||
throw TokenizerException(*this, "could not parse identifier");
|
||||
|
||||
return value;
|
||||
}
|
||||
@@ -406,7 +406,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
|
||||
check();
|
||||
|
||||
if (!std::isdigit(currentCharacter()))
|
||||
throw TokenizerException(location(), "could not read integer value");
|
||||
throw TokenizerException(*this, "could not read integer value");
|
||||
|
||||
uint64_t value = 0;
|
||||
|
||||
@@ -448,7 +448,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getImpl(Tag<uint64_t>)
|
||||
skipWhiteSpace();
|
||||
|
||||
if (currentCharacter() == '-')
|
||||
throw TokenizerException(location(), "expected unsigned integer, got signed one");
|
||||
throw TokenizerException(*this, "expected unsigned integer, got signed one");
|
||||
|
||||
return getIntegerBody();
|
||||
}
|
||||
@@ -482,7 +482,7 @@ bool Tokenizer<TokenizerPolicy>::getImpl(Tag<bool>)
|
||||
if (testAndSkip<char>('1'))
|
||||
return true;
|
||||
|
||||
throw TokenizerException(location(), "could not read Boolean value");
|
||||
throw TokenizerException(*this, "could not read Boolean value");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -30,10 +30,7 @@ class TokenizerException: public std::exception
|
||||
|
||||
explicit TokenizerException(const Location &location, const std::string &message)
|
||||
: m_location{location},
|
||||
m_message{message},
|
||||
// TODO: refactor
|
||||
m_plainMessage{std::string(m_location.sectionStart) + ":" + std::to_string(m_location.rowStart)
|
||||
+ ":" + std::to_string(m_location.columnStart) + " " + m_message}
|
||||
m_message{message}
|
||||
{
|
||||
}
|
||||
|
||||
@@ -41,7 +38,7 @@ class TokenizerException: public std::exception
|
||||
|
||||
const char *what() const noexcept
|
||||
{
|
||||
return m_plainMessage.c_str();
|
||||
return m_message.c_str();
|
||||
}
|
||||
|
||||
const Location &location() const
|
||||
@@ -57,7 +54,6 @@ class TokenizerException: public std::exception
|
||||
private:
|
||||
Location m_location;
|
||||
std::string m_message;
|
||||
std::string m_plainMessage;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
Reference in New Issue
Block a user