Refactoring of tokenizer and stream classes for better efficiency and maintainability.
This commit is contained in:
@@ -22,7 +22,6 @@ if (CMAKE_GENERATOR STREQUAL "Ninja" AND
|
||||
set(CMAKE_CXX_FLAGS "-fdiagnostics-color=always ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
add_subdirectory(src)
|
||||
if(TOKENIZE_BUILD_TESTS)
|
||||
add_subdirectory(tests)
|
||||
endif(TOKENIZE_BUILD_TESTS)
|
||||
|
@@ -1,6 +1,8 @@
|
||||
#ifndef __TOKENIZE__LOCATION_H
|
||||
#define __TOKENIZE__LOCATION_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <tokenize/StreamPosition.h>
|
||||
|
||||
namespace tokenize
|
||||
@@ -16,37 +18,19 @@ class Stream;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class Location
|
||||
struct Location
|
||||
{
|
||||
public:
|
||||
Location(Stream &stream);
|
||||
Location(Stream &stream, StreamPosition position);
|
||||
StreamPosition position{InvalidStreamPosition};
|
||||
|
||||
const char *sectionStart() const;
|
||||
const char *sectionEnd() const;
|
||||
// TODO: think about avoiding copying strings
|
||||
std::string sectionStart;
|
||||
std::string sectionEnd;
|
||||
|
||||
StreamPosition rowStart() const;
|
||||
StreamPosition rowEnd() const;
|
||||
StreamPosition rowStart{InvalidStreamPosition};
|
||||
StreamPosition rowEnd{InvalidStreamPosition};
|
||||
|
||||
StreamPosition columnStart() const;
|
||||
StreamPosition columnEnd() const;
|
||||
|
||||
private:
|
||||
void initializeLazily() const;
|
||||
|
||||
Stream &m_stream;
|
||||
const StreamPosition m_position;
|
||||
|
||||
mutable bool m_isInitialized{false};
|
||||
|
||||
mutable const char *m_sectionStart{nullptr};
|
||||
mutable const char *m_sectionEnd{nullptr};
|
||||
|
||||
mutable StreamPosition m_rowStart{InvalidStreamPosition};
|
||||
mutable StreamPosition m_rowEnd{InvalidStreamPosition};
|
||||
|
||||
mutable StreamPosition m_columnStart{InvalidStreamPosition};
|
||||
mutable StreamPosition m_columnEnd{InvalidStreamPosition};
|
||||
StreamPosition columnStart{InvalidStreamPosition};
|
||||
StreamPosition columnEnd{InvalidStreamPosition};
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -1,8 +1,10 @@
|
||||
#ifndef __TOKENIZE__STREAM_H
|
||||
#define __TOKENIZE__STREAM_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
@@ -24,38 +26,123 @@ namespace tokenize
|
||||
class Stream
|
||||
{
|
||||
public:
|
||||
struct Delimiter
|
||||
struct Section
|
||||
{
|
||||
StreamPosition position;
|
||||
std::string sectionName;
|
||||
std::string name;
|
||||
|
||||
std::vector<StreamPosition> newlines;
|
||||
};
|
||||
|
||||
public:
|
||||
Stream();
|
||||
explicit Stream(std::string streamName, std::istream &istream);
|
||||
Stream()
|
||||
{
|
||||
std::setlocale(LC_NUMERIC, "C");
|
||||
}
|
||||
|
||||
explicit Stream(std::string streamName, std::istream &istream)
|
||||
{
|
||||
read(streamName, istream);
|
||||
}
|
||||
|
||||
~Stream() = default;
|
||||
|
||||
Stream(const Stream &other) = delete;
|
||||
Stream &operator=(const Stream &other) = delete;
|
||||
Stream(Stream &&other) = delete;
|
||||
Stream &operator=(Stream &&other) = delete;
|
||||
Stream(Stream &&other) = default;
|
||||
Stream &operator=(Stream &&other) = default;
|
||||
|
||||
void read(std::string streamName, std::istream &istream);
|
||||
void read(const std::experimental::filesystem::path &path);
|
||||
|
||||
void reset();
|
||||
void seek(StreamPosition position);
|
||||
StreamPosition position() const;
|
||||
|
||||
const std::vector<Delimiter> &delimiters() const
|
||||
void read(std::string streamName, std::istream &istream)
|
||||
{
|
||||
return m_delimiters;
|
||||
// Store position of new section
|
||||
m_sections.push_back({m_content.size(), streamName, {}});
|
||||
|
||||
const auto contentStartIndex = m_content.size();
|
||||
|
||||
try
|
||||
{
|
||||
istream.seekg(0, std::ios::end);
|
||||
const auto streamSize = istream.tellg();
|
||||
istream.seekg(0, std::ios::beg);
|
||||
|
||||
m_content.reserve(m_content.size() + streamSize);
|
||||
}
|
||||
catch (const std::exception &exception)
|
||||
{
|
||||
istream.clear();
|
||||
}
|
||||
|
||||
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), std::back_inserter(m_content));
|
||||
|
||||
for (auto i = contentStartIndex; i < m_content.size(); i++)
|
||||
if (m_content[i] == '\n')
|
||||
m_sections.back().newlines.emplace_back(i);
|
||||
}
|
||||
|
||||
void read(const std::experimental::filesystem::path &path)
|
||||
{
|
||||
if (!std::experimental::filesystem::is_regular_file(path))
|
||||
throw std::runtime_error("File does not exist: “" + path.string() + "”");
|
||||
|
||||
std::ifstream fileStream(path.string(), std::ios::in);
|
||||
|
||||
read(path.string(), fileStream);
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
m_position = 0;
|
||||
}
|
||||
|
||||
void seek(StreamPosition position)
|
||||
{
|
||||
m_position = position;
|
||||
}
|
||||
|
||||
StreamPosition position() const
|
||||
{
|
||||
return m_position;
|
||||
}
|
||||
|
||||
Location location() const
|
||||
{
|
||||
// Find current section
|
||||
auto section = std::upper_bound(m_sections.cbegin(), m_sections.cend(), m_position,
|
||||
[&](const auto &lhs, const auto &rhs)
|
||||
{
|
||||
return lhs < rhs.position;
|
||||
});
|
||||
|
||||
assert(section != m_sections.cbegin());
|
||||
|
||||
section--;
|
||||
|
||||
// Find line (row) in the file
|
||||
auto line = std::lower_bound(section->newlines.cbegin(), section->newlines.cend(), m_position);
|
||||
|
||||
if (line == section->newlines.cbegin())
|
||||
{
|
||||
const auto row = 1;
|
||||
const auto column = static_cast<StreamPosition>(m_position - section->position + 1);
|
||||
|
||||
return {m_position, section->name, section->name, row, row, column, column};
|
||||
}
|
||||
|
||||
const auto row = static_cast<StreamPosition>(line - section->newlines.cbegin() + 1);
|
||||
const auto column = static_cast<StreamPosition>(m_position - *(line - 1));
|
||||
|
||||
return {m_position, section->name, section->name, row, row, column, column};
|
||||
}
|
||||
|
||||
const std::vector<Section> &sections() const
|
||||
{
|
||||
return m_sections;
|
||||
}
|
||||
|
||||
char currentCharacter()
|
||||
{
|
||||
check();
|
||||
return m_stream[m_position];
|
||||
return m_content[m_position];
|
||||
}
|
||||
|
||||
void advance()
|
||||
@@ -64,22 +151,42 @@ class Stream
|
||||
m_position++;
|
||||
}
|
||||
|
||||
void advanceUnchecked()
|
||||
{
|
||||
m_position++;
|
||||
}
|
||||
|
||||
bool atEnd() const
|
||||
{
|
||||
return m_position >= m_stream.size();
|
||||
return m_position >= m_content.size();
|
||||
}
|
||||
|
||||
void check()
|
||||
{
|
||||
if (atEnd())
|
||||
throw TokenizerException(*this, "reading past end of file");
|
||||
throw TokenizerException(location(), "reading past end of file");
|
||||
}
|
||||
|
||||
StreamPosition size() const
|
||||
{
|
||||
return m_content.size();
|
||||
}
|
||||
|
||||
std::string &content()
|
||||
{
|
||||
return m_content;
|
||||
}
|
||||
|
||||
const std::string &content() const
|
||||
{
|
||||
return m_content;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string m_stream;
|
||||
std::string m_content;
|
||||
mutable StreamPosition m_position{0};
|
||||
|
||||
std::vector<Delimiter> m_delimiters;
|
||||
std::vector<Section> m_sections;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -27,20 +27,19 @@ struct Tag
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class TokenizerPolicy = CaseSensitiveTokenizerPolicy>
|
||||
class Tokenizer: public Stream, public TokenizerPolicy
|
||||
class Tokenizer : public Stream, public TokenizerPolicy
|
||||
{
|
||||
template<class OtherTokenizerPolicy>
|
||||
friend class Tokenizer;
|
||||
|
||||
public:
|
||||
explicit Tokenizer();
|
||||
explicit Tokenizer() noexcept;
|
||||
explicit Tokenizer(std::string streamName, std::istream &istream);
|
||||
|
||||
template<class OtherTokenizer>
|
||||
Tokenizer(OtherTokenizer &&otherTokenizer)
|
||||
Tokenizer(OtherTokenizer &&other) noexcept
|
||||
: Stream(std::forward<OtherTokenizer>(other))
|
||||
{
|
||||
m_stream = std::move(otherTokenizer.m_stream);
|
||||
m_delimiters = std::move(otherTokenizer.m_delimiters);
|
||||
}
|
||||
|
||||
void removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd);
|
||||
@@ -94,8 +93,7 @@ class Tokenizer: public Stream, public TokenizerPolicy
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class TokenizerPolicy>
|
||||
Tokenizer<TokenizerPolicy>::Tokenizer()
|
||||
: Stream()
|
||||
Tokenizer<TokenizerPolicy>::Tokenizer() noexcept
|
||||
{
|
||||
}
|
||||
|
||||
@@ -189,7 +187,7 @@ void Tokenizer<TokenizerPolicy>::expect(const Type &expectedValue)
|
||||
std::stringstream message;
|
||||
message << "unexpected value, expected “" << expectedValue << "”";
|
||||
|
||||
throw TokenizerException(*this, message.str());
|
||||
throw TokenizerException(location(), message.str());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -208,7 +206,7 @@ std::string Tokenizer<TokenizerPolicy>::getIdentifier()
|
||||
if (!TokenizerPolicy::isIdentifierCharacter(character))
|
||||
{
|
||||
if (value.empty())
|
||||
throw TokenizerException(*this, "could not parse identifier");
|
||||
throw TokenizerException(location(), "could not parse identifier");
|
||||
|
||||
return value;
|
||||
}
|
||||
@@ -289,31 +287,31 @@ template<class TokenizerPolicy>
|
||||
void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd)
|
||||
{
|
||||
// TODO: move to appropriate place
|
||||
for (auto &character : m_stream)
|
||||
for (auto &character : m_content)
|
||||
character = TokenizerPolicy::transformCharacter(character);
|
||||
|
||||
const auto removeRange =
|
||||
[&](const auto &start, const auto &end)
|
||||
{
|
||||
const auto previousPosition = m_position;
|
||||
const auto previousPosition = position();
|
||||
|
||||
assert(start < m_stream.size());
|
||||
assert(start < m_content.size());
|
||||
|
||||
m_position = start;
|
||||
seek(start);
|
||||
|
||||
while (m_position < end)
|
||||
while (position() < end)
|
||||
{
|
||||
if (atEnd())
|
||||
return;
|
||||
|
||||
m_stream[m_position] = ' ';
|
||||
m_position++;
|
||||
m_content[position()] = ' ';
|
||||
advanceUnchecked();
|
||||
}
|
||||
|
||||
m_position = previousPosition;
|
||||
seek(previousPosition);
|
||||
};
|
||||
|
||||
m_position = 0;
|
||||
seek(0);
|
||||
|
||||
// TODO: refactor
|
||||
while (!atEnd())
|
||||
@@ -325,13 +323,13 @@ void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence
|
||||
if ((startSequenceFound = testAndSkip(startSequence)))
|
||||
break;
|
||||
|
||||
advance();
|
||||
advanceUnchecked();
|
||||
}
|
||||
|
||||
if (!startSequenceFound && atEnd())
|
||||
break;
|
||||
|
||||
const auto startPosition = m_position - startSequence.size();
|
||||
const auto startPosition = position() - startSequence.size();
|
||||
|
||||
bool endSequenceFound = false;
|
||||
|
||||
@@ -340,21 +338,21 @@ void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence
|
||||
if ((endSequenceFound = testAndSkip(endSequence)))
|
||||
break;
|
||||
|
||||
advance();
|
||||
advanceUnchecked();
|
||||
}
|
||||
|
||||
// If the end sequence is to be removed or could not be found, remove entire range
|
||||
const auto endPosition =
|
||||
(removeEnd || !endSequenceFound)
|
||||
? m_position
|
||||
: m_position - endSequence.size();
|
||||
? position()
|
||||
: position() - endSequence.size();
|
||||
|
||||
removeRange(startPosition, endPosition);
|
||||
|
||||
m_position = endPosition + 1;
|
||||
seek(endPosition + 1);
|
||||
}
|
||||
|
||||
m_position = 0;
|
||||
seek(0);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -406,7 +404,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
|
||||
check();
|
||||
|
||||
if (!std::isdigit(currentCharacter()))
|
||||
throw TokenizerException(*this, "could not read integer value");
|
||||
throw TokenizerException(location(), "could not read integer value");
|
||||
|
||||
uint64_t value = 0;
|
||||
|
||||
@@ -420,7 +418,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
|
||||
value *= 10;
|
||||
value += character - '0';
|
||||
|
||||
advance();
|
||||
advanceUnchecked();
|
||||
}
|
||||
|
||||
return value;
|
||||
@@ -448,7 +446,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getImpl(Tag<uint64_t>)
|
||||
skipWhiteSpace();
|
||||
|
||||
if (currentCharacter() == '-')
|
||||
throw TokenizerException(*this, "expected unsigned integer, got signed one");
|
||||
throw TokenizerException(location(), "expected unsigned integer, got signed one");
|
||||
|
||||
return getIntegerBody();
|
||||
}
|
||||
@@ -482,7 +480,7 @@ bool Tokenizer<TokenizerPolicy>::getImpl(Tag<bool>)
|
||||
if (testAndSkip<char>('1'))
|
||||
return true;
|
||||
|
||||
throw TokenizerException(*this, "could not read Boolean value");
|
||||
throw TokenizerException(location(), "could not read Boolean value");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -1,21 +0,0 @@
|
||||
set(target tokenize)
|
||||
|
||||
file(GLOB core_sources "tokenize/*.cpp")
|
||||
file(GLOB core_headers "../include/tokenize/*.h")
|
||||
|
||||
set(includes
|
||||
${PROJECT_SOURCE_DIR}/include
|
||||
)
|
||||
|
||||
set(sources
|
||||
${core_sources}
|
||||
${core_headers}
|
||||
)
|
||||
|
||||
set(libraries
|
||||
stdc++fs
|
||||
)
|
||||
|
||||
add_library(${target} ${sources})
|
||||
target_include_directories(${target} PRIVATE ${includes})
|
||||
target_link_libraries(${target} ${libraries})
|
@@ -1,150 +0,0 @@
|
||||
#include <tokenize/Location.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <tokenize/Stream.h>
|
||||
|
||||
namespace tokenize
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Location
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Location::Location(Stream &stream)
|
||||
: m_stream{stream},
|
||||
m_position{stream.position()}
|
||||
{
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Location::Location(Stream &stream, StreamPosition position)
|
||||
: m_stream{stream},
|
||||
m_position{position}
|
||||
{
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char *Location::sectionStart() const
|
||||
{
|
||||
if (!m_isInitialized)
|
||||
initializeLazily();
|
||||
|
||||
return m_sectionStart;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const char *Location::sectionEnd() const
|
||||
{
|
||||
if (!m_isInitialized)
|
||||
initializeLazily();
|
||||
|
||||
return m_sectionEnd;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StreamPosition Location::rowStart() const
|
||||
{
|
||||
if (!m_isInitialized)
|
||||
initializeLazily();
|
||||
|
||||
return m_rowStart;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StreamPosition Location::rowEnd() const
|
||||
{
|
||||
if (!m_isInitialized)
|
||||
initializeLazily();
|
||||
|
||||
return m_rowEnd;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StreamPosition Location::columnStart() const
|
||||
{
|
||||
if (!m_isInitialized)
|
||||
initializeLazily();
|
||||
|
||||
return m_columnStart;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StreamPosition Location::columnEnd() const
|
||||
{
|
||||
if (!m_isInitialized)
|
||||
initializeLazily();
|
||||
|
||||
return m_columnEnd;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Location::initializeLazily() const
|
||||
{
|
||||
const auto previousPosition = m_stream.position();
|
||||
|
||||
const auto &delimiters = m_stream.delimiters();
|
||||
|
||||
// Find current section
|
||||
auto currentFile = std::find_if(delimiters.crbegin(), delimiters.crend(),
|
||||
[&](const auto &fileDelimiter)
|
||||
{
|
||||
return m_position >= fileDelimiter.position;
|
||||
});
|
||||
|
||||
// If the tokenizer is at the end of the stream, still count from the beginning of the last section
|
||||
if (currentFile == delimiters.crend())
|
||||
currentFile = delimiters.crbegin();
|
||||
|
||||
// Go back to beginning of section
|
||||
m_stream.seek(currentFile->position);
|
||||
|
||||
StreamPosition row{1};
|
||||
StreamPosition column{1};
|
||||
|
||||
// Compute the location character by character
|
||||
while (true)
|
||||
{
|
||||
if (m_stream.atEnd())
|
||||
break;
|
||||
else if (m_stream.position() >= m_position)
|
||||
break;
|
||||
|
||||
const auto character = m_stream.currentCharacter();
|
||||
|
||||
if (character == '\n')
|
||||
{
|
||||
row++;
|
||||
column = 1;
|
||||
}
|
||||
else if (std::isblank(character) || std::isprint(character))
|
||||
column++;
|
||||
|
||||
m_stream.advance();
|
||||
}
|
||||
|
||||
m_sectionStart = currentFile->sectionName.c_str();
|
||||
m_sectionEnd = currentFile->sectionName.c_str();
|
||||
m_rowStart = row;
|
||||
m_rowEnd = row;
|
||||
m_columnStart = column;
|
||||
m_columnEnd = column;
|
||||
|
||||
m_isInitialized = true;
|
||||
|
||||
m_stream.seek(previousPosition);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
}
|
@@ -1,84 +0,0 @@
|
||||
#include <tokenize/Stream.h>
|
||||
|
||||
#include <fstream>
|
||||
|
||||
namespace tokenize
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Stream
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Stream::Stream()
|
||||
{
|
||||
std::setlocale(LC_NUMERIC, "C");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Stream::Stream(std::string streamName, std::istream &istream)
|
||||
{
|
||||
read(streamName, istream);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::read(std::string streamName, std::istream &istream)
|
||||
{
|
||||
// Store position of new section
|
||||
m_delimiters.push_back({m_stream.size(), streamName});
|
||||
|
||||
try
|
||||
{
|
||||
istream.seekg(0, std::ios::end);
|
||||
const auto streamSize = istream.tellg();
|
||||
istream.seekg(0, std::ios::beg);
|
||||
|
||||
m_stream.reserve(m_stream.size() + streamSize);
|
||||
}
|
||||
catch (const std::exception &exception)
|
||||
{
|
||||
istream.clear();
|
||||
}
|
||||
|
||||
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), std::back_inserter(m_stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::read(const std::experimental::filesystem::path &path)
|
||||
{
|
||||
if (!std::experimental::filesystem::is_regular_file(path))
|
||||
throw std::runtime_error("File does not exist: “" + path.string() + "”");
|
||||
|
||||
std::ifstream fileStream(path.string(), std::ios::in);
|
||||
|
||||
read(path.string(), fileStream);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::reset()
|
||||
{
|
||||
m_position = 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::seek(StreamPosition position)
|
||||
{
|
||||
m_position = position;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
StreamPosition Stream::position() const
|
||||
{
|
||||
return m_position;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
}
|
@@ -7,13 +7,9 @@ set(includes
|
||||
${PROJECT_SOURCE_DIR}/../../lib/catch/single_include
|
||||
)
|
||||
|
||||
set(libraries
|
||||
tokenize
|
||||
)
|
||||
|
||||
add_executable(${target} ${core_sources})
|
||||
target_include_directories(${target} PRIVATE ${includes})
|
||||
target_link_libraries(${target} ${libraries})
|
||||
target_link_libraries(${target})
|
||||
|
||||
add_custom_target(run-tokenize-tests
|
||||
COMMAND ${CMAKE_BINARY_DIR}/bin/tokenize-tests --use-colour=yes
|
||||
|
@@ -11,13 +11,13 @@ TEST_CASE("[tokenizer] Simple strings are tokenized correctly", "[tokenizer]")
|
||||
tokenize::Tokenizer<> p("input", s);
|
||||
|
||||
REQUIRE(p.get<std::string>() == "identifier");
|
||||
REQUIRE(p.get<size_t>() == 5u);
|
||||
REQUIRE(p.get<size_t>() == 5);
|
||||
REQUIRE(p.get<int>() == -51);
|
||||
REQUIRE(p.get<bool>() == false);
|
||||
REQUIRE(p.get<bool>() == true);
|
||||
|
||||
REQUIRE(p.get<int>() == 100);
|
||||
REQUIRE(p.get<size_t>() == 200u);
|
||||
REQUIRE(p.get<size_t>() == 200);
|
||||
REQUIRE(p.get<int>() == -300);
|
||||
REQUIRE_THROWS_AS(p.get<size_t>(), tokenize::TokenizerException);
|
||||
}
|
||||
@@ -30,13 +30,13 @@ TEST_CASE("[tokenizer] Tokenizing exceptions are correctly reported", "[tokenize
|
||||
tokenize::Tokenizer<> p("input", s);
|
||||
|
||||
REQUIRE_NOTHROW(p.expect<std::string>("identifier"));
|
||||
REQUIRE_NOTHROW(p.expect<size_t>(5u));
|
||||
REQUIRE_NOTHROW(p.expect<size_t>(5));
|
||||
REQUIRE_NOTHROW(p.expect<int>(-51));
|
||||
REQUIRE_NOTHROW(p.expect<bool>(false));
|
||||
REQUIRE_NOTHROW(p.expect<bool>(true));
|
||||
|
||||
REQUIRE_NOTHROW(p.expect<int>(100));
|
||||
REQUIRE_NOTHROW(p.expect<size_t>(200u));
|
||||
REQUIRE_NOTHROW(p.expect<size_t>(200));
|
||||
REQUIRE_NOTHROW(p.expect<int>(-300));
|
||||
REQUIRE_THROWS_AS(p.expect<size_t>(-400), tokenize::TokenizerException);
|
||||
|
||||
@@ -44,7 +44,7 @@ TEST_CASE("[tokenizer] Tokenizing exceptions are correctly reported", "[tokenize
|
||||
REQUIRE_THROWS_AS(p.expect<std::string>("error"), tokenize::TokenizerException);
|
||||
|
||||
p.seek(14);
|
||||
REQUIRE_THROWS_AS(p.expect<size_t>(6u), tokenize::TokenizerException);
|
||||
REQUIRE_THROWS_AS(p.expect<size_t>(6), tokenize::TokenizerException);
|
||||
|
||||
p.seek(17);
|
||||
REQUIRE_THROWS_AS(p.expect<int>(-50), tokenize::TokenizerException);
|
||||
@@ -76,53 +76,53 @@ TEST_CASE("[tokenizer] While tokenizing, the cursor position is as expected", "[
|
||||
|
||||
pos = p.position();
|
||||
REQUIRE(p.testAndReturn<std::string>("error") == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndReturn<std::string>("identifier") == true);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<std::string>("error") == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<std::string>("identifier") == true);
|
||||
REQUIRE(p.position() == 12);
|
||||
CHECK(p.position() == 12);
|
||||
|
||||
pos = p.position();
|
||||
REQUIRE(p.testAndReturn<size_t>(6u) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
REQUIRE(p.testAndReturn<size_t>(5u) == true);
|
||||
REQUIRE(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<size_t>(6u) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<size_t>(5u) == true);
|
||||
REQUIRE(p.position() == 15);
|
||||
REQUIRE(p.testAndReturn<size_t>(6) == false);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndReturn<size_t>(5) == true);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<size_t>(6) == false);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<size_t>(5) == true);
|
||||
CHECK(p.position() == 15);
|
||||
|
||||
pos = p.position();
|
||||
REQUIRE(p.testAndReturn<int>(-50) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndReturn<int>(-51) == true);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<int>(-50) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<int>(-51) == true);
|
||||
REQUIRE(p.position() == 22);
|
||||
CHECK(p.position() == 22);
|
||||
|
||||
pos = p.position();
|
||||
REQUIRE(p.testAndReturn<bool>(true) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndReturn<bool>(false) == true);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<bool>(true) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<bool>(false) == true);
|
||||
REQUIRE(p.position() == 25);
|
||||
CHECK(p.position() == 25);
|
||||
|
||||
pos = p.position();
|
||||
REQUIRE(p.testAndReturn<bool>(false) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndReturn<bool>(true) == true);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<bool>(false) == false);
|
||||
REQUIRE(p.position() == pos);
|
||||
CHECK(p.position() == pos);
|
||||
REQUIRE(p.testAndSkip<bool>(true) == true);
|
||||
REQUIRE(p.position() == 27);
|
||||
CHECK(p.position() == 27);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -181,125 +181,251 @@ TEST_CASE("[tokenizer] While tokenizing, the cursor location is as expcected", "
|
||||
|
||||
const auto startPosition = p.position();
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 1u);
|
||||
REQUIRE(l.columnStart() == 1u);
|
||||
REQUIRE(p.currentCharacter() == '1');
|
||||
}
|
||||
tokenize::Location l;
|
||||
|
||||
REQUIRE_NOTHROW(p.advance());
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 1);
|
||||
CHECK(l.columnStart == 1);
|
||||
CHECK(p.currentCharacter() == '1');
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 1u);
|
||||
REQUIRE(l.columnStart() == 2u);
|
||||
REQUIRE(p.currentCharacter() == '2');
|
||||
}
|
||||
p.advance();
|
||||
|
||||
REQUIRE_NOTHROW(p.advance());
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 1);
|
||||
CHECK(l.columnStart == 2);
|
||||
CHECK(p.currentCharacter() == '2');
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 1u);
|
||||
REQUIRE(l.columnStart() == 3u);
|
||||
REQUIRE(p.currentCharacter() == '3');
|
||||
}
|
||||
p.advance();
|
||||
|
||||
REQUIRE_NOTHROW(p.advance());
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 1);
|
||||
CHECK(l.columnStart == 3);
|
||||
CHECK(p.currentCharacter() == '3');
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 1u);
|
||||
REQUIRE(l.columnStart() == 4u);
|
||||
REQUIRE(p.currentCharacter() == ' ');
|
||||
}
|
||||
p.advance();
|
||||
|
||||
REQUIRE_NOTHROW(p.advance());
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 1);
|
||||
CHECK(l.columnStart == 4);
|
||||
CHECK(p.currentCharacter() == ' ');
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 1u);
|
||||
REQUIRE(l.columnStart() == 5u);
|
||||
REQUIRE(p.currentCharacter() == '\n');
|
||||
}
|
||||
p.advance();
|
||||
|
||||
REQUIRE_NOTHROW(p.advance());
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 1);
|
||||
CHECK(l.columnStart == 5);
|
||||
CHECK(p.currentCharacter() == '\n');
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 2u);
|
||||
REQUIRE(l.columnStart() == 1u);
|
||||
REQUIRE(p.currentCharacter() == '4');
|
||||
}
|
||||
p.advance();
|
||||
|
||||
REQUIRE_NOTHROW(p.advance());
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 2);
|
||||
CHECK(l.columnStart == 1);
|
||||
CHECK(p.currentCharacter() == '4');
|
||||
|
||||
p.advance();
|
||||
|
||||
REQUIRE_NOTHROW(p.expect<std::string>("test1"));
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 3u);
|
||||
REQUIRE(l.columnStart() == 6u);
|
||||
}
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 3);
|
||||
CHECK(l.columnStart == 6);
|
||||
|
||||
REQUIRE_NOTHROW(p.expect<std::string>("test2"));
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 4u);
|
||||
REQUIRE(l.columnStart() == 7u);
|
||||
}
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 4);
|
||||
CHECK(l.columnStart == 7);
|
||||
|
||||
REQUIRE_NOTHROW(p.expect<std::string>("test3"));
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 5u);
|
||||
REQUIRE(l.columnStart() == 6u);
|
||||
}
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 5);
|
||||
CHECK(l.columnStart == 6);
|
||||
|
||||
REQUIRE_NOTHROW(p.skipLine());
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 6u);
|
||||
REQUIRE(l.columnStart() == 1u);
|
||||
}
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 6);
|
||||
CHECK(l.columnStart == 1);
|
||||
|
||||
REQUIRE_NOTHROW(p.skipLine());
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 7u);
|
||||
REQUIRE(l.columnStart() == 1u);
|
||||
}
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 7);
|
||||
CHECK(l.columnStart == 1);
|
||||
|
||||
REQUIRE_NOTHROW(p.skipWhiteSpace());
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p);
|
||||
REQUIRE(l.rowStart() == 10u);
|
||||
REQUIRE(l.columnStart() == 1u);
|
||||
REQUIRE(p.atEnd());
|
||||
}
|
||||
l = p.location();
|
||||
CHECK(l.rowStart == 10);
|
||||
CHECK(l.columnStart == 1);
|
||||
CHECK(p.atEnd());
|
||||
|
||||
p.reset();
|
||||
REQUIRE(p.position() == startPosition);
|
||||
REQUIRE_FALSE(p.atEnd());
|
||||
CHECK(p.position() == startPosition);
|
||||
CHECK_FALSE(p.atEnd());
|
||||
|
||||
for (size_t i = 0; i < 5; i++)
|
||||
p.advance();
|
||||
|
||||
REQUIRE(p.position() == static_cast<std::istream::pos_type>(5));
|
||||
CHECK(p.position() == static_cast<std::istream::pos_type>(5));
|
||||
|
||||
p.seek(static_cast<std::istream::pos_type>(7));
|
||||
|
||||
REQUIRE(p.position() == static_cast<std::istream::pos_type>(7));
|
||||
CHECK(p.position() == static_cast<std::istream::pos_type>(7));
|
||||
|
||||
REQUIRE_NOTHROW(p.expect<std::string>("test1"));
|
||||
}
|
||||
|
||||
// TODO: test tokenizer with multiple sections
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Verifies that a tokenizer fed with two consecutive sections ("test-1" and
// "test-2") reports the section name, row, and column of the cursor correctly
// while advancing character by character, including across the boundary
// between the two sections, and that the end is reached after the last
// character of the second section.
TEST_CASE("[tokenizer] While tokenizing with multiple sections, the cursor location is as expected", "[tokenizer]")
{
	std::stringstream s1("123 \n4\ntest1\n");
	std::stringstream s2("456 \n7\ntest2\n");
	tokenize::Tokenizer<> p;
	p.read("test-1", s1);
	p.read("test-2", s2);

	// Moves the cursor forward by the given number of characters
	const auto advance =
		[&](auto steps)
		{
			for (auto i = 0; i < steps; i++)
				p.advance();
		};

	// Checks the section, row, column, and character at the current cursor
	// position. CHECK (rather than REQUIRE) is used so that a single mismatch
	// does not abort the remaining checks of the walk.
	const auto checkCursor =
		[&](const char *section, auto row, auto column, char character)
		{
			const auto l = p.location();

			CHECK(l.sectionStart == section);
			CHECK(l.rowStart == row);
			CHECK(l.columnStart == column);
			CHECK(p.currentCharacter() == character);
		};

	// First section: "123 \n4\ntest1\n"
	checkCursor("test-1", 1, 1, '1');

	advance(1);
	checkCursor("test-1", 1, 2, '2');

	advance(3);
	checkCursor("test-1", 1, 5, '\n');

	advance(1);
	checkCursor("test-1", 2, 1, '4');

	advance(1);
	checkCursor("test-1", 2, 2, '\n');

	advance(1);
	checkCursor("test-1", 3, 1, 't');

	advance(4);
	checkCursor("test-1", 3, 5, '1');

	advance(1);
	checkCursor("test-1", 3, 6, '\n');

	// Stepping over the last newline of the first section enters the second
	// section, where row and column counting starts over at 1
	advance(1);
	checkCursor("test-2", 1, 1, '4');

	// Second section: "456 \n7\ntest2\n"
	advance(1);
	checkCursor("test-2", 1, 2, '5');

	advance(3);
	checkCursor("test-2", 1, 5, '\n');

	advance(1);
	checkCursor("test-2", 2, 1, '7');

	advance(1);
	checkCursor("test-2", 2, 2, '\n');

	advance(1);
	checkCursor("test-2", 3, 1, 't');

	advance(4);
	checkCursor("test-2", 3, 5, '2');

	advance(1);
	checkCursor("test-2", 3, 6, '\n');

	// Consuming the final newline exhausts the input
	advance(1);

	CHECK(p.atEnd());
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -313,23 +439,21 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
|
||||
|
||||
REQUIRE_NOTHROW(p1.expect<std::string>("test1"));
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p1);
|
||||
REQUIRE(l.rowStart() == 2u);
|
||||
REQUIRE(l.columnStart() == 6u);
|
||||
}
|
||||
tokenize::Location l;
|
||||
|
||||
l = p1.location();
|
||||
CHECK(l.rowStart == 2);
|
||||
CHECK(l.columnStart == 6);
|
||||
|
||||
REQUIRE_NOTHROW(p1.expect<std::string>("test2"));
|
||||
|
||||
{
|
||||
auto l = tokenize::Location(p1);
|
||||
REQUIRE(l.rowStart() == 3u);
|
||||
REQUIRE(l.columnStart() == 6u);
|
||||
}
|
||||
l = p1.location();
|
||||
CHECK(l.rowStart == 3);
|
||||
CHECK(l.columnStart == 6);
|
||||
|
||||
p1.skipWhiteSpace();
|
||||
|
||||
REQUIRE(p1.atEnd());
|
||||
CHECK(p1.atEnd());
|
||||
|
||||
std::stringstream s2("test;");
|
||||
tokenize::Tokenizer<> p2("input", s2);
|
||||
@@ -340,7 +464,7 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
|
||||
|
||||
p2.skipWhiteSpace();
|
||||
|
||||
REQUIRE(p2.atEnd());
|
||||
CHECK(p2.atEnd());
|
||||
|
||||
std::stringstream s3("/* comment at start */ test1 /* comment in between */ test2 /*");
|
||||
tokenize::Tokenizer<> p3("input", s3);
|
||||
@@ -352,7 +476,7 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
|
||||
|
||||
p3.skipWhiteSpace();
|
||||
|
||||
REQUIRE(p3.atEnd());
|
||||
CHECK(p3.atEnd());
|
||||
|
||||
// Check that if there are no comments, the end is not accidentally truncated
|
||||
std::stringstream s4("test foo bar");
|
||||
@@ -364,5 +488,5 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
|
||||
REQUIRE_NOTHROW(p4.expect<std::string>("foo"));
|
||||
REQUIRE_NOTHROW(p4.expect<std::string>("bar"));
|
||||
|
||||
REQUIRE(p4.atEnd());
|
||||
CHECK(p4.atEnd());
|
||||
}
|
||||
|
Reference in New Issue
Block a user