Refactoring of tokenizer and stream classes for better efficiency and maintainability.

2017-06-21 02:56:27 +02:00
parent 97c6e58355
commit 39c0e27cb2
38 changed files with 466 additions and 512 deletions
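In broad strokes, this commit inlines the Stream and Location implementations into the headers (deleting src/tokenize/Location.cpp and src/tokenize/Stream.cpp), turns Location from a lazily computed class into a plain value struct, and has Stream record newline positions per section while reading, so that locations can be computed with a binary search instead of a character-by-character rescan. A minimal usage sketch of the resulting interface (header path and get<>() semantics inferred from the tests below):

#include <sstream>
#include <tokenize/Tokenizer.h>

int main()
{
    std::stringstream input("identifier 42");
    tokenize::Tokenizer<> tokenizer("input", input);

    tokenizer.get<std::string>(); // reads "identifier"
    tokenizer.get<size_t>();      // reads 42

    // Location is now a plain struct with public data members
    const auto location = tokenizer.location();
    // location.sectionStart, location.rowStart, location.columnStart, ...
}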

View File

@@ -22,7 +22,6 @@ if (CMAKE_GENERATOR STREQUAL "Ninja" AND
set(CMAKE_CXX_FLAGS "-fdiagnostics-color=always ${CMAKE_CXX_FLAGS}")
endif()
add_subdirectory(src)
if(TOKENIZE_BUILD_TESTS)
add_subdirectory(tests)
endif(TOKENIZE_BUILD_TESTS)

View File

@@ -1,6 +1,8 @@
#ifndef __TOKENIZE__LOCATION_H
#define __TOKENIZE__LOCATION_H
#include <string>
#include <tokenize/StreamPosition.h>
namespace tokenize
@@ -16,37 +18,19 @@ class Stream;
////////////////////////////////////////////////////////////////////////////////////////////////////
class Location
struct Location
{
public:
Location(Stream &stream);
Location(Stream &stream, StreamPosition position);
StreamPosition position{InvalidStreamPosition};
const char *sectionStart() const;
const char *sectionEnd() const;
// TODO: think about avoiding copying strings
std::string sectionStart;
std::string sectionEnd;
StreamPosition rowStart() const;
StreamPosition rowEnd() const;
StreamPosition rowStart{InvalidStreamPosition};
StreamPosition rowEnd{InvalidStreamPosition};
StreamPosition columnStart() const;
StreamPosition columnEnd() const;
private:
void initializeLazily() const;
Stream &m_stream;
const StreamPosition m_position;
mutable bool m_isInitialized{false};
mutable const char *m_sectionStart{nullptr};
mutable const char *m_sectionEnd{nullptr};
mutable StreamPosition m_rowStart{InvalidStreamPosition};
mutable StreamPosition m_rowEnd{InvalidStreamPosition};
mutable StreamPosition m_columnStart{InvalidStreamPosition};
mutable StreamPosition m_columnEnd{InvalidStreamPosition};
StreamPosition columnStart{InvalidStreamPosition};
StreamPosition columnEnd{InvalidStreamPosition};
};
////////////////////////////////////////////////////////////////////////////////////////////////////
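With Location now an aggregate (brace-initializable under C++17 rules despite the default member initializers), call sites change from constructing a Location over a stream and querying accessors to reading plain members filled in by Stream::location(). A before/after sketch:

// before: lazily computed on the first accessor call
// auto location = tokenize::Location(stream);
// const auto row = location.rowStart();

// after: a plain value returned by the stream
const auto location = stream.location();
const auto row = location.rowStart;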

View File

@@ -1,8 +1,10 @@
#ifndef __TOKENIZE__STREAM_H
#define __TOKENIZE__STREAM_H
#include <algorithm>
#include <cassert>
#include <clocale> // std::setlocale in the default constructor below
#include <experimental/filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <sstream>
@@ -24,38 +26,123 @@ namespace tokenize
class Stream
{
public:
struct Delimiter
struct Section
{
StreamPosition position;
std::string sectionName;
std::string name;
std::vector<StreamPosition> newlines;
};
public:
Stream();
explicit Stream(std::string streamName, std::istream &istream);
Stream()
{
std::setlocale(LC_NUMERIC, "C");
}
explicit Stream(std::string streamName, std::istream &istream)
{
read(streamName, istream);
}
~Stream() = default;
Stream(const Stream &other) = delete;
Stream &operator=(const Stream &other) = delete;
Stream(Stream &&other) = delete;
Stream &operator=(Stream &&other) = delete;
Stream(Stream &&other) = default;
Stream &operator=(Stream &&other) = default;
void read(std::string streamName, std::istream &istream);
void read(const std::experimental::filesystem::path &path);
void reset();
void seek(StreamPosition position);
StreamPosition position() const;
const std::vector<Delimiter> &delimiters() const
void read(std::string streamName, std::istream &istream)
{
return m_delimiters;
// Store position of new section
m_sections.push_back({m_content.size(), streamName, {}});
const auto contentStartIndex = m_content.size();
try
{
istream.seekg(0, std::ios::end);
const auto streamSize = istream.tellg();
istream.seekg(0, std::ios::beg);
m_content.reserve(m_content.size() + streamSize);
}
catch (const std::exception &exception)
{
istream.clear();
}
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), std::back_inserter(m_content));
for (auto i = contentStartIndex; i < m_content.size(); i++)
if (m_content[i] == '\n')
m_sections.back().newlines.emplace_back(i);
}
void read(const std::experimental::filesystem::path &path)
{
if (!std::experimental::filesystem::is_regular_file(path))
throw std::runtime_error("File does not exist: “" + path.string() + "");
std::ifstream fileStream(path.string(), std::ios::in);
read(path.string(), fileStream);
}
void reset()
{
m_position = 0;
}
void seek(StreamPosition position)
{
m_position = position;
}
StreamPosition position() const
{
return m_position;
}
Location location() const
{
// Find current section
auto section = std::upper_bound(m_sections.cbegin(), m_sections.cend(), m_position,
[&](const auto &lhs, const auto &rhs)
{
return lhs < rhs.position;
});
assert(section != m_sections.cbegin());
section--;
// Find line (row) in the file
auto line = std::lower_bound(section->newlines.cbegin(), section->newlines.cend(), m_position);
if (line == section->newlines.cbegin())
{
const auto row = 1;
const auto column = static_cast<StreamPosition>(m_position - section->position + 1);
return {m_position, section->name, section->name, row, row, column, column};
}
const auto row = static_cast<StreamPosition>(line - section->newlines.cbegin() + 1);
const auto column = static_cast<StreamPosition>(m_position - *(line - 1));
return {m_position, section->name, section->name, row, row, column, column};
}
const std::vector<Section> &sections() const
{
return m_sections;
}
char currentCharacter()
{
check();
return m_stream[m_position];
return m_content[m_position];
}
void advance()
@@ -64,22 +151,42 @@ class Stream
m_position++;
}
void advanceUnchecked()
{
m_position++;
}
bool atEnd() const
{
return m_position >= m_stream.size();
return m_position >= m_content.size();
}
void check()
{
if (atEnd())
throw TokenizerException(*this, "reading past end of file");
throw TokenizerException(location(), "reading past end of file");
}
StreamPosition size() const
{
return m_content.size();
}
std::string &content()
{
return m_content;
}
const std::string &content() const
{
return m_content;
}
protected:
std::string m_stream;
std::string m_content;
mutable StreamPosition m_position{0};
std::vector<Delimiter> m_delimiters;
std::vector<Section> m_sections;
};
////////////////////////////////////////////////////////////////////////////////////////////////////
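This is the core of the efficiency improvement: the deleted Location::initializeLazily() (further below) re-read the section character by character on every lookup, while location() above only binary-searches the newline positions recorded during read(). A self-contained sketch of that lookup logic, with simplified, hypothetical names:

#include <algorithm>
#include <cstddef>
#include <vector>

struct RowColumn
{
    std::size_t row;
    std::size_t column;
};

// Simplified model of the binary search performed by Stream::location()
RowColumn locate(const std::vector<std::size_t> &newlines, std::size_t sectionStart,
    std::size_t position)
{
    // First recorded newline at or after the cursor
    const auto line = std::lower_bound(newlines.cbegin(), newlines.cend(), position);

    // Still on the first row of the section
    if (line == newlines.cbegin())
        return {1, position - sectionStart + 1};

    // Rows are 1-based; columns count from the newline preceding the cursor
    const auto row = static_cast<std::size_t>(line - newlines.cbegin()) + 1;
    const auto column = position - *(line - 1);

    return {row, column};
}

For content "ab\ncd" read as a single section starting at position 0, the recorded newlines are {2}; locate({2}, 0, 3) then yields row 2, column 1 without any rescan.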

View File

@@ -27,20 +27,19 @@ struct Tag
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy = CaseSensitiveTokenizerPolicy>
class Tokenizer: public Stream, public TokenizerPolicy
class Tokenizer : public Stream, public TokenizerPolicy
{
template<class OtherTokenizerPolicy>
friend class Tokenizer;
public:
explicit Tokenizer();
explicit Tokenizer() noexcept;
explicit Tokenizer(std::string streamName, std::istream &istream);
template<class OtherTokenizer>
Tokenizer(OtherTokenizer &&otherTokenizer)
Tokenizer(OtherTokenizer &&other) noexcept
: Stream(std::forward<OtherTokenizer>(other))
{
m_stream = std::move(otherTokenizer.m_stream);
m_delimiters = std::move(otherTokenizer.m_delimiters);
}
void removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd);
@@ -94,8 +93,7 @@ class Tokenizer: public Stream, public TokenizerPolicy
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
Tokenizer<TokenizerPolicy>::Tokenizer()
: Stream()
Tokenizer<TokenizerPolicy>::Tokenizer() noexcept
{
}
@@ -189,7 +187,7 @@ void Tokenizer<TokenizerPolicy>::expect(const Type &expectedValue)
std::stringstream message;
message << "unexpected value, expected “" << expectedValue << "";
throw TokenizerException(*this, message.str());
throw TokenizerException(location(), message.str());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -208,7 +206,7 @@ std::string Tokenizer<TokenizerPolicy>::getIdentifier()
if (!TokenizerPolicy::isIdentifierCharacter(character))
{
if (value.empty())
throw TokenizerException(*this, "could not parse identifier");
throw TokenizerException(location(), "could not parse identifier");
return value;
}
@@ -289,31 +287,31 @@ template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd)
{
// TODO: move to appropriate place
for (auto &character : m_stream)
for (auto &character : m_content)
character = TokenizerPolicy::transformCharacter(character);
const auto removeRange =
[&](const auto &start, const auto &end)
{
const auto previousPosition = m_position;
const auto previousPosition = position();
assert(start < m_stream.size());
assert(start < m_content.size());
m_position = start;
seek(start);
while (m_position < end)
while (position() < end)
{
if (atEnd())
return;
m_stream[m_position] = ' ';
m_position++;
m_content[position()] = ' ';
advanceUnchecked();
}
m_position = previousPosition;
seek(previousPosition);
};
m_position = 0;
seek(0);
// TODO: refactor
while (!atEnd())
@@ -325,13 +323,13 @@ void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence
if ((startSequenceFound = testAndSkip(startSequence)))
break;
advance();
advanceUnchecked();
}
if (!startSequenceFound && atEnd())
break;
const auto startPosition = m_position - startSequence.size();
const auto startPosition = position() - startSequence.size();
bool endSequenceFound = false;
@@ -340,21 +338,21 @@ void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence
if ((endSequenceFound = testAndSkip(endSequence)))
break;
advance();
advanceUnchecked();
}
// If the end sequence is to be removed or could not be found, remove entire range
const auto endPosition =
(removeEnd || !endSequenceFound)
? m_position
: m_position - endSequence.size();
? position()
: position() - endSequence.size();
removeRange(startPosition, endPosition);
m_position = endPosition + 1;
seek(endPosition + 1);
}
m_position = 0;
seek(0);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -406,7 +404,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
check();
if (!std::isdigit(currentCharacter()))
throw TokenizerException(*this, "could not read integer value");
throw TokenizerException(location(), "could not read integer value");
uint64_t value = 0;
@@ -420,7 +418,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getIntegerBody()
value *= 10;
value += character - '0';
advance();
advanceUnchecked();
}
return value;
@@ -448,7 +446,7 @@ uint64_t Tokenizer<TokenizerPolicy>::getImpl(Tag<uint64_t>)
skipWhiteSpace();
if (currentCharacter() == '-')
throw TokenizerException(*this, "expected unsigned integer, got signed one");
throw TokenizerException(location(), "expected unsigned integer, got signed one");
return getIntegerBody();
}
@@ -482,7 +480,7 @@ bool Tokenizer<TokenizerPolicy>::getImpl(Tag<bool>)
if (testAndSkip<char>('1'))
return true;
throw TokenizerException(*this, "could not read Boolean value");
throw TokenizerException(location(), "could not read Boolean value");
}
////////////////////////////////////////////////////////////////////////////////////////////////////
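Two things are worth noting here. The policy-converting move constructor can now simply delegate to Stream's move constructor, because Stream itself became movable. And removeComments() blanks comment ranges with spaces in place rather than erasing them, so all stream positions after a comment stay valid; the switch to advanceUnchecked() is safe because each loop already tests atEnd() itself. A usage sketch (comment delimiters chosen for illustration):

std::stringstream input("test1 % line comment\ntest2 /* block comment */ test3");
tokenize::Tokenizer<> tokenizer("input", input);

// Blank "%"-to-newline comments, keeping the newline so row numbers stay intact
tokenizer.removeComments("%", "\n", false);
// Blank block comments including the closing "*/"
tokenizer.removeComments("/*", "*/", true);

tokenizer.expect<std::string>("test1");
tokenizer.expect<std::string>("test2");
tokenizer.expect<std::string>("test3");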

View File

@@ -1,21 +0,0 @@
set(target tokenize)
file(GLOB core_sources "tokenize/*.cpp")
file(GLOB core_headers "../include/tokenize/*.h")
set(includes
${PROJECT_SOURCE_DIR}/include
)
set(sources
${core_sources}
${core_headers}
)
set(libraries
stdc++fs
)
add_library(${target} ${sources})
target_include_directories(${target} PRIVATE ${includes})
target_link_libraries(${target} ${libraries})

View File

@@ -1,150 +0,0 @@
#include <tokenize/Location.h>
#include <algorithm>
#include <tokenize/Stream.h>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Location
//
////////////////////////////////////////////////////////////////////////////////////////////////////
Location::Location(Stream &stream)
: m_stream{stream},
m_position{stream.position()}
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////
Location::Location(Stream &stream, StreamPosition position)
: m_stream{stream},
m_position{position}
{
}
////////////////////////////////////////////////////////////////////////////////////////////////////
const char *Location::sectionStart() const
{
if (!m_isInitialized)
initializeLazily();
return m_sectionStart;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
const char *Location::sectionEnd() const
{
if (!m_isInitialized)
initializeLazily();
return m_sectionEnd;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
StreamPosition Location::rowStart() const
{
if (!m_isInitialized)
initializeLazily();
return m_rowStart;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
StreamPosition Location::rowEnd() const
{
if (!m_isInitialized)
initializeLazily();
return m_rowEnd;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
StreamPosition Location::columnStart() const
{
if (!m_isInitialized)
initializeLazily();
return m_columnStart;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
StreamPosition Location::columnEnd() const
{
if (!m_isInitialized)
initializeLazily();
return m_columnEnd;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Location::initializeLazily() const
{
const auto previousPosition = m_stream.position();
const auto &delimiters = m_stream.delimiters();
// Find current section
auto currentFile = std::find_if(delimiters.crbegin(), delimiters.crend(),
[&](const auto &fileDelimiter)
{
return m_position >= fileDelimiter.position;
});
// If the tokenizer is at the end of the stream, still count from the beginning of the last section
if (currentFile == delimiters.crend())
currentFile = delimiters.crbegin();
// Go back to beginning of section
m_stream.seek(currentFile->position);
StreamPosition row{1};
StreamPosition column{1};
// Compute the location character by character
while (true)
{
if (m_stream.atEnd())
break;
else if (m_stream.position() >= m_position)
break;
const auto character = m_stream.currentCharacter();
if (character == '\n')
{
row++;
column = 1;
}
else if (std::isblank(character) || std::isprint(character))
column++;
m_stream.advance();
}
m_sectionStart = currentFile->sectionName.c_str();
m_sectionEnd = currentFile->sectionName.c_str();
m_rowStart = row;
m_rowEnd = row;
m_columnStart = column;
m_columnEnd = column;
m_isInitialized = true;
m_stream.seek(previousPosition);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}

View File

@@ -1,84 +0,0 @@
#include <tokenize/Stream.h>
#include <fstream>
namespace tokenize
{
////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Stream
//
////////////////////////////////////////////////////////////////////////////////////////////////////
Stream::Stream()
{
std::setlocale(LC_NUMERIC, "C");
}
////////////////////////////////////////////////////////////////////////////////////////////////////
Stream::Stream(std::string streamName, std::istream &istream)
{
read(streamName, istream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::read(std::string streamName, std::istream &istream)
{
// Store position of new section
m_delimiters.push_back({m_stream.size(), streamName});
try
{
istream.seekg(0, std::ios::end);
const auto streamSize = istream.tellg();
istream.seekg(0, std::ios::beg);
m_stream.reserve(m_stream.size() + streamSize);
}
catch (const std::exception &exception)
{
istream.clear();
}
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), std::back_inserter(m_stream));
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::read(const std::experimental::filesystem::path &path)
{
if (!std::experimental::filesystem::is_regular_file(path))
throw std::runtime_error("File does not exist: “" + path.string() + "");
std::ifstream fileStream(path.string(), std::ios::in);
read(path.string(), fileStream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::reset()
{
m_position = 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::seek(StreamPosition position)
{
m_position = position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
StreamPosition Stream::position() const
{
return m_position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}

View File

@@ -7,13 +7,9 @@ set(includes
${PROJECT_SOURCE_DIR}/../../lib/catch/single_include
)
set(libraries
tokenize
)
add_executable(${target} ${core_sources})
target_include_directories(${target} PRIVATE ${includes})
target_link_libraries(${target} ${libraries})
target_link_libraries(${target})
add_custom_target(run-tokenize-tests
COMMAND ${CMAKE_BINARY_DIR}/bin/tokenize-tests --use-colour=yes

View File

@@ -11,13 +11,13 @@ TEST_CASE("[tokenizer] Simple strings are tokenized correctly", "[tokenizer]")
tokenize::Tokenizer<> p("input", s);
REQUIRE(p.get<std::string>() == "identifier");
REQUIRE(p.get<size_t>() == 5u);
REQUIRE(p.get<size_t>() == 5);
REQUIRE(p.get<int>() == -51);
REQUIRE(p.get<bool>() == false);
REQUIRE(p.get<bool>() == true);
REQUIRE(p.get<int>() == 100);
REQUIRE(p.get<size_t>() == 200u);
REQUIRE(p.get<size_t>() == 200);
REQUIRE(p.get<int>() == -300);
REQUIRE_THROWS_AS(p.get<size_t>(), tokenize::TokenizerException);
}
@@ -30,13 +30,13 @@ TEST_CASE("[tokenizer] Tokenizing exceptions are correctly reported", "[tokenize
tokenize::Tokenizer<> p("input", s);
REQUIRE_NOTHROW(p.expect<std::string>("identifier"));
REQUIRE_NOTHROW(p.expect<size_t>(5u));
REQUIRE_NOTHROW(p.expect<size_t>(5));
REQUIRE_NOTHROW(p.expect<int>(-51));
REQUIRE_NOTHROW(p.expect<bool>(false));
REQUIRE_NOTHROW(p.expect<bool>(true));
REQUIRE_NOTHROW(p.expect<int>(100));
REQUIRE_NOTHROW(p.expect<size_t>(200u));
REQUIRE_NOTHROW(p.expect<size_t>(200));
REQUIRE_NOTHROW(p.expect<int>(-300));
REQUIRE_THROWS_AS(p.expect<size_t>(-400), tokenize::TokenizerException);
@@ -44,7 +44,7 @@ TEST_CASE("[tokenizer] Tokenizing exceptions are correctly reported", "[tokenize
REQUIRE_THROWS_AS(p.expect<std::string>("error"), tokenize::TokenizerException);
p.seek(14);
REQUIRE_THROWS_AS(p.expect<size_t>(6u), tokenize::TokenizerException);
REQUIRE_THROWS_AS(p.expect<size_t>(6), tokenize::TokenizerException);
p.seek(17);
REQUIRE_THROWS_AS(p.expect<int>(-50), tokenize::TokenizerException);
@@ -76,53 +76,53 @@ TEST_CASE("[tokenizer] While tokenizing, the cursor position is as expected", "[
pos = p.position();
REQUIRE(p.testAndReturn<std::string>("error") == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndReturn<std::string>("identifier") == true);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<std::string>("error") == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<std::string>("identifier") == true);
REQUIRE(p.position() == 12);
CHECK(p.position() == 12);
pos = p.position();
REQUIRE(p.testAndReturn<size_t>(6u) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndReturn<size_t>(5u) == true);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<size_t>(6u) == false);
REQUIRE(p.position() == pos);
REQUIRE(p.testAndSkip<size_t>(5u) == true);
REQUIRE(p.position() == 15);
REQUIRE(p.testAndReturn<size_t>(6) == false);
CHECK(p.position() == pos);
REQUIRE(p.testAndReturn<size_t>(5) == true);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<size_t>(6) == false);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<size_t>(5) == true);
CHECK(p.position() == 15);
pos = p.position();
REQUIRE(p.testAndReturn<int>(-50) == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndReturn<int>(-51) == true);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<int>(-50) == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<int>(-51) == true);
REQUIRE(p.position() == 22);
CHECK(p.position() == 22);
pos = p.position();
REQUIRE(p.testAndReturn<bool>(true) == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndReturn<bool>(false) == true);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(true) == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(false) == true);
REQUIRE(p.position() == 25);
CHECK(p.position() == 25);
pos = p.position();
REQUIRE(p.testAndReturn<bool>(false) == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndReturn<bool>(true) == true);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(false) == false);
REQUIRE(p.position() == pos);
CHECK(p.position() == pos);
REQUIRE(p.testAndSkip<bool>(true) == true);
REQUIRE(p.position() == 27);
CHECK(p.position() == 27);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -181,125 +181,251 @@ TEST_CASE("[tokenizer] While tokenizing, the cursor location is as expected", "
const auto startPosition = p.position();
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 1u);
REQUIRE(l.columnStart() == 1u);
REQUIRE(p.currentCharacter() == '1');
}
tokenize::Location l;
REQUIRE_NOTHROW(p.advance());
l = p.location();
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == '1');
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 1u);
REQUIRE(l.columnStart() == 2u);
REQUIRE(p.currentCharacter() == '2');
}
p.advance();
REQUIRE_NOTHROW(p.advance());
l = p.location();
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 2);
CHECK(p.currentCharacter() == '2');
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 1u);
REQUIRE(l.columnStart() == 3u);
REQUIRE(p.currentCharacter() == '3');
}
p.advance();
REQUIRE_NOTHROW(p.advance());
l = p.location();
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 3);
CHECK(p.currentCharacter() == '3');
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 1u);
REQUIRE(l.columnStart() == 4u);
REQUIRE(p.currentCharacter() == ' ');
}
p.advance();
REQUIRE_NOTHROW(p.advance());
l = p.location();
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 4);
CHECK(p.currentCharacter() == ' ');
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 1u);
REQUIRE(l.columnStart() == 5u);
REQUIRE(p.currentCharacter() == '\n');
}
p.advance();
REQUIRE_NOTHROW(p.advance());
l = p.location();
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 5);
CHECK(p.currentCharacter() == '\n');
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 2u);
REQUIRE(l.columnStart() == 1u);
REQUIRE(p.currentCharacter() == '4');
}
p.advance();
REQUIRE_NOTHROW(p.advance());
l = p.location();
CHECK(l.rowStart == 2);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == '4');
p.advance();
REQUIRE_NOTHROW(p.expect<std::string>("test1"));
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 3u);
REQUIRE(l.columnStart() == 6u);
}
l = p.location();
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 6);
REQUIRE_NOTHROW(p.expect<std::string>("test2"));
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 4u);
REQUIRE(l.columnStart() == 7u);
}
l = p.location();
CHECK(l.rowStart == 4);
CHECK(l.columnStart == 7);
REQUIRE_NOTHROW(p.expect<std::string>("test3"));
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 5u);
REQUIRE(l.columnStart() == 6u);
}
l = p.location();
CHECK(l.rowStart == 5);
CHECK(l.columnStart == 6);
REQUIRE_NOTHROW(p.skipLine());
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 6u);
REQUIRE(l.columnStart() == 1u);
}
l = p.location();
CHECK(l.rowStart == 6);
CHECK(l.columnStart == 1);
REQUIRE_NOTHROW(p.skipLine());
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 7u);
REQUIRE(l.columnStart() == 1u);
}
l = p.location();
CHECK(l.rowStart == 7);
CHECK(l.columnStart == 1);
REQUIRE_NOTHROW(p.skipWhiteSpace());
{
auto l = tokenize::Location(p);
REQUIRE(l.rowStart() == 10u);
REQUIRE(l.columnStart() == 1u);
REQUIRE(p.atEnd());
}
l = p.location();
CHECK(l.rowStart == 10);
CHECK(l.columnStart == 1);
CHECK(p.atEnd());
p.reset();
REQUIRE(p.position() == startPosition);
REQUIRE_FALSE(p.atEnd());
CHECK(p.position() == startPosition);
CHECK_FALSE(p.atEnd());
for (size_t i = 0; i < 5; i++)
p.advance();
REQUIRE(p.position() == static_cast<std::istream::pos_type>(5));
CHECK(p.position() == static_cast<std::istream::pos_type>(5));
p.seek(static_cast<std::istream::pos_type>(7));
REQUIRE(p.position() == static_cast<std::istream::pos_type>(7));
CHECK(p.position() == static_cast<std::istream::pos_type>(7));
REQUIRE_NOTHROW(p.expect<std::string>("test1"));
}
// TODO: test tokenizer with multiple sections
////////////////////////////////////////////////////////////////////////////////////////////////////
TEST_CASE("[tokenizer] While tokenizing with multiple sections, the cursor location is as expcected", "[tokenizer]")
{
std::stringstream s1("123 \n4\ntest1\n");
std::stringstream s2("456 \n7\ntest2\n");
tokenize::Tokenizer<> p;
p.read("test-1", s1);
p.read("test-2", s2);
const auto advance =
[&](auto steps)
{
for (auto i = 0; i < steps; i++)
p.advance();
};
tokenize::Location l;
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == '1');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 2);
CHECK(p.currentCharacter() == '2');
advance(3);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 5);
CHECK(p.currentCharacter() == '\n');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 2);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == '4');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 2);
CHECK(l.columnStart == 2);
CHECK(p.currentCharacter() == '\n');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == 't');
advance(4);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 5);
CHECK(p.currentCharacter() == '1');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-1");
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 6);
CHECK(p.currentCharacter() == '\n');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == '4');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 2);
CHECK(p.currentCharacter() == '5');
advance(3);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 1);
CHECK(l.columnStart == 5);
CHECK(p.currentCharacter() == '\n');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 2);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == '7');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 2);
CHECK(l.columnStart == 2);
CHECK(p.currentCharacter() == '\n');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 1);
CHECK(p.currentCharacter() == 't');
advance(4);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 5);
CHECK(p.currentCharacter() == '2');
advance(1);
l = p.location();
CHECK(l.sectionStart == "test-2");
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 6);
CHECK(p.currentCharacter() == '\n');
advance(1);
CHECK(p.atEnd());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -313,23 +439,21 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
REQUIRE_NOTHROW(p1.expect<std::string>("test1"));
{
auto l = tokenize::Location(p1);
REQUIRE(l.rowStart() == 2u);
REQUIRE(l.columnStart() == 6u);
}
tokenize::Location l;
l = p1.location();
CHECK(l.rowStart == 2);
CHECK(l.columnStart == 6);
REQUIRE_NOTHROW(p1.expect<std::string>("test2"));
{
auto l = tokenize::Location(p1);
REQUIRE(l.rowStart() == 3u);
REQUIRE(l.columnStart() == 6u);
}
l = p1.location();
CHECK(l.rowStart == 3);
CHECK(l.columnStart == 6);
p1.skipWhiteSpace();
REQUIRE(p1.atEnd());
CHECK(p1.atEnd());
std::stringstream s2("test;");
tokenize::Tokenizer<> p2("input", s2);
@@ -340,7 +464,7 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
p2.skipWhiteSpace();
REQUIRE(p2.atEnd());
CHECK(p2.atEnd());
std::stringstream s3("/* comment at start */ test1 /* comment in between */ test2 /*");
tokenize::Tokenizer<> p3("input", s3);
@@ -352,7 +476,7 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
p3.skipWhiteSpace();
REQUIRE(p3.atEnd());
CHECK(p3.atEnd());
// Check that if there are no comments, the end is not accidentally truncated
std::stringstream s4("test foo bar");
@@ -364,5 +488,5 @@ TEST_CASE("[tokenizer] Comments are correctly removed", "[tokenizer]")
REQUIRE_NOTHROW(p4.expect<std::string>("foo"));
REQUIRE_NOTHROW(p4.expect<std::string>("bar"));
REQUIRE(p4.atEnd());
CHECK(p4.atEnd());
}