Reimplemented tokenizer with plain streams.

2017-06-18 15:09:42 +02:00
parent b9b987490b
commit 268fa84c9e
10 changed files with 122 additions and 166 deletions
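In short, the commit swaps the std::stringstream that used to back the tokenizer for a plain std::string buffer addressed by an explicit index. A minimal sketch of the resulting Stream state, assembled from the hunks below (surrounding members and methods elided):

    // Sketch of the state after this commit, per the hunks below: the whole input
    // lives in one contiguous std::string, and the read cursor is a plain size_t
    // index instead of std::stringstream's get/put pointers.
    class Stream
    {
    public:
        using Position = size_t;
        static const Position InvalidPosition;

    protected:
        std::string m_stream;                // all read sections, concatenated
        mutable Position m_position;         // current read position into m_stream
        std::vector<Delimiter> m_delimiters; // section start positions for location()
    };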

View File

@@ -1,6 +1,7 @@
#ifndef __TOKENIZE__STREAM_H
#define __TOKENIZE__STREAM_H
#include <cassert>
#include <experimental/filesystem>
#include <iostream>
#include <iterator>
@@ -8,6 +9,7 @@
#include <vector>
#include <tokenize/Location.h>
#include <tokenize/TokenizerException.h>
namespace tokenize
{
@@ -21,7 +23,8 @@ namespace tokenize
class Stream
{
public:
using Position = std::stringstream::pos_type;
using Position = size_t;
static const Position InvalidPosition;
struct Delimiter
{
@@ -47,14 +50,34 @@ class Stream
Position position() const;
Location location() const;
char currentCharacter() const;
void advance();
bool atEnd() const;
void check() const;
char currentCharacter() const
{
assert(m_position < m_stream.size());
// TODO: check if this should be secured by check()
return m_stream[m_position];
}
void advance()
{
check();
m_position++;
}
bool atEnd() const
{
return m_position >= m_stream.size();
}
void check() const
{
if (atEnd())
throw TokenizerException(location(), "reading past end of file");
}
protected:
mutable std::stringstream m_stream;
std::string m_stream;
mutable Position m_position;
std::vector<Delimiter> m_delimiters;
};
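As a rough usage sketch of the inlined accessors above: iterate with atEnd(), peek with currentCharacter(), and step with advance(), which now throws a TokenizerException via check() instead of relying on stream state. The helper below is hypothetical and not part of this diff.

    // Sketch: scanning a Stream with the new index-based accessors.
    // countNewlines is a hypothetical helper, not part of the library.
    size_t countNewlines(tokenize::Stream &stream)
    {
        size_t count = 0;

        while (!stream.atEnd())                    // m_position >= m_stream.size()
        {
            if (stream.currentCharacter() == '\n') // reads m_stream[m_position]
                count++;

            stream.advance();                      // check(), then m_position++
        }

        return count;
    }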

View File

@@ -45,8 +45,6 @@ class Tokenizer: public Stream, public TokenizerPolicy
void removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd);
char currentCharacter() const;
template<typename Type>
Type get();
@@ -115,8 +113,6 @@ Tokenizer<TokenizerPolicy>::Tokenizer(std::string streamName, std::istream &istr
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
{
check();
while (!atEnd() && TokenizerPolicy::isWhiteSpaceCharacter(currentCharacter()))
advance();
}
@@ -126,8 +122,6 @@ void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipBlankSpace()
{
check();
while (!atEnd() && TokenizerPolicy::isBlankCharacter(currentCharacter()))
advance();
}
@@ -137,9 +131,7 @@ void Tokenizer<TokenizerPolicy>::skipBlankSpace()
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipLine()
{
check();
while (currentCharacter() != '\n')
while (!atEnd() && currentCharacter() != '\n')
advance();
advance();
@@ -296,78 +288,51 @@ std::string Tokenizer<TokenizerPolicy>::getLine()
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd)
{
const auto inPosition = m_stream.tellg();
const auto outPosition = m_stream.tellp();
m_stream.seekg(0);
// TODO: move to appropriate place
for (auto &character : m_stream)
character = TokenizerPolicy::transformCharacter(character);
const auto removeRange =
[&](const auto &start, const auto &end)
{
assert(start != -1);
const auto previousPosition = m_position;
m_stream.clear();
m_stream.seekp(start);
m_stream.seekg(start);
assert(start < m_stream.size());
auto position = start;
m_position = start;
while (end == -1 || position < end)
while (m_position < end)
{
m_stream.ignore(1);
if (atEnd())
return;
m_stream.put(' ');
position += static_cast<std::streamoff>(1);
m_stream[m_position] = ' ';
m_position++;
}
m_position = previousPosition;
};
m_position = 0;
while (!atEnd())
{
Position startPosition = m_stream.tellg();
while (!atEnd())
{
startPosition = m_stream.tellg();
if (testAndSkip(startSequence))
break;
while (!atEnd() && !testAndSkip(startSequence))
advance();
}
Position endPosition = m_stream.tellg();
while (!atEnd())
{
endPosition = m_stream.tellg();
if (testAndSkip(endSequence))
break;
auto startPosition = m_position - startSequence.size();
while (!atEnd() && !testAndSkip(endSequence))
advance();
}
if (removeEnd)
endPosition = m_stream.tellg();
auto endPosition = (removeEnd) ? m_position : m_position - endSequence.size();
removeRange(startPosition, endPosition);
m_position = endPosition + 1;
}
m_stream.clear();
m_stream.seekg(inPosition);
m_stream.seekp(outPosition);
m_position = 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
char Tokenizer<TokenizerPolicy>::currentCharacter() const
{
return TokenizerPolicy::transformCharacter(Stream::currentCharacter());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
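The rewritten removeComments() above now blanks out comment ranges directly in the string buffer instead of seeking and re-writing through the stream, which keeps the buffer length and the positions of all remaining characters unchanged. A standalone sketch of that idea, assuming a plain buffer and using std::string::find in place of the member's testAndSkip() loop:

    // Sketch: overwrite every "start ... end" range in a plain buffer with spaces.
    #include <string>

    void blankComments(std::string &buffer, const std::string &startSequence,
        const std::string &endSequence, bool removeEnd)
    {
        size_t position = 0;

        while ((position = buffer.find(startSequence, position)) != std::string::npos)
        {
            auto endPosition = buffer.find(endSequence, position + startSequence.size());

            // Unterminated comment: blank everything up to the end of the buffer
            if (endPosition == std::string::npos)
                endPosition = buffer.size();
            else if (removeEnd)
                endPosition += endSequence.size();

            for (auto i = position; i < endPosition; i++)
                buffer[i] = ' ';

            position = endPosition;
        }
    }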
@@ -506,20 +471,20 @@ bool Tokenizer<TokenizerPolicy>::testImpl(const std::string &expectedValue)
if (!TokenizerPolicy::isWhiteSpaceCharacter(expectedValue.front()))
skipWhiteSpace();
const auto match = std::find_if(expectedValue.cbegin(), expectedValue.cend(),
[&](const auto &expectedCharacter)
{
const auto character = static_cast<char>(this->currentCharacter());
if (character != expectedCharacter)
return true;
this->advance();
});
return (match == expectedValue.cend());
for (size_t i = 0; i < expectedValue.size(); i++)
{
if (atEnd())
return false;
const auto character = currentCharacter();
if (character != expectedValue[i])
return false;
advance();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
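The indexed loop that replaces the std::find_if/lambda version above stops as soon as the buffer runs out or a character differs. As a free-standing analogue (unlike the member function, this sketch leaves the caller's position untouched on a failed match):

    // Sketch: match `expected` at `position` in a plain buffer; advance only on success.
    #include <string>

    bool matchAndSkip(const std::string &buffer, size_t &position, const std::string &expected)
    {
        size_t i = position;

        for (const char expectedCharacter : expected)
        {
            if (i >= buffer.size() || buffer[i] != expectedCharacter)
                return false;

            i++;
        }

        position = i;

        return true;
    }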

View File

@@ -3,8 +3,6 @@
#include <algorithm>
#include <fstream>
#include <tokenize/TokenizerException.h>
namespace tokenize
{
@@ -14,12 +12,14 @@ namespace tokenize
//
////////////////////////////////////////////////////////////////////////////////////////////////////
const Stream::Position Stream::InvalidPosition{std::numeric_limits<Position>::max()};
////////////////////////////////////////////////////////////////////////////////////////////////////
Stream::Stream()
: m_position{0}
{
std::setlocale(LC_NUMERIC, "C");
// Dont skip whitespace
m_stream.exceptions(std::istream::badbit);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -34,11 +34,18 @@ Stream::Stream(std::string streamName, std::istream &istream)
void Stream::read(std::string streamName, std::istream &istream)
{
// Store position of new section
const auto position = m_stream.tellp();
const auto position = m_stream.size();
m_delimiters.push_back({position, streamName});
m_stream << istream.rdbuf();
istream.seekg(0, std::ios::end);
const auto streamSize = istream.tellg();
istream.seekg(0, std::ios::beg);
const auto startPosition = m_stream.size();
m_stream.resize(m_stream.size() + streamSize);
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), m_stream.begin() + startPosition);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
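Sizing the target string once and copying through std::istreambuf_iterator avoids the incremental growth of streaming into a std::stringstream, at the cost of assuming the istream is seekable. The same pattern in isolation (appendStream is a hypothetical name, not part of this diff):

    // Sketch: append the full contents of a seekable istream to a std::string,
    // mirroring the new body of Stream::read above.
    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <string>

    void appendStream(std::string &buffer, std::istream &istream)
    {
        // Measure the stream so the buffer is resized exactly once
        istream.seekg(0, std::ios::end);
        const auto streamSize = istream.tellg();
        istream.seekg(0, std::ios::beg);

        const auto startPosition = buffer.size();
        buffer.resize(buffer.size() + static_cast<size_t>(streamSize));

        std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(),
            buffer.begin() + startPosition);
    }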
@@ -57,23 +64,21 @@ void Stream::read(const std::experimental::filesystem::path &path)
void Stream::reset()
{
m_stream.clear();
seek(0);
m_position = 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::seek(Position position)
{
m_stream.clear();
m_stream.seekg(position);
m_position = position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
typename Stream::Position Stream::position() const
{
return m_stream.tellg();
return m_position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -94,8 +99,7 @@ Location Stream::location() const
currentFile = m_delimiters.crbegin();
// Go back to beginning of section
m_stream.clear();
m_stream.seekg(currentFile->position);
m_position = currentFile->position;
size_t row = 1;
size_t column = 1;
@@ -103,9 +107,9 @@ Location Stream::location() const
// Compute the location character by character
while (true)
{
if (currentPosition == -1 && atEnd())
if (currentPosition >= m_stream.size() && atEnd())
break;
else if (currentPosition >= 0 && position() >= currentPosition)
else if (currentPosition < m_stream.size() && position() >= currentPosition)
break;
const auto character = currentCharacter();
@@ -118,7 +122,7 @@ Location Stream::location() const
else if (std::isblank(character) || std::isprint(character))
column++;
m_stream.ignore(1);
m_position++;
}
return {currentFile->sectionName.c_str(), currentFile->sectionName.c_str(), row, row, column, column};
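With positions now being plain indices, location() recomputes row and column on demand by walking the buffer from the start of the current section up to the current position, as the loop above shows. Reduced to a sketch (a hypothetical free function; the member additionally resolves the current section via m_delimiters and only counts blank and printable characters toward the column):

    // Sketch: derive row and column of `position` by scanning from `sectionStart`;
    // simplified so that every non-newline character advances the column.
    #include <string>

    void computeLocation(const std::string &buffer, size_t sectionStart, size_t position,
        size_t &row, size_t &column)
    {
        row = 1;
        column = 1;

        for (auto i = sectionStart; i < position && i < buffer.size(); i++)
        {
            if (buffer[i] == '\n')
            {
                row++;
                column = 1;
            }
            else
                column++;
        }
    }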
@@ -126,38 +130,4 @@ Location Stream::location() const
////////////////////////////////////////////////////////////////////////////////////////////////////
char Stream::currentCharacter() const
{
// TODO: check if this should be secured by check()
return m_stream.peek();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
bool Stream::atEnd() const
{
return position() == -1;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::check() const
{
if (atEnd())
throw TokenizerException(location(), "reading past end of file");
if (m_stream.fail())
throw TokenizerException(location());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
void Stream::advance()
{
check();
m_stream.ignore(1);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}