Reimplemented tokenizer with plain streams.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#ifndef __TOKENIZE__STREAM_H
|
||||
#define __TOKENIZE__STREAM_H
|
||||
|
||||
#include <cassert>
|
||||
#include <experimental/filesystem>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
@@ -8,6 +9,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <tokenize/Location.h>
|
||||
#include <tokenize/TokenizerException.h>
|
||||
|
||||
namespace tokenize
|
||||
{
|
||||
@@ -21,7 +23,8 @@ namespace tokenize
|
||||
class Stream
|
||||
{
|
||||
public:
|
||||
using Position = std::stringstream::pos_type;
|
||||
using Position = size_t;
|
||||
static const Position InvalidPosition;
|
||||
|
||||
struct Delimiter
|
||||
{
|
||||
@@ -47,14 +50,34 @@ class Stream
|
||||
Position position() const;
|
||||
Location location() const;
|
||||
|
||||
char currentCharacter() const;
|
||||
void advance();
|
||||
bool atEnd() const;
|
||||
char currentCharacter() const
|
||||
{
|
||||
assert(m_position < m_stream.size());
|
||||
|
||||
void check() const;
|
||||
// TODO: decide whether this should be guarded by check() (which throws at the end of the content) rather than only by the assert above
|
||||
return m_stream[m_position];
|
||||
}
|
||||
|
||||
// Moves the read position forward by one character.
// check() runs first, so calling this while atEnd() is true throws a
// TokenizerException instead of incrementing m_position.
void advance()
|
||||
{
|
||||
check();
|
||||
m_position++;
|
||||
}
|
||||
|
||||
// Reports whether the read position has reached the end of the content:
// returns true when m_position is equal to or greater than m_stream.size().
bool atEnd() const
|
||||
{
|
||||
return m_position >= m_stream.size();
|
||||
}
|
||||
|
||||
// Guards read operations against running off the end of the content.
// Throws TokenizerException, carrying the current location() and the message
// below, when atEnd() is true; otherwise it does nothing.
void check() const
|
||||
{
|
||||
|
||||
if (atEnd())
|
||||
throw TokenizerException(location(), "reading past end of file");
|
||||
}
|
||||
|
||||
protected:
|
||||
mutable std::stringstream m_stream;
|
||||
std::string m_stream;
|
||||
mutable Position m_position;
|
||||
|
||||
std::vector<Delimiter> m_delimiters;
|
||||
};
|
||||
|
@@ -45,8 +45,6 @@ class Tokenizer: public Stream, public TokenizerPolicy
|
||||
|
||||
void removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd);
|
||||
|
||||
char currentCharacter() const;
|
||||
|
||||
template<typename Type>
|
||||
Type get();
|
||||
|
||||
@@ -115,8 +113,6 @@ Tokenizer<TokenizerPolicy>::Tokenizer(std::string streamName, std::istream &istr
|
||||
template<class TokenizerPolicy>
|
||||
void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
|
||||
{
|
||||
check();
|
||||
|
||||
while (!atEnd() && TokenizerPolicy::isWhiteSpaceCharacter(currentCharacter()))
|
||||
advance();
|
||||
}
|
||||
@@ -126,8 +122,6 @@ void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
|
||||
template<class TokenizerPolicy>
|
||||
void Tokenizer<TokenizerPolicy>::skipBlankSpace()
|
||||
{
|
||||
check();
|
||||
|
||||
while (!atEnd() && TokenizerPolicy::isBlankCharacter(currentCharacter()))
|
||||
advance();
|
||||
}
|
||||
@@ -137,9 +131,7 @@ void Tokenizer<TokenizerPolicy>::skipBlankSpace()
|
||||
template<class TokenizerPolicy>
|
||||
void Tokenizer<TokenizerPolicy>::skipLine()
|
||||
{
|
||||
check();
|
||||
|
||||
while (currentCharacter() != '\n')
|
||||
while (!atEnd() && currentCharacter() != '\n')
|
||||
advance();
|
||||
|
||||
advance();
|
||||
@@ -296,78 +288,51 @@ std::string Tokenizer<TokenizerPolicy>::getLine()
|
||||
template<class TokenizerPolicy>
|
||||
void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd)
|
||||
{
|
||||
const auto inPosition = m_stream.tellg();
|
||||
const auto outPosition = m_stream.tellp();
|
||||
|
||||
m_stream.seekg(0);
|
||||
// TODO: move this character transformation out of removeComments to a more appropriate place
|
||||
for (auto &character : m_stream)
|
||||
character = TokenizerPolicy::transformCharacter(character);
|
||||
|
||||
const auto removeRange =
|
||||
[&](const auto &start, const auto &end)
|
||||
{
|
||||
assert(start != -1);
|
||||
const auto previousPosition = m_position;
|
||||
|
||||
m_stream.clear();
|
||||
m_stream.seekp(start);
|
||||
m_stream.seekg(start);
|
||||
assert(start < m_stream.size());
|
||||
|
||||
auto position = start;
|
||||
m_position = start;
|
||||
|
||||
while (end == -1 || position < end)
|
||||
while (m_position < end)
|
||||
{
|
||||
m_stream.ignore(1);
|
||||
|
||||
if (atEnd())
|
||||
return;
|
||||
|
||||
m_stream.put(' ');
|
||||
position += static_cast<std::streamoff>(1);
|
||||
m_stream[m_position] = ' ';
|
||||
m_position++;
|
||||
}
|
||||
|
||||
m_position = previousPosition;
|
||||
};
|
||||
|
||||
m_position = 0;
|
||||
|
||||
while (!atEnd())
|
||||
{
|
||||
Position startPosition = m_stream.tellg();
|
||||
|
||||
while (!atEnd())
|
||||
{
|
||||
startPosition = m_stream.tellg();
|
||||
|
||||
if (testAndSkip(startSequence))
|
||||
break;
|
||||
|
||||
while (!atEnd() && !testAndSkip(startSequence))
|
||||
advance();
|
||||
}
|
||||
|
||||
Position endPosition = m_stream.tellg();
|
||||
|
||||
while (!atEnd())
|
||||
{
|
||||
endPosition = m_stream.tellg();
|
||||
|
||||
if (testAndSkip(endSequence))
|
||||
break;
|
||||
auto startPosition = m_position - startSequence.size();
|
||||
|
||||
while (!atEnd() && !testAndSkip(endSequence))
|
||||
advance();
|
||||
}
|
||||
|
||||
if (removeEnd)
|
||||
endPosition = m_stream.tellg();
|
||||
auto endPosition = (removeEnd) ? m_position : m_position - endSequence.size();
|
||||
|
||||
removeRange(startPosition, endPosition);
|
||||
|
||||
m_position = endPosition + 1;
|
||||
}
|
||||
|
||||
m_stream.clear();
|
||||
|
||||
m_stream.seekg(inPosition);
|
||||
m_stream.seekp(outPosition);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<class TokenizerPolicy>
|
||||
char Tokenizer<TokenizerPolicy>::currentCharacter() const
|
||||
{
|
||||
return TokenizerPolicy::transformCharacter(Stream::currentCharacter());
|
||||
m_position = 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -506,20 +471,20 @@ bool Tokenizer<TokenizerPolicy>::testImpl(const std::string &expectedValue)
|
||||
if (!TokenizerPolicy::isWhiteSpaceCharacter(expectedValue.front()))
|
||||
skipWhiteSpace();
|
||||
|
||||
const auto match = std::find_if(expectedValue.cbegin(), expectedValue.cend(),
|
||||
[&](const auto &expectedCharacter)
|
||||
{
|
||||
const auto character = static_cast<char>(this->currentCharacter());
|
||||
|
||||
if (character != expectedCharacter)
|
||||
return true;
|
||||
|
||||
this->advance();
|
||||
|
||||
for (size_t i = 0; i < expectedValue.size(); i++)
|
||||
{
|
||||
if (atEnd())
|
||||
return false;
|
||||
});
|
||||
|
||||
return (match == expectedValue.cend());
|
||||
const auto character = currentCharacter();
|
||||
|
||||
if (character != expectedValue[i])
|
||||
return false;
|
||||
|
||||
advance();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
@@ -3,8 +3,6 @@
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
|
||||
#include <tokenize/TokenizerException.h>
|
||||
|
||||
namespace tokenize
|
||||
{
|
||||
|
||||
@@ -14,12 +12,14 @@ namespace tokenize
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
const Stream::Position Stream::InvalidPosition{std::numeric_limits<Position>::max()};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
Stream::Stream()
|
||||
: m_position{0}
|
||||
{
|
||||
std::setlocale(LC_NUMERIC, "C");
|
||||
|
||||
// Don’t skip whitespace
|
||||
m_stream.exceptions(std::istream::badbit);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -34,11 +34,18 @@ Stream::Stream(std::string streamName, std::istream &istream)
|
||||
void Stream::read(std::string streamName, std::istream &istream)
|
||||
{
|
||||
// Store position of new section
|
||||
const auto position = m_stream.tellp();
|
||||
const auto position = m_stream.size();
|
||||
|
||||
m_delimiters.push_back({position, streamName});
|
||||
|
||||
m_stream << istream.rdbuf();
|
||||
istream.seekg(0, std::ios::end);
|
||||
const auto streamSize = istream.tellg();
|
||||
istream.seekg(0, std::ios::beg);
|
||||
|
||||
const auto startPosition = m_stream.size();
|
||||
|
||||
m_stream.resize(m_stream.size() + streamSize);
|
||||
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), m_stream.begin() + startPosition);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -57,23 +64,21 @@ void Stream::read(const std::experimental::filesystem::path &path)
|
||||
|
||||
void Stream::reset()
|
||||
{
|
||||
m_stream.clear();
|
||||
seek(0);
|
||||
m_position = 0;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::seek(Position position)
|
||||
{
|
||||
m_stream.clear();
|
||||
m_stream.seekg(position);
|
||||
m_position = position;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typename Stream::Position Stream::position() const
|
||||
{
|
||||
return m_stream.tellg();
|
||||
return m_position;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -94,8 +99,7 @@ Location Stream::location() const
|
||||
currentFile = m_delimiters.crbegin();
|
||||
|
||||
// Go back to beginning of section
|
||||
m_stream.clear();
|
||||
m_stream.seekg(currentFile->position);
|
||||
m_position = currentFile->position;
|
||||
|
||||
size_t row = 1;
|
||||
size_t column = 1;
|
||||
@@ -103,9 +107,9 @@ Location Stream::location() const
|
||||
// Compute the location character by character
|
||||
while (true)
|
||||
{
|
||||
if (currentPosition == -1 && atEnd())
|
||||
if (currentPosition >= m_stream.size() && atEnd())
|
||||
break;
|
||||
else if (currentPosition >= 0 && position() >= currentPosition)
|
||||
else if (currentPosition < m_stream.size() && position() >= currentPosition)
|
||||
break;
|
||||
|
||||
const auto character = currentCharacter();
|
||||
@@ -118,7 +122,7 @@ Location Stream::location() const
|
||||
else if (std::isblank(character) || std::isprint(character))
|
||||
column++;
|
||||
|
||||
m_stream.ignore(1);
|
||||
m_position++;
|
||||
}
|
||||
|
||||
return {currentFile->sectionName.c_str(), currentFile->sectionName.c_str(), row, row, column, column};
|
||||
@@ -126,38 +130,4 @@ Location Stream::location() const
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char Stream::currentCharacter() const
|
||||
{
|
||||
// TODO: check if this should be secured by check()
|
||||
return m_stream.peek();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool Stream::atEnd() const
|
||||
{
|
||||
return position() == -1;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::check() const
|
||||
{
|
||||
if (atEnd())
|
||||
throw TokenizerException(location(), "reading past end of file");
|
||||
|
||||
if (m_stream.fail())
|
||||
throw TokenizerException(location());
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void Stream::advance()
|
||||
{
|
||||
check();
|
||||
m_stream.ignore(1);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user