Reimplemented tokenizer with plain streams.

This commit is contained in:
Patrick Lühne 2017-06-18 15:09:42 +02:00
parent b9b987490b
commit 268fa84c9e
Signed by: patrick
GPG Key ID: 05F3611E97A70ABF
10 changed files with 122 additions and 166 deletions

View File

@ -23,10 +23,10 @@ namespace detail
// Stores the given context and domain and initializes the four section positions
// (parameters, precondition, effect, vars) with a "not set" sentinel.
// NOTE(review): the initializer list below shows every position twice — once with -1 and once
// with tokenize::Stream::InvalidPosition. These read as the two sides of the change (no +/-
// markers are visible here); only one set can belong to the compiled constructor.
ActionParser::ActionParser(Context &context, ast::Domain &domain)
: m_context{context},
m_domain{domain},
m_parametersPosition{-1},
m_preconditionPosition{-1},
m_effectPosition{-1},
m_varsPosition{-1}
m_parametersPosition{tokenize::Stream::InvalidPosition},
m_preconditionPosition{tokenize::Stream::InvalidPosition},
m_effectPosition{tokenize::Stream::InvalidPosition},
m_varsPosition{tokenize::Stream::InvalidPosition}
{
}
@ -40,26 +40,26 @@ ast::ActionPointer ActionParser::parse()
auto &tokenizer = m_context.tokenizer;
if (m_parametersPosition != -1)
if (m_parametersPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_parametersPosition);
parseParameterSection(*action);
}
// For compatibility with old PDDL versions, vars sections are parsed in addition to parameters
if (m_varsPosition != -1)
if (m_varsPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_varsPosition);
parseVarsSection(*action);
}
if (m_preconditionPosition != -1)
if (m_preconditionPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_preconditionPosition);
parsePreconditionSection(*action);
}
if (m_effectPosition != -1)
if (m_effectPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_effectPosition);
parseEffectSection(*action);
@ -84,7 +84,7 @@ void ActionParser::findSections(ast::Action &action)
const auto setSectionPosition =
[&](const std::string &sectionName, auto &sectionPosition, const auto value, bool unique = false)
{
if (unique && sectionPosition != -1)
if (unique && sectionPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(value);
throw ParserException(tokenizer.location(), "only one “:" + sectionName + "” section allowed");

View File

@ -19,8 +19,8 @@ namespace detail
void parseAndAddUntypedConstantDeclaration(Context &context, ast::ConstantDeclarations &constantDeclarations)
{
auto &tokenizer = context.tokenizer;
auto constantName = tokenizer.getIdentifier();
auto constantName = tokenizer.getIdentifier();
assert(constantName != "-");
constantDeclarations.emplace_back(std::make_unique<ast::ConstantDeclaration>(std::move(constantName)));

View File

@ -19,8 +19,8 @@ namespace detail
// Stores the given context and initializes the domain and problem positions with a
// "not set" sentinel.
// NOTE(review): both the -1 initializers and the tokenize::Stream::InvalidPosition initializers
// are listed; they appear to be the two sides of the change rather than one initializer list.
DescriptionParser::DescriptionParser(Context &context)
: m_context{context},
m_domainPosition{-1},
m_problemPosition{-1}
m_domainPosition{tokenize::Stream::InvalidPosition},
m_problemPosition{tokenize::Stream::InvalidPosition}
{
}
@ -33,7 +33,7 @@ ast::Description DescriptionParser::parse()
findSections();
if (m_domainPosition == -1)
if (m_domainPosition == tokenize::Stream::InvalidPosition)
throw ParserException("no PDDL domain specified");
tokenizer.seek(m_domainPosition);
@ -41,7 +41,7 @@ ast::Description DescriptionParser::parse()
auto domain = DomainParser(m_context).parse();
// If no problem is given, return just the domain
if (m_problemPosition == -1)
if (m_problemPosition == tokenize::Stream::InvalidPosition)
return {std::move(domain), std::experimental::nullopt};
tokenizer.seek(m_problemPosition);
@ -86,7 +86,7 @@ void DescriptionParser::findSections()
if (tokenizer.testAndSkip<std::string>("domain"))
{
if (m_domainPosition != -1)
if (m_domainPosition != tokenize::Stream::InvalidPosition)
throw ParserException(tokenizer.location(), "PDDL description may not contain two domains");
m_domainPosition = position;
@ -95,7 +95,7 @@ void DescriptionParser::findSections()
}
else if (m_context.tokenizer.testAndSkip<std::string>("problem"))
{
if (m_problemPosition != -1)
if (m_problemPosition != tokenize::Stream::InvalidPosition)
throw ParserException("PDDL description may not contain two problems currently");
m_problemPosition = position;

View File

@ -22,10 +22,10 @@ namespace detail
// Stores the given context and initializes the requirements, types, constants and predicates
// positions with a "not set" sentinel.
// NOTE(review): both the -1 initializers and the tokenize::Stream::InvalidPosition initializers
// are listed; they appear to be the two sides of the change rather than one initializer list.
DomainParser::DomainParser(Context &context)
: m_context{context},
m_requirementsPosition{-1},
m_typesPosition{-1},
m_constantsPosition{-1},
m_predicatesPosition{-1}
m_requirementsPosition{tokenize::Stream::InvalidPosition},
m_typesPosition{tokenize::Stream::InvalidPosition},
m_constantsPosition{tokenize::Stream::InvalidPosition},
m_predicatesPosition{tokenize::Stream::InvalidPosition}
{
}
@ -39,32 +39,32 @@ ast::DomainPointer DomainParser::parse()
auto &tokenizer = m_context.tokenizer;
if (m_requirementsPosition != -1)
if (m_requirementsPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_requirementsPosition);
parseRequirementSection(*domain);
}
if (m_typesPosition != -1)
if (m_typesPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_typesPosition);
parseTypeSection(*domain);
}
if (m_constantsPosition != -1)
if (m_constantsPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_constantsPosition);
parseConstantSection(*domain);
}
if (m_predicatesPosition != -1)
if (m_predicatesPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_predicatesPosition);
parsePredicateSection(*domain);
}
for (size_t i = 0; i < m_actionPositions.size(); i++)
if (m_actionPositions[i] != -1)
if (m_actionPositions[i] != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_actionPositions[i]);
parseActionSection(*domain);
@ -93,7 +93,7 @@ void DomainParser::findSections(ast::Domain &domain)
const auto setSectionPosition =
[&](const std::string &sectionName, auto &sectionPosition, const auto value, bool unique = false)
{
if (unique && sectionPosition != -1)
if (unique && sectionPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(value);
throw ParserException(tokenizer.location(), "only one “:" + sectionName + "” section allowed");
@ -125,7 +125,7 @@ void DomainParser::findSections(ast::Domain &domain)
setSectionPosition("predicates", m_predicatesPosition, position, true);
else if (tokenizer.testIdentifierAndSkip("action"))
{
m_actionPositions.emplace_back(-1);
m_actionPositions.emplace_back(tokenize::Stream::InvalidPosition);
setSectionPosition("action", m_actionPositions.back(), position);
}
else if (tokenizer.testIdentifierAndSkip("functions")

View File

@ -19,8 +19,6 @@ ast::PrimitiveTypePointer parsePrimitiveType(Context &context, ast::Domain &doma
auto &tokenizer = context.tokenizer;
auto &types = domain.types;
tokenizer.skipWhiteSpace();
auto typeName = tokenizer.getIdentifier();
if (typeName.empty())

View File

@ -22,11 +22,11 @@ namespace detail
// Stores the given context and domain and initializes the domain, requirements, objects,
// initial-state and goal positions with a "not set" sentinel.
// NOTE(review): both the -1 initializers and the tokenize::Stream::InvalidPosition initializers
// are listed; they appear to be the two sides of the change rather than one initializer list.
ProblemParser::ProblemParser(Context &context, ast::Domain &domain)
: m_context{context},
m_domain{domain},
m_domainPosition{-1},
m_requirementsPosition{-1},
m_objectsPosition{-1},
m_initialStatePosition{-1},
m_goalPosition{-1}
m_domainPosition{tokenize::Stream::InvalidPosition},
m_requirementsPosition{tokenize::Stream::InvalidPosition},
m_objectsPosition{tokenize::Stream::InvalidPosition},
m_initialStatePosition{tokenize::Stream::InvalidPosition},
m_goalPosition{tokenize::Stream::InvalidPosition}
{
}
@ -40,31 +40,31 @@ ast::ProblemPointer ProblemParser::parse()
auto &tokenizer = m_context.tokenizer;
if (m_domainPosition == -1)
if (m_domainPosition == tokenize::Stream::InvalidPosition)
throw ParserException(tokenizer.location(), "problem description does not specify a corresponding domain");
tokenizer.seek(m_domainPosition);
parseDomainSection(*problem);
if (m_requirementsPosition != -1)
if (m_requirementsPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_requirementsPosition);
parseRequirementSection(*problem);
}
if (m_objectsPosition != -1)
if (m_objectsPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(m_objectsPosition);
parseObjectSection(*problem);
}
if (m_initialStatePosition == -1)
if (m_initialStatePosition == tokenize::Stream::InvalidPosition)
throw ParserException(tokenizer.location(), "problem description does not specify an initial state");
tokenizer.seek(m_initialStatePosition);
parseInitialStateSection(*problem);
if (m_goalPosition == -1)
if (m_goalPosition == tokenize::Stream::InvalidPosition)
throw ParserException(tokenizer.location(), "problem description does not specify a goal");
tokenizer.seek(m_goalPosition);
@ -91,7 +91,7 @@ void ProblemParser::findSections(ast::Problem &problem)
const auto setSectionPosition =
[&](const std::string &sectionName, auto &sectionPosition, const auto value, bool unique = false)
{
if (unique && sectionPosition != -1)
if (unique && sectionPosition != tokenize::Stream::InvalidPosition)
{
tokenizer.seek(value);
throw ParserException(tokenizer.location(), "only one “:" + sectionName + "” section allowed");

View File

@ -19,9 +19,9 @@ namespace detail
void parseAndAddUntypedVariableDeclaration(Context &context, ast::VariableDeclarations &variableDeclarations)
{
auto &tokenizer = context.tokenizer;
tokenizer.expect<std::string>("?");
auto variableName = tokenizer.getIdentifier();
assert(variableName != "-");
variableDeclarations.emplace_back(std::make_unique<ast::VariableDeclaration>(std::move(variableName)));

View File

@ -1,6 +1,7 @@
#ifndef __TOKENIZE__STREAM_H
#define __TOKENIZE__STREAM_H
#include <cassert>
#include <experimental/filesystem>
#include <iostream>
#include <iterator>
@ -8,6 +9,7 @@
#include <vector>
#include <tokenize/Location.h>
#include <tokenize/TokenizerException.h>
namespace tokenize
{
@ -21,7 +23,8 @@ namespace tokenize
class Stream
{
public:
using Position = std::stringstream::pos_type;
using Position = size_t;
static const Position InvalidPosition;
struct Delimiter
{
@ -47,14 +50,34 @@ class Stream
Position position() const;
Location location() const;
char currentCharacter() const;
void advance();
bool atEnd() const;
// Returns the character stored at the current read position of the buffer.
// The bounds check is an assert only, so it is compiled out when NDEBUG is defined; callers
// have to make sure the position is not at the end before calling this.
// NOTE(review): the `void check() const;` line inside the body looks like a member declaration
// from the other side of the change that ended up between the body lines — confirm against the
// real header before relying on this listing.
char currentCharacter() const
{
assert(m_position < m_stream.size());
void check() const;
// TODO: check if this should be secured by check()
return m_stream[m_position];
}
// Moves the read position forward by one character.
// check() is called first, so advancing while already at the end throws instead of moving
// the position past the buffer.
void advance()
{
check();
m_position++;
}
// Returns true when the read position is at or beyond the size of the buffer, i.e. there is
// no character left to read.
bool atEnd() const
{
return m_position >= m_stream.size();
}
// Throws a TokenizerException carrying the current location and the message
// "reading past end of file" when the read position is at the end; otherwise does nothing.
void check() const
{
if (atEnd())
throw TokenizerException(location(), "reading past end of file");
}
protected:
mutable std::stringstream m_stream;
std::string m_stream;
mutable Position m_position;
std::vector<Delimiter> m_delimiters;
};

View File

@ -45,8 +45,6 @@ class Tokenizer: public Stream, public TokenizerPolicy
void removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd);
char currentCharacter() const;
template<typename Type>
Type get();
@ -115,8 +113,6 @@ Tokenizer<TokenizerPolicy>::Tokenizer(std::string streamName, std::istream &istr
// Advances the read position past all consecutive characters that the policy classifies as
// white space (TokenizerPolicy::isWhiteSpaceCharacter), stopping at the end of the input.
// NOTE(review): the surrounding hunk shrinks this function by two lines; the leading `check();`
// is presumably one of the lines being dropped. With it, calling this at the end of the input
// throws; without it, the loop condition makes the call a no-op there.
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
{
check();
while (!atEnd() && TokenizerPolicy::isWhiteSpaceCharacter(currentCharacter()))
advance();
}
@ -126,8 +122,6 @@ void Tokenizer<TokenizerPolicy>::skipWhiteSpace()
// Advances the read position past all consecutive characters that the policy classifies as
// blank (TokenizerPolicy::isBlankCharacter), stopping at the end of the input.
// NOTE(review): the surrounding hunk shrinks this function by two lines; the leading `check();`
// is presumably one of the lines being dropped. With it, calling this at the end of the input
// throws; without it, the loop condition makes the call a no-op there.
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipBlankSpace()
{
check();
while (!atEnd() && TokenizerPolicy::isBlankCharacter(currentCharacter()))
advance();
}
@ -137,9 +131,7 @@ void Tokenizer<TokenizerPolicy>::skipBlankSpace()
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::skipLine()
{
check();
while (currentCharacter() != '\n')
while (!atEnd() && currentCharacter() != '\n')
advance();
advance();
@ -296,78 +288,51 @@ std::string Tokenizer<TokenizerPolicy>::getLine()
// Blanks out comments in the buffered input: each region that begins with startSequence and
// runs up to endSequence is overwritten with space characters, one per original character.
// When removeEnd is true the end sequence itself is overwritten as well; otherwise the
// overwritten region stops right before it.
//
// NOTE(review): this listing interleaves two implementations without change markers. Lines
// using tellg/tellp/seekg/seekp/ignore/put treat m_stream as a std::stringstream; lines using
// m_stream[...], m_stream.size() and m_position treat it as a std::string with an index. The
// tail of the listing also contains the head of Tokenizer::currentCharacter(), which appears to
// be removed by the same change. Read each variant on its own.
template<class TokenizerPolicy>
void Tokenizer<TokenizerPolicy>::removeComments(const std::string &startSequence, const std::string &endSequence, bool removeEnd)
{
const auto inPosition = m_stream.tellg();
const auto outPosition = m_stream.tellp();
m_stream.seekg(0);
// TODO: move to appropriate place
// Applies the policy's character transformation to every buffered character in place
for (auto &character : m_stream)
character = TokenizerPolicy::transformCharacter(character);
// Helper that overwrites the characters in [start, end) with spaces
const auto removeRange =
[&](const auto &start, const auto &end)
{
assert(start != -1);
const auto previousPosition = m_position;
m_stream.clear();
m_stream.seekp(start);
m_stream.seekg(start);
assert(start < m_stream.size());
auto position = start;
m_position = start;
while (end == -1 || position < end)
while (m_position < end)
{
m_stream.ignore(1);
// Reaching the end of the input returns early, without restoring previousPosition
if (atEnd())
return;
m_stream.put(' ');
position += static_cast<std::streamoff>(1);
m_stream[m_position] = ' ';
m_position++;
}
m_position = previousPosition;
};
while (!atEnd())
{
Position startPosition = m_stream.tellg();
m_position = 0;
while (!atEnd())
{
startPosition = m_stream.tellg();
if (testAndSkip(startSequence))
break;
// Scan forward until the start sequence has been consumed or the input ends
while (!atEnd() && !testAndSkip(startSequence))
advance();
}
Position endPosition = m_stream.tellg();
while (!atEnd())
{
endPosition = m_stream.tellg();
if (testAndSkip(endSequence))
break;
// NOTE(review): this is computed even when the scan above stopped at the end of the input
// without matching startSequence; verify that this case cannot blank trailing
// non-comment characters
auto startPosition = m_position - startSequence.size();
// Scan forward until the end sequence has been consumed or the input ends
while (!atEnd() && !testAndSkip(endSequence))
advance();
}
if (removeEnd)
endPosition = m_stream.tellg();
// NOTE(review): same concern for the end — if the input ended without matching endSequence
// and removeEnd is false, endSequence.size() is still subtracted; confirm this is intended
auto endPosition = (removeEnd) ? m_position : m_position - endSequence.size();
removeRange(startPosition, endPosition);
m_position = endPosition + 1;
}
m_stream.clear();
m_stream.seekg(inPosition);
m_stream.seekp(outPosition);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
template<class TokenizerPolicy>
char Tokenizer<TokenizerPolicy>::currentCharacter() const
{
return TokenizerPolicy::transformCharacter(Stream::currentCharacter());
// The index-based variant of removeComments ends by rewinding to the start of the buffer
m_position = 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -506,20 +471,20 @@ bool Tokenizer<TokenizerPolicy>::testImpl(const std::string &expectedValue)
if (!TokenizerPolicy::isWhiteSpaceCharacter(expectedValue.front()))
skipWhiteSpace();
const auto match = std::find_if(expectedValue.cbegin(), expectedValue.cend(),
[&](const auto &expectedCharacter)
for (size_t i = 0; i < expectedValue.size(); i++)
{
const auto character = static_cast<char>(this->currentCharacter());
if (character != expectedCharacter)
return true;
this->advance();
if (atEnd())
return false;
});
return (match == expectedValue.cend());
const auto character = currentCharacter();
if (character != expectedValue[i])
return false;
advance();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////

View File

@ -3,8 +3,6 @@
#include <algorithm>
#include <fstream>
#include <tokenize/TokenizerException.h>
namespace tokenize
{
@ -14,12 +12,14 @@ namespace tokenize
//
////////////////////////////////////////////////////////////////////////////////////////////////////
// Sentinel for "no position": the largest value representable by Stream::Position.
// NOTE(review): std::numeric_limits requires <limits>; that include is not visible in the
// include lines shown for this file — confirm it is present or add it.
const Stream::Position Stream::InvalidPosition{std::numeric_limits<Position>::max()};
////////////////////////////////////////////////////////////////////////////////////////////////////
// Default constructor: starts with the read position at 0 and switches the numeric category
// of the C locale to "C" (std::setlocale affects the global C locale, not just this object).
// NOTE(review): the m_stream.exceptions(...) call and the comment above it only make sense
// while m_stream is a std::stringstream; they presumably belong to the removed side of the
// change.
Stream::Stream()
: m_position{0}
{
std::setlocale(LC_NUMERIC, "C");
// Don't skip whitespace
m_stream.exceptions(std::istream::badbit);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -34,11 +34,18 @@ Stream::Stream(std::string streamName, std::istream &istream)
// Appends the complete contents of istream to the internal buffer and records a delimiter
// (start offset within the buffer plus streamName) marking where this input begins.
//
// NOTE(review): two variants are interleaved here without change markers. One takes the offset
// from m_stream.tellp() and appends with `m_stream << istream.rdbuf()` (std::stringstream);
// the other takes it from m_stream.size() and appends via resize + std::copy (std::string).
// `position` is therefore declared twice in this listing; only one declaration can be live.
void Stream::read(std::string streamName, std::istream &istream)
{
// Store position of new section
const auto position = m_stream.tellp();
const auto position = m_stream.size();
m_delimiters.push_back({position, streamName});
m_stream << istream.rdbuf();
// Determine the input length by seeking to the end and back to the beginning
// NOTE(review): tellg() yields pos_type(-1) on failure (for example on input that cannot be
// seeked); the result is used unchecked in the resize below — confirm all callers pass
// seekable streams or add a check
istream.seekg(0, std::ios::end);
const auto streamSize = istream.tellg();
istream.seekg(0, std::ios::beg);
// NOTE(review): holds the same value as the size-based `position` above, since the buffer is
// not modified in between
const auto startPosition = m_stream.size();
m_stream.resize(m_stream.size() + streamSize);
std::copy(std::istreambuf_iterator<char>(istream), std::istreambuf_iterator<char>(), m_stream.begin() + startPosition);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -57,23 +64,21 @@ void Stream::read(const std::experimental::filesystem::path &path)
// Rewinds the read position to the beginning of the buffered input.
// NOTE(review): `m_stream.clear(); seek(0);` and `m_position = 0;` appear to be the two sides
// of the change. On a std::stringstream, clear() resets the error flags; on a std::string it
// would erase the contents, so the first pair only fits the stringstream variant.
void Stream::reset()
{
m_stream.clear();
seek(0);
m_position = 0;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Moves the read position to the given position.
// NOTE(review): `m_stream.clear(); m_stream.seekg(position);` is the std::stringstream form and
// `m_position = position;` the index form; they appear to be the two sides of the change.
// The index form stores the value without any range check.
void Stream::seek(Position position)
{
m_stream.clear();
m_stream.seekg(position);
m_position = position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Returns the current read position.
// NOTE(review): two return statements are listed — m_stream.tellg() (std::stringstream form)
// and m_position (index form); they appear to be the two sides of the change.
typename Stream::Position Stream::position() const
{
return m_stream.tellg();
return m_position;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -94,8 +99,7 @@ Location Stream::location() const
currentFile = m_delimiters.crbegin();
// Go back to beginning of section
m_stream.clear();
m_stream.seekg(currentFile->position);
m_position = currentFile->position;
size_t row = 1;
size_t column = 1;
@ -103,9 +107,9 @@ Location Stream::location() const
// Compute the location character by character
while (true)
{
if (currentPosition == -1 && atEnd())
if (currentPosition >= m_stream.size() && atEnd())
break;
else if (currentPosition >= 0 && position() >= currentPosition)
else if (currentPosition < m_stream.size() && position() >= currentPosition)
break;
const auto character = currentCharacter();
@ -118,7 +122,7 @@ Location Stream::location() const
else if (std::isblank(character) || std::isprint(character))
column++;
m_stream.ignore(1);
m_position++;
}
return {currentFile->sectionName.c_str(), currentFile->sectionName.c_str(), row, row, column, column};
@ -126,38 +130,4 @@ Location Stream::location() const
////////////////////////////////////////////////////////////////////////////////////////////////////
// Out-of-line, std::stringstream-based variant: returns the next character via peek() without
// consuming it.
// NOTE(review): the surrounding hunk shrinks from 38 lines to 4, so this definition is
// presumably removed by the change — confirm before treating it as current code.
char Stream::currentCharacter() const
{
// TODO: check if this should be secured by check()
return m_stream.peek();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Out-of-line variant: reports the end of the input by comparing position() with -1.
// NOTE(review): this matches a tellg()-based position(), which yields -1 on failure; the hunk
// it sits in is presumably removed by the change — confirm before treating it as current code.
bool Stream::atEnd() const
{
return position() == -1;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Out-of-line variant: throws a TokenizerException ("reading past end of file") when at the end
// of the input, and a TokenizerException with only the location when the stream's fail flag
// is set.
// NOTE(review): m_stream.fail() only exists on the std::stringstream form; this definition is
// presumably removed by the change — confirm before treating it as current code.
void Stream::check() const
{
if (atEnd())
throw TokenizerException(location(), "reading past end of file");
if (m_stream.fail())
throw TokenizerException(location());
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Out-of-line, std::stringstream-based variant: validates the stream with check() and then
// discards one character with ignore(1).
// NOTE(review): presumably removed by the change — confirm before treating it as current code.
void Stream::advance()
{
check();
m_stream.ignore(1);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
}