How to manage include statements with boost::spirit::lex

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

How to manage include statements with boost::spirit::lex

Peter Foelsche-2
I tried to implement some example lexer which can deal with include files.
I did this by implementing my own iterator, which points into a std::string. An include statement inserts the contents of the included file into that string, directly after the include statement. I suspect that this is not the right approach.
Any hints would be appreciated...

Peter

PS.
In case somebody is curious about the "`include" -- the format comes from a language called Verilog-A.

==begin source.cpp==
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include <boost/bind.hpp>


#include <iostream>
#include <fstream>
#include <string>
#include <utility>
#include <iterator>
#include <string>


using namespace boost::spirit;


// Character iterator over a text buffer that is shared by all of its copies.
//
// An iterator stores a shared pointer to a std::string plus an index into it.
// Because a position is an index and not a pointer, open() can insert the
// contents of another file into the buffer while iterators into the buffer
// are alive. A default-constructed iterator owns an empty buffer of its own,
// is therefore always at end-of-input, and serves as the end sentinel.
struct CIterator : std::iterator<std::input_iterator_tag, char>
{
    boost::shared_ptr<std::string> m_sState; // text buffer, shared between copies
    std::size_t m_iPos;                      // index of the current character

    // Creates an end-of-input sentinel backed by its own empty buffer.
    CIterator(void)
        : m_sState(boost::make_shared<std::string>()),
          m_iPos(0)
    {
    }

    // Creates an iterator positioned on the first character of file _r.
    // Throws std::ios_base::failure if the file cannot be opened.
    CIterator(const std::string &_r)
        : m_sState(boost::make_shared<std::string>()),
          m_iPos(0)
    {
        open(_r);
    }

    // Copies share the buffer of the source iterator; only the index is
    // duplicated.
    CIterator(const CIterator &_r)
        : m_sState(_r.m_sState),
          m_iPos(_r.m_iPos)
    {
    }

    CIterator &operator=(const CIterator &_r)
    {
        m_sState = _r.m_sState;
        m_iPos = _r.m_iPos;
        return *this;
    }

    // Reads all of file _r and inserts its text into the shared buffer at the
    // current index, so the inserted text is what is read next from this
    // position. The function is const because only the shared buffer is
    // modified, never the iterator itself.
    // Throws std::ios_base::failure if the file cannot be opened (the
    // original silently treated such a file as empty) or if reading sets
    // badbit.
    void open(const std::string &_r) const
    {
        std::ifstream sFile(_r.c_str());
        if (!sFile.is_open())
            throw std::ios_base::failure("cannot open file \"" + _r + "\"");
        sFile.unsetf(std::ios::skipws); // whitespace is part of the input
        sFile.exceptions(std::ifstream::badbit);
        // Extra parentheses keep this from being parsed as a function declaration.
        const std::string sText((std::istream_iterator<char>(sFile)),
                                std::istream_iterator<char>());
        m_sState->insert(m_iPos, sText);
    }

    // Post-increment: advances and returns the previous position.
    CIterator operator++(int)
    {
        const CIterator s(*this);
        ++*this;
        return s;
    }

    // Pre-increment: advances to the next character.
    CIterator &operator++(void)
    {
        ++m_iPos;
        return *this;
    }

    // Current character; std::string::at throws std::out_of_range when the
    // iterator is at (or beyond) the end of the buffer.
    const char &operator*(void) const
    {
        return m_sState->at(m_iPos);
    }

    const char *operator->(void) const
    {
        return &m_sState->at(m_iPos);
    }

    // True when no character is left to read. ">=" rather than "==" so that
    // an iterator advanced past the end still compares equal to the sentinel.
    bool eof(void) const
    {
        return m_iPos >= m_sState->size();
    }

    // Two iterators are equal when both are at end-of-input (whatever buffer
    // they refer to), or when they denote the same index of the same buffer.
    bool operator==(const CIterator &_r) const
    {
        return (eof() && _r.eof())
            || (m_iPos == _r.m_iPos && m_sState == _r.m_sState);
    }

    bool operator!=(const CIterator &_r) const
    {
        return !(*this == _r);
    }
};
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{ std::vector<std::string> m_sStateStack;
struct set_lexer_state
{ std::string m_sState;
set_lexer_state(const std::string &_rState)
:m_sState(_rState)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&,
Iterator const&,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ _rCtx.set_state_name(m_sState.c_str());
}
};
struct push_lexer_state
{ std::string m_sState;
strip_comments_tokens &m_r;
push_lexer_state(const std::string &_rState, strip_comments_tokens &_r)
:m_sState(_rState),
m_r(_r)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&,
Iterator const&,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ m_r.m_sStateStack.push_back(_rCtx.get_state_name());
_rCtx.set_state_name(m_sState.c_str());
}
};
struct pop_lexer_state
{ strip_comments_tokens &m_r;
pop_lexer_state(strip_comments_tokens &_r)
:m_r(_r)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&,
Iterator const&,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ _rCtx.set_state_name(m_r.m_sStateStack.back().c_str());
m_r.m_sStateStack.pop_back();
}
};
        const lex::token_def<> m_sCppComment,
m_sCCommentStart,
m_sCCommentStartInclude,
m_sCCommentEnd,
m_sInclude,
m_sFileName,
m_sSpace,
m_sInteger,
m_sCommentChar;
static const char s_acStateInclude[];
static const char s_acStateInitial[];
static const char s_acStateComment[];
struct echo
{ template<typename IT, typename PASS_FLAG, typename ID_TYPE, typename CONTEX>
void operator()(const IT &_p, const IT &_pEnd, const PASS_FLAG&, const ID_TYPE&, const CONTEX&) const
{ std::cout << std::string(_p, _pEnd) << std::endl;
}
};
struct openIncludeFile
{ strip_comments_tokens &m_r;
openIncludeFile(strip_comments_tokens &_r)
:m_r(_r)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&_p,
Iterator const&_pEnd,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ _rCtx.set_state_name(s_acStateInitial);
_pEnd.open(std::string(_p, _pEnd).substr(1, std::distance(_p, _pEnd) - 2));
}
};
strip_comments_tokens(void)
                : strip_comments_tokens::base_type(lex::match_flags::match_default),
                m_sCppComment("\\/\\/[^\n]*\n"),
                m_sCCommentStart("\\/\\*"),
                m_sCCommentStartInclude("\\/\\*"),
m_sCCommentEnd("\\*\\/"),
m_sCommentChar("."),
                m_sInclude("`include[ \t]*"),
m_sFileName("\\\"[^\"]*\\\""),
m_sSpace("[ \t\n\r]*"),
m_sInteger("[0-9]*")
        { self = m_sCppComment
| m_sCCommentStart[push_lexer_state(s_acStateComment, *this)]
                        | m_sInclude[set_lexer_state(s_acStateInclude)]
| m_sSpace
| m_sInteger[echo()];
self(s_acStateInclude) = m_sCCommentStartInclude[push_lexer_state(s_acStateComment, *this)]
| m_sFileName[openIncludeFile(*this)];
self(s_acStateComment) = m_sCCommentEnd[pop_lexer_state(*this)]
| m_sCommentChar;
        }
};
// Out-of-class definitions of the lexer state names declared in
// strip_comments_tokens.

// Default state of the lexer.
template <class LexerT>
char const strip_comments_tokens<LexerT>::s_acStateInitial[] = "INITIAL";

// State entered after the `include keyword.
template <class LexerT>
char const strip_comments_tokens<LexerT>::s_acStateInclude[] = "INCLUDE";

// State entered at the start of a /* ... */ comment.
template <class LexerT>
char const strip_comments_tokens<LexerT>::s_acStateComment[] = "COMMENT";


int main(int argc, char**argv)
{ typedef CIterator base_iterator_type;
typedef lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> > lexer_type;
strip_comments_tokens<lexer_type> strip_comments;
CIterator p(argv[1]);
while (p != CIterator() && lex::tokenize(p, CIterator(), strip_comments))
;
std::cerr << "succeeded: \"" << std::string(p, CIterator()) << "\"" << std::endl;
return 0;
}
==end source.cpp==
==begin example input a.txt==
123
`include "b.txt" /* comment 555 */
`include "b.txt" // comment
`include /*asda*/"b.txt" // comment
234
==end a.txt==
==begin included file b.txt==
345
456
==end b.txt==

This email may contain material that is confidential and/or proprietary that the sender intended only for specific recipients. Any review by unintended recipients, forwarding or creating derivative works without the written permission of Silvaco, Inc. or its subsidiaries is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
Spirit-general mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/spirit-general
Reply | Threaded
Open this post in threaded view
|

Re: How to manage include statements with boost::spirit::lex

Peter Foelsche-2
since gmail is too smart to leave the text alone, here are all the files again attached...

On Wed, Jun 29, 2016 at 11:41 AM, Peter Foelsche <[hidden email]> wrote:
I tried to implement some example lexer which can deal with include files.
I did this by implementing my own iterator, which points into a std::string. An include statement inserts the contents of the included file into that string, directly after the include statement. I suspect that this is not the right approach.
Any hints would be appreciated...

Peter

PS.
In case somebody is curious about the "`include" -- the format comes from a language called Verilog-A.

==begin source.cpp==
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include <boost/bind.hpp>


#include <iostream>
#include <fstream>
#include <string>
#include <utility>
#include <iterator>
#include <string>


using namespace boost::spirit;


// Character iterator over a text buffer that is shared by all of its copies.
//
// An iterator stores a shared pointer to a std::string plus an index into it.
// Because a position is an index and not a pointer, open() can insert the
// contents of another file into the buffer while iterators into the buffer
// are alive. A default-constructed iterator owns an empty buffer of its own,
// is therefore always at end-of-input, and serves as the end sentinel.
struct CIterator : std::iterator<std::input_iterator_tag, char>
{
    boost::shared_ptr<std::string> m_sState; // text buffer, shared between copies
    std::size_t m_iPos;                      // index of the current character

    // Creates an end-of-input sentinel backed by its own empty buffer.
    CIterator(void)
        : m_sState(boost::make_shared<std::string>()),
          m_iPos(0)
    {
    }

    // Creates an iterator positioned on the first character of file _r.
    // Throws std::ios_base::failure if the file cannot be opened.
    CIterator(const std::string &_r)
        : m_sState(boost::make_shared<std::string>()),
          m_iPos(0)
    {
        open(_r);
    }

    // Copies share the buffer of the source iterator; only the index is
    // duplicated.
    CIterator(const CIterator &_r)
        : m_sState(_r.m_sState),
          m_iPos(_r.m_iPos)
    {
    }

    CIterator &operator=(const CIterator &_r)
    {
        m_sState = _r.m_sState;
        m_iPos = _r.m_iPos;
        return *this;
    }

    // Reads all of file _r and inserts its text into the shared buffer at the
    // current index, so the inserted text is what is read next from this
    // position. The function is const because only the shared buffer is
    // modified, never the iterator itself.
    // Throws std::ios_base::failure if the file cannot be opened (the
    // original silently treated such a file as empty) or if reading sets
    // badbit.
    void open(const std::string &_r) const
    {
        std::ifstream sFile(_r.c_str());
        if (!sFile.is_open())
            throw std::ios_base::failure("cannot open file \"" + _r + "\"");
        sFile.unsetf(std::ios::skipws); // whitespace is part of the input
        sFile.exceptions(std::ifstream::badbit);
        // Extra parentheses keep this from being parsed as a function declaration.
        const std::string sText((std::istream_iterator<char>(sFile)),
                                std::istream_iterator<char>());
        m_sState->insert(m_iPos, sText);
    }

    // Post-increment: advances and returns the previous position.
    CIterator operator++(int)
    {
        const CIterator s(*this);
        ++*this;
        return s;
    }

    // Pre-increment: advances to the next character.
    CIterator &operator++(void)
    {
        ++m_iPos;
        return *this;
    }

    // Current character; std::string::at throws std::out_of_range when the
    // iterator is at (or beyond) the end of the buffer.
    const char &operator*(void) const
    {
        return m_sState->at(m_iPos);
    }

    const char *operator->(void) const
    {
        return &m_sState->at(m_iPos);
    }

    // True when no character is left to read. ">=" rather than "==" so that
    // an iterator advanced past the end still compares equal to the sentinel.
    bool eof(void) const
    {
        return m_iPos >= m_sState->size();
    }

    // Two iterators are equal when both are at end-of-input (whatever buffer
    // they refer to), or when they denote the same index of the same buffer.
    bool operator==(const CIterator &_r) const
    {
        return (eof() && _r.eof())
            || (m_iPos == _r.m_iPos && m_sState == _r.m_sState);
    }

    bool operator!=(const CIterator &_r) const
    {
        return !(*this == _r);
    }
};
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{ std::vector<std::string> m_sStateStack;
struct set_lexer_state
{ std::string m_sState;
set_lexer_state(const std::string &_rState)
:m_sState(_rState)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&,
Iterator const&,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ _rCtx.set_state_name(m_sState.c_str());
}
};
struct push_lexer_state
{ std::string m_sState;
strip_comments_tokens &m_r;
push_lexer_state(const std::string &_rState, strip_comments_tokens &_r)
:m_sState(_rState),
m_r(_r)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&,
Iterator const&,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ m_r.m_sStateStack.push_back(_rCtx.get_state_name());
_rCtx.set_state_name(m_sState.c_str());
}
};
struct pop_lexer_state
{ strip_comments_tokens &m_r;
pop_lexer_state(strip_comments_tokens &_r)
:m_r(_r)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&,
Iterator const&,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ _rCtx.set_state_name(m_r.m_sStateStack.back().c_str());
m_r.m_sStateStack.pop_back();
}
};
        const lex::token_def<> m_sCppComment,
m_sCCommentStart,
m_sCCommentStartInclude,
m_sCCommentEnd,
m_sInclude,
m_sFileName,
m_sSpace,
m_sInteger,
m_sCommentChar;
static const char s_acStateInclude[];
static const char s_acStateInitial[];
static const char s_acStateComment[];
struct echo
{ template<typename IT, typename PASS_FLAG, typename ID_TYPE, typename CONTEX>
void operator()(const IT &_p, const IT &_pEnd, const PASS_FLAG&, const ID_TYPE&, const CONTEX&) const
{ std::cout << std::string(_p, _pEnd) << std::endl;
}
};
struct openIncludeFile
{ strip_comments_tokens &m_r;
openIncludeFile(strip_comments_tokens &_r)
:m_r(_r)
{
}
// This is called by the semantic action handling code during the lexing
template <typename Iterator, typename Context>
inline void operator()(
Iterator const&_p,
Iterator const&_pEnd,
BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&,
std::size_t&,
Context &_rCtx) const
{ _rCtx.set_state_name(s_acStateInitial);
_pEnd.open(std::string(_p, _pEnd).substr(1, std::distance(_p, _pEnd) - 2));
}
};
strip_comments_tokens(void)
                : strip_comments_tokens::base_type(lex::match_flags::match_default),
                m_sCppComment("\\/\\/[^\n]*\n"),
                m_sCCommentStart("\\/\\*"),
                m_sCCommentStartInclude("\\/\\*"),
m_sCCommentEnd("\\*\\/"),
m_sCommentChar("."),
                m_sInclude("`include[ \t]*"),
m_sFileName("\\\"[^\"]*\\\""),
m_sSpace("[ \t\n\r]*"),
m_sInteger("[0-9]*")
        { self = m_sCppComment
| m_sCCommentStart[push_lexer_state(s_acStateComment, *this)]
                        | m_sInclude[set_lexer_state(s_acStateInclude)]
| m_sSpace
| m_sInteger[echo()];
self(s_acStateInclude) = m_sCCommentStartInclude[push_lexer_state(s_acStateComment, *this)]
| m_sFileName[openIncludeFile(*this)];
self(s_acStateComment) = m_sCCommentEnd[pop_lexer_state(*this)]
| m_sCommentChar;
        }
};
// Out-of-class definitions of the lexer state names declared in
// strip_comments_tokens.

// Default state of the lexer.
template <class LexerT>
char const strip_comments_tokens<LexerT>::s_acStateInitial[] = "INITIAL";

// State entered after the `include keyword.
template <class LexerT>
char const strip_comments_tokens<LexerT>::s_acStateInclude[] = "INCLUDE";

// State entered at the start of a /* ... */ comment.
template <class LexerT>
char const strip_comments_tokens<LexerT>::s_acStateComment[] = "COMMENT";


int main(int argc, char**argv)
{ typedef CIterator base_iterator_type;
typedef lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> > lexer_type;
strip_comments_tokens<lexer_type> strip_comments;
CIterator p(argv[1]);
while (p != CIterator() && lex::tokenize(p, CIterator(), strip_comments))
;
std::cerr << "succeeded: \"" << std::string(p, CIterator()) << "\"" << std::endl;
return 0;
}
==end source.cpp==
==begin example input a.txt==
123
`include "b.txt" /* comment 555 */
`include "b.txt" // comment
`include /*asda*/"b.txt" // comment
234
==end a.txt==
==begin included file b.txt==
345
456
==end b.txt==


This email may contain material that is confidential and/or proprietary that the sender intended only for specific recipients. Any review by unintended recipients, forwarding or creating derivative works without the written permission of Silvaco, Inc. or its subsidiaries is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
Spirit-general mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/spirit-general

a.txt (154 bytes) Download Attachment
b.txt (12 bytes) Download Attachment
Source.cpp (8K) Download Attachment