x3: Parsing with lexer tokens

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

x3: Parsing with lexer tokens

Mikael Asplund

Hi!

 

I'm trying to use spirit x3 to parse output from my lexer (needed), which is a vector of tokens. I'm running into all sorts of compiler errors, though, and there are no examples out there that I could find, so I wrote a minimal example and put it online to request comments from you on how to proceed.

 

Please have a look here:  http://coliru.stacked-crooked.com/a/8975361b37f0c640

 

This little example is supposed to be run like this: ./a.out  1  +  3

 

Any help?

 

  /Mikael

 

PS. I'll include the code here too, but it's probably more readable in the link above. Here we go:

 

 

#include <string>

#include <vector>

#include <cctype>

#include <iostream>

 

#include <boost/spirit/home/x3.hpp>

 

using std::string;

using std::vector;

namespace x3 = boost::spirit::x3;

 

// Forward declaration so main() can call it; the definition appears later in
// this file. Takes the command-line words and returns 0 when the parse call
// reports success, 1 otherwise.
int parse_input(vector<string> v);

 

int main(int argc, const char* argv[]) {

  if (argc > 1) {

    return parse_input(vector<string>(&argv[1], &argv[argc + !argc]));

  }

  return -1;

}

 

namespace lexer {

// Where a token came from. `line` defaults to -1 and `file` to an empty
// string until a caller fills them in.
struct SourcePosition {
    std::string file;
    int line = -1;
};

// The three token categories produced by lex().
enum class TokenType { OPERATOR, NUMBER, IDENTIFIER };

// A lexed token: its category, its text, and (via the base class) its
// source position.
struct Token : public SourcePosition {
    Token(TokenType t, const std::string& s, const SourcePosition& pos = SourcePosition())
        : SourcePosition(pos), type(t), str(s) {}

    TokenType type;
    std::string str;

    // Wildcard comparison: the types must be equal, and the texts must
    // either be equal or at least one of them must be empty. A token with an
    // empty string therefore matches any token of the same type. The source
    // position is not compared.
    // Declared const so that const tokens (e.g. a parser's stored literal)
    // can appear on either side of the comparison.
    bool operator==(const Token& o) const {
        return type == o.type && (str == o.str || str.empty() || o.str.empty());
    }
};

// Streams the enumerator name. Any value other than OPERATOR or NUMBER is
// printed as "IDENTIFIER".
std::ostream& operator<<(std::ostream& o, const TokenType& t) {
    switch (t) {
        case TokenType::OPERATOR:
            return o << "OPERATOR";
        case TokenType::NUMBER:
            return o << "NUMBER";
        default:
            return o << "IDENTIFIER";
    }
}

// Streams a token as TYPE("text").
std::ostream& operator<<(std::ostream& o, const Token& t) {
    return o << t.type << "(\"" << t.str << "\")";
}

// Turns each input word into exactly one token, in order:
//   - NUMBER     if the word contains at least one digit,
//   - IDENTIFIER otherwise, if it contains at least one alphanumeric char,
//   - OPERATOR   otherwise (this includes the empty string).
// Note that the test is "contains", not "consists only of": a word such as
// "a1" is classified as NUMBER.
std::vector<Token> lex(const std::vector<std::string>& v) {
    // True when at least one character of `word` satisfies `pred`.
    // Characters are widened through unsigned char first, because passing a
    // negative char to the <cctype> functions is undefined behaviour.
    const auto has_char_matching = [](const std::string& word, auto pred) {
        for (const char c : word) {
            if (pred(static_cast<unsigned char>(c))) {
                return true;
            }
        }
        return false;
    };
    const auto is_digit = [](unsigned char c) { return std::isdigit(c) != 0; };
    const auto is_alnum = [](unsigned char c) { return std::isalnum(c) != 0; };

    std::vector<Token> out;
    out.reserve(v.size());
    for (const auto& s : v) {
        TokenType type = TokenType::OPERATOR;
        if (has_char_matching(s, is_digit)) {
            type = TokenType::NUMBER;
        } else if (has_char_matching(s, is_alnum)) {
            type = TokenType::IDENTIFIER;
        }
        out.emplace_back(type, s);
    }
    return out;
}

}

 

namespace parser {

                using lexer::Token;

                using lexer::TokenType;

               

                struct TokenLiteralParser : x3::parser<TokenLiteralParser> {

                                TokenLiteralParser(const Token& iTok) : tok(iTok) {}

 

                                template <typename It, typename Ctx, typename A>

                                bool parse(It& first, const It& last, const Ctx& ctx, x3::unused_type, A& attr) const {

                                                if (first != last && *first == tok) {

                                                                ++first;

                                                                return true;

                                                }

                                                return false;

                                }

 

                                Token tok;

                };

 

    Token TokenAnyNumber(TokenType::NUMBER, "");

    Token TokenAnyIdentifier(TokenType::IDENTIFIER, "");

    Token TokenPlus(TokenType::OPERATOR, "+");

    Token TokenMinus(TokenType::OPERATOR, "-");

 

               

}

 

// Debug sink: assigning any streamable value to an Outputter prints
// "output => <typeid name>: <value>" followed by std::endl on std::cout.
// A global instance named `outputter` is defined alongside the type.
struct Outputter {
    // Prints the assigned value and returns *this so assignments can chain.
    template <typename Value>
    Outputter& operator=(const Value& value) {
        std::ostream& sink = std::cout;
        sink << "output => ";
        sink << typeid(value).name();
        sink << ": ";
        sink << value;
        sink << std::endl;
        return *this;
    }
} outputter;

 

int parse_input(vector<string> v) {

                using namespace parser;

               

                using TL = TokenLiteralParser;

               

                auto lexed = lexer::lex(v);

                std::copy(lexed.begin(), lexed.end(), std::ostream_iterator<Token>(std::cout << "\n------ TOKENS ------\n", "\n"));

                std::cout << "--------------------\n";

               

                auto first = lexed.begin();

                auto last = lexed.end();

               

                Outputter out;

               

                bool r = phrase_parse(first, last,

 

            //  Begin grammar

            (

                (TL(TokenAnyNumber) % (TL(TokenPlus) | TL(TokenMinus)))

            ),

            //  End grammar

 

            x3::unused, out);

               

                return !r;

}

 


------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Spirit-general mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/spirit-general
Reply | Threaded
Open this post in threaded view
|

Re: x3: Parsing with lexer tokens

Mikael Asplund

Hi!

 

I finally managed to get a working code example of parsing with tokens, thanks to Mr Guzman! Very much obliged!

I thought I'd share it so it's available for the next person with the same problem. The debug-outputter object has some code that, at least currently, doesn't seem to work with GCC (it works with clang), so I had to comment it out; and Visual Studio (2015 Update 1) will not let me attach a semantic action at all in the example, but that's likely to improve...

 

So here is the working example:

 

http://coliru.stacked-crooked.com/a/94ab3dae700b9049

 

Regards,

   Mikael

 

From: Mikael Asplund [mailto:[hidden email]]
Sent: Tuesday, February 16, 2016 16:41
To: [hidden email]
Subject: [Spirit-general] x3: Parsing with lexer tokens

 

Hi!

 

I'm trying to use spirit x3 to parse output from my lexer (needed), which is a vector of tokens. I'm running into all sorts of compiler errors, though, and there are no examples out there that I could find, so I wrote a minimal example and put it online to request comments from you on how to proceed.

 

Please have a look here:  http://coliru.stacked-crooked.com/a/8975361b37f0c640

 

This little example is supposed to be run like this: ./a.out  1  +  3

 

Any help?

 

  /Mikael

 

PS. I'll include the code here too, but it's probably more readable in the link above. Here we go:

 

 

#include <string>

#include <vector>

#include <cctype>

#include <iostream>

 

#include <boost/spirit/home/x3.hpp>

 

using std::string;

using std::vector;

namespace x3 = boost::spirit::x3;

 

// Forward declaration so main() can call it; the definition appears later in
// this file. Takes the command-line words and returns 0 when the parse call
// reports success, 1 otherwise.
int parse_input(vector<string> v);

 

int main(int argc, const char* argv[]) {

  if (argc > 1) {

    return parse_input(vector<string>(&argv[1], &argv[argc + !argc]));

  }

  return -1;

}

 

namespace lexer {

// Where a token came from. `line` defaults to -1 and `file` to an empty
// string until a caller fills them in.
struct SourcePosition {
    std::string file;
    int line = -1;
};

// The three token categories produced by lex().
enum class TokenType { OPERATOR, NUMBER, IDENTIFIER };

// A lexed token: its category, its text, and (via the base class) its
// source position.
struct Token : public SourcePosition {
    Token(TokenType t, const std::string& s, const SourcePosition& pos = SourcePosition())
        : SourcePosition(pos), type(t), str(s) {}

    TokenType type;
    std::string str;

    // Wildcard comparison: the types must be equal, and the texts must
    // either be equal or at least one of them must be empty. A token with an
    // empty string therefore matches any token of the same type. The source
    // position is not compared.
    // Declared const so that const tokens (e.g. a parser's stored literal)
    // can appear on either side of the comparison.
    bool operator==(const Token& o) const {
        return type == o.type && (str == o.str || str.empty() || o.str.empty());
    }
};

// Streams the enumerator name. Any value other than OPERATOR or NUMBER is
// printed as "IDENTIFIER".
std::ostream& operator<<(std::ostream& o, const TokenType& t) {
    switch (t) {
        case TokenType::OPERATOR:
            return o << "OPERATOR";
        case TokenType::NUMBER:
            return o << "NUMBER";
        default:
            return o << "IDENTIFIER";
    }
}

// Streams a token as TYPE("text").
std::ostream& operator<<(std::ostream& o, const Token& t) {
    return o << t.type << "(\"" << t.str << "\")";
}

// Turns each input word into exactly one token, in order:
//   - NUMBER     if the word contains at least one digit,
//   - IDENTIFIER otherwise, if it contains at least one alphanumeric char,
//   - OPERATOR   otherwise (this includes the empty string).
// Note that the test is "contains", not "consists only of": a word such as
// "a1" is classified as NUMBER.
std::vector<Token> lex(const std::vector<std::string>& v) {
    // True when at least one character of `word` satisfies `pred`.
    // Characters are widened through unsigned char first, because passing a
    // negative char to the <cctype> functions is undefined behaviour.
    const auto has_char_matching = [](const std::string& word, auto pred) {
        for (const char c : word) {
            if (pred(static_cast<unsigned char>(c))) {
                return true;
            }
        }
        return false;
    };
    const auto is_digit = [](unsigned char c) { return std::isdigit(c) != 0; };
    const auto is_alnum = [](unsigned char c) { return std::isalnum(c) != 0; };

    std::vector<Token> out;
    out.reserve(v.size());
    for (const auto& s : v) {
        TokenType type = TokenType::OPERATOR;
        if (has_char_matching(s, is_digit)) {
            type = TokenType::NUMBER;
        } else if (has_char_matching(s, is_alnum)) {
            type = TokenType::IDENTIFIER;
        }
        out.emplace_back(type, s);
    }
    return out;
}

}

 

namespace parser {

                using lexer::Token;

                using lexer::TokenType;

               

                struct TokenLiteralParser : x3::parser<TokenLiteralParser> {

                                TokenLiteralParser(const Token& iTok) : tok(iTok) {}

 

                                template <typename It, typename Ctx, typename A>

                                bool parse(It& first, const It& last, const Ctx& ctx, x3::unused_type, A& attr) const {

                                                if (first != last && *first == tok) {

                                                                ++first;

                                                                return true;

                                                }

                                                return false;

                                }

 

                                Token tok;

                };

 

    Token TokenAnyNumber(TokenType::NUMBER, "");

    Token TokenAnyIdentifier(TokenType::IDENTIFIER, "");

    Token TokenPlus(TokenType::OPERATOR, "+");

    Token TokenMinus(TokenType::OPERATOR, "-");

 

               

}

 

// Debug sink: assigning any streamable value to an Outputter prints
// "output => <typeid name>: <value>" followed by std::endl on std::cout.
// A global instance named `outputter` is defined alongside the type.
struct Outputter {
    // Prints the assigned value and returns *this so assignments can chain.
    template <typename Value>
    Outputter& operator=(const Value& value) {
        std::ostream& sink = std::cout;
        sink << "output => ";
        sink << typeid(value).name();
        sink << ": ";
        sink << value;
        sink << std::endl;
        return *this;
    }
} outputter;

 

int parse_input(vector<string> v) {

                using namespace parser;

               

                using TL = TokenLiteralParser;

               

                auto lexed = lexer::lex(v);

                std::copy(lexed.begin(), lexed.end(), std::ostream_iterator<Token>(std::cout << "\n------ TOKENS ------\n", "\n"));

                std::cout << "--------------------\n";

               

                auto first = lexed.begin();

                auto last = lexed.end();

               

                Outputter out;

               

                bool r = phrase_parse(first, last,

 

            //  Begin grammar

            (

                (TL(TokenAnyNumber) % (TL(TokenPlus) | TL(TokenMinus)))

            ),

            //  End grammar

 

            x3::unused, out);

               

                return !r;

}

 


------------------------------------------------------------------------------
Site24x7 APM Insight: Get Deep Visibility into Application Performance
APM + Mobile APM + RUM: Monitor 3 App instances at just $35/Month
Monitor end-to-end web transactions and take corrective actions now
Troubleshoot faster and improve end-user experience. Signup Now!
http://pubads.g.doubleclick.net/gampad/clk?id=272487151&iu=/4140
_______________________________________________
Spirit-general mailing list
[hidden email]
https://lists.sourceforge.net/lists/listinfo/spirit-general