xpressive/tools/perl2xpr.cpp
2014-01-11 00:36:40 -08:00

189 lines
6.5 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// perl2xpr.cpp
// A utility for translating a Perl regular expression into an
// xpressive static regular expression.
//
// Copyright 2007 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#include <stack>
#include <string>
#include <iostream>
#include <boost/xpressive/xpressive_static.hpp>
#include <boost/xpressive/regex_actions.hpp>
namespace x = boost::xpressive;
using namespace x;
int main(int argc, char *argv[])
{
int i = 1, j = 1;
bool nocase = false;
char const *dot = " ~_n ";
char const *bos = " bos ";
char const *eos = " eos ";
for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
{
switch(argv[i][j])
{
case 'i': // perl /i modifier
nocase = true;
break;
case 's': // perl /s modifier
dot = " _ ";
break;
case 'm': // perl /m modifier
bos = " bol ";
eos = " eol ";
break;
default:
std::cerr << "Unknown option : " << argv[i] << std::endl;
return -1;
}
}
if(i == argc)
{
std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n";
return -1;
}
// Local variables used by the semantic actions below
local<int> mark_nbr;
local<std::string> tmp;
local<std::stack<std::string> > strings;
// The rules in the dynamic regex grammar
cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
;
escape = as_xpr('b') [top(strings) += " _b "]
| as_xpr('B') [top(strings) += " ~_b "]
| as_xpr('d') [top(strings) += " _d "]
| as_xpr('D') [top(strings) += " ~_d "]
| as_xpr('s') [top(strings) += " _s "]
| as_xpr('S') [top(strings) += " ~_s "]
| as_xpr('w') [top(strings) += " _w "]
| as_xpr('W') [top(strings) += " ~_w "]
| _d [top(strings) += " s" + _ + " "]
| _ [top(strings) += " as_xpr('" + _ + "') "]
;
group = (
as_xpr("?:") [top(strings) += " ( "]
| as_xpr("?i:") [top(strings) += " icase( "]
| as_xpr("?>") [top(strings) += " keep( "]
| as_xpr("?=") [top(strings) += " before( "]
| as_xpr("?!") [top(strings) += " ~before( "]
| as_xpr("?<=") [top(strings) += " after( "]
| as_xpr("?<!") [top(strings) += " ~after( "]
| nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
)
>> x::ref(regex)
>> as_xpr(')') [top(strings) += " ) "]
;
setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "]
| "[:" >> !as_xpr('^') [top(strings) += "~"]
>> (+_w) [top(strings) += _ ]
>> ":]"
| (
(s1=~as_xpr(']'))
>> '-'
>> (s2=~as_xpr(']'))
) [top(strings) += "range('" + s1 + "','" + s2 + "')"]
;
charset = !as_xpr('^') [top(strings) += " ~ "]
>> nil [top(strings) += " set[ "]
>> (
setelem
| (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]
)
>>*(
nil [top(strings) += " | "]
>> (
setelem
| (~as_xpr(']')) [top(strings) += "'" + _ + "'"]
)
)
>> as_xpr(']') [top(strings) += " ] "]
;
atom = (
+(lit >> ~before((set='*','+','?','{')))
| lit
) [top(strings) += " as_xpr(\"" + _ + "\") "]
| as_xpr('.') [top(strings) += dot]
| as_xpr('^') [top(strings) += bos]
| as_xpr('$') [top(strings) += eos]
| '\\' >> escape
| '(' >> group
| '[' >> charset
;
repeat = as_xpr('{') [tmp = " repeat<"]
>> (+_d) [tmp += _]
>> !(
as_xpr(',') [tmp += ","]
>> (
(+_d) [tmp += _]
| nil [tmp += "inf"]
)
)
>> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "]
;
quant = nil [push(strings, "")]
>> atom
>> !(
(
as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
| as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
| as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
| repeat
)
>> !as_xpr('?') [insert(top(strings), 0, " - ")]
)
>> nil [tmp = top(strings), pop(strings), top(strings) += tmp]
;
seq = quant
>> *(
nil [top(strings) += " >> "]
>> quant
)
;
alts = seq
>> *(
as_xpr('|') [top(strings) += " | "]
>> seq
)
;
regex = alts
;
strings.get().push("");
if(!regex_match(argv[i], regex))
{
std::cerr << "ERROR: unrecognized regular expression" << std::endl;
return -1;
}
else if(nocase)
{
std::cout << "icase( " << strings.get().top() << " )" << std::endl;
}
else
{
std::cout << strings.get().top() << std::endl;
}
return 0;
}