189 lines
6.5 KiB
C++
189 lines
6.5 KiB
C++
///////////////////////////////////////////////////////////////////////////////
// perl2xpr.cpp
// A utility for translating a Perl regular expression into an
// xpressive static regular expression.
//
// Copyright 2007 Eric Niebler. Distributed under the Boost
// Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
#include <stack>
|
|
#include <string>
|
|
#include <iostream>
|
|
#include <boost/xpressive/xpressive_static.hpp>
|
|
#include <boost/xpressive/regex_actions.hpp>
|
|
|
|
namespace x = boost::xpressive;
|
|
using namespace x;
|
|
|
|
int main(int argc, char *argv[])
|
|
{
|
|
int i = 1, j = 1;
|
|
bool nocase = false;
|
|
char const *dot = " ~_n ";
|
|
char const *bos = " bos ";
|
|
char const *eos = " eos ";
|
|
|
|
for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
|
|
{
|
|
switch(argv[i][j])
|
|
{
|
|
case 'i': // perl /i modifier
|
|
nocase = true;
|
|
break;
|
|
case 's': // perl /s modifier
|
|
dot = " _ ";
|
|
break;
|
|
case 'm': // perl /m modifier
|
|
bos = " bol ";
|
|
eos = " eol ";
|
|
break;
|
|
default:
|
|
std::cerr << "Unknown option : " << argv[i] << std::endl;
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if(i == argc)
|
|
{
|
|
std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n";
|
|
return -1;
|
|
}
|
|
|
|
// Local variables used by the semantic actions below
|
|
local<int> mark_nbr;
|
|
local<std::string> tmp;
|
|
local<std::stack<std::string> > strings;
|
|
|
|
// The rules in the dynamic regex grammar
|
|
cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
|
|
|
|
lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
|
|
;
|
|
|
|
escape = as_xpr('b') [top(strings) += " _b "]
|
|
| as_xpr('B') [top(strings) += " ~_b "]
|
|
| as_xpr('d') [top(strings) += " _d "]
|
|
| as_xpr('D') [top(strings) += " ~_d "]
|
|
| as_xpr('s') [top(strings) += " _s "]
|
|
| as_xpr('S') [top(strings) += " ~_s "]
|
|
| as_xpr('w') [top(strings) += " _w "]
|
|
| as_xpr('W') [top(strings) += " ~_w "]
|
|
| _d [top(strings) += " s" + _ + " "]
|
|
| _ [top(strings) += " as_xpr('" + _ + "') "]
|
|
;
|
|
|
|
group = (
|
|
as_xpr("?:") [top(strings) += " ( "]
|
|
| as_xpr("?i:") [top(strings) += " icase( "]
|
|
| as_xpr("?>") [top(strings) += " keep( "]
|
|
| as_xpr("?=") [top(strings) += " before( "]
|
|
| as_xpr("?!") [top(strings) += " ~before( "]
|
|
| as_xpr("?<=") [top(strings) += " after( "]
|
|
| as_xpr("?<!") [top(strings) += " ~after( "]
|
|
| nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
|
|
)
|
|
>> x::ref(regex)
|
|
>> as_xpr(')') [top(strings) += " ) "]
|
|
;
|
|
|
|
setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "]
|
|
| "[:" >> !as_xpr('^') [top(strings) += "~"]
|
|
>> (+_w) [top(strings) += _ ]
|
|
>> ":]"
|
|
| (
|
|
(s1=~as_xpr(']'))
|
|
>> '-'
|
|
>> (s2=~as_xpr(']'))
|
|
) [top(strings) += "range('" + s1 + "','" + s2 + "')"]
|
|
;
|
|
|
|
charset = !as_xpr('^') [top(strings) += " ~ "]
|
|
>> nil [top(strings) += " set[ "]
|
|
>> (
|
|
setelem
|
|
| (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]
|
|
)
|
|
>>*(
|
|
nil [top(strings) += " | "]
|
|
>> (
|
|
setelem
|
|
| (~as_xpr(']')) [top(strings) += "'" + _ + "'"]
|
|
)
|
|
)
|
|
>> as_xpr(']') [top(strings) += " ] "]
|
|
;
|
|
|
|
atom = (
|
|
+(lit >> ~before((set='*','+','?','{')))
|
|
| lit
|
|
) [top(strings) += " as_xpr(\"" + _ + "\") "]
|
|
| as_xpr('.') [top(strings) += dot]
|
|
| as_xpr('^') [top(strings) += bos]
|
|
| as_xpr('$') [top(strings) += eos]
|
|
| '\\' >> escape
|
|
| '(' >> group
|
|
| '[' >> charset
|
|
;
|
|
|
|
repeat = as_xpr('{') [tmp = " repeat<"]
|
|
>> (+_d) [tmp += _]
|
|
>> !(
|
|
as_xpr(',') [tmp += ","]
|
|
>> (
|
|
(+_d) [tmp += _]
|
|
| nil [tmp += "inf"]
|
|
)
|
|
)
|
|
>> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "]
|
|
;
|
|
|
|
quant = nil [push(strings, "")]
|
|
>> atom
|
|
>> !(
|
|
(
|
|
as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
|
|
| as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
|
|
| as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
|
|
| repeat
|
|
)
|
|
>> !as_xpr('?') [insert(top(strings), 0, " - ")]
|
|
)
|
|
>> nil [tmp = top(strings), pop(strings), top(strings) += tmp]
|
|
;
|
|
|
|
seq = quant
|
|
>> *(
|
|
nil [top(strings) += " >> "]
|
|
>> quant
|
|
)
|
|
;
|
|
|
|
alts = seq
|
|
>> *(
|
|
as_xpr('|') [top(strings) += " | "]
|
|
>> seq
|
|
)
|
|
;
|
|
|
|
regex = alts
|
|
;
|
|
|
|
strings.get().push("");
|
|
if(!regex_match(argv[i], regex))
|
|
{
|
|
std::cerr << "ERROR: unrecognized regular expression" << std::endl;
|
|
return -1;
|
|
}
|
|
else if(nocase)
|
|
{
|
|
std::cout << "icase( " << strings.get().top() << " )" << std::endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << strings.get().top() << std::endl;
|
|
}
|
|
|
|
return 0;
|
|
}
|