// Copyright (c) 2005-2007 Hartmut Kaiser (hartmut.kaiser@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#if !defined(SAGA_IMPL_ENGINE_URL_GRAMMAR_HPP)
#define SAGA_IMPL_ENGINE_URL_GRAMMAR_HPP

// #define BOOST_SPIRIT_DEBUG

// NOTE(review): the header names of the #include directives were missing from
// the reviewed text. The list below is reconstructed from the facilities this
// file uses (std::string, Spirit rules/grammars/subrules and numeric parsers,
// repeat_p, closures, Phoenix bind/construct_/assignment/arg1) -- confirm it
// against the upstream file and the Boost version in use.
#include <string>

#include <boost/spirit/core.hpp>
#include <boost/spirit/utility/loops.hpp>
#include <boost/spirit/attribute/closure.hpp>
#include <boost/spirit/phoenix/binders.hpp>
#include <boost/spirit/phoenix/casts.hpp>
#include <boost/spirit/phoenix/operators.hpp>
#include <boost/spirit/phoenix/primitives.hpp>

///////////////////////////////////////////////////////////////////////////////
namespace saga { namespace impl {

///////////////////////////////////////////////////////////////////////////////
//
// This grammar was taken from the RFC1808 (http://www.w3.org/Addressing/rfc1808.txt)
//
// URL         = ( absoluteURL | relativeURL ) [ "#" fragment ]
//
// absoluteURL = generic-RL | ( scheme ":" *( uchar | reserved ) )
//
// generic-RL  = scheme ":" relativeURL
//
// relativeURL = net_path | abs_path | rel_path
//
// net_path    = "//" net_loc [ abs_path ]
// abs_path    = "/" rel_path
// rel_path    = [ path ] [ ";" params ] [ "?" query ]
//
// path        = fsegment *( "/" segment )
// fsegment    = *pchar
// segment     = *pchar
//
// params      = param *( ";" param )
// param       = *( pchar | "/" )
//
// scheme      = *( alpha | digit | "+" | "-" | "." )
// net_loc     = *( pchar | ";" | "?" )
// query       = *( uchar | reserved )
// fragment    = *( uchar | reserved )
//
// pchar       = uchar | ":" | "@" | "&" | "="
// uchar       = unreserved | escape
// unreserved  = alpha | digit | safe | extra
//
// escape      = "%" hex hex
// hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
//               "a" | "b" | "c" | "d" | "e" | "f"
//
// alpha       = lowalpha | hialpha
// lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
//               "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
//               "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
// hialpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
//               "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
//               "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
//
// digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
//               "8" | "9"
//
// safe        = "$" | "-" | "_" | "." | "+"
// extra       = "!" | "*" | "'" | "(" | ")" | ","
// national    = "{" | "}" | "|" | "\" | "^" | "~" | "[" | "]" | "`"
// reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "="
// punctuation = "<" | ">" | "#" | "%" | <">
//
///////////////////////////////////////////////////////////////////////////////
// Some notes to self
// - absolute file paths without a host are recognized as scheme:////abs_path
//   (note: 4 slashes), on windows possibly scheme:///c:/...)
// - relative file paths without a host are reognized as scheme:///rel_path // (note: 3 slashes) /////////////////////////////////////////////////////////////////////////////// boost::spirit::uint_parser const hex_escape_p = boost::spirit::uint_parser(); /////////////////////////////////////////////////////////////////////////////// struct net_loc_closure : boost::spirit::closure { member1 userinfo; member2 hostname; }; /////////////////////////////////////////////////////////////////////////////// template struct url_grammar : public boost::spirit::grammar > { url_grammar(T& work) : work_(work) { BOOST_SPIRIT_DEBUG_TRACE_GRAMMAR_NAME(*this, "url_grammar", (true)); } template struct definition { typedef boost::spirit::rule rule_type; rule_type full_url, absolute_url, relative_url, fragment; rule_type scheme, uchar, reserved; rule_type net_path, rel_path, net_loc, net_loc_part; rule_type path, params, pchar; rule_type unreserved, escape, safe, extra; boost::spirit::subrule<0, net_loc_closure::context_t> net_loc_sub; definition(url_grammar const &self) { using namespace boost::spirit; using namespace phoenix; full_url = ( absolute_url | relative_url ) >> !( '#' >> fragment [ bind(&self.work_, &T::set_fragment) ( construct_(arg1, arg2) ) ] ) ; absolute_url = ( scheme >> ':' ) [ // recognized scheme bind(&self.work_, &T::set_scheme) ( construct_(arg1, arg2) ) ] >> ( relative_url | *(uchar | reserved) ) [ // recognized scheme specific part bind(&self.work_, &T::set_scheme_specific_part) ( construct_(arg1, arg2) ) ] ; scheme = repeat_p(2, more) // schemes are at least 2 characters [ alpha_p | digit_p | '+' | '-' | '.' 
] ; relative_url = net_path | rel_path ; fragment = *(uchar | reserved) ; net_path = "//" >> net_loc >> !('/' >> rel_path) ; net_loc = ( net_loc_sub = ( // optional username + (optional) password !( ( *net_loc_part >> !( ':' >> *net_loc_part ) >> '@' ) [ net_loc_sub.userinfo = construct_(arg1, arg2) ] ) // hostname >> (*net_loc_part) [ net_loc_sub.hostname = construct_(arg1, arg2) ] // optional port number >> !( ':' >> int_p [ bind(&self.work_, &T::set_port)(arg1) ] ) ) [ // successfully parsed net_loc, assign the findings bind(&self.work_, &T::set_host_userinfo)(net_loc_sub.hostname, net_loc_sub.userinfo) ] ) ; rel_path // optional path = !path [ bind(&self.work_, &T::set_path) ( construct_(arg1, arg2) ) ] // optional parameters >> !( ';' >> params [ bind(&self.work_, &T::set_params) ( construct_(arg1, arg2) ) ] ) // optional query >> !( '?' >> (*(uchar | reserved)) [ bind(&self.work_, &T::set_query) ( construct_(arg1, arg2) ) ] ) ; path = !ch_p('/') >> *pchar >> *('/' >> *pchar) ; params = +(pchar | '/') >> *(';' >> *(pchar | '/')) ; net_loc_part = uchar | '&' | '=' | ';' | '?' ; pchar = uchar | ':' | '@' | '&' | '=' | ' ' | '\t' ; uchar = unreserved | escape ; unreserved = alpha_p | digit_p | safe | extra ; escape = '%' >> hex_escape_p ; safe = ch_p('$') | '-' | '_' | '.'| '+' ; extra = ch_p('!') | '*' | '\'' | '(' | ')' | ',' | '~' | '[' | ']' ; reserved = ch_p(';') | '/' | '?' 
| ':' | '@' | '&' | '=' ; BOOST_SPIRIT_DEBUG_RULE(full_url); BOOST_SPIRIT_DEBUG_RULE(absolute_url); BOOST_SPIRIT_DEBUG_RULE(relative_url); BOOST_SPIRIT_DEBUG_RULE(fragment); BOOST_SPIRIT_DEBUG_RULE(scheme); BOOST_SPIRIT_DEBUG_RULE(uchar); BOOST_SPIRIT_DEBUG_RULE(reserved); BOOST_SPIRIT_DEBUG_RULE(net_path); BOOST_SPIRIT_DEBUG_RULE(rel_path); BOOST_SPIRIT_DEBUG_RULE(net_loc); BOOST_SPIRIT_DEBUG_RULE(net_loc_sub); BOOST_SPIRIT_DEBUG_RULE(net_loc_part); BOOST_SPIRIT_DEBUG_RULE(params); BOOST_SPIRIT_DEBUG_RULE(pchar); BOOST_SPIRIT_DEBUG_RULE(path); BOOST_SPIRIT_DEBUG_RULE(unreserved); BOOST_SPIRIT_DEBUG_RULE(escape); BOOST_SPIRIT_DEBUG_RULE(safe); BOOST_SPIRIT_DEBUG_RULE(extra); } // start rule of this grammar rule_type const& start() const { return full_url; } }; T& work_; }; } } // namespace saga::impl /////////////////////////////////////////////////////////////////////////////// #endif // SAGA_IMPL_ENGINE_URL_GRAMMAR_HPP