// Author: Steve Lombardi
// Date: May 3, 2010
// Originally posted on www.stephenlombardi.com
// Purpose: make Boost.Tokenizer useful

#include <algorithm>
#include <iostream>
#include <iterator>
#include <list>
#include <string>

#include <boost/tokenizer.hpp>

// helper classes for creating a heterogeneous container of tfuncs

// Abstract interface for a tokenizer function ("tfunc"). It lets functors of
// unrelated concrete types be handled through a common tfunc_polym pointer.
class tfunc_polym {
public:
	// Virtual so that an object of a derived type can be destroyed safely
	// through a tfunc_polym pointer.
	virtual ~tfunc_polym( ) { }
	// Tries to read one token from the range [next, end).
	//   next: current read position; implementations may advance it.
	//   end:  one past the last readable character.
	//   tok:  receives the token text when one is found.
	// Returns true when a token was produced, false otherwise.
	virtual bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const = 0;
	// Clears any state the implementation keeps between tokenizing runs.
	virtual void reset( ) = 0;
};

// Adapts a concrete tokenizer functor of type TFunc to the tfunc_polym
// interface. A copy of the functor is stored and every call is forwarded to it.
template< class TFunc >
class tfunc_polym_derived : public tfunc_polym {
	// The wrapped functor, held by value.
	TFunc wrapped_;

public:
	// Stores a copy of the given functor.
	tfunc_polym_derived( TFunc tfunc ) : wrapped_( tfunc ) { }

	// Forwards the token request to the wrapped functor and returns its result.
	virtual bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const {
		const bool produced = wrapped_( next, end, tok );
		return produced;
	}

	// Forwards the reset request to the wrapped functor.
	virtual void reset( ) {
		wrapped_.reset( );
	}
};

class tfunc_polym_adapter {
public:
	tfunc_polym_adapter( tfunc_polym * tfunc ) : tfunc_( tfunc ) { }
	bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const {
		return (*tfunc_)( next, end, tok );
	}
	void reset( ) {
		tfunc_->reset( );
	}
private:
	tfunc_polym * tfunc_;
};

// basic tfunc types

// Tokenizer function that recognizes one fixed string at the current position.
class find_str_tfunc {
public:
	// str: the exact text to recognize; a copy is kept.
	find_str_tfunc( const std::string & str ) : str_( str ) { }
	// Checks whether [next, end) starts with the stored string.
	//   On success: tok receives the stored string, next is moved just past
	//   the matched text, and true is returned.
	//   On failure: next and tok are left untouched and false is returned.
	bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const {
		std::string::const_iterator pattern = str_.begin( );
		// Work on a copy so the caller's position only changes on a full match.
		std::string::const_iterator cursor = next;
		// Both bounds are tested before either iterator is dereferenced, so the
		// comparison never reads past the pattern or past the input range.
		while( pattern != str_.end( ) && cursor != end && *pattern == *cursor ) {
			++pattern;
			++cursor;
		}
		if( pattern != str_.end( ) ) {
			return false;
		}
		next = cursor;
		tok = str_;
		return true;
	}
	// Stateless: nothing to reset.
	void reset( ) {
	}
private:
	std::string str_;
};

// Tokenizer function that recognizes an identifier: one ASCII letter followed
// by any number of ASCII letters or decimal digits.
class find_ident_tfunc {
public:
	// Tries to read an identifier starting at next.
	//   On success: tok receives the identifier text, next is moved just past
	//   it, and true is returned.
	//   On failure (empty range or first character is not a letter): next and
	//   tok are left untouched and false is returned.
	bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const {
		// The range must be checked before *next is read.
		if( next == end || !is_letter( *next ) ) {
			return false;
		}
		const std::string::const_iterator begin = next;
		// The end test comes first so the character test never reads past the range.
		for( ++next; next != end && ( is_letter( *next ) || is_digit( *next ) ); ++next ) {
		}
		tok = std::string( begin, next );
		return true;
	}
	// Stateless: nothing to reset.
	void reset( ) {
	}
private:
	// True for 'A'..'Z' and 'a'..'z'.
	static bool is_letter( char c ) {
		return ( c >= 'A' && c <= 'Z' ) || ( c >= 'a' && c <= 'z' );
	}
	// True for '0'..'9'.
	static bool is_digit( char c ) {
		return c >= '0' && c <= '9';
	}
};

// Tokenizer function that recognizes an unsigned integer literal: one or more
// consecutive ASCII decimal digits.
class find_integer_tfunc {
public:
	// Tries to read a run of digits starting at next.
	//   On success: tok receives the digits, next is moved just past them, and
	//   true is returned.
	//   On failure (empty range or first character is not a digit): next and
	//   tok are left untouched and false is returned.
	bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const {
		// The range must be checked before *next is read.
		if( next == end || !is_digit( *next ) ) {
			return false;
		}
		const std::string::const_iterator begin = next;
		// The end test comes first so the digit test never reads past the range.
		for( ++next; next != end && is_digit( *next ); ++next ) {
		}
		tok = std::string( begin, next );
		return true;
	}
	// Stateless: nothing to reset.
	void reset( ) {
	}
private:
	// True for '0'..'9'.
	static bool is_digit( char c ) {
		return c >= '0' && c <= '9';
	}
};

// tfunc type to combine other tfuncs

// Tokenizer function that combines several tokenizer functions of type TFunc.
// At each position it skips leading spaces, tries every stored function, and
// yields the longest token any of them reports (the earliest one wins a tie).
template< class TFunc >
class multi_tfunc {
public:
	// Copies the tokenizer functions in the iterator range [begin, end).
	template< class X >
	multi_tfunc( X begin, X end ) : tfuncs_( begin, end ) { }
	// Tries to read the next token from [next, end).
	//   next: read position; moved past any leading spaces, and on success
	//         further by the length of the chosen token.
	//   end:  one past the last readable character.
	//   tok:  receives the chosen token on success.
	// Returns false when the input is exhausted or no stored function produces
	// a non-empty token.
	bool operator( )( std::string::const_iterator & next, std::string::const_iterator end, std::string & tok ) const {
		// The end test comes first so skipping never reads past the range.
		for( ; next != end && *next == ' '; ++next ) {
		}
		// Nothing left to tokenize; do not hand an empty range to the sub-tokenizers.
		if( next == end ) {
			return false;
		}

		std::string longest;
		for( typename std::list< TFunc >::const_iterator iter = tfuncs_.begin( ); iter != tfuncs_.end( ); ++iter ) {
			// Each candidate works on its own copy of the position so that a
			// failed or shorter match does not disturb the others.
			std::string::const_iterator nextcopy = next;
			std::string result;
			if( (*iter)( nextcopy, end, result ) && result.size( ) > longest.size( ) ) {
				longest = result;
			}
		}

		if( longest.empty( ) ) {
			return false;
		}
		std::advance( next, longest.size( ) );
		tok = longest;
		return true;
	}
	// No state of its own to clear; the stored functions are not reset here.
	void reset( ) {
	}
private:
	std::list< TFunc > tfuncs_;
};

// a basic example

int main( ) {
	std::list< tfunc_polym_adapter > tfuncs;
	tfuncs.push_back( new tfunc_polym_derived< find_str_tfunc >( find_str_tfunc( "while" ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_str_tfunc >( find_str_tfunc( "(" ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_str_tfunc >( find_str_tfunc( ")" ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_str_tfunc >( find_str_tfunc( "{" ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_str_tfunc >( find_str_tfunc( "}" ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_str_tfunc >( find_str_tfunc( ">" ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_ident_tfunc >( find_ident_tfunc( ) ) );
	tfuncs.push_back( new tfunc_polym_derived< find_integer_tfunc >( find_integer_tfunc( ) ) );

	std::string s = "while( identifier123 > 5 ) { }";
	boost::tokenizer< multi_tfunc< tfunc_polym_adapter > > tok( s, multi_tfunc< tfunc_polym_adapter >( tfuncs.begin( ), tfuncs.end( ) ) );
	std::copy( tok.begin( ), tok.end( ), std::ostream_iterator< std::string >( std::cout, "\n" ) );

	return 0;
}

