数字混じり文字列のソートをBoost.Xpressiveで

元ネタはここ(http://ja.doukaku.org/295/)なんですが、
札幌C++勉強会で扱ったのでちょっとやってみる。

方針は、比較関数の中で各文字列を「数字の並び」と「数字以外の並び」のトークンに分割し、トークンの種類に応じて比較方法(数値比較か文字列比較か)を切り替えているだけ。

#include<algorithm>
#include<fstream>
#include<iostream>
#include<iterator>
#include<string>
#include<utility>
#include<vector>
#include<boost/lexical_cast.hpp>
#include<boost/xpressive/xpressive.hpp>
#include<boost/xpressive/regex_actions.hpp>

struct Compare {
	typedef std::pair<std::string,int> Pair;

	bool operator()(const std::string& str1,const std::string& str2) const
	{
		using namespace boost::xpressive;
		std::vector<Pair> vec1,vec2;
		sregex re1 = make_regex(vec1), re2 = make_regex(vec2);

		// 数字と数字以外の文字列に分けてvecに入れる
		// 詳しくはCompare::make_regexメンバ関数を参照
		sregex_token_iterator(str1.begin(),str1.end(),re1);
		sregex_token_iterator(str2.begin(),str2.end(),re2);

		std::vector<Pair>::iterator
			begin1=vec1.begin(),end1=vec1.end(),
			begin2=vec2.begin(),end2=vec2.end();
		
		for(;begin1!=end1 && begin2!=end2;++begin1,++begin2) {
			if(begin1->second==0&&begin2->second==0){ // 数字
				const int d1 = boost::lexical_cast<int>(begin1->first);
				const int d2 = boost::lexical_cast<int>(begin2->first);
				if(d1<d2) return true;
				else if(d1>d2) return false;
			}
			else { // 数字以外
				const std::string& s1 = begin1->first;
				const std::string& s2 = begin2->first;
				if(s1<s2) return true;
				else if(s1>s2) return false;
			}
		}
		return false;
	}

	boost::xpressive::sregex make_regex(std::vector<Pair>& v) const {
		using namespace boost::xpressive;
		// 数字の場合は文字列と0のペア
		// 数字以外の場合は文字列と1のペアとして
		// vectorに入れる、という正規表現
		sregex re = +( 
				(s1=+_d)[
					push_back(ref(v), make_pair(s1,0))] | 
				(s2=+~_d)[
					push_back(ref(v), make_pair(s2,1))]
				);
		return re;
	}
};

// Reads the file named by argv[1] line by line, echoes every line as it is
// read, then prints "sorted: " followed by the same lines sorted with
// Compare. Returns 0 on success and -1 when the argument count is wrong or
// the file cannot be opened.
int main(int argc,char** argv) {
	if(argc!=2) {
		std::cerr << "usage: <program> <file>" << std::endl;
		return -1;
	}

	std::ifstream ifs(argv[1]);
	if(!ifs) {
		// Without this check an unreadable file would silently produce an
		// empty "sorted" listing.
		std::cerr << "cannot open file: " << argv[1] << std::endl;
		return -1;
	}

	std::vector<std::string> lines;
	for(std::string str;std::getline(ifs,str);){
		lines.push_back(str);
		std::cout << str << std::endl;
	}

	std::cout << "sorted: " <<  std::endl;
	std::sort(lines.begin(),lines.end(),Compare());
	std::copy(lines.begin(),lines.end(),std::ostream_iterator<std::string>(std::cout,"\n"));
	return 0;
}

細かい仕様についてはよく知らないので、何かミスがあるかも。
あとパフォーマンスとかは考えてないので、このままでは実際には使えないですね。
文字列比較の度に正規表現でサーチしてますし。
効率を考えるならすべてトークン分割した状態でコンテナに保持させてからソートした方がよさそう。

最初はboost::tupleを使おうとしたが、make_tupleではコンパイルが通らなかった(これを通そうとしてかなり悩んだ)。
どうやらXpressiveのセマンティックアクション用にはmake_pairしか用意されていないようだ。