00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #include "pcre++.h"
00045
00046
00047
00048
00049
00050 Pcre::Pcre(const string& expression) {
00051 _expression = expression;
00052 _flags = 0;
00053 case_t = global_t = false;
00054 zero();
00055 Compile(0);
00056 }
00057
00058 Pcre::Pcre(const string& expression, const string& flags) {
00059 _expression = expression;
00060 unsigned int FLAG = 0;
00061
00062 for(unsigned int flag=0; flag<flags.length(); flag++) {
00063 switch(flags[flag]) {
00064 case 'i': FLAG |= PCRE_CASELESS; case_t = true; break;
00065 case 'm': FLAG |= PCRE_MULTILINE; break;
00066 case 's': FLAG |= PCRE_DOTALL; break;
00067 case 'x': FLAG |= PCRE_EXTENDED; break;
00068 case 'g': global_t = true; break;
00069 }
00070 }
00071
00072 _flags = FLAG;
00073
00074 zero();
00075 Compile(FLAG);
00076 }
00077
00078 Pcre::Pcre(const Pcre &P) {
00079 _expression = P._expression;
00080 _flags = P._flags;
00081 case_t = P.case_t;
00082 global_t = P.global_t;
00083 zero();
00084 Compile(_flags);
00085 }
00086
00087 Pcre::Pcre() {
00088 zero();
00089 }
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 Pcre::~Pcre() {
00101
00102 if (p_pcre != NULL) {
00103 pcre_free(p_pcre);
00104 }
00105 if (p_pcre_extra != NULL) {
00106 pcre_free(p_pcre_extra);
00107 }
00108 if(sub_vec != NULL) {
00109 delete[] sub_vec;
00110 }
00111 if(num_matches > 0) {
00112 delete resultset;
00113 }
00114 if(err_str != NULL) {
00115 delete err_str;
00116 }
00117 }
00118
00119
00120
00121
00122
00123
00124
00125 const Pcre& Pcre::operator = (const string& expression) {
00126
00127 reset();
00128 _expression = expression;
00129 _flags = 0;
00130 case_t = global_t = false;
00131 Compile(0);
00132 return *this;
00133 }
00134
00135
00136 const Pcre& Pcre::operator = (const Pcre &P) {
00137 reset();
00138 _expression = P._expression;
00139 _flags = P._flags;
00140 case_t = P.case_t;
00141 global_t = P.global_t;
00142 zero();
00143 Compile(_flags);
00144 return *this;
00145 }
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155 void Pcre::zero() {
00156
00157 p_pcre_extra = NULL;
00158 p_pcre = NULL;
00159 sub_vec = NULL;
00160 resultset = NULL;
00161 err_str = NULL;
00162 num_matches = -1;
00163 }
00164
00165 void Pcre::reset() {
00166 did_match = false;
00167 num_matches = -1;
00168 }
00169
00170
00171
00172
00173
00174
00175
00176
00177 void Pcre::Compile(int flags) {
00178 p_pcre = pcre_compile((char *)_expression.c_str(), flags,
00179 (const char **)(&err_str), &erroffset, NULL);
00180
00181 if(p_pcre == NULL) {
00182
00183 string Error = err_str;
00184 throw exception("pcre_compile(..) failed: " + Error);
00185 }
00186
00187
00188 int where;
00189 int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00190 if(info == 0) {
00191 sub_len = (where +2) * 3;
00192 }
00193 else {
00194 throw exception(info);
00195 }
00196 reset();
00197 }
00198
00199
00200
00201
00202
00203
00204
00205 bool Pcre::search(const string& stuff, int OffSet) {
00206 return dosearch(stuff, OffSet);
00207 }
00208
00209 bool Pcre::search(const string& stuff) {
00210 return dosearch(stuff, 0);
00211 }
00212
00213 bool Pcre::dosearch(const string& stuff, int OffSet) {
00214 reset();
00215 sub_vec = new int[sub_len];
00216 int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00217 (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00218
00219 if(num < 0) {
00220
00221 return false;
00222 }
00223 else if(num == 0) {
00224
00225 return false;
00226 }
00227 else if(num == 1) {
00228
00229 did_match = true;
00230 num_matches = 0;
00231 return true;
00232 }
00233 else if(num > 1) {
00234
00235 resultset = new Array;
00236 const char **stringlist;
00237 did_match = true;
00238 num_matches = num - 1;
00239
00240 int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00241 if(res == 0) {
00242 for(int i=1; i<num; i++) {
00243 resultset->push_back(stringlist[i]);
00244 }
00245 pcre_free_substring_list(stringlist);
00246 }
00247 else {
00248 throw exception(res);
00249 }
00250 return true;
00251 }
00252 else {
00253
00254 return false;
00255 }
00256 }
00257
00258 Array* Pcre::get_sub_strings() {
00259 if(resultset != NULL)
00260 return resultset;
00261 else
00262 return NULL;
00263 }
00264
00265 string Pcre::get_match(int pos) {
00266 if(pos >= 0 && pos < num_matches) {
00267 ArrayIterator P = resultset->begin() + pos;
00268 return *P;
00269 }
00270 else {
00271 throw exception("out of range");
00272 }
00273 }
00274
00275 int Pcre::get_match_start() {
00276 if (sub_vec)
00277 return sub_vec[0];
00278 else
00279 return -1;
00280 }
00281
00282 int Pcre::get_match_end() {
00283 if (sub_vec)
00284 return sub_vec[1] - 1;
00285 else
00286 return -1;
00287 }
00288
00289 int Pcre::get_match_start(int pos) {
00290 if(pos >= 0 && pos <= num_matches) {
00291
00292
00293
00294 return sub_vec[ (++pos) * 2 ];
00295 }
00296 else {
00297 throw exception("out of range");
00298 }
00299 }
00300
00301 int Pcre::get_match_end(int pos) {
00302 if(pos >= 0 && pos <= num_matches) {
00303
00304
00305
00306
00307
00308 return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00309 }
00310 else {
00311 throw exception("out of range");
00312 }
00313 }
00314
00315 size_t Pcre::get_match_length(int pos) {
00316 if(pos >= 0 && pos < num_matches) {
00317 ArrayIterator P = resultset->begin() + pos;
00318 return P->length();
00319 }
00320 else {
00321 throw exception("out of range");
00322 }
00323 }
00324
00325 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00326 Array Splitted;
00327
00328 if(_expression.length() == 1) {
00329
00330 string buffer, _delimiter, _piece;
00331 char z;
00332 if(case_t) {
00333 z = toupper(_expression[0]);
00334 for(size_t pos=0; pos < piece.length(); pos++) {
00335 _piece += (char)toupper(piece[pos]);
00336 }
00337 }
00338 else {
00339 z = _expression[0];
00340 _piece = piece;
00341 }
00342 for(size_t pos=0; pos<piece.length(); pos++) {
00343 if(_piece[pos] == z) {
00344 Splitted.push_back(buffer);
00345 buffer = "";
00346 }
00347 else {
00348 buffer += piece[pos];
00349 }
00350 }
00351 if(buffer != "") {
00352 Splitted.push_back(buffer);
00353 }
00354 }
00355 else {
00356
00357 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00358
00359 pcre_free(p_pcre);
00360 pcre_free(p_pcre_extra);
00361
00362 pcre *_p = NULL;
00363 pcre_extra *_e = NULL;;
00364
00365 p_pcre = _p;
00366 p_pcre_extra = _e;
00367
00368 _expression = "(" + _expression + ")";
00369 Compile(_flags);
00370 }
00371 int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00372 for(;;) {
00373 if(search(piece, pos) == true) {
00374 if(matches() > 0) {
00375 piece_end = get_match_start(0) - 1;
00376 piece_start = pos;
00377 pos = piece_end + 1 + get_match_length(0);
00378 string junk(piece, piece_start, (piece_end - piece_start)+1);
00379 num_pieces++;
00380 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00381 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00382 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00383
00384 Splitted.push_back(junk);
00385 }
00386 }
00387 }
00388 }
00389 }
00390 else {
00391
00392 string junk(piece, pos, (piece.length() - pos));
00393 num_pieces++;
00394 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00395 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00396 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00397
00398 Splitted.push_back(junk);
00399 }
00400 }
00401 }
00402 break;
00403 }
00404 }
00405 }
00406 return Splitted;
00407 }
00408
00409 Array Pcre::split(const string& piece) {
00410 return _split(piece, 0, 0, 0);
00411 }
00412
00413 Array Pcre::split(const string& piece, int limit) {
00414 return _split(piece, limit, 0, 0);
00415 }
00416
00417 Array Pcre::split(const string& piece, int limit, int start_offset) {
00418 return _split(piece, limit, start_offset, 0);
00419 }
00420
00421 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00422 return _split(piece, limit, start_offset, end_offset);
00423 }
00424
00425 Array Pcre::split(const string& piece, vector<int> positions) {
00426 Array PreSplitted = _split(piece, 0, 0, 0);
00427 Array Splitted;
00428 for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00429 Splitted.push_back(PreSplitted[*vecIt]);
00430 }
00431 return Splitted;
00432 }
00433
00434
00435
00436 string Pcre::replace(const string& piece, const string& with) {
00437 string Replaced(piece);
00438
00439
00440
00441
00442
00443 Pcre braces("[^\\\\]\\(.*[^\\\\]\\)");
00444 if(! braces.search(_expression)) {
00445
00446
00447
00448
00449 pcre_free(p_pcre);
00450 pcre_free(p_pcre_extra);
00451
00452 pcre *_p = NULL;
00453 pcre_extra *_e = NULL;;
00454
00455 p_pcre = _p;
00456 p_pcre_extra = _e;
00457
00458 _expression = "(" + _expression + ")";
00459 Compile(_flags);
00460 }
00461
00462 if(search(piece)) {
00463
00464 string use_with = _replace_vars(with);
00465 if(!global_t) {
00466
00467
00468
00469
00470 if(matched() && matches() >= 1) {
00471 int len = get_match_end() - get_match_start() + 1;
00472 Replaced.replace(get_match_start(0), len, use_with);
00473 }
00474 }
00475 else {
00476
00477
00478
00479
00480
00481
00482
00483
00484 string sLeftOver = Replaced;
00485 int iCurPosition = 0;
00486 while( search( sLeftOver ) ) {
00487 if( matched() && matches() >= 1 ) {
00488 int len = 0;
00489 string lookfor;
00490 lookfor.erase();
00491 int match_pos;
00492 for (match_pos = 0; match_pos < matches(); match_pos++) {
00493 len += ((get_match_end(match_pos) - get_match_start(match_pos)) + 1);
00494 lookfor += get_match(match_pos);
00495 }
00496 match_pos = Replaced.find( lookfor, iCurPosition );
00497 Replaced.replace(match_pos, len, use_with);
00498 iCurPosition = ( match_pos + use_with.length() );
00499 sLeftOver = Replaced.substr( iCurPosition, string::npos );
00500 }
00501 }
00502 }
00503 }
00504 return Replaced;
00505 }
00506
00507
00508
00509 string Pcre::_replace_vars(const string& piece) {
00510 Pcre dollar("\\$[0-9]+");
00511 string with = piece;
00512 if(dollar.search(with)) {
00513 for(int index=0; index < num_matches; index++) {
00514
00515 string sub = get_match(index);
00516 ostringstream num;
00517 num << index+1;
00518 string dollar_num = "(\\$" + num.str() + ")";
00519 Pcre subsplit(dollar_num);
00520
00521 Array splitted = subsplit.split(with);
00522 string Replaced;
00523 for(size_t pos=0; pos < splitted.size(); pos++) {
00524 if(pos == (splitted.size() - 1))
00525 Replaced += splitted[pos];
00526 else
00527 Replaced += splitted[pos] + sub;
00528 }
00529 with = Replaced;
00530 }
00531 return with;
00532 }
00533 else {
00534
00535 return with;
00536 }
00537 }