00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 #include "pcre++.h"
00045
00046
00047
00048
00049
00050 Pcre::Pcre(const string& expression) {
00051 _expression = expression;
00052 _flags = 0;
00053 case_t = global_t = false;
00054 zero();
00055 Compile(0);
00056 }
00057
00058 Pcre::Pcre(const string& expression, const string& flags) {
00059 _expression = expression;
00060 unsigned int FLAG = 0;
00061
00062 for(unsigned int flag=0; flag<flags.length(); flag++) {
00063 switch(flags[flag]) {
00064 case 'i': FLAG |= PCRE_CASELESS; case_t = true; break;
00065 case 'm': FLAG |= PCRE_MULTILINE; break;
00066 case 's': FLAG |= PCRE_DOTALL; break;
00067 case 'x': FLAG |= PCRE_EXTENDED; break;
00068 case 'g': global_t = true; break;
00069 }
00070 }
00071
00072 _flags = FLAG;
00073
00074 zero();
00075 Compile(FLAG);
00076 }
00077
00078 Pcre::Pcre(const Pcre &P) {
00079 _expression = P._expression;
00080 _flags = P._flags;
00081 case_t = P.case_t;
00082 global_t = P.global_t;
00083 zero();
00084 Compile(_flags);
00085 }
00086
00087 Pcre::Pcre() {
00088 zero();
00089 }
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100 Pcre::~Pcre() {
00101
00102 if (p_pcre != NULL) {
00103 pcre_free(p_pcre);
00104 }
00105 if (p_pcre_extra != NULL) {
00106 pcre_free(p_pcre_extra);
00107 }
00108 if(sub_vec != NULL) {
00109 delete[] sub_vec;
00110 }
00111 if(num_matches > 0) {
00112 delete resultset;
00113 }
00114 if(err_str != NULL) {
00115 delete err_str;
00116 }
00117 }
00118
00119
00120
00121
00122
00123
00124
00125 const Pcre& Pcre::operator = (const string& expression) {
00126
00127 reset();
00128 _expression = expression;
00129 _flags = 0;
00130 case_t = global_t = false;
00131 Compile(0);
00132 return *this;
00133 }
00134
00135
00136 const Pcre& Pcre::operator = (const Pcre &P) {
00137 reset();
00138 _expression = P._expression;
00139 _flags = P._flags;
00140 case_t = P.case_t;
00141 global_t = P.global_t;
00142 zero();
00143 Compile(_flags);
00144 return *this;
00145 }
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155 void Pcre::zero() {
00156
00157 p_pcre_extra = NULL;
00158 p_pcre = NULL;
00159 sub_vec = NULL;
00160 resultset = NULL;
00161 err_str = NULL;
00162 num_matches = -1;
00163 }
00164
00165 void Pcre::reset() {
00166 did_match = false;
00167 num_matches = -1;
00168 }
00169
00170
00171
00172
00173
00174
00175
00176
00177 void Pcre::Compile(int flags) {
00178 p_pcre = pcre_compile((char *)_expression.c_str(), flags,
00179 (const char **)(&err_str), &erroffset, NULL);
00180
00181 if(p_pcre == NULL) {
00182
00183 string Error = err_str;
00184 throw exception("pcre_compile(..) failed: " + Error);
00185 }
00186
00187
00188 int where;
00189 int info = pcre_fullinfo( p_pcre, p_pcre_extra, PCRE_INFO_CAPTURECOUNT, &where);
00190 if(info == 0) {
00191 sub_len = (where +2) * 3;
00192 }
00193 else {
00194 throw exception(info);
00195 }
00196 reset();
00197 }
00198
00199
00200
00201
00202
00203
00204
00205 bool Pcre::search(const string& stuff, int OffSet) {
00206 return dosearch(stuff, OffSet);
00207 }
00208
00209 bool Pcre::search(const string& stuff) {
00210 return dosearch(stuff, 0);
00211 }
00212
00213 bool Pcre::dosearch(const string& stuff, int OffSet) {
00214 reset();
00215 sub_vec = new int[sub_len];
00216 int num = pcre_exec(p_pcre, p_pcre_extra, (char *)stuff.c_str(),
00217 (int)stuff.length(), OffSet, 0, (int *)sub_vec, sub_len);
00218
00219 if(num < 0) {
00220
00221 return false;
00222 }
00223 else if(num == 0) {
00224
00225 return false;
00226 }
00227 else if(num == 1) {
00228
00229 did_match = true;
00230 num_matches = 0;
00231 return true;
00232 }
00233 else if(num > 1) {
00234
00235 resultset = new Array;
00236 const char **stringlist;
00237 did_match = true;
00238 num_matches = num - 1;
00239
00240 int res = pcre_get_substring_list((char *)stuff.c_str(), sub_vec, num, &stringlist);
00241 if(res == 0) {
00242 for(int i=1; i<num; i++) {
00243 resultset->push_back(stringlist[i]);
00244 }
00245 pcre_free_substring_list(stringlist);
00246 }
00247 else {
00248 throw exception(res);
00249 }
00250 return true;
00251 }
00252 else {
00253
00254 return false;
00255 }
00256 }
00257
00258 Array* Pcre::get_sub_strings() {
00259 if(resultset != NULL)
00260 return resultset;
00261 else
00262 return NULL;
00263 }
00264
00265 string Pcre::get_match(int pos) {
00266 if(pos >= 0 && pos < num_matches) {
00267 ArrayIterator P = resultset->begin() + pos;
00268 return *P;
00269 }
00270 else {
00271 throw exception("out of range");
00272 }
00273 }
00274
00275 int Pcre::get_match_start(int pos) {
00276 if(pos >= 0 && pos <= num_matches) {
00277
00278
00279
00280 return sub_vec[ (++pos) * 2 ];
00281 }
00282 else {
00283 throw exception("out of range");
00284 }
00285 }
00286
00287 int Pcre::get_match_end(int pos) {
00288 if(pos >= 0 && pos <= num_matches) {
00289
00290
00291
00292
00293
00294 return sub_vec[ ((++pos) * 2) + 1 ] - 1;
00295 }
00296 else {
00297 throw exception("out of range");
00298 }
00299 }
00300
00301 size_t Pcre::get_match_length(int pos) {
00302 if(pos >= 0 && pos < num_matches) {
00303 ArrayIterator P = resultset->begin() + pos;
00304 return P->length();
00305 }
00306 else {
00307 throw exception("out of range");
00308 }
00309 }
00310
00311 Array Pcre::_split(const string& piece, int limit, int start_offset, int end_offset) {
00312 Array Splitted;
00313
00314 if(_expression.length() == 1) {
00315
00316 string buffer, _delimiter, _piece;
00317 char z;
00318 if(case_t) {
00319 z = toupper(_expression[0]);
00320 for(size_t pos=0; pos < piece.length(); pos++) {
00321 _piece += (char)toupper(piece[pos]);
00322 }
00323 }
00324 else {
00325 z = _expression[0];
00326 _piece = piece;
00327 }
00328 for(size_t pos=0; pos<piece.length(); pos++) {
00329 if(_piece[pos] == z) {
00330 Splitted.push_back(buffer);
00331 buffer = "";
00332 }
00333 else {
00334 buffer += piece[pos];
00335 }
00336 }
00337 if(buffer != "") {
00338 Splitted.push_back(buffer);
00339 }
00340 }
00341 else {
00342
00343 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00344
00345 pcre_free(p_pcre);
00346 pcre_free(p_pcre_extra);
00347
00348 pcre *_p = NULL;
00349 pcre_extra *_e = NULL;;
00350
00351 p_pcre = _p;
00352 p_pcre_extra = _e;
00353
00354 _expression = "(" + _expression + ")";
00355 Compile(_flags);
00356 }
00357 int num_pieces=0, pos=0, piece_end = 0, piece_start = 0;
00358 for(;;) {
00359 if(search(piece, pos) == true) {
00360 if(matches() > 0) {
00361 piece_end = get_match_start(0) - 1;
00362 piece_start = pos;
00363 pos = piece_end + 1 + get_match_length(0);
00364 string junk(piece, piece_start, (piece_end - piece_start)+1);
00365 num_pieces++;
00366 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00367 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00368 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00369
00370 Splitted.push_back(junk);
00371 }
00372 }
00373 }
00374 }
00375 }
00376 else {
00377
00378 string junk(piece, pos, (piece.length() - pos));
00379 num_pieces++;
00380 if( (limit != 0 && num_pieces < limit) || limit == 0) {
00381 if( (start_offset != 0 && num_pieces >= start_offset) || start_offset == 0) {
00382 if( (end_offset != 0 && num_pieces <= end_offset) || end_offset == 0) {
00383
00384 Splitted.push_back(junk);
00385 }
00386 }
00387 }
00388 break;
00389 }
00390 }
00391 }
00392 return Splitted;
00393 }
00394
00395 Array Pcre::split(const string& piece) {
00396 return _split(piece, 0, 0, 0);
00397 }
00398
00399 Array Pcre::split(const string& piece, int limit) {
00400 return _split(piece, limit, 0, 0);
00401 }
00402
00403 Array Pcre::split(const string& piece, int limit, int start_offset) {
00404 return _split(piece, limit, start_offset, 0);
00405 }
00406
00407 Array Pcre::split(const string& piece, int limit, int start_offset, int end_offset) {
00408 return _split(piece, limit, start_offset, end_offset);
00409 }
00410
00411 Array Pcre::split(const string& piece, vector<int> positions) {
00412 Array PreSplitted = _split(piece, 0, 0, 0);
00413 Array Splitted;
00414 for(vector<int>::iterator vecIt=positions.begin(); vecIt != positions.end(); ++vecIt) {
00415 Splitted.push_back(PreSplitted[*vecIt]);
00416 }
00417 return Splitted;
00418 }
00419
00420
00421 string Pcre::replace(const string& piece, const string& with) {
00422 string Replaced(piece);
00423
00424 if(_expression[0] != '(' && _expression[ _expression.length() ] != ')' ) {
00425
00426 _expression = "(" + _expression + ")";
00427
00428 pcre_free(p_pcre);
00429 pcre_free(p_pcre_extra);
00430
00431 pcre *_p = NULL;
00432 pcre_extra *_e = NULL;;
00433
00434 p_pcre = _p;
00435 p_pcre_extra = _e;
00436
00437 _expression = "(" + _expression + ")";
00438 Compile(_flags);
00439 }
00440
00441 if(search(piece)) {
00442
00443 string use_with = _replace_vars(with);
00444 if(!global_t) {
00445
00446 if(matched() && matches() >= 1) {
00447 Replaced.replace(get_match_start(0), (get_match_end(0) - get_match_start(0)) + 1, use_with);
00448 }
00449 }
00450 else {
00451
00452 Array Splitted = split(piece);
00453 Replaced = "";
00454 for(size_t pos=0; pos < Splitted.size(); pos++) {
00455 if(pos == (Splitted.size() - 1))
00456 Replaced += Splitted[pos];
00457 else
00458 Replaced += Splitted[pos] + with;
00459 }
00460 }
00461 }
00462 return Replaced;
00463 }
00464
00465 string Pcre::_replace_vars(const string& piece) {
00466 Pcre dollar("\\$[0-9]+");
00467 string with = piece;
00468 if(dollar.search(with)) {
00469 for(int index=0; index < num_matches; index++) {
00470
00471 string sub = get_match(index);
00472 ostringstream num(index+1);
00473 Pcre subsplit(string("(\\$") + num.str() + ")");
00474 Array splitted = subsplit.split(with);
00475 string Replaced;
00476 for(size_t pos=0; pos < splitted.size(); pos++) {
00477 if(pos == (splitted.size() - 1))
00478 Replaced += splitted[pos];
00479 else
00480 Replaced += splitted[pos] + sub;
00481 }
00482 with = Replaced;
00483 }
00484 return with;
00485 }
00486 else {
00487
00488 return with;
00489 }
00490 }