00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 using namespace std;
00012
00013
00014 #include <set>
00015 #include <vector>
00016 #include <string>
00017
00018 extern "C" {
00019 #include "define.h"
00020 #include "libstrfunc.h"
00021 #include "libpst.h"
00022 #include "common.h"
00023 #include "timeconv.h"
00024 #include "lzfu.h"
00025 #include "stdarg.h"
00026 #include "iconv.h"
00027 }
00028
00029 int32_t usage();
00030 int32_t version();
00031 char *check_filename(char *fname);
00032 void print_ldif_single(const char *attr, const char *value);
00033 void print_ldif_address(const char *attr, int nvalues, char *value, ...);
00034 void print_ldif_dn(const char *attr, const char *value, const char *base);
00035 void print_ldif_multi(const char *dn, const char *value);
00036 void print_ldif_two(const char *attr, const char *value1, const char *value2);
00037 void print_escaped_dn(const char *value);
00038 void build_cn(char *cn, size_t len, int nvalues, char *value, ...);
00039
00040 char *prog_name;
00041 pst_file pstfile;
00042 bool old_schema = false;
00043 char *ldap_base = NULL;
00044 int ldif_extra_line_count = 0;
00045 iconv_t cd = 0;
00046 vector<string> ldap_class;
00047 vector<string> ldif_extra_line;
00048
00049
00051
// Strict weak ordering for C strings that ignores case, so that names
// differing only in capitalisation collide in a string_set.
struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
        const int order = strcasecmp(s1, s2);
        return order < 0;
    }
};

// Set of C string pointers, ordered case-insensitively.
typedef std::set<const char *, ltstr> string_set;

// Every string handed out by register_string(const char *) in this file.
static string_set all_strings;
00061
00062
00064
00065
00066 static void free_strings(string_set &s);
00067 static void free_strings(string_set &s)
00068 {
00069 for (string_set::iterator i=s.begin(); i!=s.end(); i++) {
00070 free((void*)*i);
00071 }
00072 s.clear();
00073 }
00074
00075
00077
00078
00079 static const char* register_string(string_set &s, const char *name);
00080 static const char* register_string(string_set &s, const char *name) {
00081 string_set::const_iterator i = s.find(name);
00082 if (i != s.end()) return *i;
00083 char *x = strdup(name);
00084 s.insert(x);
00085 return x;
00086 }
00087
00088
00090
00091
00092 static const char* register_string(const char *name);
00093 static const char* register_string(const char *name) {
00094 return register_string(all_strings, name);
00095 }
00096
00097
00099
00100
00101 static const char* unique_string(const char *name);
00102 static const char* unique_string(const char *name) {
00103 int unique = 2;
00104 string_set::iterator i = all_strings.find(name);
00105 if (i == all_strings.end()) return register_string(name);
00106 while (true) {
00107 char n[strlen(name)+10];
00108 snprintf(n, sizeof(n), "%s %d", name, unique++);
00109 string_set::iterator i = all_strings.find(n);
00110 if (i == all_strings.end()) return register_string(n);
00111 }
00112 }
00113
00114
00115 static void process(pst_desc_ll *d_ptr);
00116 static void process(pst_desc_ll *d_ptr) {
00117 pst_item *item = NULL;
00118 while (d_ptr) {
00119 if (d_ptr->desc) {
00120 item = pst_parse_item(&pstfile, d_ptr);
00121 DEBUG_INFO(("item pointer is %p\n", item));
00122 if (item) {
00123 if (item->folder && d_ptr->child && strcasecmp(item->file_as, "Deleted Items")) {
00124
00125 fprintf(stderr, "entering folder %s\n", item->file_as);
00126 process(d_ptr->child);
00127
00128 } else if (item->contact && (item->type == PST_TYPE_CONTACT)) {
00129
00130 char cn[1000];
00131
00132 build_cn(cn, sizeof(cn), 4,
00133 item->contact->display_name_prefix,
00134 item->contact->first_name,
00135 item->contact->surname,
00136 item->contact->suffix);
00137 if (cn[0] != 0) {
00138
00139 const char *ucn = unique_string(cn);
00140
00141 print_ldif_dn("dn", ucn, ldap_base);
00142 print_ldif_single("cn", ucn);
00143 if (item->contact->first_name) {
00144 print_ldif_two("givenName",
00145 item->contact->display_name_prefix,
00146 item->contact->first_name);
00147 }
00148 if (item->contact->surname) {
00149 print_ldif_two("sn",
00150 item->contact->surname,
00151 item->contact->suffix);
00152 }
00153 else if (item->contact->company_name) {
00154 print_ldif_single("sn", item->contact->company_name);
00155 }
00156 else
00157 print_ldif_single("sn", ucn);
00158
00159 if (old_schema) {
00160 if (item->contact->job_title)
00161 print_ldif_single("personalTitle", item->contact->job_title);
00162 if (item->contact->company_name)
00163 print_ldif_single("company", item->contact->company_name);
00164 }
00165 else {
00166
00167 if (item->contact->job_title)
00168 print_ldif_single("title", item->contact->job_title);
00169 if (item->contact->company_name)
00170 print_ldif_single("o", item->contact->company_name);
00171 }
00172 if (item->contact->address1 && *item->contact->address1)
00173 print_ldif_single("mail", item->contact->address1);
00174 if (item->contact->address2 && *item->contact->address2)
00175 print_ldif_single("mail", item->contact->address2);
00176 if (item->contact->address3 && *item->contact->address3)
00177 print_ldif_single("mail", item->contact->address3);
00178 if (item->contact->address1a && *item->contact->address1a)
00179 print_ldif_single("mail", item->contact->address1a);
00180 if (item->contact->address2a && *item->contact->address2a)
00181 print_ldif_single("mail", item->contact->address2a);
00182 if (item->contact->address3a && *item->contact->address3a)
00183 print_ldif_single("mail", item->contact->address3a);
00184
00185 if (old_schema) {
00186 if (item->contact->business_address) {
00187 if (item->contact->business_po_box)
00188 print_ldif_single("postalAddress", item->contact->business_po_box);
00189 if (item->contact->business_street)
00190 print_ldif_multi("postalAddress", item->contact->business_street);
00191 if (item->contact->business_city)
00192 print_ldif_single("l", item->contact->business_city);
00193 if (item->contact->business_state)
00194 print_ldif_single("st", item->contact->business_state);
00195 if (item->contact->business_postal_code)
00196 print_ldif_single("postalCode", item->contact->business_postal_code);
00197 }
00198 else if (item->contact->home_address) {
00199 if (item->contact->home_po_box)
00200 print_ldif_single("postalAddress", item->contact->home_po_box);
00201 if (item->contact->home_street)
00202 print_ldif_multi("postalAddress", item->contact->home_street);
00203 if (item->contact->home_city)
00204 print_ldif_single("l", item->contact->home_city);
00205 if (item->contact->home_state)
00206 print_ldif_single("st", item->contact->home_state);
00207 if (item->contact->home_postal_code)
00208 print_ldif_single("postalCode", item->contact->home_postal_code);
00209 }
00210 else if (item->contact->other_address) {
00211 if (item->contact->other_po_box)
00212 print_ldif_single("postalAddress", item->contact->other_po_box);
00213 if (item->contact->other_street)
00214 print_ldif_multi("postalAddress", item->contact->other_street);
00215 if (item->contact->other_city)
00216 print_ldif_single("l", item->contact->other_city);
00217 if (item->contact->other_state)
00218 print_ldif_single("st", item->contact->other_state);
00219 if (item->contact->other_postal_code)
00220 print_ldif_single("postalCode", item->contact->other_postal_code);
00221 }
00222 }
00223 else {
00224
00225 if (item->contact->business_address) {
00226 print_ldif_address("postalAddress", 6,
00227 item->contact->business_po_box,
00228 item->contact->business_street,
00229 item->contact->business_city,
00230 item->contact->business_state,
00231 item->contact->business_postal_code,
00232 item->contact->business_country);
00233 if (item->contact->business_city)
00234 print_ldif_single("l", item->contact->business_city);
00235 if (item->contact->business_state)
00236 print_ldif_single("st", item->contact->business_state);
00237 if (item->contact->business_postal_code)
00238 print_ldif_single("postalCode", item->contact->business_postal_code);
00239 }
00240 else if (item->contact->home_address) {
00241 if (item->contact->home_city)
00242 print_ldif_single("l", item->contact->home_city);
00243 if (item->contact->home_state)
00244 print_ldif_single("st", item->contact->home_state);
00245 if (item->contact->home_postal_code)
00246 print_ldif_single("postalCode", item->contact->home_postal_code);
00247 }
00248 else if (item->contact->other_address) {
00249 print_ldif_address("postalAddress", 6,
00250 item->contact->other_po_box,
00251 item->contact->other_street,
00252 item->contact->other_city,
00253 item->contact->other_state,
00254 item->contact->other_postal_code,
00255 item->contact->other_country);
00256 if (item->contact->other_city)
00257 print_ldif_single("l", item->contact->other_city);
00258 if (item->contact->other_state)
00259 print_ldif_single("st", item->contact->other_state);
00260 if (item->contact->other_postal_code)
00261 print_ldif_single("postalCode", item->contact->other_postal_code);
00262 }
00263 if (item->contact->home_address) {
00264 print_ldif_address("homePostalAddress", 6,
00265 item->contact->home_po_box,
00266 item->contact->home_street,
00267 item->contact->home_city,
00268 item->contact->home_state,
00269 item->contact->home_postal_code,
00270 item->contact->home_country);
00271 }
00272 }
00273
00274 if (item->contact->business_fax)
00275 print_ldif_single("facsimileTelephoneNumber", item->contact->business_fax);
00276 else if (item->contact->home_fax)
00277 print_ldif_single("facsimileTelephoneNumber", item->contact->home_fax);
00278
00279 if (item->contact->business_phone)
00280 print_ldif_single("telephoneNumber", item->contact->business_phone);
00281 if (item->contact->home_phone)
00282 print_ldif_single("homePhone", item->contact->home_phone);
00283
00284 if (item->contact->car_phone)
00285 print_ldif_single("mobile", item->contact->car_phone);
00286 else if (item->contact->mobile_phone)
00287 print_ldif_single("mobile", item->contact->mobile_phone);
00288 else if (item->contact->other_phone)
00289 print_ldif_single("mobile", item->contact->other_phone);
00290
00291 if (!old_schema) {
00292 if (item->contact->business_homepage)
00293 print_ldif_single("labeledURI", item->contact->business_homepage);
00294 if (item->contact->personal_homepage)
00295 print_ldif_single("labeledURI", item->contact->personal_homepage);
00296 }
00297
00298 if (item->comment)
00299 print_ldif_single("description", item->comment);
00300
00301 for (vector<string>::size_type i=0; i<ldap_class.size(); i++)
00302 print_ldif_single("objectClass", ldap_class[i].c_str());
00303 printf("\n");
00304 }
00305 }
00306 else {
00307 DEBUG_INFO(("item is not a contact\n"));
00308 }
00309 }
00310 pst_freeItem(item);
00311 }
00312 d_ptr = d_ptr->next;
00313 }
00314 }
00315
00316
00317
00318
00319
00320 void print_ldif_single(const char *attr, const char *value)
00321 {
00322 size_t len;
00323 bool is_safe_string = true;
00324 bool needs_code_conversion = false;
00325 bool space_flag = false;
00326
00327
00328 while (*value == ' ') value++;
00329 len = strlen(value) + 1;
00330 char buffer[len];
00331 char *p = buffer;
00332
00333
00334
00335 if (*value == ':' || *value == '<')
00336 is_safe_string = false;
00337 for (;;) {
00338 char ch = *value++;
00339
00340 if (ch == 0 || ch == '\n')
00341 break;
00342 else if (ch == '\r')
00343 continue;
00344 else if (ch == ' ') {
00345 space_flag = true;
00346 continue;
00347 }
00348 else {
00349 if ((ch & 0x80) == 0x80) {
00350 needs_code_conversion = true;
00351 is_safe_string = false;
00352 }
00353 if (space_flag) {
00354 *p++ = ' ';
00355 space_flag = false;
00356 }
00357 *p++ = ch;
00358 }
00359 }
00360 *p = 0;
00361 if (is_safe_string) {
00362 printf("%s: %s\n", attr, buffer);
00363 return;
00364 }
00365
00366 if (needs_code_conversion && cd != 0) {
00367 size_t inlen = p - buffer;
00368 size_t utf8_len = 2 * inlen + 1;
00369 char utf8_buffer[utf8_len];
00370 char *utf8_p = utf8_buffer;
00371
00372 iconv(cd, NULL, NULL, NULL, NULL);
00373 p = buffer;
00374 int ret = iconv(cd, (ICONV_CONST char**)&p, &inlen, &utf8_p, &utf8_len);
00375
00376 if (ret >= 0) {
00377 *utf8_p = 0;
00378 p = base64_encode(utf8_buffer, utf8_p - utf8_buffer);
00379 }
00380 else
00381 p = base64_encode(buffer, strlen(buffer));
00382 }
00383 else
00384 p = base64_encode(buffer, strlen(buffer));
00385 printf("%s:: %s\n", attr, p);
00386 free(p);
00387 }
00388
00389
00390
00391
00392 void print_ldif_address(const char *attr, int nvalues, char *value, ...)
00393 {
00394 bool space_flag = false;
00395 bool newline_flag = false;
00396 char *address = NULL;
00397 int len = 0;
00398 int i = 0;
00399 va_list ap;
00400
00401 va_start(ap, value);
00402
00403 while (!value) {
00404 nvalues--;
00405 if (nvalues == 0) {
00406 va_end(ap);
00407 return;
00408 }
00409 value = va_arg(ap, char *);
00410 }
00411 for (;;) {
00412 char ch = *value++;
00413
00414 if (ch == 0 || ch == '\n') {
00415 do {
00416 value = NULL;
00417 nvalues--;
00418 if (nvalues == 0) break;
00419 value = va_arg(ap, char *);
00420 } while (!value);
00421 if (!value) break;
00422 space_flag = true;
00423 newline_flag = true;
00424 }
00425 else if (ch == '\r')
00426 continue;
00427 else if (ch == '\n') {
00428 newline_flag = true;
00429 continue;
00430 }
00431 else if (ch == ' ') {
00432 space_flag = true;
00433 continue;
00434 }
00435 else {
00436 if (i > (len-5)) {
00437 len += 256;
00438 address = (char *)realloc(address, len);
00439 }
00440 if (newline_flag) {
00441 address[i++] = '$';
00442 newline_flag = false;
00443 space_flag = false;
00444 }
00445 else if (space_flag) {
00446 address[i++] = ' ';
00447 space_flag = false;
00448 }
00449 if (ch == '$' || ch == '\\') address[i++] = '\\';
00450 address[i++] = ch;
00451 }
00452 }
00453 va_end(ap);
00454 if (i == 0) return;
00455 address[i] = 0;
00456 print_ldif_single(attr, address);
00457 free(address);
00458 }
00459
00460
00461 void print_ldif_multi(const char *dn, const char *value)
00462 {
00463 const char *n;
00464 while ((n = strchr(value, '\n'))) {
00465 print_ldif_single(dn, value);
00466 value = n + 1;
00467 }
00468 print_ldif_single(dn, value);
00469 }
00470
00471
00472 void print_ldif_two(const char *attr, const char *value1, const char *value2)
00473 {
00474 size_t len1, len2;
00475 if (value1 && *value1)
00476 len1 = strlen(value1);
00477 else {
00478 print_ldif_single(attr, value2);
00479 return;
00480 }
00481
00482 if (value2 && *value2)
00483 len2 = strlen(value2);
00484 else {
00485 print_ldif_single(attr, value1);
00486 return;
00487 }
00488
00489 char value[len1 + len2 + 2];
00490 memcpy(value, value1, len1);
00491 value[len1] = ' ';
00492 memcpy(value + len1 + 1, value2, len2 + 1);
00493 print_ldif_single(attr, value);
00494 }
00495
00496
// Build a common name from up to nvalues name parts.
//
// cn       output buffer; always NUL-terminated on return when len > 0
// len      size of cn in bytes
// nvalues  number of char* arguments that follow, counting value
// value    first part; it and the remaining parts may each be NULL
//
// NULL parts are skipped. Only the first line of each part is used,
// carriage returns are dropped, and parts as well as words inside a part
// are separated by exactly one blank; the result never starts or ends with
// a blank. The result is silently truncated to fit len. With len == 0
// nothing is written, and with nvalues <= 0 the result is the empty string.
void build_cn(char *cn, size_t len, int nvalues, char *value, ...)
{
    bool space_flag = false;
    size_t i = 0;
    va_list ap;

    if (len == 0) return;       // no room even for the terminator
    cn[0] = 0;
    if (nvalues <= 0) return;   // no parts to read

    va_start(ap, value);

    // Find the first part that is present.
    while (!value) {
        nvalues--;
        if (nvalues <= 0) {
            va_end(ap);
            return;
        }
        value = va_arg(ap, char *);
    }
    for (;;) {
        char ch = *value++;

        if (ch == 0 || ch == '\n') {
            // End of this part (or of its first line): advance to the next
            // part that is present.
            do {
                value = NULL;
                nvalues--;
                if (nvalues <= 0) break;
                value = va_arg(ap, char *);
            } while (!value);
            if (!value) break;
            space_flag = true;
        }
        else if (ch == '\r') {
            continue;
        }
        else if (ch == ' ') {
            space_flag = true;
            continue;
        }
        else {
            if (space_flag) {
                if (i > 0) {
                    // The blank is only worth writing if the character after
                    // it fits too. Written as i + 2 < len so that a tiny len
                    // cannot wrap around.
                    if (i + 2 < len) cn[i++] = ' ';
                    else break;
                }
                space_flag = false;
            }
            if (i + 1 < len) cn[i++] = ch;
            else break;
        }
    }
    cn[i] = 0;
    va_end(ap);
}
00548
00549
00550 int main(int argc, char** argv) {
00551 pst_desc_ll *d_ptr;
00552 char *fname = NULL;
00553 int c;
00554 char *d_log = NULL;
00555 prog_name = argv[0];
00556 pst_item *item = NULL;
00557
00558 while ((c = getopt(argc, argv, "b:c:C:d:l:oVh"))!= -1) {
00559 switch (c) {
00560 case 'b':
00561 ldap_base = optarg;
00562 break;
00563 case 'c':
00564 ldap_class.push_back(string(optarg));
00565 break;
00566 case 'C':
00567 cd = iconv_open("UTF-8", optarg);
00568 if (cd == (iconv_t)(-1)) {
00569 fprintf(stderr, "I don't know character set \"%s\"!\n\n", optarg);
00570 fprintf(stderr, "Type: \"iconv --list\" to get list of known character sets\n");
00571 return 1;
00572 }
00573 break;
00574 case 'd':
00575 d_log = optarg;
00576 break;
00577 case 'h':
00578 usage();
00579 exit(0);
00580 break;
00581 case 'l':
00582 ldif_extra_line.push_back(string(optarg));
00583 break;
00584 case 'o':
00585 old_schema = true;
00586 break;
00587 case 'V':
00588 version();
00589 exit(0);
00590 break;
00591 default:
00592 usage();
00593 exit(1);
00594 break;
00595 }
00596 }
00597
00598 if ((argc > optind) && (ldap_base)) {
00599 fname = argv[optind];
00600 } else {
00601 usage();
00602 exit(2);
00603 }
00604
00605 #ifdef DEBUG_ALL
00606
00607 if (!d_log) d_log = "pst2ldif.log";
00608 #endif
00609 DEBUG_INIT(d_log);
00610 DEBUG_REGISTER_CLOSE();
00611 DEBUG_ENT("main");
00612 RET_DERROR(pst_open(&pstfile, fname), 1, ("Error opening File\n"));
00613 RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n"));
00614
00615 pst_load_extended_attributes(&pstfile);
00616
00617 d_ptr = pstfile.d_head;
00618 item = (pst_item*)pst_parse_item(&pstfile, d_ptr);
00619 if (!item || !item->message_store) {
00620 DEBUG_RET();
00621 DIE(("main: Could not get root record\n"));
00622 }
00623
00624 d_ptr = pst_getTopOfFolders(&pstfile, item);
00625 if (!d_ptr) {
00626 DEBUG_RET();
00627 DIE(("Top of folders record not found. Cannot continue\n"));
00628 }
00629
00630 pst_freeItem(item);
00631
00632 if (old_schema && (strlen(ldap_base) > 2)) {
00633 char *ldap_org = strdup(ldap_base+2);
00634 char *temp = strchr(ldap_org, ',');
00635 if (temp) {
00636 *temp = '\0';
00637
00638 printf("dn: %s\n", ldap_base);
00639 printf("o: %s\n", ldap_org);
00640 printf("objectClass: organization\n\n");
00641 printf("dn: cn=root, %s\n", ldap_base);
00642 printf("cn: root\n");
00643 printf("sn: root\n");
00644 for (vector<string>::size_type i=0; i<ldap_class.size(); i++)
00645 print_ldif_single("objectClass", ldap_class[i].c_str());
00646 printf("\n");
00647 }
00648 }
00649
00650 process(d_ptr->child);
00651 pst_close(&pstfile);
00652 DEBUG_RET();
00653 free_strings(all_strings);
00654 if (cd) iconv_close(cd);
00655
00656 return 0;
00657 }
00658
00659
00660 int32_t usage() {
00661 version();
00662 printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
00663 printf("OPTIONS:\n");
00664 printf("\t-V\t- Version. Display program version\n");
00665 printf("\t-C charset\t- assumed character set of non-ASCII characters\n");
00666 printf("\t-b ldapbase\t- set the LDAP base value\n");
00667 printf("\t-c class\t- set the class of the LDAP objects (may contain more than one)\n");
00668 printf("\t-d <filename>\t- Debug to file. This is a binary log. Use readpstlog to print it\n");
00669 printf("\t-h\t- Help. This screen\n");
00670 printf("\t-l line\t- extra line to insert in the LDIF file for each contact\n");
00671 printf("\t-o\t- use old schema, default is new schema\n");
00672 return 0;
00673 }
00674
00675
00676 int32_t version() {
00677 printf("pst2ldif v%s\n", VERSION);
00678 #if BYTE_ORDER == BIG_ENDIAN
00679 printf("Big Endian implementation being used.\n");
00680 #elif BYTE_ORDER == LITTLE_ENDIAN
00681 printf("Little Endian implementation being used.\n");
00682 #else
00683 # error "Byte order not supported by this library"
00684 #endif
00685 #ifdef __GNUC__
00686 printf("GCC %d.%d : %s %s\n", __GNUC__, __GNUC_MINOR__, __DATE__, __TIME__);
00687 #endif
00688 return 0;
00689 }
00690
00691
// Replace every '/', '\' and ':' in fname with '_', in place.
// Returns fname itself; a NULL fname is returned unchanged.
char *check_filename(char *fname) {
    if (fname != NULL) {
        for (char *cursor = fname; *cursor != '\0'; ++cursor) {
            if (*cursor == '/' || *cursor == '\\' || *cursor == ':') {
                *cursor = '_';
            }
        }
    }
    return fname;
}
00703
00704
00705
00706 void print_ldif_dn(const char *attr, const char *value, const char *base)
00707 {
00708 printf("dn: cn=");
00709
00710 while (*value == ' ')
00711 value++;
00712
00713 print_escaped_dn(value);
00714 if (base && base[0]) {
00715 printf(", %s", base);
00716 }
00717 printf("\n");
00718 return;
00719 }
00720
00721
00722 void print_escaped_dn(const char *value)
00723 {
00724 char ch;
00725 bool needs_code_conversion = false;
00726 char *utf8_buffer = NULL;
00727
00728
00729 if (cd) {
00730 const char *p = value;
00731 while (*p) {
00732 if (*p++ & 0x80) {
00733 needs_code_conversion = true;
00734 break;
00735 }
00736 }
00737 }
00738
00739 if (needs_code_conversion) {
00740 size_t inlen = strlen(value);
00741 size_t utf8_len = 2 * inlen + 1;
00742 char *p = (char *)value;
00743 char *utf8_p = utf8_buffer;
00744
00745 utf8_buffer = (char *)malloc(utf8_len);
00746 utf8_p = utf8_buffer;
00747 iconv(cd, NULL, NULL, NULL, NULL);
00748 if (iconv(cd, (ICONV_CONST char**)&p, &inlen, &utf8_p, &utf8_len) >= 0) {
00749 *utf8_p = 0;
00750 value = utf8_buffer;
00751 }
00752 }
00753
00754
00755 if (*value == '#' || *value == ' ')
00756 putchar('\\');
00757
00758 while ((ch = *value++) != 0) {
00759 if (((ch & 0x80) != 0) || (ch <= 0x1F))
00760
00761 printf("\\%2.2X", ch & 0xFF);
00762 else switch (ch) {
00763 case '\\':
00764 case '"' :
00765 case '+' :
00766 case ',' :
00767 case ';' :
00768 case '<' :
00769 case '>' :
00770 putchar('\\');
00771
00772 default:
00773 putchar(ch);
00774 }
00775 }
00776 if (utf8_buffer) free((void *)utf8_buffer);
00777 return;
00778 }