<?php
/* Script to convert an Address Book LDIF file to one
   conformable to core and Mozilla Thunderbird 1.0 schema.

   Version 1.0.1

   The following information is needed up front:

   1.  Base DN, e.g.
	   o=Sudleyplace,ou=AddressBooks,dc=Qualitas,dc=com

   Change the variable $BaseDN to suit your purposes.


   The following conversions must be made (note that the order of some of
   these conversions is important -- e.g., Rule 1 escapes reserved symbols
   and then Rule 2 escapes non-ASCII symbols.  If we did it in the opposite
   order, the backslash preceding a non-ASCII symbol would be escaped again
   -- not what we want):

   1.	Escape reserved symbols in dn: and member: lines, e.g.
		(',', '+', '"', '\', '<', '>' or ';').

   2.	Escape non-ASCII characters in dn: and member: lines.

   3.	If both cn= and mail= appear in a dn: or member:, they must be
		joined with a plus sign to create a multi-valued RDN, e.g.
		dn: cn=First Last,mail=email@example.com
		dn: cn=First Last+mail=email@example.com

   4.	The Base DN must be appended to each dn:

   5.	Capitalize "true" and "false" after xmozillausehtmlmail: and
		mozillaUseHtmlMail:

   6.	Comment out modifytimestamp:, e.g.
		modifytimestamp:
		#modifytimestamp:

   7.	Comment out mozillaNickname: if within groupOfNames

   8a.	Change mozilla_AimScreenName: to nsAIMid:
   8b.	Change company: 			  to o:
   8c.	Change homeStreet:			  to mozillaHomeStreet:
   8d.	Change department:			  to ou:

   9.	There must be a cn: and sn: field in each section.

   10.	No duplicate DNs may be present.
		Note that we don't fix this problem; instead we output a
		  message to standard error.

   Written by Bob Smith (bsmith@sudleyplace.com) in conjunction with
   the web page at http://www.sudleyplace.com/LDAP/.

   It is assumed that the file to be converted is in standard input;
   the output file is to be written to standard output.  Thus, this
   script is expected to be used as follows:

		php abookconvet.php <abookin.ldif >abookout.ldif
 */

// Change the following line to suit your address book.
$BaseDN = 'o=Sudleyplace,ou=AddressBooks,dc=Qualitas,dc=com';

// Require at least PHP version 5
if (version_compare (PHP_VERSION, '5.0.0', '<'))
{
	fwrite (STDERR, '*** This program requires PHP 5.0.0 or later.');

	// Exit with a non-zero status so that a calling shell or
	//	 script can tell that the conversion did not take place.
	exit (1);
} // End IF

// Read in all of standard input as an array of lines (with trailing newlines)
$inpfile = file ('php://stdin');

// file () returns FALSE if the stream cannot be read, in which
//	 case there is nothing to convert.
if ($inpfile === false)
{
	fwrite (STDERR, "*** Unable to read standard input.\n");
	exit (1);
} // End IF

// Strip trailing whitespace (including the newline) from each line.
//	 Whitespace-only lines become empty, which is what the later
//	 split into sections on "\n\n" relies upon.
foreach ($inpfile as &$line)
	$line = rtrim ($line);

// Break the reference so that $line no longer aliases
//	 the last element of the array.
unset ($line);

$RS = "\\,+\"<>;";  // Reserved Symbols

// Build the two parallel tables used by EscapeReserved ():
//	 $ERpat[k] is a regex matching the k-th reserved symbol and
//	 $ERrep[k] is the replacement (that symbol preceded by a
//	 backslash).  The backslash comes first in $RS so that it is
//	 escaped before any backslashes are added for the other symbols.
$ERpat = array ();
$ERrep = array ();

foreach (str_split ($RS) as $Sym)
{
	// Backslash, plus and doublequote are wrapped in a character
	//	 class (the backslash doubled) so the regex engine takes
	//	 them literally; the remaining symbols are used as is.
	if ($Sym == '\\')
		$ERpat[] = '/[\\\\]/';
	elseif ($Sym == '+' || $Sym == '"')
		$ERpat[] = '/[' . $Sym . ']/';
	else
		$ERpat[] = '/' . $Sym . '/';

	// The replacement text is backslash + symbol with every
	//	 backslash doubled, so that preg_replace () emits each
	//	 one literally.
	$ERrep[] = str_replace ('\\', '\\\\', '\\' . $Sym);
} // End FOREACH

// Convert the input file as per the above rules.
// At this point $inpfile is an array of lines.  Each rule is applied
//	 line by line; folded (continuation) lines are not rejoined first,
//	 so a dn: or member: value is assumed to fit on one physical line.

// Rule 0:	Decode base64 encoding in dn: and member:
//			The double colon is kept so that Rule 2 can still
//			recognize the lines which were decoded.
$inpfile = preg_replace_callback ("/^((?:dn|member)::\s+)(.*)/i",
								  "Base64Decode",
								  $inpfile);

// Rule 1:	Escape reserved symbols in dn: and member:, e.g.
//	   (',', '+', '"', '\', '<', '>' or ';').
// 1a  dn:
// 1b  dn: cn=...
// 1c  dn: cn=...,mail=...
// 1d  dn:		  mail=...

// Substitute double backslashes for backslashes,
//	 and backslash-doublequote for doublequote so
//	 the pattern and replacements are proper for
//	 regex.
$R2 = preg_replace ("/\\\/", "\\\\\\", $RS);
$R2 = preg_replace ("/\"/",  "\\\"",   $R2);
$RC = "[$R2]";      // Regex Class of reserved symbols

// The colon may be doubled ("::?") because lines decoded by Rule 0
//	 still carry their double colon at this point.  Without that,
//	 a base64-encoded DN such as "cn=Last, First" would never have
//	 its reserved symbols escaped.
$inpfile = preg_replace_callback ("/^((?:dn|member)::?\s+cn=.*?)($RC.*)/i",   // 1b & 1c
								  "EscapeReserved",
								  $inpfile);

$inpfile = preg_replace_callback ("/^((?:dn|member)::?\s+mail=.*?)($RC.*)/i", // 1d
								  "EscapeReserved",
								  $inpfile);

// Rule 2:	Escape non-ASCII characters in dn: and member:
//			Only lines still marked with a double colon (i.e. those
//			decoded by Rule 0) are touched; the callback also reduces
//			the double colon to a single one.
$inpfile = preg_replace_callback ("/^((?:dn|member)::\s+)(.*)/i",
								  "EscapeNonASCII",
								  $inpfile);

// Rule 3:	If both cn= and mail= appear in a dn: or member:, they must be
//			joined with a plus sign to create a multi-valued RDN, e.g.
//			dn: cn=First Last,mail=email@example.com
//			dn: cn=First Last+mail=email@example.com
$inpfile = preg_replace ("/^((?:dn|member):\s+cn=.*?),(mail=.*)/i",
						 "\$1+\$2",
						 $inpfile);

// Rule 4:	The Base DN must be appended to each dn:
$inpfile = preg_replace ("/^(dn:.*)/i",
						 "\$1,$BaseDN",
						 $inpfile);

// Rule 5:	Capitalize "true" and "false" after xmozillausehtmlmail: and
//			mozillaUseHtmlMail:
$inpfile = preg_replace ("/^((xmozillausehtmlmail:|mozillaUseHtmlMail:)\s+)false/i",
						 "\$1FALSE",
						 $inpfile);

$inpfile = preg_replace ("/^((xmozillausehtmlmail:|mozillaUseHtmlMail:)\s+)true/i",
						 "\$1TRUE",
						 $inpfile);

// Rule 6:	Comment out modifytimestamp:, e.g.
//			modifytimestamp:
//			#modifytimestamp:
$inpfile = preg_replace ("/^modifytimestamp:/i",
						 "#modifytimestamp:",
						 $inpfile);

// Rule 7:	Comment out mozillaNickname: if within groupOfNames
// Convert from an array of lines to an array of sections
//	 (sections are separated by one empty line)
$inpfile = explode ("\n\n", implode ("\n", $inpfile));

foreach ($inpfile as &$sect)
if (preg_match ("/^objectclass:\s+groupOfNames$/mi", $sect))
	$sect = preg_replace ("/^mozillaNickname:/mi",
						  "#mozillaNickname:",
						  $sect);

// Break the reference so that $sect no longer aliases
//	 the last section.
unset ($sect);

// Convert from an array of sections to an array of lines
$inpfile = explode ("\n", implode ("\n\n", $inpfile));

// Rule 8a:  Change mozilla_AimScreenName: to nsAIMid:
$inpfile = preg_replace ("/^mozilla_AimScreenName:/i",
						 "nsAIMid:",
						 $inpfile);

// Rule 8b:  Change company: to o:
$inpfile = preg_replace ("/^company:/i",
						 "o:",
						 $inpfile);

// Rule 8c:  Change homeStreet: to mozillaHomeStreet:
$inpfile = preg_replace ("/^homeStreet:/i",
						 "mozillaHomeStreet:",
						 $inpfile);

// Rule 8d:  Change department: to ou:
$inpfile = preg_replace ("/^department:/i",
						 "ou:",
						 $inpfile);

// Rule 9:	There must be a cn: and sn: field in each section.

// Convert from an array of lines to an array of sections
$inpfile = explode ("\n\n", implode ("\n", $inpfile));

$cnCount = $snCount = 0;
foreach ($inpfile as &$sect)
{
	// In case there are any trailing newlines, ...
	$sect = rtrim ($sect);

	// A section with no "dn:" line is not an entry (e.g. an empty
	//	 section produced by extra blank lines, or a "version:"
	//	 header), so it must not be given dummy attributes.
	if (preg_match ("/^dn:/mi", $sect) == 0)
		continue;

	// If there's no "sn:" and no "objectclass: groupOfNames",
	//	 append a dummy "sn:".
	if (preg_match ("/^sn:/mi", $sect) == 0
	 && preg_match ("/^objectclass:\s+groupOfNames/mi", $sect) == 0)
		$sect .= "\nsn: sn" . ($snCount++);

	// If there's no "cn:",
	//	 append a dummy "cn:".
	if (preg_match ("/^cn:/mi", $sect) == 0)
	{
		$cn = "cn" . ($cnCount++);
		$sect .= "\ncn: $cn";

		// Place this field into the DN, directly after "dn:" and
		//	 any blanks which follow it.  A "dn::" line (value still
		//	 base64 encoded) is left alone.
		if (preg_match ("/^dn:(?!:)[ \t]*/mi", $sect, $dnHead, PREG_OFFSET_CAPTURE))
		{
			$insertAt = $dnHead[0][1] + strlen ($dnHead[0][0]);
			$next	  = (string) substr ($sect, $insertAt, 1);
			$rdn	  = "cn=$cn";

			// If the DN already starts with an RDN (mail=... or
			//	 anything else), join the two with a plus sign to
			//	 form a multi-valued RDN.  If the leading RDN is
			//	 empty (the DN starts with the comma which precedes
			//	 the Base DN, or is blank), the dummy cn stands alone.
			if ($next !== '' && $next !== ',' && $next !== "\n")
				$rdn .= '+';

			// Keep a blank between "dn:" and the value
			if (strlen ($dnHead[0][0]) == 3)
				$rdn = ' ' . $rdn;

			$sect = substr_replace ($sect, $rdn, $insertAt, 0);
		} // End IF
	} // End IF
} // End FOREACH

// Break the reference so that $sect no longer aliases
//	 the last section.
unset ($sect);

// Rule 10:  No duplicate DNs may be present.
//			 Note that we don't fix this problem; instead we output a
//			   message to standard error.
// $inpfile is an array of sections here.
$allDNs = array ();

// Catch the DN of every section that has one.  A by-value loop with
//	 its own variable is used so that no reference left behind by an
//	 earlier by-reference loop over $sect is written through.
foreach ($inpfile as $entry)
	if (preg_match ("/^dn:(.*)$/mi", $entry, $dnLine))
		$allDNs[] = trim ($dnLine[1]);

// Sort the entries to find duplicates.
//	 sort () already takes its argument by reference; writing "&"
//	 at the call site is a fatal error as of PHP 5.4.
sort ($allDNs, SORT_STRING);

// Loop through the DNs looking for duplicates,
//	 writing an error message to standard error.
//	 Strict comparison is used so that two different DNs which
//	 both happen to look numeric are never treated as equal.
$ErrCnt = 0;
for ($i = 0; $i < count ($allDNs) - 1; $i++)
if ($allDNs[$i] === $allDNs[$i + 1])
{
	fwrite (STDERR, "\nDuplicate entries:\n" .
					$allDNs[$i] . "\n" .
					$allDNs[$i + 1] . "\n");
	$ErrCnt++;
} // End FOR/IF

if ($ErrCnt)
	fwrite (STDERR, "\nPlease fix these errors before sending the address book to the server.");

// Put it all back together and write to standard output
echo implode ("\n\n", $inpfile);


/******************** FUNCTIONS ******************************/

// preg_replace_callback () handler which decodes a base64 encoded
//	 dn:: or member:: value.
//	 The prefix (including its double colon) is returned untouched
//	   so that a later rule can still recognize the line.
// $matches[1] = the "dn:: " or "member:: " prefix
//		   [2] = the base64 encoded value
// Returns the prefix followed by the decoded value.
function Base64Decode ($matches)
{
	$Prefix  = $matches[1];
	$Encoded = $matches[2];

	return $Prefix . base64_decode ($Encoded);
} // End Base64Decode ()


// Callback for escaping reserved symbols such as
//	 (','  '+'  '"'  '\'  '<'  '>' or ';') in dn: and member:
// $matches[1] = text up to (not including) the first reserved symbol;
//				 returned unchanged
//		   [2] = remainder of the line, starting at that symbol
// Uses the global tables $ERpat (patterns) and $ERrep (replacements).
// Returns the line with the reserved symbols in [2] escaped.
function EscapeReserved ($matches)
{
	global $ERpat, $ERrep;

	// Split $matches[2] around each ",mail=" so that the comma
	//	 which separates the cn= and mail= parts is not itself
	//	 escaped.  The split ignores case, as do the patterns
	//	 which invoke this callback, and the separators are kept
	//	 exactly as they were written.
	$Pieces = preg_split ("/(,mail=)/i", $matches[2], -1, PREG_SPLIT_DELIM_CAPTURE);

	// Even-numbered pieces are text to be escaped;
	//	 odd-numbered pieces are the captured separators.
	foreach ($Pieces as $Idx => $Piece)
		if ($Idx % 2 == 0)
			$Pieces[$Idx] = preg_replace ($ERpat, $ERrep, $Piece);

	return $matches[1] . implode ('', $Pieces);
} // End EscapeReserved ()


// preg_replace_callback () handler which escapes the non-ASCII
//	 bytes of a dn:: or member:: value.
// $matches[1] = the "dn:: " or "member:: " prefix
//		   [2] = the value to be escaped
// Returns the prefix with its double colon reduced to a single
//	 colon, followed by the value in which every byte >= 0x80 is
//	 replaced by a backslash and two uppercase hex digits,
//	 as in 0xC4 ==> \C4
function EscapeNonASCII ($matches)
{
	$Escaped = '';

	// unpack () hands back the value one byte at a time as integers
	foreach (unpack ('C*', $matches[2]) as $Code)
	{
		if ($Code >= 0x80)
			$Escaped .= sprintf ("\\%02X", $Code);
		else
			$Escaped .= chr ($Code);
	} // End FOREACH

	$Prefix = str_replace ('::', ':', $matches[1]);

	return $Prefix . $Escaped;
} // End EscapeNonASCII ()


?>