* xdiff : Add XDiff support in pure PHP. It shows which lines differ between two text files, like the "diff" command. See https://en.wikipedia.org/wiki/Diff

git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@4310 bf3deb0d-5f1a-0410-827f-c0cc1f45334c
This commit is contained in:
2018-07-30 13:15:36 +00:00
parent 738b7e82e0
commit ccee53e033
2 changed files with 683 additions and 0 deletions

258
Tests/xdiffTest.php Normal file
View File

@@ -0,0 +1,258 @@
<?php
/** DomFramework
* @package domframework
* @author Dominique Fournier <dominique@fournier38.fr>
*/
/** Test the domframework xdiff part */
class test_xdiff extends PHPUnit_Framework_TestCase
{
  // Declaration of $string1 and $string2
  // NOTE(review): the sample looks like the example of
  // https://en.wikipedia.org/wiki/Diff - confirm the origin.
  // The blank lines between the paragraphs are significant: the expected
  // hunks below address a 24 lines original and a 29 lines new text.
  // {{{
  private $string1 = "This part of the
document has stayed the
same from version to
version. It shouldn't
be shown if it doesn't
change. Otherwise, that
would not be helping to
compress the size of the
changes.

This paragraph contains
text that is outdated.
It will be deleted in the
near future.

It is important to spell
check this dokument. On
the other hand, a
misspelled word isn't
the end of the world.
Nothing in the rest of
this paragraph needs to
be changed. Things can
be added after it.
";
  private $string2 = "This is an important
notice! It should
therefore be located at
the beginning of this
document!

This part of the
document has stayed the
same from version to
version. It shouldn't
be shown if it doesn't
change. Otherwise, that
would not be helping to
compress the size of the
changes.

It is important to spell
check this document. On
the other hand, a
misspelled word isn't
the end of the world.
Nothing in the rest of
this paragraph needs to
be changed. Things can
be added after it.

This paragraph contains
important new additions
to this document.
";
  // }}}

  /** Check the two header lines of a unified diff and return the hunks.
   * The timestamps are generated when the xdiff object is built, so they can
   * not be compared with a date() computed later in the test (the second may
   * have changed meanwhile). Only their format is checked.
   * @param string $res The complete unified diff
   * @return string The diff without its "---" and "+++" header lines
   */
  private function unifiedBody ($res)
  {
    $parts = explode ("\n", $res, 3);
    $this->assertCount (3, $parts);
    $stamp = "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}";
    $zone = "[+-]\\d{4}";
    $this->assertRegExp (
      "#^--- Original ".$stamp."\\.000000000 ".$zone."\$#", $parts[0]);
    $this->assertRegExp (
      "#^\\+\\+\\+ New ".$stamp."\\.000000001 ".$zone."\$#", $parts[1]);
    return $parts[2];
  }

  /** Normal mode : append, delete and change hunks on the sample texts */
  public function test_diff_normal_1 ()
  {
    $xdiff = new xdiff ();
    $res = $xdiff->diff ($this->string1, $this->string2);
    // Each line is written explicitly: an empty line is displayed as "> " or
    // "< ", and the trailing space must not depend on the editor
    $expected =
      "0a1,6\n".
      "> This is an important\n".
      "> notice! It should\n".
      "> therefore be located at\n".
      "> the beginning of this\n".
      "> document!\n".
      "> \n".
      "11,15d16\n".
      "< This paragraph contains\n".
      "< text that is outdated.\n".
      "< It will be deleted in the\n".
      "< near future.\n".
      "< \n".
      "17c18\n".
      "< check this dokument. On\n".
      "---\n".
      "> check this document. On\n".
      "24a26,29\n".
      "> \n".
      "> This paragraph contains\n".
      "> important new additions\n".
      "> to this document.\n";
    $this->assertSame ($expected, $res);
  }

  /** Normal mode : same texts, the original is prefixed by one more line */
  public function test_diff_normal_2 ()
  {
    $xdiff = new xdiff ();
    $res = $xdiff->diff ("NEWLINE\n".$this->string1, $this->string2);
    $expected =
      "1c1,6\n".
      "< NEWLINE\n".
      "---\n".
      "> This is an important\n".
      "> notice! It should\n".
      "> therefore be located at\n".
      "> the beginning of this\n".
      "> document!\n".
      "> \n".
      "12,16d16\n".
      "< This paragraph contains\n".
      "< text that is outdated.\n".
      "< It will be deleted in the\n".
      "< near future.\n".
      "< \n".
      "18c18\n".
      "< check this dokument. On\n".
      "---\n".
      "> check this document. On\n".
      "25a26,29\n".
      "> \n".
      "> This paragraph contains\n".
      "> important new additions\n".
      "> to this document.\n";
    $this->assertSame ($expected, $res);
  }

  /** Normal mode : a text line replaced by an empty line */
  public function test_diff_normal_3 ()
  {
    $xdiff = new xdiff ();
    $res = $xdiff->diff ("NEWLINE\n", "\n");
    $this->assertSame ("1c1\n< NEWLINE\n---\n> \n", $res);
  }

  /** Normal mode : an empty line replaced by a text line */
  public function test_diff_normal_4 ()
  {
    $xdiff = new xdiff ();
    $res = $xdiff->diff ("\n", "NEWLINE\n");
    $this->assertSame ("1c1\n< \n---\n> NEWLINE\n", $res);
  }

  /** Normal mode : identical texts give an empty diff */
  public function test_diff_normal_5 ()
  {
    $xdiff = new xdiff ();
    $res = $xdiff->diff ("\n", "\n");
    $this->assertSame ("", $res);
  }

  /** Unified mode : append, delete and change hunks on the sample texts */
  public function test_diff_unified_1 ()
  {
    $xdiff = new xdiff ("unified");
    $res = $xdiff->diff ($this->string1, $this->string2);
    $expected =
      "@@ -0,0 +1,6 @@\n".
      "+This is an important\n".
      "+notice! It should\n".
      "+therefore be located at\n".
      "+the beginning of this\n".
      "+document!\n".
      "+\n".
      "@@ -11,5 +16,0 @@\n".
      "-This paragraph contains\n".
      "-text that is outdated.\n".
      "-It will be deleted in the\n".
      "-near future.\n".
      "-\n".
      "@@ -17 +18 @@\n".
      "-check this dokument. On\n".
      "+check this document. On\n".
      "@@ -24,0 +26,4 @@\n".
      "+\n".
      "+This paragraph contains\n".
      "+important new additions\n".
      "+to this document.\n";
    $this->assertSame ($expected, $this->unifiedBody ($res));
  }

  /** Unified mode : same texts, the original is prefixed by one more line */
  public function test_diff_unified_2 ()
  {
    $xdiff = new xdiff ("unified");
    $res = $xdiff->diff ("NEWLINE\n".$this->string1, $this->string2);
    $expected =
      "@@ -1 +1,6 @@\n".
      "-NEWLINE\n".
      "+This is an important\n".
      "+notice! It should\n".
      "+therefore be located at\n".
      "+the beginning of this\n".
      "+document!\n".
      "+\n".
      "@@ -12,5 +16,0 @@\n".
      "-This paragraph contains\n".
      "-text that is outdated.\n".
      "-It will be deleted in the\n".
      "-near future.\n".
      "-\n".
      "@@ -18 +18 @@\n".
      "-check this dokument. On\n".
      "+check this document. On\n".
      "@@ -25,0 +26,4 @@\n".
      "+\n".
      "+This paragraph contains\n".
      "+important new additions\n".
      "+to this document.\n";
    $this->assertSame ($expected, $this->unifiedBody ($res));
  }

  /** Unified mode : a text line replaced by an empty line */
  public function test_diff_unified_3 ()
  {
    $xdiff = new xdiff ("unified");
    $res = $xdiff->diff ("NEWLINE\n", "\n");
    $this->assertSame ("@@ -1 +1 @@\n-NEWLINE\n+\n",
                       $this->unifiedBody ($res));
  }

  /** Unified mode : an empty line replaced by a text line */
  public function test_diff_unified_4 ()
  {
    $xdiff = new xdiff ("unified");
    $res = $xdiff->diff ("\n", "NEWLINE\n");
    $this->assertSame ("@@ -1 +1 @@\n-\n+NEWLINE\n",
                       $this->unifiedBody ($res));
  }

  /** Unified mode : identical texts give an empty diff, without header */
  public function test_diff_unified_5 ()
  {
    $xdiff = new xdiff ("unified");
    $res = $xdiff->diff ("\n", "\n");
    $this->assertSame ("", $res);
  }
}

425
xdiff.php Normal file
View File

@@ -0,0 +1,425 @@
<?php
/** DomFramework
* @package domframework
* @author Dominique Fournier <dominique@fournier38.fr>
*/
/** Create a diff from two strings, arrays or files
 * The output is compatible with the "patch" command.
 */
class xdiff
{
  /** The name of file1, displayed on the "---" line of the unified output
   */
  private $filename1 = "Original";
  /** The name of file2, displayed on the "+++" line of the unified output
   */
  private $filename2 = "New";
  /** The timestamp for file1
   */
  private $file1Time;
  /** The timestamp for file2
   */
  private $file2Time;
  /** The name of the display method matching the requested output
   */
  private $output = null;

  /** The constructor allows choosing the output.
   * @param string $output The output mode [normal|unified]
   * @throws \Exception (code 406) if the output mode has no display method
   */
  public function __construct ($output = "normal")
  // {{{
  {
    if (! is_string ($output) ||
        ! method_exists ($this, "display".ucfirst ($output)))
      throw new \Exception ("Invalid output requested to xdiff", 406);
    $this->output = "display".ucfirst ($output);
    // date() works on an integer timestamp: per the PHP manual "u" always
    // gives 000000, so the two stamps end with 000000000 and 000000001
    $this->file1Time = date ("Y-m-d H:i:s.u000 O");
    $this->file2Time = date ("Y-m-d H:i:s.u001 O");
  }
  // }}}

  /** Compute the differences between two strings $string1 and $string2
   * @param string $string1 The first string to compare
   * @param string $string2 The second string to compare
   * @return string The diff in the output mode chosen in the constructor
   * @throws \Exception (code 406) if one parameter is not a string
   */
  public function diff ($string1, $string2)
  // {{{
  {
    if (! is_string ($string1))
      throw new \Exception (
        "Invalid string1 provided to diff method : not a string", 406);
    if (! is_string ($string2))
      throw new \Exception (
        "Invalid string2 provided to diff method : not a string", 406);
    return $this->diffArray ($this->splitLines ($string1),
                             $this->splitLines ($string2));
  }
  // }}}

  /** Compute the differences between two files $file1 and $file2
   * The names and the modification times of the files are stored to be
   * displayed in the header of the unified output.
   * @param string $file1 The first file to use to compare
   * @param string $file2 The second file to use to compare
   * @return string The diff in the output mode chosen in the constructor
   * @throws \Exception (code 406) if one file name is not a string, or if
   * the file doesn't exist or is not readable
   */
  public function diffFile ($file1, $file2)
  // {{{
  {
    if (! is_string ($file1))
      throw new \Exception (
        "Invalid file1 provided to diff method : not a string", 406);
    if (! is_string ($file2))
      throw new \Exception (
        "Invalid file2 provided to diff method : not a string", 406);
    if (! file_exists ($file1))
      throw new \Exception (
        "Invalid file1 provided to diff method : file don't exists", 406);
    if (! file_exists ($file2))
      throw new \Exception (
        "Invalid file2 provided to diff method : file don't exists", 406);
    if (! is_readable ($file1))
      throw new \Exception (
        "Invalid file1 provided to diff method : file is not readable", 406);
    if (! is_readable ($file2))
      throw new \Exception (
        "Invalid file2 provided to diff method : file is not readable", 406);
    $this->filename1 = $file1;
    $this->filename2 = $file2;
    $this->file1Time = date ("Y-m-d H:i:s.u000 O",
                             filemtime ($this->filename1));
    $this->file2Time = date ("Y-m-d H:i:s.u001 O",
                             filemtime ($this->filename2));
    // If a read fails, file_get_contents returns false and diff() rejects it
    return $this->diff (file_get_contents ($file1),
                        file_get_contents ($file2));
  }
  // }}}

  /** Compute the differences between two arrays $array1 and $array2 and
   * display them with the output mode chosen in the constructor
   * @param array $array1 The first array to compare
   * @param array $array2 The second array to compare
   * @return string
   */
  public function diffArray ($array1, $array2)
  // {{{
  {
    $diff = $this->computeArray ($array1, $array2);
    $method = $this->output;
    return $this->$method ($diff);
  }
  // }}}

  /** Compute the differences between two arrays $array1 and $array2
   * Each entry of the result is an array with the keys "type" (Equal,
   * Append, Delete or Change), "startLine1", "endLine1", "startLine2" and
   * "endLine2". Equal, Append and Delete entries carry "length" and "chunk";
   * Change entries carry "length1", "length2", "chunk1" and "chunk2".
   * @param array $array1 The first array to compare
   * @param array $array2 The second array to compare
   * @return array The list of chunks
   * @throws \Exception (code 406) if one parameter is not an array
   */
  final public function computeArray ($array1, $array2)
  // {{{
  {
    if (! is_array ($array1))
      throw new \Exception (
        "Invalid array1 provided to diffArray method : not a array", 406);
    if (! is_array ($array2))
      throw new \Exception (
        "Invalid array2 provided to diffArray method : not a array", 406);
    // The algorithm walks the arrays by position: reindex them, so arrays
    // with gaps or with string keys are handled like lists
    $array1 = array_values ($array1);
    $array2 = array_values ($array2);
    $count1 = count ($array1);
    $count2 = count ($array2);
    $diff = array ();
    $i = 0; // $i is the index for $array1
    $j = 0; // $j is the index for $array2
    while ($i < $count1 || $j < $count2)
    {
      $chunk1 = array ();
      $chunk2 = array ();
      if ($i < $count1 && $j < $count2 && $array1[$i] === $array2[$j])
      {
        // EQUAL
        while ($i < $count1 && $j < $count2 && $array1[$i] === $array2[$j])
        {
          $chunk1[] = $array1[$i];
          $i++;
          $j++;
        }
        $diff[] = array ("type" => "Equal",
                         "startLine1" => 1 + $i - count ($chunk1),
                         "endLine1" => $i,
                         "startLine2" => 1 + $j - count ($chunk1),
                         "endLine2" => $j,
                         "length" => count ($chunk1),
                         "chunk" => $chunk1);
        continue;
      }
      // Generate the chunks: consume the lines of both arrays up to the
      // next common line (the anchor)
      $lcs = $this->lcs ($array1, $array2, $i, $j);
      // If there is no anchor at this position, it is searched again after
      // each consumed line
      $refresh = ($lcs === "");
      // An empty $lcs means "no anchor": it must never be compared with an
      // element, or an element equal to "" would stop the loops without any
      // progress (endless loop)
      while ($i < $count1 && ($lcs === "" || $array1[$i] !== $lcs))
      {
        $chunk1[] = $array1[$i];
        $i++;
        if ($refresh)
          $lcs = $this->lcs ($array1, $array2, $i, $j);
      }
      while ($j < $count2 && ($lcs === "" || $array2[$j] !== $lcs))
      {
        $chunk2[] = $array2[$j];
        $j++;
        if ($refresh)
          $lcs = $this->lcs ($array1, $array2, $i, $j);
      }
      // The anchor may have moved while the second array was consumed:
      // align both arrays on the last anchor found
      while ($i < $count1 && ($lcs === "" || $array1[$i] !== $lcs))
      {
        $chunk1[] = $array1[$i];
        $i++;
      }
      while ($j < $count2 && ($lcs === "" || $array2[$j] !== $lcs))
      {
        $chunk2[] = $array2[$j];
        $j++;
      }
      // Add the diffs by the chunks availability
      if (empty ($chunk1) && ! empty ($chunk2))
      {
        // APPEND
        $diff[] = array ("type" => "Append",
                         "startLine1" => $i,
                         "endLine1" => $i,
                         "startLine2" => 1 + $j - count ($chunk2),
                         "endLine2" => $j,
                         "length" => count ($chunk2),
                         "chunk" => $chunk2);
      }
      elseif (! empty ($chunk1) && empty ($chunk2))
      {
        // DELETE
        $diff[] = array ("type" => "Delete",
                         "startLine1" => 1 + $i - count ($chunk1),
                         "endLine1" => $i,
                         "startLine2" => $j,
                         "endLine2" => $j,
                         "length" => count ($chunk1),
                         "chunk" => $chunk1);
      }
      else
      {
        // CHANGE ON BOTH ARRAY
        $diff[] = array ("type" => "Change",
                         "startLine1" => 1 + $i - count ($chunk1),
                         "endLine1" => $i,
                         "startLine2" => 1 + $j - count ($chunk2),
                         "endLine2" => $j,
                         "length1" => count ($chunk1),
                         "length2" => count ($chunk2),
                         "chunk1" => $chunk1,
                         "chunk2" => $chunk2);
      }
    }
    return $diff;
  }
  // }}}

  /** Return a string like "diff -u" (without context lines)
   * @param array $diffArray The diff array analyzed by computeArray method
   * @return string Empty if there is no difference
   * @throws \Exception if a chunk has an unknown type
   */
  private function displayUnified ($diffArray)
  // {{{
  {
    $d = "";
    foreach ($diffArray as $diff)
    {
      if ($diff["type"] === "Equal")
        continue;
      if ($diff["type"] === "Append")
      {
        $d .= "@@ -".$diff["startLine1"].",0".
              " +".$diff["startLine2"].",".$diff["length"]." @@\n";
        $d .= $this->prefixLines ("+", $diff["chunk"]);
      }
      elseif ($diff["type"] === "Delete")
      {
        $d .= "@@ -".$diff["startLine1"].",".$diff["length"].
              " +".$diff["startLine2"].",0"." @@\n";
        $d .= $this->prefixLines ("-", $diff["chunk"]);
      }
      elseif ($diff["type"] === "Change")
      {
        // A length of one line is not displayed
        $info = "@@ -".$diff["startLine1"];
        if ($diff["length1"] !== 1)
          $info .= ",".$diff["length1"];
        $info .= " +".$diff["startLine2"];
        if ($diff["length2"] !== 1)
          $info .= ",".$diff["length2"];
        $d .= $info." @@\n";
        $d .= $this->prefixLines ("-", $diff["chunk1"]);
        $d .= $this->prefixLines ("+", $diff["chunk2"]);
      }
      else
        throw new \Exception ("Invalid Chunk Type : ".$diff["type"]);
    }
    // No header if there is no difference
    if ($d === "")
      return $d;
    $e = "--- $this->filename1 $this->file1Time\n";
    $e .= "+++ $this->filename2 $this->file2Time\n";
    return $e.$d;
  }
  // }}}

  /** Return a string like "diff" without parameter
   * @param array $diffArray The diff array analyzed by computeArray method
   * @return string Empty if there is no difference
   * @throws \Exception if a chunk has an unknown type
   */
  private function displayNormal ($diffArray)
  // {{{
  {
    $d = "";
    foreach ($diffArray as $diff)
    {
      if ($diff["type"] === "Equal")
        continue;
      $range1 = $this->formatRange ($diff["startLine1"], $diff["endLine1"]);
      $range2 = $this->formatRange ($diff["startLine2"], $diff["endLine2"]);
      if ($diff["type"] === "Append")
      {
        $d .= $range1."a".$range2."\n";
        $d .= $this->prefixLines ("> ", $diff["chunk"]);
      }
      elseif ($diff["type"] === "Delete")
      {
        $d .= $range1."d".$range2."\n";
        $d .= $this->prefixLines ("< ", $diff["chunk"]);
      }
      elseif ($diff["type"] === "Change")
      {
        $d .= $range1."c".$range2."\n";
        $d .= $this->prefixLines ("< ", $diff["chunk1"]);
        $d .= "---\n";
        $d .= $this->prefixLines ("> ", $diff["chunk2"]);
      }
      else
        throw new \Exception ("Invalid Chunk Type : ".$diff["type"]);
    }
    return $d;
  }
  // }}}

  /** Split a string in lines. Each line keeps its end of line; the last one
   * may have none.
   * Without (*BSR_ANYCRLF), \R also matches the byte 0x85, VT and FF in 8-bit
   * mode: an unterminated last line holding such a byte (it is part of some
   * UTF-8 characters) would be cut in the middle.
   * @param string $string The string to split
   * @return array|false The lines, or false if preg_split fails
   */
  private function splitLines ($string)
  // {{{
  {
    return preg_split ("#(*BSR_ANYCRLF)(.*\\R)#", $string, -1,
                       PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  }
  // }}}

  /** Return the lines range of the normal output: "start" if the range has
   * only one line number, "start,end" otherwise
   * @param integer $start The first line
   * @param integer $end The last line
   * @return string
   */
  private function formatRange ($start, $end)
  // {{{
  {
    if ($start !== $end)
      return $start.",".$end;
    return (string) $start;
  }
  // }}}

  /** Return the lines, each one preceded by $prefix.
   * A line which doesn't end by a newline is followed by the
   * "\ No newline at end of file" marker (like the "diff" command), so the
   * next marker is never glued to the text of the line.
   * @param string $prefix The marker to add before each line
   * @param array $lines The lines to display
   * @return string
   */
  private function prefixLines ($prefix, $lines)
  // {{{
  {
    $out = "";
    foreach ($lines as $line)
    {
      $out .= $prefix.$line;
      if (substr ($line, -1) !== "\n")
        $out .= "\n\\ No newline at end of file\n";
    }
    return $out;
  }
  // }}}

  /** This function returns the next common part between both arrays starting
   * at position $i for $array1 and $j for $array2
   * Lines which are empty once trimmed are never used as common part.
   * Return empty string if no common line was found
   * @param array $array1 The first array
   * @param array $array2 The second array
   * @param integer $i The position where the search starts in $array1
   * @param integer $j The position where the search starts in $array2
   * @return string
   */
  private function lcs ($array1, $array2, $i, $j)
  // {{{
  {
    $count1 = count ($array1);
    $count2 = count ($array2);
    $found1 = false;
    $found2 = false;
    $tmp1 = $i;
    $tmp2 = $j;
    // Move $i to the first line of $array1 also available in $array2 (at
    // position $tmp2)
    while ($i < $count1)
    {
      $tmp2 = $j;
      while ($tmp2 < $count2)
      {
        if ($array1[$i] === $array2[$tmp2] && trim ($array1[$i]) !== "")
        {
          $found1 = true;
          break 2;
        }
        $tmp2++;
      }
      $i++;
    }
    // Move $j to the first line of $array2 also available in $array1 after
    // the new $i (at position $tmp1)
    while ($j < $count2)
    {
      $tmp1 = $i;
      while ($tmp1 < $count1)
      {
        if ($array2[$j] === $array1[$tmp1] && trim ($array2[$j]) !== "")
        {
          $found2 = true;
          break 2;
        }
        $tmp1++;
      }
      $j++;
    }
    if (! $found1 || ! $found2)
      return "";
    // The match found from $array2 is closer than the one found from
    // $array1: no common part is returned for this position
    if ($tmp1 - $i < $tmp2 - $j)
      return "";
    return $array1[$i];
  }
  // }}}
}