From ccee53e0337b14922e35cbfbfc3032875b15aa00 Mon Sep 17 00:00:00 2001 From: Dominique Fournier Date: Mon, 30 Jul 2018 13:15:36 +0000 Subject: [PATCH] * xdiff : Add XDiff support in pure PHP. Allow to see which lines of two text files are modified, like the "diff" command. See https://en.wikipedia.org/wiki/Diff git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@4310 bf3deb0d-5f1a-0410-827f-c0cc1f45334c --- Tests/xdiffTest.php | 258 +++++++++++++++++++++++++++ xdiff.php | 425 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 683 insertions(+) create mode 100644 Tests/xdiffTest.php create mode 100644 xdiff.php diff --git a/Tests/xdiffTest.php b/Tests/xdiffTest.php new file mode 100644 index 0000000..c349d88 --- /dev/null +++ b/Tests/xdiffTest.php @@ -0,0 +1,258 @@ + + */ + +/** Test the domframework xdiff class : normal and unified outputs */ +class test_xdiff extends PHPUnit_Framework_TestCase +{ + // Declaration of $string1 and $string2 + // Taken from (reference missing here ; presumably the example of https://en.wikipedia.org/wiki/Diff - TODO confirm) + // {{{ + private $string1 = "This part of the +document has stayed the +same from version to +version. It shouldn't +be shown if it doesn't +change. Otherwise, that +would not be helping to +compress the size of the +changes. + +This paragraph contains +text that is outdated. +It will be deleted in the +near future. + +It is important to spell +check this dokument. On +the other hand, a +misspelled word isn't +the end of the world. +Nothing in the rest of +this paragraph needs to +be changed. Things can +be added after it. +"; + private $string2 = "This is an important +notice! It should +therefore be located at +the beginning of this +document! + +This part of the +document has stayed the +same from version to +version. It shouldn't +be shown if it doesn't +change. Otherwise, that +would not be helping to +compress the size of the +changes. + +It is important to spell +check this document. On +the other hand, a +misspelled word isn't +the end of the world. +Nothing in the rest of +this paragraph needs to +be changed. 
Things can +be added after it. + +This paragraph contains +important new additions +to this document. +"; + // }}} + + public function test_diff_normal_1 () + { + // Mode normal : expect one append, one delete, one change and one append + $xdiff = new xdiff (); + $res = $xdiff->diff ($this->string1, $this->string2); + $this->assertSame ($res, "0a1,6 +> This is an important +> notice! It should +> therefore be located at +> the beginning of this +> document! +> +11,15d16 +< This paragraph contains +< text that is outdated. +< It will be deleted in the +< near future. +< +17c18 +< check this dokument. On +--- +> check this document. On +24a26,29 +> +> This paragraph contains +> important new additions +> to this document. +"); + } + + public function test_diff_normal_2 () + { + // Mode normal : the extra first line turns the first append into a change + $xdiff = new xdiff (); + $res = $xdiff->diff ("NEWLINE\n".$this->string1, $this->string2); + $this->assertSame ($res, "1c1,6 +< NEWLINE +--- +> This is an important +> notice! It should +> therefore be located at +> the beginning of this +> document! +> +12,16d16 +< This paragraph contains +< text that is outdated. +< It will be deleted in the +< near future. +< +18c18 +< check this dokument. On +--- +> check this document. On +25a26,29 +> +> This paragraph contains +> important new additions +> to this document. 
+"); + } + + public function test_diff_normal_3 () + { + // Mode normal : a text line is replaced by an empty line + $xdiff = new xdiff (); + $res = $xdiff->diff ("NEWLINE\n", "\n"); + $this->assertSame ($res, "1c1 +< NEWLINE +--- +> +"); + } + + public function test_diff_normal_4 () + { + // Mode normal : an empty line is replaced by a text line + $xdiff = new xdiff (); + $res = $xdiff->diff ("\n", "NEWLINE\n"); + $this->assertSame ($res, "1c1 +< +--- +> NEWLINE +"); + } + + public function test_diff_normal_5 () + { + // Mode normal : identical inputs must produce an empty output + $xdiff = new xdiff (); + $res = $xdiff->diff ("\n", "\n"); + $this->assertSame ($res, ""); + } + + public function test_diff_unified_1 () + { + // Mode unified. NOTE(review): the expected header calls date () again, so it seems to match only within the same clock second as the constructor + $xdiff = new xdiff ("unified"); + $res = $xdiff->diff ($this->string1, $this->string2); + $this->assertSame ($res, "--- Original ".date ("Y-m-d H:i:s.u000 O")." ++++ New ".date ("Y-m-d H:i:s.u001 O")." +@@ -0,0 +1,6 @@ ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ +@@ -11,5 +16,0 @@ +-This paragraph contains +-text that is outdated. +-It will be deleted in the +-near future. +- +@@ -17 +18 @@ +-check this dokument. On ++check this document. On +@@ -24,0 +26,4 @@ ++ ++This paragraph contains ++important new additions ++to this document. +"); + } + + public function test_diff_unified_2 () + { + // Mode unified : same inputs as test_diff_normal_2 + $xdiff = new xdiff ("unified"); + $res = $xdiff->diff ("NEWLINE\n".$this->string1, $this->string2); + $this->assertSame ($res, "--- Original ".date ("Y-m-d H:i:s.u000 O")." ++++ New ".date ("Y-m-d H:i:s.u001 O")." +@@ -1 +1,6 @@ +-NEWLINE ++This is an important ++notice! It should ++therefore be located at ++the beginning of this ++document! ++ +@@ -12,5 +16,0 @@ +-This paragraph contains +-text that is outdated. +-It will be deleted in the +-near future. +- +@@ -18 +18 @@ +-check this dokument. On ++check this document. On +@@ -25,0 +26,4 @@ ++ ++This paragraph contains ++important new additions ++to this document. 
+"); + } + + public function test_diff_unified_3 () + { + $xdiff = new xdiff ("unified"); + $res = $xdiff->diff ("NEWLINE\n", "\n"); + $this->assertSame ($res, "--- Original ".date ("Y-m-d H:i:s.u000 O")." ++++ New ".date ("Y-m-d H:i:s.u001 O")." +@@ -1 +1 @@ +-NEWLINE ++ +"); + } + + public function test_diff_unified_4 () + { + $xdiff = new xdiff ("unified"); + $res = $xdiff->diff ("\n", "NEWLINE\n"); + $this->assertSame ($res, "--- Original ".date ("Y-m-d H:i:s.u000 O")." ++++ New ".date ("Y-m-d H:i:s.u001 O")." +@@ -1 +1 @@ +- ++NEWLINE +"); + } + + public function test_diff_unified_5 () + { + $xdiff = new xdiff ("unified"); + $res = $xdiff->diff ("\n", "\n"); + $this->assertSame ($res, ""); + } + +} diff --git a/xdiff.php b/xdiff.php new file mode 100644 index 0000000..8f9e885 --- /dev/null +++ b/xdiff.php @@ -0,0 +1,425 @@ + + */ + +/** Create a diff from two strings, arrays or files + * The output is compatible with the "patch" command. + */ +class xdiff +{ + /** The name of file1 + */ + private $filename1 = "Original"; + + /** The name of file2 + */ + private $filename2 = "New"; + + /** The timestamp for file1 + */ + private $file1Time; + + /** The timestamp for file2 + */ + private $file2Time; + + /** The name of the display method matching the requested output + */ + private $output = null; + + /** The constructor allows choosing the output and throws an exception if it is unknown. + * @param string $output The output mode [normal|unified] + */ + public function __construct ($output = "normal") + // {{{ + { + if (! method_exists ($this, "display".ucfirst ($output))) + throw new \Exception ("Invalid output requested to xdiff", 406); + $this->output = "display".ucfirst ($output); + $this->file1Time = date ("Y-m-d H:i:s.u000 O"); + $this->file2Time = date ("Y-m-d H:i:s.u001 O"); + } + // }}} + + /** Compute the line by line differences between two strings $string1 and $string2 + * @param string $string1 The first string to compare + * @param string $string2 The second string to compare + */ + public function diff ($string1, $string2) + // {{{ + { + if (! 
is_string ($string1)) + throw new \Exception ( + "Invalid string1 provided to diff method : not a string", 406); + if (! is_string ($string2)) + throw new \Exception ( + "Invalid string2 provided to diff method : not a string", 406); + return $this->diffArray ( + preg_split ("#(.*\\R)#", $string1, -1, + PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY), + preg_split ("#(.*\\R)#", $string2, -1, + PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY)); + } + // }}} + + /** Compute the differences between the contents of two files $file1 and $file2 + * @param string $file1 The path of the first file to compare + * @param string $file2 The path of the second file to compare + */ + public function diffFile ($file1, $file2) + // {{{ + { + if (! is_string ($file1)) + throw new \Exception ( + "Invalid file1 provided to diff method : not a string", 406); + if (! is_string ($file2)) + throw new \Exception ( + "Invalid file2 provided to diff method : not a string", 406); + if (! file_exists ($file1)) + throw new \Exception ( + "Invalid file1 provided to diff method : file don't exists", 406); + if (! file_exists ($file2)) + throw new \Exception ( + "Invalid file2 provided to diff method : file don't exists", 406); + if (! is_readable ($file1)) + throw new \Exception ( + "Invalid file1 provided to diff method : file is not readable", 406); + if (! 
is_readable ($file2)) + throw new \Exception ( + "Invalid file2 provided to diff method : file is not readable", 406); + $this->filename1 = $file1; + $this->filename2 = $file2; + $this->file1Time = date ("Y-m-d H:i:s.u000 O", + filemtime ($this->filename1)); + $this->file2Time = date ("Y-m-d H:i:s.u001 O", + filemtime ($this->filename2)); + return $this->diff (file_get_contents ($file1), + file_get_contents ($file2)); + } + // }}} + + /** Compute and format the differences between arrays $array1 and $array2 + * @param array $array1 The first array to compare + * @param array $array2 The second array to compare + */ + public function diffArray ($array1, $array2) + // {{{ + { + $diff = $this->computeArray ($array1, $array2); + $method = $this->output; + return $this->$method ($diff); + } + // }}} + + /** Compute the list of Equal/Append/Delete/Change chunks of two arrays + * @param array $array1 The first array to compare + * @param array $array2 The second array to compare + */ + final public function computeArray ($array1, $array2) + // {{{ + { + if (! is_array ($array1)) + throw new \Exception ( + "Invalid array1 provided to diffArray method : not a array", 406); + if (! 
is_array ($array2)) + throw new \Exception ( + "Invalid array2 provided to diffArray method : not a array", 406); + $diff = array (); + $i = 0; // $i is the index for $array1 + $j = 0; // $j is the index for $array2 + while ($i < count ($array1) || $j < count ($array2)) + { + $chunk1 = array (); + $chunk2 = array (); + if (key_exists ($i, $array1) && key_exists ($j, $array2) && + $array1[$i] === $array2[$j]) + { + // EQUAL : consume the run of identical lines + while (key_exists ($i, $array1) && key_exists ($j, $array2) && + $array1[$i] === $array2[$j]) + { + $chunk1[] = $array1[$i]; + $i++; + $j++; + } + $diff[] = array ("type" => "Equal", + "startLine1" => 1 + $i - count ($chunk1), + "endLine1" => $i, + "startLine2" => 1 + $j - count ($chunk1), + "endLine2" => $j, + "length" => count ($chunk1), + "chunk" => $chunk1); + + continue; + } + // Generate the chunks up to the next common line given by lcs () + $lcs = $this->lcs ($array1, $array2, $i, $j); + if ($lcs === "") + { + while ($i < count ($array1)) + { + if ($array1[$i] === $lcs) + break; + $chunk1[] = $array1[$i]; + $i++; + $lcs = $this->lcs ($array1, $array2, $i, $j); + } + while ($j < count ($array2)) + { + if ($array2[$j] === $lcs) + break; + $chunk2[] = $array2[$j]; + $j++; + $lcs = $this->lcs ($array1, $array2, $i, $j); + } + } + else + { + while ($i < count ($array1)) + { + if ($array1[$i] === $lcs) + break; + $chunk1[] = $array1[$i]; + $i++; + } + while ($j < count ($array2)) + { + if ($array2[$j] === $lcs) + break; + $chunk2[] = $array2[$j]; + $j++; + } + } + // Add the diffs by the chunks availability + if (empty ($chunk1) && ! empty ($chunk2)) + { + // APPEND : lines only present in $array2 + while ($j < count ($array2)) + { + if ($array2[$j] === $lcs) + break; + $chunk2[] = $array2[$j]; + $j++; + } + $diff[] = array ("type" => "Append", + "startLine1" => $i, + "endLine1" => $i, + "startLine2" => 1 + $j - count ($chunk2), + "endLine2" => $j, + "length" => count ($chunk2), + "chunk" => $chunk2); + } + elseif (! 
empty ($chunk1) && empty ($chunk2)) + { + // DELETE : lines only present in $array1 + while ($i < count ($array1)) + { + if ($array1[$i] === $lcs) + break; + $chunk1[] = $array1[$i]; + $i++; + } + $diff[] = array ("type" => "Delete", + "startLine1" => 1 + $i - count ($chunk1), + "endLine1" => $i, + "startLine2" => $j, + "endLine2" => $j, + "length" => count ($chunk1), + "chunk" => $chunk1); + } + else + { + // CHANGE ON BOTH ARRAYS + while ($i < count ($array1)) + { + if ($array1[$i] === $lcs) + break; + $chunk1[] = $array1[$i]; + $i++; + } + while ($j < count ($array2)) + { + if ($array2[$j] === $lcs) + break; + $chunk2[] = $array2[$j]; + $j++; + } + $diff[] = array ("type" => "Change", + "startLine1" => 1 + $i - count ($chunk1), + "endLine1" => $i, + "startLine2" => 1 + $j - count ($chunk2), + "endLine2" => $j, + "length1" => count ($chunk1), + "length2" => count ($chunk2), + "chunk1" => $chunk1, + "chunk2" => $chunk2); + } + } + return $diff; + } + // }}} + + /** Return a string like "diff -u", without context lines + * @param array $diffArray The diff array built by computeArray method + * @return string + */ + private function displayUnified ($diffArray) + // {{{ + { + $d = ""; + $i = 0 ; + while ($i < count ($diffArray)) + { + $diff = $diffArray[$i]; + $i++; + if ($diff["type"] === "Equal") + continue; + + + if ($diff["type"] === "Append") + { + $info = "@@ -"; + $info .= $diff["startLine1"]; + $info .= ",0"; + $info .= " +"; + $info .= $diff["startLine2"]; + $info .= ",".$diff["length"]; + $info .= " @@\n"; + $d .= $info; + $d .= "+".implode ("+", $diff["chunk"]); + } + elseif ($diff["type"] === "Delete") + { + $info = "@@ -"; + $info .= $diff["startLine1"]; + $info .= ",".$diff["length"]; + $info .= " +"; + $info .= $diff["startLine2"]; + $info .= ",0"; + $info .= " @@\n"; + $d .= $info; + $d .= "-".implode ("-", $diff["chunk"]); + } + elseif ($diff["type"] === "Change") + { + $info = "@@ -"; + $info .= $diff["startLine1"]; + if ($diff["length1"] !== 1) + $info .= ",".$diff["length1"]; + $info .= " +"; + $info 
.= $diff["startLine2"]; + if ($diff["length2"] !== 1) + $info .= ",".$diff["length2"]; + $info .= " @@\n"; + $d .= $info; + $d .= "-".implode ("-", $diff["chunk1"]); + $d .= "+".implode ("+", $diff["chunk2"]); + } + else + throw new \Exception ("Invalid Chunk Type : ".$diff["type"]); + } + if ($d === "") + return $d; + $e = "--- $this->filename1 $this->file1Time\n"; + $e .= "+++ $this->filename2 $this->file2Time\n"; + return $e.$d; + } + // }}} + + /** Return a string like "diff" without parameter + * @param array $diffArray The diff array built by computeArray method + * @return string + */ + private function displayNormal ($diffArray) + // {{{ + { + $d = ""; + $i = 0 ; + while ($i < count ($diffArray)) + { + $diff = $diffArray[$i]; + $i++; + if ($diff["type"] === "Equal") + continue; + $info = $diff["startLine1"]; + if ($diff["startLine1"] !== $diff["endLine1"]) + $info .= ",".$diff["endLine1"]; + $info .= "%s"; + $info .= $diff["startLine2"]; + if ($diff["startLine2"] !== $diff["endLine2"]) + $info .= ",".$diff["endLine2"]; + $info .= "\n"; + if ($diff["type"] === "Append") + { + $d .= sprintf ($info, "a"); + $d .= "> ".implode ("> ", $diff["chunk"]); + } + elseif ($diff["type"] === "Delete") + { + $d .= sprintf ($info, "d"); + $d .= "< ".implode ("< ", $diff["chunk"]); + } + elseif ($diff["type"] === "Change") + { + $d .= sprintf ($info, "c"); + $d .= "< ".implode ("< ", $diff["chunk1"]); + $d .= "---\n"; + $d .= "> ".implode ("> ", $diff["chunk2"]); + } + else + throw new \Exception ("Invalid Chunk Type : ".$diff["type"]); + } + return $d; + } + // }}} + + /** This function returns the next common line between both arrays, searched from + * position $i in $array1 and $j in $array2 (blank lines are ignored) + * Returns an empty string if no usable common line was found + * @return string + */ + private function lcs ($array1, $array2, $i, $j) + // {{{ + { + $found1 = false; + $found2 = false; + while ($i < count ($array1)) + { + $tmp2 = $j; + while ($tmp2 < count ($array2)) + { + if ($array1[$i] === 
$array2[$tmp2] && trim ($array1[$i]) !== "") + { + $found1 = true; + break 2; + } + $tmp2++; + } + $i++; + } + while ($j < count ($array2)) + { + $tmp1 = $i; + while ($tmp1 < count ($array1)) + { + if ($array2[$j] === $array1[$tmp1] && trim ($array2[$j]) !== "") + { + $found2 = true; + break 2; + } + $tmp1++; + } + $j++; + } + if (! $found1 || ! $found2) + return ""; + if ($tmp1 - $i < $tmp2 - $j) + { + return ""; + } + return $array1[$i]; + } + // }}} +}