@@ -1294,6 +1294,46 @@ public void diff_cleanupMerge(LinkedList<Diff> diffs) {
12941294    }
12951295  }
12961296
1297+   /** 
1298+    * Rearrange diff boudnaries that split Unicode surrogate pairs. 
1299+    * @param diffs Linked list of diff objects 
1300+    */ 
1301+   public  void  diff_cleanupSplitSurrogates (List <Diff > diffs ) {
1302+     char  lastEnd  = 0 ;
1303+     boolean  isFirst  = true ;
1304+     HashSet <Diff > toRemove  = new  HashSet <Diff >();
1305+ 
1306+     for  (Diff  aDiff  : diffs ) {
1307+       if  (aDiff .text .isEmpty ()) {
1308+         toRemove .add (aDiff );
1309+         continue ;
1310+       }
1311+ 
1312+       char  thisTop  = aDiff .text .charAt (0 );
1313+       char  thisEnd  = aDiff .text .charAt (aDiff .text .length () - 1 );
1314+ 
1315+       if  (Character .isHighSurrogate (thisEnd )) {
1316+         lastEnd  = thisEnd ;
1317+         aDiff .text  = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1318+       }
1319+ 
1320+       if  (!isFirst  && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1321+         aDiff .text  = lastEnd  + aDiff .text ;
1322+       }
1323+       
1324+       isFirst  = false ;
1325+ 
1326+       if  ( aDiff .text .isEmpty () ) {
1327+         toRemove .add (aDiff );
1328+         continue ;
1329+       }
1330+     }
1331+ 
1332+     for  (Diff  aDiff  : toRemove ) {
1333+       diffs .remove (aDiff );
1334+     }
1335+   }
1336+ 
12971337  /** 
12981338   * loc is a location in text1, compute and return the equivalent location in 
12991339   * text2. 
@@ -1430,31 +1470,8 @@ public int diff_levenshtein(List<Diff> diffs) {
14301470   */ 
14311471  public  String  diff_toDelta (List <Diff > diffs ) {
14321472    StringBuilder  text  = new  StringBuilder ();
1433-     char  lastEnd  = 0 ;
1434-     boolean  isFirst  = true ;
1473+     this .diff_cleanupSplitSurrogates (diffs );
14351474    for  (Diff  aDiff  : diffs ) {
1436-       if  (aDiff .text .isEmpty ()) {
1437-         continue ;
1438-       }
1439- 
1440-       char  thisTop  = aDiff .text .charAt (0 );
1441-       char  thisEnd  = aDiff .text .charAt (aDiff .text .length () - 1 );
1442- 
1443-       if  (Character .isHighSurrogate (thisEnd )) {
1444-         lastEnd  = thisEnd ;
1445-         aDiff .text  = aDiff .text .substring (0 , aDiff .text .length () - 1 );
1446-       }
1447- 
1448-       if  (! isFirst  && Character .isHighSurrogate (lastEnd ) && Character .isLowSurrogate (thisTop )) {
1449-         aDiff .text  = lastEnd  + aDiff .text ;
1450-       }
1451- 
1452-       isFirst  = false ;
1453- 
1454-       if  ( aDiff .text .isEmpty () ) {
1455-         continue ;
1456-       }
1457- 
14581475      switch  (aDiff .operation ) {
14591476      case  INSERT :
14601477        try  {
0 commit comments