From e9a59cd4ebf95dfb74d9cba364fc7e7abf17e341 Mon Sep 17 00:00:00 2001
From: vslashg <veloso@verylowsodium.com>
Date: Fri, 20 Sep 2024 20:55:06 +0000
Subject: [PATCH 1/2] Add new functionaltiy for reversing diffs.

ReverseFileDiff() and ReverseMultiFileDiff() accept a diff and return
a diff which is the reverse operation -- that is, a diff that when
applied would undo the edits of the original diff.
---
 diff/reverse.go                          | 151 +++++++++++++++++++++++
 diff/reverse_test.go                     | 146 ++++++++++++++++++++++
 diff/testdata/no_newline_both.reversed   |   5 +
 diff/testdata/no_newline_new.reversed    |   6 +
 diff/testdata/no_newline_orig.reversed   |   4 +
 diff/testdata/sample_file.reversed       |  31 +++++
 diff/testdata/sample_hunks.reversed      |  37 ++++++
 diff/testdata/sample_multi_file.reversed |  63 ++++++++++
 8 files changed, 443 insertions(+)
 create mode 100644 diff/reverse.go
 create mode 100644 diff/reverse_test.go
 create mode 100644 diff/testdata/no_newline_both.reversed
 create mode 100644 diff/testdata/no_newline_new.reversed
 create mode 100644 diff/testdata/no_newline_orig.reversed
 create mode 100644 diff/testdata/sample_file.reversed
 create mode 100644 diff/testdata/sample_hunks.reversed
 create mode 100644 diff/testdata/sample_multi_file.reversed

diff --git a/diff/reverse.go b/diff/reverse.go
new file mode 100644
index 0000000..7ceac7d
--- /dev/null
+++ b/diff/reverse.go
@@ -0,0 +1,151 @@
+package diff
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"regexp"
+)
+
+// ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation.
+// This is a FileDiff that undoes the edit of the original.
+func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) {
+	reverse := FileDiff{
+		OrigName: fd.NewName,
+		OrigTime: fd.NewTime,
+		NewName:  fd.OrigName,
+		NewTime:  fd.OrigTime,
+		Extended: fd.Extended,
+	}
+	for _, hunk := range fd.Hunks {
+		invHunk, err := reverseHunk(hunk)
+		if err != nil {
+			return nil, err
+		}
+		reverse.Hunks = append(reverse.Hunks, invHunk)
+	}
+	return &reverse, nil
+}
+
+// ReverseMultiFileDiff reverses a series of FileDiffs.
+func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) {
+	var reverse []*FileDiff
+	for _, fd := range fds {
+		r, err := ReverseFileDiff(fd)
+		if err != nil {
+			return nil, err
+		}
+		reverse = append(reverse, r)
+	}
+	return reverse, nil
+}
+
+// A subhunk represents a portion of a Hunk.Body, split into three sections.
+// It consists of zero or more context lines, followed by zero or more orig
+// lines and then zero or more new lines.
+//
+// Each line is stored WITHOUT its starting character, but with the newlines
+// included.  The final entry in a section may be missing a trailing newline.
+//
+// A missing newline in orig is represented in a Hunk by OrigNoNewlineAt,
+// but is represented here as a missing newline.
+type subhunk struct {
+	context [][]byte
+	orig    [][]byte
+	new     [][]byte
+}
+
+// reverseHunk converts a Hunk into its reverse operation.
+func reverseHunk(forward *Hunk) (*Hunk, error) {
+	reverse := Hunk{
+		OrigStartLine:   forward.NewStartLine,
+		OrigLines:       forward.NewLines,
+		OrigNoNewlineAt: 0, // we may change this below
+		NewStartLine:    forward.OrigStartLine,
+		NewLines:        forward.OrigLines,
+		Section:         forward.Section,
+		StartPosition:   forward.StartPosition,
+	}
+	subs, err := toSubhunks(forward)
+	if err != nil {
+		return nil, err
+	}
+	for _, sub := range subs {
+		invSub := subhunk{
+			context: sub.context,
+			orig:    sub.new,
+			new:     sub.orig,
+		}
+		for _, line := range invSub.context {
+			reverse.Body = append(reverse.Body, ' ')
+			reverse.Body = append(reverse.Body, line...)
+		}
+		for _, line := range invSub.orig {
+			reverse.Body = append(reverse.Body, '-')
+			reverse.Body = append(reverse.Body, line...)
+		}
+		if len(invSub.orig) > 0 && reverse.Body[len(reverse.Body)-1] != '\n' {
+			// There was a missing newline in `orig`, which we encode in a
+			// hunk with an offset.
+			reverse.Body = append(reverse.Body, '\n')
+			reverse.OrigNoNewlineAt = int32(len(reverse.Body))
+		}
+		for _, line := range invSub.new {
+			reverse.Body = append(reverse.Body, '+')
+			reverse.Body = append(reverse.Body, line...)
+		}
+	}
+	return &reverse, nil
+}
+
+var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`)
+
+func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte {
+	var lines [][]byte
+	for len(*from) > 0 && (*from)[0] == startingWith {
+		line := subhunkLineRe.Find(*from)
+		lines = append(lines, line[1:])
+		*from = (*from)[len(line):]
+	}
+	return lines
+}
+
+// Extracts the subhunks from a diff.Hunk.
+//
+// This groups a Hunk's buffer into one or more subhunks, matching the conditions
+// of `subhunk` above.  This function groups, strips prefix characters, and strips
+// a newline for `OrigNoNewlineAt` if necessary.
+func toSubhunks(hunk *Hunk) ([]subhunk, error) {
+	var body []byte = hunk.Body
+	var subhunks []subhunk
+	if len(body) == 0 {
+		return nil, nil
+	}
+	for len(body) > 0 {
+		sh := subhunk{
+			context: extractLinesStartingWith(&body, ' '),
+			orig:    extractLinesStartingWith(&body, '-'),
+			new:     extractLinesStartingWith(&body, '+'),
+		}
+		if len(sh.context) == 0 && len(sh.orig) == 0 && len(sh.new) == 0 {
+			// The first line didn't start with any expected prefix.
+			return nil, fmt.Errorf("unexpected character %q at start of line", body[0])
+		}
+		subhunks = append(subhunks, sh)
+	}
+	if hunk.OrigNoNewlineAt > 0 {
+		// The Hunk represents a missing newline at the end of an "orig" line with a
+		// OrigNoNewlineAt index.  We represent it here as an actual missing newline.
+		var lastSubhunk *subhunk = &subhunks[len(subhunks)-1]
+		s := len(lastSubhunk.orig)
+		if s == 0 {
+			return nil, errors.New("inconsistent OrigNoNewlineAt in input")
+		}
+		var cut bool
+		lastSubhunk.orig[s-1], cut = bytes.CutSuffix(lastSubhunk.orig[s-1], []byte("\n"))
+		if !cut {
+			return nil, errors.New("missing newline in input")
+		}
+	}
+	return subhunks, nil
+}
diff --git a/diff/reverse_test.go b/diff/reverse_test.go
new file mode 100644
index 0000000..313ac6a
--- /dev/null
+++ b/diff/reverse_test.go
@@ -0,0 +1,146 @@
+package diff
+
+import (
+	"bytes"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestReverseHunks(t *testing.T) {
+	tests := []struct {
+		inputFile string
+		wantFile  string
+	}{
+		{
+			inputFile: "sample_hunks.diff",
+			wantFile:  "sample_hunks.reversed",
+		},
+		{
+			inputFile: "no_newline_new.diff",
+			wantFile:  "no_newline_new.reversed",
+		},
+		{
+			inputFile: "no_newline_orig.diff",
+			wantFile:  "no_newline_orig.reversed",
+		},
+		{
+			inputFile: "no_newline_both.diff",
+			wantFile:  "no_newline_both.reversed",
+		},
+	}
+	for _, test := range tests {
+		inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
+		if err != nil {
+			t.Fatal(err)
+		}
+		wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
+		if err != nil {
+			t.Fatal(err)
+		}
+		input, err := ParseHunks(inputData)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		var reversed []*Hunk
+		for _, in := range input {
+			out, err := reverseHunk(in)
+			if err != nil {
+				// This should only fail if the Hunk data structure is inconsistent
+				t.Errorf("%s: Unexpected reverseHunk() error: %s", test.inputFile, err)
+			}
+			reversed = append(reversed, out)
+		}
+		gotData, err := PrintHunks(reversed)
+		if err != nil {
+			t.Errorf("%s: PrintHunks of reversed data: %s", test.inputFile, err)
+		}
+		if !bytes.Equal(wantData, gotData) {
+			t.Errorf("%s: Reversed hunk does not match expected.\nWant vs got:\n%s",
+				test.inputFile, cmp.Diff(wantData, gotData))
+		}
+	}
+}
+
+func TestReverseFileDiff(t *testing.T) {
+	tests := []struct {
+		inputFile string
+		wantFile  string
+	}{
+		{
+			inputFile: "sample_file.diff",
+			wantFile:  "sample_file.reversed",
+		},
+	}
+	for _, test := range tests {
+		inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
+		if err != nil {
+			t.Fatal(err)
+		}
+		wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
+		if err != nil {
+			t.Fatal(err)
+		}
+		input, err := ParseFileDiff(inputData)
+		if err != nil {
+			t.Fatal(err)
+		}
+		reversed, err := ReverseFileDiff(input)
+		if err != nil {
+			t.Errorf("%s: ReverseFileDiff: %s", test.inputFile, err)
+		}
+		gotData, err := PrintFileDiff(reversed)
+		if err != nil {
+			t.Errorf("%s: PrintFileDiff of reversed data: %s", test.inputFile, err)
+		}
+		if !bytes.Equal(wantData, gotData) {
+			t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s",
+				test.inputFile, cmp.Diff(wantData, gotData))
+		}
+	}
+}
+
+func TestReverseMultiFileDiff(t *testing.T) {
+	tests := []struct {
+		inputFile string
+		wantFile  string
+	}{
+		{
+			inputFile: "sample_file.diff",
+			wantFile:  "sample_file.reversed",
+		},
+		{
+			inputFile: "sample_multi_file.diff",
+			wantFile:  "sample_multi_file.reversed",
+		},
+	}
+	for _, test := range tests {
+		inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile))
+		if err != nil {
+			t.Fatal(err)
+		}
+		wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile))
+		if err != nil {
+			t.Fatal(err)
+		}
+		input, err := ParseMultiFileDiff(inputData)
+		if err != nil {
+			t.Fatal(err)
+		}
+		reversed, err := ReverseMultiFileDiff(input)
+		if err != nil {
+			t.Errorf("%s: ReverseMultiFileDiff: %s", test.inputFile, err)
+		}
+		gotData, err := PrintMultiFileDiff(reversed)
+		if err != nil {
+			t.Errorf("%s: PrintMultiFileDiff of reversed data: %s", test.inputFile, err)
+		}
+		if !bytes.Equal(wantData, gotData) {
+			t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s",
+				test.inputFile, cmp.Diff(wantData, gotData))
+		}
+	}
+}
diff --git a/diff/testdata/no_newline_both.reversed b/diff/testdata/no_newline_both.reversed
new file mode 100644
index 0000000..52ab7bc
--- /dev/null
+++ b/diff/testdata/no_newline_both.reversed
@@ -0,0 +1,5 @@
+@@ -1,1 +1,1 @@
+-b
+\ No newline at end of file
++a
+\ No newline at end of file
diff --git a/diff/testdata/no_newline_new.reversed b/diff/testdata/no_newline_new.reversed
new file mode 100644
index 0000000..443aa8a
--- /dev/null
+++ b/diff/testdata/no_newline_new.reversed
@@ -0,0 +1,6 @@
+@@ -1,2 +1,3 @@
+ a
+-a
+\ No newline at end of file
++a
++a
diff --git a/diff/testdata/no_newline_orig.reversed b/diff/testdata/no_newline_orig.reversed
new file mode 100644
index 0000000..df02b0c
--- /dev/null
+++ b/diff/testdata/no_newline_orig.reversed
@@ -0,0 +1,4 @@
+@@ -1,1 +1,1 @@
+-b
++a
+\ No newline at end of file
diff --git a/diff/testdata/sample_file.reversed b/diff/testdata/sample_file.reversed
new file mode 100644
index 0000000..90eb49b
--- /dev/null
+++ b/diff/testdata/sample_file.reversed
@@ -0,0 +1,31 @@
+--- newname	2009-10-11 15:12:30.000000000 +0000
++++ oldname	2009-10-11 15:12:20.000000000 +0000
+@@ -1,9 +1,3 @@
+-This is an important
+-notice! It should
+-therefore be located at
+-the beginning of this
+-document!
+-
+ This part of the
+ document has stayed the
+ same from version to
+@@ -11,10 +5,16 @@
+ be shown if it doesn't
+ change.  Otherwise, that
+ would not be helping to
+-compress anything.
++compress the size of the
++changes.
++
++This paragraph contains
++text that is outdated.
++It will be deleted in the
++near future.
+ 
+ It is important to spell
+-check this document. On
++check this dokument. On
+ the other hand, a
+ misspelled word isn't
+ the end of the world.
diff --git a/diff/testdata/sample_hunks.reversed b/diff/testdata/sample_hunks.reversed
new file mode 100644
index 0000000..1ac7d8a
--- /dev/null
+++ b/diff/testdata/sample_hunks.reversed
@@ -0,0 +1,37 @@
+@@ -1,9 +1,3 @@ Section Header
+-This is an important
+-notice! It should
+-therefore be located at
+-the beginning of this
+-document!
+-
+ This part of the
+ document has stayed the
+ same from version to
+@@ -11,10 +5,16 @@
+ be shown if it doesn't
+ change.  Otherwise, that
+ would not be helping to
+-compress anything.
++compress the size of the
++changes.
++
++This paragraph contains
++text that is outdated.
++It will be deleted in the
++near future.
+ 
+ It is important to spell
+-check this document. On
++check this dokument. On
+ the other hand, a
+ misspelled word isn't
+ the end of the world.
+@@ -22,7 +22,3 @@
+ this paragraph needs to
+ be changed. Things can
+ be added after it.
+-
+-This paragraph contains
+-important new additions
+-to this document.
diff --git a/diff/testdata/sample_multi_file.reversed b/diff/testdata/sample_multi_file.reversed
new file mode 100644
index 0000000..283f8e4
--- /dev/null
+++ b/diff/testdata/sample_multi_file.reversed
@@ -0,0 +1,63 @@
+diff --ruN a/oldname1 b/newname1
+old mode 0777
+new mode 0755
+--- newname1	2009-10-11 15:12:30.000000000 +0000
++++ oldname1	2009-10-11 15:12:20.000000000 +0000
+@@ -1,9 +1,3 @@
+-This is an important
+-notice! It should
+-therefore be located at
+-the beginning of this
+-document!
+-
+ This part of the
+ document has stayed the
+ same from version to
+@@ -11,10 +5,16 @@
+ be shown if it doesn't
+ change.  Otherwise, that
+ would not be helping to
+-compress anything.
++compress the size of the
++changes.
++
++This paragraph contains
++text that is outdated.
++It will be deleted in the
++near future.
+ 
+ It is important to spell
+-check this document. On
++check this dokument. On
+diff --ruN a/oldname2 b/newname2
+--- newname2	2009-10-11 15:12:30.000000000 +0000
++++ oldname2	2009-10-11 15:12:20.000000000 +0000
+@@ -1,9 +1,3 @@
+-This is an important
+-notice! It should
+-therefore be located at
+-the beginning of this
+-document!
+-
+ This part of the
+ document has stayed the
+ same from version to
+@@ -11,10 +5,16 @@
+ be shown if it doesn't
+ change.  Otherwise, that
+ would not be helping to
+-compress anything.
++compress the size of the
++changes.
++
++This paragraph contains
++text that is outdated.
++It will be deleted in the
++near future.
+ 
+ It is important to spell
+-check this document. On
++check this dokument. On
+ the other hand, a
+ misspelled word isn't
+ the end of the world.

From 6f1b6f42b07a82a1df98d78757c53c7f165e6a4c Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith <keegan.csmith@gmail.com>
Date: Thu, 2 Apr 2026 15:24:42 +0200
Subject: [PATCH 2/2] Fix reverse diff round-trip handling

Amp-Thread-ID: https://ampcode.com/threads/T-019d4e56-85e0-7468-8797-0b8c811d37af
Co-authored-by: Amp <amp@ampcode.com>
---
 diff/reverse.go      | 59 ++++++++++++++++++++++++++++++------
 diff/reverse_test.go | 72 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/diff/reverse.go b/diff/reverse.go
index 7ceac7d..87715ef 100644
--- a/diff/reverse.go
+++ b/diff/reverse.go
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"regexp"
 )
 
 // ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation.
@@ -49,8 +48,13 @@ func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) {
 //
 // A missing newline in orig is represented in a Hunk by OrigNoNewlineAt,
 // but is represented here as a missing newline.
+type contextLine struct {
+	body []byte
+	bare bool
+}
+
 type subhunk struct {
-	context [][]byte
+	context []contextLine
 	orig    [][]byte
 	new     [][]byte
 }
@@ -77,8 +81,12 @@ func reverseHunk(forward *Hunk) (*Hunk, error) {
 			new:     sub.orig,
 		}
 		for _, line := range invSub.context {
+			if line.bare {
+				reverse.Body = append(reverse.Body, line.body...)
+				continue
+			}
 			reverse.Body = append(reverse.Body, ' ')
-			reverse.Body = append(reverse.Body, line...)
+			reverse.Body = append(reverse.Body, line.body...)
 		}
 		for _, line := range invSub.orig {
 			reverse.Body = append(reverse.Body, '-')
@@ -98,14 +106,47 @@ func reverseHunk(forward *Hunk) (*Hunk, error) {
 	return &reverse, nil
 }
 
-var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`)
+func extractContextLines(from *[]byte) []contextLine {
+	var lines []contextLine
+	for len(*from) > 0 {
+		if (*from)[0] == '\n' {
+			lines = append(lines, contextLine{body: []byte{'\n'}, bare: true})
+			*from = (*from)[1:]
+			continue
+		}
+		if (*from)[0] != ' ' {
+			break
+		}
+
+		newline := bytes.IndexByte(*from, '\n')
+		if newline < 0 {
+			lines = append(lines, contextLine{body: (*from)[1:]})
+			*from = nil
+			continue
+		}
+
+		lines = append(lines, contextLine{body: (*from)[1 : newline+1]})
+		*from = (*from)[newline+1:]
+	}
+	return lines
+}
 
 func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte {
 	var lines [][]byte
-	for len(*from) > 0 && (*from)[0] == startingWith {
-		line := subhunkLineRe.Find(*from)
-		lines = append(lines, line[1:])
-		*from = (*from)[len(line):]
+	for len(*from) > 0 {
+		if (*from)[0] != startingWith {
+			break
+		}
+
+		newline := bytes.IndexByte(*from, '\n')
+		if newline < 0 {
+			lines = append(lines, (*from)[1:])
+			*from = nil
+			continue
+		}
+
+		lines = append(lines, (*from)[1:newline+1])
+		*from = (*from)[newline+1:]
 	}
 	return lines
 }
@@ -123,7 +164,7 @@ func toSubhunks(hunk *Hunk) ([]subhunk, error) {
 	}
 	for len(body) > 0 {
 		sh := subhunk{
-			context: extractLinesStartingWith(&body, ' '),
+			context: extractContextLines(&body),
 			orig:    extractLinesStartingWith(&body, '-'),
 			new:     extractLinesStartingWith(&body, '+'),
 		}
diff --git a/diff/reverse_test.go b/diff/reverse_test.go
index 313ac6a..a656e54 100644
--- a/diff/reverse_test.go
+++ b/diff/reverse_test.go
@@ -144,3 +144,75 @@ func TestReverseMultiFileDiff(t *testing.T) {
 		}
 	}
 }
+
+func TestReverseRoundTripOnTestdata(t *testing.T) {
+	fixtures, err := filepath.Glob(filepath.Join("testdata", "*.diff"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	skipped := map[string]bool{
+		"empty.diff":            true,
+		"empty_new.diff":        true,
+		"empty_orig.diff":       true,
+		"sample_bad_hunks.diff": true,
+	}
+
+	for _, fixture := range fixtures {
+		fixture := fixture
+		name := filepath.Base(fixture)
+
+		t.Run(name, func(t *testing.T) {
+			data, err := os.ReadFile(fixture)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if fileDiffs, err := ParseMultiFileDiff(data); err == nil && len(fileDiffs) > 0 {
+				reversed, err := ReverseMultiFileDiff(fileDiffs)
+				if err != nil {
+					t.Fatalf("first reverse: %s", err)
+				}
+				roundTrip, err := ReverseMultiFileDiff(reversed)
+				if err != nil {
+					t.Fatalf("second reverse: %s", err)
+				}
+				if diff := cmp.Diff(fileDiffs, roundTrip); diff != "" {
+					t.Fatalf("double reverse did not restore original file diffs:\n%s", diff)
+				}
+				return
+			}
+
+			if hunks, err := ParseHunks(data); err == nil && len(hunks) > 0 {
+				var reversed []*Hunk
+				for _, hunk := range hunks {
+					inv, err := reverseHunk(hunk)
+					if err != nil {
+						t.Fatalf("first reverse: %s", err)
+					}
+					reversed = append(reversed, inv)
+				}
+
+				var roundTrip []*Hunk
+				for _, hunk := range reversed {
+					inv, err := reverseHunk(hunk)
+					if err != nil {
+						t.Fatalf("second reverse: %s", err)
+					}
+					roundTrip = append(roundTrip, inv)
+				}
+
+				if diff := cmp.Diff(hunks, roundTrip); diff != "" {
+					t.Fatalf("double reverse did not restore original hunks:\n%s", diff)
+				}
+				return
+			}
+
+			if skipped[name] {
+				return
+			}
+
+			t.Fatalf("fixture did not contain parseable file diffs or hunks")
+		})
+	}
+}