From e9a59cd4ebf95dfb74d9cba364fc7e7abf17e341 Mon Sep 17 00:00:00 2001 From: vslashg Date: Fri, 20 Sep 2024 20:55:06 +0000 Subject: [PATCH 1/2] Add new functionaltiy for reversing diffs. ReverseFileDiff() and ReverseMultiFileDiff() accept a diff and return a diff which is the reverse operation -- that is, a diff that when applied would undo the edits of the original diff. --- diff/reverse.go | 151 +++++++++++++++++++++++ diff/reverse_test.go | 146 ++++++++++++++++++++++ diff/testdata/no_newline_both.reversed | 5 + diff/testdata/no_newline_new.reversed | 6 + diff/testdata/no_newline_orig.reversed | 4 + diff/testdata/sample_file.reversed | 31 +++++ diff/testdata/sample_hunks.reversed | 37 ++++++ diff/testdata/sample_multi_file.reversed | 63 ++++++++++ 8 files changed, 443 insertions(+) create mode 100644 diff/reverse.go create mode 100644 diff/reverse_test.go create mode 100644 diff/testdata/no_newline_both.reversed create mode 100644 diff/testdata/no_newline_new.reversed create mode 100644 diff/testdata/no_newline_orig.reversed create mode 100644 diff/testdata/sample_file.reversed create mode 100644 diff/testdata/sample_hunks.reversed create mode 100644 diff/testdata/sample_multi_file.reversed diff --git a/diff/reverse.go b/diff/reverse.go new file mode 100644 index 0000000..7ceac7d --- /dev/null +++ b/diff/reverse.go @@ -0,0 +1,151 @@ +package diff + +import ( + "bytes" + "errors" + "fmt" + "regexp" +) + +// ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation. +// This is a FileDiff that undoes the edit of the original. +func ReverseFileDiff(fd *FileDiff) (*FileDiff, error) { + reverse := FileDiff{ + OrigName: fd.NewName, + OrigTime: fd.NewTime, + NewName: fd.OrigName, + NewTime: fd.OrigTime, + Extended: fd.Extended, + } + for _, hunk := range fd.Hunks { + invHunk, err := reverseHunk(hunk) + if err != nil { + return nil, err + } + reverse.Hunks = append(reverse.Hunks, invHunk) + } + return &reverse, nil +} + +// ReverseMultiFileDiff reverses a series of FileDiffs. +func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) { + var reverse []*FileDiff + for _, fd := range fds { + r, err := ReverseFileDiff(fd) + if err != nil { + return nil, err + } + reverse = append(reverse, r) + } + return reverse, nil +} + +// A subhunk represents a portion of a Hunk.Body, split into three sections. +// It consists of zero or more context lines, followed by zero or more orig +// lines and then zero or more new lines. +// +// Each line is stored WITHOUT its starting character, but with the newlines +// included. The final entry in a section may be missing a trailing newline. +// +// A missing newline in orig is represented in a Hunk by OrigNoNewlineAt, +// but is represented here as a missing newline. +type subhunk struct { + context [][]byte + orig [][]byte + new [][]byte +} + +// reverseHunk converts a Hunk into its reverse operation. +func reverseHunk(forward *Hunk) (*Hunk, error) { + reverse := Hunk{ + OrigStartLine: forward.NewStartLine, + OrigLines: forward.NewLines, + OrigNoNewlineAt: 0, // we may change this below + NewStartLine: forward.OrigStartLine, + NewLines: forward.OrigLines, + Section: forward.Section, + StartPosition: forward.StartPosition, + } + subs, err := toSubhunks(forward) + if err != nil { + return nil, err + } + for _, sub := range subs { + invSub := subhunk{ + context: sub.context, + orig: sub.new, + new: sub.orig, + } + for _, line := range invSub.context { + reverse.Body = append(reverse.Body, ' ') + reverse.Body = append(reverse.Body, line...) + } + for _, line := range invSub.orig { + reverse.Body = append(reverse.Body, '-') + reverse.Body = append(reverse.Body, line...) + } + if len(invSub.orig) > 0 && reverse.Body[len(reverse.Body)-1] != '\n' { + // There was a missing newline in `orig`, which we encode in a + // hunk with an offset. + reverse.Body = append(reverse.Body, '\n') + reverse.OrigNoNewlineAt = int32(len(reverse.Body)) + } + for _, line := range invSub.new { + reverse.Body = append(reverse.Body, '+') + reverse.Body = append(reverse.Body, line...) + } + } + return &reverse, nil +} + +var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`) + +func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte { + var lines [][]byte + for len(*from) > 0 && (*from)[0] == startingWith { + line := subhunkLineRe.Find(*from) + lines = append(lines, line[1:]) + *from = (*from)[len(line):] + } + return lines +} + +// Extracts the subhunks from a diff.Hunk. +// +// This groups a Hunk's buffer into one or more subhunks, matching the conditions +// of `subhunk` above. This function groups, strips prefix characters, and strips +// a newline for `OrigNoNewlineAt` if necessary. +func toSubhunks(hunk *Hunk) ([]subhunk, error) { + var body []byte = hunk.Body + var subhunks []subhunk + if len(body) == 0 { + return nil, nil + } + for len(body) > 0 { + sh := subhunk{ + context: extractLinesStartingWith(&body, ' '), + orig: extractLinesStartingWith(&body, '-'), + new: extractLinesStartingWith(&body, '+'), + } + if len(sh.context) == 0 && len(sh.orig) == 0 && len(sh.new) == 0 { + // The first line didn't start with any expected prefix. + return nil, fmt.Errorf("unexpected character %q at start of line", body[0]) + } + subhunks = append(subhunks, sh) + } + if hunk.OrigNoNewlineAt > 0 { + // The Hunk represents a missing newline at the end of an "orig" line with a + // OrigNoNewlineAt index. We represent it here as an actual missing newline. + var lastSubhunk *subhunk = &subhunks[len(subhunks)-1] + s := len(lastSubhunk.orig) + if s == 0 { + return nil, errors.New("inconsistent OrigNoNewlineAt in input") + } + var cut bool + lastSubhunk.orig[s-1], cut = bytes.CutSuffix(lastSubhunk.orig[s-1], []byte("\n")) + if !cut { + return nil, errors.New("missing newline in input") + } + } + return subhunks, nil +} diff --git a/diff/reverse_test.go b/diff/reverse_test.go new file mode 100644 index 0000000..313ac6a --- /dev/null +++ b/diff/reverse_test.go @@ -0,0 +1,146 @@ +package diff + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestReverseHunks(t *testing.T) { + tests := []struct { + inputFile string + wantFile string + }{ + { + inputFile: "sample_hunks.diff", + wantFile: "sample_hunks.reversed", + }, + { + inputFile: "no_newline_new.diff", + wantFile: "no_newline_new.reversed", + }, + { + inputFile: "no_newline_orig.diff", + wantFile: "no_newline_orig.reversed", + }, + { + inputFile: "no_newline_both.diff", + wantFile: "no_newline_both.reversed", + }, + } + for _, test := range tests { + inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile)) + if err != nil { + t.Fatal(err) + } + wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile)) + if err != nil { + t.Fatal(err) + } + input, err := ParseHunks(inputData) + if err != nil { + t.Fatal(err) + } + + var reversed []*Hunk + for _, in := range input { + out, err := reverseHunk(in) + if err != nil { + // This should only fail if the Hunk data structure is inconsistent + t.Errorf("%s: Unexpected reverseHunk() error: %s", test.inputFile, err) + } + reversed = append(reversed, out) + } + gotData, err := PrintHunks(reversed) + if err != nil { + t.Errorf("%s: PrintHunks of reversed data: %s", test.inputFile, err) + } + if !bytes.Equal(wantData, gotData) { + t.Errorf("%s: Reversed hunk does not match expected.\nWant vs got:\n%s", + test.inputFile, cmp.Diff(wantData, gotData)) + } + } +} + +func TestReverseFileDiff(t *testing.T) { + tests := []struct { + inputFile string + wantFile string + }{ + { + inputFile: "sample_file.diff", + wantFile: "sample_file.reversed", + }, + } + for _, test := range tests { + inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile)) + if err != nil { + t.Fatal(err) + } + wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile)) + if err != nil { + t.Fatal(err) + } + input, err := ParseFileDiff(inputData) + if err != nil { + t.Fatal(err) + } + reversed, err := ReverseFileDiff(input) + if err != nil { + t.Errorf("%s: ReverseFileDiff: %s", test.inputFile, err) + } + gotData, err := PrintFileDiff(reversed) + if err != nil { + t.Errorf("%s: PrintFileDiff of reversed data: %s", test.inputFile, err) + } + if !bytes.Equal(wantData, gotData) { + t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s", + test.inputFile, cmp.Diff(wantData, gotData)) + } + } +} + +func TestReverseMultiFileDiff(t *testing.T) { + tests := []struct { + inputFile string + wantFile string + }{ + { + inputFile: "sample_file.diff", + wantFile: "sample_file.reversed", + }, + { + inputFile: "sample_multi_file.diff", + wantFile: "sample_multi_file.reversed", + }, + } + for _, test := range tests { + inputData, err := os.ReadFile(filepath.Join("testdata", test.inputFile)) + if err != nil { + t.Fatal(err) + } + wantData, err := os.ReadFile(filepath.Join("testdata", test.wantFile)) + if err != nil { + t.Fatal(err) + } + input, err := ParseMultiFileDiff(inputData) + if err != nil { + t.Fatal(err) + } + reversed, err := ReverseMultiFileDiff(input) + if err != nil { + t.Errorf("%s: ReverseMultiFileDiff: %s", test.inputFile, err) + } + gotData, err := PrintMultiFileDiff(reversed) + if err != nil { + t.Errorf("%s: PrintMultiFileDiff of reversed data: %s", test.inputFile, err) + } + if !bytes.Equal(wantData, gotData) { + t.Errorf("%s: Reversed diff does not match expected.\nWant vs got:\n%s", + test.inputFile, cmp.Diff(wantData, gotData)) + } + } +} diff --git a/diff/testdata/no_newline_both.reversed b/diff/testdata/no_newline_both.reversed new file mode 100644 index 0000000..52ab7bc --- /dev/null +++ b/diff/testdata/no_newline_both.reversed @@ -0,0 +1,5 @@ +@@ -1,1 +1,1 @@ +-b +\ No newline at end of file ++a +\ No newline at end of file diff --git a/diff/testdata/no_newline_new.reversed b/diff/testdata/no_newline_new.reversed new file mode 100644 index 0000000..443aa8a --- /dev/null +++ b/diff/testdata/no_newline_new.reversed @@ -0,0 +1,6 @@ +@@ -1,2 +1,3 @@ + a +-a +\ No newline at end of file ++a ++a diff --git a/diff/testdata/no_newline_orig.reversed b/diff/testdata/no_newline_orig.reversed new file mode 100644 index 0000000..df02b0c --- /dev/null +++ b/diff/testdata/no_newline_orig.reversed @@ -0,0 +1,4 @@ +@@ -1,1 +1,1 @@ +-b ++a +\ No newline at end of file diff --git a/diff/testdata/sample_file.reversed b/diff/testdata/sample_file.reversed new file mode 100644 index 0000000..90eb49b --- /dev/null +++ b/diff/testdata/sample_file.reversed @@ -0,0 +1,31 @@ +--- newname 2009-10-11 15:12:30.000000000 +0000 ++++ oldname 2009-10-11 15:12:20.000000000 +0000 +@@ -1,9 +1,3 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On + the other hand, a + misspelled word isn't + the end of the world. diff --git a/diff/testdata/sample_hunks.reversed b/diff/testdata/sample_hunks.reversed new file mode 100644 index 0000000..1ac7d8a --- /dev/null +++ b/diff/testdata/sample_hunks.reversed @@ -0,0 +1,37 @@ +@@ -1,9 +1,3 @@ Section Header +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On + the other hand, a + misspelled word isn't + the end of the world. +@@ -22,7 +22,3 @@ + this paragraph needs to + be changed. Things can + be added after it. +- +-This paragraph contains +-important new additions +-to this document. diff --git a/diff/testdata/sample_multi_file.reversed b/diff/testdata/sample_multi_file.reversed new file mode 100644 index 0000000..283f8e4 --- /dev/null +++ b/diff/testdata/sample_multi_file.reversed @@ -0,0 +1,63 @@ +diff --ruN a/oldname1 b/newname1 +old mode 0777 +new mode 0755 +--- newname1 2009-10-11 15:12:30.000000000 +0000 ++++ oldname1 2009-10-11 15:12:20.000000000 +0000 +@@ -1,9 +1,3 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On +diff --ruN a/oldname2 b/newname2 +--- newname2 2009-10-11 15:12:30.000000000 +0000 ++++ oldname2 2009-10-11 15:12:20.000000000 +0000 +@@ -1,9 +1,3 @@ +-This is an important +-notice! It should +-therefore be located at +-the beginning of this +-document! +- + This part of the + document has stayed the + same from version to +@@ -11,10 +5,16 @@ + be shown if it doesn't + change. Otherwise, that + would not be helping to +-compress anything. ++compress the size of the ++changes. ++ ++This paragraph contains ++text that is outdated. ++It will be deleted in the ++near future. + + It is important to spell +-check this document. On ++check this dokument. On + the other hand, a + misspelled word isn't + the end of the world. From 6f1b6f42b07a82a1df98d78757c53c7f165e6a4c Mon Sep 17 00:00:00 2001 From: Keegan Carruthers-Smith Date: Thu, 2 Apr 2026 15:24:42 +0200 Subject: [PATCH 2/2] Fix reverse diff round-trip handling Amp-Thread-ID: https://ampcode.com/threads/T-019d4e56-85e0-7468-8797-0b8c811d37af Co-authored-by: Amp --- diff/reverse.go | 59 ++++++++++++++++++++++++++++++------ diff/reverse_test.go | 72 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 9 deletions(-) diff --git a/diff/reverse.go b/diff/reverse.go index 7ceac7d..87715ef 100644 --- a/diff/reverse.go +++ b/diff/reverse.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "fmt" - "regexp" ) // ReverseFileDiff takes a diff.FileDiff, and returns the reverse operation. @@ -49,8 +48,13 @@ func ReverseMultiFileDiff(fds []*FileDiff) ([]*FileDiff, error) { // // A missing newline in orig is represented in a Hunk by OrigNoNewlineAt, // but is represented here as a missing newline. +type contextLine struct { + body []byte + bare bool +} + type subhunk struct { - context [][]byte + context []contextLine orig [][]byte new [][]byte } @@ -77,8 +81,12 @@ func reverseHunk(forward *Hunk) (*Hunk, error) { new: sub.orig, } for _, line := range invSub.context { + if line.bare { + reverse.Body = append(reverse.Body, line.body...) + continue + } reverse.Body = append(reverse.Body, ' ') - reverse.Body = append(reverse.Body, line...) + reverse.Body = append(reverse.Body, line.body...) } for _, line := range invSub.orig { reverse.Body = append(reverse.Body, '-') @@ -98,14 +106,47 @@ func reverseHunk(forward *Hunk) (*Hunk, error) { return &reverse, nil } -var subhunkLineRe = regexp.MustCompile(`^.[^\n]*(\n|$)`) +func extractContextLines(from *[]byte) []contextLine { + var lines []contextLine + for len(*from) > 0 { + if (*from)[0] == '\n' { + lines = append(lines, contextLine{body: []byte{'\n'}, bare: true}) + *from = (*from)[1:] + continue + } + if (*from)[0] != ' ' { + break + } + + newline := bytes.IndexByte(*from, '\n') + if newline < 0 { + lines = append(lines, contextLine{body: (*from)[1:]}) + *from = nil + continue + } + + lines = append(lines, contextLine{body: (*from)[1 : newline+1]}) + *from = (*from)[newline+1:] + } + return lines +} func extractLinesStartingWith(from *[]byte, startingWith byte) [][]byte { var lines [][]byte - for len(*from) > 0 && (*from)[0] == startingWith { - line := subhunkLineRe.Find(*from) - lines = append(lines, line[1:]) - *from = (*from)[len(line):] + for len(*from) > 0 { + if (*from)[0] != startingWith { + break + } + + newline := bytes.IndexByte(*from, '\n') + if newline < 0 { + lines = append(lines, (*from)[1:]) + *from = nil + continue + } + + lines = append(lines, (*from)[1:newline+1]) + *from = (*from)[newline+1:] } return lines } @@ -123,7 +164,7 @@ func toSubhunks(hunk *Hunk) ([]subhunk, error) { } for len(body) > 0 { sh := subhunk{ - context: extractLinesStartingWith(&body, ' '), + context: extractContextLines(&body), orig: extractLinesStartingWith(&body, '-'), new: extractLinesStartingWith(&body, '+'), } diff --git a/diff/reverse_test.go b/diff/reverse_test.go index 313ac6a..a656e54 100644 --- a/diff/reverse_test.go +++ b/diff/reverse_test.go @@ -144,3 +144,75 @@ func TestReverseMultiFileDiff(t *testing.T) { } } } + +func TestReverseRoundTripOnTestdata(t *testing.T) { + fixtures, err := filepath.Glob(filepath.Join("testdata", "*.diff")) + if err != nil { + t.Fatal(err) + } + + skipped := map[string]bool{ + "empty.diff": true, + "empty_new.diff": true, + "empty_orig.diff": true, + "sample_bad_hunks.diff": true, + } + + for _, fixture := range fixtures { + fixture := fixture + name := filepath.Base(fixture) + + t.Run(name, func(t *testing.T) { + data, err := os.ReadFile(fixture) + if err != nil { + t.Fatal(err) + } + + if fileDiffs, err := ParseMultiFileDiff(data); err == nil && len(fileDiffs) > 0 { + reversed, err := ReverseMultiFileDiff(fileDiffs) + if err != nil { + t.Fatalf("first reverse: %s", err) + } + roundTrip, err := ReverseMultiFileDiff(reversed) + if err != nil { + t.Fatalf("second reverse: %s", err) + } + if diff := cmp.Diff(fileDiffs, roundTrip); diff != "" { + t.Fatalf("double reverse did not restore original file diffs:\n%s", diff) + } + return + } + + if hunks, err := ParseHunks(data); err == nil && len(hunks) > 0 { + var reversed []*Hunk + for _, hunk := range hunks { + inv, err := reverseHunk(hunk) + if err != nil { + t.Fatalf("first reverse: %s", err) + } + reversed = append(reversed, inv) + } + + var roundTrip []*Hunk + for _, hunk := range reversed { + inv, err := reverseHunk(hunk) + if err != nil { + t.Fatalf("second reverse: %s", err) + } + roundTrip = append(roundTrip, inv) + } + + if diff := cmp.Diff(hunks, roundTrip); diff != "" { + t.Fatalf("double reverse did not restore original hunks:\n%s", diff) + } + return + } + + if skipped[name] { + return + } + + t.Fatalf("fixture did not contain parseable file diffs or hunks") + }) + } +}