diff --git a/diff/diff.go b/diff/diff.go index 81aa655..cc19fe5 100644 --- a/diff/diff.go +++ b/diff/diff.go @@ -5,12 +5,18 @@ import ( "time" ) +// ParseOptions specifies options for parsing diffs. +type ParseOptions struct { + // KeepCR specifies whether to keep trailing carriage return characters (\r) in lines. + KeepCR bool +} + // A FileDiff represents a unified diff for a single file. // // A file unified diff has a header that resembles the following: // -// --- oldname 2009-10-11 15:12:20.000000000 -0700 -// +++ newname 2009-10-11 15:12:30.000000000 -0700 +// --- oldname 2009-10-11 15:12:20.000000000 -0700 +// +++ newname 2009-10-11 15:12:30.000000000 -0700 type FileDiff struct { // the original name of the file OrigName string diff --git a/diff/diff_test.go b/diff/diff_test.go index e7d96f4..fc42e0b 100644 --- a/diff/diff_test.go +++ b/diff/diff_test.go @@ -2,6 +2,7 @@ package diff import ( "bytes" + "github.com/google/go-cmp/cmp" "io" "io/ioutil" "path/filepath" @@ -9,8 +10,6 @@ import ( "strings" "testing" "time" - - "github.com/google/go-cmp/cmp" ) func unix(sec int64) *time.Time { @@ -1123,3 +1122,99 @@ func TestFileDiff_Stat(t *testing.T) { } } } + +func TestParseMultiFileDiff_Comprehensive(t *testing.T) { + diffData, err := ioutil.ReadFile(filepath.Join("testdata", "sample_multi_file.diff")) + if err != nil { + t.Fatal(err) + } + + fds, err := ParseMultiFileDiff(diffData) + if err != nil { + t.Fatalf("ParseMultiFileDiff failed: %v", err) + } + + if len(fds) != 2 { + t.Fatalf("Expected 2 file diffs, got %d", len(fds)) + } + + // Verify first file + fd1 := fds[0] + if fd1.OrigName != "oldname1" || fd1.NewName != "newname1" { + t.Errorf("Unexpected names for file 1: %q -> %q", fd1.OrigName, fd1.NewName) + } + if len(fd1.Extended) != 3 { + t.Errorf("Expected 3 extended headers for file 1, got %d", len(fd1.Extended)) + } + + if len(fd1.Hunks) != 2 { + t.Fatalf("Expected 2 hunks for file 1, got %d", len(fd1.Hunks)) + } + + h1 := fd1.Hunks[0] + if h1.OrigStartLine != 1 || h1.OrigLines != 3 || h1.NewStartLine != 1 || h1.NewLines != 9 { + t.Errorf("Unexpected dimensions for file 1, hunk 1: Orig(%d, %d) New(%d, %d)", h1.OrigStartLine, h1.OrigLines, h1.NewStartLine, h1.NewLines) + } + + h2 := fd1.Hunks[1] + if h2.OrigStartLine != 5 || h2.OrigLines != 16 || h2.NewStartLine != 11 || h2.NewLines != 10 { + t.Errorf("Unexpected dimensions for file 1, hunk 2: Orig(%d, %d) New(%d, %d)", h2.OrigStartLine, h2.OrigLines, h2.NewStartLine, h2.NewLines) + } + + // Verify second file + fd2 := fds[1] + if fd2.OrigName != "oldname2" || fd2.NewName != "newname2" { + t.Errorf("Unexpected names for file 2: %q -> %q", fd2.OrigName, fd2.NewName) + } + if len(fd2.Hunks) != 2 { + t.Fatalf("Expected 2 hunks for file 2, got %d", len(fd2.Hunks)) + } + + h3 := fd2.Hunks[0] + if h3.OrigStartLine != 1 || h3.OrigLines != 3 || h3.NewStartLine != 1 || h3.NewLines != 9 { + t.Errorf("Unexpected dimensions for file 2, hunk 1: Orig(%d, %d) New(%d, %d)", h3.OrigStartLine, h3.OrigLines, h3.NewStartLine, h3.NewLines) + } +} + +func TestParseMultiFileDiff_KeepCR_E2E(t *testing.T) { + // A full multi-file diff with CRLF line endings + input := "--- file1\r\n" + + "+++ file1\r\n" + + "@@ -1,3 +1,3 @@\r\n" + + " line1\r\n" + + "-line2\r\n" + + "+line2_new\r\n" + + " line3\r\n" + + "diff --git a/file2 b/file2\r\n" + + "new file mode 100644\r\n" + + "index 0000000..e69de29\r\n" + + opts := ParseOptions{KeepCR: true} + fds, err := ParseMultiFileDiffOptions([]byte(input), opts) + if err != nil { + t.Fatalf("ParseMultiFileDiffOptions failed: %v", err) + } + + if len(fds) != 2 { + t.Fatalf("Expected 2 file diffs, got %d", len(fds)) + } + + // File 1 Verify Hunks contain \r + fd1 := fds[0] + if len(fd1.Hunks) != 1 { + t.Fatalf("Expected 1 hunk for file 1, got %d", len(fd1.Hunks)) + } + h1 := fd1.Hunks[0] + if !strings.Contains(string(h1.Body), "\r\n") { + t.Errorf("Expected Hunk body to contain CRLF, got: %q", string(h1.Body)) + } + + // File 2 Verify filename is NOT "file2\r" + fd2 := fds[1] + if fd2.NewName != "b/file2" { + t.Errorf("Expected NewName 'b/file2', got %q", fd2.NewName) + } + if len(fd2.Extended) != 3 { + t.Errorf("Expected 3 extended headers for file 2, got %d", len(fd2.Extended)) + } +} diff --git a/diff/parse.go b/diff/parse.go index 48eeb96..b73e230 100644 --- a/diff/parse.go +++ b/diff/parse.go @@ -1,7 +1,6 @@ package diff import ( - "bufio" "bytes" "errors" "fmt" @@ -17,13 +16,24 @@ import ( // case of per-file errors. If it cannot detect when the diff of the next file // begins, the hunks are added to the FileDiff of the previous file. func ParseMultiFileDiff(diff []byte) ([]*FileDiff, error) { - return NewMultiFileDiffReader(bytes.NewReader(diff)).ReadAllFiles() + return ParseMultiFileDiffOptions(diff, ParseOptions{}) +} + +// ParseMultiFileDiffOptions parses a multi-file unified diff with the given options. +func ParseMultiFileDiffOptions(diff []byte, opts ParseOptions) ([]*FileDiff, error) { + return NewMultiFileDiffReaderOptions(bytes.NewReader(diff), opts).ReadAllFiles() } // NewMultiFileDiffReader returns a new MultiFileDiffReader that reads // a multi-file unified diff from r. func NewMultiFileDiffReader(r io.Reader) *MultiFileDiffReader { - return &MultiFileDiffReader{reader: newLineReader(r)} + return NewMultiFileDiffReaderOptions(r, ParseOptions{}) +} + +// NewMultiFileDiffReaderOptions returns a new MultiFileDiffReader that reads +// a multi-file unified diff from r with the given options. +func NewMultiFileDiffReaderOptions(r io.Reader, opts ParseOptions) *MultiFileDiffReader { + return &MultiFileDiffReader{reader: newLineReaderOptions(r, opts)} } // MultiFileDiffReader reads a multi-file unified diff. @@ -153,13 +163,24 @@ func (r *MultiFileDiffReader) ReadAllFiles() ([]*FileDiff, error) { // ParseFileDiff parses a file unified diff. func ParseFileDiff(diff []byte) (*FileDiff, error) { - return NewFileDiffReader(bytes.NewReader(diff)).Read() + return ParseFileDiffOptions(diff, ParseOptions{}) +} + +// ParseFileDiffOptions parses a file unified diff with the given options. +func ParseFileDiffOptions(diff []byte, opts ParseOptions) (*FileDiff, error) { + return NewFileDiffReaderOptions(bytes.NewReader(diff), opts).Read() } // NewFileDiffReader returns a new FileDiffReader that reads a file // unified diff. func NewFileDiffReader(r io.Reader) *FileDiffReader { - return &FileDiffReader{reader: &lineReader{reader: bufio.NewReader(r)}} + return NewFileDiffReaderOptions(r, ParseOptions{}) +} + +// NewFileDiffReaderOptions returns a new FileDiffReader that reads a file +// unified diff with the given options. +func NewFileDiffReaderOptions(r io.Reader, opts ParseOptions) *FileDiffReader { + return &FileDiffReader{reader: newLineReaderOptions(r, opts)} } // FileDiffReader reads a unified file diff. @@ -405,6 +426,7 @@ func readQuotedFilename(text string) (value string, remainder string, err error) // valid syntax, it may be impossible to extract filenames; if so, the // function returns ("", "", true). func parseDiffGitArgs(diffArgs string) (string, string, bool) { + diffArgs = strings.TrimSuffix(diffArgs, "\r") length := len(diffArgs) if length < 3 { return "", "", false @@ -540,6 +562,7 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) { return } rawFilename := header[len(prefix):] + rawFilename = strings.TrimSuffix(rawFilename, "\r") // extract the filename prefix (e.g. "a/") from the 'diff --git' line. var prefixLetterIndex int @@ -586,7 +609,12 @@ var ( // only of hunks and not include a file header; if it has a file // header, use ParseFileDiff. func ParseHunks(diff []byte) ([]*Hunk, error) { - r := NewHunksReader(bytes.NewReader(diff)) + return ParseHunksOptions(diff, ParseOptions{}) +} + +// ParseHunksOptions parses hunks from a unified diff with the given options. +func ParseHunksOptions(diff []byte, opts ParseOptions) ([]*Hunk, error) { + r := NewHunksReaderOptions(bytes.NewReader(diff), opts) hunks, err := r.ReadAllHunks() if err != nil { return nil, err @@ -597,7 +625,13 @@ func ParseHunks(diff []byte) ([]*Hunk, error) { // NewHunksReader returns a new HunksReader that reads unified diff hunks // from r. func NewHunksReader(r io.Reader) *HunksReader { - return &HunksReader{reader: &lineReader{reader: bufio.NewReader(r)}} + return NewHunksReaderOptions(r, ParseOptions{}) +} + +// NewHunksReaderOptions returns a new HunksReader that reads unified diff hunks +// from r with the given options. +func NewHunksReaderOptions(r io.Reader, opts ParseOptions) *HunksReader { + return &HunksReader{reader: newLineReaderOptions(r, opts)} } // A HunksReader reads hunks from a unified diff. @@ -701,7 +735,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) { // handle that case. return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}} } - if bytes.Equal(line, []byte(noNewlineMessage)) { + if bytes.Equal(bytes.TrimSuffix(line, []byte("\r")), []byte(noNewlineMessage)) { if lastLineFromOrig { // Retain the newline in the body (otherwise the // diff line would be like "-a+b", where "+b" is @@ -755,6 +789,7 @@ func linePrefix(c byte) bool { // if its value is 1. normalizeHeader returns an error if the header // is not in the correct format. func normalizeHeader(header string) (string, string, error) { + header = strings.TrimSuffix(header, "\r") // Split the header into five parts: the first '@@', the two // ranges, the last '@@', and the optional section. pieces := strings.SplitN(header, " ", 5) @@ -815,7 +850,8 @@ func parseOnlyInMessage(line []byte) (bool, []byte, []byte) { if idx < 0 { return false, nil, nil } - return true, line[:idx], line[idx+2:] + filename := bytes.TrimSuffix(line[idx+2:], []byte("\r")) + return true, line[:idx], filename } // A ParseError is a description of a unified diff syntax error. diff --git a/diff/reader_util.go b/diff/reader_util.go index 4530025..f1c17be 100644 --- a/diff/reader_util.go +++ b/diff/reader_util.go @@ -13,6 +13,13 @@ func newLineReader(r io.Reader) *lineReader { return &lineReader{reader: bufio.NewReader(r)} } +func newLineReaderOptions(r io.Reader, opts ParseOptions) *lineReader { + return &lineReader{ + reader: bufio.NewReader(r), + keepCR: opts.KeepCR, + } +} + // lineReader is a wrapper around a bufio.Reader that caches the next line to // provide lookahead functionality for the next two lines. type lineReader struct { @@ -20,13 +27,15 @@ type lineReader struct { cachedNextLine []byte cachedNextLineErr error + + keepCR bool } // readLine returns the next unconsumed line and advances the internal cache of // the lineReader. func (l *lineReader) readLine() ([]byte, error) { if l.cachedNextLine == nil && l.cachedNextLineErr == nil { - l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader) + l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR) } if l.cachedNextLineErr != nil { @@ -35,7 +44,7 @@ func (l *lineReader) readLine() ([]byte, error) { next := l.cachedNextLine - l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader) + l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR) return next, nil } @@ -47,7 +56,7 @@ func (l *lineReader) readLine() ([]byte, error) { // be used when at the end of the file. func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) { if l.cachedNextLine == nil && l.cachedNextLineErr == nil { - l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader) + l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR) } return l.lineHasPrefix(l.cachedNextLine, prefix, l.cachedNextLineErr) @@ -64,7 +73,7 @@ func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) { // returned. func (l *lineReader) nextNextLineStartsWith(prefix string) (bool, error) { if l.cachedNextLine == nil && l.cachedNextLineErr == nil { - l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader) + l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR) } next, err := l.reader.Peek(len(prefix)) @@ -93,7 +102,7 @@ func (l *lineReader) lineHasPrefix(line []byte, prefix string, readErr error) (b // the next line in the Reader with the trailing newline stripped. It will return an // io.EOF error when there is nothing left to read (at the start of the function call). It // will return any other errors it receives from the underlying call to ReadBytes. -func readLine(r *bufio.Reader) ([]byte, error) { +func readLine(r *bufio.Reader, keepCR bool) ([]byte, error) { line_, err := r.ReadBytes('\n') if err == io.EOF { if len(line_) == 0 { @@ -103,12 +112,18 @@ func readLine(r *bufio.Reader) ([]byte, error) { // ReadBytes returned io.EOF, because it didn't find another newline, but there is // still the remainder of the file to return as a line. line := line_ + if !keepCR { + return dropCR(line), nil + } return line, nil } else if err != nil { return nil, err } line := line_[0 : len(line_)-1] - return dropCR(line), nil + if !keepCR { + return dropCR(line), nil + } + return line, nil } // dropCR drops a terminal \r from the data. diff --git a/diff/reader_util_test.go b/diff/reader_util_test.go index 8dd0016..760fcb7 100644 --- a/diff/reader_util_test.go +++ b/diff/reader_util_test.go @@ -51,7 +51,7 @@ index 0000000..3be2928`, in := bufio.NewReader(strings.NewReader(test.input)) out := []string{} for { - l, err := readLine(in) + l, err := readLine(in, false) if err == io.EOF { break } @@ -207,3 +207,24 @@ ccc rest of line` } } + +func TestReadLine_KeepCR(t *testing.T) { + input := "line1\r\nline2\r\n" + in := bufio.NewReader(strings.NewReader(input)) + + l, err := readLine(in, true) + if err != nil { + t.Fatal(err) + } + if string(l) != "line1\r" { + t.Errorf("expected line1\\r, got %q", string(l)) + } + + l, err = readLine(in, true) + if err != nil { + t.Fatal(err) + } + if string(l) != "line2\r" { + t.Errorf("expected line2\\r, got %q", string(l)) + } +} diff --git a/diff/repro_test.go b/diff/repro_test.go new file mode 100644 index 0000000..4fb284b --- /dev/null +++ b/diff/repro_test.go @@ -0,0 +1,143 @@ +package diff + +import ( + "path/filepath" + "reflect" + "testing" +) + +func TestHandleEmpty_KeepCR(t *testing.T) { + // A diff with CRLF line endings + input := "diff --git a/file b/file\r\n" + + "new file mode 100644\r\n" + + "index 0000000..e69de29\r\n" + + opts := ParseOptions{KeepCR: true} + fds, err := ParseMultiFileDiffOptions([]byte(input), opts) + if err != nil { + t.Fatal(err) + } + + if len(fds) != 1 { + t.Fatalf("expected 1 FileDiff, got %d", len(fds)) + } + + fd := fds[0] + // The problem: if KeepCR is true, NewName might be "b/file\r" + wantNewName := "b/file" + if fd.NewName != wantNewName { + t.Errorf("expected NewName %q, got %q", wantNewName, fd.NewName) + } + + // Also check Extended headers + wantExtended := []string{ + "diff --git a/file b/file\r", + "new file mode 100644\r", + "index 0000000..e69de29\r", + } + if !reflect.DeepEqual(fd.Extended, wantExtended) { + t.Errorf("Extended headers mismatch.\nwant: %q\ngot: %q", wantExtended, fd.Extended) + } +} +func TestParseOnlyIn_KeepCR(t *testing.T) { + // A diff with an "Only in" message and CRLF line ending + input := "Only in /tmp: file\r\n" + + opts := ParseOptions{KeepCR: true} + fds, err := ParseMultiFileDiffOptions([]byte(input), opts) + if err != nil { + t.Fatal(err) + } + + if len(fds) != 1 { + t.Fatalf("expected 1 FileDiff, got %d", len(fds)) + } + + fd := fds[0] + // The path should be /tmp/file (or \tmp\file on windows, but filepath.Join is used) + // Actually, the code uses filepath.Join(string(source), string(filename)) + wantOrigName := filepath.Join("/tmp", "file") + if fd.OrigName != wantOrigName { + t.Errorf("expected OrigName %q, got %q", wantOrigName, fd.OrigName) + } + + if fd.NewName != "" { + t.Errorf("expected empty NewName, got %q", fd.NewName) + } +} + +func TestNormalizeHeader_KeepCR(t *testing.T) { + tests := []struct { + name string + input string + wantH string + wantS string + wantErr bool + }{ + { + name: "NoSection_CR", + input: "@@ -1,1 +1,1 @@\r", + wantH: "@@ -1,1 +1,1 @@", + wantS: "", + }, + { + name: "WithSection_CR", + input: "@@ -1,1 +1,1 @@ some section\r", + wantH: "@@ -1,1 +1,1 @@ some section", + wantS: "some section", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotH, gotS, err := normalizeHeader(tt.input) + if (err != nil) != tt.wantErr { + t.Errorf("normalizeHeader() error = %v, wantErr %v", err, tt.wantErr) + return + } + if gotH != tt.wantH { + t.Errorf("normalizeHeader() gotH = %q, want %q", gotH, tt.wantH) + } + if gotS != tt.wantS { + t.Errorf("normalizeHeader() gotS = %q, want %q", gotS, tt.wantS) + } + }) + } +} + +func TestNoNewline_KeepCR(t *testing.T) { + // A diff with CRLF line endings and a "No newline at end of file" marker + input := "--- a/file\r\n" + + "+++ b/file\r\n" + + "@@ -1 +1 @@\r\n" + + "-a\r\n" + + "\\ No newline at end of file\r\n" + + "+b\r\n" + + opts := ParseOptions{KeepCR: true} + fds, err := ParseMultiFileDiffOptions([]byte(input), opts) + if err != nil { + t.Fatal(err) + } + + if len(fds) != 1 { + t.Fatalf("expected 1 FileDiff, got %d", len(fds)) + } + + fd := fds[0] + if len(fd.Hunks) != 1 { + t.Fatalf("expected 1 hunk, got %d", len(fd.Hunks)) + } + + h := fd.Hunks[0] + // The body should contain the lines exactly as they are (including \r\n) + // but the "No newline" marker should NOT be in the body. + wantBody := "-a\r\n+b\r\n" + if string(h.Body) != wantBody { + t.Errorf("expected body %q, got %q", wantBody, string(h.Body)) + } + + if h.OrigNoNewlineAt == 0 { + t.Error("expected OrigNoNewlineAt to be set") + } +}