Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,18 @@ import (
"time"
)

// ParseOptions specifies options for parsing diffs.
//
// The zero value selects the default behavior, in which a trailing carriage
// return is dropped from each line as it is read.
type ParseOptions struct {
// KeepCR specifies whether to keep trailing carriage return characters (\r) in lines.
// When false, the line reader removes a terminal \r from every line it returns.
KeepCR bool
}

// A FileDiff represents a unified diff for a single file.
//
// A file unified diff has a header that resembles the following:
//
// --- oldname 2009-10-11 15:12:20.000000000 -0700
// +++ newname 2009-10-11 15:12:30.000000000 -0700
// --- oldname 2009-10-11 15:12:20.000000000 -0700
// +++ newname 2009-10-11 15:12:30.000000000 -0700
type FileDiff struct {
// the original name of the file
OrigName string
Expand Down
99 changes: 97 additions & 2 deletions diff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@ package diff

import (
"bytes"
"github.com/google/go-cmp/cmp"
"io"
"io/ioutil"
"path/filepath"
"reflect"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
)

func unix(sec int64) *time.Time {
Expand Down Expand Up @@ -1123,3 +1122,99 @@ func TestFileDiff_Stat(t *testing.T) {
}
}
}

// TestParseMultiFileDiff_Comprehensive parses testdata/sample_multi_file.diff
// with ParseMultiFileDiff and checks the names, extended header count and
// hunk ranges of the two file diffs it is expected to contain.
func TestParseMultiFileDiff_Comprehensive(t *testing.T) {
	fixture := filepath.Join("testdata", "sample_multi_file.diff")
	raw, readErr := ioutil.ReadFile(fixture)
	if readErr != nil {
		t.Fatal(readErr)
	}

	files, parseErr := ParseMultiFileDiff(raw)
	if parseErr != nil {
		t.Fatalf("ParseMultiFileDiff failed: %v", parseErr)
	}
	if count := len(files); count != 2 {
		t.Fatalf("Expected 2 file diffs, got %d", count)
	}

	// First file: names and extended headers are soft failures, but the
	// hunk count must be right before the hunks are indexed.
	first := files[0]
	if !(first.OrigName == "oldname1" && first.NewName == "newname1") {
		t.Errorf("Unexpected names for file 1: %q -> %q", first.OrigName, first.NewName)
	}
	if count := len(first.Extended); count != 3 {
		t.Errorf("Expected 3 extended headers for file 1, got %d", count)
	}
	if count := len(first.Hunks); count != 2 {
		t.Fatalf("Expected 2 hunks for file 1, got %d", count)
	}

	hunkA := first.Hunks[0]
	if !(hunkA.OrigStartLine == 1 && hunkA.OrigLines == 3 && hunkA.NewStartLine == 1 && hunkA.NewLines == 9) {
		t.Errorf("Unexpected dimensions for file 1, hunk 1: Orig(%d, %d) New(%d, %d)", hunkA.OrigStartLine, hunkA.OrigLines, hunkA.NewStartLine, hunkA.NewLines)
	}

	hunkB := first.Hunks[1]
	if !(hunkB.OrigStartLine == 5 && hunkB.OrigLines == 16 && hunkB.NewStartLine == 11 && hunkB.NewLines == 10) {
		t.Errorf("Unexpected dimensions for file 1, hunk 2: Orig(%d, %d) New(%d, %d)", hunkB.OrigStartLine, hunkB.OrigLines, hunkB.NewStartLine, hunkB.NewLines)
	}

	// Second file: names, hunk count, and the range of its first hunk.
	second := files[1]
	if !(second.OrigName == "oldname2" && second.NewName == "newname2") {
		t.Errorf("Unexpected names for file 2: %q -> %q", second.OrigName, second.NewName)
	}
	if count := len(second.Hunks); count != 2 {
		t.Fatalf("Expected 2 hunks for file 2, got %d", count)
	}

	hunkC := second.Hunks[0]
	if !(hunkC.OrigStartLine == 1 && hunkC.OrigLines == 3 && hunkC.NewStartLine == 1 && hunkC.NewLines == 9) {
		t.Errorf("Unexpected dimensions for file 2, hunk 1: Orig(%d, %d) New(%d, %d)", hunkC.OrigStartLine, hunkC.OrigLines, hunkC.NewStartLine, hunkC.NewLines)
	}
}

func TestParseMultiFileDiff_KeepCR_E2E(t *testing.T) {
// A full multi-file diff with CRLF line endings
input := "--- file1\r\n" +
"+++ file1\r\n" +
"@@ -1,3 +1,3 @@\r\n" +
" line1\r\n" +
"-line2\r\n" +
"+line2_new\r\n" +
" line3\r\n" +
"diff --git a/file2 b/file2\r\n" +
"new file mode 100644\r\n" +
"index 0000000..e69de29\r\n"

opts := ParseOptions{KeepCR: true}
fds, err := ParseMultiFileDiffOptions([]byte(input), opts)
if err != nil {
t.Fatalf("ParseMultiFileDiffOptions failed: %v", err)
}

if len(fds) != 2 {
t.Fatalf("Expected 2 file diffs, got %d", len(fds))
}

// File 1 Verify Hunks contain \r
fd1 := fds[0]
if len(fd1.Hunks) != 1 {
t.Fatalf("Expected 1 hunk for file 1, got %d", len(fd1.Hunks))
}
h1 := fd1.Hunks[0]
if !strings.Contains(string(h1.Body), "\r\n") {
t.Errorf("Expected Hunk body to contain CRLF, got: %q", string(h1.Body))
}

// File 2 Verify filename is NOT "file2\r"
fd2 := fds[1]
if fd2.NewName != "b/file2" {
t.Errorf("Expected NewName 'b/file2', got %q", fd2.NewName)
}
if len(fd2.Extended) != 3 {
t.Errorf("Expected 3 extended headers for file 2, got %d", len(fd2.Extended))
}
}
54 changes: 45 additions & 9 deletions diff/parse.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package diff

import (
"bufio"
"bytes"
"errors"
"fmt"
Expand All @@ -17,13 +16,24 @@ import (
// case of per-file errors. If it cannot detect when the diff of the next file
// begins, the hunks are added to the FileDiff of the previous file.
func ParseMultiFileDiff(diff []byte) ([]*FileDiff, error) {
	// Delegate to the options-aware entry point with the zero ParseOptions so
	// both entry points share one code path; a second, unreachable return
	// that built the reader directly has been removed.
	return ParseMultiFileDiffOptions(diff, ParseOptions{})
}

// ParseMultiFileDiffOptions parses the multi-file unified diff held in diff,
// reading it through a MultiFileDiffReader configured with opts, and returns
// every file diff that reader yields.
func ParseMultiFileDiffOptions(diff []byte, opts ParseOptions) ([]*FileDiff, error) {
	src := bytes.NewReader(diff)
	mr := NewMultiFileDiffReaderOptions(src, opts)
	return mr.ReadAllFiles()
}

// NewMultiFileDiffReader returns a new MultiFileDiffReader that reads
// a multi-file unified diff from r.
//
// It is shorthand for NewMultiFileDiffReaderOptions with the zero
// ParseOptions.
func NewMultiFileDiffReader(r io.Reader) *MultiFileDiffReader {
	// A single return through the options-aware constructor; the previous
	// body also carried an unreachable second return that built the reader
	// a different way.
	return NewMultiFileDiffReaderOptions(r, ParseOptions{})
}

// NewMultiFileDiffReaderOptions returns a new MultiFileDiffReader for the
// multi-file unified diff supplied by r. The reader's line source is built
// from r and opts.
func NewMultiFileDiffReaderOptions(r io.Reader, opts ParseOptions) *MultiFileDiffReader {
	mr := new(MultiFileDiffReader)
	mr.reader = newLineReaderOptions(r, opts)
	return mr
}

// MultiFileDiffReader reads a multi-file unified diff.
Expand Down Expand Up @@ -153,13 +163,24 @@ func (r *MultiFileDiffReader) ReadAllFiles() ([]*FileDiff, error) {

// ParseFileDiff parses a file unified diff.
//
// It is shorthand for ParseFileDiffOptions with the zero ParseOptions.
func ParseFileDiff(diff []byte) (*FileDiff, error) {
	// Only one return is kept: the delegation to the options-aware parser.
	// The extra return that followed the first one could never execute.
	return ParseFileDiffOptions(diff, ParseOptions{})
}

// ParseFileDiffOptions parses the file unified diff held in diff, reading it
// through a FileDiffReader configured with opts.
func ParseFileDiffOptions(diff []byte, opts ParseOptions) (*FileDiff, error) {
	fr := NewFileDiffReaderOptions(bytes.NewReader(diff), opts)
	return fr.Read()
}

// NewFileDiffReader returns a new FileDiffReader that reads a file
// unified diff.
//
// It is shorthand for NewFileDiffReaderOptions with the zero ParseOptions.
func NewFileDiffReader(r io.Reader) *FileDiffReader {
	// Build the reader through the shared options-aware constructor rather
	// than assembling a lineReader by hand; the hand-built variant was
	// followed by a second return that could never run.
	return NewFileDiffReaderOptions(r, ParseOptions{})
}

// NewFileDiffReaderOptions returns a new FileDiffReader for the file unified
// diff supplied by r. The reader's line source is built from r and opts.
func NewFileDiffReaderOptions(r io.Reader, opts ParseOptions) *FileDiffReader {
	fr := new(FileDiffReader)
	fr.reader = newLineReaderOptions(r, opts)
	return fr
}

// FileDiffReader reads a unified file diff.
Expand Down Expand Up @@ -405,6 +426,7 @@ func readQuotedFilename(text string) (value string, remainder string, err error)
// valid syntax, it may be impossible to extract filenames; if so, the
// function returns ("", "", true).
func parseDiffGitArgs(diffArgs string) (string, string, bool) {
diffArgs = strings.TrimSuffix(diffArgs, "\r")
length := len(diffArgs)
if length < 3 {
return "", "", false
Expand Down Expand Up @@ -540,6 +562,7 @@ func handleEmpty(fd *FileDiff) (wasEmpty bool) {
return
}
rawFilename := header[len(prefix):]
rawFilename = strings.TrimSuffix(rawFilename, "\r")

// extract the filename prefix (e.g. "a/") from the 'diff --git' line.
var prefixLetterIndex int
Expand Down Expand Up @@ -586,7 +609,12 @@ var (
// only of hunks and not include a file header; if it has a file
// header, use ParseFileDiff.
func ParseHunks(diff []byte) ([]*Hunk, error) {
	// Delegate to the options-aware parser with the zero ParseOptions. The
	// body previously also declared a HunksReader that was never used, which
	// Go rejects as an unused variable.
	return ParseHunksOptions(diff, ParseOptions{})
}

// ParseHunksOptions parses hunks from a unified diff with the given options.
func ParseHunksOptions(diff []byte, opts ParseOptions) ([]*Hunk, error) {
r := NewHunksReaderOptions(bytes.NewReader(diff), opts)
hunks, err := r.ReadAllHunks()
if err != nil {
return nil, err
Expand All @@ -597,7 +625,13 @@ func ParseHunks(diff []byte) ([]*Hunk, error) {
// NewHunksReader returns a new HunksReader that reads unified diff hunks
// from r.
//
// It is shorthand for NewHunksReaderOptions with the zero ParseOptions.
func NewHunksReader(r io.Reader) *HunksReader {
	// Build the reader through the shared options-aware constructor; the
	// hand-assembled lineReader variant and the unreachable return after it
	// are gone.
	return NewHunksReaderOptions(r, ParseOptions{})
}

// NewHunksReaderOptions returns a new HunksReader for the unified diff hunks
// supplied by r. The reader's line source is built from r and opts.
func NewHunksReaderOptions(r io.Reader, opts ParseOptions) *HunksReader {
	hr := new(HunksReader)
	hr.reader = newLineReaderOptions(r, opts)
	return hr
}

// A HunksReader reads hunks from a unified diff.
Expand Down Expand Up @@ -701,7 +735,7 @@ func (r *HunksReader) ReadHunk() (*Hunk, error) {
// handle that case.
return r.hunk, &ParseError{r.line, r.offset, &ErrBadHunkLine{Line: line}}
}
if bytes.Equal(line, []byte(noNewlineMessage)) {
if bytes.Equal(bytes.TrimSuffix(line, []byte("\r")), []byte(noNewlineMessage)) {
if lastLineFromOrig {
// Retain the newline in the body (otherwise the
// diff line would be like "-a+b", where "+b" is
Expand Down Expand Up @@ -755,6 +789,7 @@ func linePrefix(c byte) bool {
// if its value is 1. normalizeHeader returns an error if the header
// is not in the correct format.
func normalizeHeader(header string) (string, string, error) {
header = strings.TrimSuffix(header, "\r")
// Split the header into five parts: the first '@@', the two
// ranges, the last '@@', and the optional section.
pieces := strings.SplitN(header, " ", 5)
Expand Down Expand Up @@ -815,7 +850,8 @@ func parseOnlyInMessage(line []byte) (bool, []byte, []byte) {
if idx < 0 {
return false, nil, nil
}
return true, line[:idx], line[idx+2:]
filename := bytes.TrimSuffix(line[idx+2:], []byte("\r"))
return true, line[:idx], filename
}

// A ParseError is a description of a unified diff syntax error.
Expand Down
27 changes: 21 additions & 6 deletions diff/reader_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,29 @@ func newLineReader(r io.Reader) *lineReader {
return &lineReader{reader: bufio.NewReader(r)}
}

// newLineReaderOptions wraps r in a buffered lineReader and copies the KeepCR
// setting from opts onto it.
func newLineReaderOptions(r io.Reader, opts ParseOptions) *lineReader {
	lr := new(lineReader)
	lr.reader = bufio.NewReader(r)
	lr.keepCR = opts.KeepCR
	return lr
}

// lineReader is a wrapper around a bufio.Reader that caches the next line to
// provide lookahead functionality for the next two lines.
type lineReader struct {
// reader is the underlying buffered source that lines are read from.
reader *bufio.Reader

// cachedNextLine and cachedNextLineErr hold the result of the readLine call
// made ahead of the caller: the next unconsumed line and the error that
// came with it. Both being nil means nothing has been cached yet.
cachedNextLine []byte
cachedNextLineErr error

// keepCR is passed to readLine on every read; when false a terminal \r is
// dropped from each line, when true the line is returned unchanged.
keepCR bool
}

// readLine returns the next unconsumed line and advances the internal cache of
// the lineReader.
func (l *lineReader) readLine() ([]byte, error) {
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
}

if l.cachedNextLineErr != nil {
Expand All @@ -35,7 +44,7 @@ func (l *lineReader) readLine() ([]byte, error) {

next := l.cachedNextLine

l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)

return next, nil
}
Expand All @@ -47,7 +56,7 @@ func (l *lineReader) readLine() ([]byte, error) {
// be used when at the end of the file.
func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) {
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
}

return l.lineHasPrefix(l.cachedNextLine, prefix, l.cachedNextLineErr)
Expand All @@ -64,7 +73,7 @@ func (l *lineReader) nextLineStartsWith(prefix string) (bool, error) {
// returned.
func (l *lineReader) nextNextLineStartsWith(prefix string) (bool, error) {
if l.cachedNextLine == nil && l.cachedNextLineErr == nil {
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader)
l.cachedNextLine, l.cachedNextLineErr = readLine(l.reader, l.keepCR)
}

next, err := l.reader.Peek(len(prefix))
Expand Down Expand Up @@ -93,7 +102,7 @@ func (l *lineReader) lineHasPrefix(line []byte, prefix string, readErr error) (b
// the next line in the Reader with the trailing newline stripped. It will return an
// io.EOF error when there is nothing left to read (at the start of the function call). It
// will return any other errors it receives from the underlying call to ReadBytes.
func readLine(r *bufio.Reader) ([]byte, error) {
func readLine(r *bufio.Reader, keepCR bool) ([]byte, error) {
line_, err := r.ReadBytes('\n')
if err == io.EOF {
if len(line_) == 0 {
Expand All @@ -103,12 +112,18 @@ func readLine(r *bufio.Reader) ([]byte, error) {
// ReadBytes returned io.EOF, because it didn't find another newline, but there is
// still the remainder of the file to return as a line.
line := line_
if !keepCR {
return dropCR(line), nil
}
return line, nil
} else if err != nil {
return nil, err
}
line := line_[0 : len(line_)-1]
return dropCR(line), nil
if !keepCR {
return dropCR(line), nil
}
return line, nil
}

// dropCR drops a terminal \r from the data.
Expand Down
23 changes: 22 additions & 1 deletion diff/reader_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ index 0000000..3be2928`,
in := bufio.NewReader(strings.NewReader(test.input))
out := []string{}
for {
l, err := readLine(in)
l, err := readLine(in, false)
if err == io.EOF {
break
}
Expand Down Expand Up @@ -207,3 +207,24 @@ ccc rest of line`

}
}

// TestReadLine_KeepCR checks that readLine, called with keepCR set, strips
// only the trailing newline and leaves the carriage return on each line of
// CRLF-terminated input.
func TestReadLine_KeepCR(t *testing.T) {
	src := bufio.NewReader(strings.NewReader("line1\r\nline2\r\n"))

	// mustRead fetches the next line with keepCR enabled and aborts the
	// test if the read fails.
	mustRead := func() string {
		line, err := readLine(src, true)
		if err != nil {
			t.Fatal(err)
		}
		return string(line)
	}

	if got := mustRead(); got != "line1\r" {
		t.Errorf("expected line1\\r, got %q", got)
	}
	if got := mustRead(); got != "line2\r" {
		t.Errorf("expected line2\\r, got %q", got)
	}
}
Loading