From 5d205a639cf945a7250ef7f2afab509d91a6dd69 Mon Sep 17 00:00:00 2001 From: Jim Myhrberg Date: Sat, 31 Oct 2020 19:55:56 +0000 Subject: [PATCH] wip: improve whitespace handling in paragraph parsing --- pkg/commit/parser.go | 10 ++++-- pkg/commit/parser_test.go | 70 +++++++++++++++++++++++++++++---------- 2 files changed, 59 insertions(+), 21 deletions(-) diff --git a/pkg/commit/parser.go b/pkg/commit/parser.go index d843a5c..5adacb5 100644 --- a/pkg/commit/parser.go +++ b/pkg/commit/parser.go @@ -34,10 +34,14 @@ func parseHeader(header []byte) (*Commit, error) { }, nil } -func paragraphs(input []byte) [][]byte { - paras := bytes.Split(normlizeLinefeeds(input), []byte{lf, lf}) +func paragraphs(commitMsg []byte) [][]byte { + paras := bytes.Split( + bytes.TrimSpace(normlizeLinefeeds(commitMsg)), + []byte{lf, lf}, + ) + for i, p := range paras { - paras[i] = bytes.Trim(p, crlf) + paras[i] = bytes.TrimSpace(p) } return paras diff --git a/pkg/commit/parser_test.go b/pkg/commit/parser_test.go index fa78e2c..e781538 100644 --- a/pkg/commit/parser_test.go +++ b/pkg/commit/parser_test.go @@ -194,12 +194,17 @@ func Test_paragraph(t *testing.T) { args: args{input: []byte("hello world\nthe brown fox\n")}, want: [][]byte{[]byte("hello world\nthe brown fox")}, }, + { + name: "excess whitespace", + args: args{input: []byte(" \n hello world\nthe brown fox \n ")}, + want: [][]byte{[]byte("hello world\nthe brown fox")}, + }, { name: "multiple paragraphs", args: args{input: []byte( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" + - "Praesent eleifend lorem non purus finibus, interdum\n" + - "hendrerit sem bibendum.\n" + + "Lorem ipsum dolor sit amet, consectetur adipiscing\n" + + "elit.Praesent eleifend lorem non purus finibus,\n" + + "interdum hendrerit sem bibendum.\n" + "\n" + "Etiam porttitor mollis nulla, egestas facilisis nisi\n" + "molestie ut. Quisque mi mi, commodo ut mattis a,\n" + @@ -211,9 +216,9 @@ func Test_paragraph(t *testing.T) { )}, want: [][]byte{ []byte( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" + - "Praesent eleifend lorem non purus finibus, interdum\n" + - "hendrerit sem bibendum.", + "Lorem ipsum dolor sit amet, consectetur adipiscing\n" + + "elit.Praesent eleifend lorem non purus finibus,\n" + + "interdum hendrerit sem bibendum.", ), []byte( "Etiam porttitor mollis nulla, egestas facilisis nisi\n" + @@ -227,12 +232,41 @@ func Test_paragraph(t *testing.T) { ), }, }, + { + name: "paragraphs with surrounding whitespace", + args: args{input: []byte( + "\n" + + " \n" + + " Lorem ipsum dolor sit amet, consectetur adipiscing\n" + + "elit.Praesent eleifend lorem non purus finibus,\n" + + "interdum hendrerit sem bibendum. \n" + + "\n" + + "\n" + + " Etiam porttitor mollis nulla, egestas facilisis nisi\n" + + "molestie ut. Quisque mi mi, commodo ut mattis a,\n" + + "scelerisque eu elit.\n" + + " \n" + + " ", + )}, + want: [][]byte{ + []byte( + "Lorem ipsum dolor sit amet, consectetur adipiscing\n" + + "elit.Praesent eleifend lorem non purus finibus,\n" + + "interdum hendrerit sem bibendum.", + ), + []byte( + "Etiam porttitor mollis nulla, egestas facilisis nisi\n" + + "molestie ut. Quisque mi mi, commodo ut mattis a,\n" + + "scelerisque eu elit.", + ), + }, + }, { name: "CRLF line separator", args: args{input: []byte( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\r\n" + - "Praesent eleifend lorem non purus finibus, interdum\r\n" + - "hendrerit sem bibendum.\r\n" + + "Lorem ipsum dolor sit amet, consectetur adipiscing\r\n" + + "elit.Praesent eleifend lorem non purus finibus,\r\n" + + "interdum hendrerit sem bibendum.\r\n" + "\r\n" + "Etiam porttitor mollis nulla, egestas facilisis nisi\r\n" + "molestie ut. Quisque mi mi, commodo ut mattis a,\r\n" + @@ -240,9 +274,9 @@ func Test_paragraph(t *testing.T) { )}, want: [][]byte{ []byte( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" + - "Praesent eleifend lorem non purus finibus, interdum\n" + - "hendrerit sem bibendum.", + "Lorem ipsum dolor sit amet, consectetur adipiscing\n" + + "elit.Praesent eleifend lorem non purus finibus,\n" + + "interdum hendrerit sem bibendum.", ), []byte( "Etiam porttitor mollis nulla, egestas facilisis nisi\n" + @@ -254,9 +288,9 @@ func Test_paragraph(t *testing.T) { { name: "CR line separator", args: args{input: []byte( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\r" + - "Praesent eleifend lorem non purus finibus, interdum\r" + - "hendrerit sem bibendum.\r" + + "Lorem ipsum dolor sit amet, consectetur adipiscing\r" + + "elit.Praesent eleifend lorem non purus finibus,\r" + + "interdum hendrerit sem bibendum.\r" + "\r" + "Etiam porttitor mollis nulla, egestas facilisis nisi\r" + "molestie ut. Quisque mi mi, commodo ut mattis a,\r" + @@ -264,9 +298,9 @@ func Test_paragraph(t *testing.T) { )}, want: [][]byte{ []byte( - "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" + - "Praesent eleifend lorem non purus finibus, interdum\n" + - "hendrerit sem bibendum.", + "Lorem ipsum dolor sit amet, consectetur adipiscing\n" + + "elit.Praesent eleifend lorem non purus finibus,\n" + + "interdum hendrerit sem bibendum.", ), []byte( "Etiam porttitor mollis nulla, egestas facilisis nisi\n" +