@@ -10,6 +10,7 @@ import (
1010 "bytes"
1111 "fmt"
1212 gohtml "html"
13+ "io"
1314 "path/filepath"
1415 "strings"
1516 "sync"
@@ -26,7 +27,13 @@ import (
2627)
2728
2829// don't highlight files larger than this many bytes for performance purposes
29- const sizeLimit = 1000000
30+ const sizeLimit = 1024 * 1024
31+
32+ // newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept
33+ // this option is here for 2 purposes:
34+ // (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future
35+ // (2) make it clear to do tests: "&#10;" is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored
36+ var newLineInHTML = "&#10;"
3037
3138var (
3239 // For custom user mapping
@@ -46,7 +53,6 @@ func NewContext() {
4653 highlightMapping [keys [i ].Name ()] = keys [i ].Value ()
4754 }
4855 }
49-
5056 // The size 512 is simply a conservative rule of thumb
5157 c , err := lru .New2Q (512 )
5258 if err != nil {
@@ -60,7 +66,7 @@ func NewContext() {
6066func Code (fileName , language , code string ) string {
6167 NewContext ()
6268
63- // diff view newline will be passed as empty, change to literal \n so it can be copied
69+ // diff view newline will be passed as empty, change to literal '\n' so it can be copied
6470 // preserve literal newline in blame view
6571 if code == "" || code == "\n " {
6672 return "\n "
@@ -128,36 +134,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
128134 return code
129135 }
130136
131- htmlw .Flush ()
137+ _ = htmlw .Flush ()
132138 // Chroma will add newlines for certain lexers in order to highlight them properly
133- // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output
139+ // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
134140 return strings .TrimSuffix (htmlbuf .String (), "\n " )
135141}
136142
137- // File returns a slice of chroma syntax highlighted lines of code
138- func File (numLines int , fileName , language string , code []byte ) []string {
143+ // File returns a slice of chroma syntax highlighted HTML lines of code
144+ func File (fileName , language string , code []byte ) ( []string , error ) {
139145 NewContext ()
140146
141147 if len (code ) > sizeLimit {
142- return plainText ( string ( code ), numLines )
148+ return PlainText ( code ), nil
143149 }
150+
144151 formatter := html .New (html .WithClasses (true ),
145152 html .WithLineNumbers (false ),
146153 html .PreventSurroundingPre (true ),
147154 )
148155
149- if formatter == nil {
150- log .Error ("Couldn't create chroma formatter" )
151- return plainText (string (code ), numLines )
152- }
153-
154- htmlbuf := bytes.Buffer {}
155- htmlw := bufio .NewWriter (& htmlbuf )
156+ htmlBuf := bytes.Buffer {}
157+ htmlWriter := bufio .NewWriter (& htmlBuf )
156158
157159 var lexer chroma.Lexer
158160
159161 // provided language overrides everything
160- if len ( language ) > 0 {
162+ if language != "" {
161163 lexer = lexers .Get (language )
162164 }
163165
@@ -168,9 +170,9 @@ func File(numLines int, fileName, language string, code []byte) []string {
168170 }
169171
170172 if lexer == nil {
171- language := analyze .GetCodeLanguage (fileName , code )
173+ guessLanguage := analyze .GetCodeLanguage (fileName , code )
172174
173- lexer = lexers .Get (language )
175+ lexer = lexers .Get (guessLanguage )
174176 if lexer == nil {
175177 lexer = lexers .Match (fileName )
176178 if lexer == nil {
@@ -181,54 +183,92 @@ func File(numLines int, fileName, language string, code []byte) []string {
181183
182184 iterator , err := lexer .Tokenise (nil , string (code ))
183185 if err != nil {
184- log .Error ("Can't tokenize code: %v" , err )
185- return plainText (string (code ), numLines )
186+ return nil , fmt .Errorf ("can't tokenize code: %w" , err )
186187 }
187188
188- err = formatter .Format (htmlw , styles .GitHub , iterator )
189+ err = formatter .Format (htmlWriter , styles .GitHub , iterator )
189190 if err != nil {
190- log .Error ("Can't format code: %v" , err )
191- return plainText (string (code ), numLines )
191+ return nil , fmt .Errorf ("can't format code: %w" , err )
192192 }
193193
194- htmlw .Flush ()
195- finalNewLine := false
196- if len (code ) > 0 {
197- finalNewLine = code [len (code )- 1 ] == '\n'
198- }
194+ _ = htmlWriter .Flush ()
195+
196+ m := make ([]string , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
199197
200- m := make ([]string , 0 , numLines )
201- for _ , v := range strings .SplitN (htmlbuf .String (), "\n " , numLines ) {
202- content := v
203- // need to keep lines that are only \n so copy/paste works properly in browser
204- if content == "" {
205- content = "\n "
206- } else if content == `</span><span class="w">` {
207- content += "\n </span>"
208- } else if content == `</span></span><span class="line"><span class="cl">` {
209- content += "\n "
198+ htmlStr := htmlBuf .String ()
199+ line := strings.Builder {}
200+ insideLine := 0 // every <span class="cl"> makes it increase one level, every closed <span class="cl"> makes it decrease one level
201+ tagStack := make ([]string , 0 , 4 )
202+ for len (htmlStr ) > 0 {
203+ pos1 := strings .IndexByte (htmlStr , '<' )
204+ pos2 := strings .IndexByte (htmlStr , '>' )
205+ if pos1 == - 1 || pos2 == - 1 || pos1 > pos2 {
206+ break
210207 }
211- content = strings .TrimSuffix (content , `<span class="w">` )
212- content = strings .TrimPrefix (content , `</span>` )
213- m = append (m , content )
208+ tag := htmlStr [pos1 : pos2 + 1 ]
209+ if insideLine > 0 {
210+ line .WriteString (htmlStr [:pos1 ])
211+ }
212+ if tag [1 ] == '/' {
213+ if len (tagStack ) == 0 {
214+ return nil , fmt .Errorf ("can't find matched tag: %q" , tag )
215+ }
216+ popped := tagStack [len (tagStack )- 1 ]
217+ tagStack = tagStack [:len (tagStack )- 1 ]
218+ if popped == `<span class="cl">` {
219+ insideLine --
220+ lineStr := line .String ()
221+ if newLineInHTML != "" && lineStr != "" && lineStr [len (lineStr )- 1 ] == '\n' {
222+ lineStr = lineStr [:len (lineStr )- 1 ] + newLineInHTML
223+ }
224+ m = append (m , lineStr )
225+ line = strings.Builder {}
226+ }
227+ if insideLine > 0 {
228+ line .WriteString (tag )
229+ }
230+ } else {
231+ tagStack = append (tagStack , tag )
232+ if insideLine > 0 {
233+ line .WriteString (tag )
234+ }
235+ if tag == `<span class="cl">` {
236+ insideLine ++
237+ }
238+ }
239+ htmlStr = htmlStr [pos2 + 1 :]
214240 }
215- if finalNewLine {
216- m = append (m , "<span class=\" w\" >\n </span>" )
241+
242+ if len (m ) == 0 {
243+ m = append (m , "" ) // maybe we do not want to return 0 lines
217244 }
218245
219- return m
246+ return m , nil
220247}
221248
222- // return unhiglighted map
223- func plainText (code string , numLines int ) []string {
224- m := make ([]string , 0 , numLines )
225- for _ , v := range strings .SplitN (code , "\n " , numLines ) {
226- content := v
227- // need to keep lines that are only \n so copy/paste works properly in browser
228- if content == "" {
229- content = "\n "
249+ // PlainText splits code into lines and returns each line HTML-escaped without syntax highlighting; the result always has at least one element.
250+ func PlainText (code []byte ) []string {
251+ r := bufio .NewReader (bytes .NewReader (code ))
252+ m := make ([]string , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
253+ for {
254+ content , err := r .ReadString ('\n' )
255+ if err != nil && err != io .EOF {
256+ log .Error ("failed to read string from buffer: %v" , err )
257+ break
258+ }
259+ if content == "" && err == io .EOF {
260+ break
261+ }
262+ s := gohtml .EscapeString (content )
263+ if newLineInHTML != "" && s != "" && s [len (s )- 1 ] == '\n' {
264+ s = s [:len (s )- 1 ] + newLineInHTML
230265 }
231- m = append (m , gohtml . EscapeString ( content ) )
266+ m = append (m , s )
232267 }
268+
269+ if len (m ) == 0 {
270+ m = append (m , "" ) // maybe we do not want to return 0 lines
271+ }
272+
233273 return m
234274}
0 commit comments