feat: highlighted code search results (#4749)

closes #4534

<details>
<summary>Screenshots</summary>

![](https://codeberg.org/attachments/0ab8a7b0-6485-46dc-a730-c016abb1f287)
</details>

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/4749
Reviewed-by: 0ko <0ko@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
Shiny Nematoda 2024-08-06 05:57:25 +00:00 committed by Earl Warren
parent 517637137c
commit 06d2e90fa4
10 changed files with 214 additions and 75 deletions

View File

@ -1,4 +1,5 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
@ -19,9 +20,10 @@ import (
)
type GrepResult struct {
Filename string
LineNumbers []int
LineCodes []string
Filename string
LineNumbers []int
LineCodes []string
HighlightedRanges [][3]int
}
type GrepOptions struct {
@ -33,6 +35,13 @@ type GrepOptions struct {
PathSpec []setting.Glob
}
// hasPrefixFold reports whether s starts with t when letter case is ignored.
// Only the first len(t) bytes of s are compared, using strings.EqualFold;
// an s shorter than t never matches.
func hasPrefixFold(s, t string) bool {
	n := len(t)
	return len(s) >= n && strings.EqualFold(s[:n], t)
}
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
@ -53,18 +62,19 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
2^@repo: go-gitea/gitea
*/
var results []*GrepResult
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd := NewCommand(ctx, "grep",
"--null", "--break", "--heading", "--column",
"--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.MatchesPerFile > 0 {
cmd.AddOptionValues("--max-count", fmt.Sprint(opts.MatchesPerFile))
}
words := []string{search}
if opts.IsFuzzy {
words := strings.Fields(search)
for _, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
}
} else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
words = strings.Fields(search)
}
for _, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
}
// pathspec
@ -128,6 +138,24 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
lineNumInt, _ := strconv.Atoi(lineNum)
res.LineNumbers = append(res.LineNumbers, lineNumInt)
if lineCol, lineCode2, ok := strings.Cut(lineCode, "\x00"); ok {
lineColInt, _ := strconv.Atoi(lineCol)
start := lineColInt - 1
matchLen := len(lineCode2)
for _, word := range words {
if hasPrefixFold(lineCode2[start:], word) {
matchLen = len(word)
break
}
}
res.HighlightedRanges = append(res.HighlightedRanges, [3]int{
len(res.LineCodes),
start,
start + matchLen,
})
res.LineCodes = append(res.LineCodes, lineCode2)
continue
}
res.LineCodes = append(res.LineCodes, lineCode)
}
}

View File

@ -20,28 +20,43 @@ func TestGrepSearch(t *testing.T) {
require.NoError(t, err)
defer repo.Close()
res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
res, err := GrepSearch(context.Background(), repo, "public", GrepOptions{})
require.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
LineNumbers: []int{1, 3},
LineCodes: []string{
"public class HelloWorld",
" public static void main(String[] args)",
},
HighlightedRanges: [][3]int{{0, 0, 6}, {1, 1, 7}},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
LineNumbers: []int{1, 3},
LineCodes: []string{
"public class HelloWorld",
" public static void main(String[] args)",
},
HighlightedRanges: [][3]int{{0, 0, 6}, {1, 1, 7}},
},
}, res)
res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1})
res, err = GrepSearch(context.Background(), repo, "void", GrepOptions{MaxResultLimit: 1, ContextLineNumber: 2})
require.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
LineNumbers: []int{1, 2, 3, 4, 5},
LineCodes: []string{
"public class HelloWorld",
"{",
" public static void main(String[] args)",
" {",
" System.out.println(\"Hello world!\");",
},
HighlightedRanges: [][3]int{{2, 15, 19}},
},
}, res)
@ -49,24 +64,28 @@ func TestGrepSearch(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "i-am-a-python.p",
LineNumbers: []int{1},
LineCodes: []string{"## This is a simple file to do a hello world"},
Filename: "i-am-a-python.p",
LineNumbers: []int{1},
LineCodes: []string{"## This is a simple file to do a hello world"},
HighlightedRanges: [][3]int{{0, 39, 44}},
},
{
Filename: "java-hello/main.java",
LineNumbers: []int{1},
LineCodes: []string{"public class HelloWorld"},
Filename: "java-hello/main.java",
LineNumbers: []int{1},
LineCodes: []string{"public class HelloWorld"},
HighlightedRanges: [][3]int{{0, 18, 23}},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{1},
LineCodes: []string{"public class HelloWorld"},
Filename: "main.vendor.java",
LineNumbers: []int{1},
LineCodes: []string{"public class HelloWorld"},
HighlightedRanges: [][3]int{{0, 18, 23}},
},
{
Filename: "python-hello/hello.py",
LineNumbers: []int{1},
LineCodes: []string{"## This is a simple file to do a hello world"},
Filename: "python-hello/hello.py",
LineNumbers: []int{1},
LineCodes: []string{"## This is a simple file to do a hello world"},
HighlightedRanges: [][3]int{{0, 39, 44}},
},
}, res)

View File

@ -12,6 +12,7 @@ import (
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/indexer/code/internal"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/services/gitdiff"
)
// Result a search result to display
@ -70,11 +71,85 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
return nil
}
func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
// Opening and closing HTML tags registered as placeholder tokens by
// HighlightSearchResultCode and written around each highlighted range.
// The "search-highlight" class is the hook for styling the match.
const (
highlightTagStart = "<span class=\"search-highlight\">"
highlightTagEnd = "</span>"
)
func HighlightSearchResultCode(filename string, lineNums []int, highlightRanges [][3]int, code string) []ResultLine {
hcd := gitdiff.NewHighlightCodeDiff()
hcd.CollectUsedRunes(code)
startTag, endTag := hcd.NextPlaceholder(), hcd.NextPlaceholder()
hcd.PlaceholderTokenMap[startTag] = highlightTagStart
hcd.PlaceholderTokenMap[endTag] = highlightTagEnd
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(filename, "", code)
highlightedLines := strings.Split(string(hl), "\n")
conv := hcd.ConvertToPlaceholders(string(hl))
convLines := strings.Split(conv, "\n")
// each highlightRange is of the form [line number, start pos, end pos]
for _, highlightRange := range highlightRanges {
ln, start, end := highlightRange[0], highlightRange[1], highlightRange[2]
line := convLines[ln]
if line == "" || len(line) <= start || len(line) < end {
continue
}
sb := strings.Builder{}
count := -1
isOpen := false
for _, r := range line {
if token, ok := hcd.PlaceholderTokenMap[r];
// token was not found
!ok ||
// token was marked as used
token == "" ||
// the token is not a valid HTML tag emitted by Chroma
!(len(token) > 6 && (token[0:5] == "<span" || token[0:6] == "</span")) {
count++
} else if !isOpen {
// open the tag only after all other placeholders
sb.WriteRune(r)
continue
} else if isOpen && count < end {
// if the tag is open, but a placeholder exists in between
// close the tag
sb.WriteRune(endTag)
// write the placeholder
sb.WriteRune(r)
// reopen the tag
sb.WriteRune(startTag)
continue
}
switch count {
case end:
// if tag is not open, no need to close
if !isOpen {
break
}
sb.WriteRune(endTag)
isOpen = false
case start:
// if tag is open, do not open again
if isOpen {
break
}
isOpen = true
sb.WriteRune(startTag)
}
sb.WriteRune(r)
}
if isOpen {
sb.WriteRune(endTag)
}
convLines[ln] = sb.String()
}
conv = strings.Join(convLines, "\n")
highlightedLines := strings.Split(hcd.Recover(conv), "\n")
// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
lines := make([]ResultLine, min(len(highlightedLines), len(lineNums)))
for i := 0; i < len(lines); i++ {
@ -92,6 +167,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
lineNums := make([]int, 0, len(contentLines))
index := startIndex
var highlightRanges [][3]int
for i, line := range contentLines {
var err error
if index < result.EndIndex &&
@ -99,6 +175,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
result.StartIndex < result.EndIndex {
openActiveIndex := max(result.StartIndex-index, 0)
closeActiveIndex := min(result.EndIndex-index, len(line))
highlightRanges = append(highlightRanges, [3]int{i, openActiveIndex, closeActiveIndex})
err = writeStrings(&formattedLinesBuffer,
line[:openActiveIndex],
line[openActiveIndex:closeActiveIndex],
@ -122,7 +199,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
Lines: HighlightSearchResultCode(result.Filename, lineNums, highlightRanges, formattedLinesBuffer.String()),
}, nil
}

View File

@ -85,7 +85,7 @@ func Search(ctx *context.Context) {
// UpdatedUnix: not supported yet
// Language: not supported yet
// Color: not supported yet
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")),
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, r.HighlightedRanges, strings.Join(r.LineCodes, "\n")),
})
}
}

View File

@ -337,7 +337,7 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, loc
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale)
}
hcd := newHighlightCodeDiff()
hcd := NewHighlightCodeDiff()
diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"

View File

@ -31,17 +31,17 @@ func extractHTMLToken(s string) (before, token, after string, valid bool) {
return "", "", s, true
}
// highlightCodeDiff is used to do diff with highlighted HTML code.
// HighlightCodeDiff is used to do diff with highlighted HTML code.
// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes.
// The HTML tags and entities will be replaced by Unicode placeholders: "<span>{TEXT}</span>" => "\uE000{TEXT}\uE001"
// These Unicode placeholders are friendly to the diff.
// Then after diff, the placeholders in diff result will be recovered to the HTML tags and entities.
// It's guaranteed that the tags in final diff result are paired correctly.
type highlightCodeDiff struct {
type HighlightCodeDiff struct {
placeholderBegin rune
placeholderMaxCount int
placeholderIndex int
placeholderTokenMap map[rune]string
PlaceholderTokenMap map[rune]string
tokenPlaceholderMap map[string]rune
placeholderOverflowCount int
@ -49,52 +49,52 @@ type highlightCodeDiff struct {
lineWrapperTags []string
}
func newHighlightCodeDiff() *highlightCodeDiff {
return &highlightCodeDiff{
func NewHighlightCodeDiff() *HighlightCodeDiff {
return &HighlightCodeDiff{
placeholderBegin: rune(0x100000), // Plane 16: Supplementary Private Use Area B (U+100000..U+10FFFD)
placeholderMaxCount: 64000,
placeholderTokenMap: map[rune]string{},
PlaceholderTokenMap: map[rune]string{},
tokenPlaceholderMap: map[string]rune{},
}
}
// nextPlaceholder returns 0 if no more placeholder can be used
// NextPlaceholder returns 0 if no more placeholder can be used
// the diff is done line by line, usually there are only a few (no more than 10) placeholders in one line
// so the placeholderMaxCount is impossible to be exhausted in real cases.
func (hcd *highlightCodeDiff) nextPlaceholder() rune {
func (hcd *HighlightCodeDiff) NextPlaceholder() rune {
for hcd.placeholderIndex < hcd.placeholderMaxCount {
r := hcd.placeholderBegin + rune(hcd.placeholderIndex)
hcd.placeholderIndex++
// only use non-existing (not used by code) rune as placeholders
if _, ok := hcd.placeholderTokenMap[r]; !ok {
if _, ok := hcd.PlaceholderTokenMap[r]; !ok {
return r
}
}
return 0 // no more available placeholder
}
func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool {
// isInPlaceholderRange reports whether r lies in the half-open interval
// [placeholderBegin, placeholderBegin+placeholderMaxCount).
func (hcd *HighlightCodeDiff) isInPlaceholderRange(r rune) bool {
	first := hcd.placeholderBegin
	limit := first + rune(hcd.placeholderMaxCount)
	return r >= first && r < limit
}
func (hcd *highlightCodeDiff) collectUsedRunes(code string) {
func (hcd *HighlightCodeDiff) CollectUsedRunes(code string) {
for _, r := range code {
if hcd.isInPlaceholderRange(r) {
// put the existing rune (used by code) in the map, so that this rune won't be used as a placeholder anymore.
hcd.placeholderTokenMap[r] = ""
hcd.PlaceholderTokenMap[r] = ""
}
}
}
func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff {
hcd.collectUsedRunes(codeA)
hcd.collectUsedRunes(codeB)
func (hcd *HighlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff {
hcd.CollectUsedRunes(codeA)
hcd.CollectUsedRunes(codeB)
highlightCodeA, _ := highlight.Code(filename, language, codeA)
highlightCodeB, _ := highlight.Code(filename, language, codeB)
convertedCodeA := hcd.convertToPlaceholders(string(highlightCodeA))
convertedCodeB := hcd.convertToPlaceholders(string(highlightCodeB))
convertedCodeA := hcd.ConvertToPlaceholders(string(highlightCodeA))
convertedCodeB := hcd.ConvertToPlaceholders(string(highlightCodeB))
diffs := diffMatchPatch.DiffMain(convertedCodeA, convertedCodeB, true)
diffs = diffMatchPatch.DiffCleanupEfficiency(diffs)
@ -106,7 +106,7 @@ func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB
}
// ConvertToPlaceholders totally depends on Chroma's valid HTML output and its structure, do not use this function for other purposes.
func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
func (hcd *HighlightCodeDiff) ConvertToPlaceholders(htmlCode string) string {
var tagStack []string
res := strings.Builder{}
@ -153,10 +153,10 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
// remember the placeholder and token in the map
placeholder, ok := hcd.tokenPlaceholderMap[tokenInMap]
if !ok {
placeholder = hcd.nextPlaceholder()
placeholder = hcd.NextPlaceholder()
if placeholder != 0 {
hcd.tokenPlaceholderMap[tokenInMap] = placeholder
hcd.placeholderTokenMap[placeholder] = tokenInMap
hcd.PlaceholderTokenMap[placeholder] = tokenInMap
}
}
@ -179,12 +179,16 @@ func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
return res.String()
}
func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
// recoverOneDiff overwrites diff.Text with the result of passing it through
// hcd.Recover.
func (hcd *HighlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
	recovered := hcd.Recover(diff.Text)
	diff.Text = recovered
}
func (hcd *HighlightCodeDiff) Recover(src string) string {
sb := strings.Builder{}
var tagStack []string
for _, r := range diff.Text {
token, ok := hcd.placeholderTokenMap[r]
for _, r := range src {
token, ok := hcd.PlaceholderTokenMap[r]
if !ok || token == "" {
sb.WriteRune(r) // if the rune is not a placeholder, write it as it is
continue
@ -218,5 +222,5 @@ func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
}
}
diff.Text = sb.String()
return sb.String()
}

View File

@ -13,7 +13,7 @@ import (
)
func TestDiffWithHighlight(t *testing.T) {
hcd := newHighlightCodeDiff()
hcd := NewHighlightCodeDiff()
diffs := hcd.diffWithHighlight(
"main.v", "",
" run('<>')\n",
@ -28,9 +28,9 @@ func TestDiffWithHighlight(t *testing.T) {
output = diffToHTML(nil, diffs, DiffLineAdd)
assert.Equal(t, expected, output)
hcd = newHighlightCodeDiff()
hcd.placeholderTokenMap['O'] = "<span>"
hcd.placeholderTokenMap['C'] = "</span>"
hcd = NewHighlightCodeDiff()
hcd.PlaceholderTokenMap['O'] = "<span>"
hcd.PlaceholderTokenMap['C'] = "</span>"
diff := diffmatchpatch.Diff{}
diff.Text = "OC"
@ -47,20 +47,20 @@ func TestDiffWithHighlight(t *testing.T) {
}
func TestDiffWithHighlightPlaceholder(t *testing.T) {
hcd := newHighlightCodeDiff()
hcd := NewHighlightCodeDiff()
diffs := hcd.diffWithHighlight(
"main.js", "",
"a='\U00100000'",
"a='\U0010FFFD''",
)
assert.Equal(t, "", hcd.placeholderTokenMap[0x00100000])
assert.Equal(t, "", hcd.placeholderTokenMap[0x0010FFFD])
assert.Equal(t, "", hcd.PlaceholderTokenMap[0x00100000])
assert.Equal(t, "", hcd.PlaceholderTokenMap[0x0010FFFD])
expected := fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">&#39;</span><span class="removed-code">%s</span>&#39;`, "\U00100000")
output := diffToHTML(hcd.lineWrapperTags, diffs, DiffLineDel)
assert.Equal(t, expected, output)
hcd = newHighlightCodeDiff()
hcd = NewHighlightCodeDiff()
diffs = hcd.diffWithHighlight(
"main.js", "",
"a='\U00100000'",
@ -72,7 +72,7 @@ func TestDiffWithHighlightPlaceholder(t *testing.T) {
}
func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) {
hcd := newHighlightCodeDiff()
hcd := NewHighlightCodeDiff()
hcd.placeholderMaxCount = 0
diffs := hcd.diffWithHighlight(
"main.js", "",
@ -83,7 +83,7 @@ func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) {
expected := fmt.Sprintf(`<span class="removed-code">%s#39;</span>`, "\uFFFD")
assert.Equal(t, expected, output)
hcd = newHighlightCodeDiff()
hcd = NewHighlightCodeDiff()
hcd.placeholderMaxCount = 0
diffs = hcd.diffWithHighlight(
"main.js", "",
@ -102,7 +102,7 @@ func TestDiffWithHighlightPlaceholderExhausted(t *testing.T) {
func TestDiffWithHighlightTagMatch(t *testing.T) {
totalOverflow := 0
for i := 0; i < 100; i++ {
hcd := newHighlightCodeDiff()
hcd := NewHighlightCodeDiff()
hcd.placeholderMaxCount = i
diffs := hcd.diffWithHighlight(
"main.js", "",

View File

@ -8,6 +8,7 @@ import (
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/tests"
"github.com/PuerkitoBio/goquery"
"github.com/stretchr/testify/assert"
)
@ -15,11 +16,16 @@ func TestExploreCodeSearchIndexer(t *testing.T) {
defer tests.PrepareTestEnv(t)()
defer test.MockVariableValue(&setting.Indexer.RepoIndexerEnabled, true)()
req := NewRequest(t, "GET", "/explore/code")
req := NewRequest(t, "GET", "/explore/code?q=file&fuzzy=true")
resp := MakeRequest(t, req, http.StatusOK)
doc := NewHTMLParser(t, resp.Body).Find(".explore")
doc := NewHTMLParser(t, resp.Body)
msg := doc.Find(".explore").Find(".ui.container").Find(".ui.message[data-test-tag=grep]")
msg := doc.
Find(".ui.container").
Find(".ui.message[data-test-tag=grep]")
assert.EqualValues(t, 0, msg.Length())
assert.Empty(t, msg.Nodes)
doc.Find(".file-body").Each(func(i int, sel *goquery.Selection) {
assert.Positive(t, sel.Find(".code-inner").Find(".search-highlight").Length(), 0)
})
}

View File

@ -27,7 +27,8 @@ func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
result := make([]string, resultSelections.Length())
resultSelections.Each(func(i int, selection *goquery.Selection) {
assert.Positive(t, resultSelections.Find("div ol li").Length(), 0)
assert.Positive(t, selection.Find("div ol li").Length(), 0)
assert.Positive(t, selection.Find(".code-inner").Find(".search-highlight").Length(), 0)
result[i] = selection.
Find(".header").
Find("span.file a.file-link").

View File

@ -1752,6 +1752,10 @@ td .commit-summary {
color: inherit;
}
/* Background for the <span class="search-highlight"> wrapper placed around matched text in code search results. */
.search-highlight {
background: var(--color-primary-alpha-40);
}
.repository.quickstart .guide .item {
padding: 1em;
}