Diffstat (limited to 'workhorse/internal/lsif_transformer/parser')
 workhorse/internal/lsif_transformer/parser/cache.go                                           |  56
 workhorse/internal/lsif_transformer/parser/cache_test.go                                      |  33
 workhorse/internal/lsif_transformer/parser/code_hover.go                                      | 124
 workhorse/internal/lsif_transformer/parser/code_hover_test.go                                 | 106
 workhorse/internal/lsif_transformer/parser/docs.go                                            | 144
 workhorse/internal/lsif_transformer/parser/docs_test.go                                       |  54
 workhorse/internal/lsif_transformer/parser/errors.go                                          |  30
 workhorse/internal/lsif_transformer/parser/errors_test.go                                     |  26
 workhorse/internal/lsif_transformer/parser/hovers.go                                          | 162
 workhorse/internal/lsif_transformer/parser/hovers_test.go                                     |  30
 workhorse/internal/lsif_transformer/parser/id.go                                              |  52
 workhorse/internal/lsif_transformer/parser/id_test.go                                         |  28
 workhorse/internal/lsif_transformer/parser/parser.go                                          | 109
 workhorse/internal/lsif_transformer/parser/parser_test.go                                     |  80
 workhorse/internal/lsif_transformer/parser/performance_test.go                                |  47
 workhorse/internal/lsif_transformer/parser/ranges.go                                          | 214
 workhorse/internal/lsif_transformer/parser/ranges_test.go                                     |  61
 workhorse/internal/lsif_transformer/parser/references.go                                      | 107
 workhorse/internal/lsif_transformer/parser/references_test.go                                 |  44
 workhorse/internal/lsif_transformer/parser/testdata/dump.lsif.zip                             | bin 0 -> 2023 bytes
 workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/main.go.json                | 208
 workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/morestrings/reverse.go.json | 249
 workhorse/internal/lsif_transformer/parser/testdata/workhorse.lsif.zip                        | bin 0 -> 2120741 bytes
23 files changed, 1964 insertions, 0 deletions
diff --git a/workhorse/internal/lsif_transformer/parser/cache.go b/workhorse/internal/lsif_transformer/parser/cache.go
new file mode 100644
index 00000000000..395069cd217
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/cache.go
@@ -0,0 +1,56 @@
+package parser
+
+import (
+ "encoding/binary"
+ "io"
+ "io/ioutil"
+ "os"
+)
+
+// This cache implementation uses a temp file to provide key-value data storage.
+// It avoids holding intermediate calculations in RAM.
+// The stored data must be a fixed-size value, a slice of fixed-size values, or a pointer to such data.
+type cache struct {
+ file *os.File
+ chunkSize int64
+}
+
+func newCache(tempDir, filename string, data interface{}) (*cache, error) {
+ f, err := ioutil.TempFile(tempDir, filename)
+ if err != nil {
+ return nil, err
+ }
+
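+ // Unlink the temp file right away: the open descriptor keeps its data
+ // accessible, and the OS reclaims the space once the file is closed.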
+ if err := os.Remove(f.Name()); err != nil {
+ return nil, err
+ }
+
+ return &cache{file: f, chunkSize: int64(binary.Size(data))}, nil
+}
+
+func (c *cache) SetEntry(id Id, data interface{}) error {
+ if err := c.setOffset(id); err != nil {
+ return err
+ }
+
+ return binary.Write(c.file, binary.LittleEndian, data)
+}
+
+func (c *cache) Entry(id Id, data interface{}) error {
+ if err := c.setOffset(id); err != nil {
+ return err
+ }
+
+ return binary.Read(c.file, binary.LittleEndian, data)
+}
+
+func (c *cache) Close() error {
+ return c.file.Close()
+}
+
+func (c *cache) setOffset(id Id) error {
+ offset := int64(id) * c.chunkSize
+ _, err := c.file.Seek(offset, io.SeekStart)
+
+ return err
+}
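As an aside, the fixed-size requirement in the comment above comes straight from encoding/binary: binary.Size reports -1 for variable-size data, which would poison the chunk arithmetic in setOffset. A minimal, illustrative sketch (the types are hypothetical):

    package main

    import (
    	"encoding/binary"
    	"fmt"
    )

    type fixed struct{ A, B int32 } // every field has a known encoded size
    type varsize struct{ S string } // strings have no fixed encoded size

    func main() {
    	fmt.Println(binary.Size(fixed{}))   // 8: safe to use as a cache chunk
    	fmt.Println(binary.Size(varsize{})) // -1: would corrupt chunk offsets
    }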
diff --git a/workhorse/internal/lsif_transformer/parser/cache_test.go b/workhorse/internal/lsif_transformer/parser/cache_test.go
new file mode 100644
index 00000000000..23a2ac6e9a9
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/cache_test.go
@@ -0,0 +1,33 @@
+package parser
+
+import (
+ "io/ioutil"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+type chunk struct {
+ A int16
+ B int16
+}
+
+func TestCache(t *testing.T) {
+ cache, err := newCache("", "test-chunks", chunk{})
+ require.NoError(t, err)
+ defer cache.Close()
+
+ c := chunk{A: 1, B: 2}
+ require.NoError(t, cache.SetEntry(1, &c))
+ require.NoError(t, cache.setOffset(0))
+
+ content, err := ioutil.ReadAll(cache.file)
+ require.NoError(t, err)
+
+ expected := []byte{0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x2, 0x0}
+ require.Equal(t, expected, content)
+
+ var nc chunk
+ require.NoError(t, cache.Entry(1, &nc))
+ require.Equal(t, c, nc)
+}
diff --git a/workhorse/internal/lsif_transformer/parser/code_hover.go b/workhorse/internal/lsif_transformer/parser/code_hover.go
new file mode 100644
index 00000000000..dbdaba643d1
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/code_hover.go
@@ -0,0 +1,124 @@
+package parser
+
+import (
+ "encoding/json"
+ "strings"
+ "unicode/utf8"
+
+ "github.com/alecthomas/chroma"
+ "github.com/alecthomas/chroma/lexers"
+)
+
+const maxValueSize = 250
+
+type token struct {
+ Class string `json:"class,omitempty"`
+ Value string `json:"value"`
+}
+
+type codeHover struct {
+ TruncatedValue *truncatableString `json:"value,omitempty"`
+ Tokens [][]token `json:"tokens,omitempty"`
+ Language string `json:"language,omitempty"`
+ Truncated bool `json:"truncated,omitempty"`
+}
+
+type truncatableString struct {
+ Value string
+ Truncated bool
+}
+
+func (ts *truncatableString) UnmarshalText(b []byte) error {
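+ // Walk the input rune by rune so that truncation never splits a
+ // multi-byte UTF-8 character; s tracks the byte offset of the cut point.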
+ s := 0
+ for i := 0; s < len(b); i++ {
+ if i >= maxValueSize {
+ ts.Truncated = true
+ break
+ }
+
+ _, size := utf8.DecodeRune(b[s:])
+
+ s += size
+ }
+
+ ts.Value = string(b[0:s])
+
+ return nil
+}
+
+func (ts *truncatableString) MarshalJSON() ([]byte, error) {
+ return json.Marshal(ts.Value)
+}
+
+func newCodeHover(content json.RawMessage) (*codeHover, error) {
+ // The hover value can be either an object: { "value": "func main()", "language": "go" }
+ // or a plain string containing documentation.
+ // We first try to unmarshal the content into a string; if that fails, we unmarshal it into an object.
+ var c codeHover
+ if err := json.Unmarshal(content, &c.TruncatedValue); err != nil {
+ if err := json.Unmarshal(content, &c); err != nil {
+ return nil, err
+ }
+
+ c.setTokens()
+ }
+
+ c.Truncated = c.TruncatedValue.Truncated
+
+ if len(c.Tokens) > 0 {
+ c.TruncatedValue = nil // remove value for hovers which have tokens
+ }
+
+ return &c, nil
+}
+
+func (c *codeHover) setTokens() {
+ lexer := lexers.Get(c.Language)
+ if lexer == nil {
+ return
+ }
+
+ iterator, err := lexer.Tokenise(nil, c.TruncatedValue.Value)
+ if err != nil {
+ return
+ }
+
+ var tokenLines [][]token
+ for _, tokenLine := range chroma.SplitTokensIntoLines(iterator.Tokens()) {
+ var tokens []token
+ var rawToken string
+ for _, t := range tokenLine {
+ class := c.classFor(t.Type)
+
+ // accumulate consecutive raw values into a single string to store them as
+ // [{ Class: "kd", Value: "func" }, { Value: " main() {" }] instead of
+ // [{ Class: "kd", Value: "func" }, { Value: " " }, { Value: "main" }, { Value: "(" }...]
+ if class == "" {
+ rawToken = rawToken + t.Value
+ } else {
+ if rawToken != "" {
+ tokens = append(tokens, token{Value: rawToken})
+ rawToken = ""
+ }
+
+ tokens = append(tokens, token{Class: class, Value: t.Value})
+ }
+ }
+
+ if rawToken != "" {
+ tokens = append(tokens, token{Value: rawToken})
+ }
+
+ tokenLines = append(tokenLines, tokens)
+ }
+
+ c.Tokens = tokenLines
+}
+
+func (c *codeHover) classFor(tokenType chroma.TokenType) string {
+ if strings.HasPrefix(tokenType.String(), "Keyword") || tokenType == chroma.String || tokenType == chroma.Comment {
+ return chroma.StandardTypes[tokenType]
+ }
+
+ return ""
+}
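To make the two accepted hover shapes concrete, a minimal sketch (values illustrative, errors elided):

    // Object form: highlighted into Tokens, so TruncatedValue is dropped.
    obj, _ := newCodeHover(json.RawMessage(`{"language":"go","value":"func main()"}`))
    // obj.Tokens == [][]token{{{Class: "kd", Value: "func"}, {Value: " main()"}}}

    // String form: kept as a (possibly truncated) documentation value.
    doc, _ := newCodeHover(json.RawMessage(`"This method reverses a string"`))
    // doc.TruncatedValue.Value == "This method reverses a string"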
diff --git a/workhorse/internal/lsif_transformer/parser/code_hover_test.go b/workhorse/internal/lsif_transformer/parser/code_hover_test.go
new file mode 100644
index 00000000000..2030e530155
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/code_hover_test.go
@@ -0,0 +1,106 @@
+package parser
+
+import (
+ "encoding/json"
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestHighlight(t *testing.T) {
+ tests := []struct {
+ name string
+ language string
+ value string
+ want [][]token
+ }{
+ {
+ name: "go function definition",
+ language: "go",
+ value: "func main()",
+ want: [][]token{{{Class: "kd", Value: "func"}, {Value: " main()"}}},
+ },
+ {
+ name: "go struct definition",
+ language: "go",
+ value: "type Command struct",
+ want: [][]token{{{Class: "kd", Value: "type"}, {Value: " Command "}, {Class: "kd", Value: "struct"}}},
+ },
+ {
+ name: "go struct multiline definition",
+ language: "go",
+ value: `struct {\nConfig *Config\nReadWriter *ReadWriter\nEOFSent bool\n}`,
+ want: [][]token{
+ {{Class: "kd", Value: "struct"}, {Value: " {\n"}},
+ {{Value: "Config *Config\n"}},
+ {{Value: "ReadWriter *ReadWriter\n"}},
+ {{Value: "EOFSent "}, {Class: "kt", Value: "bool"}, {Value: "\n"}},
+ {{Value: "}"}},
+ },
+ },
+ {
+ name: "ruby method definition",
+ language: "ruby",
+ value: "def read(line)",
+ want: [][]token{{{Class: "k", Value: "def"}, {Value: " read(line)"}}},
+ },
+ {
+ name: "ruby multiline method definition",
+ language: "ruby",
+ value: `def read(line)\nend`,
+ want: [][]token{
+ {{Class: "k", Value: "def"}, {Value: " read(line)\n"}},
+ {{Class: "k", Value: "end"}},
+ },
+ },
+ {
+ name: "unknown/malicious language is passed",
+ language: "<lang> alert(1); </lang>",
+ value: `def a;\nend`,
+ want: [][]token(nil),
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ raw := []byte(fmt.Sprintf(`{"language":"%s","value":"%s"}`, tt.language, tt.value))
+ c, err := newCodeHover(json.RawMessage(raw))
+
+ require.NoError(t, err)
+ require.Equal(t, tt.want, c.Tokens)
+ })
+ }
+}
+
+func TestMarkdown(t *testing.T) {
+ value := `"This method reverses a string \n\n"`
+ c, err := newCodeHover(json.RawMessage(value))
+
+ require.NoError(t, err)
+ require.Equal(t, "This method reverses a string \n\n", c.TruncatedValue.Value)
+}
+
+func TestTruncatedValue(t *testing.T) {
+ value := strings.Repeat("a", 500)
+ rawValue, err := json.Marshal(value)
+ require.NoError(t, err)
+
+ c, err := newCodeHover(rawValue)
+ require.NoError(t, err)
+
+ require.Equal(t, value[0:maxValueSize], c.TruncatedValue.Value)
+ require.True(t, c.TruncatedValue.Truncated)
+}
+
+func TestTruncatingMultiByteChars(t *testing.T) {
+ value := strings.Repeat("ಅ", 500)
+ rawValue, err := json.Marshal(value)
+ require.NoError(t, err)
+
+ c, err := newCodeHover(rawValue)
+ require.NoError(t, err)
+
+ symbolSize := 3
+ require.Equal(t, value[0:maxValueSize*symbolSize], c.TruncatedValue.Value)
+}
diff --git a/workhorse/internal/lsif_transformer/parser/docs.go b/workhorse/internal/lsif_transformer/parser/docs.go
new file mode 100644
index 00000000000..c626e07d3fe
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/docs.go
@@ -0,0 +1,144 @@
+package parser
+
+import (
+ "archive/zip"
+ "bufio"
+ "encoding/json"
+ "io"
+ "strings"
+)
+
+const maxScanTokenSize = 1024 * 1024
+
+type Line struct {
+ Type string `json:"label"`
+}
+
+type Docs struct {
+ Root string
+ Entries map[Id]string
+ DocRanges map[Id][]Id
+ Ranges *Ranges
+}
+
+type Document struct {
+ Id Id `json:"id"`
+ Uri string `json:"uri"`
+}
+
+type DocumentRange struct {
+ OutV Id `json:"outV"`
+ RangeIds []Id `json:"inVs"`
+}
+
+type Metadata struct {
+ Root string `json:"projectRoot"`
+}
+
+func NewDocs(config Config) (*Docs, error) {
+ ranges, err := NewRanges(config)
+ if err != nil {
+ return nil, err
+ }
+
+ return &Docs{
+ Root: "file:///",
+ Entries: make(map[Id]string),
+ DocRanges: make(map[Id][]Id),
+ Ranges: ranges,
+ }, nil
+}
+
+func (d *Docs) Parse(r io.Reader) error {
+ scanner := bufio.NewScanner(r)
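+ // LSIF lines can be long; let the scanner grow its buffer up to
+ // maxScanTokenSize (1 MiB) instead of bufio's 64 KiB default.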
+ buf := make([]byte, 0, bufio.MaxScanTokenSize)
+ scanner.Buffer(buf, maxScanTokenSize)
+
+ for scanner.Scan() {
+ if err := d.process(scanner.Bytes()); err != nil {
+ return err
+ }
+ }
+
+ return scanner.Err()
+}
+
+func (d *Docs) process(line []byte) error {
+ l := Line{}
+ if err := json.Unmarshal(line, &l); err != nil {
+ return err
+ }
+
+ switch l.Type {
+ case "metaData":
+ if err := d.addMetadata(line); err != nil {
+ return err
+ }
+ case "document":
+ if err := d.addDocument(line); err != nil {
+ return err
+ }
+ case "contains":
+ if err := d.addDocRanges(line); err != nil {
+ return err
+ }
+ default:
+ return d.Ranges.Read(l.Type, line)
+ }
+
+ return nil
+}
+
+func (d *Docs) Close() error {
+ return d.Ranges.Close()
+}
+
+func (d *Docs) SerializeEntries(w *zip.Writer) error {
+ for id, path := range d.Entries {
+ filePath := Lsif + "/" + path + ".json"
+
+ f, err := w.Create(filePath)
+ if err != nil {
+ return err
+ }
+
+ if err := d.Ranges.Serialize(f, d.DocRanges[id], d.Entries); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (d *Docs) addMetadata(line []byte) error {
+ var metadata Metadata
+ if err := json.Unmarshal(line, &metadata); err != nil {
+ return err
+ }
+
+ d.Root = strings.TrimSpace(metadata.Root) + "/"
+
+ return nil
+}
+
+func (d *Docs) addDocument(line []byte) error {
+ var doc Document
+ if err := json.Unmarshal(line, &doc); err != nil {
+ return err
+ }
+
+ d.Entries[doc.Id] = strings.TrimPrefix(doc.Uri, d.Root)
+
+ return nil
+}
+
+func (d *Docs) addDocRanges(line []byte) error {
+ var docRange DocumentRange
+ if err := json.Unmarshal(line, &docRange); err != nil {
+ return err
+ }
+
+ d.DocRanges[docRange.OutV] = append(d.DocRanges[docRange.OutV], docRange.RangeIds...)
+
+ return nil
+}
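For orientation, feeding a few LSIF lines through Parse might look like this (a sketch with hypothetical ids and paths, errors elided):

    data := strings.Join([]string{
    	`{"id":"1","label":"metaData","projectRoot":"file:///project"}`,
    	`{"id":"2","label":"document","uri":"file:///project/main.go"}`,
    	`{"id":"3","label":"contains","outV":"2","inVs":["4"]}`,
    }, "\n")

    docs, _ := NewDocs(Config{})
    defer docs.Close()
    _ = docs.Parse(strings.NewReader(data))
    // docs.Entries[2] == "main.go"
    // docs.DocRanges[2] == []Id{4}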
diff --git a/workhorse/internal/lsif_transformer/parser/docs_test.go b/workhorse/internal/lsif_transformer/parser/docs_test.go
new file mode 100644
index 00000000000..57dca8e773d
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/docs_test.go
@@ -0,0 +1,54 @@
+package parser
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func createLine(id, label, uri string) []byte {
+ return []byte(fmt.Sprintf(`{"id":"%s","label":"%s","uri":"%s"}`+"\n", id, label, uri))
+}
+
+func TestParse(t *testing.T) {
+ d, err := NewDocs(Config{})
+ require.NoError(t, err)
+ defer d.Close()
+
+ data := []byte(`{"id":"1","label":"metaData","projectRoot":"file:///Users/nested"}` + "\n")
+ data = append(data, createLine("2", "document", "file:///Users/nested/file.rb")...)
+ data = append(data, createLine("3", "document", "file:///Users/nested/folder/file.rb")...)
+ data = append(data, createLine("4", "document", "file:///Users/wrong/file.rb")...)
+
+ require.NoError(t, d.Parse(bytes.NewReader(data)))
+
+ require.Equal(t, d.Entries[2], "file.rb")
+ require.Equal(t, d.Entries[3], "folder/file.rb")
+ require.Equal(t, d.Entries[4], "file:///Users/wrong/file.rb")
+}
+
+func TestParseContainsLine(t *testing.T) {
+ d, err := NewDocs(Config{})
+ require.NoError(t, err)
+ defer d.Close()
+
+ data := []byte(`{"id":"5","label":"contains","outV":"1", "inVs": ["2", "3"]}` + "\n")
+ data = append(data, []byte(`{"id":"6","label":"contains","outV":"1", "inVs": [4]}`+"\n")...)
+
+ require.NoError(t, d.Parse(bytes.NewReader(data)))
+
+ require.Equal(t, []Id{2, 3, 4}, d.DocRanges[1])
+}
+
+func TestParsingVeryLongLine(t *testing.T) {
+ d, err := NewDocs(Config{})
+ require.NoError(t, err)
+ defer d.Close()
+
+ line := []byte(`{"id": "` + strings.Repeat("a", 64*1024) + `"}`)
+
+ require.NoError(t, d.Parse(bytes.NewReader(line)))
+}
diff --git a/workhorse/internal/lsif_transformer/parser/errors.go b/workhorse/internal/lsif_transformer/parser/errors.go
new file mode 100644
index 00000000000..1040a789413
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/errors.go
@@ -0,0 +1,30 @@
+package parser
+
+import (
+ "errors"
+ "strings"
+)
+
+func combineErrors(errsOrNil ...error) error {
+ var errs []error
+ for _, err := range errsOrNil {
+ if err != nil {
+ errs = append(errs, err)
+ }
+ }
+
+ if len(errs) == 0 {
+ return nil
+ }
+
+ if len(errs) == 1 {
+ return errs[0]
+ }
+
+ var msgs []string
+ for _, err := range errs {
+ msgs = append(msgs, err.Error())
+ }
+
+ return errors.New(strings.Join(msgs, "\n"))
+}
diff --git a/workhorse/internal/lsif_transformer/parser/errors_test.go b/workhorse/internal/lsif_transformer/parser/errors_test.go
new file mode 100644
index 00000000000..31a7130d05e
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/errors_test.go
@@ -0,0 +1,26 @@
+package parser
+
+import (
+ "errors"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+type customErr struct {
+ err string
+}
+
+func (e customErr) Error() string {
+ return e.err
+}
+
+func TestCombineErrors(t *testing.T) {
+ err := combineErrors(nil, errors.New("first"), nil, customErr{"second"})
+ require.EqualError(t, err, "first\nsecond")
+
+ err = customErr{"custom error"}
+ require.Equal(t, err, combineErrors(nil, err, nil))
+
+ require.Nil(t, combineErrors(nil, nil, nil))
+}
diff --git a/workhorse/internal/lsif_transformer/parser/hovers.go b/workhorse/internal/lsif_transformer/parser/hovers.go
new file mode 100644
index 00000000000..e96d7e4fca3
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/hovers.go
@@ -0,0 +1,162 @@
+package parser
+
+import (
+ "encoding/json"
+ "io/ioutil"
+ "os"
+)
+
+type Offset struct {
+ At int32
+ Len int32
+}
+
+type Hovers struct {
+ File *os.File
+ Offsets *cache
+ CurrentOffset int
+}
+
+type RawResult struct {
+ Contents []json.RawMessage `json:"contents"`
+}
+
+type RawData struct {
+ Id Id `json:"id"`
+ Result RawResult `json:"result"`
+}
+
+type HoverRef struct {
+ ResultSetId Id `json:"outV"`
+ HoverId Id `json:"inV"`
+}
+
+type ResultSetRef struct {
+ ResultSetId Id `json:"outV"`
+ RefId Id `json:"inV"`
+}
+
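+// addData stores hover content in the file and indexes it by the hoverResult
+// id; addHoverRef copies that index entry to the result-set id, and
+// addResultSetRef copies it once more to the reference id, so For(refId) can
+// resolve hover bytes in a single lookup.
+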
+func NewHovers(config Config) (*Hovers, error) {
+ tempPath := config.TempPath
+
+ file, err := ioutil.TempFile(tempPath, "hovers")
+ if err != nil {
+ return nil, err
+ }
+
+ if err := os.Remove(file.Name()); err != nil {
+ return nil, err
+ }
+
+ offsets, err := newCache(tempPath, "hovers-indexes", Offset{})
+ if err != nil {
+ return nil, err
+ }
+
+ return &Hovers{
+ File: file,
+ Offsets: offsets,
+ CurrentOffset: 0,
+ }, nil
+}
+
+func (h *Hovers) Read(label string, line []byte) error {
+ switch label {
+ case "hoverResult":
+ if err := h.addData(line); err != nil {
+ return err
+ }
+ case "textDocument/hover":
+ if err := h.addHoverRef(line); err != nil {
+ return err
+ }
+ case "textDocument/references":
+ if err := h.addResultSetRef(line); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+func (h *Hovers) For(refId Id) json.RawMessage {
+ var offset Offset
+ if err := h.Offsets.Entry(refId, &offset); err != nil || offset.Len == 0 {
+ return nil
+ }
+
+ hover := make([]byte, offset.Len)
+ _, err := h.File.ReadAt(hover, int64(offset.At))
+ if err != nil {
+ return nil
+ }
+
+ return json.RawMessage(hover)
+}
+
+func (h *Hovers) Close() error {
+ return combineErrors(
+ h.File.Close(),
+ h.Offsets.Close(),
+ )
+}
+
+func (h *Hovers) addData(line []byte) error {
+ var rawData RawData
+ if err := json.Unmarshal(line, &rawData); err != nil {
+ return err
+ }
+
+ codeHovers := []*codeHover{}
+ for _, rawContent := range rawData.Result.Contents {
+ c, err := newCodeHover(rawContent)
+ if err != nil {
+ return err
+ }
+
+ codeHovers = append(codeHovers, c)
+ }
+
+ codeHoversData, err := json.Marshal(codeHovers)
+ if err != nil {
+ return err
+ }
+
+ n, err := h.File.Write(codeHoversData)
+ if err != nil {
+ return err
+ }
+
+ offset := Offset{At: int32(h.CurrentOffset), Len: int32(n)}
+ h.CurrentOffset += n
+
+ return h.Offsets.SetEntry(rawData.Id, &offset)
+}
+
+func (h *Hovers) addHoverRef(line []byte) error {
+ var hoverRef HoverRef
+ if err := json.Unmarshal(line, &hoverRef); err != nil {
+ return err
+ }
+
+ var offset Offset
+ if err := h.Offsets.Entry(hoverRef.HoverId, &offset); err != nil {
+ return err
+ }
+
+ return h.Offsets.SetEntry(hoverRef.ResultSetId, &offset)
+}
+
+func (h *Hovers) addResultSetRef(line []byte) error {
+ var ref ResultSetRef
+ if err := json.Unmarshal(line, &ref); err != nil {
+ return err
+ }
+
+ var offset Offset
+ if err := h.Offsets.Entry(ref.ResultSetId, &offset); err != nil {
+ return nil
+ }
+
+ return h.Offsets.SetEntry(ref.RefId, &offset)
+}
diff --git a/workhorse/internal/lsif_transformer/parser/hovers_test.go b/workhorse/internal/lsif_transformer/parser/hovers_test.go
new file mode 100644
index 00000000000..3037be103af
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/hovers_test.go
@@ -0,0 +1,30 @@
+package parser
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestHoversRead(t *testing.T) {
+ h := setupHovers(t)
+
+ var offset Offset
+ require.NoError(t, h.Offsets.Entry(2, &offset))
+ require.Equal(t, Offset{At: 0, Len: 19}, offset)
+
+ require.Equal(t, `[{"value":"hello"}]`, string(h.For(1)))
+
+ require.NoError(t, h.Close())
+}
+
+func setupHovers(t *testing.T) *Hovers {
+ h, err := NewHovers(Config{})
+ require.NoError(t, err)
+
+ require.NoError(t, h.Read("hoverResult", []byte(`{"id":"2","label":"hoverResult","result":{"contents": ["hello"]}}`)))
+ require.NoError(t, h.Read("textDocument/hover", []byte(`{"id":4,"label":"textDocument/hover","outV":"3","inV":2}`)))
+ require.NoError(t, h.Read("textDocument/references", []byte(`{"id":"3","label":"textDocument/references","outV":3,"inV":"1"}`)))
+
+ return h
+}
diff --git a/workhorse/internal/lsif_transformer/parser/id.go b/workhorse/internal/lsif_transformer/parser/id.go
new file mode 100644
index 00000000000..2adc4e092f5
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/id.go
@@ -0,0 +1,52 @@
+package parser
+
+import (
+ "encoding/json"
+ "errors"
+ "strconv"
+)
+
+const (
+ minId = 1
+ maxId = 20 * 1000 * 1000
+)
+
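+// Id values index directly into the on-disk caches (see cache.setOffset,
+// where offset = id * chunkSize), so they are bounded above to keep the
+// backing temp files to a reasonable size.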
+type Id int32
+
+func (id *Id) UnmarshalJSON(b []byte) error {
+ if len(b) > 0 && b[0] != '"' {
+ if err := id.unmarshalInt(b); err != nil {
+ return err
+ }
+ } else {
+ if err := id.unmarshalString(b); err != nil {
+ return err
+ }
+ }
+
+ if *id < minId || *id > maxId {
+ return errors.New("json: id is invalid")
+ }
+
+ return nil
+}
+
+func (id *Id) unmarshalInt(b []byte) error {
+ return json.Unmarshal(b, (*int32)(id))
+}
+
+func (id *Id) unmarshalString(b []byte) error {
+ var s string
+ if err := json.Unmarshal(b, &s); err != nil {
+ return err
+ }
+
+ i, err := strconv.Atoi(s)
+ if err != nil {
+ return err
+ }
+
+ *id = Id(i)
+
+ return nil
+}
diff --git a/workhorse/internal/lsif_transformer/parser/id_test.go b/workhorse/internal/lsif_transformer/parser/id_test.go
new file mode 100644
index 00000000000..c1c53928378
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/id_test.go
@@ -0,0 +1,28 @@
+package parser
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+type jsonWithId struct {
+ Value Id `json:"value"`
+}
+
+func TestId(t *testing.T) {
+ var v jsonWithId
+ require.NoError(t, json.Unmarshal([]byte(`{ "value": 1230 }`), &v))
+ require.Equal(t, Id(1230), v.Value)
+
+ require.NoError(t, json.Unmarshal([]byte(`{ "value": "1230" }`), &v))
+ require.Equal(t, Id(1230), v.Value)
+
+ require.Error(t, json.Unmarshal([]byte(`{ "value": "1.5" }`), &v))
+ require.Error(t, json.Unmarshal([]byte(`{ "value": 1.5 }`), &v))
+ require.Error(t, json.Unmarshal([]byte(`{ "value": "-1" }`), &v))
+ require.Error(t, json.Unmarshal([]byte(`{ "value": -1 }`), &v))
+ require.Error(t, json.Unmarshal([]byte(`{ "value": 21000000 }`), &v))
+ require.Error(t, json.Unmarshal([]byte(`{ "value": "21000000" }`), &v))
+}
diff --git a/workhorse/internal/lsif_transformer/parser/parser.go b/workhorse/internal/lsif_transformer/parser/parser.go
new file mode 100644
index 00000000000..085e7a856aa
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/parser.go
@@ -0,0 +1,109 @@
+package parser
+
+import (
+ "archive/zip"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+
+ "gitlab.com/gitlab-org/labkit/log"
+)
+
+var (
+ Lsif = "lsif"
+)
+
+type Parser struct {
+ Docs *Docs
+
+ pr *io.PipeReader
+}
+
+type Config struct {
+ TempPath string
+}
+
+func NewParser(ctx context.Context, r io.Reader, config Config) (io.ReadCloser, error) {
+ docs, err := NewDocs(config)
+ if err != nil {
+ return nil, err
+ }
+
+ // ZIP archives need to be seekable, so don't buffer the input in RAM; spool it to a tempfile instead
+ tempFile, err := ioutil.TempFile(config.TempPath, Lsif)
+ if err != nil {
+ return nil, err
+ }
+
+ defer tempFile.Close()
+
+ if err := os.Remove(tempFile.Name()); err != nil {
+ return nil, err
+ }
+
+ size, err := io.Copy(tempFile, r)
+ if err != nil {
+ return nil, err
+ }
+ log.WithContextFields(ctx, log.Fields{"lsif_zip_cache_bytes": size}).Print("cached incoming LSIF zip on disk")
+
+ zr, err := zip.NewReader(tempFile, size)
+ if err != nil {
+ return nil, err
+ }
+
+ if len(zr.File) == 0 {
+ return nil, errors.New("empty zip file")
+ }
+
+ file, err := zr.File[0].Open()
+ if err != nil {
+ return nil, err
+ }
+
+ defer file.Close()
+
+ if err := docs.Parse(file); err != nil {
+ return nil, err
+ }
+
+ pr, pw := io.Pipe()
+ parser := &Parser{
+ Docs: docs,
+ pr: pr,
+ }
+
+ go parser.transform(pw)
+
+ return parser, nil
+}
+
+func (p *Parser) Read(b []byte) (int, error) {
+ return p.pr.Read(b)
+}
+
+func (p *Parser) Close() error {
+ p.pr.Close()
+
+ return p.Docs.Close()
+}
+
+func (p *Parser) transform(pw *io.PipeWriter) {
+ zw := zip.NewWriter(pw)
+
+ if err := p.Docs.SerializeEntries(zw); err != nil {
+ zw.Close() // Free underlying resources only
+ pw.CloseWithError(fmt.Errorf("lsif parser: Docs.SerializeEntries: %v", err))
+ return
+ }
+
+ if err := zw.Close(); err != nil {
+ pw.CloseWithError(fmt.Errorf("lsif parser: ZipWriter.Close: %v", err))
+ return
+ }
+
+ pw.Close()
+}
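Putting the pieces together, a minimal driver for the parser might look like the sketch below (the import path and file names are assumptions; error handling elided):

    package main

    import (
    	"context"
    	"io"
    	"os"

    	"gitlab.com/gitlab-org/gitlab-workhorse/internal/lsif_transformer/parser"
    )

    func main() {
    	in, _ := os.Open("dump.lsif.zip") // hypothetical input
    	defer in.Close()

    	p, _ := parser.NewParser(context.Background(), in, parser.Config{TempPath: os.TempDir()})
    	defer p.Close()

    	out, _ := os.Create("processed.zip") // hypothetical output
    	defer out.Close()

    	io.Copy(out, p) // the transformed zip streams through the pipe
    }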
diff --git a/workhorse/internal/lsif_transformer/parser/parser_test.go b/workhorse/internal/lsif_transformer/parser/parser_test.go
new file mode 100644
index 00000000000..3a4d72360e2
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/parser_test.go
@@ -0,0 +1,80 @@
+package parser
+
+import (
+ "archive/zip"
+ "bytes"
+ "context"
+ "encoding/json"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestGenerate(t *testing.T) {
+ filePath := "testdata/dump.lsif.zip"
+ tmpDir := filePath + ".tmp"
+ defer os.RemoveAll(tmpDir)
+
+ createFiles(t, filePath, tmpDir)
+
+ verifyCorrectnessOf(t, tmpDir, "lsif/main.go.json")
+ verifyCorrectnessOf(t, tmpDir, "lsif/morestrings/reverse.go.json")
+}
+
+func verifyCorrectnessOf(t *testing.T, tmpDir, fileName string) {
+ file, err := ioutil.ReadFile(filepath.Join(tmpDir, fileName))
+ require.NoError(t, err)
+
+ var buf bytes.Buffer
+ require.NoError(t, json.Indent(&buf, file, "", " "))
+
+ expected, err := ioutil.ReadFile(filepath.Join("testdata/expected/", fileName))
+ require.NoError(t, err)
+
+ require.Equal(t, string(expected), buf.String())
+}
+
+func createFiles(t *testing.T, filePath, tmpDir string) {
+ t.Helper()
+ file, err := os.Open(filePath)
+ require.NoError(t, err)
+
+ parser, err := NewParser(context.Background(), file, Config{})
+ require.NoError(t, err)
+
+ zipFileName := tmpDir + ".zip"
+ w, err := os.Create(zipFileName)
+ require.NoError(t, err)
+ defer os.RemoveAll(zipFileName)
+
+ _, err = io.Copy(w, parser)
+ require.NoError(t, err)
+ require.NoError(t, parser.Close())
+
+ extractZipFiles(t, tmpDir, zipFileName)
+}
+
+func extractZipFiles(t *testing.T, tmpDir, zipFileName string) {
+ zipReader, err := zip.OpenReader(zipFileName)
+ require.NoError(t, err)
+
+ for _, file := range zipReader.Reader.File {
+ zippedFile, err := file.Open()
+ require.NoError(t, err)
+ defer zippedFile.Close()
+
+ fileDir, fileName := filepath.Split(file.Name)
+ require.NoError(t, os.MkdirAll(filepath.Join(tmpDir, fileDir), os.ModePerm))
+
+ outputFile, err := os.Create(filepath.Join(tmpDir, fileDir, fileName))
+ require.NoError(t, err)
+ defer outputFile.Close()
+
+ _, err = io.Copy(outputFile, zippedFile)
+ require.NoError(t, err)
+ }
+}
diff --git a/workhorse/internal/lsif_transformer/parser/performance_test.go b/workhorse/internal/lsif_transformer/parser/performance_test.go
new file mode 100644
index 00000000000..5a12d90072f
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/performance_test.go
@@ -0,0 +1,47 @@
+package parser
+
+import (
+ "context"
+ "io"
+ "io/ioutil"
+ "os"
+ "runtime"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func BenchmarkGenerate(b *testing.B) {
+ filePath := "testdata/workhorse.lsif.zip"
+ tmpDir := filePath + ".tmp"
+ defer os.RemoveAll(tmpDir)
+
+ var memoryUsage float64
+ for i := 0; i < b.N; i++ {
+ memoryUsage += measureMemory(func() {
+ file, err := os.Open(filePath)
+ require.NoError(b, err)
+
+ parser, err := NewParser(context.Background(), file, Config{})
+ require.NoError(b, err)
+
+ _, err = io.Copy(ioutil.Discard, parser)
+ require.NoError(b, err)
+ require.NoError(b, parser.Close())
+ })
+ }
+
+ b.ReportMetric(memoryUsage/float64(b.N), "MiB/op")
+}
+
+func measureMemory(f func()) float64 {
+ var m, m1 runtime.MemStats
+ runtime.ReadMemStats(&m)
+
+ f()
+
+ runtime.ReadMemStats(&m1)
+ runtime.GC()
+
+ return float64(m1.Alloc-m.Alloc) / 1024 / 1024
+}
diff --git a/workhorse/internal/lsif_transformer/parser/ranges.go b/workhorse/internal/lsif_transformer/parser/ranges.go
new file mode 100644
index 00000000000..a11a66d70ca
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/ranges.go
@@ -0,0 +1,214 @@
+package parser
+
+import (
+ "encoding/json"
+ "errors"
+ "io"
+ "strconv"
+)
+
+const (
+ definitions = "definitions"
+ references = "references"
+)
+
+type Ranges struct {
+ DefRefs map[Id]Item
+ References *References
+ Hovers *Hovers
+ Cache *cache
+}
+
+type RawRange struct {
+ Id Id `json:"id"`
+ Data Range `json:"start"`
+}
+
+type Range struct {
+ Line int32 `json:"line"`
+ Character int32 `json:"character"`
+ RefId Id
+}
+
+type RawItem struct {
+ Property string `json:"property"`
+ RefId Id `json:"outV"`
+ RangeIds []Id `json:"inVs"`
+ DocId Id `json:"document"`
+}
+
+type Item struct {
+ Line int32
+ DocId Id
+}
+
+type SerializedRange struct {
+ StartLine int32 `json:"start_line"`
+ StartChar int32 `json:"start_char"`
+ DefinitionPath string `json:"definition_path,omitempty"`
+ Hover json.RawMessage `json:"hover"`
+ References []SerializedReference `json:"references,omitempty"`
+}
+
+func NewRanges(config Config) (*Ranges, error) {
+ hovers, err := NewHovers(config)
+ if err != nil {
+ return nil, err
+ }
+
+ references, err := NewReferences(config)
+ if err != nil {
+ return nil, err
+ }
+
+ cache, err := newCache(config.TempPath, "ranges", Range{})
+ if err != nil {
+ return nil, err
+ }
+
+ return &Ranges{
+ DefRefs: make(map[Id]Item),
+ References: references,
+ Hovers: hovers,
+ Cache: cache,
+ }, nil
+}
+
+func (r *Ranges) Read(label string, line []byte) error {
+ switch label {
+ case "range":
+ if err := r.addRange(line); err != nil {
+ return err
+ }
+ case "item":
+ if err := r.addItem(line); err != nil {
+ return err
+ }
+ default:
+ return r.Hovers.Read(label, line)
+ }
+
+ return nil
+}
+
+func (r *Ranges) Serialize(f io.Writer, rangeIds []Id, docs map[Id]string) error {
+ encoder := json.NewEncoder(f)
+ n := len(rangeIds)
+
+ if _, err := f.Write([]byte("[")); err != nil {
+ return err
+ }
+
+ for i, rangeId := range rangeIds {
+ entry, err := r.getRange(rangeId)
+ if err != nil {
+ continue
+ }
+
+ serializedRange := SerializedRange{
+ StartLine: entry.Line,
+ StartChar: entry.Character,
+ DefinitionPath: r.definitionPathFor(docs, entry.RefId),
+ Hover: r.Hovers.For(entry.RefId),
+ References: r.References.For(docs, entry.RefId),
+ }
+ if err := encoder.Encode(serializedRange); err != nil {
+ return err
+ }
+ if i+1 < n {
+ if _, err := f.Write([]byte(",")); err != nil {
+ return err
+ }
+ }
+ }
+
+ if _, err := f.Write([]byte("]")); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (r *Ranges) Close() error {
+ return combineErrors(
+ r.Cache.Close(),
+ r.References.Close(),
+ r.Hovers.Close(),
+ )
+}
+
+func (r *Ranges) definitionPathFor(docs map[Id]string, refId Id) string {
+ defRef, ok := r.DefRefs[refId]
+ if !ok {
+ return ""
+ }
+
+ defPath := docs[defRef.DocId] + "#L" + strconv.Itoa(int(defRef.Line))
+
+ return defPath
+}
+
+func (r *Ranges) addRange(line []byte) error {
+ var rg RawRange
+ if err := json.Unmarshal(line, &rg); err != nil {
+ return err
+ }
+
+ return r.Cache.SetEntry(rg.Id, &rg.Data)
+}
+
+func (r *Ranges) addItem(line []byte) error {
+ var rawItem RawItem
+ if err := json.Unmarshal(line, &rawItem); err != nil {
+ return err
+ }
+
+ if rawItem.Property != definitions && rawItem.Property != references {
+ return nil
+ }
+
+ if len(rawItem.RangeIds) == 0 {
+ return errors.New("no range IDs")
+ }
+
+ var references []Item
+
+ for _, rangeId := range rawItem.RangeIds {
+ rg, err := r.getRange(rangeId)
+ if err != nil {
+ return err
+ }
+
+ rg.RefId = rawItem.RefId
+
+ if err := r.Cache.SetEntry(rangeId, rg); err != nil {
+ return err
+ }
+
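+ // LSIF range lines are 0-based, while the "#L" anchors built from
+ // Item.Line are 1-based, hence the +1.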
+ item := Item{
+ Line: rg.Line + 1,
+ DocId: rawItem.DocId,
+ }
+
+ if rawItem.Property == definitions {
+ r.DefRefs[rawItem.RefId] = item
+ } else {
+ references = append(references, item)
+ }
+ }
+
+ if err := r.References.Store(rawItem.RefId, references); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (r *Ranges) getRange(rangeId Id) (*Range, error) {
+ var rg Range
+ if err := r.Cache.Entry(rangeId, &rg); err != nil {
+ return nil, err
+ }
+
+ return &rg, nil
+}
diff --git a/workhorse/internal/lsif_transformer/parser/ranges_test.go b/workhorse/internal/lsif_transformer/parser/ranges_test.go
new file mode 100644
index 00000000000..c1400ba61da
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/ranges_test.go
@@ -0,0 +1,61 @@
+package parser
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestRangesRead(t *testing.T) {
+ r, cleanup := setup(t)
+ defer cleanup()
+
+ firstRange := Range{Line: 1, Character: 2, RefId: 4}
+ rg, err := r.getRange(1)
+ require.NoError(t, err)
+ require.Equal(t, &firstRange, rg)
+
+ secondRange := Range{Line: 5, Character: 4, RefId: 4}
+ rg, err = r.getRange(2)
+ require.NoError(t, err)
+ require.Equal(t, &secondRange, rg)
+
+ thirdRange := Range{Line: 7, Character: 4, RefId: 4}
+ rg, err = r.getRange(3)
+ require.NoError(t, err)
+ require.Equal(t, &thirdRange, rg)
+}
+
+func TestSerialize(t *testing.T) {
+ r, cleanup := setup(t)
+ defer cleanup()
+
+ docs := map[Id]string{6: "def-path", 7: "ref-path"}
+
+ var buf bytes.Buffer
+ err := r.Serialize(&buf, []Id{1}, docs)
+ want := `[{"start_line":1,"start_char":2,"definition_path":"def-path#L2","hover":null,"references":[{"path":"ref-path#L6"},{"path":"ref-path#L8"}]}` + "\n]"
+
+ require.NoError(t, err)
+ require.Equal(t, want, buf.String())
+}
+
+func setup(t *testing.T) (*Ranges, func()) {
+ r, err := NewRanges(Config{})
+ require.NoError(t, err)
+
+ require.NoError(t, r.Read("range", []byte(`{"id":1,"label":"range","start":{"line":1,"character":2}}`)))
+ require.NoError(t, r.Read("range", []byte(`{"id":"2","label":"range","start":{"line":5,"character":4}}`)))
+ require.NoError(t, r.Read("range", []byte(`{"id":"3","label":"range","start":{"line":7,"character":4}}`)))
+
+ require.NoError(t, r.Read("item", []byte(`{"id":5,"label":"item","property":"definitions","outV":"4","inVs":[1],"document":"6"}`)))
+ require.NoError(t, r.Read("item", []byte(`{"id":"6","label":"item","property":"references","outV":4,"inVs":["2"],"document":"7"}`)))
+ require.NoError(t, r.Read("item", []byte(`{"id":"7","label":"item","property":"references","outV":4,"inVs":["3"],"document":"7"}`)))
+
+ cleanup := func() {
+ require.NoError(t, r.Close())
+ }
+
+ return r, cleanup
+}
diff --git a/workhorse/internal/lsif_transformer/parser/references.go b/workhorse/internal/lsif_transformer/parser/references.go
new file mode 100644
index 00000000000..58ff9a61c02
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/references.go
@@ -0,0 +1,107 @@
+package parser
+
+import (
+ "strconv"
+)
+
+type ReferencesOffset struct {
+ Id Id
+ Len int32
+}
+
+type References struct {
+ Items *cache
+ Offsets *cache
+ CurrentOffsetId Id
+}
+
+type SerializedReference struct {
+ Path string `json:"path"`
+}
+
+func NewReferences(config Config) (*References, error) {
+ tempPath := config.TempPath
+
+ items, err := newCache(tempPath, "references", Item{})
+ if err != nil {
+ return nil, err
+ }
+
+ offsets, err := newCache(tempPath, "references-offsets", ReferencesOffset{})
+ if err != nil {
+ return nil, err
+ }
+
+ return &References{
+ Items: items,
+ Offsets: offsets,
+ CurrentOffsetId: 0,
+ }, nil
+}
+
+// Store keeps track of references so they can be serialized later in `For`.
+//
+// Conceptually this behaves like a `map[Id][]Item` (where `Id` is `refId`),
+// but the items and their offsets are cached in files rather than held in
+// RAM. The items can be fetched back by calling `getItems`.
+func (r *References) Store(refId Id, references []Item) error {
+ size := len(references)
+
+ if size == 0 {
+ return nil
+ }
+
+ items := append(r.getItems(refId), references...)
+ err := r.Items.SetEntry(r.CurrentOffsetId, items)
+ if err != nil {
+ return err
+ }
+
+ size = len(items)
+ if err := r.Offsets.SetEntry(refId, ReferencesOffset{Id: r.CurrentOffsetId, Len: int32(size)}); err != nil {
+ return err
+ }
+ r.CurrentOffsetId += Id(size)
+
+ return nil
+}
+
+func (r *References) For(docs map[Id]string, refId Id) []SerializedReference {
+ references := r.getItems(refId)
+ if references == nil {
+ return nil
+ }
+
+ var serializedReferences []SerializedReference
+
+ for _, reference := range references {
+ serializedReference := SerializedReference{
+ Path: docs[reference.DocId] + "#L" + strconv.Itoa(int(reference.Line)),
+ }
+
+ serializedReferences = append(serializedReferences, serializedReference)
+ }
+
+ return serializedReferences
+}
+
+func (r *References) Close() error {
+ return combineErrors(
+ r.Items.Close(),
+ r.Offsets.Close(),
+ )
+}
+
+func (r *References) getItems(refId Id) []Item {
+ var offset ReferencesOffset
+ if err := r.Offsets.Entry(refId, &offset); err != nil || offset.Len == 0 {
+ return nil
+ }
+
+ items := make([]Item, offset.Len)
+ if err := r.Items.Entry(offset.Id, &items); err != nil {
+ return nil
+ }
+
+ return items
+}
diff --git a/workhorse/internal/lsif_transformer/parser/references_test.go b/workhorse/internal/lsif_transformer/parser/references_test.go
new file mode 100644
index 00000000000..7b47513bc53
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/references_test.go
@@ -0,0 +1,44 @@
+package parser
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestReferencesStore(t *testing.T) {
+ const (
+ docId = 1
+ refId = 3
+ )
+
+ r, err := NewReferences(Config{})
+ require.NoError(t, err)
+
+ err = r.Store(refId, []Item{{Line: 2, DocId: docId}, {Line: 3, DocId: docId}})
+ require.NoError(t, err)
+
+ docs := map[Id]string{docId: "doc.go"}
+ serializedReferences := r.For(docs, refId)
+
+ require.Contains(t, serializedReferences, SerializedReference{Path: "doc.go#L2"})
+ require.Contains(t, serializedReferences, SerializedReference{Path: "doc.go#L3"})
+
+ require.NoError(t, r.Close())
+}
+
+func TestReferencesStoreEmpty(t *testing.T) {
+ const refId = 3
+
+ r, err := NewReferences(Config{})
+ require.NoError(t, err)
+
+ err = r.Store(refId, []Item{})
+ require.NoError(t, err)
+
+ docs := map[Id]string{1: "doc.go"}
+ serializedReferences := r.For(docs, refId)
+
+ require.Nil(t, serializedReferences)
+ require.NoError(t, r.Close())
+}
diff --git a/workhorse/internal/lsif_transformer/parser/testdata/dump.lsif.zip b/workhorse/internal/lsif_transformer/parser/testdata/dump.lsif.zip
new file mode 100644
index 00000000000..e7c9ef2da66
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/testdata/dump.lsif.zip
Binary files differ
diff --git a/workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/main.go.json b/workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/main.go.json
new file mode 100644
index 00000000000..781cb78fc1a
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/main.go.json
@@ -0,0 +1,208 @@
+[
+ {
+ "start_line": 7,
+ "start_char": 1,
+ "definition_path": "main.go#L4",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kn",
+ "value": "package"
+ },
+ {
+ "value": " "
+ },
+ {
+ "class": "s",
+ "value": "\"github.com/user/hello/morestrings\""
+ }
+ ]
+ ],
+ "language": "go"
+ },
+ {
+ "value": "Package morestrings implements additional functions to manipulate UTF-8 encoded strings, beyond what is provided in the standard \"strings\" package. \n\n"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L8"
+ },
+ {
+ "path": "main.go#L9"
+ }
+ ]
+ },
+ {
+ "start_line": 7,
+ "start_char": 13,
+ "definition_path": "morestrings/reverse.go#L12",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "func"
+ },
+ {
+ "value": " Reverse(s "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ },
+ {
+ "value": ") "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ },
+ {
+ "value": "This method reverses a string \n\n"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L8"
+ }
+ ]
+ },
+ {
+ "start_line": 8,
+ "start_char": 1,
+ "definition_path": "main.go#L4",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kn",
+ "value": "package"
+ },
+ {
+ "value": " "
+ },
+ {
+ "class": "s",
+ "value": "\"github.com/user/hello/morestrings\""
+ }
+ ]
+ ],
+ "language": "go"
+ },
+ {
+ "value": "Package morestrings implements additional functions to manipulate UTF-8 encoded strings, beyond what is provided in the standard \"strings\" package. \n\n"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L8"
+ },
+ {
+ "path": "main.go#L9"
+ }
+ ]
+ },
+ {
+ "start_line": 8,
+ "start_char": 13,
+ "definition_path": "morestrings/reverse.go#L5",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "func"
+ },
+ {
+ "value": " Func2(i "
+ },
+ {
+ "class": "kt",
+ "value": "int"
+ },
+ {
+ "value": ") "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L9"
+ }
+ ]
+ },
+ {
+ "start_line": 6,
+ "start_char": 5,
+ "definition_path": "main.go#L7",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "func"
+ },
+ {
+ "value": " main()"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ]
+ },
+ {
+ "start_line": 3,
+ "start_char": 2,
+ "definition_path": "main.go#L4",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kn",
+ "value": "package"
+ },
+ {
+ "value": " "
+ },
+ {
+ "class": "s",
+ "value": "\"github.com/user/hello/morestrings\""
+ }
+ ]
+ ],
+ "language": "go"
+ },
+ {
+ "value": "Package morestrings implements additional functions to manipulate UTF-8 encoded strings, beyond what is provided in the standard \"strings\" package. \n\n"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L8"
+ },
+ {
+ "path": "main.go#L9"
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/morestrings/reverse.go.json b/workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/morestrings/reverse.go.json
new file mode 100644
index 00000000000..1d238413d53
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/testdata/expected/lsif/morestrings/reverse.go.json
@@ -0,0 +1,249 @@
+[
+ {
+ "start_line": 11,
+ "start_char": 5,
+ "definition_path": "morestrings/reverse.go#L12",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "func"
+ },
+ {
+ "value": " Reverse(s "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ },
+ {
+ "value": ") "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ },
+ {
+ "value": "This method reverses a string \n\n"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L8"
+ }
+ ]
+ },
+ {
+ "start_line": 4,
+ "start_char": 11,
+ "definition_path": "morestrings/reverse.go#L5",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "var"
+ },
+ {
+ "value": " i "
+ },
+ {
+ "class": "kt",
+ "value": "int"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ]
+ },
+ {
+ "start_line": 11,
+ "start_char": 13,
+ "definition_path": "morestrings/reverse.go#L12",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "var"
+ },
+ {
+ "value": " s "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ]
+ },
+ {
+ "start_line": 12,
+ "start_char": 1,
+ "definition_path": "morestrings/reverse.go#L13",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "var"
+ },
+ {
+ "value": " a "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ],
+ "references": [
+ {
+ "path": "morestrings/reverse.go#L15"
+ }
+ ]
+ },
+ {
+ "start_line": 5,
+ "start_char": 1,
+ "definition_path": "morestrings/reverse.go#L6",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "var"
+ },
+ {
+ "value": " b "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ],
+ "references": [
+ {
+ "path": "morestrings/reverse.go#L8"
+ }
+ ]
+ },
+ {
+ "start_line": 14,
+ "start_char": 8,
+ "definition_path": "morestrings/reverse.go#L13",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "var"
+ },
+ {
+ "value": " a "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ],
+ "references": [
+ {
+ "path": "morestrings/reverse.go#L15"
+ }
+ ]
+ },
+ {
+ "start_line": 7,
+ "start_char": 8,
+ "definition_path": "morestrings/reverse.go#L6",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "var"
+ },
+ {
+ "value": " b "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ],
+ "references": [
+ {
+ "path": "morestrings/reverse.go#L8"
+ }
+ ]
+ },
+ {
+ "start_line": 4,
+ "start_char": 5,
+ "definition_path": "morestrings/reverse.go#L5",
+ "hover": [
+ {
+ "tokens": [
+ [
+ {
+ "class": "kd",
+ "value": "func"
+ },
+ {
+ "value": " Func2(i "
+ },
+ {
+ "class": "kt",
+ "value": "int"
+ },
+ {
+ "value": ") "
+ },
+ {
+ "class": "kt",
+ "value": "string"
+ }
+ ]
+ ],
+ "language": "go"
+ }
+ ],
+ "references": [
+ {
+ "path": "main.go#L9"
+ }
+ ]
+ }
+]
\ No newline at end of file
diff --git a/workhorse/internal/lsif_transformer/parser/testdata/workhorse.lsif.zip b/workhorse/internal/lsif_transformer/parser/testdata/workhorse.lsif.zip
new file mode 100644
index 00000000000..76491ed8a93
--- /dev/null
+++ b/workhorse/internal/lsif_transformer/parser/testdata/workhorse.lsif.zip
Binary files differ