Skip to content

Commit

Permalink
gopls/internal/util/asm: better assembly parsing
Browse files Browse the repository at this point in the history
This CL adds a rudimentary parser for symbols in Go .s files.
It is a placeholder for a more principled implementation,
but it is sufficient to make Definition support control labels
(also in this CL) and for a cross-references index (future work).

+ test of Definition on control label
+ test of asm.Parse

Updates golang/go#71754

Change-Id: I2ff19b4ade130c051197d6b097a1a3dbcd95555a
Reviewed-on: https://go-review.googlesource.com/c/tools/+/654335
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Jonathan Amsterdam <[email protected]>
Auto-Submit: Alan Donovan <[email protected]>
  • Loading branch information
adonovan authored and gopherbot committed Mar 4, 2025
1 parent 455db21 commit d81d6fc
Show file tree
Hide file tree
Showing 5 changed files with 353 additions and 24 deletions.
59 changes: 35 additions & 24 deletions gopls/internal/goasm/definition.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package goasm provides language-server features for files in Go
// assembly language (https://go.dev/doc/asm).
package goasm

import (
"bytes"
"context"
"fmt"
"go/token"
"strings"
"unicode"

"golang.org/x/tools/gopls/internal/cache"
"golang.org/x/tools/gopls/internal/cache/metadata"
"golang.org/x/tools/gopls/internal/file"
"golang.org/x/tools/gopls/internal/protocol"
"golang.org/x/tools/gopls/internal/util/asm"
"golang.org/x/tools/gopls/internal/util/morestrings"
"golang.org/x/tools/internal/event"
)
Expand All @@ -41,21 +41,27 @@ func Definition(ctx context.Context, snapshot *cache.Snapshot, fh file.Handle, p
return nil, err
}

// Parse the assembly.
//
// TODO(adonovan): make this just another
// attribute of the type-checked cache.Package.
file := asm.Parse(content)

// Figure out the selected symbol.
// For now, just find the identifier around the cursor.
//
// TODO(adonovan): use a real asm parser; see cmd/asm/internal/asm/parse.go.
// Ideally this would be just another attribute of the
// type-checked cache.Package.
nonIdentRune := func(r rune) bool { return !isIdentRune(r) }
i := bytes.LastIndexFunc(content[:offset], nonIdentRune)
j := bytes.IndexFunc(content[offset:], nonIdentRune)
if j < 0 || j == 0 {
return nil, nil // identifier runs to EOF, or not an identifier
var found *asm.Ident
for _, id := range file.Idents {
if id.Offset <= offset && offset <= id.End() {
found = &id
break
}
}
sym := string(content[i+1 : offset+j])
sym = strings.ReplaceAll(sym, "·", ".") // (U+00B7 MIDDLE DOT)
sym = strings.ReplaceAll(sym, "∕", "/") // (U+2215 DIVISION SLASH)
if found == nil {
return nil, fmt.Errorf("not an identifier")
}

// Resolve a symbol with a "." prefix to the current package.
sym := found.Name
if sym != "" && sym[0] == '.' {
sym = string(mp.PkgPath) + sym
}
Expand Down Expand Up @@ -92,18 +98,23 @@ func Definition(ctx context.Context, snapshot *cache.Snapshot, fh file.Handle, p
if err == nil {
return []protocol.Location{loc}, nil
}
}

// TODO(adonovan): support jump to var, block label, and other
// TEXT, DATA, and GLOBAL symbols in the same file. Needs asm parser.
} else {
// local symbols (funcs, vars, labels)
for _, id := range file.Idents {
if id.Name == found.Name &&
(id.Kind == asm.Text || id.Kind == asm.Global || id.Kind == asm.Label) {

return nil, nil
}
loc, err := mapper.OffsetLocation(id.Offset, id.End())
if err != nil {
return nil, err
}
return []protocol.Location{loc}, nil
}
}
}

// The assembler allows center dot (· U+00B7) and
// division slash (∕ U+2215) to work as identifier characters.
func isIdentRune(r rune) bool {
return unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' || r == '·' || r == '∕'
return nil, nil
}

// TODO(rfindley): avoid the duplicate column mapping here, by associating a
Expand Down
3 changes: 3 additions & 0 deletions gopls/internal/golang/assembly.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ package golang
// - ./codeaction.go - computes the symbol and offers the CodeAction command.
// - ../server/command.go - handles the command by opening a web page.
// - ../server/server.go - handles the HTTP request and calls this function.
//
// For language-server behavior in Go assembly language files,
// see [golang.org/x/tools/gopls/internal/goasm].

import (
"bytes"
Expand Down
3 changes: 3 additions & 0 deletions gopls/internal/test/marker/testdata/definition/asm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ var _ = ff // pacify unusedfunc analyzer
TEXT ·ff(SB), $16 //@ loc(ffasm, "ff"), def("ff", ffgo)
CALL example·com∕b·B //@ def("com", bB)
JMP ·ff //@ def("ff", ffgo)
JMP label //@ def("label", label)
label: //@ loc(label,"label")
RET

-- b/b.go --
package b
Expand Down
245 changes: 245 additions & 0 deletions gopls/internal/util/asm/parse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package asm provides a simple parser for Go assembly files.
package asm

import (
"bufio"
"bytes"
"fmt"
"strings"
"unicode"
)

// Kind classifies the role that an identifier plays at the place
// where it appears in an assembly file.
type Kind uint8

const (
	Invalid Kind = iota // reserved zero value; not used by Ident
	Ref                 // arbitrary reference to symbol or control label
	Text                // definition of TEXT (function) symbol
	Global              // definition of GLOBL (var) symbol
	Data                // initialization of GLOBL (var) symbol; effectively a reference
	Label               // definition of control label
)

// String returns the lower-case name of the kind, or "Kind(N)" for a
// value that has no entry in kindString.
func (k Kind) String() string {
	if idx := int(k); idx >= len(kindString) {
		return fmt.Sprintf("Kind(%d)", k)
	}
	return kindString[k]
}

// kindString maps each declared Kind to its printed name.
var kindString = [...]string{
	Invalid: "invalid",
	Ref:     "ref",
	Text:    "text",
	Global:  "global",
	Data:    "data",
	Label:   "label",
}

// A File represents a parsed file of Go assembly language.
// It is the result type of [Parse].
type File struct {
// Idents holds the identifiers that the parser recognized.
Idents []Ident

// TODO(adonovan): use token.File? This may be important in a
// future in which analyzers can report diagnostics in .s files.
}

// Ident represents an identifier in an assembly file.
//
// Name holds the symbol in linker syntax, whereas Offset (and the
// value returned by End) are byte positions in the assembly source,
// where the symbol is spelled in assembler syntax.
type Ident struct {
	Name   string // symbol name (after correcting [·∕]); Name[0]='.' => current package
	Offset int    // zero-based byte offset of the identifier in the source
	Kind   Kind
}

// End returns the identifier's end offset in the source file.
//
// Name may be shorter than the source text it was parsed from: each
// "." stands for a 2-byte "·" (U+00B7) and each "/" for a 3-byte "∕"
// (U+2215). Neither "." nor "/" is a valid identifier character in
// the source, so every occurrence in Name must have come from such a
// substitution, and the source length can be recovered exactly.
func (id Ident) End() int {
	const (
		dotExtra   = len("·") - len(".") // extra source bytes per middle dot
		slashExtra = len("∕") - len("/") // extra source bytes per division slash
	)
	return id.Offset + len(id.Name) +
		dotExtra*strings.Count(id.Name, ".") +
		slashExtra*strings.Count(id.Name, "/")
}

// Parse extracts identifiers from the content of a Go assembly file.
// Since it is a best-effort parser, it never returns an error.
//
// The content is processed one line at a time. After "//" comments
// are stripped, each non-blank line is classified as one of:
//   - a control label definition ("name:"), recorded with Kind Label;
//   - a TEXT, GLOBL, or DATA directive, whose first operand is
//     recorded with Kind Text, Global, or Data respectively;
//   - anything else, in which every word that looks like a symbol
//     (rather than a mnemonic, register, or immediate) is recorded
//     with Kind Ref.
//
// Every Ident.Offset is a byte offset into content. Both "\n" and
// "\r\n" line endings are supported.
func Parse(content []byte) *File {
	var idents []Ident
	offset := 0 // byte offset of start of current line

	// TODO(adonovan) use a proper tokenizer that respects
	// comments, string literals, line continuations, etc.
	scan := bufio.NewScanner(bytes.NewReader(content))

	// bufio.ScanLines strips the line terminator, which may be
	// "\n" or "\r\n" (or absent on the final line), so the length
	// of the token does not tell us how far to advance offset.
	// Wrap the split function to record the true number of bytes
	// consumed by each line.
	lineSize := 0
	scan.Split(func(data []byte, atEOF bool) (int, []byte, error) {
		n, tok, err := bufio.ScanLines(data, atEOF)
		if tok != nil {
			lineSize = n
		}
		return n, tok, err
	})

	for ; scan.Scan(); offset += lineSize {
		line := scan.Text()

		// Strip comments.
		if idx := strings.Index(line, "//"); idx >= 0 {
			line = line[:idx]
		}

		// Skip blank lines.
		if strings.TrimSpace(line) == "" {
			continue
		}

		// Check for label definitions (ending with colon).
		if colon := strings.IndexByte(line, ':'); colon > 0 {
			label := strings.TrimSpace(line[:colon])
			if isIdent(label) {
				idents = append(idents, Ident{
					Name:   label,
					Offset: offset + strings.Index(line, label),
					Kind:   Label,
				})
				continue
			}
		}

		// Split line into words.
		words := strings.Fields(line)
		if len(words) == 0 {
			continue
		}

		// A line of the form
		//   TEXT ·sym<ABIInternal>(SB),NOSPLIT,$12
		// declares a text symbol "·sym".
		if len(words) > 1 {
			kind := Invalid
			switch words[0] {
			case "TEXT":
				kind = Text
			case "GLOBL":
				kind = Global
			case "DATA":
				kind = Data
			}
			if kind != Invalid {
				sym := words[1]
				sym = cutBefore(sym, ",") // strip ",NOSPLIT,$12" etc
				sym = cutBefore(sym, "(") // "sym(SB)" -> "sym"
				sym = cutBefore(sym, "<") // "sym<ABIInternal>" -> "sym"
				sym = cutBefore(sym, "+") // "sym+8(SB)/8" -> "sym" (DATA offsets)
				sym = strings.TrimSpace(sym)
				if isIdent(sym) {
					// (The Index call assumes sym is not itself "TEXT" etc.)
					idents = append(idents, Ident{
						Name:   cleanup(sym),
						Kind:   kind,
						Offset: offset + strings.Index(line, sym),
					})
				}
				continue
			}
		}

		// Find references in the rest of the line.
		pos := 0
		for _, word := range words {
			// Find actual position of word within line.
			tokenPos := strings.Index(line[pos:], word)
			if tokenPos < 0 {
				// Unreachable: words are the fields of line, in order.
				panic(line)
			}
			tokenPos += pos
			pos = tokenPos + len(word)

			// A word containing a middle dot (e.g. "·F(SB)") is a
			// package-qualified symbol, never a mnemonic or register,
			// even if it has no lower-case letters.
			//
			// (This must be a string test: "·" is two bytes in UTF-8,
			// so comparing word[0] with the rune '·' never matches.)
			qualified := strings.Contains(word, "·")

			// Reject probable instruction mnemonics (e.g. MOV).
			if len(word) >= 2 && !qualified &&
				!strings.ContainsFunc(word, unicode.IsLower) {
				continue
			}

			if word[0] == '$' {
				word = word[1:]
				tokenPos++

				// Reject probable immediate values (e.g. "$123").
				if !strings.ContainsFunc(word, isNonDigit) {
					continue
				}
			}

			// Reject probable registers (e.g. "PC").
			if len(word) <= 3 && !qualified &&
				!strings.ContainsFunc(word, unicode.IsLower) {
				continue
			}

			// Probable identifier reference.
			//
			// TODO(adonovan): handle FP symbols correctly;
			// sym+8(FP) is essentially a comment about
			// stack slot 8, not a reference to a symbol
			// with a declaration somewhere; so they form
			// an equivalence class without a canonical
			// declaration.
			//
			// TODO(adonovan): handle pseudoregisters and field
			// references such as:
			//   MOVD $runtime·g0(SB), g       // pseudoreg
			//   MOVD R0, g_stackguard0(g)     // field ref

			sym := cutBefore(word, "(") // "·sym(SB)" => "sym"
			sym = cutBefore(sym, "+")   // "sym+8(FP)" => "sym"
			sym = cutBefore(sym, "<")   // "sym<ABIInternal>" => "sym"
			if isIdent(sym) {
				idents = append(idents, Ident{
					Name:   cleanup(sym),
					Kind:   Ref,
					Offset: offset + tokenPos,
				})
			}
		}
	}

	_ = scan.Err() // ignore scan errors: parsing is best-effort

	return &File{Idents: idents}
}

// isIdent reports whether s is a valid Go assembly identifier:
// a non-empty string in which every rune is acceptable to
// isIdentRune at its byte position.
func isIdent(s string) bool {
	if s == "" {
		return false
	}
	for pos, ch := range s {
		if ok := isIdentRune(ch, pos); !ok {
			return false
		}
	}
	return true
}

// cutBefore returns the prefix of s that precedes the first
// occurrence of sep, or all of s if sep does not occur in it.
func cutBefore(s, sep string) string {
	idx := strings.Index(s, sep)
	if idx < 0 {
		return s
	}
	return s[:idx]
}

// repl rewrites the assembler's stand-in characters to the
// characters they represent in linker symbol names.
var repl = strings.NewReplacer(
	"·", ".", // (U+00B7 MIDDLE DOT)
	"∕", "/", // (U+2215 DIVISION SLASH)
)

// cleanup translates a symbol name from assembler syntax
// (e.g. "example·com∕b·B") to linker syntax ("example.com/b.B").
func cleanup(sym string) string {
	linkName := repl.Replace(sym)
	return linkName
}

// isNonDigit reports whether r is anything other than a Unicode
// decimal digit.
func isNonDigit(r rune) bool {
	if unicode.IsDigit(r) {
		return false
	}
	return true
}

// -- plundered from GOROOT/src/cmd/asm/internal/asm/parse.go --

// isIdentRune reports whether ch may appear at byte offset i of an
// assembly identifier. Besides letters and underscore, the assembler
// accepts center dot (·) and division slash (∕), which stand for the
// period and slash of a qualified symbol such as
// runtime/debug.setGCPercent. Digits are accepted anywhere except at
// the first position.
func isIdentRune(ch rune, i int) bool {
	switch {
	case unicode.IsLetter(ch),
		ch == '_',      // underscore; traditional
		ch == '\u00B7', // '·' middle dot
		ch == '\u2215': // '∕' division slash
		return true
	case unicode.IsDigit(ch):
		return i > 0
	}
	return false
}
Loading

0 comments on commit d81d6fc

Please sign in to comment.