Skip to content

Commit

Permalink
Implement smarter trailing comments detection (bazelbuild#333)
Browse files Browse the repository at this point in the history
  • Loading branch information
vladmos authored Jun 18, 2018
1 parent b6bc3d7 commit 5a4c4ca
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 85 deletions.
73 changes: 52 additions & 21 deletions build/parse.y
Original file line number Diff line number Diff line change
Expand Up @@ -1062,36 +1062,67 @@ func forceMultiLineComprehension(start Position, expr Expr, clauses []Expr, end
return previousEnd.Line != end.Line
}

// extractTrailingComments extracts trailing comments from a block statement
// and returns the comments. The comments can be either CommentBlock statements
// or After-comments for a statement of a different type.
// extractTrailingComments extracts trailing comments of an indented block starting with the first
// comment line with indentation less than the block indentation.
// The comments can either belong to CommentBlock statements or to the last non-comment statement
// as After-comments.
func extractTrailingComments(stmt Expr) []Expr {
body := getLastBody(stmt)
var comments []Expr
if body != nil && len(*body) > 0 {
// Detach and return all trailing comment blocks
for i := len(*body)-1; i >= 0; i-- {
cb, ok := (*body)[i].(*CommentBlock)
if !ok {
break
// Get the current indentation level
start, _ := (*body)[0].Span()
indentation := start.LineRune

// Find the last non-comment statement
lastNonCommentIndex := -1
for i, stmt := range *body {
if _, ok := stmt.(*CommentBlock); !ok {
lastNonCommentIndex = i
}
comments = append(comments, cb)
*body = (*body)[:i]
}
if lastNonCommentIndex == -1 {
return comments
}

// Detach after comments from the last statement
lastStmt := (*body)[len(*body)-1]
cb := &CommentBlock{Comments: Comments{After: lastStmt.Comment().After}}
if len(cb.After) > 0 {
lastStmt.Comment().After = []Comment{}
comments = append(comments, cb)
// Iterate over the trailing comments, find the first comment line that's not indented enough,
// dedent it and all the following comments.
for i := lastNonCommentIndex; i < len(*body); i++ {
stmt := (*body)[i]
if comment := extractDedentedComment(stmt, indentation); comment != nil {
// This comment and all the following CommentBlock statements are to be extracted.
comments = append(comments, comment)
comments = append(comments, (*body)[i+1:]...)
*body = (*body)[:i+1]
// If the current statement is a CommentBlock statement without any comment lines
// it should be removed too.
if i > lastNonCommentIndex && len(stmt.Comment().After) == 0 {
*body = (*body)[:i]
}
}
}
}
return comments
}

// extractDedentedComment extracts the first comment line from `stmt` whose indentation is smaller
// than `indentation`, and all following comment lines, and returns them in a newly created
// CommentBlock statement.
func extractDedentedComment(stmt Expr, indentation int) Expr {
for i, line := range stmt.Comment().After {
// line.Start.LineRune == 0 can't exist in parsed files, it indicates that the comment line
// has been added by an AST modification. Don't take such lines into account.
if line.Start.LineRune > 0 && line.Start.LineRune < indentation {
// This and all the following lines should be dedented
cb := &CommentBlock{
Start: line.Start,
Comments: Comments{After: stmt.Comment().After[i:]},
}
stmt.Comment().After = stmt.Comment().After[:i]
return cb
}
}
// The comments are collected in the reversed order, reverse them again
for i, j := 0, len(comments)-1; i < j; i, j = i+1, j-1 {
comments[i], comments[j] = comments[j], comments[i]
}
return comments
return nil
}

// getLastBody returns the last body of a block statement (the only body for For- and DefStmt
Expand Down
73 changes: 52 additions & 21 deletions build/parse.y.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,38 +268,69 @@ func forceMultiLineComprehension(start Position, expr Expr, clauses []Expr, end
return previousEnd.Line != end.Line
}

// extractTrailingComments extracts trailing comments from a block statement
// and returns the comments. The comments can be either CommentBlock statements
// or After-comments for a statement of a different type.
// extractTrailingComments extracts trailing comments of an indented block starting with the first
// comment line with indentation less than the block indentation.
// The comments can either belong to CommentBlock statements or to the last non-comment statement
// as After-comments.
func extractTrailingComments(stmt Expr) []Expr {
body := getLastBody(stmt)
var comments []Expr
if body != nil && len(*body) > 0 {
// Detach and return all trailing comment blocks
for i := len(*body) - 1; i >= 0; i-- {
cb, ok := (*body)[i].(*CommentBlock)
if !ok {
break
// Get the current indentation level
start, _ := (*body)[0].Span()
indentation := start.LineRune

// Find the last non-comment statement
lastNonCommentIndex := -1
for i, stmt := range *body {
if _, ok := stmt.(*CommentBlock); !ok {
lastNonCommentIndex = i
}
comments = append(comments, cb)
*body = (*body)[:i]
}

// Detach after comments from the last statement
lastStmt := (*body)[len(*body)-1]
cb := &CommentBlock{Comments: Comments{After: lastStmt.Comment().After}}
if len(cb.After) > 0 {
lastStmt.Comment().After = []Comment{}
comments = append(comments, cb)
if lastNonCommentIndex == -1 {
return comments
}

// Iterate over the trailing comments, find the first comment line that's not indented enough,
// dedent it and all the following comments.
for i := lastNonCommentIndex; i < len(*body); i++ {
stmt := (*body)[i]
if comment := extractDedentedComment(stmt, indentation); comment != nil {
// This comment and all the following CommentBlock statements are to be extracted.
comments = append(comments, comment)
comments = append(comments, (*body)[i+1:]...)
*body = (*body)[:i+1]
// If the current statement is a CommentBlock statement without any comment lines
// it should be removed too.
if i > lastNonCommentIndex && len(stmt.Comment().After) == 0 {
*body = (*body)[:i]
}
}
}
}
// The comments are collected in the reversed order, reverse them again
for i, j := 0, len(comments)-1; i < j; i, j = i+1, j-1 {
comments[i], comments[j] = comments[j], comments[i]
}
return comments
}

// extractDedentedComment splits the After-comments of `stmt` at the first comment line whose
// indentation is smaller than `indentation`. That line and all After-comment lines following it
// are moved into a newly created CommentBlock statement, which is returned; the lines preceding
// it stay attached to `stmt`. If no such line exists, `stmt` is left untouched and nil is
// returned.
func extractDedentedComment(stmt Expr, indentation int) Expr {
	comments := stmt.Comment()
	after := comments.After
	for i, line := range after {
		// line.Start.LineRune == 0 can't exist in parsed files; it indicates that the comment
		// line has been added by an AST modification. Don't take such lines into account.
		if line.Start.LineRune > 0 && line.Start.LineRune < indentation {
			// This and all the following lines should be dedented.
			cb := &CommentBlock{
				Start:    line.Start,
				Comments: Comments{After: after[i:]},
			}
			// Cap the capacity of the retained prefix: both slices share one backing array, so
			// without the cap a later append to the statement's After-comments would silently
			// overwrite the lines that have just been moved into cb.
			comments.After = after[:i:i]
			return cb
		}
	}
	return nil
}

// getLastBody returns the last body of a block statement (the only body for For- and DefStmt
// objects, the last in an if-elif-else chain).
func getLastBody(stmt Expr) *[]Expr {
Expand Down
2 changes: 1 addition & 1 deletion build/testdata/051.in
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ if foo:
bar
if foo:
bar
#comment
#comment
bar

bar
Expand Down
73 changes: 52 additions & 21 deletions build/testdata/058.golden
Original file line number Diff line number Diff line change
Expand Up @@ -2,63 +2,94 @@ def f():
if foo:
return bar

# 1 these comments
# these comments
# belong to the next statement
def f():
if foo:
return bar

# 2 these comments
# these comments
# belong to the top level

def f():
if foo:
return bar

# 3 these comments
# belong to the next statement
# these comments
# belong to the top level

def f():
if foo:
return bar
# this comment belongs to the return statement

# 4 these comments
# these comments are aligned
# with the if-statement as after-comments

# belong to the top level
# this should have the same indentation level as above

# but this line belongs to the top level

# and this belongs to the following for-loop
for a in b:
if foo:
pass
# this comment is aligned with pass

# this comment is aligned with pass

# this comment is aligned with pass
pass
elif bar:
pass

# 5 these comments
# belong to the top level
# this comment is aligned with pass
# this comment is aligned with pass

# this comment is aligned with elif
# this comment is aligned with elif

# this comment is aligned with for
# this comment is aligned with for

for a in b:
if foo:
pass
elif bar:
pass
else:
pass

# 6 these comments
# belong to the top level

def foo():
return # This comment stays here
# all
# of
# these
# comments
# are
# not
# indented

# 7 these comments
for a in b:
if foo:
pass
elif bar:
pass
else:
pass # This comment stays here

# belong to the
# all
# of
# these
# comments

# top level
# are
# aligned
# with
# else

def foo():
return # This comment stays here
# this comment belongs to the return statement

# 8 these comments
# this comment belongs to the function

# belong to the
# but these belong

# top level
# to the top level
Loading

0 comments on commit 5a4c4ca

Please sign in to comment.