From 11807f130e745e414d8910423a3257832347d237 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Thu, 21 Jun 2012 20:43:10 +0200 Subject: [PATCH 1/5] =?UTF-8?q?add=20Bash=20scanner=20from=20Petr=20Kov?= =?UTF-8?q?=C3=A1=C5=99=20and=20Steven=20Penny=20(for=20testing)=20[GH-19]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/coderay/scanners/bash.rb | 291 +++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 lib/coderay/scanners/bash.rb diff --git a/lib/coderay/scanners/bash.rb b/lib/coderay/scanners/bash.rb new file mode 100644 index 00000000..e1494922 --- /dev/null +++ b/lib/coderay/scanners/bash.rb @@ -0,0 +1,291 @@ +# Scanner for Bash +# Author: Petr Kovar + +module CodeRay module Scanners + + class Bash < Scanner + + register_for :bash + file_extension 'sh' + title 'bash script' + + RESERVED_WORDS = %w( + ! [[ ]] case do done elif else esac fi for function if in select then time until while { } + ) + + COMMANDS = %w( + : . break cd continue eval exec exit export getopts hash pwd + readonly return shift test [ ] times trap umask unset + ) + + BASH_COMMANDS = %w( + alias bind builtin caller command declare echo enable help let + local logout printf read set shopt source type typeset ulimit unalias + ) + + PROGRAMS = %w( + awk bash bunzip2 bzcat bzip2 cat chgrp chmod chown cp cut date dd df dir dmesg du ed egrep + false fgrep findmnt fusermount gawk grep groups gunzip gzip hostname install keyctl kill less + ln loadkeys login ls lsblk lsinitcpio lsmod mbchk mkdir mkfifo mknod more mount mountpoint mv + netstat pidof ping ping6 ps pwd readlink red rm rmdir sed sh shred sleep stty su sudo sync tar + touch tput tr traceroute traceroute6 true umount uname uncompress vdir zcat + ) + + VARIABLES = %w( + CDPATH HOME IFS MAIL MAILPATH OPTARG OPTIND PATH PS1 PS2 + ) + + BASH_VARIABLES = %w( + BASH BASH_ARGC BASH_ARGV BASH_COMMAND BASH_ENV BASH_EXECUTION_STRING + BASH_LINENO BASH_REMATCH BASH_SOURCE BASH_SUBSHELL BASH_VERSINFO + BASH_VERSINFO[0] BASH_VERSINFO[1] BASH_VERSINFO[2] BASH_VERSINFO[3] + BASH_VERSINFO[4] BASH_VERSINFO[5] BASH_VERSION COLUMNS COMP_CWORD + COMP_LINE COMP_POINT COMP_WORDBREAKS COMP_WORDS COMPREPLAY DIRSTACK + EMACS EUID FCEDIT FIGNORE FUNCNAME GLOBIGNORE GROUPS histchars HISTCMD + HISTCONTROL HISTFILE HISTFILESIZE HISTIGNORE HISTSIZE HISTTIMEFORMAT + HOSTFILE HOSTNAME HOSTTYPE IGNOREEOF INPUTRC LANG LC_ALL LC_COLLATE + LC_CTYPE LC_MESSAGE LC_NUMERIC LINENNO LINES MACHTYPE MAILCHECK OLDPWD + OPTERR OSTYPE PIPESTATUS POSIXLY_CORRECT PPID PROMPT_COMMAND PS3 PS4 PWD + RANDOM REPLAY SECONDS SHELL SHELLOPTS SHLVL TIMEFORMAT TMOUT TMPDIR UID + ) + + PRE_CONSTANTS = / \$\{? (?: \# | \? | \d | \* | @ | - | \$ | \! | _ ) \}? /ox + + IDENT_KIND = WordList.new(:ident). + add(RESERVED_WORDS, :reserved). + add(COMMANDS, :method). + add(BASH_COMMANDS, :method). +# add(PROGRAMS, :method). + add(VARIABLES, :predefined). + add(BASH_VARIABLES, :predefined) + + attr_reader :state, :quote + + def initialize(*args) + super(*args) + @state = :initial + @quote = nil + @shell = false + @brace_shell = 0 + @quote_brace_shell = 0 + end + + def scan_tokens encoder, options + + until eos? + kind = match = nil + + if match = scan(/\n/) + encoder.text_token(match, :plain) + next + end + + if @state == :initial + if match = scan(/\A#!.*/) + kind = :directive + elsif match = scan(/\s*#.*/) + kind = :comment + elsif match = scan(/.#/) + kind = :ident + elsif match = scan(/(?:\. |source ).*/) + kind = :reserved + elsif match = scan(/(?:\\.|,)/) + kind = :plain + elsif match = scan(/;/) + kind = :delimiter + elsif match = scan(/"/) + @state = :quote + @quote = match + encoder.begin_group :string + encoder.text_token(match, :delimiter) + next + elsif match = scan(/<<\S+/) + @state = :quote + match =~ /<<(\S+)/ + @quote = "#{$1}" + encoder.begin_group :string + encoder.text_token(match, :delimiter) + next + elsif match = scan(/`/) + if @shell + encoder.text_token(match, :delimiter) + encoder.end_group :shell + else + encoder.begin_group :shell + encoder.text_token(match, :delimiter) + end + @shell = (not @shell) + next + elsif match = scan(/'[^']*'?/) + kind = :string + elsif match = scan(/(?: \& | > | < | \| >> | << | >\& )/ox) + kind = :bin + elsif match = scan(/\d+[\.-](?:\d+[\.-]?)+/) + #versions, dates, and hyphen delimited numbers + kind = :float + elsif match = scan(/\d+\.\d+\s+/) + kind = :float + elsif match = scan(/\d+/) + kind = :integer + elsif match = scan(/ (?: \$\(\( | \)\) ) /x) + kind = :global_variable + elsif match = scan(/ \$\{ [^\}]+ \} /ox) + match =~ /\$\{(.*)\}/ + var=$1 + if var =~ /\[.*\]/ + encoder.text_token("${", :instance_variable) + match_array(var, encoder) + encoder.text_token("}", :instance_variable) + next + end + kind = IDENT_KIND[var] + kind = :instance_variable if kind == :ident + #elsif match = scan(/ \$\( [^\)]+ \) /ox) + elsif match = scan(/ \$\( /ox) + @brace_shell += 1 + encoder.begin_group :shell + encoder.text_token(match, :delimiter) + next + elsif match = scan(/ \) /ox) + if @brace_shell > 0 + encoder.text_token(match, :delimiter) + encoder.end_group :shell + @brace_shell -= 1 + next + end + elsif match = scan(PRE_CONSTANTS) + kind = :predefined_constant + elsif match = scan(/[^\s'"]*[A-Za-z_][A-Za-z_0-9]*\+?=/) + match =~ /(.*?)([A-Za-z_][A-Za-z_0-9]*)(\+?=)/ + str = $1 + pre = $2 + op = $3 + kind = :plain + if str.to_s.strip.empty? + kind = IDENT_KIND[pre] + kind = :instance_variable if kind == :ident + encoder.text_token(pre, kind) + encoder.text_token(op, :operator) + next + end + elsif match = scan(/[A-Za-z_]+\[[A-Za-z_\@\*\d]+\]/) + # array + match_array(match, encoder) + next + elsif match = scan(/ \$[A-Za-z_][A-Za-z_0-9]* /ox) + match =~ /\$(.*)/ + kind = IDENT_KIND[$1] + kind = :instance_variable if kind == :ident + elsif match = scan(/read \S+/) + match =~ /read(\s+)(\S+)/ + encoder.text_token('read', :method) + encoder.text_token($1, :space) + encoder.text_token($2, :instance_variable) + next + elsif match = scan(/[\!\:\[\]\{\}]/) + kind = :reserved + elsif match = scan(/ [A-Za-z_][A-Za-z_\d]*;? /x) + match =~ /([^;]+);?/ + kind = IDENT_KIND[$1] + if match[/([^;]+);$/] + encoder.text_token($1, kind) + encoder.text_token(';', :delimiter) + next + end + elsif match = scan(/(?: = | - | \+ | \{ | \} | \( | \) | && | \|\| | ;; | ! )/ox) + kind = :operator + elsif match = scan(/\s+/) + kind = :space + elsif match = scan(/[^ \$"'`\d]/) + kind = :plain + elsif match = scan(/.+/) + # this shouldn't be :reserved for highlighting bad matches + match, kind = handle_error(match, options) + end + elsif @state == :quote + if (match = scan(/\\.?/)) + kind = :content + elsif match = scan(/#{@quote}/) + encoder.text_token(match, :delimiter) + encoder.end_group :string + @quote = nil + @state = :initial + next + #kind = :symbol + elsif match = scan(PRE_CONSTANTS) + kind = :predefined_constant + elsif match = scan(/ (?: \$\(\(.*?\)\) ) /x) + kind = :global_variable + elsif match = scan(/ \$\( /ox) + encoder.begin_group :shell + encoder.text_token(match, :delimiter) + @quote_brace_shell += 1 + next + elsif match = scan(/\)/) + if @quote_brace_shell > 0 + encoder.text_token(match, :delimiter) + encoder.end_group :shell + @quote_brace_shell -= 1 + next + else + kind = :content + end + elsif match = scan(/ \$ (?: (?: \{ [^\}]* \}) | (?: [A-Za-z_0-9]+ ) ) /x) + match =~ /(\$\{?)([^\}]*)(\}?)/ + pre=$1 + var=$2 + post=$3 + if var =~ /\[.*?\]/ + encoder.text_token(pre,:instance_variable) + match_array(var, encoder) + encoder.text_token(post,:instance_variable) + next + end + kind = IDENT_KIND[match] + kind = :instance_variable if kind == :ident + elsif match = scan(/[^\)\$#{@quote}\\]+/) + kind = :content + else match = scan(/.+/) + # this shouldn't be + #kind = :reserved + #raise match + match, kind = handle_error(match, options) + end + end + + match ||= matched + encoder.text_token(match, kind) + end + + if @state == :quote + encoder.end_group :string + end + + encoder + end + + + def match_array(match, encoder) + match =~ /([A-Za-z_]+)\[(.*?)\]/ + var = $1 + key = $2 + kind = IDENT_KIND[var] + kind = :instance_variable if kind == :ident + encoder.text_token(var, kind) + encoder.text_token("[", :operator) + encoder.text_token(key, :key) + encoder.text_token("]", :operator) + end + + def handle_error(match, options) + o = {:ignore_errors => true}.merge(options) + if o[:ignore_errors] + [match, :plain] + else + [">>>>>#{match}<<<<<", :error] + end + end + + end +end +end From 55a95e67d8b1a703c8643d2b2df4178d0293b61d Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Mon, 10 Jun 2013 02:05:25 +0200 Subject: [PATCH 2/5] update from pejuko/coderay_bash --- lib/coderay/scanners/bash.rb | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/lib/coderay/scanners/bash.rb b/lib/coderay/scanners/bash.rb index e1494922..bf87a5e5 100644 --- a/lib/coderay/scanners/bash.rb +++ b/lib/coderay/scanners/bash.rb @@ -76,7 +76,7 @@ def scan_tokens encoder, options kind = match = nil if match = scan(/\n/) - encoder.text_token(match, :plain) + encoder.text_token(match, :end_line) next end @@ -85,9 +85,11 @@ def scan_tokens encoder, options kind = :directive elsif match = scan(/\s*#.*/) kind = :comment - elsif match = scan(/.#/) + elsif match = scan(/[^"]#/) kind = :ident - elsif match = scan(/(?:\. |source ).*/) + elsif match = scan(/\.\.+/) + kind = :plain + elsif match = scan(/(?:\.|source)\s+/) kind = :reserved elsif match = scan(/(?:\\.|,)/) kind = :plain @@ -146,13 +148,11 @@ def scan_tokens encoder, options encoder.begin_group :shell encoder.text_token(match, :delimiter) next - elsif match = scan(/ \) /ox) - if @brace_shell > 0 - encoder.text_token(match, :delimiter) - encoder.end_group :shell - @brace_shell -= 1 - next - end + elsif @brace_shell > 0 && match = scan(/ \) /ox) + encoder.text_token(match, :delimiter) + encoder.end_group :shell + @brace_shell -= 1 + next elsif match = scan(PRE_CONSTANTS) kind = :predefined_constant elsif match = scan(/[^\s'"]*[A-Za-z_][A-Za-z_0-9]*\+?=/) @@ -266,7 +266,7 @@ def scan_tokens encoder, options def match_array(match, encoder) - match =~ /([A-Za-z_]+)\[(.*?)\]/ + match =~ /(.+)\[(.*?)\]/ var = $1 key = $2 kind = IDENT_KIND[var] From 4131a5ff622e7c767dfd042c4c357906a2544c01 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Mon, 10 Jun 2013 02:08:04 +0200 Subject: [PATCH 3/5] add .sh file type --- lib/coderay/helpers/file_type.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/coderay/helpers/file_type.rb b/lib/coderay/helpers/file_type.rb index a5d83ff2..2f8cc8e8 100644 --- a/lib/coderay/helpers/file_type.rb +++ b/lib/coderay/helpers/file_type.rb @@ -118,6 +118,7 @@ def shebang filename 'ru' => :ruby, 'rxml' => :ruby, 'sass' => :sass, + 'sh' => :bash, 'sql' => :sql, 'tmproj' => :xml, 'xaml' => :xml, From ccf15c5c5dd7b33ee475b03b905be93c6c7eb627 Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Thu, 23 Jan 2014 22:26:25 +0100 Subject: [PATCH 4/5] fixing wrong token type --- lib/coderay/scanners/bash.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/coderay/scanners/bash.rb b/lib/coderay/scanners/bash.rb index bf87a5e5..a9b506e6 100644 --- a/lib/coderay/scanners/bash.rb +++ b/lib/coderay/scanners/bash.rb @@ -76,7 +76,7 @@ def scan_tokens encoder, options kind = match = nil if match = scan(/\n/) - encoder.text_token(match, :end_line) + encoder.text_token(match, :space) next end From 3eca4cbdbdc768510ada77b7724128463c028daf Mon Sep 17 00:00:00 2001 From: Kornelius Kalnbach Date: Thu, 23 Jan 2014 22:35:00 +0100 Subject: [PATCH 5/5] fix another token kind in bash scanner --- lib/coderay/scanners/bash.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/coderay/scanners/bash.rb b/lib/coderay/scanners/bash.rb index a9b506e6..b79047e2 100644 --- a/lib/coderay/scanners/bash.rb +++ b/lib/coderay/scanners/bash.rb @@ -121,7 +121,7 @@ def scan_tokens encoder, options elsif match = scan(/'[^']*'?/) kind = :string elsif match = scan(/(?: \& | > | < | \| >> | << | >\& )/ox) - kind = :bin + kind = :binary elsif match = scan(/\d+[\.-](?:\d+[\.-]?)+/) #versions, dates, and hyphen delimited numbers kind = :float