tokyo-rubyist-meetup · 0cjs · Oct 21, 2012 · Oct 21, 2012 · Oct 21, 2012 · Oct 21, 2012
diff --git a/cjs/Test b/cjs/Test
@@ -0,0 +1,3 @@
+#!/bin/bash
+cd -- "$(dirname -- "$0")" || exit 1  # ./read, input2 and expected live beside this script
+./read < input2 | diff -u expected -  # exit status is diff's: 0 when the output matches
diff --git a/cjs/expected b/cjs/expected
@@ -0,0 +1,7 @@
+4649
+18782
+889
+758
+931
+315
+39
diff --git a/cjs/input1 b/cjs/input1
@@ -0,0 +1 @@
+ロクロゼロ
diff --git a/cjs/input2 b/cjs/input2
@@ -0,0 +1,7 @@
+ヨロシク  
+イヤナヤツ
+ハヤク    
+ナゴヤ    
+クサイ    
+サイゴ    
+サンキュー
diff --git a/cjs/read b/cjs/read
@@ -0,0 +1,49 @@
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+
+# Reading table: output string => katakana tokens that produce it. Keys must be
+# unique: a repeated literal key silently replaces the earlier list of readings.
+$vocabulary = {
+    '0'     => [ 'マル' , 'マ' , 'レイ' , 'レ' , 'オウ' , 'ゼロ' , 'ゼ' , ],
+    '1'     => [ 'ヒトツ' , 'ヒト' , 'ヒ' , 'イチ' , 'イ' , 'ワン' , ],
+    '2'     => [ 'フタツ' , 'フタ' , 'フ' , 'ニ' , 'ツ' , ],
+    '3'     => [ 'ミツ' , 'ミ' , 'サン' , 'サ' , 'スリー' , ],
+    '4'     => [ 'ヨン' , 'ヨ' , 'ヨツ' , 'シ' , 'フォー' , ],
+    '5'     => [ 'イツツ' , 'イツ' , 'ゴ' , 'コ' , 'ファイブ' , 'ファイヴ' , ],
+    '6'     => [ 'ムツ' , 'ム' , 'ロク' , 'ロ' , 'シックス' , ],
+    '7'     => [ 'ナナツ' , 'ナナ' , 'ナ' , 'シチ' , 'セブン' , 'セヴン' , ],
+    '8'     => [ 'ヤツ' , 'ヤ' , 'ハチ' , 'ハ' , 'バ' , 'エート' , ],
+    '9'     => [ 'ココノツ' , 'コ' , 'キュウ' , 'ク' , 'ナイン' , 'キュー' , ],  # 'コ' is also under '5'; this later entry wins in $token_map
+    '10'    => [ 'トオ' , 'ジュウ' , 'ジ' , 'テン' , ],
+    "\n"    => ["\n",],         # line breaks pass through unchanged
+    ''      => [' '],           # ASCII spaces produce no output
+    '18782' => [ 'イヤナヤツ' ],  # whole-phrase entry
+}
+# Flatten the vocabulary into a token => output lookup. A token listed under
+# more than one output keeps the output that appears last in $vocabulary.
+$token_map = $vocabulary.each_with_object({}) do |(output, readings), lookup|
+    readings.each { |reading| lookup[reading] = output }
+end
+# Longest tokens first; consume_token takes the first entry that matches.
+$tokens = $token_map.keys.sort { |a, b| b.length <=> a.length }
+$input = ARGF.read  # everything from the files named in ARGV, or from stdin
+
+# Strip one token from the front of $input and print its $token_map output.
+# Returns true on success; when no token matches, prints the remaining input
+# as a diagnostic and returns false.
+#
+# $tokens is scanned linearly and the first match wins. That is fine for the
+# small inputs we expect; if this must be faster, I'd suggest changing the
+# list to a patricia trie encoding the bytes in each token.
+def consume_token
+    hit = $tokens.find { |candidate| $input[0..(candidate.length - 1)] == candidate }
+    unless hit
+        puts("\nUnmatched input: " + $input.inspect + "\n" + $input)
+        return false
+    end
+    print($token_map[hit])
+    $input = $input[(hit.length)..-1]
+    true
+end
+
+loop { break unless $input.length > 0 && consume_token }  # stop at end of input or on the first unmatched token
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#!/bin/bash

		./read < input2 \| diff -u expected -