"""
module containing some general bitcoin-related functions. whenever the word
"orphan" is used in this file it refers to orphan-block, not orphan-transaction.
orphan transactions do not exist in the blockfiles that this module processes.
"""
# TODO - switch from strings to bytearray() for speed (stackoverflow.com/q/16678363/339874)
# TODO - scan for compressed/uncompressed addresses when scanning by public key or private key
# TODO - now that the block is grabbed by height, validate the block hash against
# the hash table
# TODO - use signrawtransaction to validate signatures (en.bitcoin.it/wiki/Raw_Transactions#JSON-RPC_API)
# TODO - figure out what to do if we found ourselves on a fork - particularly wrt doublespends
import pprint
import copy
import binascii
import hashlib
import re
import ast
import glob
import os
import shutil
import errno
import progress_meter
import psutil
import inspect
import decimal
import json
import dicttoxml
import xml.dom.minidom
import csv
import collections
import time
import config_grunt
config_dict = config_grunt.config_dict
# install bitcoinrpc like so:
# git clone https://github.com/jgarzik/python-bitcoinrpc.git
# cd python-bitcoinrpc
# change first line of setup.py to:
#!/usr/bin/env python2.7
# chmod 755 setup.py
# sudo ./setup.py install
from bitcoinrpc.authproxy import AuthServiceProxy, JSONRPCException
# pybitcointools is absolutely essential - some versions of openssl will fail
# the signature validations in unit_tests/script_tests.py. this is because some
# versions of openssl require correct der encoding - see here
# http://bitcoin.stackexchange.com/q/37469/2116 - rather than trying to enforce
# a particular version of openssl (messy) i just chose an ecdsa library that
# will consistently validate bitcoin signatures. it is quite a bit slower
# unfortunately.
import bitcoin as pybitcointools
# module to do language-related stuff for this project
import lang_grunt
# module to process the user-specified btc-inquisitor options
import options_grunt
# module globals:
# rpc details. do not set here - these are updated from config.json
rpc_connection_string = None
# the rpc connection object. initialized from the config file
rpc = None
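# hedged illustration only (hypothetical credentials) of how the rpc global
# above is used once it has been initialized from rpc_connection_string:
# rpc = AuthServiceProxy("http://user:password@localhost:8332")
# latest_block = rpc.getinfo()["blocks"]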
# if the result set grows beyond this then dump the saved blocks to screen
max_saved_blocks = 50
# for validation
max_block_size = 1024 * 1024
# update these, if necessary, using the user-specified options and function
# get_range_options()
block_range_filter_lower = None
block_range_filter_upper = None
coinbase_maturity = 100 # blocks
satoshis_per_btc = 100000000
coinbase_index = 0xffffffff
int_max = 0x7fffffff
initial_bits = "1d00ffff" # gets converted to bin in sanitize_globals() asap
max_script_size = 10000 # bytes (bitcoin/src/script/interpreter.cpp)
max_script_element_size = 520 # bytes (bitcoin/src/script/script.h)
max_op_count = 200 # nOpCount in bitcoin/src/script/interpreter.cpp
locktime_threshold = 500000000 # tue nov 5 00:53:20 1985
max_sequence_num = 0xffffffff
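# the initial_bits global above uses the compact "bits" encoding. as a hedged,
# self-contained sketch (not used by the rest of this module), the target that
# a bits value encodes can be recovered like so: the first byte is a base-256
# exponent and the remaining three bytes are the mantissa
def _demo_bits2target(bits_hex):
	exponent = int(bits_hex[: 2], 16)
	mantissa = int(bits_hex[2:], 16)
	# e.g. _demo_bits2target("1d00ffff") == 0xffff * (256 ** 26)
	return mantissa * (256 ** (exponent - 3))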
# address symbols. from https://en.bitcoin.it/wiki/List_of_address_prefixes
address_symbol = {
"pub_key_hash": {"magic": 0, "prefix": "1"},
"script_hash": {"magic": 5, "prefix": "3"},
"compact_pub_key": {"magic": 21, "prefix": "4"},
"namecoin_pub_key_hash": {"magic": 52, "prefix": "M"},
"private_key": {"magic": 128, "prefix": "5"},
"testnet_pub_key_hash": {"magic": 111, "prefix": "n"},
"testnet_script_hash": {"magic": 196, "prefix": "2"}
}
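# hedged sketch (not used by the rest of this module) of why each "magic"
# byte above maps to its address "prefix": base58check-encode the magic byte
# followed by a 20-byte hash160 and look at the leading character. e.g.
# _demo_address_from_magic(0, "\x00" * 20) begins with "1" - the pub_key_hash
# prefix above
def _demo_address_from_magic(magic, hash160_bytes):
	chars = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
	payload = chr(magic) + hash160_bytes
	# the checksum is the first 4 bytes of the double-sha256 of the payload
	checksum = hashlib.sha256(hashlib.sha256(payload).digest()).digest()[: 4]
	data = payload + checksum
	num = int(binascii.hexlify(data), 16)
	encoded = "" # init
	while num > 0:
		(num, remainder) = divmod(num, 58)
		encoded = chars[remainder] + encoded
	# in base58check each leading zero-byte becomes a literal "1"
	pad = len(data) - len(data.lstrip("\x00"))
	return ("1" * pad) + encoded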
# start-heights for the given block version numbers. see bip34 for discussion.
block_version_ranges = {
1: 0,
2: 227836, # bip34
3: 363724, # bip66 (bitcoin.stackexchange.com/a/38438/2116)
4: float('inf') # version 4 does not yet exist - set to something very large
}
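# hedged sketch of how the ranges above can be queried (the authoritative
# get_version_from_height() appears further down this module): the version in
# force at a given height is the highest version whose start-height has been
# reached. e.g. heights 0, 227836 and 363724 give versions 1, 2 and 3
def _demo_version_from_height(block_height):
	return max(
		version for (version, start_height) in
		block_version_ranges.items() if start_height <= block_height
	)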
#base_dir = None # init
tx_metadata_dir = None # init
# TODO - mark all validation data as True for blocks we have already passed
# format is hash, height, prev hash
saved_validation_file = "@@base_dir@@/latest-validated-block.txt"
saved_validation_data = None # gets initialized asap in the following code.
# block hashes of known orphans (one per line in hex)
known_orphans_file = "@@base_dir@@/known-orphans.txt"
saved_known_orphans = None # gets initialized asap in the following code.
aux_blockchain_data = None # gets initialized asap in the following code
tx_metadata_keynames = [
# the end of the tx hash as a hex string (the start is in the file name)
"tx_hash",
# last 2 bytes of the block hash - tx num. this is necessary so we can
# distinguish between transactions with identical hashes.
"blockhashend_txnum",
"blockfile_num", # int (deprecated - set to empty string)
"block_start_pos", # int (deprecated - set to empty string)
"tx_start_pos", # int (deprecated - set to empty string)
"tx_size", # int (deprecated - set to empty string)
"block_height", # int (deprecated - set to empty string)
"is_coinbase", # 1 = True, None = False
"is_orphan", # 1 = True, None = False (deprecated - set to empty string)
"spending_txs_list" # "[spendee_hash-spendee_index, ...]"
]
block_header_info = [
"orphan_status",
"block_height",
"block_hash",
"version",
"previous_block_hash",
"merkle_root",
"timestamp",
"bits",
"target",
"difficulty",
"nonce",
"block_size",
#"block_bytes"
]
block_header_validation_info = [
# do the transaction hashes form the merkle root specified in the header?
# (the calculation is sketched in _demo_merkle_root below this list)
"merkle_root_validation_status",
# do the previous bits and time to mine 2016 blocks produce these bits?
"bits_validation_status",
# is the difficulty > 1?
"difficulty_validation_status",
# is the block hash below the target?
"block_hash_validation_status",
# is the block size less than the permitted maximum?
"block_size_validation_status",
# versions must coincide with block height ranges
"block_version_validation_status"
]
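# hedged sketch (not used by the rest of this module) of the merkle
# calculation referred to in merkle_root_validation_status above: pair up the
# binary tx hashes (internal byte order), duplicating the last hash whenever a
# row has an odd count, double-sha256 each concatenated pair, and repeat until
# a single hash - the merkle root - remains
def _demo_merkle_root(tx_hashes):
	row = list(tx_hashes)
	while len(row) > 1:
		if len(row) % 2:
			row.append(row[-1]) # duplicate the last hash in an odd row
		row = [
			hashlib.sha256(hashlib.sha256(row[i] + row[i + 1]).digest()).digest()
			for i in range(0, len(row), 2)
		]
	return row[0]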
all_txin_info = [
"prev_txs_metadata",
"prev_txs",
"txin_funds",
"txin_coinbase_change_funds",
"txin_hash",
"txin_index",
"txin_script_length",
"txin_script",
"txin_script_list",
"txin_script_format",
"txin_parsed_script",
"txin_sequence_num"
# note that the "pubkey" txin elements are included in the
# all_tx_validation_info list rather than here. this is because txin scripts
# require validation against txout scripts from the previous tx.
]
all_txout_info = [
"txout_funds",
"txout_script_length",
"txout_script",
"txout_script_list",
"txout_script_format",
# a dict of pubkeys taken directly from the txout script. note that since no
# validation of the txout script is required then these pubkeys may be
# invalid (bitcoin.stackexchange.com/a/38049/2116). the only way a pubkey is
# identified here is if the txout script matches a standard format.
"txout_standard_script_pubkey",
# a list of addresses taken directly from the txout script (not derived from
# pubkeys). note that this list may contain p2sh addresses. as with pubkeys,
# there is no guarantee that these addresses are spendable or even valid.
# the only way an address is identified here is if the txout script matches
# a standard format.
"txout_standard_script_address",
"txout_parsed_script"
]
remaining_tx_info = [
#"tx_pos_in_block",
"num_txs",
"tx_version",
"num_tx_inputs",
"num_tx_outputs",
"tx_lock_time",
"tx_timestamp",
"tx_hash",
"tx_bytes",
"tx_size",
"tx_change"
]
all_tx_validation_info = [
"tx_lock_time_validation_status",
"txin_coinbase_hash_validation_status",
"txin_hash_validation_status",
"txin_coinbase_index_validation_status",
"txin_index_validation_status",
"txin_single_spend_validation_status",
"txin_spend_from_non_orphan_validation_status",
"txin_checksig_validation_status",
"txin_mature_coinbase_spend_validation_status",
# this element contains signatures and pubkeys in the format:
# {sig0: {pubkey0: True, pubkey1: "explanation of failure"}, ...}
"txin_sig_pubkey_validation_status",
# only check the standard address. there is no point checking the addresses
# we create from pubkeys since these must be correct
"txout_standard_script_address_checksum_validation_status",
"txins_exist_validation_status",
"txouts_exist_validation_status",
"tx_funds_balance_validation_status",
# TODO - implement this
# for a standard p2pkh script, validate that the pubkey maps to the given
# address and not to the (un)compressed alternate address
"tx_pubkey_to_address_validation_status"
]
# TODO - redo this according to bip9. maybe include timestamps as well as
# version numbers for the unique identifier
all_version_validation_info = {
1: [], # default
2: ["txin_coinbase_block_height_validation_status"], # bip34
3: ["txin_der_signature_validation_status"], # bip66
4: [] # bip65 - this enables checklocktimeverify, but there is nothing to
# validate, just new functionality to enable
}
# TODO - some future bip will create a version for this validation info:
"canonical_signatures_validation_status"
"non_push_scriptsig_validation_status"
"smallest_possible_push_in_scriptsig_validation_status"
"zero_padded_stack_ints_validation_status"
"low_s_validation_status"
"superfluous_scriptsig_operations_validation_status"
"ignored_checksig_stack_element_validation_status"
# info without validation
all_tx_info = all_txin_info + all_txout_info + remaining_tx_info
all_block_info = block_header_info + all_tx_info
# info with validation
all_tx_and_validation_info = all_tx_info + all_tx_validation_info
all_block_header_and_validation_info = block_header_info + \
block_header_validation_info
all_block_and_validation_info = all_block_header_and_validation_info + \
all_tx_and_validation_info
# validation info only
all_validation_info = block_header_validation_info + all_tx_validation_info
"""config_file = "config.json"
def import_config():
" ""
this function is run automatically whenever this module is imported - see
the final lines in this file
"" "
global base_dir, tx_metadata_dir, rpc_connection_string
if not os.path.isfile(config_file):
# the config file is not mandatory since there are uses of this script
# which do not involve reading the blockchain or writing outputs to file
return
try:
with open(config_file, "r") as f:
config_json = f.read()
except:
raise IOError("config file %s is inaccessible" % config_file)
try:
config_dict = json.loads(config_json)
except Exception, error_string:
raise IOError(
"config file %s contains malformed json, which could not be parsed:"
" %s.%serror details: %s"
% (config_file, config_json, os.linesep, error_string)
)
if "base_dir" in config_dict:
base_dir = os.path.join(os.path.expanduser(config_dict["base_dir"]), "")
if "rpc_user" in config_dict:
rpc_user = config_dict["rpc_user"]
if "rpc_password" in config_dict:
rpc_password = config_dict["rpc_password"]
if "rpc_host" in config_dict:
rpc_host = config_dict["rpc_host"]
if "rpc_port" in config_dict:
rpc_port = config_dict["rpc_port"]
rpc_connection_string = "http://%s:%s@%s:%d" % (
rpc_user, rpc_password, rpc_host, rpc_port
)
if "tx_metadata_dir" in config_dict:
tx_metadata_dir = os.path.join(
substitute_base_dir(
os.path.expanduser(config_dict["tx_metadata_dir"])
), ""
)
def substitute_base_dir(path):
" ""string substitution: @@base_dir@@ -> base_dir"" "
# TODO use config_grunt.py now
global base_dir
if "@@base_dir@@" in path:
try:
path = path.replace("@@base_dir@@", base_dir)
except:
raise Exception(
"failed to add base directory %s to tx metadata directory %s"
% (base_dir, path)
)
# normpath converts // to / but also removes any trailing slashes
return os.path.normpath(path)
"""
def sanitize_globals():
"""
this function is run automatically whenever this module is imported - see
the final lines in this file
"""
global tx_metadata_dir, blank_hash, initial_bits, \
saved_validation_data, saved_validation_file, aux_blockchain_data, \
known_orphans_file, saved_known_orphans
"""
if config_dict["base_dir"] is not None:
if not os.path.isdir(config_dict["base_dir"]):
raise IOError(
"cannot access base directory %s" % config_dict["base_dir"]
)
if tx_metadata_dir is not None:
if not os.path.isdir(tx_metadata_dir):
raise IOError(
"cannot access the transaction metadata directory %s"
% tx_metadata_dir
)
"""
blank_hash = hex2bin("0" * 64)
initial_bits = hex2bin(initial_bits)
#saved_validation_file = substitute_base_dir(saved_validation_file)
saved_validation_data = get_saved_validation_data()
#known_orphans_file = substitute_base_dir(known_orphans_file)
saved_known_orphans = get_saved_known_orphans()
def enforce_sanitization(inputs_have_been_sanitized):
previous_function = inspect.stack()[1][3] # [0][3] would be this func name
if not inputs_have_been_sanitized:
raise Exception(
"you must sanitize the input options with function"
" sanitize_options_or_die() before passing them to function %s()."
% previous_function
)
def ensure_correct_bitcoind_version():
"""make sure all the bitcoind methods used in this program are available"""
version = get_info()["version"]
if version < 70000:
raise ValueError(
"you are running bitcoind version %s. however this script requires"
" at least version 0.7. please upgrade bitcoind"
% bitcoind_version2human_str(version)
)
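# hedged sketch of the integer version scheme assumed above (the authoritative
# bitcoind_version2human_str() lives elsewhere in this module): bitcoind packs
# its version as major * 1000000 + minor * 10000 + revision * 100 + build, so
# 70000 parses to "0.7.0" and 90300 to "0.9.3"
def _demo_bitcoind_version2human_str(version):
	(major, remainder) = divmod(version, 1000000)
	(minor, remainder) = divmod(remainder, 10000)
	(revision, build) = divmod(remainder, 100)
	return "%d.%d.%d" % (major, minor, revision)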
def init_base_dir():
"""
if the base dir does not exist then attempt to create it. also create the
necessary subdirectories and their readme files for this script. die if this
fails.
"""
# TODO - fix the words written in the file
if not os.path.exists(base_dir):
os.makedirs(base_dir)
if not os.path.exists(tx_metadata_dir):
os.makedirs(tx_metadata_dir)
readme_file = os.path.join(base_dir, "README")
try:
if not os.path.exists(readme_file):
with open(readme_file, "w") as f:
f.write(
"this directory contains the following metadata for the"
" btc-inquisitor script:%s- tx_metadata dir - data to"
" locate transactions in the blockchain. the directory"
" makes up the hash of each transaction and the text file"
" located within the final dir contains the blockfile"
" number, the position of the start of the block (including"
" magic network id), the position of the start of the"
" transaction within this block, the size of the"
" transaction, the height of the block this transaction is"
" in, the orphan status of the block, the transaction"
" outputs that have been spent and the hashes indexes of"
" the transactions that have spent these hashes. for"
" example, file ~/.bit-inquisitor/tx_metadata/ab/cd/ef.txt"
" corresponds to transaction with hash abcdef (obviously"
" not a real hash). and if it has content"
" 7,1000,180,200,60000,orphan,[0123-0,4567-5,] then this"
" transaction exists in blockfile blk00007.dat, 1000 bytes"
" into the file, the transaction starts 180 bytes into this"
" block and has a length of 200 bytes. the block that the"
" transaction exists in has height 60,000 (where 0 is the"
" genesis block), it is an orphan transaction. the first"
" output of this transaction has already been spent by"
" index 0 of a transaction starting with hash 0123, and the"
" second output of this transaction has already been spent"
" by index 5 of a transaction starting with hash 4567. the"
" third output has not yet been spent.%sthe transaction"
" metadata in these files is used to perform checksigs in"
" the blockchain for validations, and the spending data is"
" used to check for doublespends. the block height is used"
" to ensure that a coinbase transaction has reached"
" maturity before being spent."
% ((os.linesep * 2), os.linesep)
)
except:
raise IOError("failed to create readme file")
def init_orphan_list():
"""
we only know if a block is an orphan by waiting coinbase_maturity blocks
then looking back and identifying blocks which are not on the main-chain.
so save all blocks then analyse previous coinbase_maturity blocks every
coinbase_maturity blocks and update the list of orphan hashes
"""
orphans = [] # list of hashes
return orphans
def validate_blockchain(options, get_prev_tx_methods, sanitized = False):
"""
validate the blockchain beginning at the genesis block. this function is
called whenever the user invokes the -v/--validate flag.
validation creates a (huge) database of spent txs on disk and checks block
heights against block hashes in bitcoind.
no data is returned as part of this function - exit silently upon success
and raise an error upon fail.
the user will almost certainly want to use the progress meter flag
-p/--progress in conjunction with validation as it will take a very long
time (weeks) to validate the blockchain from start to finish.
a dict of block heights and hashes (hash_table) is built up and used to
detect orphans in this function. any transactions for these orphan blocks
are marked as "is_orphan" in the previous tx data in the parsed block.
attempting to spend a transaction from these orphan blocks results in a
failed validation and function enforce_valid_block() will raise an exception
"""
# mimic the behaviour of the original bitcoin source code when performing
# validations. this means validating certain buggy transactions without
# dying. search 'bugs_and_all' in this file to see where this is necessary.
bugs_and_all = True
# make sure the user input data has been sanitized
enforce_sanitization(sanitized)
# initialize the hash table from where we left off validating last time.
# {current hash: [current block height, previous hash], ...}
hash_table = init_hash_table()
# get the block height to start validating from. begin 1 after the latest
# block in the hash table, since all hash table blocks have already been
# validated.
block_height = truncate_hash_table(hash_table, 1).values()[0][0] + 1
# get the very latest block height in the blockchain
latest_block = get_info()["blocks"]
# init the bits for the previous (already validated) block
if block_height == 0:
block_1_ago = {"bits": None, "timestamp": None}
else:
temp = get_block(block_height - 1, "json")
block_1_ago = {"bits": hex2bin(temp["bits"]), "timestamp": temp["time"]}
def prog(action):
"""quick function to update progress meter"""
if options.progress:
progress_meter.render(
100 * block_height / float(latest_block),
"%s block %d of %d" % (action, block_height, latest_block)
)
while True:
# if we have already validated the whole user-defined range then exit
# here. note that block_height is the latest validated block height
if block_height >= block_range_filter_upper:
# TODO - test this
return True
# get the block from bitcoind
prog("fetching")
block_bytes = get_block(block_height, "bytes")
# get the version validation info for this block height. note that
# blocks and transactions do not have the validation elements from
# future versions. it's not that these are set to None - it's that the
# elements don't exist, since we cannot predict the future
version_validation_info = get_version_validation_info(
get_version_from_height(block_height)
)
# parse the block and initialize the validation elements to None
prog("parsing")
parsed_block = block_bin2dict(
block_bytes, block_height, all_block_and_validation_info + \
version_validation_info, get_prev_tx_methods, options.explain
)
# die if this block has no ancestor in the hash table
enforce_ancestor(hash_table, parsed_block["previous_block_hash"])
save_tx_metadata(parsed_block)
# update the hash table (contains orphan and main-chain blocks)
hash_table[parsed_block["block_hash"]] = [
parsed_block["block_height"], parsed_block["previous_block_hash"]
]
# if there are any orphans in the hash table then save them to disk and
# also to the saved_known_orphans var
save_new_orphans(hash_table, parsed_block["block_hash"])
# truncate the hash table so as not to use up too much ram
if len(hash_table) > (2 * coinbase_maturity):
hash_table = truncate_hash_table(hash_table, coinbase_maturity)
# update the validation elements of the parsed block
prog("validating")
parsed_block = validate_block(
parsed_block, block_1_ago, bugs_and_all, options.explain
)
# die if the block failed validation
enforce_valid_block(parsed_block, options)
# mark off all the txs that this validated block spends
prog("spending txs from")
mark_spent_txs(parsed_block)
# if this block height has not been saved before, or if it has been
# saved but has now changed, then back it up to disk. it is important to
# leave this until after validation, otherwise an invalid block height
# will be written to disk as if it were valid. we back-up to disk in
# case an error is encountered later (which would prevent this backup
# from occurring and then we would need to start parsing from the
# beginning again)
save_latest_validated_block(
bin2hex(parsed_block["block_hash"]), parsed_block["block_height"],
bin2hex(parsed_block["previous_block_hash"])
)
# update vars for the next loop...
# update the bits data for the next loop
(block_1_ago["bits"], block_1_ago["timestamp"]) = (
parsed_block["bits"], parsed_block["timestamp"]
)
# get the very latest block height in the blockchain to keep the
# progress meter accurate
latest_block = get_info()["blocks"]
block_height += 1
# terminate the progress meter if we are using one
if options.progress:
progress_meter.done()
# TODO - necessary?
save_new_orphans(hash_table, parsed_block["block_hash"])
def init_hash_table(block_data = None):
"""
construct the hash table that is needed to begin validating the blockchain
from the position specified by the user. the hash table must begin 1 block
before the range we begin parsing at and is in the format:
{current hash: [current block height, previous hash], ...}
if block_data is not specified then init the hash table from file. if it is
specified then also update the hash table from this block data.
"""
hash_table = {blank_hash: [-1, blank_hash]} # init
if saved_validation_data is not None:
(
saved_validated_block_hash, saved_validated_block_height,
saved_previous_validated_block_hash
) = saved_validation_data
hash_table[saved_validated_block_hash] = [
saved_validated_block_height, saved_previous_validated_block_hash
]
if block_data is not None:
hash_table[block_data["block_hash"]] = [
block_data["block_height"], block_data["previous_block_hash"]
]
return hash_table
def backup_hash_table(hash_table, latest_block_hash):
"""
save the last entry of the hash table to disk. the "block height" in the
hash table is the latest validated block.
no need to check if this block is an orphan (this will be inevitable
sometimes anyway). if we end up saving an orphan then we will just go back
coinbase_maturity blocks to restart the hash table from there.
"""
try:
with open(hash_table_file, "w") as f:
f.write(
"%s,%s,%s." % (
latest_block_hash, hash_table[latest_block_hash][0],
hash_table[latest_block_hash][1]
)
)
except:
raise IOError(
"failed to save the hash table to file %s" % hash_table_file
)
def print_or_return_blocks(
filtered_blocks, parsed_block, options, max_saved_blocks
):
"""
if the filtered_blocks dict is bigger than max_saved_blocks then output the
data now. this must be done otherwise we will run out of memory.
if the filtered_blocks dict is not bigger than max_saved_blocks then just
append the latest block to the filtered_blocks dict and return the whole
dict.
if the user has not specified any output data (probably just doing a
validation) then don't update the filtered_blocks dict.
"""
if options.OUTPUT_TYPE is None:
return filtered_blocks
filtered_blocks[parsed_block["block_hash"]] = parsed_block
# if there is too much data to save in memory then print it now
if len(filtered_blocks) > max_saved_blocks:
# first filter out the data that has been specified by the options
data = final_results_filter(filtered_blocks, options)
print get_formatted_data(options, data)
# clear filtered_blocks to prevent memory from growing
filtered_blocks = {}
return filtered_blocks
# if there is not too much data to save in memory then just return it
return filtered_blocks
"""
def extract_txs(binary_blocks, options):
"" "
return only the relevant transactions. no progress meter here as this stage
should be very quick even for thousands of transactions
"" "
filtered_txs = []
for (block_height, block) in binary_blocks.items():
if isinstance(block, dict):
parsed_block = block
else:
parsed_block = block_bin2dict(
block, ["tx_hash", "tx_bytes", "txin_address", "txout_address"]
)
for tx_num in sorted(parsed_block["tx"]):
break_now = False # reset
if (
(options.TXHASHES is not None) and
parsed_block["tx"][tx_num]["hash"] in options.TXHASHES
):
filtered_txs.append(parsed_block["tx"][tx_num])
continue # on to next tx
if parsed_block["tx"][tx_num]["input"] is not None:
for input_num in parsed_block["tx"][tx_num]["input"]:
if (
(parsed_block["tx"][tx_num]["input"][input_num] \
["address"] is not None) and
(options.ADDRESSES is not None) and
(parsed_block["tx"][tx_num]["input"][input_num] \
["address"] in options.ADDRESSES)
):
filtered_txs.append(parsed_block["tx"][tx_num])
break_now = True
break # to next txin
if break_now:
continue # to next tx_num
if parsed_block["tx"][tx_num]["output"] is not None:
for output_num in parsed_block["tx"][tx_num]["output"]:
if (
(parsed_block["tx"][tx_num]["output"][output_num] \
["address"] is not None) and
(options.ADDRESSES is not None) and
(parsed_block["tx"][tx_num]["output"][output_num] \
["address"] in options.ADDRESSES)
):
filtered_txs.append(parsed_block["tx"][tx_num])
break_now = True
break # to next txout
if break_now:
continue # to next tx_num
return filtered_txs
"""
def save_latest_validated_block(
latest_validated_block_hash, latest_validated_block_height,
previous_validated_block_hash
):
"""
save to disk the latest block that has been validated. overwrite file if it
exists. the file format is:
latest validated block hash, latest validated block height, previously
validated hash
"""
# do not overwrite a later value with an earlier value
if saved_validation_data is not None:
(
saved_validated_block_hash, saved_validated_block_height,
saved_previous_validated_block_hash
) = saved_validation_data # global
if latest_validated_block_height <= saved_validated_block_height:
return
# from here on we know that the latest validated block is beyond where we
# were up to before.
# backup the file in case the write fails (copy2 preserves file metadata)
backup_validation_file = "%s.backup.%s" % (
saved_validation_file, time.strftime("%Y-%m-%d-%H-%M-%S")
)
shutil.copy2(saved_validation_file, backup_validation_file)
# the old validation point is now safely backed-up :)
# we only want 1 backup so make sure that any previous backups are removed
for filename in glob.glob("%s*" % saved_validation_file):
if filename not in [saved_validation_file, backup_validation_file]:
os.remove(filename)
# now update the latest-saved-tx file
with open(saved_validation_file, "w") as f:
f.write(
"%s,%d,%s." % (
latest_validated_block_hash, latest_validated_block_height,
previous_validated_block_hash
)
)
def get_saved_validation_data():
"""
retrieve the saved validation data. this enables us to avoid re-validating
blocks that have already been validated in the past. the file format is:
saved validated hash,corresponding height,previous validated hash.
note the full-stop at the end - this is vital as it ensures that the entire
file has been written correctly in the past, and not terminated half way
through a write. an illustrative example follows this function.
"""
try:
with open(saved_validation_file, "r") as f:
file_data = f.read().strip()
file_exists = True
except:
# the file cannot be opened
file_exists = False
saved_validation_data = None
if file_exists:
if file_data[-1] != ".":
raise IOError(
"the validation data was not previously backed up to disk"
" correctly. it should end with a full stop, however one was"
" not found. this implies that the file-write was interrupted."
" please attempt manual reconstruction of the file."
)
else:
file_data = file_data[: -1]
saved_validation_data = file_data.split(",")
saved_validation_data[0] = hex2bin(saved_validation_data[0])
saved_validation_data[1] = int(saved_validation_data[1])
saved_validation_data[2] = hex2bin(saved_validation_data[2])
return saved_validation_data
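# for illustration of the format parsed above (hypothetical, truncated
# hashes): the saved validation file holds a single line like
# abcdef...,60000,fedcba....
# i.e. latest validated hash, its height, the previous validated hash, then
# the all-important terminating full-stop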
def get_saved_known_orphans():
"""
retrieve the saved orphan data. this is necessary because validation now
happens separately from block retrieval, and we need to know if a block is an
orphan when retrieving it via rpc.
the file format is "block height, block hash" per line. note the full-stop
on the final line - this is vital as it ensures that the entire file has
been written correctly in the past, and not terminated half way through a
write. an illustrative example follows this function.
"""
try:
with open(known_orphans_file, "r") as f:
file_data = f.readlines()
file_exists = True
except:
# the file cannot be opened
file_exists = False
saved_known_orphans = None
if file_exists:
if file_data[-1].strip() != ".":
raise IOError(
"the validation data was not previously backed up to disk"
" correctly. it should end with a full stop, however one was"
" not found. this implies that the file-write was interrupted."
" please restore from one of the %s backup files."
% known_orphans_file
)
else:
file_data = file_data[: -1]
saved_known_orphans = {} # init
# convert the whole-file-string to a dict in two steps. firstly
# get a list with each element being a line of the file
list_of_csvs = [
orphan_block_hash.strip() for orphan_block_hash in file_data
]
# then loop through the list of csvs and convert to a dict
for csv_str in list_of_csvs:
csv_list = csv_str.split(",")
block_height = int(csv_list[0])
block_hash = hex2bin(csv_list[1])
if block_height not in saved_known_orphans:
# this block height has not been saved before
saved_known_orphans[block_height] = [block_hash]
elif block_hash not in saved_known_orphans[block_height]:
# this block height has been saved before but not this hash
saved_known_orphans[block_height].append(block_hash)
return saved_known_orphans
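# for illustration of the format parsed above (hypothetical, truncated
# hashes): a well-formed known-orphans file reads one "height,hash" pair per
# line, with the terminating full-stop on its own final line, eg
# 60000,abcdef...
# 60001,fedcba...
# .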
def save_known_orphans(orphans, backup = True):
"""
save the supplied dict of orphans to disk and backup the old file if
necessary. the purpose of the backup is to enable restoring in case of a
failed disk write. orphans is in the format {
block height: [block hash, ...], ...
}
"""
global saved_known_orphans
backup_orphans_file = "%s.backup.%s" % (
known_orphans_file, time.strftime("%Y-%m-%d-%H-%M-%S")
)
# copy2 preserves file metadata
shutil.copy2(known_orphans_file, backup_orphans_file)
# the old orphans file is now safely backed-up :)
for filename in glob.glob("%s*" % known_orphans_file):
if filename not in [known_orphans_file, backup_orphans_file]:
os.remove(filename)
with open(known_orphans_file, "w") as f:
# convert the orphans var to a single string for the whole file. first
# get a list of csv strings
csv_list = [] # init
for (block_height, hash_list) in orphans.items():
for block_hash in hash_list:
csv_list.append("%d,%s" % (block_height, bin2hex(block_hash)))
csv_list.append(".")
# convert the list of csv strings into a single string for the file
f.write("\n".join(s for s in csv_list))
# the new orphan data is saved to disk - reflect this in the global variable
saved_known_orphans = orphans
def is_orphan(block_hash):
for (block_height, orphan_block_hash_list) in saved_known_orphans.items():
if block_hash in orphan_block_hash_list:
return True
return False
def save_tx_metadata(parsed_block):
"""
save all txs in this block to the filesystem. as of this block the txs are
unspent.
we need to backup the location data of each tx so that it can be retrieved
from the blockchain later on. for this we need to store:
- the last bytes of the blockhash
- the tx number
- the blockfile number (deprecated*)
- the start position of the block, including magic_network_id (deprecated*)
- the start position of the tx in the block (deprecated*)
- the size of the tx in bytes (deprecated*)
* these elements were previously used to extract txs from the blk[0-9]*.dat
files, but since the transition to rpc, these elements are deprecated and so
are set to be blank strings in the metadata files.
the block hash and tx number are used to distinguish between duplicate txs
with the same hash. this way we can determine if there is a doublespend.
we also need to store the block height so that we can check whether the tx
has reached coinbase maturity before it is spent.
we also need to store the coinbase status of the transaction so we can
validate that the transaction has reached maturity later on. note that while
this information can be obtained from bitcoind we might as well get it from
the tx_metadata files since we also need the orphan status too
we also need to store the orphan status so that we know whether this block
is spendable or not. it is possible that the orphan status has not been
determined by this stage - this is not a problem as it will be updated later
on if the block is found to be an orphan.
finally, we need to store the spending tx hash and txin index. this will
enable us to determine if double-spending occurs. store a list as the final
entry of the csv file in the format: [h0-i0, h1-i1, ...], where:
- h0 is the hash of the transaction that spends txout 0
- i0 is the txin index of the transaction that spends txout 0
(an illustrative example of this list follows the function)
"""
# use only the last x bytes of the block hash to conserve disk space. this
# still gives us 256^x chances of catching a duplicate tx hash - plenty
# given how rare this is
x = 2
block_hashend = bin2hex(parsed_block["block_hash"][-x:])
for (tx_num, tx) in sorted(parsed_block["tx"].items()):
is_coinbase = 1 if (tx_num == 0) else None
is_orphan = None if not parsed_block["is_orphan"] else 1
# no spending txs at this stage
spending_txs_list = [None] * len(tx["output"])
blockhashend_txnum = "%s-%s" % (block_hashend, tx_num)
save_data = {
blockhashend_txnum: {
# TODO - check if the block height and coinbase (tx num in block)
# are returned by getrawtransaction
# it gives the block hash and we can get the info from there (though it might be slow?)
"block_height": parsed_block["block_height"],
"is_coinbase": is_coinbase,
"is_orphan": is_orphan,
"spending_txs_list": spending_txs_list
}
}
save_tx_data_to_disk(bin2hex(tx["hash"]), save_data)
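# for illustration of the spending_txs_list format described above
# (hypothetical values): a 2-output tx whose first output later gets spent by
# txin 0 of a tx with hash beginning 0123, and whose second output remains
# unspent, ends up with "spending_txs_list": ["0123-0", None]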
def save_tx_data_to_disk(txhash, save_data):
"""
save a 64 character hash, eg 2ea121e32934b7348445f09f46d03dda69117f2540de164
36835db7f032370d0 in a directory structure like base_dir/2e/a1/21.txt the
remainder of the hash is the first column entry within the csv file:
e32934b7348445f09f46d03dda69117f2540de16436835db7f032370d0
this way we use up a maximum of 256^3 = 16,777,216 files, but probably far
fewer. there should be plenty of inodes for this amount of files on any 1tb+
hard drive.
txs actually are not unique, for example, block 91842 and block 91812 both