From 9cbb218b7f5614b7263814b699092c051e4e0669 Mon Sep 17 00:00:00 2001 From: ddcw Date: Tue, 9 Jan 2024 11:03:48 +0800 Subject: [PATCH] ibd2sql v1.0 --- README.md | 654 +++++++++++++++++++---------- ibd2sql/COLLATIONS.py | 4 + ibd2sql/__init__.py | 5 + ibd2sql/ibd2sql.py | 322 ++++++++++++++ ibd2sql/innodb_page.py | 417 ++++++++++++++++++ ibd2sql/innodb_page_expage.py | 1 + ibd2sql/innodb_page_ibuf.py | 1 + ibd2sql/innodb_page_index.py | 548 ++++++++++++++++++++++++ ibd2sql/innodb_page_inode.py | 72 ++++ ibd2sql/innodb_page_sdi.py | 382 +++++++++++++++++ ibd2sql/innodb_page_spaceORxdes.py | 58 +++ ibd2sql/innodb_type.py | 156 +++++++ ibd2sql/mysql_json.py | 266 ++++++++++++ ibd2sql/page_type.py | 108 +++++ main.py | 340 ++++++++------- 15 files changed, 2938 insertions(+), 396 deletions(-) create mode 100644 ibd2sql/COLLATIONS.py create mode 100644 ibd2sql/__init__.py create mode 100644 ibd2sql/ibd2sql.py create mode 100644 ibd2sql/innodb_page.py create mode 100644 ibd2sql/innodb_page_expage.py create mode 100644 ibd2sql/innodb_page_ibuf.py create mode 100644 ibd2sql/innodb_page_index.py create mode 100644 ibd2sql/innodb_page_inode.py create mode 100644 ibd2sql/innodb_page_sdi.py create mode 100644 ibd2sql/innodb_page_spaceORxdes.py create mode 100644 ibd2sql/innodb_type.py create mode 100644 ibd2sql/mysql_json.py create mode 100644 ibd2sql/page_type.py diff --git a/README.md b/README.md index bab3891..1d536c5 100644 --- a/README.md +++ b/README.md @@ -1,141 +1,401 @@ # 介绍 -解析mysql8.0的数据文件, 并生成相关SQL. + ibd2sql 可以提取innodb ibd文件的元数据信息, 并拼接为 DDL , 还可以根据元数据信息解析ibd文件中的数据insert/replace SQL语句. + 仅支持**mysql 8.0** . 
-# 功能 -| 选项 | 说明 | 备注 | -| ----------------- | --------------------- | --------------------- | -| --sql | 打印解析出来的数据的insert语句 | | -| --ddl | 打印相关DDL | | -| --data | 打印解析出来的数据的LIST格式 | | -| --delete | 打印被标记为deleted的数据 | 全看运气 | -| --complete-insert | insert语句包含列名字 | | -| --table-name | 替换insert语句的表名 | 不含DDL的,这是特性,不是BUG -_- | -| -h | 打印帮助信息 | | -| -f | 对于包含有限支持和不支持的数据类型强制解析 | 我也不知道会发生啥... | +# 功能特点 +1. 提取DDL +2. 提取SQL为insert/replace语句 +3. 提取标记为delete的数据 +4. 根据条件过滤相关数据行 +5. 可使用DEBUG来查看解析过程 +6. 无第三方依赖包, 纯python3代码写的. +7. 支持分区表,前缀索引等 -# 使用方法 -推荐使用源码, 毕竟没得依赖 -## 查看DDL + +# 下载和使用方法 + +## 下载 + +## 源码下载: + +```shell +wget https://github.com/ddcw/ibd2sql/archive/refs/heads/main.zip +``` + +## 二进制下载: + +github : https://github.com/ddcw/ibd2sql/releases + + + + + +## 使用 + +由于无第三方依赖包, 建议使用源码 ```shell -python3 main.py --ddl /data/mysql_3314/mysqldata/db1/t20230830.ibd +SHELL> python3 main.py --help +usage: main.py [-h] [--version] [--ddl] [--sql] [--delete] [--complete-insert] + [--force] [--set] [--multi-value] [--replace] + [--table TABLE_NAME] [--schema SCHEMA_NAME] + [--sdi-table SDI_TABLE] [--where-trx WHERE_TRX] + [--where-rollptr WHERE_ROLLPTR] [--where WHERE] [--limit LIMIT] + [--debug] [--debug-file DEBUG_FILE] [--page-min PAGE_MIN] + [--page-max PAGE_MAX] [--page-start PAGE_START] + [--page-count PAGE_COUNT] [--page-skip PAGE_SKIP] + [--parallel PARALLEL] + FILENAME + +解析mysql8.0的ibd文件 https://github.com/ddcw/ibd2sql + +positional arguments: + FILENAME ibd filename + +optional arguments: + -h, --help show this help message and exit + --version, -v, -V show version + --ddl, -d print ddl + --sql print data by sql + --delete print data only for flag of deleted + --complete-insert use complete insert statements for sql + --force, -f force pasrser file when Error Page + --set set/enum to fill in actual data instead of strings + --multi-value single sql if data belong to one page + --replace "REPLACE INTO" replace to "INSERT INTO" (default) + --table TABLE_NAME replace table 
name except ddl + --schema SCHEMA_NAME replace table name except ddl + --sdi-table SDI_TABLE + read SDI PAGE from this file(ibd)(partition table) + --where-trx WHERE_TRX + default (0,281474976710656) + --where-rollptr WHERE_ROLLPTR + default (0,72057594037927936) + --where WHERE filter data(TODO) + --limit LIMIT limit rows + --debug, -D will DEBUG (it's too big) + --debug-file DEBUG_FILE + default sys.stdout if DEBUG + --page-min PAGE_MIN if PAGE NO less than it, will break + --page-max PAGE_MAX if PAGE NO great than it, will break + --page-start PAGE_START + INDEX PAGE START NO + --page-count PAGE_COUNT + page count NO + --page-skip PAGE_SKIP + skip some pages when start parse index page + --parallel PARALLEL, -p PARALLEL + parse to data/sql with N threads.(default 4) TODO + ``` -## 查看数据(INSERT) + + +# 使用例子 + +## 提取DDL + +说明: DDL不包含row_format ```shell -python3 main.py --sql /data/mysql_3314/mysqldata/db1/t20230830.ibd +SHELL> python main.py /data/mysql_3314/mysqldata/ibd2sql/t20240102_js.ibd --ddl +CREATE TABLE IF NOT EXISTS `ibd2sql`.`t20240102_js`( + `id` int NOT NULL, + `name` varchar(200) NULL, + `aa` json NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci ; + ``` -## 查看数据(含列名) +## 提取SQL + +如果有溢出页, 就将溢出页字段置为Null -对于某些数据库,可能需要列名字(比如某些分布式数据库) +binary类型默认为 base64 ```shell -python3 main.py --sql --complete-insert /data/mysql_3314/mysqldata/db1/t20230830.ibd +SHELL> python main.py /data/mysql_3314/mysqldata/ibd2sql/AllTypesExample.ibd --sql +REPLACE INTO `ibd2sql`.`AllTypesExample` VALUES (3, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, '2000-2-29', '2000-2-29 0:0:0', '2023-8-30 14:32:35.', '0:0:0', 2001, '00000', 'Zero', 0x3030303030, '00000', 0, C, X,Z, '{"AA": {"BB": true, "CC": [{"dd": null}]}}'); ``` -## 查看数据(LIST) +## 提取被标记为deleted的行 ```shell -python3 main.py --data /data/mysql_3314/mysqldata/db1/t20230830.ibd +SHELL> python main.py /data/mysql_3314/mysqldata/ibd2sql/AllTypesExample.ibd --sql --delete --complete +REPLACE INTO 
`ibd2sql`.`AllTypesExample`(`id`,`int_col`,`tinyint_col`,`smallint_col`,`mediumint_col`,`bigint_col`,`float_col`,`double_col`,`decimal_col`,`date_col`,`datetime_col`,`timestamp_col`,`time_col`,`year_col`,`char_col`,`varchar_col`,`binary_col`,`varbinary_col`,`bit_col`,`enum_col`,`set_col`,`josn_type`) VALUES (4, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, '2000-2-29', '2000-2-29 0:0:0', '2023-8-30 14:32:35.', '0:0:0', 2001, '00000', 'Zero', 0x3030303030, '00000', 0, 'C', 'X,Z', '{"AA": {"BB": true, "CC": [{"dd": null}]}}'); + + ``` -## 查看被标记为deleted的数据 +## 解析分区表 + +要使用--sdi-table指定元数据信息所在的第一个分区 ```shell -python3 main.py --delete /data/mysql_3314/mysqldata/db1/t20230830.ibd +SHELL> python main.py /data/mysql_3314/mysqldata/ibd2sql/t20240105_hash#p#p2.ibd --sdi-table /data/mysql_3314/mysqldata/ibd2sql/t20240105_hash#p#p0.ibd --sql --ddl +CREATE TABLE IF NOT EXISTS `ibd2sql`.`t20240105_hash`( + `id` int NULL, + `name` varchar(20) NULL, + `bt` date NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci +/*!50100 PARTITION BY HASH (year(`bt`)) +PARTITIONS 4 */; +INSERT INTO `ibd2sql`.`t20240105_hash` VALUES (1, 'aa', '1998-1-1'); ``` -# 支持 +# 支持内容 + +## 表属性 + +| 对象 | 是否支持 | 描述 | +| ------------ | ---- | ----------------------- | +| 存储引擎 | 支持 | 仅支持innodb | +| 字符集 | 支持 | | +| 排序规则 | 支持 | | +| 分区表(仅一级分区) | 支持 | | +| 表和schema名字替换 | 支持 | | +| 注释 | 支持 | | +| row_format | 不支持 | only DYNAMIC or COMPACT | + -支持几乎所有mysql 8.0的数据类型(除了json). 对lob对象也是有限支持. 
-支持 大部分表/字段属性 +## 字段属性 -## DDL支持 +| 对象 | 是否支持 | 描述 | +| ---- | ---- | --------- | +| 是否为空 | 支持 | | +| 虚拟列 | 支持 | 虚拟列不参与行解析 | +| 默认值 | 支持 | | +| 自增 | 支持 | | +| 注释 | 支持 | | +| 符号 | 支持 | 数字类型存在符号 | -| 对象 | 是否支持 | 备注 | -| ------------- | ---- | ---------- | -| IF NOT EXISTS | 支持 | 默认都是这个 | -| 自增 | 支持 | | -| 默认值 | 支持 | | -| 字段和表的注释 | 支持 | | -| 索引 | 支持 | 主键索引, 普通索引 | -| 外键 | 支持 | | -| 约束 | 支持 | | -| 存储引擎 | 支持 | 只支持innodb | -| 分区 | 不支持 | 不支持 | +## 索引相关 +| 对象 | 是否支持 | 描述 | +| ------ | ---- | ------------- | +| 主键索引 | 支持 | | +| 唯一索引 | 支持 | | +| 普通索引 | 支持 | | +| 虚拟列的索引 | 支持 | | +| 前缀索引 | 支持 | 前缀索引KEY数据不完整. | +| 复合索引 | 支持 | | -## 支持的数据类型 + + +## 数据类型 + +基本上除了空间类型外, 都支持, 但对于blob等大对象, 仅支持非溢出页的情况 参考:https://dev.mysql.com/doc/refman/8.0/en/storage-requirements.html -### 数字类型 +### 数据类型 + +整型均支持符号, 第一bit位为符号位(如果有符号的话) 取值方式为: -| 类型 | 大小(字节) | 有无符号 | 备注 | -| --------------- | ---------------------------------------- | ---- | ---- | -| tinyint | 1 bytes | 可选 | | -| smallint | 2 bytes | 可选 | | -| mediumint | 3 bytes | 可选 | | -| int | 4 bytes | 可选 | | -| bigint | 8 bytes | 可选 | | -| float(p) | 4 bytes if *p* is None,
4 bytes if 0 <= *p* <= 24,
8 bytes if 25 <= *p* <= 53 (就是double) | 有 | | -| double | 8 bytes | 有 | | -| DECIMAL/NUMERIC | 变长 | 有 | | -| bit(M) | (*M*+7)/8 | 有 | | +```python3 +_t 是数据 +_s 是字节数 +(_t&((1<<_s)-1))-2**_s if _t < 2**_s and not is_unsigned else (_t&((1<<_s)-1)) +``` +| 对象 | 存储占用空间(字节) | 存储方式 | 范围(仅考虑有符号的情况) | +| ------------ | ---------------------------- | ------ | ---------------------- | +| tinyint | 1 | | -128-128 | +| smallint | 2 | 大端字节序 | -32768-32768 | +| int | 4 | 大端字节序 | -2147483648-2147483648 | +| float(n) | size = 4 if ext <= 24 else 8 | float | | +| double | 8 | double | | +| bigint | 8 | 大端字节序 | | +| mediumint | 3 | 大端字节序 | -8388608-8388608 | +| decimal(m,n) | | | | +| | | | | +### 时间类型 -int/tinyint/smallint/mediumint/bigint计算方式: +| 对象 | 存储空间(字节) | 描述 | 取值范围 | +| ------------ | -------- | ----- | ---------------------------------------- | +| date | 3 | | '1000-01-01' to '9999-12-31' | +| datetime(n) | 5+N | | '1000-01-01 00:00:00.000000' to '9999-12-31 23:59:59.999999' | +| time(n) | 3+N | | '-838:59:59.000000' to '838:59:59.000000' | +| timestamp(n) | 4+N | | '1000-01-01' to '9999-12-31' | +| year | 1 | +1900 | '1901' to '2115' | -```python -_t = int.from_bytes(bdata,'big') +N计算方式 -# 第一位(bit)为符号位, 1:正数, 0:负数 -# is_unsigned 无符号: True 有符号:False -# n 大小,单位:字节 -# bdata 为原始数据 -# _t 为临时数据 -return (_t&((1<<(2**n-1)))-1))-2**(2**n-1) if _t < 2**(2**n-1) and not is_unsigned else (_t&((1<<(2**n-1))-1)) ``` +N = int((n+1)/2) +``` + + + +### 字符类型 + +| 类型 | 大小(字节) | 范围 | 备注 | +| ------------------------ | ---------------------------------------- | --------- | ----- | +| char(M) | L | <=255 字符 | | +| BINARY(M) | M | <=255 字节 | | +| VARCHAR(M), VARBINARY(M) | 1 字节长度 + L: 当 L < 1282 字节长度 + L: 当L >=128 | <=65535字节 | | +| TINYBLOB, TINYTEXT | L + 1 bytes, where L < 256 | < 256 B | | +| BLOB, TEXT | L + 2 bytes, where L < 2**16 | <=65535字节 | 仅非溢出页 | +| MEDIUMBLOB, MEDIUMTEXT | L + 3 bytes, where L < 2**24 | 16M | 仅非溢出页 | +| LONGBLOB, LONGTEXT | L + 4 bytes, where L < 2**32 | 4G | 仅非溢出页 
| +| | | | | + + + +### 其它类型 + +| 类型 | 大小 | 范围 | 备注 | +| ---- | ---------------------------------------- | ---- | ---------- | +| ENUM | 1 or 2 bytes, depending on the number of enumeration values (65,535 values maximum) | | 使用数字表示 | +| SET | 1, 2, 3, 4, or 8 bytes, depending on the number of set members (64 members maximum) | | 使用数字表示 | +| JSON | 仅非溢出页 | | mysql二进制化的 | +| 空间坐标 | | | 不支持 | + + + +# CHANGE LOG -float/double/real +| 版本 | 变更时间 | 说明 | 备注 | +| ---- | ---------- | ---------------------- | ---------------------------------------- | +| v0.1 | 2023.4.27 | 第一个版本.... | | +| v0.2 | 2023.08.30 | 支持更多数据类型 | 1. 修复year/tinyint的支持
2. 符号支持(对于数字类型)
3. 更多的数据类型支持
4. 更多的表属性支持
5. 美化输出
6. 支持表名替换
7. 支持--complete-insert | +| v0.3 | 2023.10.13 | 支持5.7升级到8.0的ibd文件 | 修复一些BUG | +| v1.0 | 2024.01.05 | 支持debug
支持更多类型和功能 | 1. 支持DEBUG
2. 支持分区表
3. 支持唯一索引
4. 支持虚拟列<br>
5. 支持instant
6. 支持约束和外键<br>
7. 支持限制输出
8.支持前缀索引 | + + + +# 修复已知问题 + +1. [前缀索引](https://www.modb.pro/db/1700402156981538816)支持. 前缀索引完整数据在数据字段而不是KEY +2. [json/blob等大对象](https://www.modb.pro/db/626066)支持: 支持非溢出页的大对象 +3. [5.7升级到8.0后找不到SDI](https://github.com/ddcw/ibd2sql/issues/5). :sdi pagno 记录在第一页 +4. [bigint类型,注释,表属性](https://github.com/ddcw/ibd2sql/issues/2) : 支持更多数据类型, 和表属性 +5. [只有1个主键和其它](https://github.com/ddcw/ibd2sql/issues/4) : 支持只有1个主键的情况, 并新增DEBUG功能 + + + +# 其它 + +比较杂, 基本上就是解析Ibd文件的时候遇到的不平坦的路 + + + +## JSON格式 + +json是mysql对其二进制化的, 所以对于数字的存储是使用的小端, 对于可变长字符串存储是使用的256*128这种 -```python -return struct.unpack('f',bdata)[0] -return struct.unpack('d',bdata)[0] ``` + 如果第一bit是1 就表示要使用2字节表示: + 后面1字节表示 使用有多少个128字节, 然后加上前面1字节(除了第一bit)的数据(0-127) 就是最终数据 +----------------------------------------------------- +| 1 bit flag | 7 bit data | if flag, 8 bit data*128 | +----------------------------------------------------- +``` + +``` + - ----------------- + | JSON OBJECT/ARRAY | + - ----------------- + | + ------------------------------------------------------------------------- +| TYPE | ELEMENT_COUNT | KEY-ENTRY(if object) | VALUE-ENTRY | KEY | VALUE | + ------------------------------------------------------------------------- + | | | + | | -------------- + -------------------------- | | UTF8MB4 DATA | + | KEY-OFFSET | KEY-LENGTH | | -------------- + -------------------------- | + | + -------------------------------- + | TYPE | OFFSET/VALUE(if small) | + -------------------------------- +``` -decimal + + +## 分区表 + +分区表的元数据信息都放在第一个分区的. + +`dd['object']['partitions']` + + + +## 前缀索引,唯一索引 + +前缀索引判断条件: + +``` +indexes[x]['elements'][x]['length'] < col['char_length'] +``` + +如果是前缀索引, KEY位置存储的数据就不是完整的(主键为前缀索引的情况), 后面读剩余字段的时候还要包含前缀索引 + + + +唯一索引: + +``` +index[x]['type']如下值: +1: PRIMARY +2: UNIQUE +3: NORMAL +``` + + + + + +## innodb varchar长度计算 + +innodb的varchar存储长度计算 + +``` +第一字节小于等于 128 字节时, 就1字节. 
否则就第一字节超过128字节的部分 *256 再加上第二字节部分来表示总大小 就是256*256 = 65536 + _size = self.readreverse(1) + size = struct.unpack('>B',_size)[0] + if size > REC_N_FIELDS_ONE_BYTE_MAX: + size = struct.unpack('>B',self.readreverse(1))[0] + (size-128)*256 + return size +``` + + + + + +## decimal计算 + +``` 整数部分和小数部分是分开的 每部分 的每9位10进制数占4字节, 剩余的就按 1-2 为1字节, 这样算 @@ -143,197 +403,141 @@ decimal 比如 (5,2) 整数就是2字节, 小数也是1字节 - (10,3) 整数就是4+1字节, 小数就是2字节 +``` -### 时间类型 +计算方式参考: + +```python3 +total_digits, decimal_digits = re.compile('decimal\((.+)\)').findall(col['column_type_utf8'],)[0].split(',') +total_digits = int(total_digits) +decimal_digits = int(decimal_digits) +integer_p1_count = int((total_digits - decimal_digits)/9) # +integer_p2_count = total_digits - decimal_digits - integer_p1_count*9 +integer_size = integer_p1_count*4 + int((integer_p2_count+1)/2) +decimal_p1_count = int(decimal_digits/9) +decimal_p2_count = decimal_digits - decimal_p1_count*9 +decimal_size = decimal_p1_count*4 + int((decimal_p2_count+1)/2) +total_size = integer_size + decimal_size + +size = total_size #decimal占用大小 +``` -| 类型 | 大小(字节) | 备注 | 范围 | -| ------------ | ------ | ----- | ---------------------------------------- | -| year | 1 | +1901 | '1901' to '2115' | -| date | 3 | | '1000-01-01' to '9999-12-31' | -| time(n) | 3+N | | '-838:59:59.000000' to '838:59:59.000000' | -| datetime(n) | 5+N | | '1000-01-01 00:00:00.000000' to '9999-12-31 23:59:59.999999' | -| timestamp(n) | 4+N | | '1000-01-01' to '9999-12-31' | -N计算方式 -```python -N = int((n+1)/2) + + +## 时间类型计算 + +date + +``` +固定3字节 1bit符号, 14bit年 4bit月 5bit日 +----------------------------------- +| signed | 1 bit | +----------------------------------- +| year | 14 bit | +----------------------------------- +| month | 4 bit | +----------------------------------- +| day | 5 bit | +----------------------------------- ``` +datetime +``` +5 bytes + fractional seconds storage +1bit符号 year_month:17bit day:5 hour:5 minute:6 second:6 
+--------------------------------------------------------------------- +| signed | 1 bit | +|-------------------------------------------------------------------- +| year and month | 17 bit | +|-------------------------------------------------------------------- +| day | 5 bit | +|-------------------------------------------------------------------- +| hour | 5 bit | +|-------------------------------------------------------------------- +| minute | 6 bit | +|-------------------------------------------------------------------- +| second | 6 bit | +--------------------------------------------------------------------- +| fractional seconds storage |each 2 digits is stored 1 byte| +--------------------------------------------------------------------- -### 字符串类型 +``` -M表示字符大小 -W表示每个字符所用字节数(解析的时候并不关心字符集) -L 表示 M*W 也就是数据长度 +time -| 类型 | 大小(字节) | 范围 | 备注 | -| ------------------------ | ---------------------------------------- | --------- | ------ | -| char(M) | L | <=255 字符 | | -| BINARY(M) | M | <=255 字节 | | -| VARCHAR(M), VARBINARY(M) | 1 字节长度 + L: 当 L < 128
2 字节长度 + L: 当L >=128 | <=65535字节 | | -| TINYBLOB, TINYTEXT | L + 1 bytes, where L < 256 | < 256 B | | -| BLOB, TEXT | L + 2 bytes, where L < 2**16 | <=65535字节 | | -| MEDIUMBLOB, MEDIUMTEXT | L + 3 bytes, where L < 2**24 | 16M | 有限支持 | -| LONGBLOB, LONGTEXT | L + 4 bytes, where L < 2**32 | 4G | 有限支持 | -| ENUM | 1 or 2 bytes, depending on the number of enumeration values (65,535 values maximum) | | 使用数字表示 | -| SET | 1, 2, 3, 4, or 8 bytes, depending on the number of set members (64 members maximum) | | 使用数字表示 | -| JSON | | | 不支持 | +``` +1bit符号 hour:11bit minute:6bit second:6bit 精度1-3bytes +------------------------------------------------------------------- +| signed | 1 bit | +------------------------------------------------------------------- +| hour | 11 bit | +------------------------------------------------------------------- +| minute | 6 bit | +------------------------------------------------------------------- +| second | 6 bit | +------------------------------------------------------------------- +| fractional seconds storage | each 2 digits is stored 1 byte | +------------------------------------------------------------------- -char 虽然大小是固定的, 但还是要每行数据都记录char大小.... (解析的时候注意是个小坑....) +``` -enum 只能取一个值 所以1字节可以表示255种, 2字节表示65535 -set 可以取多个值, 所以还得表示位置, 即1字节最多表示8个值, 类似null bit mask -enum和set就使用数字来表示, 主要是节省空间.... 感兴趣的可以字节替换. 相关变量 +timestamp -```python -columns[x]['list'] 记录每个列的enum/set 的具体选项 +``` +4 bytes + fraction ``` -# 版本变化 -| 版本 | 变更时间 | 说明 | 备注 | -| ---- | ---------- | --------- | ---------------------------------------- | -| v0.1 | 2023.4.27 | 第一个版本.... | | -| v0.2 | 2023.08.30 | 支持更多数据类型 | 1. 修复year/tinyint的支持
2. 符号支持(对于数字类型)
3. 更多的数据类型支持
4. 更多的表属性支持
5. 美化输出
6. 支持表名替换
7. 支持--complete-insert | -| v0.3 | 2023.10.13 | 支持5.7升级到8.0的ibd文件 | | -| | | | | +## ONLINE DDL +对于使用类似如下DDL 添加字段默认ALGORITHM是 INSTANT -# 使用例子 +``` +ALTER TABLE tbl_name ADD COLUMN column_name column_definition, ALGORITHM=INSTANT; +``` -本次测试的版本为 - -MySQL 8.0.28 - -innodb_default_row_format = dynamic - -准备数据 - -```mysql -CREATE TABLE AllTypesExample ( - id INT AUTO_INCREMENT PRIMARY KEY, - int_col INT, - tinyint_col TINYINT, - smallint_col SMALLINT, - mediumint_col MEDIUMINT, - bigint_col BIGINT, - float_col FLOAT, - double_col DOUBLE, - decimal_col DECIMAL(10, 2), - date_col DATE, - datetime_col DATETIME, - timestamp_col TIMESTAMP, - time_col TIME, - year_col YEAR, - char_col CHAR(5), - varchar_col VARCHAR(20), - binary_col BINARY(5), - varbinary_col VARBINARY(20), - bit_col BIT(4), - enum_col ENUM('A', 'B', 'C'), - set_col SET('X', 'Y', 'Z') -); - --- 插入数据 -INSERT INTO AllTypesExample ( - int_col, tinyint_col, smallint_col, mediumint_col, bigint_col, - float_col, double_col, decimal_col, date_col, datetime_col, - timestamp_col, time_col, year_col, char_col, varchar_col, - binary_col, varbinary_col, bit_col, enum_col, set_col -) -VALUES ( - 2147483647, 127, 32767, 8388607, 9223372036854775807, - 3.14159, 2.71828, 12345.67, '2023-01-01', '2023-01-01 12:34:56', - NOW(), '12:34:56', 2023, 'ABCDE', 'HelloWorld', - BINARY '12345', BINARY 'abcdef', 15, 'A', 'X,Y' -); - --- 插入第二条数据 -INSERT INTO AllTypesExample ( - int_col, tinyint_col, smallint_col, mediumint_col, bigint_col, - float_col, double_col, decimal_col, date_col, datetime_col, - timestamp_col, time_col, year_col, char_col, varchar_col, - binary_col, varbinary_col, bit_col, enum_col, set_col -) -VALUES ( - -2147483648, -128, -32768, -8388608, -9223372036854775808, - -3.14, -2.71, -12345.67, '1990-12-31', '1990-12-31 23:59:59', - NOW(), '23:59:59', 1990, '12345', 'Negative', - BINARY '54321', BINARY 'ghijkl', 0, 'B', 'Y,Z' -); - --- 插入第三条数据 -INSERT INTO AllTypesExample ( - int_col, tinyint_col, smallint_col, 
mediumint_col, bigint_col, - float_col, double_col, decimal_col, date_col, datetime_col, - timestamp_col, time_col, year_col, char_col, varchar_col, - binary_col, varbinary_col, bit_col, enum_col, set_col -) -VALUES ( - 0, 0, 0, 0, 0, - 0, 0, 0, '2000-02-29', '2000-02-29 00:00:00', - NOW(), '00:00:00', 2000, '00000', 'Zero', - BINARY '00000', BINARY '00000', 0, 'C', 'X,Z' -); - --- 随机删除一条数据 -DELETE FROM AllTypesExample LIMIT 1; -``` - - - -使用ibd2sql解析数据 +为了快速添加字段, 会在元数据信息记录相关信息 -```shell -(venv) 11:00:23 [root@ddcw21 ibd2sql_v0.2]#python main.py --ddl --sql /data/mysql_3314/mysqldata/db1/AllTypesExample.ibd - - CREATE Table IF NOT EXISTS `db1`.`AllTypesExample`( -`id` int NOT NULL AUTO_INCREMENT , -`int_col` int DEFAULT NULL , -`tinyint_col` tinyint DEFAULT NULL , -`smallint_col` smallint DEFAULT NULL , -`mediumint_col` mediumint DEFAULT NULL , -`bigint_col` bigint DEFAULT NULL , -`float_col` float DEFAULT NULL , -`double_col` double DEFAULT NULL , -`decimal_col` decimal(10,2) DEFAULT NULL , -`date_col` date DEFAULT NULL , -`datetime_col` datetime DEFAULT NULL , -`timestamp_col` timestamp DEFAULT NULL , -`time_col` time DEFAULT NULL , -`year_col` year DEFAULT NULL , -`char_col` char(5) DEFAULT NULL , -`varchar_col` varchar(20) DEFAULT NULL , -`binary_col` binary(5) DEFAULT NULL , -`varbinary_col` varbinary(20) DEFAULT NULL , -`bit_col` bit(4) DEFAULT NULL , -`enum_col` enum('A','B','C') DEFAULT NULL , -`set_col` set('X','Y','Z') DEFAULT NULL , -PRIMARY KEY (`id`) -)ENGINE=InnoDB ; - -INSERT INTO `db1`.`AllTypesExample` VALUES(2, -2147483648, -128, -32768, -8388608, -9223372036854775808, -3.140000104904175, -2.71, -12345.67, '1990-12-31', '1990-12-31 23:59:59', '2023-8-30 10:58:41', '23:59:59', '1990', '12345', 'Negative', '54321', 'ghijkl', 0, 2, 6); -INSERT INTO `db1`.`AllTypesExample` VALUES(3, 0, 0, 0, 0, 0, 0.0, 0.0, 0.0, '2000-2-29', '2000-2-29 0:0:0', '2023-8-30 10:58:41', '0:0:0', '2000', '00000', 'Zero', '00000', '00000', 0, 3, 5); - -(venv) 11:00:29 
[root@ddcw21 ibd2sql_v0.2]#python main.py --delete /data/mysql_3314/mysqldata/db1/AllTypesExample.ibd -INSERT INTO `db1`.`AllTypesExample` VALUES(1, 2147483647, 127, 32767, 8388607, 9223372036854775807, 3.141590118408203, 2.71828, 12345.67, '2023-1-1', '2023-1-1 12:34:56', '2023-8-30 10:58:41', '12:34:56', '2023', 'ABCDE', 'HelloWorld', '12345', 'abcdef', 15, 1, 3); -(venv) 11:00:35 [root@ddcw21 ibd2sql_v0.2]# ``` +dd_object: "se_private_data": "instant_col=1;" +column: "se_private_data": "default=636363;table_id=2041;" +``` + +对于某行数据而言, 如果 record header中instant标记位为True, 则表示这行数据新增字段不是默认值, 而是要从数据位置读取(放在其它字段数据后面) + +``` +if recorde_header.instant and col['instant']: + read key + raed filed + read filed with instant + +if not recorde_header.instant and col['instant']: + rad key + read field 新增字段取默认值 + +``` + +新增了多个字段之后, 需要注意下不是每行数据的字段数量都相等, 这时候就要使用到有instant之后在null bitmask和recored header之间记录的行数量了(含trx&rollptr).脚本对应变量为`_icc` + + + +## 寻找first leaf page +有时候inode里面记录的不准... 这时候就要从root page开始往后面找leaf page了. 注意non-leaf page 是不需要trx和rollptr的, innodb的这些信息是记录在leaf page的. 
\ No newline at end of file diff --git a/ibd2sql/COLLATIONS.py b/ibd2sql/COLLATIONS.py new file mode 100644 index 0000000..230a2b6 --- /dev/null +++ b/ibd2sql/COLLATIONS.py @@ -0,0 +1,4 @@ +# select * from information_schema.COLLATIONS +# select concat("{",group_concat(concat(id,":('",CHARACTER_SET_NAME, "','", COLLATION_NAME, "')")),"}") from information_schema.COLLATIONS; + +COLLID_TO_CHAR = {32:('armscii8','armscii8_general_ci'),64:('armscii8','armscii8_bin'),11:('ascii','ascii_general_ci'),65:('ascii','ascii_bin'),1:('big5','big5_chinese_ci'),84:('big5','big5_bin'),63:('binary','binary'),26:('cp1250','cp1250_general_ci'),34:('cp1250','cp1250_czech_cs'),44:('cp1250','cp1250_croatian_ci'),66:('cp1250','cp1250_bin'),99:('cp1250','cp1250_polish_ci'),14:('cp1251','cp1251_bulgarian_ci'),23:('cp1251','cp1251_ukrainian_ci'),50:('cp1251','cp1251_bin'),51:('cp1251','cp1251_general_ci'),52:('cp1251','cp1251_general_cs'),57:('cp1256','cp1256_general_ci'),67:('cp1256','cp1256_bin'),29:('cp1257','cp1257_lithuanian_ci'),58:('cp1257','cp1257_bin'),59:('cp1257','cp1257_general_ci'),4:('cp850','cp850_general_ci'),80:('cp850','cp850_bin'),40:('cp852','cp852_general_ci'),81:('cp852','cp852_bin'),36:('cp866','cp866_general_ci'),68:('cp866','cp866_bin'),95:('cp932','cp932_japanese_ci'),96:('cp932','cp932_bin'),3:('dec8','dec8_swedish_ci'),69:('dec8','dec8_bin'),97:('eucjpms','eucjpms_japanese_ci'),98:('eucjpms','eucjpms_bin'),19:('euckr','euckr_korean_ci'),85:('euckr','euckr_bin'),248:('gb18030','gb18030_chinese_ci'),249:('gb18030','gb18030_bin'),250:('gb18030','gb18030_unicode_520_ci'),24:('gb2312','gb2312_chinese_ci'),86:('gb2312','gb2312_bin'),28:('gbk','gbk_chinese_ci'),87:('gbk','gbk_bin'),92:('geostd8','geostd8_general_ci'),93:('geostd8','geostd8_bin'),25:('greek','greek_general_ci'),70:('greek','greek_bin'),16:('hebrew','hebrew_general_ci'),71:('hebrew','hebrew_bin'),6:('hp8','hp8_english_ci'),72:('hp8','hp8_bin'),37:('keybcs2','keybcs2_general_ci'),73:('keybcs2','keybcs2_bin'
),7:('koi8r','koi8r_general_ci'),74:('koi8r','koi8r_bin'),22:('koi8u','koi8u_general_ci'),75:('koi8u','koi8u_bin'),5:('latin1','latin1_german1_ci'),8:('latin1','latin1_swedish_ci'),15:('latin1','latin1_danish_ci'),31:('latin1','latin1_german2_ci'),47:('latin1','latin1_bin'),48:('latin1','latin1_general_ci'),49:('latin1','latin1_general_cs'),94:('latin1','latin1_spanish_ci'),2:('latin2','latin2_czech_cs'),9:('latin2','latin2_general_ci'),21:('latin2','latin2_hungarian_ci'),27:('latin2','latin2_croatian_ci'),77:('latin2','latin2_bin'),30:('latin5','latin5_turkish_ci'),78:('latin5','latin5_bin'),20:('latin7','latin7_estonian_cs'),41:('latin7','latin7_general_ci'),42:('latin7','latin7_general_cs'),79:('latin7','latin7_bin'),38:('macce','macce_general_ci'),43:('macce','macce_bin'),39:('macroman','macroman_general_ci'),53:('macroman','macroman_bin'),13:('sjis','sjis_japanese_ci'),88:('sjis','sjis_bin'),10:('swe7','swe7_swedish_ci'),82:('swe7','swe7_bin'),18:('tis620','tis620_thai_ci'),89:('tis620','tis620_bin'),35:('ucs2','ucs2_general_ci'),90:('ucs2','ucs2_bin'),128:('ucs2','ucs2_unicode_ci'),129:('ucs2','ucs2_icelandic_ci'),130:('ucs2','ucs2_latvian_ci'),131:('ucs2','ucs2_romanian_ci'),132:('ucs2','ucs2_slovenian_ci'),133:('ucs2','ucs2_polish_ci'),134:('ucs2','ucs2_estonian_ci'),135:('ucs2','ucs2_spanish_ci'),136:('ucs2','ucs2_swedish_ci'),137:('ucs2','ucs2_turkish_ci'),138:('ucs2','ucs2_czech_ci'),139:('ucs2','ucs2_danish_ci'),140:('ucs2','ucs2_lithuanian_ci'),141:('ucs2','ucs2_slovak_ci'),142:('ucs2','ucs2_spanish2_ci'),143:('ucs2','ucs2_roman_ci'),144:('ucs2','ucs2_persian_ci'),145:('ucs2','ucs2_esperanto_ci'),146:('ucs2','ucs2_hungarian_ci'),147:('ucs2','ucs2_sinhala_ci'),148:('ucs2','ucs2_german2_ci'),149:('ucs2','ucs2_croatian_ci'),150:('ucs2','ucs2_unicode_520_ci'),151:('ucs2','ucs2_vietnamese_ci'),159:('ucs2','ucs2_general_mysql500_ci'),12:('ujis','ujis_japanese_ci'),91:('ujis','ujis_bin'),54:('utf16','utf16_general_ci'),55:('utf16','utf16_bin'),101:('utf16','ut
f16_unicode_ci'),102:('utf16','utf16_icelandic_ci'),103:('utf16','utf16_latvian_ci'),104:('utf16','utf16_romanian_ci'),105:('utf16','utf16_slovenian_ci'),106:('utf16','utf16_polish_ci'),107:('utf16','utf16_estonian_ci'),108:('utf16','utf16_spanish_ci'),109:('utf16','utf16_swedish_ci'),110:('utf16','utf16_turkish_ci'),111:('utf16','utf16_czech_ci'),112:('utf16','utf16_danish_ci'),113:('utf16','utf16_lithuanian_ci'),114:('utf16','utf16_slovak_ci'),115:('utf16','utf16_spanish2_ci'),116:('utf16','utf16_roman_ci'),117:('utf16','utf16_persian_ci'),118:('utf16','utf16_esperanto_ci'),119:('utf16','utf16_hungarian_ci'),120:('utf16','utf16_sinhala_ci'),121:('utf16','utf16_german2_ci'),122:('utf16','utf16_croatian_ci'),123:('utf16','utf16_unicode_520_ci'),124:('utf16','utf16_vietnamese_ci'),56:('utf16le','utf16le_general_ci'),62:('utf16le','utf16le_bin'),60:('utf32','utf32_general_ci'),61:('utf32','utf32_bin'),160:('utf32','utf32_unicode_ci'),161:('utf32','utf32_icelandic_ci'),162:('utf32','utf32_latvian_ci'),163:('utf32','utf32_romanian_ci'),164:('utf32','utf32_slovenian_ci'),165:('utf32','utf32_polish_ci'),166:('utf32','utf32_estonian_ci'),167:('utf32','utf32_spanish_ci'),168:('utf32','utf32_swedish_ci'),169:('utf32','utf32_turkish_ci'),170:('utf32','utf32_czech_ci'),171:('utf32','utf32_danish_ci'),172:('utf32','utf32_lithuanian_ci'),173:('utf32','utf32_slovak_ci'),174:('utf32','utf32_spanish2_ci'),175:('utf32','utf32_roman_ci'),176:('utf32','utf32_persian_ci'),177:('utf32','utf32_esperanto_ci'),178:('utf32','utf32_hungarian_ci'),179:('utf32','utf32_sinhala_ci'),180:('utf32','utf32_german2_ci'),181:('utf32','utf32_croatian_ci'),182:('utf32','utf32_unicode_520_ci'),183:('utf32','utf32_vietnamese_ci'),33:('utf8','utf8_general_ci'),76:('utf8','utf8_tolower_ci'),83:('utf8','utf8_bin'),192:('utf8','utf8_unicode_ci'),193:('utf8','utf8_icelandic_ci'),194:('utf8','utf8_latvian_ci'),195:('utf8','utf8_romanian_ci'),196:('utf8','utf8_slovenian_ci'),197:('utf8','utf8_polish_ci'),198:('u
tf8','utf8_estonian_ci'),199:('utf8','utf8_spanish_ci'),200:('utf8','utf8_swedish_ci'),201:('utf8','utf8_turkish_ci'),202:('utf8','utf8_czech_ci'),203:('utf8','utf8_danish_ci'),204:('utf8','utf8_lithuanian_ci'),205:('utf8','utf8_slovak_ci'),206:('utf8','utf8_spanish2_ci'),207:('utf8','utf8_roman_ci'),208:('utf8','utf8_persian_ci'),209:('utf8','utf8_esperanto_ci'),210:('utf8','utf8_hungarian_ci'),211:('utf8','utf8_sinhala_ci'),212:('utf8','utf8_german2_ci'),213:('utf8','utf8_croatian_ci'),214:('utf8','utf8_unicode_520_ci'),215:('utf8','utf8_vietnamese_ci'),223:('utf8','utf8_general_mysql500_ci'),45:('utf8mb4','utf8mb4_general_ci'),46:('utf8mb4','utf8mb4_bin'),224:('utf8mb4','utf8mb4_unicode_ci'),225:('utf8mb4','utf8mb4_icelandic_ci'),226:('utf8mb4','utf8mb4_latvian_ci'),227:('utf8mb4','utf8mb4_romanian_ci'),228:('utf8mb4','utf8mb4_slovenian_ci'),229:('utf8mb4','utf8mb4_polish_ci'),230:('utf8mb4','utf8mb4_estonian_ci'),231:('utf8mb4','utf8mb4_spanish_ci'),232:('utf8mb4','utf8mb4_swedish_ci'),233:('utf8mb4','utf8mb4_turkish_ci'),234:('utf8mb4','utf8mb4_czech_ci'),235:('utf8mb4','utf8mb4_danish_ci'),236:('utf8mb4','utf8mb4_lithuanian_ci'),237:('utf8mb4','utf8mb4_slovak_ci'),238:('utf8mb4','utf8mb4_spanish2_ci'),239:('utf8mb4','utf8mb4_roman_ci'),240:('utf8mb4','utf8mb4_persian_ci'),241:('utf8mb4','utf8mb4_esperanto_ci'),242:('utf8mb4','utf8mb4_hungarian_ci'),243:('utf8mb4','utf8mb4_sinhala_ci'),244:('utf8mb4','utf8mb4_german2_ci'),245:('utf8mb4','utf8mb4_croatian_ci'),246:('utf8mb4','utf8mb4_unicode_520_ci'),247:('utf8mb4','utf8mb4_vietnamese_ci'),255:('utf8mb4','utf8mb4_0900_ai_ci'),256:('utf8mb4','utf8mb4_de_pb_0900_ai_ci'),257:('utf8mb4','utf8mb4_is_0900_ai_ci'),258:('utf8mb4','utf8mb4_lv_0900_ai_ci'),259:('utf8mb4','utf8mb4_ro_0900_ai_ci'),260:('utf8mb4','utf8mb4_sl_0900_ai_ci'),261:('utf8mb4','utf8mb4_pl_0900_ai_ci'),262:('utf8mb4','utf8mb4_et_0900_ai_ci'),263:('utf8mb4','utf8mb4_es_0900_ai_ci'),264:('utf8mb4','utf8mb4_sv_0900_ai_ci'),265:('utf8mb4','utf8mb4_tr_090
0_ai_ci'),266:('utf8mb4','utf8mb4_cs_0900_ai_ci'),267:('utf8mb4','utf8mb4_da_0900_ai_ci'),268:('utf8mb4','utf8mb4_lt_0900_ai_ci'),269:('utf8mb4','utf8mb4_sk_0900_ai_ci'),270:('utf8mb4','utf8mb4_es_trad_0900_ai_ci'),271:('utf8mb4','utf8mb4_la_0900_ai_ci'),273:('utf8mb4','utf8mb4_eo_0900_ai_ci'),274:('utf8mb4','utf8mb4_hu_0900_ai_ci'),275:('utf8mb4','utf8mb4_hr_0900_ai_ci'),277:('utf8mb4','utf8mb4_vi_0900_ai_ci'),278:('utf8mb4','utf8mb4_0900_as_cs'),279:('utf8mb4','utf8mb4_de_pb_0900_as_cs'),280:('utf8mb4','utf8mb4_is_0900_as_cs'),281:('utf8mb4','utf8mb4_lv_0900_as_cs'),282:('utf8mb4','utf8mb4_ro_0900_as_cs'),283:('utf8mb4','utf8mb4_sl_0900_as_cs'),284:('utf8mb4','utf8mb4_pl_0900_as_cs'),285:('utf8mb4','utf8mb4_et_0900_as_cs'),286:('utf8mb4','utf8mb4_es_0900_as_cs'),287:('utf8mb4','utf8mb4_sv_0900_as_cs'),288:('utf8mb4','utf8mb4_tr_0900_as_cs'),289:('utf8mb4','utf8mb4_cs_0900_as_cs'),290:('utf8mb4','utf8mb4_da_0900_as_cs'),291:('utf8mb4','utf8mb4_lt_0900_as_cs'),292:('utf8mb4','utf8mb4_sk_0900_as_cs'),293:('utf8mb4','utf8mb4_es_trad_0900_as_cs'),294:('utf8mb4','utf8mb4_la_0900_as_cs'),296:('utf8mb4','utf8mb4_eo_0900_as_cs'),297:('utf8mb4','utf8mb4_hu_0900_as_cs'),298:('utf8mb4','utf8mb4_hr_0900_as_cs'),300:('utf8mb4','utf8mb4_vi_0900_as_cs'),303:('utf8mb4','utf8mb4_ja_0900_as_cs'),304:('utf8mb4','utf8mb4_ja_0900_as_cs_ks'),305:('utf8mb4','utf8mb4_0900_as_ci'),306:('utf8mb4','utf8mb4_ru_0900_ai_ci'),307:('utf8mb4','utf8mb4_ru_0900_as_cs'),308:('utf8mb4','utf8mb4_zh_0900_as_cs'),309:('utf8mb4','utf8mb4_0900_bin')} diff --git a/ibd2sql/__init__.py b/ibd2sql/__init__.py new file mode 100644 index 0000000..391c7dd --- /dev/null +++ b/ibd2sql/__init__.py @@ -0,0 +1,5 @@ +#from .innodb_page import * +VERSION = (1,0) + +__version__ = ".".join(str(x) for x in VERSION) + diff --git a/ibd2sql/ibd2sql.py b/ibd2sql/ibd2sql.py new file mode 100644 index 0000000..07726ba --- /dev/null +++ b/ibd2sql/ibd2sql.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 + +import datetime +from 
from ibd2sql.innodb_page_sdi import *
from ibd2sql.innodb_page_spaceORxdes import *
from ibd2sql.innodb_page_inode import *
from ibd2sql.innodb_page_index import *
import sys


class ibd2sql(object):
    """Driver that parses a MySQL 8.0 .ibd file and prints DDL / INSERT SQL.

    Typical usage::

        obj = ibd2sql()
        obj.FILENAME = '/path/to/table.ibd'
        obj.init()       # parse FSP / SDI / INODE pages, build table meta
        obj.get_sql()    # walk the leaf-page chain and print row SQL
        obj.close()

    All behaviour is controlled through the public attributes set in
    ``__init__`` (they mirror the command-line options of main.py).
    """

    def __init__(self, *args, **kwargs):
        self.LIMIT = -1              # max rows to emit; -1 = unlimited
        self.STATUS = False          # True once init() has completed
        self.PAGESIZE = 16384        # InnoDB page size in bytes
        # initialise every option flag up front
        self.DEBUG = False           # verbose trace of the parsing process
        self.DEBUG_FD = sys.stdout   # sink for debug messages
        self.FILENAME = ''           # path of the .ibd file
        self.DELETE = False          # emit only rows flagged as deleted
        self.FORCE = False           # keep going when a page fails to parse
        self.SET = False             # render set/enum as their string values
        self.MULTIVALUE = False      # one multi-value INSERT per page
        self.COMPLETE_SQL = False    # include the column list in INSERTs
        self.REPLACE = False         # REPLACE INTO instead of INSERT INTO
        self.WHERE1 = ''             # textual filter (not applied here)
        self.WHERE2 = (0, 2**48)     # (min, max) transaction-id filter
        self.WHERE3 = (0, 2**56)     # (min, max) rollback-pointer filter
        self.PAGE_ID = 0             # page number that read() will fetch
        self.AUTO_DEBUG = True       # auto-enable debug on parse problems
        self.SQL_PREFIX = ''         # "INSERT INTO `db`.`t`(...) VALUES "
        self.SQL = True
        self.IS_PARTITION = False    # whether the ibd belongs to a partition

        self.PAGE_MIN = 0            # page-range filters used by get_sql()
        self.PAGE_MAX = 2**32
        self.PAGE_START = -1
        self.PAGE_COUNT = -1
        self.PAGE_SKIP = -1

    def _init_table_name(self):
        """Rebuild self.tablename as `schema`.`table` and refresh the prefix."""
        try:
            self.debug(f"OLD TABLENAME:{self.tablename}")
        except AttributeError:
            # first call: self.tablename does not exist yet
            pass
        self.tablename = f"`{self.table.schema}`.`{self.table.table_name}`"
        self.debug(f"NEW TABLENAME:{self.tablename}")
        self._init_sql_prefix()

    def replace_schema(self, name):
        """Replace the schema part of the emitted table name."""
        self.table.schema = name
        return self._init_table_name()

    def replace_name(self, name):
        """Replace the table part of the emitted table name."""
        self.table.table_name = name
        return self._init_table_name()

    def read(self):
        """Return the raw bytes of page ``self.PAGE_ID``."""
        self.debug(f"ibd2sql.read PAGE: {self.PAGE_ID} ")
        self.f.seek(self.PAGESIZE * self.PAGE_ID, 0)
        return self.f.read(self.PAGESIZE)

    def _init_sql_prefix(self):
        """Build the "INSERT/REPLACE INTO <table>[(cols)] VALUES " prefix.

        Requires table metadata from the SDI page, so it runs after init().
        """
        SQL_PREFIX = f"{'REPLACE' if self.REPLACE else 'INSERT'} INTO {self.tablename}("
        for x in self.table.column:
            # virtual columns are not stored on disk, so never list them
            if self.table.column[x]['is_virtual'] or not self.COMPLETE_SQL:
                continue
            else:
                SQL_PREFIX += f"`{self.table.column[x]['name']}`,"
        # without --complete-insert the slice strips the "(", otherwise the ","
        SQL_PREFIX = SQL_PREFIX[:-1] + ") VALUES " if self.COMPLETE_SQL else SQL_PREFIX[:-1] + " VALUES "
        self.SQL_PREFIX = SQL_PREFIX

    def init(self):
        """Open the ibd file and parse FSP, SDI and INODE pages.

        Returns True on success; exits the process (status 2) when the first
        page or the SDI page cannot be parsed.
        """
        self.debug("DEBUG MODE ON")
        self.debug("INIT ibd2sql")
        self.debug("FORCE", self.FORCE)
        self.debug("SET", self.SET)
        self.debug("MULTIVALUE", self.MULTIVALUE)
        self.debug("AUTO_DEBUG", self.AUTO_DEBUG)
        self.debug(f"FILTER: \n\t{self.WHERE1} \n\t{self.WHERE2[0]} < TRX < {self.WHERE2[1]} \n\t{self.WHERE3[0]} < ROLLPTR < {self.WHERE3[1]}")
        self.STATUS = True
        self.debug(f"OPEN IBD FILE:", self.FILENAME)
        self.f = open(self.FILENAME, 'rb')
        self.PAGE_ID = 0

        # first page: FSP header
        self.PAGE_ID = 0
        self.debug("ANALYZE FIRST PAGE: FIL_PAGE_TYPE_FSP_HDR")
        self.space_page = xdes(self.read())
        if not self.space_page.fsp_status:
            sys.stderr.write(f"\nrow_format = compressed or its damaged or its mysql 5.7 file\n\n")
            sys.exit(2)
        self.debug("ANALYZE FIRST PAGE FINISH")
        sdino = self.space_page.SDI_PAGE_NO
        self.debug("SDI PAGE NO:", sdino)

        # SDI page: table metadata (skipped for partitions, which reuse the
        # metadata parsed from the first partition elsewhere)
        if self.IS_PARTITION:
            self.debug("THIS TABLE IS PARTITION TABLE")
            self.tablename = "PARTITION TABLE NO NAME"
            pass
        else:
            self.debug('ANALYZE SDI PAGE')
            self.PAGE_ID = sdino
            self.sdi = sdi(self.read(), debug=self.debug)
            if not self.sdi:
                self.debug("ANALYZE SDI PAGE FAILED (maybe page is not 17853), will exit 2")
                sys.exit(2)
            self.debug('ANALYZE SDI PAGE FINISH')
            self.debug('SET ibd2sql.table = sdi.table (SDI的使命已结束 >_<)')
            self.table = self.sdi.table
            self.tablename = self.table.name
            self.debug("META INFO")
            for colno in self.table.column:
                self.debug(f"COLNO: {colno} \n{self.table.column[colno]}")
            for idxno in self.table.index:
                # FIX: used to print {colno} (stale loop variable) here
                self.debug(f"IDXNO: {idxno} \n{self.table.index[idxno]}")
            self.debug("INIT SQL PREFIX")
            self.debug("DDL:\n", self.table.get_ddl())
            self._init_sql_prefix()  # needs SDI metadata, so done here

        # INODE page: locate the root of the clustered index
        self.debug(f'ANALYZE PAGE INODE (PAGE_ID=2) (for get index)')
        self.PAGE_ID = 2
        self.inode = inode(self.read())
        self.debug("FIRST INDEX (Non-leaf and leaf page) :", self.inode.index_page[0], " (-1 is None)")
        self.first_no_leaf_page = self.inode.index_page[0][0]
        self.first_leaf_page = self.inode.index_page[0][1]
        # NOTE(review): the "or True" makes the walk unconditional — the inode
        # value is apparently not trusted; kept as-is.
        if self.first_leaf_page < 3 or self.first_leaf_page >= 4294967295 or True:
            self.init_first_leaf_page()
        self.debug("FIRST LEAF PAGE ID:", self.first_leaf_page)
        self.debug("#############################################################################")
        self.debug("                         INIT ibd2sql FINISH                                 ")
        self.debug("#############################################################################\n\n")
        return True

    def init_first_leaf_page(self):
        """Walk down from the first non-leaf page until a leaf page is found."""
        _n = 0
        self.debug(f"INIT FIRST PAGE TO FIRST_NO_LEAF_PAGE ({self.first_no_leaf_page})")
        self.PAGE_ID = self.first_no_leaf_page
        while self.PAGE_ID < 4294967295 and self.PAGE_ID > 2:
            _n += 1
            self.debug(f'COUNT: {_n} FIND LEAF PAGE, CURRENT PAGE ID:', self.PAGE_ID)
            aa = find_leafpage(self.read(), table=self.table, idx=self.table.cluster_index_id, debug=self.debug)
            aa.pageno = self.PAGE_ID
            IS_LEAF_PAGE, PAGE_ID = aa.find()
            if IS_LEAF_PAGE:
                self.debug("FIND FINISH, PAGE_ID:", self.PAGE_ID, '\n')
                self.first_leaf_page = self.PAGE_ID
                break
            else:
                self.first_leaf_page = self.PAGE_ID = PAGE_ID

    def debug(self, *args):
        """Write a timestamped debug line to DEBUG_FD when DEBUG is on."""
        if self.DEBUG:
            msg = f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] [DEBUG] {' '.join([ str(x) for x in args ])}\n"
            self.DEBUG_FD.write(msg)

    def _get_index_page(self):
        """Debug helper: follow FIL_PAGE_NEXT through the page chain."""
        pn = 0
        while self.PAGE_ID > 0 and self.PAGE_ID < 4294967295:
            pn += 1
            self.debug(f"CURRENT PAGE ID {self.PAGE_ID} PAGE NO:{pn}")
            aa = page(self.read())
            self.PAGE_ID = aa.FIL_PAGE_NEXT

    def get_sql(self):
        """Walk the leaf-page chain and print one SQL statement per row
        (or per page when MULTIVALUE is set), honouring the page/trx/rollptr
        filters, LIMIT, PAGE_SKIP and PAGE_COUNT settings.
        """
        self.PAGE_ID = self.PAGE_START if self.PAGE_START > 2 else self.first_leaf_page
        # REPLACE and multi-value INSERT conflict; REPLACE wins
        self.MULTIVALUE = False if self.REPLACE else self.MULTIVALUE
        if self.FORCE:
            self.debug("============================= WARNING ================================")
            self.debug("========================== FORCE IS TRUE =============================")
            self.debug("============================= WARNING ================================")
        self.debug("ibd2sql get_sql BEGIN:", self.PAGE_ID, self.PAGE_MIN, self.PAGE_MAX, self.PAGE_COUNT)
        while self.PAGE_ID > self.PAGE_MIN and self.PAGE_ID <= self.PAGE_MAX and self.PAGE_ID < 4294967295 and self.PAGE_COUNT != 0:
            self.debug("INIT INDEX OBJECT")
            aa = index(self.read(), table=self.table, idx=self.table.cluster_index_id, debug=self.debug)
            aa.DELETED = True if self.DELETE else False
            aa.pageno = self.PAGE_ID
            self.debug("SET FILTER", self.WHERE2, self.WHERE3)
            aa.mintrx = self.WHERE2[0]
            aa.maxtrx = self.WHERE2[1]
            aa.minrollptr = self.WHERE3[0]
            aa.maxrollptr = self.WHERE3[1]
            self.PAGE_ID = aa.FIL_PAGE_NEXT

            if self.PAGE_SKIP > 0:
                self.PAGE_SKIP -= 1
                self.debug("SKIP THIS PAGE")
                continue
            self.PAGE_COUNT -= 1

            sql = self.SQL_PREFIX
            # FIX: read_row() is a generator, so parse errors surface while
            # iterating, not when it is called — the old code wrapped only the
            # call in try, which made --force ineffective.  The whole
            # iteration is now guarded.
            if self.MULTIVALUE:
                rows_in_page = 0
                try:
                    for x in aa.read_row():
                        if self.LIMIT == 0:
                            return None
                        self.LIMIT -= 1
                        sql += self._tosql(x['row']) + ','
                        rows_in_page += 1
                except Exception as e:
                    if self.FORCE:
                        continue
                    self.debug(e)
                    break
                # FIX: an empty page used to print the prefix with its last
                # character chopped off; only emit when rows were produced.
                if rows_in_page:
                    print(sql[:-1] + ';')
            else:
                try:
                    for x in aa.read_row():
                        if self.LIMIT == 0:
                            return None
                        self.LIMIT -= 1
                        print(f"{sql}{self._tosql(x['row'])};")
                except Exception as e:
                    if self.FORCE:
                        continue
                    self.debug(e)
                    break
            if self.PAGE_COUNT == 0:
                break

    def test(self):
        """
        TEST ONLY
        """
        self.debug('AUTO TEST\n\n\n##########################################################\n\t\tBEGIN TEST -_- \n##########################################################\n\n')
        self.debug('CLUSTER INDEX ID:', self.table.cluster_index_id)
        self.debug('FIRST LEAF PAGE:', self.first_leaf_page)
        self.PAGE_ID = self.first_leaf_page

        self.debug('ANALYZE INDEX PAGE BEGIN: (FIRST LEAF PAGE):', self.PAGE_ID)
        # DATA
        _n = 0

        pc = -1    # page-count limit, convenient for debugging
        sp = 2329  # number of pages to skip, also for debugging
        sp = 0
        self.MULTIVALUE = False if self.REPLACE else self.MULTIVALUE
        while self.PAGE_ID > 0 and self.PAGE_ID < 4294967295 and pc != 0:
            aa = index(self.read(), table=self.table, idx=self.table.cluster_index_id, debug=self.debug)
            sp -= 1
            if sp >= 0:
                self.PAGE_ID = aa.FIL_PAGE_NEXT
                continue
            aa.pageno = self.PAGE_ID
            aa.mintrx = self.WHERE2[0]
            aa.maxtrx = self.WHERE2[1]
            sql = self.SQL_PREFIX
            if self.MULTIVALUE:
                for x in aa.read_row():
                    sql += self._tosql(x['row']) + ','
                    _n += 1
                print(sql[:-1], ';')
            else:
                for x in aa.read_row():
                    print(f"{sql}{self._tosql(x['row'])};")
                    _n += 1
            self.PAGE_ID = aa.FIL_PAGE_NEXT
            pc -= 1
        self.debug('TOTAL ROWS:', _n)

    def get_ddl(self):
        """Return the CREATE TABLE statement rebuilt from the SDI metadata."""
        return self.table.get_ddl()

    def _tosql(self, row):
        """Render one decoded row dict as a parenthesised value tuple.

        Handles quoting per column type: numerics are emitted bare, binary is
        emitted as a 0x... literal, everything else via repr().  Does not
        include the INSERT INTO prefix or the trailing semicolon.
        """
        sql = '('
        for colno in self.table.column:
            data = row[colno]
            if data is None:
                sql = f"{sql}NULL, "
            elif self.table.column[colno]['ct'] in ['tinyint', 'smallint', 'int', 'float', 'double', 'bigint', 'mediumint', 'year', 'decimal', ]:
                sql = f"{sql}{data}, "
            elif (not self.SET) and (self.table.column[colno]['ct'] in ['enum', 'set']):
                sql = f"{sql}{data}, "
            elif self.table.column[colno]['ct'] == 'binary':
                sql = f"{sql}{hex(data)}, "  # hex literal, readable enough
            else:
                sql += repr(data) + ", "

        return sql[:-2] + ")"

    def _get_first_page(self):
        pass

    def close(self):
        """Best-effort close of the ibd file and the debug sink; returns True."""
        try:
            self.f.close()
        except Exception:
            pass
        try:
            # NOTE: this also closes sys.stdout when DEBUG_FD was left at the
            # default and DEBUG is on — original behaviour, kept.
            if self.DEBUG:
                self.DEBUG_FD.close()
        except Exception:
            pass
        return True
import struct
import time
from .page_type import *

PAGE_SIZE = 16384
FIL_PAGE_DATA_END = 8
PAGE_NEW_INFIMUM = 99
PAGE_NEW_SUPREMUM = 112

# MAGIC_SIZE=3 KEY_LEN=32 SERVER_UUID_LEN=36
# (MAGIC_SIZE + sizeof(uint32) + (KEY_LEN * 2) + SERVER_UUID_LEN + sizeof(uint32))
INFO_SIZE = 3 + 4 + 32 * 2 + 36 + 4
INFO_MAX_SIZE = INFO_SIZE + 4
# SDI_OFFSET = 38+112+40*256 + INFO_MAX_SIZE
SDI_VERSION = 1

# storage/innobase/rem/rec.h
REC_INFO_MIN_REC_FLAG = 0x10
REC_INFO_DELETED_FLAG = 0x20
REC_N_OWNED_MASK = 0xF
REC_HEAP_NO_MASK = 0xFFF8
REC_NEXT_MASK = 0xFFFF

# REC_STATUS_ORDINARY 0
# REC_STATUS_NODE_PTR 1
# REC_STATUS_INFIMUM  2
# REC_STATUS_SUPREMUM 3

# storage/innobase/include/data0type.h
DATA_TRX_ID_LEN = 6
DATA_ROLL_PTR_LEN = 7

REC_N_FIELDS_ONE_BYTE_MAX = 0x7F


def _DEBUG(*args):
    """Default no-op debug sink used when no debug callable is supplied."""
    pass


class XDES(object):
    """One extent descriptor entry.

         |---> XDES_ID        8 bytes
         |---> XDES_FLST_NODE 12 bytes
    XDES-|
         |---> XDES_STATE     4 bytes
         |---> XDES_BITMAP    16 bytes
    """
    def __init__(self, bdata):
        self.XDES_ID = struct.unpack('>Q', bdata[:8])[0]
        self.XDES_FLST_NODE = (FIL_ADDR(bdata[8:14]), FIL_ADDR(bdata[14:20]))
        self.XDES_STATE = struct.unpack('>L', bdata[20:24])[0]
        # NOTE(review): bytes 24:40 are the page-state bitmap; wrapping them
        # in FLST_BASE_NODE mirrors the original code — confirm intent.
        self.XDES_BITMAP = FLST_BASE_NODE(bdata[24:40])

    def __str__(self):
        return f"XDES_ID:{self.XDES_ID} XDES_FLST_NODE:{self.XDES_FLST_NODE} XDES_STATE:{self.XDES_STATE} XDES_BITMAP:{self.XDES_BITMAP}"


class FIL_ADDR(object):
    """File address: a (page number, byte offset within page) pair.

             |---> FIL_ADDR_PAGE 4 bytes
    FIL_ADDR-|
             |---> FIL_ADDR_BYTE 2 bytes
    """
    def __init__(self, bdata):
        self.FIL_ADDR_PAGE, self.FIL_ADDR_BYTE = struct.unpack('>LH', bdata[:6])

    def __str__(self):
        return f"FIL_ADDR_PAGE:{self.FIL_ADDR_PAGE} FIL_ADDR_BYTE:{self.FIL_ADDR_BYTE}"


class FLST_BASE_NODE(object):
    """Base node of an InnoDB file-based list.

                   |---> FLST_LEN   4 bytes
    FLST_BASE_NODE-|---> FLST_FIRST 6 bytes
                   |---> FLST_LAST  6 bytes
    """
    def __init__(self, bdata):
        self.FLST_LEN = struct.unpack('>L', bdata[:4])[0]
        self.FLST_FIRST = FIL_ADDR(bdata[4:10])
        self.FLST_LAST = FIL_ADDR(bdata[10:16])

    def __str__(self):
        return f"FLST_LEN:{self.FLST_LEN} FLST_FIRST:{self.FLST_FIRST} FLST_LAST:{self.FLST_LAST}"


class PAGE_BTR_SEG(object):
    """B-tree segment header (space id, page number, offset).

                 |---> SAPCE_ID    4 bytes
    PAGE_BTR_SEG-|---> PAGE_ID     4 bytes
                 |---> PAGE_OFFSET 2 bytes

    NOTE: the attribute name 'SAPCE_ID' (sic, for SPACE_ID) is kept as-is
    for compatibility with existing callers.
    """
    def __init__(self, bdata):
        self.SAPCE_ID, self.PAGE_ID, self.PAGE_OFFSET = struct.unpack('>LLH', bdata[:10])


class page_header(object):
    """The 56-byte index page header (PAGE_HEADER).

                |---> PAGE_N_DIR_SLOTS  2 bytes
                |---> PAGE_HEAP_TOP     2 bytes
                |---> PAGE_N_HEAP       2 bytes
                |---> PAGE_FREE         2 bytes
                |---> PAGE_GARBAGE      2 bytes
                |---> PAGE_LAST_INSERT  2 bytes
                |---> PAGE_DIRECTION    2 bytes
    PAGE_HEADER-|---> PAGE_N_DIRECTION  2 bytes
                |---> PAGE_N_RECS       2 bytes
                |---> PAGE_MAX_TRX_ID   8 bytes
                |---> PAGE_LEVEL        2 bytes
                |---> PAGE_INDEX_ID     8 bytes
                |---> PAGE_BTR_SEG_LEAF 10 bytes
                |---> PAGE_BTR_SEG_TOP  10 bytes
    """
    def __init__(self, bdata):
        self.PAGE_N_DIR_SLOTS, self.PAGE_HEAP_TOP, self.PAGE_N_HEAP, self.PAGE_FREE, self.PAGE_GARBAGE, self.PAGE_LAST_INSERT, self.PAGE_DIRECTION, self.PAGE_N_DIRECTION, self.PAGE_N_RECS, self.PAGE_MAX_TRX_ID, self.PAGE_LEVEL, self.PAGE_INDEX_ID = struct.unpack('>9HQHQ', bdata[:36])
        self.PAGE_BTR_SEG_LEAF = PAGE_BTR_SEG(bdata[36:46])
        self.PAGE_BTR_SEG_TOP = PAGE_BTR_SEG(bdata[46:56])


class page_directory(object):
    """Placeholder for the page directory.

    Not needed when dumping all rows: records are walked via next_record
    starting at the infimum, so the directory is never consulted.  It would
    only matter for WHERE-driven lookups.

    FIX: this was declared with ``def`` instead of ``class``, so calling
    page_directory(bdata) returned None and the nested __init__ was dead code.
    """
    def __init__(self, bdata):
        pass
+ """ + def __init__(self,bdata): + pass + +class page(object): + """ + |---> FIL_PAGE_SPACE_OR_CHECKSUM 4 bytes + |---> FIL_PAGE_OFFSET 4 bytes + |---> FIL_PAGE_PREV 4 bytes + |---> FIL_HEADER(38 bytes)-|---> FIL_PAGE_NEXT 4 bytes + | |---> FIL_PAGE_LSN 8 bytes + | |---> FIL_PAGE_TYPE 2 bytes + | |---> FIL_PAGE_FILE_FLUSH_LSN 8 bytes + | |---> FIL_PAGE_SPACE_ID 4 bytes + | +INNODB_PAGE(16K)-|---> PAGE_DATA + | + | + | |---> CHECKSUM 4 bytes + |---> FIL_TRAILER(8 bytes)-| + |---> FIL_PAGE_LSN 4 bytes + + + """ + def __init__(self,*args,**kwargs): + self.bdata = args[0] + bdata = self.bdata + self.DEBUG = kwargs['debug'] if 'debug' in kwargs else _DEBUG + self.page_name = 'innodb page' + self.FIL_PAGE_SPACE_OR_CHKSUM, self.FIL_PAGE_OFFSET, self.FIL_PAGE_PREV, self.FIL_PAGE_NEXT, self.FIL_PAGE_LSN, self.FIL_PAGE_TYPE, self.FIL_PAGE_FILE_FLUSH_LSN = struct.unpack('>4LQHQ',bdata[:34]) + self.FIL_PAGE_SPACE_ID = struct.unpack('>L',bdata[34:38])[0] + self.CHECKSUM, self.FIL_PAGE_LSN = struct.unpack('>2L',bdata[-8::]) + self.offset = 38 + + if self.FIL_PAGE_TYPE in (FIL_PAGE_INDEX,FIL_PAGE_SDI): + self.page_header = page_header(self.read(56)) + self._offset = self.offset #读varsie的, + + #保存下一个字段的偏移量相对值 + self.next_offset = self.offset + self._bdata = b'' #保存read的值, 方便调试 + + def read_innodb_int(self,n,is_unsigned): + """ + 读Innodb的 tinyint,smallint,mediumint,int,bigint, year, bit + """ + _t = self._read_uint(n) + #_t = struct.unpack('>L',self.read(n))[0] + _s = n*8 - 1 + return (_t&((1<<_s)-1))-2**_s if _t < 2**_s and not is_unsigned else (_t&((1<<_s)-1)) + + def read_innodb_float(self,n): + """ + 读innodb的 float类型 + """ + return struct.unpack('f',self.read(n))[0] + + def read_innodb_double(self,n): + return struct.unpack('d',self.read(n))[0] + + def read_innodb_bit(self,n): + bdata = self.read(n) + return int.from_bytes(bdata,'big') + #return struct.unpack() + + + def read_innodb_decimal(self,n,extra): + """ +整数部分和小数部分是分开的, +每部分 的每9位10进制数占4字节, 剩余的就按 1-2 为1字节, 这样算 +Example: 
+ (5,2) 整数就是2字节, 小数是1字节 + (10,3) 整数就是4字节, 小数是2字节 + """ + bdata = self.read(n) + p1 = extra[0] #整数部分字节数 + p2 = extra[1] #小数部分 + p1_bdata = bdata[:p1] + p2_bdata = bdata[p1:] + p1_data = int.from_bytes(p1_bdata,'big',signed=True) + p2_data = int.from_bytes(p2_bdata,'big',signed=True) + p1_n = (p1*8)-1 + p2_n = (p2*8)-1 + if p1_data < 0: + p1_data = p1_data + (2**(8*p1-1)) + else: + p1_data = p1_data - (2**(8*p1-1)) + 1 + if p2_data < 0: + p2_data = -(p2_data + 1) + return f"{p1_data}.{p2_data}" + + def read_innodb_set(self,): + pass + + def read_innodb_enum(slef): + pass + + def _read_innodb_varsize(self): + """ + 返回varchar等类型 记录大小 所需的空间(1-2bytes) + 2 bytes 直接表示64KB, 肯定不够(2**16 = 16384) + 所以, 第一字节小于等于 128 字节时, 就1字节. 否则就第一字节超过128字节的部分 *256 再加上第二字节部分来表示总大小 就是256*256 = 65536 这方法有点秀 + """ + _size = self.readreverse(1) + size = struct.unpack('>B',_size)[0] + if size > REC_N_FIELDS_ONE_BYTE_MAX: + size = struct.unpack('>B',self.readreverse(1))[0] + (size-128)*256 + return size + + + def read_innodb_varchar(self,willdecode=True): + """ + 所有变量 + """ + size = self._read_innodb_varsize() + self.debug("\tVAR FILED VAR SIZE:",size) + bdata = self.read(size) + rdata = '' + if willdecode: + try: + rdata = bdata.decode().rstrip() #默认去掉结尾的空,美观 + except Exception as e: + self.debug("ERORR:",e) + else: + rdata = bdata + return rdata + + #https://dev.mysql.com/doc/refman/8.0/en/storage-requirements.html + def read_innodb_datetime(self,n): + """ +5 bytes + fractional seconds storage +1bit符号 year_month:17bit day:5 hour:5 minute:6 second:6 +--------------------------------------------------------------------- +| signed | 1 bit | +|-------------------------------------------------------------------- +| year and month | 17 bit | +|-------------------------------------------------------------------- +| day | 5 bit | +|-------------------------------------------------------------------- +| hour | 5 bit | +|-------------------------------------------------------------------- +| minute | 6 bit 
| +|-------------------------------------------------------------------- +| second | 6 bit | +--------------------------------------------------------------------- +| fractional seconds storage |each 2 digits is stored 1 byte| +--------------------------------------------------------------------- + + """ + bdata = self.read(n) + idata = int.from_bytes(bdata[:5],'big') + year_month = ((idata & ((1 << 17) - 1) << 22) >> 22) + year = int(year_month/13) + month = int(year_month%13) + day = ((idata & ((1 << 5) - 1) << 17) >> 17) + hour = ((idata & ((1 << 5) - 1) << 12) >> 12) + minute = ((idata & ((1 << 6) - 1) << 6) >> 6) + second = (idata& ((1 << 6) - 1)) + great0 = True if idata&(1<<39) else False + fraction = int.from_bytes(bdata[5:],'big') if len(bdata)>5 else None + #就不转为datetime类型了(不会涉及到计算). 就字符串吧, 好看点 + #return f"{'' if great0 else '-'}{year}-{month}-{day} {hour}:{minute}:{second}{'' if fraction is None else '.'+str(fraction)}" + if fraction is None: + return f'{year}-{month}-{day} {hour}:{minute}:{second}' if great0 else f'-{year}-{month}-{day} {hour}:{minute}:{second}' + else: + return f'{year}-{month}-{day} {hour}:{minute}:{second}.{fraction}' if great0 else f'-{year}-{month}-{day} {hour}:{minute}:{second}.{fraction}' + + def read_innodb_time(self,n): + """ +1bit符号 hour:11bit minute:6bit second:6bit 精度1-3bytes +------------------------------------------------------------------- +| signed | 1 bit | +------------------------------------------------------------------- +| hour | 11 bit | +------------------------------------------------------------------- +| minute | 6 bit | +------------------------------------------------------------------- +| second | 6 bit | +------------------------------------------------------------------- +| fractional seconds storage | each 2 digits is stored 1 byte | +------------------------------------------------------------------- + """ + bdata = self.read(n) + idata = int.from_bytes(bdata[:3],'big') + hour = ((idata & ((1 << 10) - 
1) << 12) >> 12) + minute = (idata & ((1 << 6) - 1) << 6) >> 6 + second = (idata& ((1 << 6) - 1)) + great0 = True if idata&(1<<23) else False + fraction = int.from_bytes(bdata[3:],'big') if len(bdata)>3 else None + if fraction is None: + return f'{hour}:{minute}:{second}' if great0 else f'-{hour}:{minute}:{second}' + else: + return f'{hour}:{minute}:{second}.{fraction}' if great0 else f'-{hour}:{minute}:{second}.{fraction}' + + def read_innodb_date(self,n): + """ +一共3字节 1bit符号, 14bit年 4bit月 5bit日 +----------------------------------- +| signed | 1 bit | +----------------------------------- +| year | 14 bit | +----------------------------------- +| month | 4 bit | +----------------------------------- +| day | 5 bit | +----------------------------------- + """ + bdata = self.read(n) + idata = int.from_bytes(bdata[:3],'big') + year = ((idata & ((1 << 14) - 1) << 9) >> 9) + month = (idata & ((1 << 4) - 1) << 5) >> 5 + day = (idata& ((1 << 5) - 1)) + great0 = True if idata&(1<<23) else False + return f'{year}-{month}-{day}' if great0 else f'-{year}-{month}-{day}' + + def read_innodb_timestamp(self,n): + """ + 4 bytes + fraction + """ + bdata = self.read(n) + ltime = time.localtime(int.from_bytes(bdata[:4],'big')) + fraction = int.from_bytes(bdata[4:],'big') if len(bdata)>4 else None + return f'{ltime.tm_year}-{ltime.tm_mon}-{ltime.tm_mday} {ltime.tm_hour}:{ltime.tm_min}:{ltime.tm_sec}.{fraction if fraction is not None else ""}' + if fraction is None: + return f'{ltime.tm_year}-{ltime.tm_mon}-{ltime.tm_mday} {ltime.tm_hour}:{ltime.tm_min}:{ltime.tm_sec}' + else: + return f'{ltime.tm_year}-{ltime.tm_mon}-{ltime.tm_mday} {ltime.tm_hour}:{ltime.tm_min}:{ltime.tm_sec}.{fraction}' + + def read_innodb_big(self): + """ + 读大字段 + """ + return self.read(20) + + def read_innodb_json(self): + pass + + + def read(self,n): + _tdata = self.bdata[self.offset:self.offset+n] + self.offset += n + self._bdata = _tdata + return _tdata + + def readreverse(self,n): #往前读n字节 + _tdata = 
class record_header(object):
    """Compact-format record header: the 5 bytes immediately *before* a
    record's data, read backwards from the record origin.

    --------------------------------------------------------------------------------
    | INSTANT flag  | (1 bit)  | row was written after an INSTANT ADD COLUMN        |
    --------------------------------------------------------------------------------
    | NO USE        | (1 bit)  | unused                                             |
    --------------------------------------------------------------------------------
    | deleted       | (1 bit)  | record is delete-marked                            |
    --------------------------------------------------------------------------------
    | min_rec       | (1 bit)  | first user record on a non-leaf level              |
    --------------------------------------------------------------------------------
    | owned         | (4 bit)  | >0 only on the first record of a directory slot   |
    --------------------------------------------------------------------------------
    | heap number   | (13 bit) | heap position: 0 = infimum, 1 = supremum          |
    --------------------------------------------------------------------------------
    | record_type   | (3 bit)  | 0:rec 1:non-leaf 2:infimum 3:supremum             |
    --------------------------------------------------------------------------------
    | next_record   | (16 bit) | signed offset of the next record (from origin)    |
    --------------------------------------------------------------------------------

    Raises ValueError when bdata is not exactly 5 bytes (the old code
    silently returned, leaving every attribute unset).
    """
    def __init__(self, bdata):
        if len(bdata) != 5:
            raise ValueError("record header must be exactly 5 bytes")
        fb = struct.unpack('>B', bdata[:1])[0]
        self.instant = True if fb & 128 else False
        self.deleted = True if fb & REC_INFO_DELETED_FLAG else False
        self.min_rec = True if fb & REC_INFO_MIN_REC_FLAG else False
        self.owned = fb & REC_N_OWNED_MASK
        w = struct.unpack('>H', bdata[1:3])[0]
        # FIX: the heap number occupies bits 3..15, so the masked value must
        # be shifted right by 3 (REC_HEAP_NO_SHIFT); without the shift the
        # reported heap number was 8x the real one (supremum showed 24, not 1,
        # contradicting the table above).
        self.heap_no = (w & REC_HEAP_NO_MASK) >> 3
        self.record_type = w & ((1 << 3) - 1)  # 0:rec 1:non-leaf 2:min 3:max
        self.next_record = struct.unpack('>h', bdata[3:5])[0]  # signed!

    def __str__(self):
        return f'deleted:{self.deleted} min_rec:{self.min_rec} owned:{self.owned} heap_no:{self.heap_no} record_type:{self.record_type} next_record:{self.next_record}'
+ self.haveindex = False #没得索引 + self.page_type = "INDEX PAGE" + self.HAVE_LEAF_PAGE = False #是否有叶子页 + self.HAVE_NONE_LEAF_PAGE = False #是否有非叶子页 + self.SET = True #默认将set/enum换成对应的值 + self.next_record = PAGE_NEW_INFIMUM #下一个字段的位置 + + #过滤条件 + self.maxtrx = 2**(6*8) + self.mintrx = 0 + self.maxrollptr = 2**(7*8) + self.minrollptr = 0 + self.DELETED = False #True 只要delete的数据, False只要非delete的数据 (鱼与熊掌不可兼得) + + self.null_bitmask_count = self.table.null_bitmask_count + self.null_bitmask_len = int((self.null_bitmask_count+7)/8) if self.table.have_null else 0 + self.debug(f"NULL BITMASK LENGTH: {self.null_bitmask_len} bytes. (nullable col count:{self.null_bitmask_count})") + + #有哪些字段, 仅ordinal_position + self.column_list = [ x for x in self.table.column ] + self.key_column_list = [] # + self.prekey = {} #索引字段是否为前缀索引 + if self.idxno: #如果有索引 + self.haveindex = True + for x in self.table.index[self.idxno]['element_col']: + self.prekey[x[0]] = True if x[1] == 0 else False + #self.key_column_list = [ x[0] for x in self.table.index[self.idxno]['element_col'] ] + + self.debug("######################################## FIELD INFO START ####################################") + for x in self.table.column: + self.debug('name:',self.table.column[x]['name'], ' type:',self.table.column[x]['type'], ' size:',self.table.column[x]['size'],' isvar:',self.table.column[x]['isvar'], ' is_nullable:',self.table.column[x]['is_nullable']) + self.debug("######################################## FIELD INFO END ######################################") + self.debug(f"CLUSTER INDEX: IDXNO: {self.idxno} IDX COLUMMN COUNT:{len(self.table.index[self.idxno]['element_col'])} INDEX ELEMENT:{[x[0] for x in self.table.index[self.idxno]['element_col']]}" if self.haveindex else "没得索引") + self.debug(f"ROW INIT FINISH FOR < {self.table.get_name()} >\n") + + + def init(self,bdata): + self.bdata = bdata + + def _read_key(self,): + pass + + def _read_null_bitmask(self,): + pass + + + def _read_field(self,col): + data = 
None + n = col['size'] + extra = col['extra'] + is_unsigned = col['is_unsigned'] + _expage = None + _bf_offset = self.offset + if col['isbig']: + #data = self.read_innodb_big() + size = self._read_innodb_varsize() + if size + self.offset > 16384: + size = 20 #超过这一页大小了, 就只要20bytes + data = self.read(size) + real_size = int.from_bytes(data[-4:],'big') + self.debug("THIS BIG COLUMN SIZE:",real_size,'bytes detail:',data) + _expage = data + data = None + + elif col['ct'] == "json": #json类型 + _tdata = self.read(size) + #data = _tdata + data = jsonob(_tdata[1:],int.from_bytes(_tdata[:1],'little')).init() + data = json.dumps(data) + else: #其它lob类型 + data = self.read(size).decode() + elif col['isvar']: #变量 + data = self.read_innodb_varchar(True) + elif col['ct'] in ['int','tinyint','smallint','bigint','mediumint']: #int类型 + data = self.read_innodb_int(n,is_unsigned) + elif col['ct'] == 'float': + data = self.read_innodb_float(n) + elif col['ct'] == 'double': + data = self.read_innodb_double(n) + elif col['ct'] == 'decimal': + data = self.read_innodb_decimal(n,extra) + elif col['ct'] == 'set': #set + data = self._read_uint(n) + if self.SET: + _sn = 0 + _sdata = '' + for x in col['elements_dict']: + if 1<<_sn & data: + _sdata += col['elements_dict'][x] + "," + _sn += 1 + data = _sdata[:-1] + data = repr(data) + elif col['ct'] in ['enum','set']: #枚举类型 + data = self._read_uint(n) + if self.SET: + #data = col['elements_dict'][data] + data = repr(col['elements_dict'][data]) + elif col['ct'] == 'time': + data = self.read_innodb_time(n) + elif col['ct'] == 'datetime': + data = self.read_innodb_datetime(n) + elif col['ct'] == 'date': + data = self.read_innodb_date(n) + elif col['ct'] == 'timestamp': + data = self.read_innodb_timestamp(n) + elif col['ct'] == 'year': + data = self._read_uint(n) + 1900 + elif col['ct'] == 'bit': + data = self._read_uint(n) + elif col['ct'] == 'binary': + data = self._read_uint(n)#.decode() + elif col['ct'] == 'tinytext': + s = 
int.from_bytes(self.readreverse(1),'big') + data = self.read(s).decode() + else: + self.debug("WARNING Unknown col:",col) + data = self.read(n) + + _af_offset = self.offset + self.debug(f"\t{_bf_offset} ----> {_af_offset} data:{data} bdata:{self._bdata}") + #self._crc32 += binascii.crc32(self._bdata,self._crc32) + return data,_expage + + def read_row(self): + self.debug(f"################## READ ROW START (PAGE NO:{self.pageno}) ########################") + self.debug(f"READ ALL ROWS FROM PAGE (PAGE_ID={self.pageno})") + self.debug(f"RESET offset TO PAGE_NEW_INFIMUM ({PAGE_NEW_INFIMUM})") + if self.DELETED: + self.next_offset = self.page_header.PAGE_FREE + self.debug(f"ONLY READ WITH DELETED FLAG") + else: + self.next_offset = PAGE_NEW_INFIMUM + #print(self.next_offset,self.bdata[38:38+56][6:8],self.page_header) + self.offset = PAGE_NEW_INFIMUM #懒得解析page directory了. 直接走PAGE_NEW_INFIMUM + self._read_all_row() + for x in self.row: + yield x + self.debug(f"################## READ ROW END (PAGE NO:{self.pageno}) #########################") + #return None + + def _read_all_row(self): + #先清空环境 + self.row = [] #数据 + self.rowno = 0 #行数 + row = [] #数据行 + #[{'type':xx,'trx':xx,'rollptr':xx,'have_extra':True,'extra_page':[xxx],'row':[0,66,'哈哈']]}, ] + rn = 0 #统计数据行数 + rhn = 0 + self.next_record = 1 + #含instant的字段的数量 instant column count + _icc = sum([ 1 if self.table.column[x]['instant'] else 0 for x in self.table.column ]) + while self.next_offset != 112 and self.next_offset < 16384 and self.next_offset > 0 and self.next_record != 0: + self._offset = self.offset = self.next_offset + _row = {} #这一行数据,有额外信息 + _data = {} #具体的字段值 + _expage = {} #额外页. 
+ _row['trx'] = None + _row['rollptr'] = None + col_count = len(self.table.column) + + #读字段头 record header + rhn += 1 + self.debug(f"NO:{rhn} READ RECORD HEADER (5 bytes) _offset:{self._offset} offset:{self.offset} START") + rheader = record_header(self.readreverse(5)) + if rheader.record_type == 2: #最小字段 + self.next_offset += rheader.next_record + self.debug(f"\tTHIS ROW IS PAGE_NEW_INFIMUM, WILL CONTINUE. (offset:{self.offset})") + continue + elif rheader.record_type == 3: #最大字段 + self.debug(f"PAGE NO {self.pageno} READ FINISH.(offset:{self.offset})") + break + elif rheader.record_type == 1: #non leaf + self.next_offset += rheader.next_record + self.HAVE_NONE_LEAF_PAGE = True + continue + elif rheader.record_type == 0: #leaf + self.HAVE_LEAF_PAGE = True + self.next_offset += rheader.next_record #设置下一个字段的offset + self.next_record = rheader.next_record + _row['type'] = rheader.record_type + + #DELETE判断: + if self.DELETED and not rheader.deleted: + continue + + self.debug(f"\tREAD RECORD HEADER (5 bytes) _offset:{self._offset} offset:{self.offset} FINISH") + + self.debug(f'\tPAGE NO : {self.pageno}') + self.debug(f"\tREAD ROW NO : {rn} CURRENT_OFFSET:{self.offset}") + self.debug(f"\tREC INSTANT : {rheader.instant}") + self.debug(f"\tREC DELETED : {rheader.deleted}") + self.debug(f"\tREC MIN_REC : {rheader.min_rec}") + self.debug(f"\tREC OWNED : {rheader.owned}") + self.debug(f"\tREC HEAP_NO : {rheader.heap_no}") + self.debug(f"\tREC TYPE : {rheader.record_type}") + self.debug(f"\tREC NEXT : {rheader.next_record}") + self.debug(f"\t20 bytes ON BOTH SIDES OF RECORD, {self.bdata[self._offset-20:self._offset]}, {self.bdata[self._offset:self._offset+20]}") + + + #INSTANT + self.debug("GET COUNT COLUMN FOR THIS ROW") + if self.table.instant and rheader.instant: + self.debug(f"\tREAD INSTANT SIZE, _OFFSET:{self._offset} OFFSET:{self.offset} START.") + col_count = self._read_innodb_varsize() + self.debug(f"\tREAD INSTANT SIZE, _OFFSET:{self._offset} OFFSET:{self.offset} 
FINISH") + else: + self.debug(f"\tREAD COLUM COUNT") + col_count = len(self.table.column) + self.debug(f"\tREAD COLUM COUNT FINISH") + self.debug(f"\tTHIS ROW HAS {col_count} FILEDS") + + + #NULL + null_bitmask = 0 + null_bitmask_count = 0 + null_bitmask_len = 0 #null bitmask 的占用的字节数量 + #if self.null_bitmask_len > 0: + self.debug(f"READ NULL BITMASK") + if self.table.have_null and rheader.instant: + null_bitmask_count = self.table.null_bitmask_count+self.table.null_bitmask_count_instant + #null_bitmask_len = int((self.table.null_bitmask_count+self.table.null_bitmask_count_instant+7)/8) + elif self.table.have_null: + null_bitmask_count = self.table.null_bitmask_count + #self.debug("READ NULL BISTMASK",self.table.null_bitmask_count,self.table.null_bitmask_count_instant,rheader.instant) + #nbl = self.table.null_bitmask_count if self.table.null_bitmask_count_instant == 0 and not rheader.instant else self.table.null_bitmask_count_instant + self.table.null_bitmask_count + #null_bitmask = self._readreverse_uint(int((nbl+7)/8)) + #self.debug(f'NULL BITMASK: {null_bitmask} NULLABLE FILED COUNT: {self.null_bitmask_len}') + elif rheader.instant: + null_bitmask_count = self.table.null_bitmask_count_instant + else: + self.debug("\tNO NULLABLE FIELD.") + null_bitmask_len = int((null_bitmask_count+7)/8) + null_bitmask = self._readreverse_uint(null_bitmask_len) + self.debug(f"\tNULLABLE FILED COUNT: {self.table.null_bitmask_count} NULLABLE FIELD COUNT(FOR INSTANT):{self.table.null_bitmask_count_instant}") + _idnb = [ 1 if null_bitmask&(1<= self.maxtrx ): + self.debug(f"!!! SKIP ROW NO {rn} . {_row['trx']} not in ({self.mintrx},{self.maxtrx})") + continue + if _row['rollptr'] and (_row['rollptr'] <= self.minrollptr or _row['rollptr'] >= self.maxrollptr): + self.debug(f"!!! SKIP ROW NO {rn} . 
{_row['rollptr']} not in ({self.minrollptr},{self.maxrollptr})") + continue + self.debug(f"TRX:{_row['rollptr']} and ROLLPTR:{_row['rollptr']} is PASS") + + #读剩下的字段(FOR INSTANT) + for colno in self.table.column: + col = self.table.column[colno] + if colno in _data or (not col['instant']): + continue + self.debug(f'READ THE REST OF FILED (INSTANT) (column count:{col_count})') + col_count -= 1 + + if col_count + _icc < 1 and not self.haveindex: #记录的字段取完了, 剩余的就是默认值 + self.debug(f"\t NO MORE RECORD FILED, COL({colno})({col['name']}) WILL USE DEFAULT VALUE.{col['default']}") + #_data[colno],_expage[colno] = None if col['instant_null'] else col['default'],None + _data[colno],_expage[colno] = col['instant_value'],None + self.debug(col) + continue + + if not rheader.instant: + _data[colno],_expage[colno] = col['default'],None + self.debug(f"\tINSTANT:{rheader.instant}",col['instant_value']) + continue + else: + self.debug(f"\tINSTANT:{rheader.instant}",col['instant_value']) + #break + + if col['is_nullable']: + _nc += 1 + self.debug(f"\tINSTANT COL {colno} {col['name']} MAYBE NULL.") + if null_bitmask&(1<<_nc): + self.debug(f"\tINSTANT COL {colno} {col['name']} IS NULL. WILL CONTINE") + _data[colno],_expage[colno] = None,None + #_data[colno],_expage[colno] = col['default'],None + continue + else: + self.debug(f"\tINSTANT COL {colno} {col['name']} IS NOT NULL. READ DATA") + #_data[colno],_expage[colno] = col['default'],None + _data[colno],_expage[colno] = self._read_field(col) + else: + self.debug(f"\tINSTANT COL {colno} {col['name']} REQUIRE NOT NULL. READ DATA") + _data[colno],_expage[colno] = self._read_field(col) + + + + + rn += 1 + _row['row'] = _data + _row['expage'] = _expage + row.append(_row) + #self.debug("############################# AFTER READ INSTANT CRC32 ",self._crc32) + self.debug(f'READ ROW NO: {rn} FINISH. 
CURRENT_OFFSET: {self.offset}\t') + #self.debug('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX',self.offset) + self.row = row + self.debug(f'################### THIS PAGE({self.pageno}) HAVE {rn} ROWS. ###################\n') + +class index(ROW): + """ +------------------------------------------------ +| FIL_HEADER(38 bytes) | +------------------------------------------------ +| PAGE_HEADER(56 bytes) | +------------------------------------------------ +| ROW(INFIMUM)(5+7+1) | +------------------------------------------------ +| ROW(SUPERMUM)(5+8+1) | +------------------------------------------------ +| ROW(var+bitmask+5+k+d) | +------------------------------------------------ +| ............ | +------------------------------------------------ +| ROW(var+bitmask+5+k+d) | +------------------------------------------------ +| PAGE_DIRECTORY(n*2) | +------------------------------------------------ +| FIL_TRAILER(8) | +------------------------------------------------ + """ + def __init__(self,*args,**kwargs): + super().__init__(*args,**kwargs) + self.table = kwargs['table'] #必须要表对象, 不然解析不了字段信息 + #self.HAS_NULL = self.table.have_null #是否有空值 + #self.offset += 5 #懒得解析page directory了. 直接走INFIMUM.. + #self._offset = self.offset + #rheader = record_header(self.readreverse(5)) + #self.offset += rheader.next_record #第一行先去掉 + + + +class find_leafpage(ROW): + def __init__(self,*args,**kwargs): + super().__init__(*args,**kwargs) + self.table = kwargs['table'] + idx = kwargs['idx'] #索引信息 索引号 self.table.index[idx] + self.IS_LEAF_PAGE = False + + self.offset += 5 #INFIMUM.. 
+ #self.debug('') + + def find(self): + IS_LEAF_PAGE = False + NEXT_PAGE_ID = 0 + self.next_offset = PAGE_NEW_INFIMUM + self.debug("CURRENT PAGE ID(find leaf page):",self.pageno) + while self.next_offset != 112 and self.next_offset < 16384 and self.next_offset > 0: + self._offset = self.offset = self.next_offset + rheader = record_header(self.readreverse(5)) + self.next_offset += rheader.next_record + self.debug(f"FIND LEAF PAGE ----> OFFSET:{self.offset} RECORD TYPE:{rheader.record_type}") + if rheader.record_type == 2: #最小字段 + continue + elif rheader.record_type == 3: #最大字段 + break + elif rheader.record_type == 1: #non leaf + #解析得到page_id + if self.null_bitmask_len > 0: + null_bitmask = self._readreverse_uint(self.null_bitmask_len) + if self.haveindex: + for colno,prefix_key in self.table.index[self.idxno]['element_col']: + col = self.table.column[colno] + _,__ = self._read_field(col) + else: + self._read_uint(6) #ROW_ID + #cluster index Non-leaf page dont have trx and rollptr + #_ = self._read_uint(6) + #_ = self._read_uint(7) + NEXT_PAGE_ID = self._read_uint(4) + break + elif rheader.record_type == 0: + IS_LEAF_PAGE = True + break + return IS_LEAF_PAGE,NEXT_PAGE_ID + + + def init(self): + while self.offset != 112: + self._offset = self.offset + self.debug('offset',self.offset) + rheader = record_header(self.readreverse(5)) + self.offset += rheader.next_record + if rheader.record_type == 0: + self.IS_LEAF_PAGE = True + break + elif rheader.record_type == 3: + break + self.debug("CURRENT TYPE:",rheader.record_type) + diff --git a/ibd2sql/innodb_page_inode.py b/ibd2sql/innodb_page_inode.py new file mode 100644 index 0000000..752a6f5 --- /dev/null +++ b/ibd2sql/innodb_page_inode.py @@ -0,0 +1,72 @@ +from ibd2sql.innodb_page import * +import struct +class inode(page): + """ +---------------------------------------------------------------- +| FIL_HEADER(38 bytes) | +---------------------------------------------------------------- +| INODE INFO(pre and next inode 
page)(12 bytes) | +---------------------------------------------------------------- +| FSEG (SDI PAGE)(192 bytes) | +---------------------------------------------------------------- +| FSEG (SDI PAGE)(192 bytes) | +---------------------------------------------------------------- +| FSEG (general cluster index)(NONE LEAF PAGE)(192 bytes) | +---------------------------------------------------------------- +| FSEG (general cluster index)(LEAF PAGE) (192 bytes) | +---------------------------------------------------------------- +| FSEG (index)(NONE LEAF PAGE)(192 bytes) | +---------------------------------------------------------------- +| FSEG (index)(LEAF PAGE) (192 bytes) | +---------------------------------------------------------------- +| .............. | +---------------------------------------------------------------- +| FIL_TRAILER(8 bytes) | +---------------------------------------------------------------- + """ + def __init__(self,*args,**kwargs): + super().__init__(*args,**kwargs) + if self.FIL_PAGE_TYPE != 3: + return False + self.page_name = 'INODE' + self.EXTRA_PAGE = True #假装还有额外的Inode page + self._init_inodeinfo() + self._init_segment() + + def _init_inodeinfo(self): + self.inode_pre = FIL_ADDR(self.read(6)) + self.inode_next = FIL_ADDR(self.read(6)) + self.inode_pre_page = self.inode_pre.FIL_ADDR_PAGE + self.inode_pre_page_offset = self.inode_pre.FIL_ADDR_BYTE + self.inode_next_page = self.inode_next.FIL_ADDR_PAGE + self.inode_next_page_offset = self.inode_next.FIL_ADDR_BYTE + if self.inode_next_page == 4294967295: + self.EXTRA_PAGE = False + + + def _segment(self): + return { + 'FSEG_ID':self.read_uint8(), + 'FSEG_NOT_FULL_N_USED':self.read_uint4(), + 'FSEG_FREE':FLST_BASE_NODE(self.read(16)), + 'FSEG_NOT_FULL':FLST_BASE_NODE(self.read(16)), + 'FSEG_FULL':FLST_BASE_NODE(self.read(16)), + 'FSEG_MAGIC':self.read_uint4(), + 'FSEG_FRAG_ARR':[ self.read_uint4() for _ in range(32) ] #FSEG_FRAG_SLOT_SIZE = 4 + } + + def _init_segment(self): + self.FSEG_SDI 
= (self._segment(),self._segment()) + self.FSEG = [] #(non_leaf_page, leaf_page) + for x in range(85): + _fseg = self._segment() + if _fseg['FSEG_ID'] and _fseg['FSEG_MAGIC'] == 97937874: + self.FSEG.append(_fseg) + else: + break + index_page = [] + for x in range(int(len(self.FSEG)/2)): + index_page.append( (self.FSEG[x*2]['FSEG_FRAG_ARR'][0], self.FSEG[x*2+1]['FSEG_FRAG_ARR'][0]) ) + + self.index_page = index_page #if leaf_page = -1 , it means NO LEAF PAGE + diff --git a/ibd2sql/innodb_page_sdi.py b/ibd2sql/innodb_page_sdi.py new file mode 100644 index 0000000..2d436ed --- /dev/null +++ b/ibd2sql/innodb_page_sdi.py @@ -0,0 +1,382 @@ +from ibd2sql.innodb_page import * +from ibd2sql.COLLATIONS import COLLID_TO_CHAR +import struct,json,zlib +from ibd2sql.innodb_type import innodb_type_isvar +import base64 + + +class TABLE(object): + def __init__(self): + #表的基础信息 + self.schema = '' + self.table_name = '' + self.column = {} #NAME,TYPE,VISIBLE .. + self.index = {} #PK,KEY,FOREIGN KEY, UK(not in SDI) + self.check = [] #约束 CONSTRAINT + self.foreign = [] #外键 + self.table_options = {} + self.partitions = "" #only for one level + #self.nullable = True #是否有空值 判断null bitmask要 + self._ci = " " #4个空格开头(for column and index) pretty + self.cluster_index_id = None #cluster index的id, 空表示没得主键, 使用的rowid(6 bytes) + self.uindex_id = [] + self.have_null = False #标记这张表是否有null字段 + self.have_null_instant = False #标记这张表是否有null字段 + self.null_bitmask_count = 0 #可为空的字段数量 + self.null_bitmask_count_instant = 0#可为空的字段数量 + self.instant = False #是否有过instant online DDL + self.instant_list = [] # + + #可禁用一些功能, 比如外键 + self.FOREIGN = True + self.ENCRYPTION = True + self.AUTO_EXTEND = True + self.COLUMN_COLL = False #解析字段的排序规则/字符集 + self.COLLATION = True #表的排序规则还是要的 + self.HAS_EXIST = True #是否有has exist + self.CONSTRAINT = True #支持约束 + self.PARTITIONS = True #支持分区 + self.row_format = "DYNAMIC" + + def _set_name(self,): + self.name = f"`{self.schema}`.`{self.table_name}`" + + def get_name(self): + 
self._set_name() + return self.name + + def remove_virtual_column(self,): + """ + 把虚拟列去掉, 目前不支持虚拟列 (获取数据前 请删掉虚拟字段, 不然虚拟列会出现一些错误的数据) + """ + column = {} + for colno in self.column: + if self.column[colno]['is_virtual']: + #self.debug(f"remove virtual column {self.column[column]['name']}") + continue + column[colno] = self.column[colno] + self.column = column + + def get_ddl(self): + self._set_name() + ddl = f"CREATE TABLE{' IF NOT EXISTS' if self.HAS_EXIST else ''} {self.name}(\n" + ddl += self._column() + idx = self._index() + ddl += ",\n" + idx if idx != '' else '' + if self.FOREIGN: + fgk = self._foreign_keys() + ddl += ",\n" + fgk if fgk != '' else '' + chk = self._check() + ddl += ",\n" + chk if chk != '' else '' + ddl += "\n) " + ddl += self._options() + if self.PARTITIONS and self.partitions != "": + ddl += "\n" + self._partitions() + ddl += ";" + #self.remove_virtual_column() + return ddl + + def _column(self): + ddl = "" + for colid in self.column: + ddl += self._ci + col = self.column[colid] + ddl += f"`{col['name']}` {col['type']}" #column name + if self.COLUMN_COLL and col['type'] != 'int': + ddl += f" CHARACTER SET {col['character_set']} COLLATE {col['collation']}" + if not col['is_virtual']: + ddl += f"{' NOT' if not col['is_nullable'] else ''} NULL" #nullabel + else: + #虚拟列 VIRTUAL + ddl += f"{' GENERATED ALWAYS AS (' + col['generation_expression'] + ') VIRTUAL' if col['is_virtual'] else '' }" + ddl += f"{' DEFAULT '+repr(col['default']) if col['have_default'] else ''}" #default + ddl += f"{' AUTO_INCREMENT' if col['is_auto_increment'] else ''}" #auto_increment + ddl += f"{' COMMENT '+repr(col['comment']) if col['comment'] != '' else '' }" #comment + #COLUMN_FORMAT + #STORAGE + #SECONDARY_ENGINE_ATTRIBUTE + ddl += ",\n" + return ddl[:-2] #去掉',\n' + + def _index(self): + ddl = "" + for idxid in self.index: + ddl += self._ci + idx = self.index[idxid] + ddl += idx['idx_type'] + "KEY " + ddl += f"`{idx['name']}` " if idx['name'] else ' ' + ddl += "(" + 
",".join( [ f"`{self.column[x[0]]['name']}`{'' if x[1] == 0 else '('+str(x[1])+')'}" for x in idx['element_col'] ] ) + ")" + #ddl += "(" + ",".join( [ f"`{self.column[x[0]]['name']}`" for x in idx['element_col'] ] ) + ")" #不考虑前缀索引 + ddl += " COMMENT " + repr(idx['comment']) if idx['comment'] != "" else '' + ddl += ",\n" + return ddl[:-2] + + def _check(self): + ddl = '' + for chk in self.check: + ddl += self._ci + chk + ",\n" + return ddl[:-2] + + def _partitions(self): + #/*!50100 PARTITION xxx */ + return self.partitions + + def _foreign_keys(self): + ddl = '' + for fgk in self.foreign: + ddl += self._ci + fgk + ",\n" + return ddl[:-2] + + def _options(self): + ddl = '' + ddl += f"ENGINE={self.table_options['engine']}" + if self.COLLATION: + ddl += f" DEFAULT CHARSET={self.table_options['charset']} COLLATE={self.table_options['collate']}" + ddl += f" {' COMMENT '+repr(self.table_options['comment']) if self.table_options['comment'] != '' else ''}" + return ddl + +class sdi(page): + """ + |---> FIL_HEADER 38 bytes + |---> PAGE_HEADER 56 bytes +SDI_PAGE-|---> INFIMUM 13 bytes + |---> SUPEREMUM 13 bytes + |---> SDI_DATA xx + |---> PAGE_DIRECTORY xx + |---> FIL_TRAILER 8 bytes + """ + def __init__(self,*args,**kwargs): + super().__init__(*args,**kwargs) + if self.FIL_PAGE_TYPE != 17853: + return None + self.page_name = 'SDI' + + self.HAS_IF_NOT_EXISTS = True + self.table = TABLE() #初始化一个表对象 + self._init_table() + self.table._set_name() + + def _init_table(self): + """ + 初始化表对象 + """ + dd = self.get_dict() + self.table.schema = dd['dd_object']['schema_ref'] + self.table.table_name = dd['dd_object']['name'] + + column = {} + nullable = False #是否有空值 + null_bitmask_count = 0 + null_bitmask_count_instant = 0 + #1:不是索引, 2:主键索引 3:唯一索引 4:普通索引 + idx_type = {1:'NONE', 2:'PK', 3:'UK', 4:'SK'} + for col in dd['dd_object']['columns']: + if col['name'] in ['DB_TRX_ID','DB_ROLL_PTR','DB_ROW_ID']: + continue + #if col['name'] == 'DB_ROW_ID': + # self.table.pk = False + coll_id = 
col['collation_id'] + ct,isvar,size,isbig,elements_dict,varsize,extra = innodb_type_isvar(col) + se_private_data = {} + for x in col['se_private_data'].split(";"): + if x == '': + continue + xk,xv = x.split('=') + se_private_data[xk] = xv + + #INSTANT + se_private_data_default_value = '' + col_instant = False + nullable_instant = False + instant_null = True + if 'default_null' in se_private_data: + se_private_data_default_value = None + col_instant = True + self.table.instant = True + self.table.instant_list.append(col['ordinal_position']) + instant_null = True + elif 'default' in se_private_data: + #se_private_data_default_value = se_private_data['default'] + se_private_data_default_value = col['default_value_utf8'] + col_instant = True + self.table.instant = True + self.table.instant_list.append(col['ordinal_position']) + instant_null = False + + #NULLABLE + if col['is_nullable'] and not col_instant: #instant的不需要使用null bitmask. 因为本来就自带默认值 + nullable = True + null_bitmask_count += 1 + if col['is_nullable'] and col_instant: + null_bitmask_count_instant += 1 + nullable_instant = True + + column[col['ordinal_position']] = { + 'name':col['name'], + 'is_autoincrement':col['is_auto_increment'], + 'type':col['column_type_utf8'], + 'isvar':isvar, + 'size':size, + 'isbig':isbig, + 'elements_dict':elements_dict, + 'varsize':varsize, + 'have_default':False if col['default_value_utf8_null'] else True, + 'default':col['default_value_utf8'], + 'comment':col['comment'], + 'collation':COLLID_TO_CHAR[coll_id][1], + 'character_set':COLLID_TO_CHAR[coll_id][0], + 'index_type':idx_type[col['column_key']], + 'is_nullable':col['is_nullable'], + 'is_zerofill':col['is_zerofill'], + 'is_unsigned':col['is_unsigned'], + 'is_auto_increment':col['is_auto_increment'], + 'is_virtual':col['is_virtual'], + 'hidden':col['hidden'], + 'char_length':col['char_length'], #作为字符串的最大长度 + 'extra':extra, + 'instant':col_instant, + 'instant_value':se_private_data_default_value, + 'instant_null':instant_null, 
+ 'generation_expression':col['generation_expression'], + 'ct':ct #属于类型 + } + self.table.column = column + self.table.have_null = nullable + self.table.have_null_instant = nullable_instant + self.table.null_bitmask_count = null_bitmask_count + self.table.null_bitmask_count_instant = null_bitmask_count_instant + + + index = {} + for idx in dd['dd_object']['indexes']: + element_col = [] + comment = idx['comment'] + hidden = idx['hidden'] + for x in idx['elements']: + if x['length'] == 4294967295 or x['hidden']: + continue + #判断前缀索引 + prefix_key = 0 + if self.table.column[x['column_opx']+1]['ct'] in ['varbinary','char']: + if self.table.column[x['column_opx']+1]['char_length'] > x['length']: + prefix_key = int(x['length']/4) + element_col.append((x['column_opx']+1,prefix_key)) + #/*column[ordinal_position] 从1开始计数, idx['column_opx'] 从0开始计*/ + if len(element_col) == 0: + continue #没得k + if idx['type'] == 1: + idx_type = 'PRIMARY ' + self.table.cluster_index_id = idx['ordinal_position'] #设置主键 + elif idx['type'] == 2: + idx_type = 'UNIQUE ' + self.table.uindex_id.append(idx['ordinal_position']) + else: + idx_type = '' + name = idx['name'] if idx['name'] != "PRIMARY" else None + _options = {} + for x in idx['se_private_data'].split(';')[:-1]: + xk,xv = x.split('=') + _options[xk] = xv + index[idx['ordinal_position']] = { + 'name':name,#只有主键没得名字 + 'comment':comment, + 'idx_type':idx_type, + 'element_col':element_col, + 'options':_options + } + self.table.index = index + + #不会使用唯一索引作为cluster index + #if not self.table.cluster_index_id: + # self.table.cluster_index_id = self.table.uindex_id[0] if len(self.table.uindex_id) > 0 else None + + #FOREIGN KEY + foreign = [] + for fgk in dd['dd_object']['foreign_keys']: + fkid = f"{','.join([ '`'+x['referenced_column_name']+'`' for x in fgk['elements']])}" + foreign.append(f"CONSTRAINT `{fgk['name']}` FOREIGN KEY ({fkid}) REFERENCES `{fgk['referenced_table_schema_name']}`.`{fgk['referenced_table_name']}` ({fkid})") + 
self.table.foreign = foreign + + #CONSTRAINT CHECK + check = [] + for chk in dd['dd_object']['check_constraints']: + chkv = base64.b64decode(chk['check_clause']).decode() + check.append(f"CONSTRAINT `{chk['name']}` CHECK {chkv}") + self.table.check = check + + + #PARTITIONS + pt = "" + if dd['dd_object']['partition_type'] == 0:#非分区 + pass + elif dd['dd_object']['partition_type'] == 8: #list分区 + pt = f"/*!50100 PARTITION BY LIST({dd['dd_object']['partition_expression_utf8']})\n(" + for p in dd['dd_object']['partitions']: + pt += f" PARTITION {p['name']} VALUES IN ({p['description_utf8']}) ENGINE = {p['engine']},\n" + pt = pt[:-2] + ") */" + elif dd['dd_object']['partition_type'] == 7: #range分区 + pt = f"/*!50100 PARTITION BY RANGE({dd['dd_object']['partition_expression_utf8']})\n(" + for p in dd['dd_object']['partitions']: + pt += f" PARTITION {p['name']} VALUES LESS THAN ({p['description_utf8']}) ENGINE = {p['engine']},\n" + pt = pt[:-2] + ") */" + elif dd['dd_object']['partition_type'] == 3: #key分区 + pt = f"/*!50100 PARTITION BY KEY ({dd['dd_object']['partition_expression_utf8']})\nPARTITIONS {len(dd['dd_object']['partitions'])} */" + elif dd['dd_object']['partition_type'] == 1: #hash分区 + pt = f"/*!50100 PARTITION BY HASH ({dd['dd_object']['partition_expression_utf8']})\nPARTITIONS {len(dd['dd_object']['partitions'])} */" + else: #不支持其它分区了(就4种: https://dev.mysql.com/doc/refman/8.0/en/partitioning-types.html) + pass + self.table.partitions = pt + + + table_options = {} + table_options['engine'] = dd['dd_object']['engine'] + for x in dd['dd_object']['options'].split(';')[:-1]: + xk,xv = x.split('=') + table_options[xk] = xv + table_options['comment'] = dd['dd_object']['comment'] + coll_id = dd['dd_object']['collation_id'] + table_options['charset'] = COLLID_TO_CHAR[coll_id][0] + table_options['collate'] = COLLID_TO_CHAR[coll_id][1] + #table_options['se_private_data'] = dd['dd_object']['se_private_data'] #instant_col 做过ONLIE DDL的字段 + self.table.table_options = 
table_options + if dd['dd_object']['row_format'] == 3: + self.table.row_format = "COMPRESSED" + elif dd['dd_object']['row_format'] == 4: + self.table.row_format = "REDUNDANT" + elif dd['dd_object']['row_format'] == 5: + self.table.row_format = "COMPACT" + elif dd['dd_object']['row_format'] == 2: + self.table.row_format = "DYNAMIC" + else: + self.table.row_format = "Unknown" + + + + + + def get_ddl(self): + """ + 返回表的DDL 参考:https://dev.mysql.com/doc/refman/8.0/en/create-table.html + """ + self._init_table() + return self.table.get_ddl() + + def get_dict(self): + """ + 返回SDI信息(dict). (读一行数据) + """ + offset = struct.unpack('>H',self.bdata[PAGE_NEW_INFIMUM-2:PAGE_NEW_INFIMUM])[0] + PAGE_NEW_INFIMUM + dtype,did = struct.unpack('>LQ',self.bdata[offset:offset+12]) + dtrx = int.from_bytes(self.bdata[offset+12:offset+12+6],'big') + dundo = int.from_bytes(self.bdata[offset+12+6:offset+12+6+7],'big') + dunzip_len,dzip_len = struct.unpack('>LL',self.bdata[offset+33-8:offset+33]) + unzbdata = zlib.decompress(self.bdata[offset+33:offset+33+dzip_len]) + dic_info = json.loads(unzbdata.decode()) + return dic_info if len(unzbdata) == dunzip_len else {} + + + def get_columns(self): + """ + 返回字段信息dict, 字段名字, 大小, 是否可变长, 是否可为空, 默认值等 + """ + return self.table.column diff --git a/ibd2sql/innodb_page_spaceORxdes.py b/ibd2sql/innodb_page_spaceORxdes.py new file mode 100644 index 0000000..7f8d010 --- /dev/null +++ b/ibd2sql/innodb_page_spaceORxdes.py @@ -0,0 +1,58 @@ +from ibd2sql.innodb_page import * + +class xdes(page): + """ + |---> FIL_HEADER 38 bytes + |---> SPACE_HEADER (only fsp_hdr) 112 bytes + |---> XDES 0 40 bytes +XDES/FSP_HDR-|---> XDES ... 
40 bytes + |---> XDES 255 40 bytes + |---> FIL_TRAILER 8 bytes + + + |---> FSP_SPACE_ID 4 bytes + |---> FSP_NOT_USED 4 bytes + |---> FSP_SIZE 4 bytes + |---> FSP_FREE_LIMIT 4 bytes + |---> FSP_SPACE_FLAGS 4 bytes +SPACE_HEADER-|---> FSP_FRAG_N_USED 4 bytes + |---> FSP_FREE 16 bytes + |---> FSP_FREE_FRAG 16 bytes + |---> FSP_FULL_FRAG 16 bytes + |---> FSP_SEG_ID 8 bytes + |---> FSP_SEG_INODES_FULL 16 bytes + |---> FSP_SEG_INODES_FREE 16 bytes + """ + def __init__(self,*args,**kwargs): + super().__init__(*args,**kwargs) + self.fsp_status = False + if self.FIL_PAGE_TYPE not in (9,8) : #FIL_PAGE_TYPE_XDES,FIL_PAGE_TYPE_FSP_HDR + return None + self.page_name = 'XDES' + + if self.FIL_PAGE_TYPE == 8: + self.page_name = "FSP_HDR" + self.FSP_SPACE_ID, self.FSP_NOT_USED, self.FSP_SIZE, self.FSP_FREE_LIMIT, self.FSP_SPACE_FLAGS, self.FSP_FRAG_N_USED = struct.unpack('>6L',self.read(24)) + self.FSP_FREE = FLST_BASE_NODE(self.read(16)) + self.FSP_FREE_FRAG = FLST_BASE_NODE(self.read(16)) + self.FSP_FULL_FRAG = FLST_BASE_NODE(self.read(16)) + self.FSP_SEG_ID = struct.unpack('>Q',self.read(8)) + self.FSP_SEG_INODES_FULL = FLST_BASE_NODE(self.read(16)) + self.FSP_SEG_INODES_FREE = FLST_BASE_NODE(self.read(16)) + + + #XDES + self.XDES = [] + for x in range(256): + self.XDES.append(XDES(self.read(40))) + + #SDI PAGE NUMBER for issue 5 https://github.com/ddcw/ibd2sql/issues/5 + self.offset += INFO_MAX_SIZE #SDI_OFFSET + sdi_version = struct.unpack('>I',self.read(4))[0] + if sdi_version == SDI_VERSION: + sdi_page_no = struct.unpack('>I',self.read(4))[0] + self.SDI_PAGE_NO = sdi_page_no + self.fsp_status = True + else: + self.fsp_status = False + #return False diff --git a/ibd2sql/innodb_type.py b/ibd2sql/innodb_type.py new file mode 100644 index 0000000..dc6e68c --- /dev/null +++ b/ibd2sql/innodb_type.py @@ -0,0 +1,156 @@ +#innodb type +# storage/innobase/include/data0type.h +# storage/innobase/include/data0type.ic +import re +import base64 + +INNODB_TYPE = { + 2: 'tinyint', + 3: 
'smallint', + 4: 'int', + 5: 'float', + 6: 'double', + 9: 'bigint', + 10:'mediumint', + 14:'year', + 15:'date', + 16:'varbinary', #varchar + 17:'bit', + 18:'timestamp', + 19:'datetime', + 20:'time', + 21:'decimal', + 22:'enum', + 23:'set', + 24:'tinytext', #tinyblob + 25:'mediumblob', #mediumtext + 26:'longblob', #longtext + 27:'blob', #text + 29:'char', # not binary 虽然和char都是29, 但存储方式不同.... -_- + 31:'json' +} + +def innodb_type_isvar(col): + """ +varsize: varsize.size + | + | size:data size + | | +VARSIZE DATA ----< elements_dict: when type in (enum,set) + | | + | isbig:isbig? + | +isvar: isvar? + """ + ct = INNODB_TYPE[col['type']] + isvar = False + size = 0 + isbig = False + extra = None + elements_dict = {} + for e in col['elements']: + ename = base64.b64decode(e['name']).decode() + ekey = e['index'] + elements_dict[ekey] = ename + esize = len(elements_dict) + varsize = 0 #可变长度的大小 0:自适应(for varchar), 1+ 记录数据大小的 大小 VARSIZE:DATA Like:varsize.size + if ct == "tinyint": + size = 1 + elif col['column_type_utf8'][:6] == 'binary': + size = int(re.compile('binary\((.+)\)').findall(col['column_type_utf8'],)[0]) + ct = 'binary' + elif ct == "smallint": + size = 2 + elif ct == "int": + size = 4 + elif ct == "float": + try: + ext = int(re.compile('float\((.+)\)').findall(col['column_type_utf8'],)[0]) + except: + ext = 0 + + size = 4 if ext <= 24 else 8 + elif ct == "double": + size = 8 + elif ct == "bigint": + size = 8 + elif ct == "mediumint": + size = 3 + elif ct == "year": + size = 1 + elif ct == "date": + size = 3 + elif ct == "varbinary": + isvar = True + elif ct == "bit": + try: + ext = int(re.compile('bit\((.+)\)').findall(col['column_type_utf8'],)[0]) + except: + ext = 0 + size = int((ext+7)/8) + elif ct == "timestamp": + try: + ext = int(re.compile('timestamp\((.+)\)').findall(col['column_type_utf8'],)[0]) + except: + ext = 0 + size = 4+int((ext+1)/2) + elif ct == "datetime": + try: + ext = ext = 
int(re.compile('datetime\((.+)\)').findall(col['column_type_utf8'],)[0]) + except: + ext = 0 + size = 5+int((ext+1)/2) + elif ct == "time": + try: + ext = int(re.compile('time\((.+)\)').findall(col['column_type_utf8'],)[0]) + except: + ext = 0 + size = 3+int((ext+1)/2) + elif ct == "decimal": + try: + total_digits, decimal_digits = re.compile('decimal\((.+)\)').findall(col['column_type_utf8'],)[0].split(',') + total_digits = int(total_digits) + decimal_digits = int(decimal_digits) + integer_p1_count = int((total_digits - decimal_digits)/9) # + integer_p2_count = total_digits - decimal_digits - integer_p1_count*9 + integer_size = integer_p1_count*4 + int((integer_p2_count+1)/2) + decimal_p1_count = int(decimal_digits/9) + decimal_p2_count = decimal_digits - decimal_p1_count*9 + decimal_size = decimal_p1_count*4 + int((decimal_p2_count+1)/2) + total_size = integer_size + decimal_size + + size = total_size #decimal占用大小 + extra = (integer_size,decimal_size,(total_digits,decimal_digits)) + except: + size = 0 + elif ct == "enum": + size = 2 if esize >= 2**8 else 1 #只有一个值, 2字节能表示65535 value + elif ct == "set": + size = int((esize+7)/8) #多个值, 每个值一个bit. 
MAX:8bytes=64bit + elif ct == "tinytext": + varsize = 1 + elif ct == "mediumblob": + size = 20 + isvar = True + isbig = True + elif ct == "longblob": + size = 20 + isvar = True + isbig = True + elif ct == "blob": + size = 20 + isvar = True + isbig = True + elif ct == "char": + isvar = True #innodb_default_row_format != COMPACT + elif ct == "json": + size = 20 + isvar = True + isbig = True + + return ct,isvar,size,isbig,elements_dict,varsize,extra #数据类型, 是否为变长, 大小, 是否为大字段, set/enum elements + +def innodb_data_to_py(bdata,col): + if dtype == 'int': + pass + diff --git a/ibd2sql/mysql_json.py b/ibd2sql/mysql_json.py new file mode 100644 index 0000000..107e6c7 --- /dev/null +++ b/ibd2sql/mysql_json.py @@ -0,0 +1,266 @@ +#@mysql sql/json_binary.h +import struct +import sys + +_ = """ + - ----------------- + | JSON OBJECT/ARRAY | + - ----------------- + | + ------------------------------------------------------------------------- +| TYPE | ELEMENT_COUNT | KEY-ENTRY(if object) | VALUE-ENTRY | KEY | VALUE | + ------------------------------------------------------------------------- + | | | + | | -------------- + -------------------------- | | UTF8MB4 DATA | + | KEY-OFFSET | KEY-LENGTH | | -------------- + -------------------------- | + | + -------------------------------- + | TYPE | OFFSET/VALUE(if small) | + -------------------------------- + +small 2 bytes large 4 bytes +--------------------------------------------------- +TYPE 1 byte +COUNT 2/4 bytes +SIZE 2/4 bytes +VALUE VALUE/OBJECT/ARRAY +--------------------------------------------------- + +--------------------------------------------------- +OBJECT VALUE = KEY_ENTRY + VALUE_ENTRY + KEY + VALUE #KEY肯定是字符串, 所以不需要记录数据类型 +ARRAY VALUE = VALUE_ENTRY + VALUE #不需要KEY + +KEY_ENTRY = KEY_OFFSET(2/4bytes) + KEY_LNGTH(2 bytes) +VALUE_ENTRY = TYPE(1byte) + OFFSET(2/4 bytes)/VALUE (如果类型是int,literal之类的,就直接是值了, 否则就走OFFSET) +--------------------------------------------------- + +""" + +# type ::= +# 0x00 | // small JSON object +# 
0x01 | // large JSON object +# 0x02 | // small JSON array +# 0x03 | // large JSON array +# 0x04 | // literal (true/false/null) +# 0x05 | // int16 +# 0x06 | // uint16 +# 0x07 | // int32 +# 0x08 | // uint32 +# 0x09 | // int64 +# 0x0a | // uint64 +# 0x0b | // double +# 0x0c | // utf8mb4 string +# 0x0f // custom data (any MySQL data type) + + +# value ::= +# object | +# array | +# literal | +# number | +# string | +# custom-data + +class jsonob(object): + def __init__(self,bdata,t): + """ + bdata = json data + t 类型 json类型 + """ + self.bdata = bdata + self.t = t + self.offset = 0 + self.ssize = 2 if self.t == 0x00 or self.t == 0x02 else 4 + self._type = None + self._bdata = b'' + #print("BEGIN JSON TO B, CURRENT TYPE:",self.t) + + def read_key_entry(self): + """ + read key-entry + """ + #print("READ KEY ENTRY") + key_entry = [] + for x in range(self.element_count): + key_offset = self.read_little() + key_length = self.read_little(2) + key_entry.append((key_offset,key_length)) + self.key_entry = key_entry + + def read_value_entry(self): + #print("READ VALUE ENTRY") + value_entry = [] + for x in range(self.element_count): + t = self.read_little(1) + #print("\t entry: type:",t) + data = None + if t < 0x04: + #print("READ VALUE ENTRY JSON object/array") + data = self.read_little() + elif t == 0x04: #literal + #print("READ VALUE ENTRY literal") + _data = self.read_little() + if _data == 1: + data = True + elif _data == 2: + data = False + elif _data == 0: + data = None + else: + data = '' + elif t >= 0x05 and t <= 0x0a: #inline data + #print("READ VALUE ENTRY Inline data for INT",t,0x05,0x0a) + data = self.read_inline_data(t) + elif t == 0x0b: #double + #print("READ VALUE ENTRY Double") + #data = struct.unpack('d',self.read(8))[0] + data = self.read_little() + elif t == 0x0c: #string + #print("READ DATA ENTRY STRING",self.offset) + data = self.read_little() #OFFSET + value_entry.append((t,data)) + self.value_entry = value_entry + #print("VALUE ENTRY LIST 
---------",self.value_entry) + + def read_key(self): + #print("READ KEY") + key = [] + for x in self.key_entry: + key.append(self.bdata[x[0]:x[0]+x[1]].decode() ) + self.key = key + + def read_value(self): + #print("READ VALUE") + value = [] + for x in self.value_entry: + #print("VALUE TYPE:xxxxxxx",x[0]) + if x[0] == 0x0c: #字符串 + _s,size = self.read_var(x[1]) + #size = int.from_bytes(self.bdata[x[1]:x[1]+1],'little') #先都按1字节计算 + value.append(self.bdata[x[1]+_s:x[1]+_s+size].decode()) + elif x[0] == 0x0b: + value.append(struct.unpack('d',self.bdata[x[1]:x[1]+8])[0]) + elif x[0] <= 0x03: #json对象, 又递归 + s = self.ssize + size = int.from_bytes(self.bdata[x[1]+s: x[1]+s+s ], 'little') + data = self.bdata[x[1]:x[1]+size] + _aa = jsonob(data,x[0]) + value.append(_aa.init()) + else: + value.append(x[1]) + self.value = value + + def read_var(self,offset): + """ + 读mysql的varchar的 记录长度的大小, 范围字节数量和大小 + 如果第一bit是1 就表示要使用2字节表示: + 后面1字节表示 使用有多少个128字节, 然后加上前面1字节(除了第一bit)的数据(0-127) 就是最终数据 +----------------------------------------------------- +| 1 bit flag | 7 bit data | if flag, 8 bit data*128 | +----------------------------------------------------- + """ + _s = int.from_bytes(self.bdata[offset:offset+1],'little') + size = 1 + if _s & (1<<7): + size += 1 + _s = self.bdata[offset:offset+2] + _t = int.from_bytes(_s[1:2],'little')*128 + int.from_bytes(_s[:1],'little')-128 + else: + _t = _s + + return size,_t + + + def init(self,): + #print(self.bdata) + self.element_count = self.read_little() + #print("ELEMENT COUNT:",self.element_count) + #print(self.read_little()) + self._size = self.read_little() + #print(f"THIS OBJECT SIZE:",self._size, "ACTUAL SIZE:",len(self.bdata)) + if self._size != len(self.bdata): + return None + #print("WILL INIT") + if self.t == 0x00 or self.t == 0x01: #object + self._type = "JSON Object" + #print(f"THIS TYPE IS {self._type}") + self.data = {} + self.read_key_entry() + self.read_value_entry() + self.read_key() + self.read_value() + self.data = {k:v for k,v 
in zip(self.key,self.value)} + + elif self.t == 0x02 or self.t == 0x03: #array + self._type = "JSON Array" + #print(f"THIS TYPE IS {self._type}") + self.data = [] + self.read_value_entry() + self.read_value() + self.data = self.value + return self.data + + + def read_little(self,ssize=None): + ssize = self.ssize if ssize is None else ssize + s = int.from_bytes(self.read(ssize),'little') + #print(f"READ LITTLE SIZE: {ssize} bytes bdata:{self._bdata} value:{s} ") + return s + + def read(self,n): + _t = self.bdata[self.offset:self.offset+n] + self.offset += n + self._bdata = _t + return _t + + def _read_int(self,n): + data = self.read(n) + return int.from_bytes(data,'big') + + def read_uint(self,n,is_unsigned=True): + _t = self._read_int(n) + _s = n*8 - 1 + #print("read uint",self._bdata,_t,_s) + return (_t&((1<<_s)-1))-2**_s if _t < 2**_s and not is_unsigned else (_t&((1<<_s)-1)) + + def read_int(self,n): + return self.read_uint(n,False) + + def read_inline_data(self,t): + n = 0 + is_unsigned = True + #print("\tread_inline_data TYPE:",t) + if t == 0x05: #int16 + n = 2 + elif t == 0x06: #uint16 + n = 2 + is_unsigned = True + elif t == 0x07: #int32 + n = 4 + elif t == 0x08: #uint32 + n = 4 + is_unsigned = True + elif t == 0x09: #int64 + n = 8 + elif t == 0x0a: #uint64 + n = 8 + is_unsigned = True + #return self.read_uint(n,is_unsigned) + signed = False if is_unsigned else True + rs = int.from_bytes(self.read(n),'little',signed=signed) + #print("\tINLINE DATA:",rs) + return rs + + + + +#aa = btojson(b'\x00\x01\x00\r\x00\x0b\x00\x02\x00\x05{\x00t1') +#aa = btojson(b'\x00\x01\x00,\x00\x0b\x00\x02\x00\x0c\r\x00t1\x1eAAAAAAAAAAAAAAAAACBBBBBBBBBBBB') +#aa = btojson(b'\x00\x02\x00)\x00\x12\x00\x02\x00\x14\x00\x02\x00\x00\x16\x00\x0c&\x00a1a2\x01\x00\x10\x00\x0b\x00\x02\x00\x0c\r\x00b1\x02b1\x02a6') +#aa = jsonob(b'\x01\x00\r\x00\x0b\x00\x02\x00\x05{\x00t1',0x00) +#aa = jsonob(b'\x01\x00,\x00\x0b\x00\x02\x00\x0c\r\x00t1\x1eAAAAAAAAAAAAAAAAACBBBBBBBBBBBB',0x00) +#aa = 
# InnoDB file page types — values of the FIL_PAGE_TYPE field in the FIL header.
# Mirrors mysql storage/innobase/include/fil0fil.h

FIL_PAGE_INDEX = 17855                  # B-tree node
FIL_PAGE_RTREE = 17854                  # R-tree node
FIL_PAGE_SDI = 17853                    # Tablespace SDI Index page
FIL_PAGE_TYPE_UNUSED = 1                # this page type is unused
FIL_PAGE_UNDO_LOG = 2                   # undo log page
FIL_PAGE_INODE = 3                      # index node
FIL_PAGE_IBUF_FREE_LIST = 4             # insert buffer free list

# page types introduced in MySQL/InnoDB 5.1.7
FIL_PAGE_TYPE_ALLOCATED = 0             # freshly allocated page
FIL_PAGE_IBUF_BITMAP = 5                # insert buffer bitmap
FIL_PAGE_TYPE_SYS = 6                   # system page
FIL_PAGE_TYPE_TRX_SYS = 7               # transaction system data
FIL_PAGE_TYPE_FSP_HDR = 8               # file space header
FIL_PAGE_TYPE_XDES = 9                  # extent descriptor page
FIL_PAGE_TYPE_BLOB = 10                 # uncompressed BLOB page
FIL_PAGE_TYPE_ZBLOB = 11                # first compressed BLOB page
FIL_PAGE_TYPE_ZBLOB2 = 12               # subsequent compressed BLOB page
FIL_PAGE_TYPE_UNKNOWN = 13              # old tablespaces: garbage replaced with this on flush
FIL_PAGE_COMPRESSED = 14                # compressed page
FIL_PAGE_ENCRYPTED = 15                 # encrypted page
FIL_PAGE_COMPRESSED_AND_ENCRYPTED = 16  # compressed and encrypted page
FIL_PAGE_ENCRYPTED_RTREE = 17           # encrypted R-tree page
FIL_PAGE_SDI_BLOB = 18                  # uncompressed SDI BLOB page
FIL_PAGE_SDI_ZBLOB = 19                 # compressed SDI BLOB page
FIL_PAGE_TYPE_LEGACY_DBLWR = 20         # legacy doublewrite buffer page
FIL_PAGE_TYPE_RSEG_ARRAY = 21           # rollback segment array page
FIL_PAGE_TYPE_LOB_INDEX = 22            # index pages of uncompressed LOB
FIL_PAGE_TYPE_LOB_DATA = 23             # data pages of uncompressed LOB
FIL_PAGE_TYPE_LOB_FIRST = 24            # first page of an uncompressed LOB
FIL_PAGE_TYPE_ZLOB_FIRST = 25           # first page of a compressed LOB
FIL_PAGE_TYPE_ZLOB_DATA = 26            # data pages of compressed LOB
FIL_PAGE_TYPE_ZLOB_INDEX = 27           # index pages of compressed LOB (array of z_index_entry_t)
FIL_PAGE_TYPE_ZLOB_FRAG = 28            # fragment pages of compressed LOB
FIL_PAGE_TYPE_ZLOB_FRAG_ENTRY = 29      # index pages of fragment pages (compressed LOB)
#!/usr/bin/env python3
# write by ddcw @https://github.com/ddcw/ibd2sql
# Entry point: parses command-line options, configures an ibd2sql object
# and prints the DDL and/or data SQL extracted from a MySQL 8.0 .ibd file.

from ibd2sql import __version__
from ibd2sql.ibd2sql import ibd2sql
import argparse
import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'ibd2sql/')))

# Chinese-language option summary (kept for reference; not printed anywhere).
_help = """
--limit 限制行数, 返回指定的行数就退出, 默认-1 表示无限制
--where1 根据字段匹配 比如 --where1="id>1 and id < 10"
--where2 限制TRX范围, 比如 --where2=2,10 #即trx在2(含)到10(含)的事务修改的数据才会输出
--where3 限制rollptr的. 同trx
--force 是否跳过报错的
--set 将set/enum(default) 的int转换为实际表示的字符串. (默认使用逗号隔开)
--ddl 输出信息包含DDL
--deleted 仅输出标记为delete的数据
--debug 调试, 输出的信息较多
--parallel 设置并发数量(默认4)
--debug-filename 调试的输出信息文件, 默认stdout
--multivalue 每页数据使用一个insert
--replace 使用replace into 替换insert (和multivalue冲突)
--replace-table 替换表名(含DDL)
--replace-schema 替换数据库名
--sdi-table 指定使用该表的sdi作为元数据信息(分区表要)
--sdi-file 指定sdi文件(json)作为元数据信息(可以使用ibd2sdi生成相关信息)

--page-min 设置起始页 (方便调试)
--page-max 设置停止页
--page-count 限制解析的页数量(方便调试)
--page-skip 跳过的page数量 也是方便调试的

"""

# Output-format notes (ints unquoted, binary as base64, everything else quoted).
stout = """
输出格式说明:
int等输出为无引号字符串
binary输出为base64
其它均输出为字符串
"""


def _argparse():
    """Build the CLI, handle --version, and return the parsed namespace."""
    parser = argparse.ArgumentParser(add_help=True, description='解析mysql8.0的ibd文件 https://github.com/ddcw/ibd2sql')
    parser.add_argument('--version', '-v', '-V', action='store_true', dest="VERSION", default=False, help='show version')
    parser.add_argument('--ddl', '-d', action='store_true', dest="DDL", default=False, help='print ddl')
    parser.add_argument('--sql', action='store_true', dest="SQL", default=False, help='print data by sql')
    parser.add_argument('--delete', action='store_true', dest="DELETED", default=False, help='print data only for flag of deleted')
    parser.add_argument('--complete-insert', action='store_true', dest="COMPLETE_INSERT", default=False, help='use complete insert statements for sql')
    # fixed typo: "pasrser" -> "parser"
    parser.add_argument('--force', '-f', action='store_true', dest="FORCE", default=False, help='force parser file when Error Page')
    parser.add_argument('--set', action='store_true', dest="SET", default=False, help='set/enum to fill in actual data instead of strings')
    parser.add_argument('--multi-value', action='store_true', dest="MULTI_VALUE", default=False, help='single sql if data belong to one page')
    parser.add_argument('--replace', action='store_true', dest="REPLACE", default=False, help='"REPLACE INTO" replace to "INSERT INTO" (default)')
    parser.add_argument('--table', dest="TABLE_NAME", help='replace table name except ddl')
    # fixed copy-paste help text (said "table name")
    parser.add_argument('--schema', dest="SCHEMA_NAME", help='replace schema name except ddl')
    parser.add_argument('--sdi-table', dest="SDI_TABLE", help='read SDI PAGE from this file(ibd)(partition table)')

    # row filters
    parser.add_argument('--where-trx', dest="WHERE_TRX", help='default (0,281474976710656)')
    parser.add_argument('--where-rollptr', dest="WHERE_ROLLPTR", help='default (0,72057594037927936)')
    parser.add_argument('--where', dest="WHERE", help='filter data(TODO)')
    parser.add_argument('--limit', dest="LIMIT", type=int, help='limit rows')

    # DEBUG helpers
    parser.add_argument('--debug', '-D', action='store_true', dest="DEBUG", default=False, help="will DEBUG (it's too big)")
    parser.add_argument('--debug-file', dest="DEBUG_FILE", help='default sys.stdout if DEBUG')
    parser.add_argument('--page-min', action='store', type=int, dest="PAGE_MIN", default=0, help='if PAGE NO less than it, will break')
    parser.add_argument('--page-max', action='store', type=int, dest="PAGE_MAX", default=4294967296, help='if PAGE NO great than it, will break')
    parser.add_argument('--page-start', action='store', type=int, dest="PAGE_START", help='INDEX PAGE START NO')
    parser.add_argument('--page-count', action='store', type=int, dest="PAGE_COUNT", help='page count NO')
    parser.add_argument('--page-skip', action='store', type=int, dest="PAGE_SKIP", help='skip some pages when start parse index page')

    # TODO: unused for now; type=int added so a supplied value is not a str
    parser.add_argument('--parallel', '-p', action='store', type=int, dest="PARALLEL", default=4, help='parse to data/sql with N threads.(default 4) TODO')

    # IBD FILE
    parser.add_argument(dest='FILENAME', help='ibd filename', nargs='?')

    if parser.parse_args().VERSION:
        print(f"ibd2sql VERSION: v{__version__} only for MySQL 8.0")
        sys.exit(0)
    return parser.parse_args()


if __name__ == '__main__':
    parser = _argparse()
    # default: with no --sql, behave as if --ddl was given
    if not parser.SQL:
        parser.DDL = True
    filename = parser.FILENAME
    if not os.path.exists(filename):
        # was an f-string with no placeholder; report the missing path
        sys.stderr.write(f"\nno file {filename}\n\n")
        sys.exit(1)
    # debug file existence is not validated here
    if parser.DEBUG_FILE is not None and os.path.exists(filename):
        pass

    # build an ibd2sql object, then configure it from the CLI options
    ddcw = ibd2sql()
    ddcw.FILENAME = parser.FILENAME
    if parser.DEBUG:
        ddcw.DEBUG = True
    if parser.SDI_TABLE:
        ddcw.IS_PARTITION = True

    ddcw.COMPLETE_SQL = True if parser.COMPLETE_INSERT else False

    # basic page-range filters
    ddcw.REPLACE = True if parser.REPLACE else False
    if parser.PAGE_COUNT:
        ddcw.PAGE_COUNT = parser.PAGE_COUNT
    if parser.PAGE_MIN:
        ddcw.PAGE_MIN = parser.PAGE_MIN
    if parser.PAGE_MAX:
        ddcw.PAGE_MAX = parser.PAGE_MAX
    if parser.PAGE_START:
        ddcw.PAGE_START = parser.PAGE_START
    if parser.PAGE_SKIP:
        ddcw.PAGE_SKIP = parser.PAGE_SKIP
    if parser.FORCE:
        ddcw.FORCE = parser.FORCE

    # partition tables: borrow SDI metadata from the given sibling ibd file
    if parser.SDI_TABLE:
        ddcw.IS_PARTITION = True
        aa = ibd2sql()
        aa.FILENAME = parser.SDI_TABLE
        aa.init()
        ddcw.table = aa.table
        ddcw._init_table_name()
        aa.close()

    if parser.DEBUG_FILE is not None:
        f = open(parser.DEBUG_FILE, 'a')
        ddcw.DEBUG = True
        ddcw.DEBUG_FD = f

    if parser.DELETED:
        ddcw.DELETE = True

    if parser.SET:
        ddcw.SET = True

    if parser.MULTI_VALUE:
        ddcw.MULTIVALUE = True

    # row filters by transaction id / rollback pointer: "low,high" inclusive
    if parser.WHERE_TRX:
        _a = [int(x) for x in parser.WHERE_TRX.split(',')]
        ddcw.WHERE2 = _a[:2]

    if parser.WHERE_ROLLPTR:
        _a = [int(x) for x in parser.WHERE_ROLLPTR.split(',')]
        ddcw.WHERE3 = _a[:2]

    # parse the table metadata
    ddcw.init()

    if parser.TABLE_NAME:
        ddcw.replace_name(parser.TABLE_NAME)

    if parser.SCHEMA_NAME:
        ddcw.replace_schema(parser.SCHEMA_NAME)

    if parser.DDL:
        print(ddcw.get_ddl())

    # --multi-value conflicts with --replace; --replace wins
    ddcw.MULTIVALUE = True if parser.MULTI_VALUE and not parser.REPLACE else False
    ddcw.REPLACE = True if parser.REPLACE else False
    ddcw.LIMIT = parser.LIMIT if parser.LIMIT else -1
    if parser.SQL and ddcw.table.row_format in ['DYNAMIC', 'COMPACT']:
        ddcw.get_sql()
    elif not ddcw.table.row_format in ['DYNAMIC', 'COMPACT']:
        sys.stderr.write(f"\nNot support row format. {ddcw.table.row_format}\n\n")

    # close all file descriptors
    ddcw.close()
    if parser.DEBUG_FILE is not None:
        try:
            f.close()
        except Exception:
            pass