GreenPlum中的is null可以走索引吗？is null和''空字符串一样吗？

教育 2024-09-26 20:30 陕西

简介

在 Greenplum 中，NULL 和空字符串 ('') 是两个不同的概念，他们在是否走索引方面也有不同。
1、 NULL：

表示“没有值”或“未知值”。在数据库中，NULL 代表缺失的信息。
在查询中，你可以使用 IS NULL 来检查 NULL 值，例如：

1SELECT * FROM your_table WHERE column_name IS NULL;

2、 空字符串 ('')：

表示一个长度为零的字符串，但它仍然是一个有效的值。它不是 NULL。
在查询中，你可以直接检查空字符串，例如：

1SELECT * FROM your_table WHERE column_name = '';

3、比较：

在比较中，NULL 与任何值（包括空字符串）比较时都会返回 UNKNOWN，而空字符串与空字符串的比较则返回 TRUE。
例如：

1  SELECT CASE 
2       WHEN NULL = '' THEN 'Equal' 
3       WHEN NULL IS NULL THEN 'NULL' 
4       ELSE 'Not Equal' 
5      END;

上面的查询将返回 'NULL'。

4、 常见用法：

使用 COALESCE 函数可以在处理 NULL 值时提供默认值，例如：

1  SELECT COALESCE(column_name, 'default_value') FROM your_table;

这将返回列的值，如果该值为 NULL，则返回 'default_value'。

5、 注意事项：

在设计数据库时，了解何时使用 NULL 和空字符串是重要的，以避免混淆和不一致性。

通过以上方法，可以在 Greenplum 中有效地区分和处理 NULL 值和空字符串。

测试SQL

 1CREATE TABLE t_hash3 AS 
 2SELECT id, md5(id::text)
 3FROM generate_series(1, 2000000) AS id;
 4
 5
 6update t_hash3 set md5=null where id=3;
 7update t_hash3 set md5='' where id=4;
 8
 9select * from t_hash3 where md5 is null;
10select * from t_hash3 where md5 ='';
11
12create index  idx_t_hash3_md5_null on t_hash3(md5) where md5 is null;
13create index  idx_t_hash3_md5_1 on t_hash3(md5) ;
14
15
16select * from t_hash3 where md5 is null;
17select * from t_hash3 where md5 ='';
18
19
20 set optimizer = off;
21select * from t_hash3 where md5 is null;
22
23
24-- 尝试
25set optimizer = off; 
26set enable_seqscan = off; 
27set random_page_cost=1;

GP6

 1db2=# CREATE TABLE t_hash3 AS 
 2db2-# SELECT id, md5(id::text)
 3db2-# FROM generate_series(1, 2000000) AS id;
 4SELECT 2000000
 5db2=# 
 6db2=# 
 7db2=# update t_hash3 set md5=null where id=3;
 8UPDATE 1
 9db2=# update t_hash3 set md5='' where id=4;
10UPDATE 1
11db2=# 
12db2=# 
13db2=# create index  idx_t_hash3_md5_null on t_hash3(md5) where md5 is null;
14CREATE INDEX
15db2=# create index  idx_t_hash3_md5_1 on t_hash3(md5) ;
16CREATE INDEX
17db2=# 
18db2=# select * from t_hash3 where md5 is null;
19select * from t_hash3 where md5 =''; id | md5 
20----+-----
21  3 | 
22(1 row)
23
24db2=# select * from t_hash3 where md5 ='';
25 id | md5 
26----+-----
27  4 | 
28(1 row)
29
30db2=# explain select * from t_hash3 where md5 is null;
31                                  QUERY PLAN                                   
32-------------------------------------------------------------------------------
33 Gather Motion 8:1  (slice1; segments: 8)  (cost=0.00..448.44 rows=1 width=37)
34   ->  Seq Scan on t_hash3  (cost=0.00..448.44 rows=1 width=37)
35         Filter: (md5 IS NULL)
36 Optimizer: Pivotal Optimizer (GPORCA)
37(4 rows)
38
39db2=# explain select * from t_hash3 where md5 ='';
40                                       QUERY PLAN                                       
41----------------------------------------------------------------------------------------
42 Gather Motion 8:1  (slice1; segments: 8)  (cost=0.00..6.00 rows=1 width=37)
43   ->  Index Scan using idx_t_hash3_md5_1 on t_hash3  (cost=0.00..6.00 rows=1 width=37)
44         Index Cond: (md5 = ''::text)
45 Optimizer: Pivotal Optimizer (GPORCA)
46(4 rows)
47
48db2=# 
49db2=#  set optimizer = off;
50SET
51db2=# explain select * from t_hash3 where md5 is null;
52                                         QUERY PLAN                                          
53---------------------------------------------------------------------------------------------
54 Gather Motion 8:1  (slice1; segments: 8)  (cost=0.17..200.18 rows=1 width=37)
55   ->  Index Scan using idx_t_hash3_md5_null on t_hash3  (cost=0.17..200.18 rows=1 width=37)
56         Index Cond: (md5 IS NULL)
57 Optimizer: Postgres query optimizer
58(4 rows)
59
60db2=# select * from t_hash3 where id in (3,4);
61 id | md5 
62----+-----
63  3 | 
64  4 | 
65(2 rows)
66
67db2=#

GP7

 1db2=# CREATE TABLE t_hash3 AS 
 2db2-# SELECT id, md5(id::text)
 3db2-# FROM generate_series(1, 2000000) AS id;
 4
 5
 6SELECT 2000000
 7db2=# 
 8db2=# 
 9db2=# 
10db2=# update t_hash3 set md5=null where id=3;
11update t_hash3 set md5='' where id=4;UPDATE 1
12db2=# update t_hash3 set md5='' where id=4;
13UPDATE 1
14db2=# 
15db2=# select * from t_hash3 where md5 is null;
16select * from t_hash3 where md5 =''; id | md5 
17----+-----
18  3 | 
19(1 row)
20
21db2=# select * from t_hash3 where md5 ='';
22 id | md5 
23----+-----
24  4 | 
25(1 row)
26
27db2=# 
28db2=# create index  idx_t_hash3_md5_null on t_hash3(md5) where md5 is null;
29create index  idx_t_hash3_md5_1 on t_hash3(md5) ;
30CREATE INDEX
31db2=# create index  idx_t_hash3_md5_1 on t_hash3(md5) ;
32
33CREATE INDEX
34db2=# 
35db2=# explain select * from t_hash3 where md5 is null;
36                                       QUERY PLAN                                       
37----------------------------------------------------------------------------------------
38 Gather Motion 2:1  (slice1; segments: 2)  (cost=0.00..6.00 rows=1 width=37)
39   ->  Index Scan using idx_t_hash3_md5_1 on t_hash3  (cost=0.00..6.00 rows=1 width=37)
40         Index Cond: (md5 IS NULL)
41 Optimizer: GPORCA
42(4 rows)
43
44db2=# explain select * from t_hash3 where md5 ='';
45                                       QUERY PLAN                                       
46----------------------------------------------------------------------------------------
47 Gather Motion 2:1  (slice1; segments: 2)  (cost=0.00..6.00 rows=1 width=37)
48   ->  Index Scan using idx_t_hash3_md5_1 on t_hash3  (cost=0.00..6.00 rows=1 width=37)
49         Index Cond: (md5 = ''::text)
50 Optimizer: GPORCA
51(4 rows)
52
53db2=#  set optimizer = off;
54SET
55db2=# explain select * from t_hash3 where md5 ='';
56                                       QUERY PLAN                                       
57----------------------------------------------------------------------------------------
58 Gather Motion 2:1  (slice1; segments: 2)  (cost=0.17..8.21 rows=1 width=37)
59   ->  Index Scan using idx_t_hash3_md5_1 on t_hash3  (cost=0.17..8.19 rows=1 width=37)
60         Index Cond: (md5 = ''::text)
61 Optimizer: Postgres-based planner
62(4 rows)
63
64db2=# explain select * from t_hash3 where md5 is null;
65                                        QUERY PLAN                                         
66-------------------------------------------------------------------------------------------
67 Gather Motion 2:1  (slice1; segments: 2)  (cost=0.12..8.16 rows=1 width=37)
68   ->  Index Scan using idx_t_hash3_md5_null on t_hash3  (cost=0.12..8.14 rows=1 width=37)
69 Optimizer: Postgres-based planner
70(3 rows)
71
72db2=# select * from t_hash3 where id in (3,4);
73 id | md5 
74----+-----
75  4 | 
76  3 | 
77(2 rows)
78
79db2=#

总结

1、在GP 6中，is null可以走索引，但只能使用PG的优化器；在GP7中GPORCA优化器也可以走索引

2、在GP中，is null和=''不一样，=''可以直接走索引，在Navicat中显示不一样：

http://mp.weixin.qq.com/s?__biz=MzIzOTA2NjEzNQ==&mid=2454788447&idx=1&sn=c3934abeba635e0094d6bebe6d9610b8

AiDBA

【PostgreSQL培训认证】【Oracle OCP、OCM、高可用（RAC+DG+OGG）培训认证】【MySQL OCP培训认证】【GreenPlum培训】【SQL Server培训】官网：www.dbaup.com，学习不止数据库

最新文章

Linux用户的福音！微信官方客户端发布，带来跨平台新体验

【原创脚本分享】一键获取Windows主机配置信息bat脚本