实验环境:
DB is PostgreSQL version 8.2.15
JDK1.8
测试一
使用JDBC查询一个SQL:
public static void test1(String url, Properties props){ String sql = "SELECT l.src_ip, l.location_id, " + "SUM(l.us_bytes) as up_usage, " + "SUM(l.ds_bytes) as down_usage, " + "(SUM(l.us_bytes) + SUM(l.ds_bytes) ) as total_usage " + "FROM unmapped_endpoint_location_hours l " + "where l.org_id = 195078 " + "AND date_time >= '2017-04-01 00:00:00.0' AND date_time < '2017-04-08 00:00:00.0' " + "AND l.location_id in (2638,2640,2654 ) " + "GROUP BY l.src_ip, l.location_id "; Connection conn = null; Statement sta = null; try { System.out.println("Start query1:" ); long s_time = System.currentTimeMillis(); conn = DriverManager.getConnection(url, props); sta = conn.createStatement(); sta.execute(sql); System.out.println("Using Time: " + (System.currentTimeMillis() - s_time)); } catch (SQLException e) { e.printStackTrace(); } finally { if (conn != null) { try { conn.close(); } catch (SQLException e) { e.printStackTrace(); } } if (sta != null) { try { sta.close(); } catch (SQLException e) { e.printStackTrace(); } } } }
结果:
Start query1:
Using Time: 11519 ms
测试二
使用JDBC PreparedStatement 查询相同的SQL:
public static void test2(String url, Properties props){ String sql2 = "SELECT l.src_ip, l.location_id, " + "SUM(l.us_bytes) as up_usage, " + "SUM(l.ds_bytes) as down_usage, " + "(SUM(l.us_bytes) + SUM(l.ds_bytes) ) as total_usage " + "FROM unmapped_endpoint_location_hours l " + "where l.org_id = ? " + "AND date_time >= ? AND date_time < ? " + "AND l.location_id in (2638,2640,2654 ) " + "GROUP BY l.src_ip, l.location_id"; Connection conn = null; PreparedStatement preSta = null; try { System.out.println("Start query2:"); long s_time = System.currentTimeMillis(); conn = DriverManager.getConnection(url, props); preSta = conn.prepareStatement(sql2); preSta.setString(1, "195078"); preSta.setString(2, "2017-04-01 00:00:00.0"); preSta.setString(3, "2017-04-09 00:00:00.0"); preSta.executeQuery(); System.out.println("Using Time: " + (System.currentTimeMillis() - s_time)); } catch (SQLException e) { e.printStackTrace(); } finally { if (conn != null) { try { conn.close(); } catch (SQLException e) { e.printStackTrace(); } } if (preSta != null) { try { preSta.close(); } catch (SQLException e) { e.printStackTrace(); } } } }
结果:
Start query2:
Using Time: 143031 ms
相同的SQL,测试二和测试一结果为什么差别这么大?
测试一的SQL没有使用PreparedStatement 方式,直接给了原始的SQL。测试二的使用了PreparedStatement ,但是在set参数的时候用的都是String。
两者查询速度相差10倍,这是不是很奇怪?
现在来做另一个实验:
测试三
使用JDBC PreparedStatement 查询相同的SQL:
public static void test3(String url, Properties props){ String sql2 = "SELECT l.src_ip, l.location_id, " + "SUM(l.us_bytes) as up_usage, " + "SUM(l.ds_bytes) as down_usage, " + "(SUM(l.us_bytes) + SUM(l.ds_bytes) ) as total_usage " + "FROM unmapped_endpoint_location_hours l " + "where l.org_id = ? " + "AND date_time >= ? AND date_time < ? " + "AND l.location_id in (2638,2640,2654 ) " + "GROUP BY l.src_ip, l.location_id"; Connection conn = null; PreparedStatement preSta = null; try { System.out.println("Start query3:"); long s_time = System.currentTimeMillis(); conn = DriverManager.getConnection(url, props); preSta = conn.prepareStatement(sql2); int org_id = 195078; SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); TimeZone.setDefault(TimeZone.getTimeZone("UTC")); Date d1 = null; Date d2 = null; try { d1 = df.parse("2017-04-01 00:00:00"); d2 = df.parse("2017-04-09 00:00:00"); } catch (ParseException e1) { e1.printStackTrace(); } preSta.setInt(1, org_id); preSta.setTimestamp(2, new java.sql.Timestamp(d1.getTime())); preSta.setTimestamp(3, new java.sql.Timestamp(d2.getTime())); preSta.executeQuery(); System.out.println("Using Time: " + (System.currentTimeMillis() - s_time)); } catch (SQLException e) { e.printStackTrace(); } finally { if (conn != null) { try { conn.close(); } catch (SQLException e) { e.printStackTrace(); } } if (preSta != null) { try { preSta.close(); } catch (SQLException e) { e.printStackTrace(); } } } }
结果:
Start query3:
Using Time: 16245 ms
测试结果和测试一的结果差不多,为什么?
这次测试同样使用了PreparedStatement,但是在设置参数的时候指定了参数的类型。
explan analyze
查看explan
dev=# explain analyze SELECT count(loc.name) AS totalNum dev-# FROM (SELECT t.src_ip, t.location_id, t.up_usage, t.down_usage, t.total_usage dev(# FROM (SELECT l.src_ip, l.location_id, dev(# SUM(l.us_bytes) as up_usage, dev(# SUM(l.ds_bytes) as down_usage, dev(# (SUM(l.us_bytes) + SUM(l.ds_bytes) ) as total_usage dev(# FROM unmapped_endpoint_location_hours l dev(# where l.org_id = 195078 dev(# AND date_time >= '2017-04-11 00:00:00.0' AND date_time < '2017-04-20 00:00:00.0' dev(# AND l.location_id in (2638,2640) dev(# GROUP BY l.src_ip, l.location_id ) t dev(# WHERE t.total_usage > 0.0 ) m dev-# LEFT OUTER JOIN locations loc on m.location_id = loc.id WHERE loc.org_id = 195078;
Time: 15202.518 ms
Prepare Expalin: PREPARE test(int,text,text,int) as SELECT count(loc.name) AS totalNum FROM (SELECT t.src_ip, t.location_id, t.up_usage, t.down_usage, t.total_usage FROM (SELECT l.src_ip, l.location_id, SUM(l.us_bytes) as up_usage, SUM(l.ds_bytes) as down_usage, (SUM(l.us_bytes) + SUM(l.ds_bytes) ) as total_usage FROM unmapped_endpoint_location_hours l where l.org_id = $1 AND date_time >= $2 AND date_time < $3 AND l.location_id in (2638,2640) GROUP BY l.src_ip, l.location_id ) t WHERE t.total_usage > 0.0 ) m LEFT OUTER JOIN locations loc on m.location_id = loc.id WHERE loc.org_id = $4; Explain analyze EXECUTE test(195078,'2017-04-11 00:00:00.0','2017-04-20 00:00:00.0',195078); dev=# EXECUTE test(195078,'2017-04-11 00:00:00.0','2017-04-20 00:00:00.0',195078);
Time: 98794.544 ms
结论
PostgreSQL 在使用原始SQL的时候会用表中类型来查,能有效根据where条件过滤结果。
当参数都是使用String的时候,没有指定类型时,PostgreSQL没有先做类型转换,而是扫描了所有的数据,对所有的数据根据where条件过滤结果。
当查询参数指定类型的时候,PostgreSQL可以先根据where条件过滤结果。
相关连接:
It seems when using JDBC with prepare statement, the query will be slow in postgresql:
http://www.postgresql-archive.org/Slow-statement-when-using-JDBC-td3368379.html
https://stackoverflow.com/questions/28236827/preparedstatement-very-slow-but-manual-query-quick