问题描述:
此次项目共有两个bug:
1.testWhileIdle is true, validationQuery not set
2.子模块的德鲁伊数据源一直冒重复的冒错信息:冒错信息如下:
原因分析:
1.testWhileIdle is true, validationQuery not set这个bug很好解决
直接把
改为
2.在学习SpringCloud时,子模块的德鲁伊数据源一直冒重复的冒错信息且冒错,Druid会一直不停的对数据源进行重试连接,这样的话我们的日志很快就爆了,在网上所搜索到的回答都无效,于是我花费了时间去读它的源码,源码如下:
public class CreateConnectionTask implements Runnable {
private int errorCount = 0;
@Override
public void run() {
runInternal();
}
private void runInternal() {
for (;;) {
// addLast
lock.lock();
try {
if (closed || closing) {
createTaskCount--;
return;
}
boolean emptyWait = true;
if (createError != null && poolingCount == 0) {
emptyWait = false;
}
if (emptyWait) {
// 必须存在线程等待,才创建连接
if (poolingCount >= notEmptyWaitThreadCount //
&& !(keepAlive && activeCount + poolingCount < minIdle)) {
createTaskCount--;
return;
}
// 防止创建超过maxActive数量的连接
if (activeCount + poolingCount >= maxActive) {
createTaskCount--;
return;
}
}
} finally {
lock.unlock();
}
PhysicalConnectionInfo physicalConnection = null;
try {
physicalConnection = createPhysicalConnection();
setFailContinuous(false);
} catch (OutOfMemoryError e) {
LOG.error("create connection OutOfMemoryError, out memory. ", e);
errorCount++;
if (errorCount > connectionErrorRetryAttempts && timeBetweenConnectErrorMillis > 0) {
// fail over retry attempts
setFailContinuous(true);
if (failFast) {
lock.lock();
try {
notEmpty.signalAll();
} finally {
lock.unlock();
}
}
if (breakAfterAcquireFailure) {
lock.lock();
try {
createTaskCount--;
} finally {
lock.unlock();
}
return;
}
this.errorCount = 0; // reset errorCount
if (closing || closed) {
createTaskCount--;
return;
}
createSchedulerFuture = createScheduler.schedule(this, timeBetweenConnectErrorMillis, TimeUnit.MILLISECONDS);
return;
}
} catch (SQLException e) {
LOG.error("create connection SQLException, url: " + jdbcUrl, e);
errorCount++;
if (errorCount > connectionErrorRetryAttempts && timeBetweenConnectErrorMillis > 0) {
// fail over retry attempts
setFailContinuous(true);
if (failFast) {
lock.lock();
try {
notEmpty.signalAll();
} finally {
lock.unlock();
}
}
if (breakAfterAcquireFailure) {
lock.lock();
try {
createTaskCount--;
} finally {
lock.unlock();
}
return;
}
this.errorCount = 0; // reset errorCount
if (closing || closed) {
createTaskCount--;
return;
}
createSchedulerFuture = createScheduler.schedule(this, timeBetweenConnectErrorMillis, TimeUnit.MILLISECONDS);
return;
}
} catch (RuntimeException e) {
LOG.error("create connection RuntimeException", e);
// unknow fatal exception
setFailContinuous(true);
continue;
} catch (Error e) {
lock.lock();
try {
createTaskCount--;
} finally {
lock.unlock();
}
LOG.error("create connection Error", e);
// unknow fatal exception
setFailContinuous(true);
break;
} catch (Throwable e) {
LOG.error("create connection unexecpted error.", e);
break;
}
if (physicalConnection == null) {
continue;
}
boolean result = put(physicalConnection);
if (!result) {
JdbcUtils.close(physicalConnection.getPhysicalConnection());
LOG.info("put physical connection to pool failed.");
}
break;
}
}
}
public class CreateConnectionThread extends Thread {
public CreateConnectionThread(String name){
super(name);
this.setDaemon(true);
}
public void run() {
initedLatch.countDown();
long lastDiscardCount = 0;
int errorCount = 0;
for (;;) {
// addLast
try {
lock.lockInterruptibly();
} catch (InterruptedException e2) {
break;
}
long discardCount = DruidDataSource.this.discardCount;
boolean discardChanged = discardCount - lastDiscardCount > 0;
lastDiscardCount = discardCount;
try {
boolean emptyWait = true;
if (createError != null
&& poolingCount == 0
&& !discardChanged) {
emptyWait = false;
}
if (emptyWait
&& asyncInit && createCount.get() < initialSize) {
emptyWait = false;
}
if (emptyWait) {
// 必须存在线程等待,才创建连接
if (poolingCount >= notEmptyWaitThreadCount //
&& !(keepAlive && activeCount + poolingCount < minIdle)) {
empty.await();
}
// 防止创建超过maxActive数量的连接
if (activeCount + poolingCount >= maxActive) {
empty.await();
continue;
}
}
} catch (InterruptedException e) {
lastCreateError = e;
lastErrorTimeMillis = System.currentTimeMillis();
if (!closing) {
LOG.error("create connection Thread Interrupted, url: " + jdbcUrl, e);
}
break;
} finally {
lock.unlock();
}
PhysicalConnectionInfo connection = null;
try {
connection = createPhysicalConnection();
setFailContinuous(false);
} catch (SQLException e) {
LOG.error("create connection SQLException, url: " + jdbcUrl + ", errorCode " + e.getErrorCode()
+ ", state " + e.getSQLState(), e);
errorCount++;
if (errorCount > connectionErrorRetryAttempts && timeBetweenConnectErrorMillis > 0) {
// fail over retry attempts
setFailContinuous(true);
if (failFast) {
lock.lock();
try {
notEmpty.signalAll();
} finally {
lock.unlock();
}
}
if (breakAfterAcquireFailure) {
break;
}
try {
Thread.sleep(timeBetweenConnectErrorMillis);
} catch (InterruptedException interruptEx) {
break;
}
}
} catch (RuntimeException e) {
LOG.error("create connection RuntimeException", e);
setFailContinuous(true);
continue;
} catch (Error e) {
LOG.error("create connection Error", e);
setFailContinuous(true);
break;
}
if (connection == null) {
continue;
}
boolean result = put(connection);
if (!result) {
JdbcUtils.close(connection.getPhysicalConnection());
LOG.info("put physical connection to pool failed.");
}
}
errorCount = 0; // reset errorCount
}
}
}
这就很骚了,从源码中我们可以看到,线程中使用了无参for循环再一直尝试进行数据源连接,代码中【errorCount > connectionErrorRetryAttempts && timeBetweenConnectErrorMillis > 0】当满足该判断条件时就会进行重试连接,接下来我们看一下源码中这两个属性值设置的是什么呢?(源码过长,只展示我们需要的代码,其他属性信息可自行扒源码~)
private static final long serialVersionUID = 1L;
public final static int DEFAULT_INITIAL_SIZE = 0;
public final static int DEFAULT_MAX_ACTIVE_SIZE = 8;
public final static int DEFAULT_MAX_IDLE = 8;
public final static int DEFAULT_MIN_IDLE = 0;
public final static int DEFAULT_MAX_WAIT = -1;
public final static String DEFAULT_VALIDATION_QUERY = null; //
public final static boolean DEFAULT_TEST_ON_BORROW = false;
public final static boolean DEFAULT_TEST_ON_RETURN = false;
public final static boolean DEFAULT_WHILE_IDLE = true;
public static final long DEFAULT_TIME_BETWEEN_EVICTION_RUNS_MILLIS = 60 * 1000L;
public static final long DEFAULT_TIME_BETWEEN_CONNECT_ERROR_MILLIS = 500;
public static final int DEFAULT_NUM_TESTS_PER_EVICTION_RUN = 3;
/*****************************华丽的分割线中间省略代码若干行*********************************/
protected volatile long timeBetweenEvictionRunsMillis = DEFAULT_TIME_BETWEEN_EVICTION_RUNS_MILLIS;
protected int connectionErrorRetryAttempts = 1;
protected boolean breakAfterAcquireFailure = false;
从中这段代码中我们可以看到connectionErrorRetryAttempts值为1,timeBetweenConnectErrorMillis值为500,而breakAfterAcquireFailure值为false,因此当我们数据源连接失败后,就会不断的进行重试连接,因此我对于对于该如何解决这样的问题我们就有了答案:
1.若不想让重试,我们可以设置breakAfterAcquireFailure(true);connectionErrorRetryAttempts(0);
2.若想要设置多久重试,我们只需要设置timeBetweenConnectErrorMillis(time);
解决方案:
知道原理之后,我们在我们自己编写的application.yml中去自定义配置druid的属性!
然后启动子模块启动类
可以说是完美解决bug了
总结
困扰了我两天的bug,这两天几乎都沉浸在谷歌浏览器中,阅读了无数的解决方案,未果,只好自己手撕druid源码,解决bug之后,一身释然,以前面对bug立马变痛苦面具,现在越来越渴望bug,bug会使我变得更秃变得更强,加油