Browse Source
* fix failover when node host is null
* add failover execute thread
* worker handle dead server
* fix task instance failover time check
* fix upgrade sql
* failover logic update

Co-authored-by: caishunfeng <534328519@qq.com>

3.0.0/version-upgrade
wind
3 years ago
committed by
GitHub
24 changed files with 479 additions and 74 deletions
@ -0,0 +1,38 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
-- Strip ONLY_FULL_GROUP_BY from the session sql_mode before running the upgrade.
SET sql_mode = (SELECT REPLACE(@@sql_mode, 'ONLY_FULL_GROUP_BY', ''));

-- uc_dolphin_T_t_ds_process_instance_A_restart_time
-- Adds the nullable `restart_time` column to t_ds_process_instance, but only
-- when the current database does not already contain that column.
-- NOTE(review): the lowercase "delimiter" directives are kept exactly as
-- written; presumably the upgrade script runner matches them literally - confirm.
DROP PROCEDURE IF EXISTS uc_dolphin_T_t_ds_process_instance_A_restart_time;
delimiter d//
CREATE PROCEDURE uc_dolphin_T_t_ds_process_instance_A_restart_time()
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA = (SELECT DATABASE())
            AND TABLE_NAME = 't_ds_process_instance'
            AND COLUMN_NAME = 'restart_time'
    )
    THEN
        ALTER TABLE t_ds_process_instance
            ADD COLUMN `restart_time` datetime DEFAULT NULL COMMENT 'process instance restart time';
    END IF;
END;

d//

delimiter ;
CALL uc_dolphin_T_t_ds_process_instance_A_restart_time();
DROP PROCEDURE uc_dolphin_T_t_ds_process_instance_A_restart_time;
@ -0,0 +1,16 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
@ -0,0 +1,41 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
delimiter d//
-- dolphin_update_metadata
-- Adds the nullable "restart_time" timestamp column to t_ds_process_instance
-- in the current schema, skipping the change when the column already exists.
-- Returns 'Success!' when the statement runs; on any error the error text
-- (SQLERRM) is returned to the caller instead of being raised.
CREATE OR REPLACE FUNCTION public.dolphin_update_metadata()
    RETURNS character varying
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE PARALLEL UNSAFE
AS $BODY$
DECLARE
    v_schema varchar;
    v_ddl varchar;
BEGIN
    -- resolve the schema the table is looked up in
    v_schema := current_schema();

    -- the schema name is quoted as an identifier before being spliced into the DDL
    v_ddl := 'ALTER TABLE ' || quote_ident(v_schema)
        || '.t_ds_process_instance ADD COLUMN IF NOT EXISTS "restart_time" timestamp DEFAULT NULL';
    EXECUTE v_ddl;

    RETURN 'Success!';
EXCEPTION
    WHEN OTHERS THEN
        -- failures are reported through the return value rather than re-raised
        RETURN SQLERRM;
END;
$BODY$;

select dolphin_update_metadata();

d//
@ -0,0 +1,16 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
@ -0,0 +1,110 @@ |
|||||||
|
/* |
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||||
|
* contributor license agreements. See the NOTICE file distributed with |
||||||
|
* this work for additional information regarding copyright ownership. |
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||||
|
* (the "License"); you may not use this file except in compliance with |
||||||
|
* the License. You may obtain a copy of the License at |
||||||
|
* |
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* |
||||||
|
* Unless required by applicable law or agreed to in writing, software |
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
* See the License for the specific language governing permissions and |
||||||
|
* limitations under the License. |
||||||
|
*/ |
||||||
|
|
||||||
|
package org.apache.dolphinscheduler.server.master.runner; |
||||||
|
|
||||||
|
import org.apache.dolphinscheduler.common.Constants; |
||||||
|
import org.apache.dolphinscheduler.common.enums.NodeType; |
||||||
|
import org.apache.dolphinscheduler.common.thread.Stopper; |
||||||
|
import org.apache.dolphinscheduler.common.thread.ThreadUtils; |
||||||
|
import org.apache.dolphinscheduler.server.master.config.MasterConfig; |
||||||
|
import org.apache.dolphinscheduler.server.master.registry.MasterRegistryClient; |
||||||
|
import org.apache.dolphinscheduler.service.process.ProcessService; |
||||||
|
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||||
|
|
||||||
|
import org.apache.commons.collections4.CollectionUtils; |
||||||
|
|
||||||
|
import java.util.Iterator; |
||||||
|
import java.util.List; |
||||||
|
|
||||||
|
import org.slf4j.Logger; |
||||||
|
import org.slf4j.LoggerFactory; |
||||||
|
import org.springframework.beans.factory.annotation.Autowired; |
||||||
|
import org.springframework.stereotype.Service; |
||||||
|
|
||||||
|
@Service |
||||||
|
public class FailoverExecuteThread extends Thread { |
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FailoverExecuteThread.class); |
||||||
|
|
||||||
|
@Autowired |
||||||
|
private MasterRegistryClient masterRegistryClient; |
||||||
|
|
||||||
|
@Autowired |
||||||
|
private RegistryClient registryClient; |
||||||
|
|
||||||
|
@Autowired |
||||||
|
private MasterConfig masterConfig; |
||||||
|
|
||||||
|
/** |
||||||
|
* process service |
||||||
|
*/ |
||||||
|
@Autowired |
||||||
|
private ProcessService processService; |
||||||
|
|
||||||
|
@Override |
||||||
|
public synchronized void start() { |
||||||
|
super.setName("FailoverExecuteThread"); |
||||||
|
super.start(); |
||||||
|
} |
||||||
|
|
||||||
|
@Override |
||||||
|
public void run() { |
||||||
|
while (Stopper.isRunning()) { |
||||||
|
logger.info("failover execute started"); |
||||||
|
try { |
||||||
|
List<String> hosts = getNeedFailoverMasterServers(); |
||||||
|
if (CollectionUtils.isEmpty(hosts)) { |
||||||
|
continue; |
||||||
|
} |
||||||
|
logger.info("need failover hosts:{}", hosts); |
||||||
|
|
||||||
|
for (String host : hosts) { |
||||||
|
String failoverPath = masterRegistryClient.getFailoverLockPath(NodeType.MASTER, host); |
||||||
|
try { |
||||||
|
registryClient.getLock(failoverPath); |
||||||
|
masterRegistryClient.failoverMaster(host); |
||||||
|
} catch (Exception e) { |
||||||
|
logger.error("{} server failover failed, host:{}", NodeType.MASTER, host, e); |
||||||
|
} finally { |
||||||
|
registryClient.releaseLock(failoverPath); |
||||||
|
} |
||||||
|
} |
||||||
|
} catch (Exception e) { |
||||||
|
logger.error("failover execute error", e); |
||||||
|
} finally { |
||||||
|
ThreadUtils.sleep((long) Constants.SLEEP_TIME_MILLIS * masterConfig.getFailoverInterval() * 60); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
private List<String> getNeedFailoverMasterServers() { |
||||||
|
// failover myself && failover dead masters
|
||||||
|
List<String> hosts = processService.queryNeedFailoverProcessInstanceHost(); |
||||||
|
|
||||||
|
Iterator<String> iterator = hosts.iterator(); |
||||||
|
while (iterator.hasNext()) { |
||||||
|
String host = iterator.next(); |
||||||
|
if (registryClient.checkNodeExists(host, NodeType.MASTER)) { |
||||||
|
if (!host.equals(masterRegistryClient.getLocalAddress())) { |
||||||
|
iterator.remove(); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
return hosts; |
||||||
|
} |
||||||
|
} |
Loading…
Reference in new issue