Browse Source
* fix failover when node host is null * add failover execute thread * worker handle dead server * fix task instance failover time check * fix upgrade sql * failover logic update Co-authored-by: caishunfeng <534328519@qq.com>3.0.0/version-upgrade
wind
3 years ago
committed by
GitHub
24 changed files with 479 additions and 74 deletions
@ -0,0 +1,38 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
-- Strip ONLY_FULL_GROUP_BY from the current sql_mode before the upgrade statements run.
SET sql_mode = (SELECT REPLACE(@@sql_mode, 'ONLY_FULL_GROUP_BY', ''));

-- uc_dolphin_T_t_ds_process_instance_A_restart_time
-- Adds the nullable restart_time column to t_ds_process_instance, but only when the
-- column is not already present, so the script can be executed more than once.
DROP PROCEDURE IF EXISTS uc_dolphin_T_t_ds_process_instance_A_restart_time;
delimiter d//
CREATE PROCEDURE uc_dolphin_T_t_ds_process_instance_A_restart_time()
BEGIN
    -- Look the column up in information_schema for the currently selected database.
    IF NOT EXISTS (
        SELECT 1
        FROM information_schema.COLUMNS
        WHERE TABLE_SCHEMA = (SELECT DATABASE())
          AND TABLE_NAME = 't_ds_process_instance'
          AND COLUMN_NAME = 'restart_time'
    )
    THEN
        ALTER TABLE t_ds_process_instance
            ADD COLUMN `restart_time` datetime DEFAULT NULL COMMENT 'process instance restart time';
    END IF;
END;

d//

delimiter ;
-- Run the migration once, then remove the helper procedure again.
CALL uc_dolphin_T_t_ds_process_instance_A_restart_time();
DROP PROCEDURE uc_dolphin_T_t_ds_process_instance_A_restart_time;
@ -0,0 +1,16 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
@ -0,0 +1,41 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
delimiter d//
-- dolphin_update_metadata
-- Adds the restart_time column to t_ds_process_instance in the current schema.
-- Returns 'Success!' when the ALTER TABLE ran, otherwise the text of the error that occurred.
CREATE OR REPLACE FUNCTION public.dolphin_update_metadata()
    RETURNS character varying
    LANGUAGE 'plpgsql'
    COST 100
    VOLATILE PARALLEL UNSAFE
AS $BODY$
DECLARE
    v_schema varchar;
BEGIN
    -- The target table is addressed through whatever schema is current for this session.
    v_schema := current_schema();

    -- IF NOT EXISTS keeps the statement harmless when the column was added by an earlier run.
    EXECUTE 'ALTER TABLE ' || quote_ident(v_schema)
        || '.t_ds_process_instance ADD COLUMN IF NOT EXISTS "restart_time" timestamp DEFAULT NULL';

    RETURN 'Success!';
EXCEPTION
    WHEN OTHERS THEN
        -- Failures are not re-raised: the error message becomes the function result,
        -- so the caller has to inspect the returned text to notice a problem.
        RETURN SQLERRM;
END;
$BODY$;

select dolphin_update_metadata();

d//
@ -0,0 +1,16 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0 |
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
@ -0,0 +1,110 @@
|
||||
/* |
||||
* Licensed to the Apache Software Foundation (ASF) under one or more |
||||
* contributor license agreements. See the NOTICE file distributed with |
||||
* this work for additional information regarding copyright ownership. |
||||
* The ASF licenses this file to You under the Apache License, Version 2.0 |
||||
* (the "License"); you may not use this file except in compliance with |
||||
* the License. You may obtain a copy of the License at |
||||
* |
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* |
||||
* Unless required by applicable law or agreed to in writing, software |
||||
* distributed under the License is distributed on an "AS IS" BASIS, |
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
* See the License for the specific language governing permissions and |
||||
* limitations under the License. |
||||
*/ |
||||
|
||||
package org.apache.dolphinscheduler.server.master.runner; |
||||
|
||||
import org.apache.dolphinscheduler.common.Constants; |
||||
import org.apache.dolphinscheduler.common.enums.NodeType; |
||||
import org.apache.dolphinscheduler.common.thread.Stopper; |
||||
import org.apache.dolphinscheduler.common.thread.ThreadUtils; |
||||
import org.apache.dolphinscheduler.server.master.config.MasterConfig; |
||||
import org.apache.dolphinscheduler.server.master.registry.MasterRegistryClient; |
||||
import org.apache.dolphinscheduler.service.process.ProcessService; |
||||
import org.apache.dolphinscheduler.service.registry.RegistryClient; |
||||
|
||||
import org.apache.commons.collections4.CollectionUtils; |
||||
|
||||
import java.util.Iterator; |
||||
import java.util.List; |
||||
|
||||
import org.slf4j.Logger; |
||||
import org.slf4j.LoggerFactory; |
||||
import org.springframework.beans.factory.annotation.Autowired; |
||||
import org.springframework.stereotype.Service; |
||||
|
||||
@Service |
||||
public class FailoverExecuteThread extends Thread { |
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(FailoverExecuteThread.class); |
||||
|
||||
@Autowired |
||||
private MasterRegistryClient masterRegistryClient; |
||||
|
||||
@Autowired |
||||
private RegistryClient registryClient; |
||||
|
||||
@Autowired |
||||
private MasterConfig masterConfig; |
||||
|
||||
/** |
||||
* process service |
||||
*/ |
||||
@Autowired |
||||
private ProcessService processService; |
||||
|
||||
@Override |
||||
public synchronized void start() { |
||||
super.setName("FailoverExecuteThread"); |
||||
super.start(); |
||||
} |
||||
|
||||
@Override |
||||
public void run() { |
||||
while (Stopper.isRunning()) { |
||||
logger.info("failover execute started"); |
||||
try { |
||||
List<String> hosts = getNeedFailoverMasterServers(); |
||||
if (CollectionUtils.isEmpty(hosts)) { |
||||
continue; |
||||
} |
||||
logger.info("need failover hosts:{}", hosts); |
||||
|
||||
for (String host : hosts) { |
||||
String failoverPath = masterRegistryClient.getFailoverLockPath(NodeType.MASTER, host); |
||||
try { |
||||
registryClient.getLock(failoverPath); |
||||
masterRegistryClient.failoverMaster(host); |
||||
} catch (Exception e) { |
||||
logger.error("{} server failover failed, host:{}", NodeType.MASTER, host, e); |
||||
} finally { |
||||
registryClient.releaseLock(failoverPath); |
||||
} |
||||
} |
||||
} catch (Exception e) { |
||||
logger.error("failover execute error", e); |
||||
} finally { |
||||
ThreadUtils.sleep((long) Constants.SLEEP_TIME_MILLIS * masterConfig.getFailoverInterval() * 60); |
||||
} |
||||
} |
||||
} |
||||
|
||||
private List<String> getNeedFailoverMasterServers() { |
||||
// failover myself && failover dead masters
|
||||
List<String> hosts = processService.queryNeedFailoverProcessInstanceHost(); |
||||
|
||||
Iterator<String> iterator = hosts.iterator(); |
||||
while (iterator.hasNext()) { |
||||
String host = iterator.next(); |
||||
if (registryClient.checkNodeExists(host, NodeType.MASTER)) { |
||||
if (!host.equals(masterRegistryClient.getLocalAddress())) { |
||||
iterator.remove(); |
||||
} |
||||
} |
||||
} |
||||
return hosts; |
||||
} |
||||
} |
Loading…
Reference in new issue