Validate only task commands are run by executors

apache · Jun 8, 2020 · 1dda6fd · 1dda6fd
1 parent b4b84a1
commit 1dda6fd
Show file tree

Hide file tree

Showing 6 changed files with 44 additions and 3 deletions.
diff --git a/airflow/executors/celery_executor.py b/airflow/executors/celery_executor.py
@@ -71,6 +71,9 @@
 @app.task
 def execute_command(command_to_exec: CommandType) -> None:
     """Executes command."""
+    if command_to_exec[0:3] != ["airflow", "tasks", "run"]:
+        raise ValueError('The command must start with ["airflow", "tasks", "run"].')
+
     log.info("Executing command in Celery: %s", command_to_exec)
     env = os.environ.copy()
     try:

diff --git a/airflow/executors/dask_executor.py b/airflow/executors/dask_executor.py
@@ -72,6 +72,9 @@ def execute_async(self,
                       queue: Optional[str] = None,
                       executor_config: Optional[Any] = None) -> None:
 
+        if command[0:3] != ["airflow", "tasks", "run"]:
+            raise ValueError('The command must start with ["airflow", "tasks", "run"].')
+
         def airflow_run():
             return subprocess.check_call(command, close_fds=True)
 

diff --git a/airflow/executors/kubernetes_executor.py b/airflow/executors/kubernetes_executor.py
@@ -459,6 +459,9 @@ def run_next(self, next_job: KubernetesJobType) -> None:
         if isinstance(command, str):
             command = [command]
 
+        if command[0:3] != ["airflow", "tasks", "run"]:  # slice, not [0]: an empty command fails here too
+            raise ValueError('The command must start with ["airflow", "tasks", "run"].')
+
         pod = PodGenerator.construct_pod(
             namespace=self.namespace,
             worker_uuid=self.worker_uuid,

diff --git a/airflow/executors/local_executor.py b/airflow/executors/local_executor.py
@@ -284,6 +284,10 @@ def execute_async(self, key: TaskInstanceKeyType,
         """Execute asynchronously."""
         if not self.impl:
             raise AirflowException(NOT_STARTED_MESSAGE)
+
+        if command[0:3] != ["airflow", "tasks", "run"]:
+            raise ValueError('The command must start with ["airflow", "tasks", "run"].')
+
         self.impl.execute_async(key=key, command=command, queue=queue, executor_config=executor_config)
 
     def sync(self) -> None:

diff --git a/airflow/executors/sequential_executor.py b/airflow/executors/sequential_executor.py
@@ -49,6 +49,10 @@ def execute_async(self,
                       command: CommandType,
                       queue: Optional[str] = None,
                       executor_config: Optional[Any] = None) -> None:
+
+        if command[0:3] != ["airflow", "tasks", "run"]:
+            raise ValueError('The command must start with ["airflow", "tasks", "run"].')
+
         self.commands_to_run.append((key, command))
 
     def sync(self) -> None:

diff --git a/tests/executors/test_celery_executor.py b/tests/executors/test_celery_executor.py
@@ -36,6 +36,7 @@
 from parameterized import parameterized
 
 from airflow.configuration import conf
+from airflow.exceptions import AirflowException
 from airflow.executors import celery_executor
 from airflow.executors.celery_executor import BulkStateFetcher
 from airflow.models import TaskInstance
@@ -101,13 +102,18 @@ class TestCeleryExecutor(unittest.TestCase):
     @pytest.mark.integration("rabbitmq")
     @pytest.mark.backend("mysql", "postgres")
     def test_celery_integration(self, broker_url):
-        with _prepare_app(broker_url) as app:
+        success_command = ['airflow', 'tasks', 'run', 'true', 'some_parameter']
+        fail_command = ['airflow', 'version']
+
+        def fake_execute_command(command):
+            if command != success_command:
+                raise AirflowException("fail")
+
+        with _prepare_app(broker_url, execute=fake_execute_command) as app:
             executor = celery_executor.CeleryExecutor()
             executor.start()
 
             with start_worker(app=app, logfile=sys.stdout, loglevel='info'):
-                success_command = ['true', 'some_parameter']
-                fail_command = ['false', 'some_parameter']
                 execute_date = datetime.datetime.now()
 
                 cached_celery_backend = celery_executor.execute_command.backend
@@ -202,6 +208,24 @@ def test_gauge_executor_metrics(self, mock_stats_gauge, mock_trigger_tasks, mock
                  mock.call('executor.running_tasks', mock.ANY)]
         mock_stats_gauge.assert_has_calls(calls)
 
+    @parameterized.expand((
+        [['true'], ValueError],
+        [['airflow', 'version'], ValueError],
+        [['airflow', 'tasks', 'run'], None]
+    ))
+    @mock.patch('subprocess.check_call')
+    def test_command_validation(self, command, expected_exception, mock_check_call):
+        # Check that we validate _on the receiving_ side, not just on the sending side
+        if expected_exception:
+            with pytest.raises(expected_exception):
+                celery_executor.execute_command(command)
+            mock_check_call.assert_not_called()
+        else:
+            celery_executor.execute_command(command)
+            mock_check_call.assert_called_once_with(
+                command, stderr=mock.ANY, close_fds=mock.ANY, env=mock.ANY,
+            )
+
 
 def test_operation_timeout_config():
     assert celery_executor.OPERATION_TIMEOUT == 2