From c8ecd387c2cfe6551f5a84a23fc670c84b5a3ea4 Mon Sep 17 00:00:00 2001
From: Omri Rosner <omri96rosner@gmail.com>
Date: Thu, 4 May 2023 23:04:02 +0300
Subject: [PATCH 1/4] To get the export task's current state, use the
 'task/progress' endpoint instead of the deprecated
 'runningtaskxml.action?taskId'

---
 atlassian/confluence.py | 53 +++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/atlassian/confluence.py b/atlassian/confluence.py
index 74fc6d52c..b5850364d 100644
--- a/atlassian/confluence.py
+++ b/atlassian/confluence.py
@@ -5,6 +5,7 @@
 import json
 
 from requests import HTTPError
+import requests
 from deprecated import deprecated
 from atlassian import utils
 from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable
@@ -2433,7 +2434,11 @@ def get_page_as_pdf(self, page_id):
         url = "spaces/flyingpdf/pdfpageexport.action?pageId={pageId}".format(pageId=page_id)
         if self.api_version == "cloud":
             url = self.get_pdf_download_url_for_confluence_cloud(url)
-
+            if not url:
+                log.error("Failed to get download PDF url.")
+                raise ApiNotFoundError("Failed to export page as PDF", reason="Failed to get download PDF url.")
+            # To download the PDF file, the request should be with no headers of authentications.
+            return requests.get(url).content
         return self.get(url, headers=headers, not_json_response=True)
 
     def get_page_as_word(self, page_id):
@@ -2669,45 +2674,41 @@ def get_pdf_download_url_for_confluence_cloud(self, url):
         :param url: URL to initiate PDF export
         :return: Download url for PDF file
         """
-        download_url = None
         try:
-            long_running_task = True
+            running_task = True
             headers = self.form_token_headers
             log.info("Initiate PDF export from Confluence Cloud")
             response = self.get(url, headers=headers, not_json_response=True)
             response_string = response.decode(encoding="utf-8", errors="strict")
             task_id = response_string.split('name="ajs-taskId" content="')[1].split('">')[0]
-            poll_url = "runningtaskxml.action?taskId={0}".format(task_id)
-            while long_running_task:
-                long_running_task_response = self.get(poll_url, headers=headers, not_json_response=True)
-                long_running_task_response_parts = long_running_task_response.decode(
-                    encoding="utf-8", errors="strict"
-                ).split("\n")
-                percentage_complete = long_running_task_response_parts[6].strip()
-                is_successful = long_running_task_response_parts[7].strip()
-                is_complete = long_running_task_response_parts[8].strip()
-                log.info("Sleep for 5s.")
-                time.sleep(5)
+            poll_url = "/services/api/v1/task/{0}/progress".format(task_id)
+            while running_task:
                 log.info("Check if export task has completed.")
-                if is_complete == "<isComplete>true</isComplete>":
-                    if is_successful == "<isSuccessful>true</isSuccessful>":
-                        log.info(percentage_complete)
-                        log.info("Downloading content...")
-                        log.debug("Extract taskId and download PDF.")
-                        current_status = long_running_task_response_parts[3]
-                        download_url = current_status.split("href=&quot;/wiki/")[1].split("&quot")[0]
-                        long_running_task = False
-                    elif is_successful == "<isSuccessful>false</isSuccessful>":
+                progress_response = self.get(poll_url)
+                percentage_complete = int(progress_response.get("progress", 0))
+                task_state = progress_response.get("state")
+                if percentage_complete == 100:
+                    running_task = False
+                    log.info(f"Task completed - {task_state}")
+                    task_result_url = progress_response.get("result")
+                    log.debug("Extract task results to download PDF.")
+                    if not task_result_url or not task_result_url.startswith("/wiki/services/api/v1/download/pdf"):
                         log.error("PDF conversion not successful.")
                         return None
                 else:
-                    log.info(percentage_complete)
+                    time.sleep(3)
+                    log.info(f"{percentage_complete}% - {task_state}")
+            log.debug("Task successfully done, querying the task result for the download url")
+            # task result url starts with /wiki, remove it.
+            task_result_url = task_result_url[5:]
+            task_content = self.get(task_result_url, not_json_response=True)
+            download_url = task_content.decode(encoding="utf-8", errors="strict")
+            log.debug("Successfully got the download url")
+            return download_url
         except IndexError as e:
             log.error(e)
             return None
 
-        return download_url
-
     def audit(
         self,
         start_date=None,

From 34fa6c7824d3b795fb5045cb78294ac60fefa476 Mon Sep 17 00:00:00 2001
From: Omri Rosner <omri96rosner@gmail.com>
Date: Thu, 4 May 2023 23:46:34 +0300
Subject: [PATCH 2/4] Add a timeout to the requests.get call that downloads the PDF

---
 atlassian/confluence.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/atlassian/confluence.py b/atlassian/confluence.py
index b5850364d..35a7b4fbd 100644
--- a/atlassian/confluence.py
+++ b/atlassian/confluence.py
@@ -2438,7 +2438,7 @@ def get_page_as_pdf(self, page_id):
                 log.error("Failed to get download PDF url.")
                 raise ApiNotFoundError("Failed to export page as PDF", reason="Failed to get download PDF url.")
             # To download the PDF file, the request should be with no headers of authentications.
-            return requests.get(url).content
+            return requests.get(url, timeout=75).content
         return self.get(url, headers=headers, not_json_response=True)
 
     def get_page_as_word(self, page_id):

From 02d83fca27d411c40de34e1f2002031668fdfff5 Mon Sep 17 00:00:00 2001
From: Omri Rosner <omri96rosner@gmail.com>
Date: Fri, 5 May 2023 00:08:21 +0300
Subject: [PATCH 3/4] log percentage_complete before sleeping

---
 atlassian/confluence.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/atlassian/confluence.py b/atlassian/confluence.py
index 35a7b4fbd..1b42337c9 100644
--- a/atlassian/confluence.py
+++ b/atlassian/confluence.py
@@ -2690,14 +2690,14 @@ def get_pdf_download_url_for_confluence_cloud(self, url):
                 if percentage_complete == 100:
                     running_task = False
                     log.info(f"Task completed - {task_state}")
-                    task_result_url = progress_response.get("result")
-                    log.debug("Extract task results to download PDF.")
-                    if not task_result_url or not task_result_url.startswith("/wiki/services/api/v1/download/pdf"):
+                    if task_state == "FAILED":
                         log.error("PDF conversion not successful.")
                         return None
+                    log.debug("Extract task results to download PDF.")
+                    task_result_url = progress_response.get("result")
                 else:
-                    time.sleep(3)
                     log.info(f"{percentage_complete}% - {task_state}")
+                    time.sleep(3)
             log.debug("Task successfully done, querying the task result for the download url")
             # task result url starts with /wiki, remove it.
             task_result_url = task_result_url[5:]

From 786aad92fc2824c591ca37018164a67a6cdd1d57 Mon Sep 17 00:00:00 2001
From: Omri Rosner <omri96rosner@gmail.com>
Date: Sun, 7 May 2023 10:49:11 +0300
Subject: [PATCH 4/4] Inline the removal of the /wiki prefix from the task result url

---
 atlassian/confluence.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/atlassian/confluence.py b/atlassian/confluence.py
index 1b42337c9..938e1e252 100644
--- a/atlassian/confluence.py
+++ b/atlassian/confluence.py
@@ -2700,8 +2700,7 @@ def get_pdf_download_url_for_confluence_cloud(self, url):
                     time.sleep(3)
             log.debug("Task successfully done, querying the task result for the download url")
             # task result url starts with /wiki, remove it.
-            task_result_url = task_result_url[5:]
-            task_content = self.get(task_result_url, not_json_response=True)
+            task_content = self.get(task_result_url[5:], not_json_response=True)
             download_url = task_content.decode(encoding="utf-8", errors="strict")
             log.debug("Successfully got the download url")
             return download_url