From c8ecd387c2cfe6551f5a84a23fc670c84b5a3ea4 Mon Sep 17 00:00:00 2001 From: Omri Rosner Date: Thu, 4 May 2023 23:04:02 +0300 Subject: [PATCH 1/4] To get the export task's current state, use the 'task/progress' endpoint instead of the deprecated 'runningtaskxml.action?taskId' --- atlassian/confluence.py | 53 +++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/atlassian/confluence.py b/atlassian/confluence.py index 74fc6d52c..b5850364d 100644 --- a/atlassian/confluence.py +++ b/atlassian/confluence.py @@ -5,6 +5,7 @@ import json from requests import HTTPError +import requests from deprecated import deprecated from atlassian import utils from .errors import ApiError, ApiNotFoundError, ApiPermissionError, ApiValueError, ApiConflictError, ApiNotAcceptable @@ -2433,7 +2434,11 @@ def get_page_as_pdf(self, page_id): url = "spaces/flyingpdf/pdfpageexport.action?pageId={pageId}".format(pageId=page_id) if self.api_version == "cloud": url = self.get_pdf_download_url_for_confluence_cloud(url) - + if not url: + log.error("Failed to get download PDF url.") + raise ApiNotFoundError("Failed to export page as PDF", reason="Failed to get download PDF url.") + # To download the PDF file, the request should be with no headers of authentications. 
+ return requests.get(url).content return self.get(url, headers=headers, not_json_response=True) def get_page_as_word(self, page_id): @@ -2669,45 +2674,41 @@ def get_pdf_download_url_for_confluence_cloud(self, url): :param url: URL to initiate PDF export :return: Download url for PDF file """ - download_url = None try: - long_running_task = True + running_task = True headers = self.form_token_headers log.info("Initiate PDF export from Confluence Cloud") response = self.get(url, headers=headers, not_json_response=True) response_string = response.decode(encoding="utf-8", errors="strict") task_id = response_string.split('name="ajs-taskId" content="')[1].split('">')[0] - poll_url = "runningtaskxml.action?taskId={0}".format(task_id) - while long_running_task: - long_running_task_response = self.get(poll_url, headers=headers, not_json_response=True) - long_running_task_response_parts = long_running_task_response.decode( - encoding="utf-8", errors="strict" - ).split("\n") - percentage_complete = long_running_task_response_parts[6].strip() - is_successful = long_running_task_response_parts[7].strip() - is_complete = long_running_task_response_parts[8].strip() - log.info("Sleep for 5s.") - time.sleep(5) + poll_url = "/services/api/v1/task/{0}/progress".format(task_id) + while running_task: log.info("Check if export task has completed.") - if is_complete == "true": - if is_successful == "true": - log.info(percentage_complete) - log.info("Downloading content...") - log.debug("Extract taskId and download PDF.") - current_status = long_running_task_response_parts[3] - download_url = current_status.split("href=&quot;/wiki/")[1].split("&quot;")[0] - long_running_task = False - elif is_successful == "false": + progress_response = self.get(poll_url) + percentage_complete = int(progress_response.get("progress", 0)) + task_state = progress_response.get("state") + if percentage_complete == 100: + running_task = False + log.info(f"Task completed - {task_state}") + task_result_url = 
progress_response.get("result") + log.debug("Extract task results to download PDF.") + if not task_result_url or not task_result_url.startswith("/wiki/services/api/v1/download/pdf"): log.error("PDF conversion not successful.") return None else: - log.info(percentage_complete) + time.sleep(3) + log.info(f"{percentage_complete}% - {task_state}") + log.debug("Task successfully done, querying the task result for the download url") + # task result url starts with /wiki, remove it. + task_result_url = task_result_url[5:] + task_content = self.get(task_result_url, not_json_response=True) + download_url = task_content.decode(encoding="utf-8", errors="strict") + log.debug("Successfully got the download url") + return download_url except IndexError as e: log.error(e) return None - return download_url - def audit( self, start_date=None, From 34fa6c7824d3b795fb5045cb78294ac60fefa476 Mon Sep 17 00:00:00 2001 From: Omri Rosner Date: Thu, 4 May 2023 23:46:34 +0300 Subject: [PATCH 2/4] fix Requests call without timeout issue --- atlassian/confluence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atlassian/confluence.py b/atlassian/confluence.py index b5850364d..35a7b4fbd 100644 --- a/atlassian/confluence.py +++ b/atlassian/confluence.py @@ -2438,7 +2438,7 @@ def get_page_as_pdf(self, page_id): log.error("Failed to get download PDF url.") raise ApiNotFoundError("Failed to export page as PDF", reason="Failed to get download PDF url.") # To download the PDF file, the request should be with no headers of authentications. 
- return requests.get(url).content + return requests.get(url, timeout=75).content return self.get(url, headers=headers, not_json_response=True) def get_page_as_word(self, page_id): From 02d83fca27d411c40de34e1f2002031668fdfff5 Mon Sep 17 00:00:00 2001 From: Omri Rosner Date: Fri, 5 May 2023 00:08:21 +0300 Subject: [PATCH 3/4] log percentage_complete before sleeping --- atlassian/confluence.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/atlassian/confluence.py b/atlassian/confluence.py index 35a7b4fbd..1b42337c9 100644 --- a/atlassian/confluence.py +++ b/atlassian/confluence.py @@ -2690,14 +2690,14 @@ def get_pdf_download_url_for_confluence_cloud(self, url): if percentage_complete == 100: running_task = False log.info(f"Task completed - {task_state}") - task_result_url = progress_response.get("result") - log.debug("Extract task results to download PDF.") - if not task_result_url or not task_result_url.startswith("/wiki/services/api/v1/download/pdf"): + if task_state == "FAILED": log.error("PDF conversion not successful.") return None + log.debug("Extract task results to download PDF.") + task_result_url = progress_response.get("result") else: - time.sleep(3) log.info(f"{percentage_complete}% - {task_state}") + time.sleep(3) log.debug("Task successfully done, querying the task result for the download url") # task result url starts with /wiki, remove it. 
task_result_url = task_result_url[5:] From 786aad92fc2824c591ca37018164a67a6cdd1d57 Mon Sep 17 00:00:00 2001 From: Omri Rosner Date: Sun, 7 May 2023 10:49:11 +0300 Subject: [PATCH 4/4] task result url starts with /wiki, remove it --- atlassian/confluence.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/atlassian/confluence.py b/atlassian/confluence.py index 1b42337c9..938e1e252 100644 --- a/atlassian/confluence.py +++ b/atlassian/confluence.py @@ -2700,8 +2700,7 @@ def get_pdf_download_url_for_confluence_cloud(self, url): time.sleep(3) log.debug("Task successfully done, querying the task result for the download url") # task result url starts with /wiki, remove it. - task_result_url = task_result_url[5:] - task_content = self.get(task_result_url, not_json_response=True) + task_content = self.get(task_result_url[5:], not_json_response=True) download_url = task_content.decode(encoding="utf-8", errors="strict") log.debug("Successfully got the download url") return download_url