diff --git a/.github/scripts/algolia.py b/.github/scripts/algolia.py
deleted file mode 100644
index 5071ea58006..00000000000
--- a/.github/scripts/algolia.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import os
-from re import S
-import sys
-import json
-from bs4 import BeautifulSoup
-from algoliasearch.search_client import SearchClient
-
-url = "docs.dapr.io"
-if len(sys.argv) > 1:
-    starting_directory = os.path.join(os.getcwd(), str(sys.argv[1]))
-else:
-    starting_directory = os.getcwd()
-
-ALGOLIA_APP_ID = os.getenv('ALGOLIA_APP_ID')
-ALGOLIA_API_KEY = os.getenv('ALGOLIA_API_WRITE_KEY')
-ALGOLIA_INDEX_NAME = os.getenv('ALGOLIA_INDEX_NAME')
-
-client = SearchClient.create(ALGOLIA_APP_ID, ALGOLIA_API_KEY)
-index = client.init_index(ALGOLIA_INDEX_NAME)
-
-excluded_files = [
-    "404.html",
-]
-
-exluded_directories = [
-    "zh-hans",
-]
-
-rankings = {
-    "Getting started": 0,
-    "Concepts": 100,
-    "Developing applications": 200,
-    "Operations": 300,
-    "Reference": 400,
-    "Contributing": 500,
-    "Home": 600
-}
-
-def scan_directory(directory: str, pages: list):
-    if os.path.basename(directory) in exluded_directories:
-        print(f'Skipping directory: {directory}')
-        return
-    for file in os.listdir(directory):
-        path = os.path.join(directory, file)
-        if os.path.isfile(path):
-            if file.endswith(".html") and file not in excluded_files:
-                if '' not in open(path, encoding="utf8").read():
-                    print(f'Indexing: {path}')
-                    pages.append(path)
-                else:
-                    print(f'Skipping hidden page: {path}')
-        else:
-            scan_directory(path, pages)
-
-def parse_file(path: str):
-    data = {}
-    data["hierarchy"] = {}
-    data["rank"] = 999
-    data["subrank"] = 99
-    data["type"] = "lvl2"
-    data["lvl0"] = ""
-    data["lvl1"] = ""
-    data["lvl2"] = ""
-    data["lvl3"] = ""
-    text = ""
-    subrank = 0
-    with open(path, "r", errors='ignore') as file:
-        content = file.read()
-        soup = BeautifulSoup(content, "html.parser")
-        for meta in soup.find_all("meta"):
-            if meta.get("name") == "description":
-                data["lvl2"] = meta.get("content")
-                data["hierarchy"]["lvl1"] = meta.get("content")
-            elif meta.get("property") == "og:title":
-                data["lvl0"] = meta.get("content")
-                data["hierarchy"]["lvl0"] = meta.get("content")
-                data["hierarchy"]["lvl2"] = meta.get("content")
-            elif meta.get("property") == "og:url":
-                data["url"] = meta.get("content")
-                data["path"] = meta.get("content").split(url)[1]
-                data["objectID"] = meta.get("content").split(url)[1]
-        breadcrumbs = soup.find_all("li", class_="breadcrumb-item")
-        try:
-            subrank = len(breadcrumbs)
-            data["subrank"] = subrank
-        except:
-            subrank = 99
-            data["subrank"] = 99
-        for bc in breadcrumbs:
-            section = bc.text.strip()
-            data["lvl1"] = section
-            data["hierarchy"]["lvl0"] = section
-            try:
-                data["rank"] = rankings[section] + subrank
-            except:
-                print(f"Rank not found for section {section}")
-                data["rank"] = 998
-            break
-        for p in soup.find_all("p"):
-            if p.text != "":
-                text = text + p.text
-        data["text"] = text
-    return data
-
-def index_payload(payload):
-    res = index.replace_all_objects(payload)
-    res.wait()
-
-
-if __name__ == "__main__":
-    pages = []
-    payload = []
-    scan_directory(starting_directory, pages)
-    for page in pages:
-        data = parse_file(page)
-        if "objectID" in data:
-            payload.append(data)
-    index_payload(payload)
diff --git a/.github/workflows/website-root.yml b/.github/workflows/website-root.yml
index bce0ee19d66..10c09bc6b11 100644
--- a/.github/workflows/website-root.yml
+++ b/.github/workflows/website-root.yml
@@ -61,12 +61,6 @@ jobs:
           output_location: "/"
           skip_app_build: true
           skip_deploy_on_missing_secrets: true
-      - name: Upload Hugo artifacts for Algolia
-        uses: actions/upload-artifact@v4
-        with:
-          name: hugo_build
-          path: ./daprdocs/public/
-          if-no-files-found: error
 
   close_staging_site:
     if: github.event_name == 'pull_request' && github.event.action == 'closed'
@@ -80,29 +74,3 @@ jobs:
           azure_static_web_apps_api_token: ${{ secrets.AZURE_STATIC_WEB_APPS_API_TOKEN_PROUD_BAY_0E9E0E81E }}
           action: "close"
           skip_deploy_on_missing_secrets: true
-
-  algolia_index:
-    name: Index site for Algolia
-    if: github.event_name == 'push'
-    needs: ['build_and_deploy_job']
-    runs-on: ubuntu-latest
-    env:
-      ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
-      ALGOLIA_API_WRITE_KEY: ${{ secrets.ALGOLIA_API_WRITE_KEY }}
-      ALGOLIA_INDEX_NAME: daprdocs
-    steps:
-      - name: Checkout docs repo
-        uses: actions/checkout@v4
-        with:
-          submodules: false
-      - name: Download Hugo artifacts for Algolia
-        uses: actions/download-artifact@v4
-        with:
-          name: hugo_build
-          path: site/
-      - name: Install Python packages
-        run: |
-          pip install --upgrade bs4
-          pip install --upgrade 'algoliasearch>=2.0,<3.0'
-      - name: Index site
-        run: python ./.github/scripts/algolia.py ./site
diff --git a/daprdocs/layouts/_partials/hooks/body-end.html b/daprdocs/layouts/_partials/hooks/body-end.html
index eb321b47a22..dddb0caec61 100644
--- a/daprdocs/layouts/_partials/hooks/body-end.html
+++ b/daprdocs/layouts/_partials/hooks/body-end.html
@@ -5,9 +5,9 @@
diff --git a/hugo.yaml b/hugo.yaml
index 980529773db..b74c0264499 100644
--- a/hugo.yaml
+++ b/hugo.yaml
@@ -188,9 +188,9 @@ params:
   search:
     algolia:
-      appId: 'O0QLQGNF38'
-      apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9'
-      indexName: daprdocs
+      appId: O0QLQGNF38
+      apiKey: 54ae43aa28ce8f00c54c8d5f544d29b9
+      indexName: crawler_dapr
     offlineSearch: false
 
 # User interface configuration