diff --git a/.github/workflows/publish-docs-index-typesense.yml b/.github/workflows/publish-docs-index-typesense.yml
new file mode 100644
index 0000000000..1b3da71527
--- /dev/null
+++ b/.github/workflows/publish-docs-index-typesense.yml
@@ -0,0 +1,30 @@
+# Crawls the published docs site and pushes the search index to Typesense.
+name: "Publish : Docs search index (Typesense)"
+
+on:
+  # Triggered manually
+  workflow_dispatch:
+
+jobs:
+  doc-indexer:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3 # You MUST checkout your repository first!
+
+      # Inputs below follow the interface documented by the
+      # celsiusnarhwal/typesense-scraper action.
+      - name: Run DocSearch Scraper
+        uses: celsiusnarhwal/typesense-scraper@v2
+        with:
+          # The secret containing your Typesense API key. Required.
+          api-key: ${{ secrets.TYPESENSE_API_KEY }}
+          # The hostname or IP address of your Typesense server. Required.
+          host: ${{ secrets.TYPESENSE_HOST }}
+          # The port on which your Typesense server is listening. Optional. Default: 8108.
+          port: 443
+          # The protocol to use when connecting to your Typesense server. Optional. Default: http.
+          protocol: https
+          # The path to your DocSearch config file, relative to the repository
+          # root. Optional. Default: docsearch.config.json.
+          config: packages/noco-docs/typesense-scrape-config.json
diff --git a/packages/noco-docs/typesense-scrape-config.json b/packages/noco-docs/typesense-scrape-config.json
new file mode 100644
index 0000000000..58915fc277
--- /dev/null
+++ b/packages/noco-docs/typesense-scrape-config.json
@@ -0,0 +1,50 @@
+{
+  "index_name": "nocodb-oss-docs-index",
+  "start_urls": [
+    "https://docs.nocodb.com/"
+  ],
+  "sitemap_urls": [
+    "https://docs.nocodb.com/sitemap.xml"
+  ],
+  "sitemap_alternate_links": true,
+  "stop_urls": [
+    "/tests"
+  ],
+  "selectors": {
+    "lvl0": {
+      "selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
+      "type": "xpath",
+      "global": true,
+      "default_value": "Documentation"
+    },
+    "lvl1": "header h1",
+    "lvl2": "article h2",
+    "lvl3": "article h3",
+    "lvl4": "article h4",
+    "lvl5": "article h5, article td:first-child",
+    "lvl6": "article h6",
+    "text": "article p, article li, article td:last-child"
+  },
+  "strip_chars": " .,;:#",
+  "custom_settings": {
+    "separatorsToIndex": "_",
+    "attributesForFaceting": [
+      "language",
+      "version",
+      "type",
+      "docusaurus_tag"
+    ],
+    "attributesToRetrieve": [
+      "hierarchy",
+      "content",
+      "anchor",
+      "url",
+      "url_without_anchor",
+      "type"
+    ]
+  },
+  "conversation_id": [
+    "833762294"
+  ],
+  "nb_hits": 46250
+}