Browse Source

typesense-scraper workflow job

pull/6478/head
starbirdtech383 1 year ago
parent
commit
00e6c2e9c2
  1. 26
      .github/workflows/publish-docs-index-typesense.yml
  2. 50
      packages/noco-docs/typesense-scrape-config.json

26
.github/workflows/publish-docs-index-typesense.yml

@ -0,0 +1,26 @@
# Manually triggered workflow that runs a DocSearch-style scraper and pushes
# the resulting documentation search index to a Typesense server.
name: "Publish : Docs search index (Typesense)"

on:
  # Triggered manually
  workflow_dispatch:

# The job only reads the repository (checkout); it needs no write scopes.
permissions:
  contents: read

jobs:
  doc-indexer:
    runs-on: ubuntu-latest
    steps:
      # The scraper reads its config file from the workspace, so the
      # repository MUST be checked out first.
      - name: Checkout Repository
        uses: actions/checkout@v3

      - name: Run DocSearch Scraper
        # Published scraper action. `./packages/noco-docs` holds the scraper
        # config, not an action definition (no action.yml is added there),
        # so it cannot be used as a local action.
        uses: celsiusnarhwal/typesense-scraper@v2
        with:
          # The secret containing your Typesense API key. Required.
          api-key: ${{ secrets.TYPESENSE_API_KEY }}
          # The hostname or IP address of your Typesense server. Required.
          host: ${{ secrets.TYPESENSE_HOST }}
          # The port on which your Typesense server is listening.
          # Optional. Default: 8108.
          port: 443
          # The protocol to use when connecting to your Typesense server.
          # Optional. Default: http.
          protocol: https
          # The path to your DocSearch config file.
          # Optional. Default: docsearch.config.json.
          # NOTE(review): assumed to resolve from the repository root, where
          # the config lives under packages/noco-docs/ — confirm.
          config: packages/noco-docs/typesense-scrape-config.json

50
packages/noco-docs/typesense-scrape-config.json

@ -0,0 +1,50 @@
{
"index_name": "nocodb-oss-docs-index",
"start_urls": [
"https://docs.nocodb.com/"
],
"sitemap_urls": [
"https://docs.nocodb.com/sitemap.xml"
],
"sitemap_alternate_links": true,
"stop_urls": [
"/tests"
],
"selectors": {
"lvl0": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "Documentation"
},
"lvl1": "header h1",
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
},
"strip_chars": " .,;:#",
"custom_settings": {
"separatorsToIndex": "_",
"attributesForFaceting": [
"language",
"version",
"type",
"docusaurus_tag"
],
"attributesToRetrieve": [
"hierarchy",
"content",
"anchor",
"url",
"url_without_anchor",
"type"
]
},
"conversation_id": [
"833762294"
],
"nb_hits": 46250
}
Loading…
Cancel
Save