diff --git a/404.html b/404.html index 540284b..def8420 100644 --- a/404.html +++ b/404.html @@ -42,6 +42,8 @@ + + @@ -109,43 +111,6 @@ - - - @@ -203,6 +168,26 @@ + + + + + + +
  • + + + + + API Reference + + + + +
  • + + + diff --git a/api/index.html b/api/index.html new file mode 100644 index 0000000..2ae7b22 --- /dev/null +++ b/api/index.html @@ -0,0 +1,876 @@ + + + + + + + + + + + + + + + + + + + + + API Reference - Blueprint Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    + + + + +

    API Reference

    + + +
    + + + +

    + opennotebookllm.preprocessing.data_cleaners + + +

    + +
    + + + + + + + + +
    + + + + + + + + + +
    + + +

    + clean_html(text) + +

    + + +
    + +

    Clean HTML text.

    + + +
    + This function removes +
      +
    • scripts
    • +
    • styles
    • +
    • links
    • +
    • meta tags
    • +
    +

    In addition, it calls clean_with_regex.

    + + +

    Examples:

    +
    >>> clean_html("<html><body><p>Hello,  world!  </p></body></html>")
    +"Hello, world!"
    +
    + + +

    Parameters:

    + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    + text + + str + +
    +

    The HTML text to clean.

    +
    +
    + required +
    + + +

    Returns:

    + + + + + + + + + + + + + +
    Name TypeDescription
    str + str + +
    +

    The cleaned text.

    +
    +
    + +
    + Source code in src/opennotebookllm/preprocessing/data_cleaners.py +
    36
    +37
    +38
    +39
    +40
    +41
    +42
    +43
    +44
    +45
    +46
    +47
    +48
    +49
    +50
    +51
    +52
    +53
    +54
    +55
    +56
    +57
    +58
    +59
    +60
    +61
    def clean_html(text: str) -> str:
    +    """Clean HTML text.
    +
    +    This function removes:
    +        - scripts
    +        - styles
    +        - links
    +        - meta tags
    +
    +    In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex].
    +
    +    Examples:
    +        >>> clean_html("<html><body><p>Hello,  world!  </p></body></html>")
    +        "Hello, world!"
    +
    +    Args:
    +        text (str): The HTML text to clean.
    +
    +    Returns:
    +        str: The cleaned text.
    +    """
    +    soup = BeautifulSoup(text, "html.parser")
    +    for tag in soup(["script", "style", "link", "meta"]):
    +        tag.decompose()
    +    text = soup.get_text()
    +    return clean_with_regex(text)
    +
    +
    +
    + +
    + +
    + + +

    + clean_markdown(text) + +

    + + +
    + +

    Clean Markdown text.

    + + +
    + This function removes +
      +
    • markdown images
    • +
    +

    In addition, it calls clean_with_regex.

    + + +

    Examples:

    +
    >>> clean_markdown('# Title   with image ![alt text](image.jpg "Image Title")')
    +"Title with image"
    +
    + + +

    Parameters:

    + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    + text + + str + +
    +

    The Markdown text to clean.

    +
    +
    + required +
    + + +

    Returns:

    + + + + + + + + + + + + + +
    Name TypeDescription
    str + str + +
    +

    The cleaned text.

    +
    +
    + +
    + Source code in src/opennotebookllm/preprocessing/data_cleaners.py +
    64
    +65
    +66
    +67
    +68
    +69
    +70
    +71
    +72
    +73
    +74
    +75
    +76
    +77
    +78
    +79
    +80
    +81
    +82
    +83
    +84
    def clean_markdown(text: str) -> str:
    +    """Clean Markdown text.
    +
    +    This function removes:
    +        - markdown images
    +
    +    In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex].
    +
    +    Examples:
    +        >>> clean_markdown('# Title   with image ![alt text](image.jpg "Image Title")')
    +        "Title with image"
    +
    +    Args:
    +        text (str): The Markdown text to clean.
    +
    +    Returns:
    +        str: The cleaned text.
    +    """
    +    text = re.sub(r'!\[.*?\]\(.*?(".*?")?\)', "", text)
    +
    +    return clean_with_regex(text)
    +
    +
    +
    + +
    + +
    + + +

    + clean_with_regex(text) + +

    + + +
    + +

    Clean text using regular expressions.

    + + +
    + This function removes +
      +
    • URLs
    • +
    • emails
    • +
    • special characters
    • +
    • extra spaces
    • +
    +
    + +

    Examples:

    +
    >>> clean_with_regex(" Hello,   world! http://example.com")
    +"Hello, world!"
    +
    + + +

    Parameters:

    + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    + text + + str + +
    +

    The text to clean.

    +
    +
    + required +
    + + +

    Returns:

    + + + + + + + + + + + + + +
    Name TypeDescription
    str + str + +
    +

    The cleaned text.

    +
    +
    + +
    + Source code in src/opennotebookllm/preprocessing/data_cleaners.py +
     5
    + 6
    + 7
    + 8
    + 9
    +10
    +11
    +12
    +13
    +14
    +15
    +16
    +17
    +18
    +19
    +20
    +21
    +22
    +23
    +24
    +25
    +26
    +27
    +28
    +29
    +30
    +31
    +32
    +33
    def clean_with_regex(text: str) -> str:
    +    """
    +    Clean text using regular expressions.
    +
    +    This function removes:
    +        - URLs
    +        - emails
    +        - special characters
    +        - extra spaces
    +
    +    Examples:
    +        >>> clean_with_regex("\xa0Hello,   world! http://example.com")
    +        "Hello, world!"
    +
    +    Args:
    +        text (str): The text to clean.
    +
    +    Returns:
    +        str: The cleaned text.
    +    """
    +    text = re.sub(
    +        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
    +        "",
    +        text,
    +    )
    +    text = re.sub(r"[\w\.-]+@[\w\.-]+\.[\w]+", "", text)
    +    text = re.sub(r'[^a-zA-Z0-9\s.,!?;:"\']', "", text)
    +    text = re.sub(r"\s+", " ", text).strip()
    +    return text
    +
    +
    +
    + +
    + + + +
    + +
    + +
    + + + + + + + + + + + + + +
    +
    + + + +
    + +
    + + + +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css new file mode 100644 index 0000000..b500381 --- /dev/null +++ b/assets/_mkdocstrings.css @@ -0,0 +1,143 @@ + +/* Avoid breaking parameter names, etc. in table cells. */ +.doc-contents td code { + word-break: normal !important; +} + +/* No line break before first paragraph of descriptions. */ +.doc-md-description, +.doc-md-description>p:first-child { + display: inline; +} + +/* Max width for docstring sections tables. */ +.doc .md-typeset__table, +.doc .md-typeset__table table { + display: table !important; + width: 100%; +} + +.doc .md-typeset__table tr { + display: table-row; +} + +/* Defaults in Spacy table style. */ +.doc-param-default { + float: right; +} + +/* Parameter headings must be inline, not blocks. */ +.doc-heading-parameter { + display: inline; +} + +/* Prefer space on the right, not the left of parameter permalinks. */ +.doc-heading-parameter .headerlink { + margin-left: 0 !important; + margin-right: 0.2rem; +} + +/* Backward-compatibility: docstring section titles in bold. */ +.doc-section-title { + font-weight: bold; +} + +/* Symbols in Navigation and ToC. 
*/ +:root, :host, +[data-md-color-scheme="default"] { + --doc-symbol-parameter-fg-color: #df50af; + --doc-symbol-attribute-fg-color: #953800; + --doc-symbol-function-fg-color: #8250df; + --doc-symbol-method-fg-color: #8250df; + --doc-symbol-class-fg-color: #0550ae; + --doc-symbol-module-fg-color: #5cad0f; + + --doc-symbol-parameter-bg-color: #df50af1a; + --doc-symbol-attribute-bg-color: #9538001a; + --doc-symbol-function-bg-color: #8250df1a; + --doc-symbol-method-bg-color: #8250df1a; + --doc-symbol-class-bg-color: #0550ae1a; + --doc-symbol-module-bg-color: #5cad0f1a; +} + +[data-md-color-scheme="slate"] { + --doc-symbol-parameter-fg-color: #ffa8cc; + --doc-symbol-attribute-fg-color: #ffa657; + --doc-symbol-function-fg-color: #d2a8ff; + --doc-symbol-method-fg-color: #d2a8ff; + --doc-symbol-class-fg-color: #79c0ff; + --doc-symbol-module-fg-color: #baff79; + + --doc-symbol-parameter-bg-color: #ffa8cc1a; + --doc-symbol-attribute-bg-color: #ffa6571a; + --doc-symbol-function-bg-color: #d2a8ff1a; + --doc-symbol-method-bg-color: #d2a8ff1a; + --doc-symbol-class-bg-color: #79c0ff1a; + --doc-symbol-module-bg-color: #baff791a; +} + +code.doc-symbol { + border-radius: .1rem; + font-size: .85em; + padding: 0 .3em; + font-weight: bold; +} + +code.doc-symbol-parameter { + color: var(--doc-symbol-parameter-fg-color); + background-color: var(--doc-symbol-parameter-bg-color); +} + +code.doc-symbol-parameter::after { + content: "param"; +} + +code.doc-symbol-attribute { + color: var(--doc-symbol-attribute-fg-color); + background-color: var(--doc-symbol-attribute-bg-color); +} + +code.doc-symbol-attribute::after { + content: "attr"; +} + +code.doc-symbol-function { + color: var(--doc-symbol-function-fg-color); + background-color: var(--doc-symbol-function-bg-color); +} + +code.doc-symbol-function::after { + content: "func"; +} + +code.doc-symbol-method { + color: var(--doc-symbol-method-fg-color); + background-color: var(--doc-symbol-method-bg-color); +} + 
+code.doc-symbol-method::after { + content: "meth"; +} + +code.doc-symbol-class { + color: var(--doc-symbol-class-fg-color); + background-color: var(--doc-symbol-class-bg-color); +} + +code.doc-symbol-class::after { + content: "class"; +} + +code.doc-symbol-module { + color: var(--doc-symbol-module-fg-color); + background-color: var(--doc-symbol-module-bg-color); +} + +code.doc-symbol-module::after { + content: "mod"; +} + +.doc-signature .autorefs { + color: inherit; + border-bottom: 1px dotted currentcolor; +} diff --git a/index.html b/index.html index 602c880..c5e1737 100644 --- a/index.html +++ b/index.html @@ -10,6 +10,8 @@ + + @@ -42,6 +44,8 @@ + + @@ -114,43 +118,6 @@ - - - @@ -218,6 +185,26 @@ + + + + + + +
  • + + + + + API Reference + + + + +
  • + + + diff --git a/objects.inv b/objects.inv new file mode 100644 index 0000000..b530b31 Binary files /dev/null and b/objects.inv differ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index c584e19..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -mkdocs -mkdocs-material -mkdocstrings[python] diff --git a/search/search_index.json b/search/search_index.json deleted file mode 100644 index 770181f..0000000 --- a/search/search_index.json +++ /dev/null @@ -1 +0,0 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Wellcome to Blueprint docs","text":""}]} \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index cf5ee0d..bd75785 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ