From 5cc5efd108175b068bc3c40bcbb0f868f405f944 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 19 Nov 2024 10:44:51 +0000 Subject: [PATCH] Deployed e8ac586 with MkDocs version: 1.6.1 --- 404.html | 59 +-- api/index.html | 876 +++++++++++++++++++++++++++++++++++++++ assets/_mkdocstrings.css | 143 +++++++ index.html | 61 ++- objects.inv | Bin 0 -> 239 bytes requirements.txt | 3 - search/search_index.json | 1 - sitemap.xml.gz | Bin 127 -> 127 bytes 8 files changed, 1065 insertions(+), 78 deletions(-) create mode 100644 api/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 objects.inv delete mode 100644 requirements.txt delete mode 100644 search/search_index.json diff --git a/404.html b/404.html index 540284b..def8420 100644 --- a/404.html +++ b/404.html @@ -42,6 +42,8 @@ + + @@ -109,43 +111,6 @@ - - - @@ -203,6 +168,26 @@ + + + + + + +
  • + + + + + API Reference + + + + +
  • + + + diff --git a/api/index.html b/api/index.html new file mode 100644 index 0000000..2ae7b22 --- /dev/null +++ b/api/index.html @@ -0,0 +1,876 @@ + + + + + + + + + + + + + + + + + + + + + API Reference - Blueprint Docs + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    + + + + +

    API Reference

    + + +
    + + + +

    + opennotebookllm.preprocessing.data_cleaners + + +

    + +
    + + + + + + + + +
    + + + + + + + + + +
    + + +

    + clean_html(text) + +

    + + +
    + +

    Clean HTML text.

    + + +
    + This function removes +
      +
    • scripts
    • +
    • styles
    • +
    • links
    • +
    • meta tags
    • +
    +

    In addition, it calls clean_with_regex.

    + + +

    Examples:

    +
    >>> clean_html("<html><body><p>Hello,  world!  </p></body></html>")
    +"Hello, world!"
    +
    + + +

    Parameters:

    + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    + text + + str + +
    +

    The HTML text to clean.

    +
    +
    + required +
    + + +

    Returns:

    + + + + + + + + + + + + + +
    Name TypeDescription
    str + str + +
    +

    The cleaned text.

    +
    +
    + +
    + Source code in src/opennotebookllm/preprocessing/data_cleaners.py +
    36
    +37
    +38
    +39
    +40
    +41
    +42
    +43
    +44
    +45
    +46
    +47
    +48
    +49
    +50
    +51
    +52
    +53
    +54
    +55
    +56
    +57
    +58
    +59
    +60
    +61
    def clean_html(text: str) -> str:
    +    """Clean HTML text.
    +
    +    This function removes:
    +        - scripts
    +        - styles
    +        - links
    +        - meta tags
    +
    +    In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex].
    +
    +    Examples:
    +        >>> clean_html("<html><body><p>Hello,  world!  </p></body></html>")
    +        "Hello, world!"
    +
    +    Args:
    +        text (str): The HTML text to clean.
    +
    +    Returns:
    +        str: The cleaned text.
    +    """
    +    soup = BeautifulSoup(text, "html.parser")
    +    for tag in soup(["script", "style", "link", "meta"]):
    +        tag.decompose()
    +    text = soup.get_text()
    +    return clean_with_regex(text)
    +
    +
    +
    + +
    + +
    + + +

    + clean_markdown(text) + +

    + + +
    + +

    Clean Markdown text.

    + + +
    + This function removes +
      +
    • markdown images
    • +
    +

    In addition, it calls clean_with_regex.

    + + +

    Examples:

    +
    >>> clean_markdown('# Title   with image ![alt text](image.jpg "Image Title")')
    +"Title with image"
    +
    + + +

    Parameters:

    + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    + text + + str + +
    +

    The Markdown text to clean.

    +
    +
    + required +
    + + +

    Returns:

    + + + + + + + + + + + + + +
    Name TypeDescription
    str + str + +
    +

    The cleaned text.

    +
    +
    + +
    + Source code in src/opennotebookllm/preprocessing/data_cleaners.py +
    64
    +65
    +66
    +67
    +68
    +69
    +70
    +71
    +72
    +73
    +74
    +75
    +76
    +77
    +78
    +79
    +80
    +81
    +82
    +83
    +84
    def clean_markdown(text: str) -> str:
    +    """Clean Markdown text.
    +
    +    This function removes:
    +        - markdown images
    +
    +    In addition, it calls [clean_with_regex][opennotebookllm.preprocessing.data_cleaners.clean_with_regex].
    +
    +    Examples:
    +        >>> clean_markdown('# Title   with image ![alt text](image.jpg "Image Title")')
    +        "Title with image"
    +
    +    Args:
    +        text (str): The Markdown text to clean.
    +
    +    Returns:
    +        str: The cleaned text.
    +    """
    +    text = re.sub(r'!\[.*?\]\(.*?(".*?")?\)', "", text)
    +
    +    return clean_with_regex(text)
    +
    +
    +
    + +
    + +
    + + +

    + clean_with_regex(text) + +

    + + +
    + +

    Clean text using regular expressions.

    + + +
    + This function removes +
      +
    • URLs
    • +
    • emails
    • +
    • special characters
    • +
    • extra spaces
    • +
    +
    + +

    Examples:

    +
    >>> clean_with_regex(" Hello,   world! http://example.com")
    +"Hello, world!"
    +
    + + +

    Parameters:

    + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    + text + + str + +
    +

    The text to clean.

    +
    +
    + required +
    + + +

    Returns:

    + + + + + + + + + + + + + +
    Name TypeDescription
    str + str + +
    +

    The cleaned text.

    +
    +
    + +
    + Source code in src/opennotebookllm/preprocessing/data_cleaners.py +
     5
    + 6
    + 7
    + 8
    + 9
    +10
    +11
    +12
    +13
    +14
    +15
    +16
    +17
    +18
    +19
    +20
    +21
    +22
    +23
    +24
    +25
    +26
    +27
    +28
    +29
    +30
    +31
    +32
    +33
    def clean_with_regex(text: str) -> str:
    +    """
    +    Clean text using regular expressions.
    +
    +    This function removes:
    +        - URLs
    +        - emails
    +        - special characters
    +        - extra spaces
    +
    +    Examples:
    +        >>> clean_with_regex("\xa0Hello,   world! http://example.com")
    +        "Hello, world!"
    +
    +    Args:
    +        text (str): The text to clean.
    +
    +    Returns:
    +        str: The cleaned text.
    +    """
    +    text = re.sub(
    +        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
    +        "",
    +        text,
    +    )
    +    text = re.sub(r"[\w\.-]+@[\w\.-]+\.[\w]+", "", text)
    +    text = re.sub(r'[^a-zA-Z0-9\s.,!?;:"\']', "", text)
    +    text = re.sub(r"\s+", " ", text).strip()
    +    return text
    +
    +
    +
    + +
    + + + +
    + +
    + +
    + + + + + + + + + + + + + +
    +
    + + + +
    + +
    + + + +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css new file mode 100644 index 0000000..b500381 --- /dev/null +++ b/assets/_mkdocstrings.css @@ -0,0 +1,143 @@ + +/* Avoid breaking parameter names, etc. in table cells. */ +.doc-contents td code { + word-break: normal !important; +} + +/* No line break before first paragraph of descriptions. */ +.doc-md-description, +.doc-md-description>p:first-child { + display: inline; +} + +/* Max width for docstring sections tables. */ +.doc .md-typeset__table, +.doc .md-typeset__table table { + display: table !important; + width: 100%; +} + +.doc .md-typeset__table tr { + display: table-row; +} + +/* Defaults in Spacy table style. */ +.doc-param-default { + float: right; +} + +/* Parameter headings must be inline, not blocks. */ +.doc-heading-parameter { + display: inline; +} + +/* Prefer space on the right, not the left of parameter permalinks. */ +.doc-heading-parameter .headerlink { + margin-left: 0 !important; + margin-right: 0.2rem; +} + +/* Backward-compatibility: docstring section titles in bold. */ +.doc-section-title { + font-weight: bold; +} + +/* Symbols in Navigation and ToC. 
*/ +:root, :host, +[data-md-color-scheme="default"] { + --doc-symbol-parameter-fg-color: #df50af; + --doc-symbol-attribute-fg-color: #953800; + --doc-symbol-function-fg-color: #8250df; + --doc-symbol-method-fg-color: #8250df; + --doc-symbol-class-fg-color: #0550ae; + --doc-symbol-module-fg-color: #5cad0f; + + --doc-symbol-parameter-bg-color: #df50af1a; + --doc-symbol-attribute-bg-color: #9538001a; + --doc-symbol-function-bg-color: #8250df1a; + --doc-symbol-method-bg-color: #8250df1a; + --doc-symbol-class-bg-color: #0550ae1a; + --doc-symbol-module-bg-color: #5cad0f1a; +} + +[data-md-color-scheme="slate"] { + --doc-symbol-parameter-fg-color: #ffa8cc; + --doc-symbol-attribute-fg-color: #ffa657; + --doc-symbol-function-fg-color: #d2a8ff; + --doc-symbol-method-fg-color: #d2a8ff; + --doc-symbol-class-fg-color: #79c0ff; + --doc-symbol-module-fg-color: #baff79; + + --doc-symbol-parameter-bg-color: #ffa8cc1a; + --doc-symbol-attribute-bg-color: #ffa6571a; + --doc-symbol-function-bg-color: #d2a8ff1a; + --doc-symbol-method-bg-color: #d2a8ff1a; + --doc-symbol-class-bg-color: #79c0ff1a; + --doc-symbol-module-bg-color: #baff791a; +} + +code.doc-symbol { + border-radius: .1rem; + font-size: .85em; + padding: 0 .3em; + font-weight: bold; +} + +code.doc-symbol-parameter { + color: var(--doc-symbol-parameter-fg-color); + background-color: var(--doc-symbol-parameter-bg-color); +} + +code.doc-symbol-parameter::after { + content: "param"; +} + +code.doc-symbol-attribute { + color: var(--doc-symbol-attribute-fg-color); + background-color: var(--doc-symbol-attribute-bg-color); +} + +code.doc-symbol-attribute::after { + content: "attr"; +} + +code.doc-symbol-function { + color: var(--doc-symbol-function-fg-color); + background-color: var(--doc-symbol-function-bg-color); +} + +code.doc-symbol-function::after { + content: "func"; +} + +code.doc-symbol-method { + color: var(--doc-symbol-method-fg-color); + background-color: var(--doc-symbol-method-bg-color); +} + 
+code.doc-symbol-method::after { + content: "meth"; +} + +code.doc-symbol-class { + color: var(--doc-symbol-class-fg-color); + background-color: var(--doc-symbol-class-bg-color); +} + +code.doc-symbol-class::after { + content: "class"; +} + +code.doc-symbol-module { + color: var(--doc-symbol-module-fg-color); + background-color: var(--doc-symbol-module-bg-color); +} + +code.doc-symbol-module::after { + content: "mod"; +} + +.doc-signature .autorefs { + color: inherit; + border-bottom: 1px dotted currentcolor; +} diff --git a/index.html b/index.html index 602c880..c5e1737 100644 --- a/index.html +++ b/index.html @@ -10,6 +10,8 @@ + + @@ -42,6 +44,8 @@ + + @@ -114,43 +118,6 @@ - - - @@ -218,6 +185,26 @@ + + + + + + +
  • + + + + + API Reference + + + + +
  • + + + diff --git a/objects.inv b/objects.inv new file mode 100644 index 0000000000000000000000000000000000000000..b530b31206d28db2a4fca5a40ca4d39b44d7ca6a GIT binary patch literal 239 zcmY#Z2rkIT%&Sny%qvUHE6FdaR47X=D$dN$Q!wIERtPA{&q_@$u~Kl#DNQXX%FHWK zaLG?D2Fip%R9Pt)=m8;6AS5GIp(r&sF*7eEwMZd9O`#+svsfW5GbdF6NGIp#78Iox z7pJBulon^^r7KkBWG3lxRoq&8<{;N010IG8yW;~dtQKAW&*$W=9BuE(6Vh`2?LD(w z;l